ufs_dirhash.c revision 149178
1/*- 2 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26/* 27 * This implements a hash-based lookup scheme for UFS directories. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_dirhash.c 149178 2005-08-17 08:48:42Z iedowse $"); 32 33#include "opt_ufs.h" 34 35#ifdef UFS_DIRHASH 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/lock.h> 41#include <sys/mutex.h> 42#include <sys/malloc.h> 43#include <sys/fnv_hash.h> 44#include <sys/proc.h> 45#include <sys/bio.h> 46#include <sys/buf.h> 47#include <sys/vnode.h> 48#include <sys/mount.h> 49#include <sys/sysctl.h> 50#include <vm/uma.h> 51 52#include <ufs/ufs/quota.h> 53#include <ufs/ufs/inode.h> 54#include <ufs/ufs/dir.h> 55#include <ufs/ufs/dirhash.h> 56#include <ufs/ufs/extattr.h> 57#include <ufs/ufs/ufsmount.h> 58#include <ufs/ufs/ufs_extern.h> 59 60#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 61#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) 62#define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 63#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 64 65static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables"); 66 67static SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); 68 69static int ufs_mindirhashsize = DIRBLKSIZ * 5; 70SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, 71 &ufs_mindirhashsize, 72 0, "minimum directory size in bytes for which to use hashed lookup"); 73static int ufs_dirhashmaxmem = 2 * 1024 * 1024; 74SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, 75 0, "maximum allowed dirhash memory usage"); 76static int ufs_dirhashmem; 77SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, 78 0, "current dirhash memory usage"); 79static int ufs_dirhashcheck = 0; 80SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, 81 0, "enable extra sanity tests"); 82 83 84static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 85static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 86static void ufsdirhash_delslot(struct dirhash *dh, int slot); 87static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 88 doff_t offset); 89static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 90static int ufsdirhash_recycle(int wanted); 91 92static uma_zone_t ufsdirhash_zone; 93 94#define DIRHASHLIST_LOCK() mtx_lock(&ufsdirhash_mtx) 95#define DIRHASHLIST_UNLOCK() mtx_unlock(&ufsdirhash_mtx) 96#define DIRHASH_LOCK(dh) mtx_lock(&(dh)->dh_mtx) 97#define DIRHASH_UNLOCK(dh) mtx_unlock(&(dh)->dh_mtx) 98#define DIRHASH_BLKALLOC_WAITOK() uma_zalloc(ufsdirhash_zone, M_WAITOK) 99#define DIRHASH_BLKFREE(ptr) uma_zfree(ufsdirhash_zone, (ptr)) 100 101/* Dirhash list; recently-used entries are near the tail. */ 102static TAILQ_HEAD(, dirhash) ufsdirhash_list; 103 104/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 105static struct mtx ufsdirhash_mtx; 106 107/* 108 * Locking order: 109 * ufsdirhash_mtx 110 * dh_mtx 111 * 112 * The dh_mtx mutex should be acquired either via the inode lock, or via 113 * ufsdirhash_mtx. Only the owner of the inode may free the associated 114 * dirhash, but anything can steal its memory and set dh_hash to NULL. 115 */ 116 117/* 118 * Attempt to build up a hash table for the directory contents in 119 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 120 */ 121int 122ufsdirhash_build(struct inode *ip) 123{ 124 struct dirhash *dh; 125 struct buf *bp = NULL; 126 struct direct *ep; 127 struct vnode *vp; 128 doff_t bmask, pos; 129 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 130 131 /* Check if we can/should use dirhash. */ 132 if (ip->i_dirhash == NULL) { 133 if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) 134 return (-1); 135 } else { 136 /* Hash exists, but sysctls could have changed. */ 137 if (ip->i_size < ufs_mindirhashsize || 138 ufs_dirhashmem > ufs_dirhashmaxmem) { 139 ufsdirhash_free(ip); 140 return (-1); 141 } 142 /* Check if hash exists and is intact (note: unlocked read). */ 143 if (ip->i_dirhash->dh_hash != NULL) 144 return (0); 145 /* Free the old, recycled hash and build a new one. */ 146 ufsdirhash_free(ip); 147 } 148 149 /* Don't hash removed directories. */ 150 if (ip->i_effnlink == 0) 151 return (-1); 152 153 vp = ip->i_vnode; 154 /* Allocate 50% more entries than this dir size could ever need. */ 155 KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); 156 nslots = ip->i_size / DIRECTSIZ(1); 157 nslots = (nslots * 3 + 1) / 2; 158 narrays = howmany(nslots, DH_NBLKOFF); 159 nslots = narrays * DH_NBLKOFF; 160 dirblocks = howmany(ip->i_size, DIRBLKSIZ); 161 nblocks = (dirblocks * 3 + 1) / 2; 162 163 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 164 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 165 nblocks * sizeof(*dh->dh_blkfree); 166 DIRHASHLIST_LOCK(); 167 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 168 DIRHASHLIST_UNLOCK(); 169 if (memreqd > ufs_dirhashmaxmem / 2) 170 return (-1); 171 172 /* Try to free some space. */ 173 if (ufsdirhash_recycle(memreqd) != 0) 174 return (-1); 175 /* Enough was freed, and list has been locked. */ 176 } 177 ufs_dirhashmem += memreqd; 178 DIRHASHLIST_UNLOCK(); 179 180 /* 181 * Use non-blocking mallocs so that we will revert to a linear 182 * lookup on failure rather than potentially blocking forever. 183 */ 184 MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO); 185 if (dh == NULL) { 186 DIRHASHLIST_LOCK(); 187 ufs_dirhashmem -= memreqd; 188 DIRHASHLIST_UNLOCK(); 189 return (-1); 190 } 191 mtx_init(&dh->dh_mtx, "dirhash", NULL, MTX_DEF); 192 MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]), 193 M_DIRHASH, M_NOWAIT | M_ZERO); 194 MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]), 195 M_DIRHASH, M_NOWAIT); 196 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 197 goto fail; 198 for (i = 0; i < narrays; i++) { 199 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL) 200 goto fail; 201 for (j = 0; j < DH_NBLKOFF; j++) 202 dh->dh_hash[i][j] = DIRHASH_EMPTY; 203 } 204 205 /* Initialise the hash table and block statistics. */ 206 dh->dh_narrays = narrays; 207 dh->dh_hlen = nslots; 208 dh->dh_nblk = nblocks; 209 dh->dh_dirblks = dirblocks; 210 for (i = 0; i < dirblocks; i++) 211 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 212 for (i = 0; i < DH_NFSTATS; i++) 213 dh->dh_firstfree[i] = -1; 214 dh->dh_firstfree[DH_NFSTATS] = 0; 215 dh->dh_seqopt = 0; 216 dh->dh_seqoff = 0; 217 dh->dh_score = DH_SCOREINIT; 218 ip->i_dirhash = dh; 219 220 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 221 pos = 0; 222 while (pos < ip->i_size) { 223 /* If necessary, get the next directory block. */ 224 if ((pos & bmask) == 0) { 225 if (bp != NULL) 226 brelse(bp); 227 if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) 228 goto fail; 229 } 230 231 /* Add this entry to the hash. */ 232 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 233 if (ep->d_reclen == 0 || ep->d_reclen > 234 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 235 /* Corrupted directory. */ 236 brelse(bp); 237 goto fail; 238 } 239 if (ep->d_ino != 0) { 240 /* Add the entry (simplified ufsdirhash_add). */ 241 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 242 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 243 slot = WRAPINCR(slot, dh->dh_hlen); 244 dh->dh_hused++; 245 DH_ENTRY(dh, slot) = pos; 246 ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); 247 } 248 pos += ep->d_reclen; 249 } 250 251 if (bp != NULL) 252 brelse(bp); 253 DIRHASHLIST_LOCK(); 254 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 255 dh->dh_onlist = 1; 256 DIRHASHLIST_UNLOCK(); 257 return (0); 258 259fail: 260 if (dh->dh_hash != NULL) { 261 for (i = 0; i < narrays; i++) 262 if (dh->dh_hash[i] != NULL) 263 DIRHASH_BLKFREE(dh->dh_hash[i]); 264 FREE(dh->dh_hash, M_DIRHASH); 265 } 266 if (dh->dh_blkfree != NULL) 267 FREE(dh->dh_blkfree, M_DIRHASH); 268 mtx_destroy(&dh->dh_mtx); 269 FREE(dh, M_DIRHASH); 270 ip->i_dirhash = NULL; 271 DIRHASHLIST_LOCK(); 272 ufs_dirhashmem -= memreqd; 273 DIRHASHLIST_UNLOCK(); 274 return (-1); 275} 276 277/* 278 * Free any hash table associated with inode 'ip'. 279 */ 280void 281ufsdirhash_free(struct inode *ip) 282{ 283 struct dirhash *dh; 284 int i, mem; 285 286 if ((dh = ip->i_dirhash) == NULL) 287 return; 288 DIRHASHLIST_LOCK(); 289 DIRHASH_LOCK(dh); 290 if (dh->dh_onlist) 291 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 292 DIRHASH_UNLOCK(dh); 293 DIRHASHLIST_UNLOCK(); 294 295 /* The dirhash pointed to by 'dh' is exclusively ours now. */ 296 297 mem = sizeof(*dh); 298 if (dh->dh_hash != NULL) { 299 for (i = 0; i < dh->dh_narrays; i++) 300 DIRHASH_BLKFREE(dh->dh_hash[i]); 301 FREE(dh->dh_hash, M_DIRHASH); 302 FREE(dh->dh_blkfree, M_DIRHASH); 303 mem += dh->dh_narrays * sizeof(*dh->dh_hash) + 304 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 305 dh->dh_nblk * sizeof(*dh->dh_blkfree); 306 } 307 mtx_destroy(&dh->dh_mtx); 308 FREE(dh, M_DIRHASH); 309 ip->i_dirhash = NULL; 310 311 DIRHASHLIST_LOCK(); 312 ufs_dirhashmem -= mem; 313 DIRHASHLIST_UNLOCK(); 314} 315 316/* 317 * Find the offset of the specified name within the given inode. 318 * Returns 0 on success, ENOENT if the entry does not exist, or 319 * EJUSTRETURN if the caller should revert to a linear search. 320 * 321 * If successful, the directory offset is stored in *offp, and a 322 * pointer to a struct buf containing the entry is stored in *bpp. If 323 * prevoffp is non-NULL, the offset of the previous entry within 324 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 325 * is the first in a block, the start of the block is used). 326 */ 327int 328ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 329 struct buf **bpp, doff_t *prevoffp) 330{ 331 struct dirhash *dh, *dh_next; 332 struct direct *dp; 333 struct vnode *vp; 334 struct buf *bp; 335 doff_t blkoff, bmask, offset, prevoff; 336 int i, slot; 337 338 if ((dh = ip->i_dirhash) == NULL) 339 return (EJUSTRETURN); 340 /* 341 * Move this dirhash towards the end of the list if it has a 342 * score higher than the next entry, and acquire the dh_mtx. 343 * Optimise the case where it's already the last by performing 344 * an unlocked read of the TAILQ_NEXT pointer. 345 * 346 * In both cases, end up holding just dh_mtx. 347 */ 348 if (TAILQ_NEXT(dh, dh_list) != NULL) { 349 DIRHASHLIST_LOCK(); 350 DIRHASH_LOCK(dh); 351 /* 352 * If the new score will be greater than that of the next 353 * entry, then move this entry past it. With both mutexes 354 * held, dh_next won't go away, but its dh_score could 355 * change; that's not important since it is just a hint. 356 */ 357 if (dh->dh_hash != NULL && 358 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 359 dh->dh_score >= dh_next->dh_score) { 360 KASSERT(dh->dh_onlist, ("dirhash: not on list")); 361 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 362 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 363 dh_list); 364 } 365 DIRHASHLIST_UNLOCK(); 366 } else { 367 /* Already the last, though that could change as we wait. */ 368 DIRHASH_LOCK(dh); 369 } 370 if (dh->dh_hash == NULL) { 371 DIRHASH_UNLOCK(dh); 372 ufsdirhash_free(ip); 373 return (EJUSTRETURN); 374 } 375 376 /* Update the score. */ 377 if (dh->dh_score < DH_SCOREMAX) 378 dh->dh_score++; 379 380 vp = ip->i_vnode; 381 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 382 blkoff = -1; 383 bp = NULL; 384restart: 385 slot = ufsdirhash_hash(dh, name, namelen); 386 387 if (dh->dh_seqopt) { 388 /* 389 * Sequential access optimisation. dh_seqoff contains the 390 * offset of the directory entry immediately following 391 * the last entry that was looked up. Check if this offset 392 * appears in the hash chain for the name we are looking for. 393 */ 394 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 395 i = WRAPINCR(i, dh->dh_hlen)) 396 if (offset == dh->dh_seqoff) 397 break; 398 if (offset == dh->dh_seqoff) { 399 /* 400 * We found an entry with the expected offset. This 401 * is probably the entry we want, but if not, the 402 * code below will turn off seqopt and retry. 403 */ 404 slot = i; 405 } else 406 dh->dh_seqopt = 0; 407 } 408 409 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 410 slot = WRAPINCR(slot, dh->dh_hlen)) { 411 if (offset == DIRHASH_DEL) 412 continue; 413 DIRHASH_UNLOCK(dh); 414 415 if (offset < 0 || offset >= ip->i_size) 416 panic("ufsdirhash_lookup: bad offset in hash array"); 417 if ((offset & ~bmask) != blkoff) { 418 if (bp != NULL) 419 brelse(bp); 420 blkoff = offset & ~bmask; 421 if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) 422 return (EJUSTRETURN); 423 } 424 dp = (struct direct *)(bp->b_data + (offset & bmask)); 425 if (dp->d_reclen == 0 || dp->d_reclen > 426 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 427 /* Corrupted directory. */ 428 brelse(bp); 429 return (EJUSTRETURN); 430 } 431 if (dp->d_namlen == namelen && 432 bcmp(dp->d_name, name, namelen) == 0) { 433 /* Found. Get the prev offset if needed. */ 434 if (prevoffp != NULL) { 435 if (offset & (DIRBLKSIZ - 1)) { 436 prevoff = ufsdirhash_getprev(dp, 437 offset); 438 if (prevoff == -1) { 439 brelse(bp); 440 return (EJUSTRETURN); 441 } 442 } else 443 prevoff = offset; 444 *prevoffp = prevoff; 445 } 446 447 /* Check for sequential access, and update offset. */ 448 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 449 dh->dh_seqopt = 1; 450 dh->dh_seqoff = offset + DIRSIZ(0, dp); 451 452 *bpp = bp; 453 *offp = offset; 454 return (0); 455 } 456 457 DIRHASH_LOCK(dh); 458 if (dh->dh_hash == NULL) { 459 DIRHASH_UNLOCK(dh); 460 if (bp != NULL) 461 brelse(bp); 462 ufsdirhash_free(ip); 463 return (EJUSTRETURN); 464 } 465 /* 466 * When the name doesn't match in the seqopt case, go back 467 * and search normally. 468 */ 469 if (dh->dh_seqopt) { 470 dh->dh_seqopt = 0; 471 goto restart; 472 } 473 } 474 DIRHASH_UNLOCK(dh); 475 if (bp != NULL) 476 brelse(bp); 477 return (ENOENT); 478} 479 480/* 481 * Find a directory block with room for 'slotneeded' bytes. Returns 482 * the offset of the directory entry that begins the free space. 483 * This will either be the offset of an existing entry that has free 484 * space at the end, or the offset of an entry with d_ino == 0 at 485 * the start of a DIRBLKSIZ block. 486 * 487 * To use the space, the caller may need to compact existing entries in 488 * the directory. The total number of bytes in all of the entries involved 489 * in the compaction is stored in *slotsize. In other words, all of 490 * the entries that must be compacted are exactly contained in the 491 * region beginning at the returned offset and spanning *slotsize bytes. 492 * 493 * Returns -1 if no space was found, indicating that the directory 494 * must be extended. 495 */ 496doff_t 497ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 498{ 499 struct direct *dp; 500 struct dirhash *dh; 501 struct buf *bp; 502 doff_t pos, slotstart; 503 int dirblock, error, freebytes, i; 504 505 if ((dh = ip->i_dirhash) == NULL) 506 return (-1); 507 DIRHASH_LOCK(dh); 508 if (dh->dh_hash == NULL) { 509 DIRHASH_UNLOCK(dh); 510 ufsdirhash_free(ip); 511 return (-1); 512 } 513 514 /* Find a directory block with the desired free space. */ 515 dirblock = -1; 516 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 517 if ((dirblock = dh->dh_firstfree[i]) != -1) 518 break; 519 if (dirblock == -1) { 520 DIRHASH_UNLOCK(dh); 521 return (-1); 522 } 523 524 KASSERT(dirblock < dh->dh_nblk && 525 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 526 ("ufsdirhash_findfree: bad stats")); 527 DIRHASH_UNLOCK(dh); 528 pos = dirblock * DIRBLKSIZ; 529 error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); 530 if (error) 531 return (-1); 532 533 /* Find the first entry with free space. */ 534 for (i = 0; i < DIRBLKSIZ; ) { 535 if (dp->d_reclen == 0) { 536 brelse(bp); 537 return (-1); 538 } 539 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) 540 break; 541 i += dp->d_reclen; 542 dp = (struct direct *)((char *)dp + dp->d_reclen); 543 } 544 if (i > DIRBLKSIZ) { 545 brelse(bp); 546 return (-1); 547 } 548 slotstart = pos + i; 549 550 /* Find the range of entries needed to get enough space */ 551 freebytes = 0; 552 while (i < DIRBLKSIZ && freebytes < slotneeded) { 553 freebytes += dp->d_reclen; 554 if (dp->d_ino != 0) 555 freebytes -= DIRSIZ(0, dp); 556 if (dp->d_reclen == 0) { 557 brelse(bp); 558 return (-1); 559 } 560 i += dp->d_reclen; 561 dp = (struct direct *)((char *)dp + dp->d_reclen); 562 } 563 if (i > DIRBLKSIZ) { 564 brelse(bp); 565 return (-1); 566 } 567 if (freebytes < slotneeded) 568 panic("ufsdirhash_findfree: free mismatch"); 569 brelse(bp); 570 *slotsize = pos + i - slotstart; 571 return (slotstart); 572} 573 574/* 575 * Return the start of the unused space at the end of a directory, or 576 * -1 if there are no trailing unused blocks. 577 */ 578doff_t 579ufsdirhash_enduseful(struct inode *ip) 580{ 581 582 struct dirhash *dh; 583 int i; 584 585 if ((dh = ip->i_dirhash) == NULL) 586 return (-1); 587 DIRHASH_LOCK(dh); 588 if (dh->dh_hash == NULL) { 589 DIRHASH_UNLOCK(dh); 590 ufsdirhash_free(ip); 591 return (-1); 592 } 593 594 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { 595 DIRHASH_UNLOCK(dh); 596 return (-1); 597 } 598 599 for (i = dh->dh_dirblks - 1; i >= 0; i--) 600 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 601 break; 602 DIRHASH_UNLOCK(dh); 603 return ((doff_t)(i + 1) * DIRBLKSIZ); 604} 605 606/* 607 * Insert information into the hash about a new directory entry. dirp 608 * points to a struct direct containing the entry, and offset specifies 609 * the offset of this entry. 610 */ 611void 612ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 613{ 614 struct dirhash *dh; 615 int slot; 616 617 if ((dh = ip->i_dirhash) == NULL) 618 return; 619 DIRHASH_LOCK(dh); 620 if (dh->dh_hash == NULL) { 621 DIRHASH_UNLOCK(dh); 622 ufsdirhash_free(ip); 623 return; 624 } 625 626 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 627 ("ufsdirhash_add: bad offset")); 628 /* 629 * Normal hash usage is < 66%. If the usage gets too high then 630 * remove the hash entirely and let it be rebuilt later. 631 */ 632 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 633 DIRHASH_UNLOCK(dh); 634 ufsdirhash_free(ip); 635 return; 636 } 637 638 /* Find a free hash slot (empty or deleted), and add the entry. */ 639 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 640 while (DH_ENTRY(dh, slot) >= 0) 641 slot = WRAPINCR(slot, dh->dh_hlen); 642 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 643 dh->dh_hused++; 644 DH_ENTRY(dh, slot) = offset; 645 646 /* Update the per-block summary info. */ 647 ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); 648 DIRHASH_UNLOCK(dh); 649} 650 651/* 652 * Remove the specified directory entry from the hash. The entry to remove 653 * is defined by the name in `dirp', which must exist at the specified 654 * `offset' within the directory. 655 */ 656void 657ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 658{ 659 struct dirhash *dh; 660 int slot; 661 662 if ((dh = ip->i_dirhash) == NULL) 663 return; 664 DIRHASH_LOCK(dh); 665 if (dh->dh_hash == NULL) { 666 DIRHASH_UNLOCK(dh); 667 ufsdirhash_free(ip); 668 return; 669 } 670 671 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 672 ("ufsdirhash_remove: bad offset")); 673 /* Find the entry */ 674 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 675 676 /* Remove the hash entry. */ 677 ufsdirhash_delslot(dh, slot); 678 679 /* Update the per-block summary info. */ 680 ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); 681 DIRHASH_UNLOCK(dh); 682} 683 684/* 685 * Change the offset associated with a directory entry in the hash. Used 686 * when compacting directory blocks. 687 */ 688void 689ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 690 doff_t newoff) 691{ 692 struct dirhash *dh; 693 int slot; 694 695 if ((dh = ip->i_dirhash) == NULL) 696 return; 697 DIRHASH_LOCK(dh); 698 if (dh->dh_hash == NULL) { 699 DIRHASH_UNLOCK(dh); 700 ufsdirhash_free(ip); 701 return; 702 } 703 704 KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 705 newoff < dh->dh_dirblks * DIRBLKSIZ, 706 ("ufsdirhash_move: bad offset")); 707 /* Find the entry, and update the offset. */ 708 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 709 DH_ENTRY(dh, slot) = newoff; 710 DIRHASH_UNLOCK(dh); 711} 712 713/* 714 * Inform dirhash that the directory has grown by one block that 715 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 716 */ 717void 718ufsdirhash_newblk(struct inode *ip, doff_t offset) 719{ 720 struct dirhash *dh; 721 int block; 722 723 if ((dh = ip->i_dirhash) == NULL) 724 return; 725 DIRHASH_LOCK(dh); 726 if (dh->dh_hash == NULL) { 727 DIRHASH_UNLOCK(dh); 728 ufsdirhash_free(ip); 729 return; 730 } 731 732 KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 733 ("ufsdirhash_newblk: bad offset")); 734 block = offset / DIRBLKSIZ; 735 if (block >= dh->dh_nblk) { 736 /* Out of space; must rebuild. */ 737 DIRHASH_UNLOCK(dh); 738 ufsdirhash_free(ip); 739 return; 740 } 741 dh->dh_dirblks = block + 1; 742 743 /* Account for the new free block. */ 744 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 745 if (dh->dh_firstfree[DH_NFSTATS] == -1) 746 dh->dh_firstfree[DH_NFSTATS] = block; 747 DIRHASH_UNLOCK(dh); 748} 749 750/* 751 * Inform dirhash that the directory is being truncated. 752 */ 753void 754ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 755{ 756 struct dirhash *dh; 757 int block, i; 758 759 if ((dh = ip->i_dirhash) == NULL) 760 return; 761 DIRHASH_LOCK(dh); 762 if (dh->dh_hash == NULL) { 763 DIRHASH_UNLOCK(dh); 764 ufsdirhash_free(ip); 765 return; 766 } 767 768 KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 769 ("ufsdirhash_dirtrunc: bad offset")); 770 block = howmany(offset, DIRBLKSIZ); 771 /* 772 * If the directory shrinks to less than 1/8 of dh_nblk blocks 773 * (about 20% of its original size due to the 50% extra added in 774 * ufsdirhash_build) then free it, and let the caller rebuild 775 * if necessary. 776 */ 777 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 778 DIRHASH_UNLOCK(dh); 779 ufsdirhash_free(ip); 780 return; 781 } 782 783 /* 784 * Remove any `first free' information pertaining to the 785 * truncated blocks. All blocks we're removing should be 786 * completely unused. 787 */ 788 if (dh->dh_firstfree[DH_NFSTATS] >= block) 789 dh->dh_firstfree[DH_NFSTATS] = -1; 790 for (i = block; i < dh->dh_dirblks; i++) 791 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 792 panic("ufsdirhash_dirtrunc: blocks in use"); 793 for (i = 0; i < DH_NFSTATS; i++) 794 if (dh->dh_firstfree[i] >= block) 795 panic("ufsdirhash_dirtrunc: first free corrupt"); 796 dh->dh_dirblks = block; 797 DIRHASH_UNLOCK(dh); 798} 799 800/* 801 * Debugging function to check that the dirhash information about 802 * a directory block matches its actual contents. Panics if a mismatch 803 * is detected. 804 * 805 * On entry, `buf' should point to the start of an in-core 806 * DIRBLKSIZ-sized directory block, and `offset' should contain the 807 * offset from the start of the directory of that block. 808 */ 809void 810ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 811{ 812 struct dirhash *dh; 813 struct direct *dp; 814 int block, ffslot, i, nfree; 815 816 if (!ufs_dirhashcheck) 817 return; 818 if ((dh = ip->i_dirhash) == NULL) 819 return; 820 DIRHASH_LOCK(dh); 821 if (dh->dh_hash == NULL) { 822 DIRHASH_UNLOCK(dh); 823 ufsdirhash_free(ip); 824 return; 825 } 826 827 block = offset / DIRBLKSIZ; 828 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 829 panic("ufsdirhash_checkblock: bad offset"); 830 831 nfree = 0; 832 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 833 dp = (struct direct *)(buf + i); 834 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 835 panic("ufsdirhash_checkblock: bad dir"); 836 837 if (dp->d_ino == 0) { 838#if 0 839 /* 840 * XXX entries with d_ino == 0 should only occur 841 * at the start of a DIRBLKSIZ block. However the 842 * ufs code is tolerant of such entries at other 843 * offsets, and fsck does not fix them. 844 */ 845 if (i != 0) 846 panic("ufsdirhash_checkblock: bad dir inode"); 847#endif 848 nfree += dp->d_reclen; 849 continue; 850 } 851 852 /* Check that the entry exists (will panic if it doesn't). */ 853 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 854 855 nfree += dp->d_reclen - DIRSIZ(0, dp); 856 } 857 if (i != DIRBLKSIZ) 858 panic("ufsdirhash_checkblock: bad dir end"); 859 860 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 861 panic("ufsdirhash_checkblock: bad free count"); 862 863 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 864 for (i = 0; i <= DH_NFSTATS; i++) 865 if (dh->dh_firstfree[i] == block && i != ffslot) 866 panic("ufsdirhash_checkblock: bad first-free"); 867 if (dh->dh_firstfree[ffslot] == -1) 868 panic("ufsdirhash_checkblock: missing first-free entry"); 869 DIRHASH_UNLOCK(dh); 870} 871 872/* 873 * Hash the specified filename into a dirhash slot. 874 */ 875static int 876ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 877{ 878 u_int32_t hash; 879 880 /* 881 * We hash the name and then some other bit of data that is 882 * invariant over the dirhash's lifetime. Otherwise names 883 * differing only in the last byte are placed close to one 884 * another in the table, which is bad for linear probing. 885 */ 886 hash = fnv_32_buf(name, namelen, FNV1_32_INIT); 887 hash = fnv_32_buf(&dh, sizeof(dh), hash); 888 return (hash % dh->dh_hlen); 889} 890 891/* 892 * Adjust the number of free bytes in the block containing `offset' 893 * by the value specified by `diff'. 894 * 895 * The caller must ensure we have exclusive access to `dh'; normally 896 * that means that dh_mtx should be held, but this is also called 897 * from ufsdirhash_build() where exclusive access can be assumed. 898 */ 899static void 900ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 901{ 902 int block, i, nfidx, ofidx; 903 904 /* Update the per-block summary info. */ 905 block = offset / DIRBLKSIZ; 906 KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, 907 ("dirhash bad offset")); 908 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); 909 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 910 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); 911 912 /* Update the `first free' list if necessary. */ 913 if (ofidx != nfidx) { 914 /* If removing, scan forward for the next block. */ 915 if (dh->dh_firstfree[ofidx] == block) { 916 for (i = block + 1; i < dh->dh_dirblks; i++) 917 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) 918 break; 919 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 920 } 921 922 /* Make this the new `first free' if necessary */ 923 if (dh->dh_firstfree[nfidx] > block || 924 dh->dh_firstfree[nfidx] == -1) 925 dh->dh_firstfree[nfidx] = block; 926 } 927} 928 929/* 930 * Find the specified name which should have the specified offset. 931 * Returns a slot number, and panics on failure. 932 * 933 * `dh' must be locked on entry and remains so on return. 934 */ 935static int 936ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) 937{ 938 int slot; 939 940 mtx_assert(&dh->dh_mtx, MA_OWNED); 941 942 /* Find the entry. */ 943 KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); 944 slot = ufsdirhash_hash(dh, name, namelen); 945 while (DH_ENTRY(dh, slot) != offset && 946 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 947 slot = WRAPINCR(slot, dh->dh_hlen); 948 if (DH_ENTRY(dh, slot) != offset) 949 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 950 951 return (slot); 952} 953 954/* 955 * Remove the entry corresponding to the specified slot from the hash array. 956 * 957 * `dh' must be locked on entry and remains so on return. 958 */ 959static void 960ufsdirhash_delslot(struct dirhash *dh, int slot) 961{ 962 int i; 963 964 mtx_assert(&dh->dh_mtx, MA_OWNED); 965 966 /* Mark the entry as deleted. */ 967 DH_ENTRY(dh, slot) = DIRHASH_DEL; 968 969 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 970 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 971 i = WRAPINCR(i, dh->dh_hlen); 972 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 973 i = WRAPDECR(i, dh->dh_hlen); 974 while (DH_ENTRY(dh, i) == DIRHASH_DEL) { 975 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 976 dh->dh_hused--; 977 i = WRAPDECR(i, dh->dh_hlen); 978 } 979 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); 980 } 981} 982 983/* 984 * Given a directory entry and its offset, find the offset of the 985 * previous entry in the same DIRBLKSIZ-sized block. Returns an 986 * offset, or -1 if there is no previous entry in the block or some 987 * other problem occurred. 988 */ 989static doff_t 990ufsdirhash_getprev(struct direct *dirp, doff_t offset) 991{ 992 struct direct *dp; 993 char *blkbuf; 994 doff_t blkoff, prevoff; 995 int entrypos, i; 996 997 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ 998 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ 999 blkbuf = (char *)dirp - entrypos; 1000 prevoff = blkoff; 1001 1002 /* If `offset' is the start of a block, there is no previous entry. */ 1003 if (entrypos == 0) 1004 return (-1); 1005 1006 /* Scan from the start of the block until we get to the entry. */ 1007 for (i = 0; i < entrypos; i += dp->d_reclen) { 1008 dp = (struct direct *)(blkbuf + i); 1009 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 1010 return (-1); /* Corrupted directory. */ 1011 prevoff = blkoff + i; 1012 } 1013 return (prevoff); 1014} 1015 1016/* 1017 * Try to free up `wanted' bytes by stealing memory from existing 1018 * dirhashes. Returns zero with list locked if successful. 1019 */ 1020static int 1021ufsdirhash_recycle(int wanted) 1022{ 1023 struct dirhash *dh; 1024 doff_t **hash; 1025 u_int8_t *blkfree; 1026 int i, mem, narrays; 1027 1028 DIRHASHLIST_LOCK(); 1029 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1030 /* Find a dirhash, and lock it. */ 1031 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1032 DIRHASHLIST_UNLOCK(); 1033 return (-1); 1034 } 1035 DIRHASH_LOCK(dh); 1036 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1037 1038 /* Decrement the score; only recycle if it becomes zero. */ 1039 if (--dh->dh_score > 0) { 1040 DIRHASH_UNLOCK(dh); 1041 DIRHASHLIST_UNLOCK(); 1042 return (-1); 1043 } 1044 1045 /* Remove it from the list and detach its memory. */ 1046 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1047 dh->dh_onlist = 0; 1048 hash = dh->dh_hash; 1049 dh->dh_hash = NULL; 1050 blkfree = dh->dh_blkfree; 1051 dh->dh_blkfree = NULL; 1052 narrays = dh->dh_narrays; 1053 mem = narrays * sizeof(*dh->dh_hash) + 1054 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1055 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1056 1057 /* Unlock everything, free the detached memory. */ 1058 DIRHASH_UNLOCK(dh); 1059 DIRHASHLIST_UNLOCK(); 1060 for (i = 0; i < narrays; i++) 1061 DIRHASH_BLKFREE(hash[i]); 1062 FREE(hash, M_DIRHASH); 1063 FREE(blkfree, M_DIRHASH); 1064 1065 /* Account for the returned memory, and repeat if necessary. */ 1066 DIRHASHLIST_LOCK(); 1067 ufs_dirhashmem -= mem; 1068 } 1069 /* Success; return with list locked. */ 1070 return (0); 1071} 1072 1073 1074void 1075ufsdirhash_init() 1076{ 1077 ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), 1078 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1079 mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); 1080 TAILQ_INIT(&ufsdirhash_list); 1081} 1082 1083void 1084ufsdirhash_uninit() 1085{ 1086 KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); 1087 uma_zdestroy(ufsdirhash_zone); 1088 mtx_destroy(&ufsdirhash_mtx); 1089} 1090 1091#endif /* UFS_DIRHASH */ 1092