1/* $OpenBSD: ufs_dirhash.c,v 1.43 2024/01/09 03:15:59 guenther Exp $ */ 2/* 3 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * This implements a hash-based lookup scheme for UFS directories. 29 */ 30 31#include <sys/param.h> 32#include <sys/systm.h> 33#include <sys/lock.h> 34#include <sys/malloc.h> 35#include <sys/pool.h> 36#include <sys/buf.h> 37#include <sys/vnode.h> 38#include <sys/mount.h> 39#include <sys/sysctl.h> 40#include <sys/mutex.h> 41 42#include <crypto/siphash.h> 43 44#include <ufs/ufs/quota.h> 45#include <ufs/ufs/inode.h> 46#include <ufs/ufs/dir.h> 47#include <ufs/ufs/dirhash.h> 48#include <ufs/ufs/ufsmount.h> 49#include <ufs/ufs/ufs_extern.h> 50 51#define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 52#define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) 53#define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 54 55int ufs_mindirhashsize; 56int ufs_dirhashmaxmem; 57int ufs_dirhashmem; 58int ufs_dirhashcheck; 59 60SIPHASH_KEY ufsdirhash_key; 61 62int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 63void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 64void ufsdirhash_delslot(struct dirhash *dh, int slot); 65int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 66 doff_t offset); 67doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 68int ufsdirhash_recycle(int wanted); 69 70struct pool ufsdirhash_pool; 71 72#define DIRHASHLIST_LOCK() rw_enter_write(&ufsdirhash_mtx) 73#define DIRHASHLIST_UNLOCK() rw_exit_write(&ufsdirhash_mtx) 74#define DIRHASH_LOCK(dh) rw_enter_write(&(dh)->dh_mtx) 75#define DIRHASH_UNLOCK(dh) rw_exit_write(&(dh)->dh_mtx) 76#define DIRHASH_BLKALLOC_WAITOK() pool_get(&ufsdirhash_pool, PR_WAITOK) 77#define DIRHASH_BLKFREE(v) pool_put(&ufsdirhash_pool, v) 78 79#define mtx_assert(l, f) /* nothing */ 80#define DIRHASH_ASSERT(e, m) KASSERT((e)) 81 82/* Dirhash list; recently-used entries are near the tail. */ 83TAILQ_HEAD(, dirhash) ufsdirhash_list; 84 85/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 86struct rwlock ufsdirhash_mtx; 87 88/* 89 * Locking order: 90 * ufsdirhash_mtx 91 * dh_mtx 92 * 93 * The dh_mtx mutex should be acquired either via the inode lock, or via 94 * ufsdirhash_mtx. Only the owner of the inode may free the associated 95 * dirhash, but anything can steal its memory and set dh_hash to NULL. 96 */ 97 98/* 99 * Attempt to build up a hash table for the directory contents in 100 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 101 */ 102int 103ufsdirhash_build(struct inode *ip) 104{ 105 struct dirhash *dh; 106 struct buf *bp = NULL; 107 struct direct *ep; 108 struct vnode *vp; 109 doff_t bmask, pos; 110 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 111 112 /* Check if we can/should use dirhash. */ 113 if (ip->i_dirhash == NULL) { 114 if (DIP(ip, size) < ufs_mindirhashsize) 115 return (-1); 116 } else { 117 /* Hash exists, but sysctls could have changed. */ 118 if (DIP(ip, size) < ufs_mindirhashsize || 119 ufs_dirhashmem > ufs_dirhashmaxmem) { 120 ufsdirhash_free(ip); 121 return (-1); 122 } 123 /* Check if hash exists and is intact (note: unlocked read). */ 124 if (ip->i_dirhash->dh_hash != NULL) 125 return (0); 126 /* Free the old, recycled hash and build a new one. */ 127 ufsdirhash_free(ip); 128 } 129 130 /* Don't hash removed directories. */ 131 if (ip->i_effnlink == 0) 132 return (-1); 133 134 vp = ip->i_vnode; 135 /* Allocate 50% more entries than this dir size could ever need. */ 136 DIRHASH_ASSERT(DIP(ip, size) >= DIRBLKSIZ, ("ufsdirhash_build size")); 137 nslots = DIP(ip, size) / DIRECTSIZ(1); 138 nslots = (nslots * 3 + 1) / 2; 139 narrays = howmany(nslots, DH_NBLKOFF); 140 nslots = narrays * DH_NBLKOFF; 141 dirblocks = howmany(DIP(ip, size), DIRBLKSIZ); 142 nblocks = (dirblocks * 3 + 1) / 2; 143 144 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 145 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 146 nblocks * sizeof(*dh->dh_blkfree); 147 DIRHASHLIST_LOCK(); 148 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 149 DIRHASHLIST_UNLOCK(); 150 if (memreqd > ufs_dirhashmaxmem / 2) 151 return (-1); 152 153 /* Try to free some space. */ 154 if (ufsdirhash_recycle(memreqd) != 0) 155 return (-1); 156 /* Enough was freed, and list has been locked. */ 157 } 158 ufs_dirhashmem += memreqd; 159 DIRHASHLIST_UNLOCK(); 160 161 /* 162 * Use non-blocking mallocs so that we will revert to a linear 163 * lookup on failure rather than potentially blocking forever. 164 */ 165 dh = malloc(sizeof(*dh), M_DIRHASH, M_NOWAIT|M_ZERO); 166 if (dh == NULL) { 167 DIRHASHLIST_LOCK(); 168 ufs_dirhashmem -= memreqd; 169 DIRHASHLIST_UNLOCK(); 170 return (-1); 171 } 172 dh->dh_hash = mallocarray(narrays, sizeof(dh->dh_hash[0]), 173 M_DIRHASH, M_NOWAIT|M_ZERO); 174 dh->dh_blkfree = mallocarray(nblocks, sizeof(dh->dh_blkfree[0]), 175 M_DIRHASH, M_NOWAIT | M_ZERO); 176 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) 177 goto fail; 178 for (i = 0; i < narrays; i++) { 179 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL) 180 goto fail; 181 for (j = 0; j < DH_NBLKOFF; j++) 182 dh->dh_hash[i][j] = DIRHASH_EMPTY; 183 } 184 185 /* Initialise the hash table and block statistics. */ 186 rw_init(&dh->dh_mtx, "dirhash"); 187 dh->dh_narrays = narrays; 188 dh->dh_hlen = nslots; 189 dh->dh_nblk = nblocks; 190 dh->dh_dirblks = dirblocks; 191 for (i = 0; i < dirblocks; i++) 192 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 193 for (i = 0; i < DH_NFSTATS; i++) 194 dh->dh_firstfree[i] = -1; 195 dh->dh_firstfree[DH_NFSTATS] = 0; 196 dh->dh_seqopt = 0; 197 dh->dh_seqoff = 0; 198 dh->dh_score = DH_SCOREINIT; 199 ip->i_dirhash = dh; 200 201 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 202 pos = 0; 203 while (pos < DIP(ip, size)) { 204 /* If necessary, get the next directory block. */ 205 if ((pos & bmask) == 0) { 206 if (bp != NULL) 207 brelse(bp); 208 if (UFS_BUFATOFF(ip, (off_t)pos, NULL, &bp) != 0) 209 goto fail; 210 } 211 /* Add this entry to the hash. */ 212 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 213 if (ep->d_reclen == 0 || ep->d_reclen > 214 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 215 /* Corrupted directory. */ 216 brelse(bp); 217 goto fail; 218 } 219 if (ep->d_ino != 0) { 220 /* Add the entry (simplified ufsdirhash_add). */ 221 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 222 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 223 slot = WRAPINCR(slot, dh->dh_hlen); 224 dh->dh_hused++; 225 DH_ENTRY(dh, slot) = pos; 226 ufsdirhash_adjfree(dh, pos, -DIRSIZ(ep)); 227 } 228 pos += ep->d_reclen; 229 } 230 231 if (bp != NULL) 232 brelse(bp); 233 DIRHASHLIST_LOCK(); 234 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 235 dh->dh_onlist = 1; 236 DIRHASHLIST_UNLOCK(); 237 return (0); 238 239fail: 240 if (dh->dh_hash != NULL) { 241 for (i = 0; i < narrays; i++) 242 if (dh->dh_hash[i] != NULL) 243 DIRHASH_BLKFREE(dh->dh_hash[i]); 244 free(dh->dh_hash, M_DIRHASH, 245 narrays * sizeof(dh->dh_hash[0])); 246 } 247 if (dh->dh_blkfree != NULL) 248 free(dh->dh_blkfree, M_DIRHASH, 249 nblocks * sizeof(dh->dh_blkfree[0])); 250 free(dh, M_DIRHASH, sizeof(*dh)); 251 ip->i_dirhash = NULL; 252 DIRHASHLIST_LOCK(); 253 ufs_dirhashmem -= memreqd; 254 DIRHASHLIST_UNLOCK(); 255 return (-1); 256} 257 258/* 259 * Free any hash table associated with inode 'ip'. 260 */ 261void 262ufsdirhash_free(struct inode *ip) 263{ 264 struct dirhash *dh; 265 int i, mem; 266 267 if ((dh = ip->i_dirhash) == NULL) 268 return; 269 DIRHASHLIST_LOCK(); 270 DIRHASH_LOCK(dh); 271 if (dh->dh_onlist) 272 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 273 DIRHASH_UNLOCK(dh); 274 DIRHASHLIST_UNLOCK(); 275 276 /* The dirhash pointed to by 'dh' is exclusively ours now. */ 277 278 mem = sizeof(*dh); 279 if (dh->dh_hash != NULL) { 280 for (i = 0; i < dh->dh_narrays; i++) 281 DIRHASH_BLKFREE(dh->dh_hash[i]); 282 free(dh->dh_hash, M_DIRHASH, 283 dh->dh_narrays * sizeof(dh->dh_hash[0])); 284 free(dh->dh_blkfree, M_DIRHASH, 285 dh->dh_nblk * sizeof(dh->dh_blkfree[0])); 286 mem += dh->dh_narrays * sizeof(*dh->dh_hash) + 287 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 288 dh->dh_nblk * sizeof(*dh->dh_blkfree); 289 } 290 free(dh, M_DIRHASH, sizeof(*dh)); 291 ip->i_dirhash = NULL; 292 293 DIRHASHLIST_LOCK(); 294 ufs_dirhashmem -= mem; 295 DIRHASHLIST_UNLOCK(); 296} 297 298/* 299 * Find the offset of the specified name within the given inode. 300 * Returns 0 on success, ENOENT if the entry does not exist, or 301 * EJUSTRETURN if the caller should revert to a linear search. 302 * 303 * If successful, the directory offset is stored in *offp, and a 304 * pointer to a struct buf containing the entry is stored in *bpp. If 305 * prevoffp is non-NULL, the offset of the previous entry within 306 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 307 * is the first in a block, the start of the block is used). 308 */ 309int 310ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 311 struct buf **bpp, doff_t *prevoffp) 312{ 313 struct dirhash *dh, *dh_next; 314 struct direct *dp; 315 struct vnode *vp; 316 struct buf *bp; 317 doff_t blkoff, bmask, offset, prevoff; 318 int i, slot; 319 320 if ((dh = ip->i_dirhash) == NULL) 321 return (EJUSTRETURN); 322 /* 323 * Move this dirhash towards the end of the list if it has a 324 * score higher than the next entry, and acquire the dh_mtx. 325 * Optimise the case where it's already the last by performing 326 * an unlocked read of the TAILQ_NEXT pointer. 327 * 328 * In both cases, end up holding just dh_mtx. 329 */ 330 if (TAILQ_NEXT(dh, dh_list) != NULL) { 331 DIRHASHLIST_LOCK(); 332 DIRHASH_LOCK(dh); 333 /* 334 * If the new score will be greater than that of the next 335 * entry, then move this entry past it. With both mutexes 336 * held, dh_next won't go away, but its dh_score could 337 * change; that's not important since it is just a hint. 338 */ 339 if (dh->dh_hash != NULL && 340 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 341 dh->dh_score >= dh_next->dh_score) { 342 DIRHASH_ASSERT(dh->dh_onlist, ("dirhash: not on list")); 343 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 344 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 345 dh_list); 346 } 347 DIRHASHLIST_UNLOCK(); 348 } else { 349 /* Already the last, though that could change as we wait. */ 350 DIRHASH_LOCK(dh); 351 } 352 if (dh->dh_hash == NULL) { 353 DIRHASH_UNLOCK(dh); 354 ufsdirhash_free(ip); 355 return (EJUSTRETURN); 356 } 357 358 /* Update the score. */ 359 if (dh->dh_score < DH_SCOREMAX) 360 dh->dh_score++; 361 362 vp = ip->i_vnode; 363 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 364 blkoff = -1; 365 bp = NULL; 366restart: 367 slot = ufsdirhash_hash(dh, name, namelen); 368 369 if (dh->dh_seqopt) { 370 /* 371 * Sequential access optimisation. dh_seqoff contains the 372 * offset of the directory entry immediately following 373 * the last entry that was looked up. Check if this offset 374 * appears in the hash chain for the name we are looking for. 375 */ 376 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 377 i = WRAPINCR(i, dh->dh_hlen)) 378 if (offset == dh->dh_seqoff) 379 break; 380 if (offset == dh->dh_seqoff) { 381 /* 382 * We found an entry with the expected offset. This 383 * is probably the entry we want, but if not, the 384 * code below will turn off seqopt and retry. 385 */ 386 slot = i; 387 } else 388 dh->dh_seqopt = 0; 389 } 390 391 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 392 slot = WRAPINCR(slot, dh->dh_hlen)) { 393 if (offset == DIRHASH_DEL) 394 continue; 395 DIRHASH_UNLOCK(dh); 396 397 if (offset < 0 || offset >= DIP(ip, size)) 398 panic("ufsdirhash_lookup: bad offset in hash array"); 399 if ((offset & ~bmask) != blkoff) { 400 if (bp != NULL) 401 brelse(bp); 402 blkoff = offset & ~bmask; 403 if (UFS_BUFATOFF(ip, (off_t)blkoff, NULL, &bp) != 0) 404 return (EJUSTRETURN); 405 } 406 dp = (struct direct *)(bp->b_data + (offset & bmask)); 407 if (dp->d_reclen == 0 || dp->d_reclen > 408 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 409 /* Corrupted directory. */ 410 brelse(bp); 411 return (EJUSTRETURN); 412 } 413 if (dp->d_namlen == namelen && 414 memcmp(dp->d_name, name, namelen) == 0) { 415 /* Found. Get the prev offset if needed. */ 416 if (prevoffp != NULL) { 417 if (offset & (DIRBLKSIZ - 1)) { 418 prevoff = ufsdirhash_getprev(dp, 419 offset); 420 if (prevoff == -1) { 421 brelse(bp); 422 return (EJUSTRETURN); 423 } 424 } else 425 prevoff = offset; 426 *prevoffp = prevoff; 427 } 428 429 /* Check for sequential access, and update offset. */ 430 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) 431 dh->dh_seqopt = 1; 432 dh->dh_seqoff = offset + DIRSIZ(dp); 433 434 *bpp = bp; 435 *offp = offset; 436 return (0); 437 } 438 439 DIRHASH_LOCK(dh); 440 if (dh->dh_hash == NULL) { 441 DIRHASH_UNLOCK(dh); 442 if (bp != NULL) 443 brelse(bp); 444 ufsdirhash_free(ip); 445 return (EJUSTRETURN); 446 } 447 /* 448 * When the name doesn't match in the seqopt case, go back 449 * and search normally. 450 */ 451 if (dh->dh_seqopt) { 452 dh->dh_seqopt = 0; 453 goto restart; 454 } 455 } 456 DIRHASH_UNLOCK(dh); 457 if (bp != NULL) 458 brelse(bp); 459 return (ENOENT); 460} 461 462/* 463 * Find a directory block with room for 'slotneeded' bytes. Returns 464 * the offset of the directory entry that begins the free space. 465 * This will either be the offset of an existing entry that has free 466 * space at the end, or the offset of an entry with d_ino == 0 at 467 * the start of a DIRBLKSIZ block. 468 * 469 * To use the space, the caller may need to compact existing entries in 470 * the directory. The total number of bytes in all of the entries involved 471 * in the compaction is stored in *slotsize. In other words, all of 472 * the entries that must be compacted are exactly contained in the 473 * region beginning at the returned offset and spanning *slotsize bytes. 474 * 475 * Returns -1 if no space was found, indicating that the directory 476 * must be extended. 477 */ 478doff_t 479ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 480{ 481 struct direct *dp; 482 struct dirhash *dh; 483 struct buf *bp; 484 doff_t pos, slotstart; 485 int dirblock, error, freebytes, i; 486 487 if ((dh = ip->i_dirhash) == NULL) 488 return (-1); 489 DIRHASH_LOCK(dh); 490 if (dh->dh_hash == NULL) { 491 DIRHASH_UNLOCK(dh); 492 ufsdirhash_free(ip); 493 return (-1); 494 } 495 496 /* Find a directory block with the desired free space. */ 497 dirblock = -1; 498 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 499 if ((dirblock = dh->dh_firstfree[i]) != -1) 500 break; 501 if (dirblock == -1) { 502 DIRHASH_UNLOCK(dh); 503 return (-1); 504 } 505 506 DIRHASH_ASSERT(dirblock < dh->dh_nblk && 507 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 508 ("ufsdirhash_findfree: bad stats")); 509 DIRHASH_UNLOCK(dh); 510 pos = dirblock * DIRBLKSIZ; 511 error = UFS_BUFATOFF(ip, (off_t)pos, (char **)&dp, &bp); 512 if (error) 513 return (-1); 514 515 /* Find the first entry with free space. */ 516 for (i = 0; i < DIRBLKSIZ; ) { 517 if (dp->d_reclen == 0) { 518 brelse(bp); 519 return (-1); 520 } 521 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(dp)) 522 break; 523 i += dp->d_reclen; 524 dp = (struct direct *)((char *)dp + dp->d_reclen); 525 } 526 if (i > DIRBLKSIZ) { 527 brelse(bp); 528 return (-1); 529 } 530 slotstart = pos + i; 531 532 /* Find the range of entries needed to get enough space */ 533 freebytes = 0; 534 while (i < DIRBLKSIZ && freebytes < slotneeded) { 535 freebytes += dp->d_reclen; 536 if (dp->d_ino != 0) 537 freebytes -= DIRSIZ(dp); 538 if (dp->d_reclen == 0) { 539 brelse(bp); 540 return (-1); 541 } 542 i += dp->d_reclen; 543 dp = (struct direct *)((char *)dp + dp->d_reclen); 544 } 545 if (i > DIRBLKSIZ) { 546 brelse(bp); 547 return (-1); 548 } 549 if (freebytes < slotneeded) 550 panic("ufsdirhash_findfree: free mismatch"); 551 brelse(bp); 552 *slotsize = pos + i - slotstart; 553 return (slotstart); 554} 555 556/* 557 * Return the start of the unused space at the end of a directory, or 558 * -1 if there are no trailing unused blocks. 559 */ 560doff_t 561ufsdirhash_enduseful(struct inode *ip) 562{ 563 564 struct dirhash *dh; 565 int i; 566 567 if ((dh = ip->i_dirhash) == NULL) 568 return (-1); 569 DIRHASH_LOCK(dh); 570 if (dh->dh_hash == NULL) { 571 DIRHASH_UNLOCK(dh); 572 ufsdirhash_free(ip); 573 return (-1); 574 } 575 576 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) { 577 DIRHASH_UNLOCK(dh); 578 return (-1); 579 } 580 581 for (i = dh->dh_dirblks - 1; i >= 0; i--) 582 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 583 break; 584 DIRHASH_UNLOCK(dh); 585 return ((doff_t)(i + 1) * DIRBLKSIZ); 586} 587 588/* 589 * Insert information into the hash about a new directory entry. dirp 590 * points to a struct direct containing the entry, and offset specifies 591 * the offset of this entry. 592 */ 593void 594ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 595{ 596 struct dirhash *dh; 597 int slot; 598 599 if ((dh = ip->i_dirhash) == NULL) 600 return; 601 DIRHASH_LOCK(dh); 602 if (dh->dh_hash == NULL) { 603 DIRHASH_UNLOCK(dh); 604 ufsdirhash_free(ip); 605 return; 606 } 607 608 DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 609 ("ufsdirhash_add: bad offset")); 610 /* 611 * Normal hash usage is < 66%. If the usage gets too high then 612 * remove the hash entirely and let it be rebuilt later. 613 */ 614 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 615 DIRHASH_UNLOCK(dh); 616 ufsdirhash_free(ip); 617 return; 618 } 619 620 /* Find a free hash slot (empty or deleted), and add the entry. */ 621 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 622 while (DH_ENTRY(dh, slot) >= 0) 623 slot = WRAPINCR(slot, dh->dh_hlen); 624 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 625 dh->dh_hused++; 626 DH_ENTRY(dh, slot) = offset; 627 628 /* Update the per-block summary info. */ 629 ufsdirhash_adjfree(dh, offset, -DIRSIZ(dirp)); 630 DIRHASH_UNLOCK(dh); 631} 632 633/* 634 * Remove the specified directory entry from the hash. The entry to remove 635 * is defined by the name in `dirp', which must exist at the specified 636 * `offset' within the directory. 637 */ 638void 639ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 640{ 641 struct dirhash *dh; 642 int slot; 643 644 if ((dh = ip->i_dirhash) == NULL) 645 return; 646 DIRHASH_LOCK(dh); 647 if (dh->dh_hash == NULL) { 648 DIRHASH_UNLOCK(dh); 649 ufsdirhash_free(ip); 650 return; 651 } 652 653 DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 654 ("ufsdirhash_remove: bad offset")); 655 /* Find the entry */ 656 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 657 658 /* Remove the hash entry. */ 659 ufsdirhash_delslot(dh, slot); 660 661 /* Update the per-block summary info. */ 662 ufsdirhash_adjfree(dh, offset, DIRSIZ(dirp)); 663 DIRHASH_UNLOCK(dh); 664} 665 666/* 667 * Change the offset associated with a directory entry in the hash. Used 668 * when compacting directory blocks. 669 */ 670void 671ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 672 doff_t newoff) 673{ 674 struct dirhash *dh; 675 int slot; 676 677 if ((dh = ip->i_dirhash) == NULL) 678 return; 679 DIRHASH_LOCK(dh); 680 if (dh->dh_hash == NULL) { 681 DIRHASH_UNLOCK(dh); 682 ufsdirhash_free(ip); 683 return; 684 } 685 686 DIRHASH_ASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 687 newoff < dh->dh_dirblks * DIRBLKSIZ, 688 ("ufsdirhash_move: bad offset")); 689 /* Find the entry, and update the offset. */ 690 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 691 DH_ENTRY(dh, slot) = newoff; 692 DIRHASH_UNLOCK(dh); 693} 694 695/* 696 * Inform dirhash that the directory has grown by one block that 697 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 698 */ 699void 700ufsdirhash_newblk(struct inode *ip, doff_t offset) 701{ 702 struct dirhash *dh; 703 int block; 704 705 if ((dh = ip->i_dirhash) == NULL) 706 return; 707 DIRHASH_LOCK(dh); 708 if (dh->dh_hash == NULL) { 709 DIRHASH_UNLOCK(dh); 710 ufsdirhash_free(ip); 711 return; 712 } 713 714 DIRHASH_ASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 715 ("ufsdirhash_newblk: bad offset")); 716 block = offset / DIRBLKSIZ; 717 if (block >= dh->dh_nblk) { 718 /* Out of space; must rebuild. */ 719 DIRHASH_UNLOCK(dh); 720 ufsdirhash_free(ip); 721 return; 722 } 723 dh->dh_dirblks = block + 1; 724 725 /* Account for the new free block. */ 726 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 727 if (dh->dh_firstfree[DH_NFSTATS] == -1) 728 dh->dh_firstfree[DH_NFSTATS] = block; 729 DIRHASH_UNLOCK(dh); 730} 731 732/* 733 * Inform dirhash that the directory is being truncated. 734 */ 735void 736ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 737{ 738 struct dirhash *dh; 739 int block, i; 740 741 if ((dh = ip->i_dirhash) == NULL) 742 return; 743 DIRHASH_LOCK(dh); 744 if (dh->dh_hash == NULL) { 745 DIRHASH_UNLOCK(dh); 746 ufsdirhash_free(ip); 747 return; 748 } 749 750 DIRHASH_ASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 751 ("ufsdirhash_dirtrunc: bad offset")); 752 block = howmany(offset, DIRBLKSIZ); 753 /* 754 * If the directory shrinks to less than 1/8 of dh_nblk blocks 755 * (about 20% of its original size due to the 50% extra added in 756 * ufsdirhash_build) then free it, and let the caller rebuild 757 * if necessary. 758 */ 759 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 760 DIRHASH_UNLOCK(dh); 761 ufsdirhash_free(ip); 762 return; 763 } 764 765 /* 766 * Remove any `first free' information pertaining to the 767 * truncated blocks. All blocks we're removing should be 768 * completely unused. 769 */ 770 if (dh->dh_firstfree[DH_NFSTATS] >= block) 771 dh->dh_firstfree[DH_NFSTATS] = -1; 772 for (i = block; i < dh->dh_dirblks; i++) 773 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 774 panic("ufsdirhash_dirtrunc: blocks in use"); 775 for (i = 0; i < DH_NFSTATS; i++) 776 if (dh->dh_firstfree[i] >= block) 777 panic("ufsdirhash_dirtrunc: first free corrupt"); 778 dh->dh_dirblks = block; 779 DIRHASH_UNLOCK(dh); 780} 781 782/* 783 * Debugging function to check that the dirhash information about 784 * a directory block matches its actual contents. Panics if a mismatch 785 * is detected. 786 * 787 * On entry, `buf' should point to the start of an in-core 788 * DIRBLKSIZ-sized directory block, and `offset' should contain the 789 * offset from the start of the directory of that block. 790 */ 791void 792ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 793{ 794 struct dirhash *dh; 795 struct direct *dp; 796 int block, ffslot, i, nfree; 797 798 if (!ufs_dirhashcheck) 799 return; 800 if ((dh = ip->i_dirhash) == NULL) 801 return; 802 DIRHASH_LOCK(dh); 803 if (dh->dh_hash == NULL) { 804 DIRHASH_UNLOCK(dh); 805 ufsdirhash_free(ip); 806 return; 807 } 808 809 block = offset / DIRBLKSIZ; 810 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 811 panic("ufsdirhash_checkblock: bad offset"); 812 813 nfree = 0; 814 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 815 dp = (struct direct *)(buf + i); 816 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 817 panic("ufsdirhash_checkblock: bad dir"); 818 819 if (dp->d_ino == 0) { 820#if 0 821 /* 822 * XXX entries with d_ino == 0 should only occur 823 * at the start of a DIRBLKSIZ block. However the 824 * ufs code is tolerant of such entries at other 825 * offsets, and fsck does not fix them. 826 */ 827 if (i != 0) 828 panic("ufsdirhash_checkblock: bad dir inode"); 829#endif 830 nfree += dp->d_reclen; 831 continue; 832 } 833 834 /* Check that the entry exists (will panic if it doesn't). */ 835 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 836 837 nfree += dp->d_reclen - DIRSIZ(dp); 838 } 839 if (i != DIRBLKSIZ) 840 panic("ufsdirhash_checkblock: bad dir end"); 841 842 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 843 panic("ufsdirhash_checkblock: bad free count"); 844 845 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 846 for (i = 0; i <= DH_NFSTATS; i++) 847 if (dh->dh_firstfree[i] == block && i != ffslot) 848 panic("ufsdirhash_checkblock: bad first-free"); 849 if (dh->dh_firstfree[ffslot] == -1) 850 panic("ufsdirhash_checkblock: missing first-free entry"); 851 DIRHASH_UNLOCK(dh); 852} 853 854/* 855 * Hash the specified filename into a dirhash slot. 856 */ 857int 858ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 859{ 860 return SipHash24(&ufsdirhash_key, name, namelen) % dh->dh_hlen; 861} 862 863/* 864 * Adjust the number of free bytes in the block containing `offset' 865 * by the value specified by `diff'. 866 * 867 * The caller must ensure we have exclusive access to `dh'; normally 868 * that means that dh_mtx should be held, but this is also called 869 * from ufsdirhash_build() where exclusive access can be assumed. 870 */ 871void 872ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 873{ 874 int block, i, nfidx, ofidx; 875 876 /* Update the per-block summary info. */ 877 block = offset / DIRBLKSIZ; 878 DIRHASH_ASSERT(block < dh->dh_nblk && block < dh->dh_dirblks, 879 ("dirhash bad offset")); 880 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]); 881 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN); 882 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]); 883 884 /* Update the `first free' list if necessary. */ 885 if (ofidx != nfidx) { 886 /* If removing, scan forward for the next block. */ 887 if (dh->dh_firstfree[ofidx] == block) { 888 for (i = block + 1; i < dh->dh_dirblks; i++) 889 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx) 890 break; 891 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1; 892 } 893 894 /* Make this the new `first free' if necessary */ 895 if (dh->dh_firstfree[nfidx] > block || 896 dh->dh_firstfree[nfidx] == -1) 897 dh->dh_firstfree[nfidx] = block; 898 } 899} 900 901/* 902 * Find the specified name which should have the specified offset. 903 * Returns a slot number, and panics on failure. 904 * 905 * `dh' must be locked on entry and remains so on return. 906 */ 907int 908ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset) 909{ 910 int slot; 911 912 mtx_assert(&dh->dh_mtx, MA_OWNED); 913 914 /* Find the entry. */ 915 DIRHASH_ASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full")); 916 slot = ufsdirhash_hash(dh, name, namelen); 917 while (DH_ENTRY(dh, slot) != offset && 918 DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 919 slot = WRAPINCR(slot, dh->dh_hlen); 920 if (DH_ENTRY(dh, slot) != offset) 921 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name); 922 923 return (slot); 924} 925 926/* 927 * Remove the entry corresponding to the specified slot from the hash array. 928 * 929 * `dh' must be locked on entry and remains so on return. 930 */ 931void 932ufsdirhash_delslot(struct dirhash *dh, int slot) 933{ 934 int i; 935 936 mtx_assert(&dh->dh_mtx, MA_OWNED); 937 938 /* Mark the entry as deleted. */ 939 DH_ENTRY(dh, slot) = DIRHASH_DEL; 940 941 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */ 942 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; ) 943 i = WRAPINCR(i, dh->dh_hlen); 944 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) { 945 i = WRAPDECR(i, dh->dh_hlen); 946 while (DH_ENTRY(dh, i) == DIRHASH_DEL) { 947 DH_ENTRY(dh, i) = DIRHASH_EMPTY; 948 dh->dh_hused--; 949 i = WRAPDECR(i, dh->dh_hlen); 950 } 951 DIRHASH_ASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen")); 952 } 953} 954 955/* 956 * Given a directory entry and its offset, find the offset of the 957 * previous entry in the same DIRBLKSIZ-sized block. Returns an 958 * offset, or -1 if there is no previous entry in the block or some 959 * other problem occurred. 960 */ 961doff_t 962ufsdirhash_getprev(struct direct *dirp, doff_t offset) 963{ 964 struct direct *dp; 965 char *blkbuf; 966 doff_t blkoff, prevoff; 967 int entrypos, i; 968 969 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */ 970 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */ 971 blkbuf = (char *)dirp - entrypos; 972 prevoff = blkoff; 973 974 /* If `offset' is the start of a block, there is no previous entry. */ 975 if (entrypos == 0) 976 return (-1); 977 978 /* Scan from the start of the block until we get to the entry. */ 979 for (i = 0; i < entrypos; i += dp->d_reclen) { 980 dp = (struct direct *)(blkbuf + i); 981 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos) 982 return (-1); /* Corrupted directory. */ 983 prevoff = blkoff + i; 984 } 985 return (prevoff); 986} 987 988/* 989 * Try to free up `wanted' bytes by stealing memory from existing 990 * dirhashes. Returns zero with list locked if successful. 991 */ 992int 993ufsdirhash_recycle(int wanted) 994{ 995 struct dirhash *dh; 996 doff_t **hash; 997 u_int8_t *blkfree; 998 int i, mem, narrays, nblk; 999 1000 DIRHASHLIST_LOCK(); 1001 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1002 /* Find a dirhash, and lock it. */ 1003 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) { 1004 DIRHASHLIST_UNLOCK(); 1005 return (-1); 1006 } 1007 DIRHASH_LOCK(dh); 1008 DIRHASH_ASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1009 1010 /* Decrement the score; only recycle if it becomes zero. */ 1011 if (--dh->dh_score > 0) { 1012 DIRHASH_UNLOCK(dh); 1013 DIRHASHLIST_UNLOCK(); 1014 return (-1); 1015 } 1016 1017 /* Remove it from the list and detach its memory. */ 1018 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1019 dh->dh_onlist = 0; 1020 hash = dh->dh_hash; 1021 dh->dh_hash = NULL; 1022 blkfree = dh->dh_blkfree; 1023 dh->dh_blkfree = NULL; 1024 narrays = dh->dh_narrays; 1025 nblk = dh->dh_nblk; 1026 mem = narrays * sizeof(*dh->dh_hash) + 1027 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 1028 dh->dh_nblk * sizeof(*dh->dh_blkfree); 1029 1030 /* Unlock everything, free the detached memory. */ 1031 DIRHASH_UNLOCK(dh); 1032 DIRHASHLIST_UNLOCK(); 1033 for (i = 0; i < narrays; i++) 1034 DIRHASH_BLKFREE(hash[i]); 1035 free(hash, M_DIRHASH, narrays * sizeof(hash[0])); 1036 free(blkfree, M_DIRHASH, nblk * sizeof(blkfree[0])); 1037 1038 /* Account for the returned memory, and repeat if necessary. */ 1039 DIRHASHLIST_LOCK(); 1040 ufs_dirhashmem -= mem; 1041 } 1042 /* Success; return with list locked. */ 1043 return (0); 1044} 1045 1046 1047void 1048ufsdirhash_init(void) 1049{ 1050 pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(doff_t), 0, IPL_NONE, 1051 PR_WAITOK, "dirhash", NULL); 1052 rw_init(&ufsdirhash_mtx, "dirhash_list"); 1053 arc4random_buf(&ufsdirhash_key, sizeof(ufsdirhash_key)); 1054 TAILQ_INIT(&ufsdirhash_list); 1055 ufs_dirhashmaxmem = 5 * 1024 * 1024; 1056 ufs_mindirhashsize = 5 * DIRBLKSIZ; 1057} 1058 1059void 1060ufsdirhash_uninit(void) 1061{ 1062 DIRHASH_ASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); 1063 pool_destroy(&ufsdirhash_pool); 1064} 1065