ulfs_lookup.c revision 1.4
1/* $NetBSD: ulfs_lookup.c,v 1.4 2013/06/06 00:48:04 dholland Exp $ */ 2/* from NetBSD: ufs_lookup.c,v 1.122 2013/01/22 09:39:18 dholland Exp */ 3 4/* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_lookup.c 8.9 (Berkeley) 8/11/94 38 */ 39 40#include <sys/cdefs.h> 41__KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.4 2013/06/06 00:48:04 dholland Exp $"); 42 43#ifdef _KERNEL_OPT 44#include "opt_lfs.h" 45#endif 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/namei.h> 50#include <sys/buf.h> 51#include <sys/file.h> 52#include <sys/stat.h> 53#include <sys/mount.h> 54#include <sys/vnode.h> 55#include <sys/kernel.h> 56#include <sys/kauth.h> 57#include <sys/wapbl.h> 58#include <sys/fstrans.h> 59#include <sys/proc.h> 60#include <sys/kmem.h> 61 62#include <ufs/lfs/ulfs_inode.h> 63#include <ufs/lfs/ulfs_dir.h> 64#ifdef LFS_DIRHASH 65#include <ufs/lfs/ulfs_dirhash.h> 66#endif 67#include <ufs/lfs/ulfsmount.h> 68#include <ufs/lfs/ulfs_extern.h> 69#include <ufs/lfs/ulfs_bswap.h> 70#include <ufs/lfs/ulfs_wapbl.h> 71 72#include <miscfs/genfs/genfs.h> 73 74#ifdef DIAGNOSTIC 75int dirchk = 1; 76#else 77int dirchk = 0; 78#endif 79 80/* 81 * Convert a component of a pathname into a pointer to a locked inode. 82 * This is a very central and rather complicated routine. 83 * If the file system is not maintained in a strict tree hierarchy, 84 * this can result in a deadlock situation (see comments in code below). 85 * 86 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 87 * on whether the name is to be looked up, created, renamed, or deleted. 
88 * When CREATE, RENAME, or DELETE is specified, information usable in 89 * creating, renaming, or deleting a directory entry may be calculated. 90 * If flag has LOCKPARENT or'ed into it and the target of the pathname 91 * exists, lookup returns both the target and its parent directory locked. 92 * When creating or renaming and LOCKPARENT is specified, the target may 93 * not be ".". When deleting and LOCKPARENT is specified, the target may 94 * be "."., but the caller must check to ensure it does an vrele and vput 95 * instead of two vputs. 96 * 97 * Overall outline of ulfs_lookup: 98 * 99 * check accessibility of directory 100 * look for name in cache, if found, then if at end of path 101 * and deleting or creating, drop it, else return name 102 * search for name in directory, to found or notfound 103 * notfound: 104 * if creating, return locked directory, leaving info on available slots 105 * else return error 106 * found: 107 * if at end of path and deleting, return information to allow delete 108 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 109 * inode and return info to allow rewrite 110 * if not at end, add name to cache; if at end and neither creating 111 * nor deleting, add name to cache 112 */ 113int 114ulfs_lookup(void *v) 115{ 116 struct vop_lookup_args /* { 117 struct vnode *a_dvp; 118 struct vnode **a_vpp; 119 struct componentname *a_cnp; 120 } */ *ap = v; 121 struct vnode *vdp = ap->a_dvp; /* vnode for directory being searched */ 122 struct inode *dp = VTOI(vdp); /* inode for directory being searched */ 123 struct buf *bp; /* a buffer of directory entries */ 124 struct direct *ep; /* the current directory entry */ 125 int entryoffsetinblock; /* offset of ep in bp's buffer */ 126 enum { 127 NONE, /* need to search a slot for our new entry */ 128 COMPACT, /* a compaction can make a slot in the current 129 DIRBLKSIZ block */ 130 FOUND, /* found a slot (or no need to search) */ 131 } slotstatus; 132 doff_t slotoffset; /* offset 
of area with free space. 133 a special value -1 for invalid */ 134 int slotsize; /* size of area at slotoffset */ 135 int slotfreespace; /* accumulated amount of space free in 136 the current DIRBLKSIZ block */ 137 int slotneeded; /* size of the entry we're seeking */ 138 int numdirpasses; /* strategy for directory search */ 139 doff_t endsearch; /* offset to end directory search */ 140 doff_t prevoff; /* previous value of ulr_offset */ 141 struct vnode *pdp; /* saved dp during symlink work */ 142 struct vnode *tdp; /* returned by VFS_VGET */ 143 doff_t enduseful; /* pointer past last used dir slot. 144 used for directory truncation. */ 145 u_long bmask; /* block offset mask */ 146 int error; 147 struct vnode **vpp = ap->a_vpp; 148 struct componentname *cnp = ap->a_cnp; 149 kauth_cred_t cred = cnp->cn_cred; 150 int flags; 151 int nameiop = cnp->cn_nameiop; 152 struct ulfsmount *ump = dp->i_ump; 153 const int needswap = ULFS_MPNEEDSWAP(ump); 154 int dirblksiz = ump->um_dirblksiz; 155 ino_t foundino; 156 struct ulfs_lookup_results *results; 157 int iswhiteout; /* temp result from cache_lookup() */ 158 159 flags = cnp->cn_flags; 160 161 bp = NULL; 162 slotoffset = -1; 163 *vpp = NULL; 164 endsearch = 0; /* silence compiler warning */ 165 166 /* 167 * Produce the auxiliary lookup results into i_crap. Increment 168 * its serial number so elsewhere we can tell if we're using 169 * stale results. This should not be done this way. XXX. 170 */ 171 results = &dp->i_crap; 172 dp->i_crapcounter++; 173 174 /* 175 * Check accessiblity of directory. 176 */ 177 if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) 178 return (error); 179 180 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 181 (nameiop == DELETE || nameiop == RENAME)) 182 return (EROFS); 183 184 /* 185 * We now have a segment name to search for, and a directory to search. 
186 * 187 * Before tediously performing a linear scan of the directory, 188 * check the name cache to see if the directory/name pair 189 * we are looking for is known already. 190 */ 191 if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen, 192 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { 193 if (iswhiteout) { 194 cnp->cn_flags |= ISWHITEOUT; 195 } 196 return *vpp == NULLVP ? ENOENT : 0; 197 } 198 if (iswhiteout) { 199 /* 200 * The namecache set iswhiteout without finding a 201 * cache entry. As of this writing (20121014), this 202 * can happen if there was a whiteout entry that has 203 * been invalidated by the lookup. It is not clear if 204 * it is correct to set ISWHITEOUT in this case or 205 * not; however, doing so retains the prior behavior, 206 * so we'll go with that until some clearer answer 207 * appears. XXX 208 */ 209 cnp->cn_flags |= ISWHITEOUT; 210 } 211 212 fstrans_start(vdp->v_mount, FSTRANS_SHARED); 213 214 /* 215 * Suppress search for slots unless creating 216 * file and at end of pathname, in which case 217 * we watch for a place to put the new file in 218 * case it doesn't already exist. 219 */ 220 slotstatus = FOUND; 221 slotfreespace = slotsize = slotneeded = 0; 222 if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { 223 slotstatus = NONE; 224 slotneeded = DIRECTSIZ(cnp->cn_namelen); 225 } 226 227 /* 228 * If there is cached information on a previous search of 229 * this directory, pick up where we last left off. 230 * We cache only lookups as these are the most common 231 * and have the greatest payoff. Caching CREATE has little 232 * benefit as it usually must search the entire directory 233 * to determine that the entry does not exist. Caching the 234 * location of the last DELETE or RENAME has not reduced 235 * profiling time and hence has been removed in the interest 236 * of simplicity. 
237 */ 238 bmask = vdp->v_mount->mnt_stat.f_iosize - 1; 239 240#ifdef LFS_DIRHASH 241 /* 242 * Use dirhash for fast operations on large directories. The logic 243 * to determine whether to hash the directory is contained within 244 * ulfsdirhash_build(); a zero return means that it decided to hash 245 * this directory and it successfully built up the hash table. 246 */ 247 if (ulfsdirhash_build(dp) == 0) { 248 /* Look for a free slot if needed. */ 249 enduseful = dp->i_size; 250 if (slotstatus != FOUND) { 251 slotoffset = ulfsdirhash_findfree(dp, slotneeded, 252 &slotsize); 253 if (slotoffset >= 0) { 254 slotstatus = COMPACT; 255 enduseful = ulfsdirhash_enduseful(dp); 256 if (enduseful < 0) 257 enduseful = dp->i_size; 258 } 259 } 260 /* Look up the component. */ 261 numdirpasses = 1; 262 entryoffsetinblock = 0; /* silence compiler warning */ 263 switch (ulfsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 264 &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { 265 case 0: 266 ep = (struct direct *)((char *)bp->b_data + 267 (results->ulr_offset & bmask)); 268 goto foundentry; 269 case ENOENT: 270 results->ulr_offset = roundup(dp->i_size, dirblksiz); 271 goto notfound; 272 default: 273 /* Something failed; just do a linear search. 
*/ 274 break; 275 } 276 } 277#endif /* LFS_DIRHASH */ 278 279 if (nameiop != LOOKUP || results->ulr_diroff == 0 || 280 results->ulr_diroff >= dp->i_size) { 281 entryoffsetinblock = 0; 282 results->ulr_offset = 0; 283 numdirpasses = 1; 284 } else { 285 results->ulr_offset = results->ulr_diroff; 286 if ((entryoffsetinblock = results->ulr_offset & bmask) && 287 (error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset, 288 NULL, &bp, false))) 289 goto out; 290 numdirpasses = 2; 291 nchstats.ncs_2passes++; 292 } 293 prevoff = results->ulr_offset; 294 endsearch = roundup(dp->i_size, dirblksiz); 295 enduseful = 0; 296 297searchloop: 298 while (results->ulr_offset < endsearch) { 299 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 300 preempt(); 301 /* 302 * If necessary, get the next directory block. 303 */ 304 if ((results->ulr_offset & bmask) == 0) { 305 if (bp != NULL) 306 brelse(bp, 0); 307 error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset, 308 NULL, &bp, false); 309 if (error) 310 goto out; 311 entryoffsetinblock = 0; 312 } 313 /* 314 * If still looking for a slot, and at a DIRBLKSIZ 315 * boundary, have to start looking for free space again. 316 */ 317 if (slotstatus == NONE && 318 (entryoffsetinblock & (dirblksiz - 1)) == 0) { 319 slotoffset = -1; 320 slotfreespace = 0; 321 } 322 /* 323 * Get pointer to next entry. 324 * Full validation checks are slow, so we only check 325 * enough to insure forward progress through the 326 * directory. Complete checks can be run by patching 327 * "dirchk" to be true. 
328 */ 329 KASSERT(bp != NULL); 330 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 331 if (ep->d_reclen == 0 || 332 (dirchk && ulfs_dirbadentry(vdp, ep, entryoffsetinblock))) { 333 int i; 334 335 ulfs_dirbad(dp, results->ulr_offset, "mangled entry"); 336 i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1)); 337 results->ulr_offset += i; 338 entryoffsetinblock += i; 339 continue; 340 } 341 342 /* 343 * If an appropriate sized slot has not yet been found, 344 * check to see if one is available. Also accumulate space 345 * in the current block so that we can determine if 346 * compaction is viable. 347 */ 348 if (slotstatus != FOUND) { 349 int size = ulfs_rw16(ep->d_reclen, needswap); 350 351 if (ep->d_ino != 0) 352 size -= DIRSIZ(FSFMT(vdp), ep, needswap); 353 if (size > 0) { 354 if (size >= slotneeded) { 355 slotstatus = FOUND; 356 slotoffset = results->ulr_offset; 357 slotsize = ulfs_rw16(ep->d_reclen, 358 needswap); 359 } else if (slotstatus == NONE) { 360 slotfreespace += size; 361 if (slotoffset == -1) 362 slotoffset = results->ulr_offset; 363 if (slotfreespace >= slotneeded) { 364 slotstatus = COMPACT; 365 slotsize = results->ulr_offset + 366 ulfs_rw16(ep->d_reclen, 367 needswap) - 368 slotoffset; 369 } 370 } 371 } 372 } 373 374 /* 375 * Check for a name match. 376 */ 377 if (ep->d_ino) { 378 int namlen; 379 380#if (BYTE_ORDER == LITTLE_ENDIAN) 381 if (FSFMT(vdp) && needswap == 0) 382 namlen = ep->d_type; 383 else 384 namlen = ep->d_namlen; 385#else 386 if (FSFMT(vdp) && needswap != 0) 387 namlen = ep->d_type; 388 else 389 namlen = ep->d_namlen; 390#endif 391 if (namlen == cnp->cn_namelen && 392 !memcmp(cnp->cn_nameptr, ep->d_name, 393 (unsigned)namlen)) { 394#ifdef LFS_DIRHASH 395foundentry: 396#endif 397 /* 398 * Save directory entry's inode number and 399 * reclen, and release directory buffer. 
400 */ 401 if (!FSFMT(vdp) && ep->d_type == DT_WHT) { 402 slotstatus = FOUND; 403 slotoffset = results->ulr_offset; 404 slotsize = ulfs_rw16(ep->d_reclen, 405 needswap); 406 results->ulr_reclen = slotsize; 407 /* 408 * This is used to set 409 * results->ulr_endoff, 410 * which may be used by ulfs_direnter() 411 * as a length to truncate the 412 * directory to. Therefore, it must 413 * point past the end of the last 414 * non-empty directory entry. We don't 415 * know where that is in this case, so 416 * we effectively disable shrinking by 417 * using the existing size of the 418 * directory. 419 * 420 * Note that we wouldn't expect to 421 * shrink the directory while rewriting 422 * an existing entry anyway. 423 */ 424 enduseful = endsearch; 425 cnp->cn_flags |= ISWHITEOUT; 426 numdirpasses--; 427 goto notfound; 428 } 429 foundino = ulfs_rw32(ep->d_ino, needswap); 430 results->ulr_reclen = 431 ulfs_rw16(ep->d_reclen, needswap); 432 goto found; 433 } 434 } 435 prevoff = results->ulr_offset; 436 results->ulr_offset += ulfs_rw16(ep->d_reclen, needswap); 437 entryoffsetinblock += ulfs_rw16(ep->d_reclen, needswap); 438 if (ep->d_ino) 439 enduseful = results->ulr_offset; 440 } 441notfound: 442 /* 443 * If we started in the middle of the directory and failed 444 * to find our target, we must check the beginning as well. 445 */ 446 if (numdirpasses == 2) { 447 numdirpasses--; 448 results->ulr_offset = 0; 449 endsearch = results->ulr_diroff; 450 goto searchloop; 451 } 452 if (bp != NULL) 453 brelse(bp, 0); 454 /* 455 * If creating, and at end of pathname and current 456 * directory has not been removed, then can consider 457 * allowing file to be created. 458 */ 459 if ((nameiop == CREATE || nameiop == RENAME || 460 (nameiop == DELETE && 461 (cnp->cn_flags & DOWHITEOUT) && 462 (cnp->cn_flags & ISWHITEOUT))) && 463 (flags & ISLASTCN) && dp->i_nlink != 0) { 464 /* 465 * Access for write is interpreted as allowing 466 * creation of files in the directory. 
467 */ 468 error = VOP_ACCESS(vdp, VWRITE, cred); 469 if (error) 470 goto out; 471 /* 472 * Return an indication of where the new directory 473 * entry should be put. If we didn't find a slot, 474 * then set results->ulr_count to 0 indicating 475 * that the new slot belongs at the end of the 476 * directory. If we found a slot, then the new entry 477 * can be put in the range from results->ulr_offset to 478 * results->ulr_offset + results->ulr_count. 479 */ 480 if (slotstatus == NONE) { 481 results->ulr_offset = roundup(dp->i_size, dirblksiz); 482 results->ulr_count = 0; 483 enduseful = results->ulr_offset; 484 } else if (nameiop == DELETE) { 485 results->ulr_offset = slotoffset; 486 if ((results->ulr_offset & (dirblksiz - 1)) == 0) 487 results->ulr_count = 0; 488 else 489 results->ulr_count = 490 results->ulr_offset - prevoff; 491 } else { 492 results->ulr_offset = slotoffset; 493 results->ulr_count = slotsize; 494 if (enduseful < slotoffset + slotsize) 495 enduseful = slotoffset + slotsize; 496 } 497 results->ulr_endoff = roundup(enduseful, dirblksiz); 498#if 0 /* commented out by dbj. none of the on disk fields changed */ 499 dp->i_flag |= IN_CHANGE | IN_UPDATE; 500#endif 501 /* 502 * We return with the directory locked, so that 503 * the parameters we set up above will still be 504 * valid if we actually decide to do a direnter(). 505 * We return ni_vp == NULL to indicate that the entry 506 * does not currently exist; we leave a pointer to 507 * the (locked) directory inode in ndp->ni_dvp. 508 * 509 * NB - if the directory is unlocked, then this 510 * information cannot be used. 511 */ 512 error = EJUSTRETURN; 513 goto out; 514 } 515 /* 516 * Insert name into cache (as non-existent) if appropriate. 
517 */ 518 if (nameiop != CREATE) { 519 cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, 520 cnp->cn_flags); 521 } 522 error = ENOENT; 523 goto out; 524 525found: 526 if (numdirpasses == 2) 527 nchstats.ncs_pass2++; 528 /* 529 * Check that directory length properly reflects presence 530 * of this entry. 531 */ 532 if (results->ulr_offset + DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) { 533 ulfs_dirbad(dp, results->ulr_offset, "i_size too small"); 534 dp->i_size = 535 results->ulr_offset + DIRSIZ(FSFMT(vdp), ep, needswap); 536 DIP_ASSIGN(dp, size, dp->i_size); 537 dp->i_flag |= IN_CHANGE | IN_UPDATE; 538 ULFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); 539 } 540 brelse(bp, 0); 541 542 /* 543 * Found component in pathname. 544 * If the final component of path name, save information 545 * in the cache as to where the entry was found. 546 */ 547 if ((flags & ISLASTCN) && nameiop == LOOKUP) 548 results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1); 549 550 /* 551 * If deleting, and at end of pathname, return 552 * parameters which can be used to remove file. 553 * Lock the inode, being careful with ".". 554 */ 555 if (nameiop == DELETE && (flags & ISLASTCN)) { 556 /* 557 * Return pointer to current entry in results->ulr_offset, 558 * and distance past previous entry (if there 559 * is a previous entry in this block) in results->ulr_count. 560 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 561 */ 562 if ((results->ulr_offset & (dirblksiz - 1)) == 0) 563 results->ulr_count = 0; 564 else 565 results->ulr_count = results->ulr_offset - prevoff; 566 if (dp->i_number == foundino) { 567 vref(vdp); 568 tdp = vdp; 569 } else { 570 if (flags & ISDOTDOT) 571 VOP_UNLOCK(vdp); /* race to get the inode */ 572 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 573 if (flags & ISDOTDOT) 574 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 575 if (error) 576 goto out; 577 } 578 /* 579 * Write access to directory required to delete files. 
580 */ 581 error = VOP_ACCESS(vdp, VWRITE, cred); 582 if (error) { 583 if (dp->i_number == foundino) 584 vrele(tdp); 585 else 586 vput(tdp); 587 goto out; 588 } 589 /* 590 * If directory is "sticky", then user must own 591 * the directory, or the file in it, else she 592 * may not delete it (unless she's root). This 593 * implements append-only directories. 594 */ 595 if (dp->i_mode & ISVTX) { 596 error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, 597 tdp, vdp, genfs_can_sticky(cred, dp->i_uid, 598 VTOI(tdp)->i_uid)); 599 if (error) { 600 if (dp->i_number == foundino) 601 vrele(tdp); 602 else 603 vput(tdp); 604 error = EPERM; 605 goto out; 606 } 607 } 608 *vpp = tdp; 609 error = 0; 610 goto out; 611 } 612 613 /* 614 * If rewriting (RENAME), return the inode and the 615 * information required to rewrite the present directory 616 * Must get inode of directory entry to verify it's a 617 * regular file, or empty directory. 618 */ 619 if (nameiop == RENAME && (flags & ISLASTCN)) { 620 error = VOP_ACCESS(vdp, VWRITE, cred); 621 if (error) 622 goto out; 623 /* 624 * Careful about locking second inode. 625 * This can only occur if the target is ".". 626 */ 627 if (dp->i_number == foundino) { 628 error = EISDIR; 629 goto out; 630 } 631 if (flags & ISDOTDOT) 632 VOP_UNLOCK(vdp); /* race to get the inode */ 633 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 634 if (flags & ISDOTDOT) 635 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 636 if (error) 637 goto out; 638 *vpp = tdp; 639 error = 0; 640 goto out; 641 } 642 643 /* 644 * Step through the translation in the name. We do not `vput' the 645 * directory because we may need it again if a symbolic link 646 * is relative to the current directory. Instead we save it 647 * unlocked as "pdp". We must get the target inode before unlocking 648 * the directory to insure that the inode will not be removed 649 * before we get it. We prevent deadlock by always fetching 650 * inodes from the root, moving down the directory tree. 
Thus 651 * when following backward pointers ".." we must unlock the 652 * parent directory before getting the requested directory. 653 * There is a potential race condition here if both the current 654 * and parent directories are removed before the VFS_VGET for the 655 * inode associated with ".." returns. We hope that this occurs 656 * infrequently since we cannot avoid this race condition without 657 * implementing a sophisticated deadlock detection algorithm. 658 * Note also that this simple deadlock detection scheme will not 659 * work if the file system has any hard links other than ".." 660 * that point backwards in the directory structure. 661 */ 662 pdp = vdp; 663 if (flags & ISDOTDOT) { 664 VOP_UNLOCK(pdp); /* race to get the inode */ 665 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 666 vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); 667 if (error) { 668 goto out; 669 } 670 *vpp = tdp; 671 } else if (dp->i_number == foundino) { 672 vref(vdp); /* we want ourself, ie "." */ 673 *vpp = vdp; 674 } else { 675 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 676 if (error) 677 goto out; 678 *vpp = tdp; 679 } 680 681 /* 682 * Insert name into cache if appropriate. 
683 */ 684 cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); 685 error = 0; 686 687out: 688 fstrans_done(vdp->v_mount); 689 return error; 690} 691 692void 693ulfs_dirbad(struct inode *ip, doff_t offset, const char *how) 694{ 695 struct mount *mp; 696 697 mp = ITOV(ip)->v_mount; 698 printf("%s: bad dir ino %llu at offset %d: %s\n", 699 mp->mnt_stat.f_mntonname, (unsigned long long)ip->i_number, 700 offset, how); 701 if ((mp->mnt_stat.f_flag & MNT_RDONLY) == 0) 702 panic("bad dir"); 703} 704 705/* 706 * Do consistency checking on a directory entry: 707 * record length must be multiple of 4 708 * entry must fit in rest of its DIRBLKSIZ block 709 * record must be large enough to contain entry 710 * name is not longer than FFS_MAXNAMLEN 711 * name must be as long as advertised, and null terminated 712 */ 713int 714ulfs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock) 715{ 716 int i; 717 int namlen; 718 struct ulfsmount *ump = VFSTOULFS(dp->v_mount); 719 const int needswap = ULFS_MPNEEDSWAP(ump); 720 int dirblksiz = ump->um_dirblksiz; 721 722#if (BYTE_ORDER == LITTLE_ENDIAN) 723 if (FSFMT(dp) && needswap == 0) 724 namlen = ep->d_type; 725 else 726 namlen = ep->d_namlen; 727#else 728 if (FSFMT(dp) && needswap != 0) 729 namlen = ep->d_type; 730 else 731 namlen = ep->d_namlen; 732#endif 733 if ((ulfs_rw16(ep->d_reclen, needswap) & 0x3) != 0 || 734 ulfs_rw16(ep->d_reclen, needswap) > 735 dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) || 736 ulfs_rw16(ep->d_reclen, needswap) < 737 DIRSIZ(FSFMT(dp), ep, needswap) || 738 namlen > FFS_MAXNAMLEN) { 739 /*return (1); */ 740 printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, " 741 "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n", 742 ulfs_rw16(ep->d_reclen, needswap), 743 (u_long)DIRSIZ(FSFMT(dp), ep, needswap), 744 namlen, dp->v_mount->mnt_flag, entryoffsetinblock, 745 dirblksiz); 746 goto bad; 747 } 748 if (ep->d_ino == 0) 749 return (0); 750 for (i = 0; i < namlen; i++) 
751 if (ep->d_name[i] == '\0') { 752 /*return (1); */ 753 printf("Second bad\n"); 754 goto bad; 755 } 756 if (ep->d_name[i]) 757 goto bad; 758 return (0); 759bad: 760 return (1); 761} 762 763/* 764 * Construct a new directory entry after a call to namei, using the 765 * name in the componentname argument cnp. The argument ip is the 766 * inode to which the new directory entry will refer. 767 */ 768void 769ulfs_makedirentry(struct inode *ip, struct componentname *cnp, 770 struct direct *newdirp) 771{ 772 newdirp->d_ino = ip->i_number; 773 newdirp->d_namlen = cnp->cn_namelen; 774 memcpy(newdirp->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen); 775 newdirp->d_name[cnp->cn_namelen] = '\0'; 776 if (FSFMT(ITOV(ip))) 777 newdirp->d_type = 0; 778 else 779 newdirp->d_type = IFTODT(ip->i_mode); 780} 781 782/* 783 * Write a directory entry after a call to namei, using the parameters 784 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results. 785 * 786 * DVP is the directory to be updated. It must be locked. 787 * ULR is the ulfs_lookup_results structure from the final lookup step. 788 * TVP is not used. (XXX: why is it here? remove it) 789 * DIRP is the new directory entry contents. 790 * CNP is the componentname from the final lookup step. 791 * NEWDIRBP is not used and (XXX) should be removed. The previous 792 * comment here said it was used by the now-removed softupdates code. 793 * 794 * The link count of the target inode is *not* incremented; the 795 * caller does that. 796 * 797 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the 798 * directory entry. ulr_offset, which is the place to put the entry, 799 * should be on a block boundary (and should be at the end of the 800 * directory AFAIK) and a fresh block is allocated to put the new 801 * directory entry in. 802 * 803 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert 804 * the entry into. This slot ranges from ulr_offset to ulr_offset + 805 * ulr_count. 
However, this slot may already be partially populated 806 * requiring compaction. See notes below. 807 * 808 * Furthermore, if ulr_count is not zero and ulr_endoff is not the 809 * same as i_size, the directory is truncated to size ulr_endoff. 810 */ 811int 812ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 813 struct vnode *tvp, struct direct *dirp, 814 struct componentname *cnp, struct buf *newdirbp) 815{ 816 kauth_cred_t cr; 817 struct lwp *l; 818 int newentrysize; 819 struct inode *dp; 820 struct buf *bp; 821 u_int dsize; 822 struct direct *ep, *nep; 823 int error, ret, blkoff, loc, spacefree; 824 char *dirbuf; 825 struct timespec ts; 826 struct ulfsmount *ump = VFSTOULFS(dvp->v_mount); 827 const int needswap = ULFS_MPNEEDSWAP(ump); 828 int dirblksiz = ump->um_dirblksiz; 829 830 ULFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); 831 832 error = 0; 833 cr = cnp->cn_cred; 834 l = curlwp; 835 836 dp = VTOI(dvp); 837 newentrysize = DIRSIZ(0, dirp, 0); 838 839 if (ulr->ulr_count == 0) { 840 /* 841 * If ulr_count is 0, then namei could find no 842 * space in the directory. Here, ulr_offset will 843 * be on a directory block boundary and we will write the 844 * new entry into a fresh block. 
845 */ 846 if (ulr->ulr_offset & (dirblksiz - 1)) 847 panic("ulfs_direnter: newblk"); 848 if ((error = ULFS_BALLOC(dvp, (off_t)ulr->ulr_offset, dirblksiz, 849 cr, B_CLRBUF | B_SYNC, &bp)) != 0) { 850 return (error); 851 } 852 dp->i_size = ulr->ulr_offset + dirblksiz; 853 DIP_ASSIGN(dp, size, dp->i_size); 854 dp->i_flag |= IN_CHANGE | IN_UPDATE; 855 uvm_vnp_setsize(dvp, dp->i_size); 856 dirp->d_reclen = ulfs_rw16(dirblksiz, needswap); 857 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 858 if (FSFMT(dvp)) { 859#if (BYTE_ORDER == LITTLE_ENDIAN) 860 if (needswap == 0) { 861#else 862 if (needswap != 0) { 863#endif 864 u_char tmp = dirp->d_namlen; 865 dirp->d_namlen = dirp->d_type; 866 dirp->d_type = tmp; 867 } 868 } 869 blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); 870 memcpy((char *)bp->b_data + blkoff, dirp, newentrysize); 871#ifdef LFS_DIRHASH 872 if (dp->i_dirhash != NULL) { 873 ulfsdirhash_newblk(dp, ulr->ulr_offset); 874 ulfsdirhash_add(dp, dirp, ulr->ulr_offset); 875 ulfsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 876 ulr->ulr_offset); 877 } 878#endif 879 error = VOP_BWRITE(bp->b_vp, bp); 880 vfs_timestamp(&ts); 881 ret = ULFS_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); 882 if (error == 0) 883 return (ret); 884 return (error); 885 } 886 887 /* 888 * If ulr_count is non-zero, then namei found space for the new 889 * entry in the range ulr_offset to ulr_offset + ulr_count 890 * in the directory. To use this space, we may have to compact 891 * the entries located there, by copying them together towards the 892 * beginning of the block, leaving the free space in one usable 893 * chunk at the end. 894 */ 895 896 /* 897 * Increase size of directory if entry eats into new space. 898 * This should never push the size past a new multiple of 899 * DIRBLKSIZ. 900 * 901 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 
902 */ 903 if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { 904#ifdef DIAGNOSTIC 905 printf("ulfs_direnter: reached 4.2-only block, " 906 "not supposed to happen\n"); 907#endif 908 dp->i_size = ulr->ulr_offset + ulr->ulr_count; 909 DIP_ASSIGN(dp, size, dp->i_size); 910 dp->i_flag |= IN_CHANGE | IN_UPDATE; 911 ULFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 912 } 913 /* 914 * Get the block containing the space for the new directory entry. 915 */ 916 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); 917 if (error) { 918 return (error); 919 } 920 /* 921 * Find space for the new entry. In the simple case, the entry at 922 * offset base will have the space. If it does not, then namei 923 * arranged that compacting the region ulr_offset to 924 * ulr_offset + ulr_count would yield the space. 925 */ 926 ep = (struct direct *)dirbuf; 927 dsize = (ep->d_ino != 0) ? DIRSIZ(FSFMT(dvp), ep, needswap) : 0; 928 spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize; 929 for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { 930 uint16_t reclen; 931 932 nep = (struct direct *)(dirbuf + loc); 933 934 /* Trim the existing slot (NB: dsize may be zero). */ 935 ep->d_reclen = ulfs_rw16(dsize, needswap); 936 ep = (struct direct *)((char *)ep + dsize); 937 938 reclen = ulfs_rw16(nep->d_reclen, needswap); 939 loc += reclen; 940 if (nep->d_ino == 0) { 941 /* 942 * A mid-block unused entry. Such entries are 943 * never created by the kernel, but fsck_ffs 944 * can create them (and it doesn't fix them). 945 * 946 * Add up the free space, and initialise the 947 * relocated entry since we don't memcpy it. 
948 */ 949 spacefree += reclen; 950 ep->d_ino = 0; 951 dsize = 0; 952 continue; 953 } 954 dsize = DIRSIZ(FSFMT(dvp), nep, needswap); 955 spacefree += reclen - dsize; 956#ifdef LFS_DIRHASH 957 if (dp->i_dirhash != NULL) 958 ulfsdirhash_move(dp, nep, 959 ulr->ulr_offset + ((char *)nep - dirbuf), 960 ulr->ulr_offset + ((char *)ep - dirbuf)); 961#endif 962 memcpy((void *)ep, (void *)nep, dsize); 963 } 964 /* 965 * Here, `ep' points to a directory entry containing `dsize' in-use 966 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 967 * then the entry is completely unused (dsize == 0). The value 968 * of ep->d_reclen is always indeterminate. 969 * 970 * Update the pointer fields in the previous entry (if any), 971 * copy in the new entry, and write out the block. 972 */ 973 if (ep->d_ino == 0 || 974 (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO && 975 memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 976 if (spacefree + dsize < newentrysize) 977 panic("ulfs_direnter: compact1"); 978 dirp->d_reclen = spacefree + dsize; 979 } else { 980 if (spacefree < newentrysize) 981 panic("ulfs_direnter: compact2"); 982 dirp->d_reclen = spacefree; 983 ep->d_reclen = ulfs_rw16(dsize, needswap); 984 ep = (struct direct *)((char *)ep + dsize); 985 } 986 dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap); 987 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 988 if (FSFMT(dvp)) { 989#if (BYTE_ORDER == LITTLE_ENDIAN) 990 if (needswap == 0) { 991#else 992 if (needswap != 0) { 993#endif 994 u_char tmp = dirp->d_namlen; 995 dirp->d_namlen = dirp->d_type; 996 dirp->d_type = tmp; 997 } 998 } 999#ifdef LFS_DIRHASH 1000 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 1001 dirp->d_reclen == spacefree)) 1002 ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); 1003#endif 1004 memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); 1005#ifdef LFS_DIRHASH 1006 if (dp->i_dirhash != NULL) 1007 ulfsdirhash_checkblock(dp, dirbuf - 1008 (ulr->ulr_offset & (dirblksiz - 
1)), 1009 ulr->ulr_offset & ~(dirblksiz - 1)); 1010#endif 1011 error = VOP_BWRITE(bp->b_vp, bp); 1012 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1013 /* 1014 * If all went well, and the directory can be shortened, proceed 1015 * with the truncation. Note that we have to unlock the inode for 1016 * the entry that we just entered, as the truncation may need to 1017 * lock other inodes which can lead to deadlock if we also hold a 1018 * lock on the newly entered node. 1019 */ 1020 if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { 1021#ifdef LFS_DIRHASH 1022 if (dp->i_dirhash != NULL) 1023 ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff); 1024#endif 1025 (void) ULFS_TRUNCATE(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); 1026 } 1027 ULFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 1028 return (error); 1029} 1030 1031/* 1032 * Remove a directory entry after a call to namei, using the 1033 * parameters that ulfs_lookup left in nameidata and in the 1034 * ulfs_lookup_results. 1035 * 1036 * DVP is the directory to be updated. It must be locked. 1037 * ULR is the ulfs_lookup_results structure from the final lookup step. 1038 * IP, if not null, is the inode being unlinked. 1039 * FLAGS may contain DOWHITEOUT. 1040 * ISRMDIR is not used and (XXX) should be removed. 1041 * 1042 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout 1043 * instead of being cleared. 1044 * 1045 * ulr->ulr_offset contains the position of the directory entry 1046 * to be removed. 1047 * 1048 * ulr->ulr_reclen contains the size of the directory entry to be 1049 * removed. 1050 * 1051 * ulr->ulr_count contains the size of the *previous* directory 1052 * entry. This allows finding it, for free space management. If 1053 * ulr_count is 0, the target entry is at the beginning of the 1054 * directory. (Does this ever happen? The first entry should be ".", 1055 * which should only be removed at rmdir time. Does rmdir come here 1056 * to clear out the "." and ".." entries? 
Perhaps, but I doubt it.) 1057 * 1058 * The space is marked free by adding it to the record length (not 1059 * name length) of the preceding entry. If the first entry becomes 1060 * free, it is marked free by setting the inode number to 0. 1061 * 1062 * The link count of IP is decremented. Note that this is not the 1063 * inverse behavior of ulfs_direnter, which does not adjust link 1064 * counts. Sigh. 1065 */ 1066int 1067ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 1068 struct inode *ip, int flags, int isrmdir) 1069{ 1070 struct inode *dp = VTOI(dvp); 1071 struct direct *ep; 1072 struct buf *bp; 1073 int error; 1074#ifdef LFS_EI 1075 const int needswap = ULFS_MPNEEDSWAP(dp->i_ump); 1076#endif 1077 1078 ULFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); 1079 1080 if (flags & DOWHITEOUT) { 1081 /* 1082 * Whiteout entry: set d_ino to ULFS_WINO. 1083 */ 1084 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, 1085 &bp, true); 1086 if (error) 1087 return (error); 1088 ep->d_ino = ulfs_rw32(ULFS_WINO, needswap); 1089 ep->d_type = DT_WHT; 1090 goto out; 1091 } 1092 1093 if ((error = ulfs_blkatoff(dvp, 1094 (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) 1095 return (error); 1096 1097#ifdef LFS_DIRHASH 1098 /* 1099 * Remove the dirhash entry. This is complicated by the fact 1100 * that `ep' is the previous entry when ulr_count != 0. 1101 */ 1102 if (dp->i_dirhash != NULL) 1103 ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : 1104 (struct direct *)((char *)ep + 1105 ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); 1106#endif 1107 1108 if (ulr->ulr_count == 0) { 1109 /* 1110 * First entry in block: set d_ino to zero. 1111 */ 1112 ep->d_ino = 0; 1113 } else { 1114 /* 1115 * Collapse new free space into previous entry. 
1116 */ 1117 ep->d_reclen = 1118 ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, 1119 needswap); 1120 } 1121 1122#ifdef LFS_DIRHASH 1123 if (dp->i_dirhash != NULL) { 1124 int dirblksiz = ip->i_ump->um_dirblksiz; 1125 ulfsdirhash_checkblock(dp, (char *)ep - 1126 ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), 1127 ulr->ulr_offset & ~(dirblksiz - 1)); 1128 } 1129#endif 1130 1131out: 1132 if (ip) { 1133 ip->i_nlink--; 1134 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1135 ip->i_flag |= IN_CHANGE; 1136 ULFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0); 1137 } 1138 /* 1139 * XXX did it ever occur to anyone that it might be a good 1140 * idea to restore ip->i_nlink if this fails? Or something? 1141 * Currently on error return from this function the state of 1142 * ip->i_nlink depends on what happened, and callers 1143 * definitely do not take this into account. 1144 */ 1145 error = VOP_BWRITE(bp->b_vp, bp); 1146 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1147 /* 1148 * If the last named reference to a snapshot goes away, 1149 * drop its snapshot reference so that it will be reclaimed 1150 * when last open reference goes away. 1151 */ 1152 if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && 1153 ip->i_nlink == 0) 1154 lfs_snapgone(ip); 1155 ULFS_WAPBL_UPDATE(dvp, NULL, NULL, 0); 1156 return (error); 1157} 1158 1159/* 1160 * Rewrite an existing directory entry to point at the inode supplied. 1161 * 1162 * DP is the directory to update. 1163 * OFFSET is the position of the entry in question. It may come 1164 * from ulr_offset of a ulfs_lookup_results. 1165 * OIP is the old inode the directory previously pointed to. 1166 * NEWINUM is the number of the new inode. 1167 * NEWTYPE is the new value for the type field of the directory entry. 1168 * (This is ignored if the fs doesn't support that.) 1169 * ISRMDIR is not used and (XXX) should be removed. 1170 * IFLAGS are added to DP's inode flags. 1171 * 1172 * The link count of OIP is decremented. 
Note that the link count of 1173 * the new inode is *not* incremented. Yay for symmetry. 1174 */ 1175int 1176ulfs_dirrewrite(struct inode *dp, off_t offset, 1177 struct inode *oip, ino_t newinum, int newtype, 1178 int isrmdir, int iflags) 1179{ 1180 struct buf *bp; 1181 struct direct *ep; 1182 struct vnode *vdp = ITOV(dp); 1183 int error; 1184 1185 error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true); 1186 if (error) 1187 return (error); 1188 ep->d_ino = ulfs_rw32(newinum, ULFS_MPNEEDSWAP(dp->i_ump)); 1189 if (!FSFMT(vdp)) 1190 ep->d_type = newtype; 1191 oip->i_nlink--; 1192 DIP_ASSIGN(oip, nlink, oip->i_nlink); 1193 oip->i_flag |= IN_CHANGE; 1194 ULFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP); 1195 error = VOP_BWRITE(bp->b_vp, bp); 1196 dp->i_flag |= iflags; 1197 /* 1198 * If the last named reference to a snapshot goes away, 1199 * drop its snapshot reference so that it will be reclaimed 1200 * when last open reference goes away. 1201 */ 1202 if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) 1203 lfs_snapgone(oip); 1204 ULFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); 1205 return (error); 1206} 1207 1208/* 1209 * Check if a directory is empty or not. 1210 * Inode supplied must be locked. 1211 * 1212 * Using a struct dirtemplate here is not precisely 1213 * what we want, but better than using a struct direct. 1214 * 1215 * NB: does not handle corrupted directories. 
1216 */ 1217int 1218ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) 1219{ 1220 doff_t off; 1221 struct dirtemplate dbuf; 1222 struct direct *dp = (struct direct *)&dbuf; 1223 int error, namlen; 1224 size_t count; 1225 const int needswap = ULFS_IPNEEDSWAP(ip); 1226#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1227 1228 for (off = 0; off < ip->i_size; 1229 off += ulfs_rw16(dp->d_reclen, needswap)) { 1230 error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off, 1231 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1232 /* 1233 * Since we read MINDIRSIZ, residual must 1234 * be 0 unless we're at end of file. 1235 */ 1236 if (error || count != 0) 1237 return (0); 1238 /* avoid infinite loops */ 1239 if (dp->d_reclen == 0) 1240 return (0); 1241 /* skip empty entries */ 1242 if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO) 1243 continue; 1244 /* accept only "." and ".." */ 1245#if (BYTE_ORDER == LITTLE_ENDIAN) 1246 if (FSFMT(ITOV(ip)) && needswap == 0) 1247 namlen = dp->d_type; 1248 else 1249 namlen = dp->d_namlen; 1250#else 1251 if (FSFMT(ITOV(ip)) && needswap != 0) 1252 namlen = dp->d_type; 1253 else 1254 namlen = dp->d_namlen; 1255#endif 1256 if (namlen > 2) 1257 return (0); 1258 if (dp->d_name[0] != '.') 1259 return (0); 1260 /* 1261 * At this point namlen must be 1 or 2. 1262 * 1 implies ".", 2 implies ".." if second 1263 * char is also "." 1264 */ 1265 if (namlen == 1 && 1266 ulfs_rw32(dp->d_ino, needswap) == ip->i_number) 1267 continue; 1268 if (dp->d_name[1] == '.' && 1269 ulfs_rw32(dp->d_ino, needswap) == parentino) 1270 continue; 1271 return (0); 1272 } 1273 return (1); 1274} 1275 1276/* 1277 * Check if source directory is in the path of the target directory. 1278 * Target is supplied locked, source is unlocked. 1279 * The target is always vput before returning. 
1280 */ 1281int 1282ulfs_checkpath(struct inode *source, struct inode *target, kauth_cred_t cred) 1283{ 1284 struct vnode *nextvp, *vp; 1285 int error, rootino, namlen; 1286 struct dirtemplate dirbuf; 1287 const int needswap = ULFS_MPNEEDSWAP(target->i_ump); 1288 1289 vp = ITOV(target); 1290 if (target->i_number == source->i_number) { 1291 error = EEXIST; 1292 goto out; 1293 } 1294 rootino = ULFS_ROOTINO; 1295 error = 0; 1296 if (target->i_number == rootino) 1297 goto out; 1298 1299 for (;;) { 1300 if (vp->v_type != VDIR) { 1301 error = ENOTDIR; 1302 break; 1303 } 1304 error = vn_rdwr(UIO_READ, vp, (void *)&dirbuf, 1305 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1306 IO_NODELOCKED, cred, NULL, NULL); 1307 if (error != 0) 1308 break; 1309#if (BYTE_ORDER == LITTLE_ENDIAN) 1310 if (FSFMT(vp) && needswap == 0) 1311 namlen = dirbuf.dotdot_type; 1312 else 1313 namlen = dirbuf.dotdot_namlen; 1314#else 1315 if (FSFMT(vp) && needswap != 0) 1316 namlen = dirbuf.dotdot_type; 1317 else 1318 namlen = dirbuf.dotdot_namlen; 1319#endif 1320 if (namlen != 2 || 1321 dirbuf.dotdot_name[0] != '.' || 1322 dirbuf.dotdot_name[1] != '.') { 1323 error = ENOTDIR; 1324 break; 1325 } 1326 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == source->i_number) { 1327 error = EINVAL; 1328 break; 1329 } 1330 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == rootino) 1331 break; 1332 VOP_UNLOCK(vp); 1333 error = VFS_VGET(vp->v_mount, 1334 ulfs_rw32(dirbuf.dotdot_ino, needswap), &nextvp); 1335 vrele(vp); 1336 if (error) { 1337 vp = NULL; 1338 break; 1339 } 1340 vp = nextvp; 1341 } 1342 1343out: 1344 if (error == ENOTDIR) 1345 printf("checkpath: .. not a directory\n"); 1346 if (vp != NULL) 1347 vput(vp); 1348 return (error); 1349} 1350 1351/* 1352 * Extract the inode number of ".." from a directory. 1353 * Helper for ulfs_parentcheck. 
1354 */ 1355static int 1356ulfs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t *result) 1357{ 1358 struct dirtemplate dirbuf; 1359 int namlen, error; 1360 1361 error = vn_rdwr(UIO_READ, vp, &dirbuf, 1362 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1363 IO_NODELOCKED, cred, NULL, NULL); 1364 if (error) { 1365 return error; 1366 } 1367 1368#if (BYTE_ORDER == LITTLE_ENDIAN) 1369 if (FSFMT(vp) && needswap == 0) 1370 namlen = dirbuf.dotdot_type; 1371 else 1372 namlen = dirbuf.dotdot_namlen; 1373#else 1374 if (FSFMT(vp) && needswap != 0) 1375 namlen = dirbuf.dotdot_type; 1376 else 1377 namlen = dirbuf.dotdot_namlen; 1378#endif 1379 if (namlen != 2 || 1380 dirbuf.dotdot_name[0] != '.' || 1381 dirbuf.dotdot_name[1] != '.') { 1382 printf("ulfs_readdotdot: directory %llu contains " 1383 "garbage instead of ..\n", 1384 (unsigned long long) VTOI(vp)->i_number); 1385 return ENOTDIR; 1386 } 1387 *result = ulfs_rw32(dirbuf.dotdot_ino, needswap); 1388 return 0; 1389} 1390 1391/* 1392 * Check if LOWER is a descendent of UPPER. If we find UPPER, return 1393 * nonzero in FOUND and return a reference to the immediate descendent 1394 * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we 1395 * reach the volume root and that isn't UPPER), return zero in FOUND 1396 * and null in UPPERCHILD. 1397 * 1398 * Neither UPPER nor LOWER should be locked. 1399 * 1400 * On error (such as a permissions error checking up the directory 1401 * tree) fail entirely. 1402 * 1403 * Note that UPPER and LOWER must be on the same volume, and because 1404 * we inspect only that volume NEEDSWAP can be constant. 
1405 */ 1406int 1407ulfs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred, 1408 int *found_ret, struct vnode **upperchild_ret) 1409{ 1410 const int needswap = ULFS_MPNEEDSWAP(VTOI(lower)->i_ump); 1411 ino_t upper_ino, found_ino; 1412 struct vnode *current, *next; 1413 int error; 1414 1415 if (upper == lower) { 1416 vref(upper); 1417 *found_ret = 1; 1418 *upperchild_ret = upper; 1419 return 0; 1420 } 1421 if (VTOI(lower)->i_number == ULFS_ROOTINO) { 1422 *found_ret = 0; 1423 *upperchild_ret = NULL; 1424 return 0; 1425 } 1426 1427 upper_ino = VTOI(upper)->i_number; 1428 1429 current = lower; 1430 vref(current); 1431 vn_lock(current, LK_EXCLUSIVE | LK_RETRY); 1432 1433 for (;;) { 1434 error = ulfs_readdotdot(current, needswap, cred, &found_ino); 1435 if (error) { 1436 vput(current); 1437 return error; 1438 } 1439 if (found_ino == upper_ino) { 1440 VOP_UNLOCK(current); 1441 *found_ret = 1; 1442 *upperchild_ret = current; 1443 return 0; 1444 } 1445 if (found_ino == ULFS_ROOTINO) { 1446 vput(current); 1447 *found_ret = 0; 1448 *upperchild_ret = NULL; 1449 return 0; 1450 } 1451 VOP_UNLOCK(current); 1452 error = VFS_VGET(current->v_mount, found_ino, &next); 1453 if (error) { 1454 vrele(current); 1455 return error; 1456 } 1457 KASSERT(VOP_ISLOCKED(next)); 1458 if (next->v_type != VDIR) { 1459 printf("ulfs_parentcheck: inode %llu reached via .. of " 1460 "inode %llu is not a directory\n", 1461 (unsigned long long)VTOI(next)->i_number, 1462 (unsigned long long)VTOI(current)->i_number); 1463 vput(next); 1464 vrele(current); 1465 return ENOTDIR; 1466 } 1467 vrele(current); 1468 current = next; 1469 } 1470 1471 return 0; 1472} 1473 1474#define ULFS_DIRRABLKS 0 1475int ulfs_dirrablks = ULFS_DIRRABLKS; 1476 1477/* 1478 * ulfs_blkatoff: Return buffer with the contents of block "offset" from 1479 * the beginning of directory "vp". If "res" is non-NULL, fill it in with 1480 * a pointer to the remaining space in the directory. 
If the caller intends
 * to modify the buffer returned, "modify" must be true.
 */

int
ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
    bool modify)
{
	struct mount *mnt = vp->v_mount;
	const int bshift = mnt->mnt_fs_bshift;
	const int bsize = 1 << bshift;
	const int dirrablks = ulfs_dirrablks;
	struct inode *ip;
	struct buf *bp;
	daddr_t *blks;
	int *blksizes;
	daddr_t lbn;
	off_t eof;
	int run, error;

	/* One slot for the wanted block plus one per read-ahead block. */
	blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
	blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);

	ip = VTOI(vp);
	KASSERT(vp->v_size == ip->i_size);
	GOP_SIZE(vp, vp->v_size, &eof, 0);

	/*
	 * Collect the wanted block and up to `dirrablks' followers,
	 * stopping at end of file or after the first short block.
	 */
	lbn = offset >> bshift;
	run = 0;
	while (run <= dirrablks) {
		const off_t curoff = lbn << bshift;
		const int size = MIN(eof - curoff, bsize);

		if (size == 0)
			break;
		KASSERT(curoff < eof);

		blks[run] = lbn;
		blksizes[run] = size;
		run++;
		lbn++;

		if (size != bsize)
			break;
	}
	KASSERT(run >= 1);

	/* Read block 0 of the run; the others are passed as read-ahead. */
	error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1],
	    run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp);
	if (error == 0) {
		if (res != NULL)
			*res = (char *)bp->b_data + (offset & (bsize - 1));
		*bpp = bp;
	} else {
		*bpp = NULL;
	}

	kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
	kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
	return error;
}