ulfs_lookup.c revision 1.13
/*	$NetBSD: ulfs_lookup.c,v 1.13 2013/07/28 00:29:18 dholland Exp $	*/
/*  from NetBSD: ufs_lookup.c,v 1.122 2013/01/22 09:39:18 dholland Exp  */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.13 2013/07/28 00:29:18 dholland Exp $");

#ifdef _KERNEL_OPT
#include "opt_lfs.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/kmem.h>

#include <ufs/lfs/lfs_extern.h>

#include <ufs/lfs/ulfs_inode.h>
#ifdef LFS_DIRHASH
#include <ufs/lfs/ulfs_dirhash.h>
#endif
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>
#include <ufs/lfs/ulfs_bswap.h>

#include <miscfs/genfs/genfs.h>

/*
 * When nonzero, ulfs_lookup() runs ulfs_dirbadentry() on each entry
 * visited by its linear directory scan.  It defaults to 1 only in
 * DIAGNOSTIC kernels; it is a plain global so it can be patched.
 */
#ifdef DIAGNOSTIC
int	lfs_dirchk = 1;
#else
int	lfs_dirchk = 0;
#endif

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
* When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ulfs_lookup:
 *
 *	check accessibility of directory
 *	look for name in cache, if found, then if at end of path
 *	  and deleting or creating, drop it, else return name
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Return value: 0 with *a_vpp set when the name was found; ENOENT when
 * it was not; EJUSTRETURN when it was not found but slot information
 * for creating it has been left in the directory inode's lookup
 * results; otherwise the error from the failing access check, block
 * read, or VFS_VGET.
 */
int
ulfs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vdp = ap->a_dvp;	/* vnode for directory being searched */
	struct inode *dp = VTOI(vdp);	/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct lfs_direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {
		NONE,		/* need to search a slot for our new entry */
		COMPACT,	/* a compaction can make a slot in the current
				   DIRBLKSIZ block */
		FOUND,		/* found a slot (or no need to search) */
	} slotstatus;
	doff_t slotoffset;		/* offset of area with free space.
					   a special value -1 for invalid */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* accumulated amount of space free in
					   the current DIRBLKSIZ block */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* previous value of ulr_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot.
					   used for directory truncation. */
	u_long bmask;			/* block offset mask */
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	kauth_cred_t cred = cnp->cn_cred;
	int flags;
	int nameiop = cnp->cn_nameiop;
	struct ulfsmount *ump = dp->i_ump;
	const int needswap = ULFS_MPNEEDSWAP(ump);
	int dirblksiz = ump->um_dirblksiz;
	ino_t foundino;
	struct ulfs_lookup_results *results;
	int iswhiteout;			/* temp result from cache_lookup() */

	flags = cnp->cn_flags;

	bp = NULL;
	slotoffset = -1;
	*vpp = NULL;
	endsearch = 0; /* silence compiler warning */

	/*
	 * Produce the auxiliary lookup results into i_crap. Increment
	 * its serial number so elsewhere we can tell if we're using
	 * stale results. This should not be done this way. XXX.
	 */
	results = &dp->i_crap;
	dp->i_crapcounter++;

	/*
	 * Check accessibility of directory.
	 */
	if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0)
		return (error);

	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (nameiop == DELETE || nameiop == RENAME))
		return (EROFS);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 */
	if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen,
			 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) {
		if (iswhiteout) {
			cnp->cn_flags |= ISWHITEOUT;
		}
		return *vpp == NULLVP ? ENOENT : 0;
	}
	if (iswhiteout) {
		/*
		 * The namecache set iswhiteout without finding a
		 * cache entry. As of this writing (20121014), this
		 * can happen if there was a whiteout entry that has
		 * been invalidated by the lookup. It is not clear if
		 * it is correct to set ISWHITEOUT in this case or
		 * not; however, doing so retains the prior behavior,
		 * so we'll go with that until some clearer answer
		 * appears. XXX
		 */
		cnp->cn_flags |= ISWHITEOUT;
	}

	/*
	 * From here on every exit goes through "out", which issues the
	 * matching fstrans_done().
	 */
	fstrans_start(vdp->v_mount, FSTRANS_SHARED);

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = LFS_DIRECTSIZ(cnp->cn_namelen);
	}

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = vdp->v_mount->mnt_stat.f_iosize - 1;

#ifdef LFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ulfsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ulfsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = dp->i_size;
		if (slotstatus != FOUND) {
			slotoffset = ulfsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ulfsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = dp->i_size;
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		switch (ulfsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
		case 0:
			ep = (struct lfs_direct *)((char *)bp->b_data +
			    (results->ulr_offset & bmask));
			goto foundentry;
		case ENOENT:
			results->ulr_offset = roundup(dp->i_size, dirblksiz);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* LFS_DIRHASH */

	if (nameiop != LOOKUP || results->ulr_diroff == 0 ||
	    results->ulr_diroff >= dp->i_size) {
		entryoffsetinblock = 0;
		results->ulr_offset = 0;
		numdirpasses = 1;
	} else {
		results->ulr_offset = results->ulr_diroff;
		if ((entryoffsetinblock = results->ulr_offset & bmask) &&
		    (error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset,
		    NULL, &bp, false)))
			goto out;
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	prevoff = results->ulr_offset;
	endsearch = roundup(dp->i_size, dirblksiz);
	enduseful = 0;

searchloop:
	while (results->ulr_offset < endsearch) {
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt();
		/*
		 * If necessary, get the next directory block.
		 */
		if ((results->ulr_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp, 0);
			error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset,
			    NULL, &bp, false);
			if (error)
				goto out;
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZ
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (dirblksiz - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to ensure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "lfs_dirchk" to be true.
		 */
		KASSERT(bp != NULL);
		ep = (struct lfs_direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 ||
		    (lfs_dirchk && ulfs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/* Skip ahead to the next dirblksiz boundary. */
			ulfs_dirbad(dp, results->ulr_offset, "mangled entry");
			i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1));
			results->ulr_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ulfs_rw16(ep->d_reclen, needswap);

			if (ep->d_ino != 0)
				size -= LFS_DIRSIZ(FSFMT(vdp), ep, needswap);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = results->ulr_offset;
					slotsize = ulfs_rw16(ep->d_reclen,
					    needswap);
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = results->ulr_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = results->ulr_offset +
						    ulfs_rw16(ep->d_reclen,
							     needswap) -
						    slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
			int namlen;

#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (FSFMT(vdp) && needswap == 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#else
			if (FSFMT(vdp) && needswap != 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#endif
			if (namlen == cnp->cn_namelen &&
			    !memcmp(cnp->cn_nameptr, ep->d_name,
			    (unsigned)namlen)) {
#ifdef LFS_DIRHASH
foundentry:
#endif
				/*
				 * Save directory entry's inode number and
				 * reclen, and release directory buffer.
				 */
				if (!FSFMT(vdp) && ep->d_type == LFS_DT_WHT) {
					slotstatus = FOUND;
					slotoffset = results->ulr_offset;
					slotsize = ulfs_rw16(ep->d_reclen,
					    needswap);
					results->ulr_reclen = slotsize;
					/*
					 * This is used to set
					 * results->ulr_endoff,
					 * which may be used by ulfs_direnter()
					 * as a length to truncate the
					 * directory to.  Therefore, it must
					 * point past the end of the last
					 * non-empty directory entry.  We don't
					 * know where that is in this case, so
					 * we effectively disable shrinking by
					 * using the existing size of the
					 * directory.
					 *
					 * Note that we wouldn't expect to
					 * shrink the directory while rewriting
					 * an existing entry anyway.
					 */
					enduseful = endsearch;
					cnp->cn_flags |= ISWHITEOUT;
					numdirpasses--;
					goto notfound;
				}
				foundino = ulfs_rw32(ep->d_ino, needswap);
				results->ulr_reclen =
				    ulfs_rw16(ep->d_reclen, needswap);
				goto found;
			}
		}
		prevoff = results->ulr_offset;
		results->ulr_offset += ulfs_rw16(ep->d_reclen, needswap);
		entryoffsetinblock += ulfs_rw16(ep->d_reclen, needswap);
		if (ep->d_ino)
			enduseful = results->ulr_offset;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		results->ulr_offset = 0;
		endsearch = results->ulr_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp, 0);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME ||
	     (nameiop == DELETE &&
	      (cnp->cn_flags & DOWHITEOUT) &&
	      (cnp->cn_flags & ISWHITEOUT))) &&
	    (flags & ISLASTCN) && dp->i_nlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			goto out;
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set results->ulr_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from results->ulr_offset to
		 * results->ulr_offset + results->ulr_count.
		 */
		if (slotstatus == NONE) {
			results->ulr_offset = roundup(dp->i_size, dirblksiz);
			results->ulr_count = 0;
			enduseful = results->ulr_offset;
		} else if (nameiop == DELETE) {
			results->ulr_offset = slotoffset;
			if ((results->ulr_offset & (dirblksiz - 1)) == 0)
				results->ulr_count = 0;
			else
				results->ulr_count =
				    results->ulr_offset - prevoff;
		} else {
			results->ulr_offset = slotoffset;
			results->ulr_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		results->ulr_endoff = roundup(enduseful, dirblksiz);
#if 0 /* commented out by dbj. none of the on disk fields changed */
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
#endif
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		error = EJUSTRETURN;
		goto out;
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (nameiop != CREATE) {
		cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
			    cnp->cn_flags);
	}
	error = ENOENT;
	goto out;

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (results->ulr_offset + LFS_DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) {
		ulfs_dirbad(dp, results->ulr_offset, "i_size too small");
		dp->i_size =
		    results->ulr_offset + LFS_DIRSIZ(FSFMT(vdp), ep, needswap);
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	brelse(bp, 0);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * Lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Return pointer to current entry in results->ulr_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in results->ulr_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((results->ulr_offset & (dirblksiz - 1)) == 0)
			results->ulr_count = 0;
		else
			results->ulr_count = results->ulr_offset - prevoff;
		if (dp->i_number == foundino) {
			vref(vdp);
			tdp = vdp;
		} else {
			if (flags & ISDOTDOT)
				VOP_UNLOCK(vdp); /* race to get the inode */
			error = VFS_VGET(vdp->v_mount, foundino, &tdp);
			if (flags & ISDOTDOT)
				vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
			if (error)
				goto out;
		}
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error) {
			/* "." was only vref'd above, so vrele; else vput. */
			if (dp->i_number == foundino)
				vrele(tdp);
			else
				vput(tdp);
			goto out;
		}
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if (dp->i_mode & ISVTX) {
			error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE,
			    tdp, vdp, genfs_can_sticky(cred, dp->i_uid,
			    VTOI(tdp)->i_uid));
			if (error) {
				if (dp->i_number == foundino)
					vrele(tdp);
				else
					vput(tdp);
				/* Any authorization failure is reported as EPERM. */
				error = EPERM;
				goto out;
			}
		}
		*vpp = tdp;
		error = 0;
		goto out;
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && (flags & ISLASTCN)) {
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			goto out;
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == foundino) {
			error = EISDIR;
			goto out;
		}
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp); /* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (flags & ISDOTDOT)
			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
		if (error)
			goto out;
		*vpp = tdp;
		error = 0;
		goto out;
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to ensure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			goto out;
		}
		*vpp = tdp;
	} else if (dp->i_number == foundino) {
		vref(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (error)
			goto out;
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	error = 0;

out:
	fstrans_done(vdp->v_mount);
	return error;
}

/*
 * Report a damaged directory: print the mount point, the directory's
 * inode number, the offending offset and the reason HOW.  Panics unless
 * the mount's f_flag has MNT_RDONLY set.
 */
void
ulfs_dirbad(struct inode *ip, doff_t offset, const char *how)
{
	struct mount *mp;

	mp = ITOV(ip)->v_mount;
	printf("%s: bad dir ino %llu at offset %d: %s\n",
	    mp->mnt_stat.f_mntonname, (unsigned long long)ip->i_number,
	    offset, how);
	if ((mp->mnt_stat.f_flag & MNT_RDONLY) == 0)
		panic("bad dir");
}

/*
 * Do consistency checking on a directory entry:
 *	record length must be multiple of 4
 *	entry must fit in rest of its DIRBLKSIZ block
 *	record must be large enough to contain entry
 *	name is not longer than LFS_MAXNAMLEN
 *	name must be as long as advertised, and null terminated
 *
 * Returns 0 if the entry passes and 1 if it does not.  The two name
 * checks are skipped for entries whose d_ino is 0.  The first two
 * failure classes also print a diagnostic; a missing terminator fails
 * silently.
 */
int
ulfs_dirbadentry(struct vnode *dp, struct lfs_direct *ep, int entryoffsetinblock)
{
	int i;
	int namlen;
	struct ulfsmount *ump = VFSTOULFS(dp->v_mount);
	const int needswap = ULFS_MPNEEDSWAP(ump);
	int dirblksiz = ump->um_dirblksiz;

	/*
	 * For FSFMT() directories the name length is read from d_type
	 * when the on-disk byte order is little-endian (presumably the
	 * old directory layout, where the two one-byte fields were a
	 * single 16-bit length -- TODO confirm against the header).
	 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
	if (FSFMT(dp) && needswap == 0)
		namlen = ep->d_type;
	else
		namlen = ep->d_namlen;
#else
	if (FSFMT(dp) && needswap != 0)
		namlen = ep->d_type;
	else
		namlen = ep->d_namlen;
#endif
	if ((ulfs_rw16(ep->d_reclen, needswap) & 0x3) != 0 ||
	    ulfs_rw16(ep->d_reclen, needswap) >
		dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) ||
	    ulfs_rw16(ep->d_reclen, needswap) <
		LFS_DIRSIZ(FSFMT(dp), ep, needswap) ||
	    namlen > LFS_MAXNAMLEN) {
		/*return (1); */
		printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
			"flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
			ulfs_rw16(ep->d_reclen, needswap),
			(u_long)LFS_DIRSIZ(FSFMT(dp), ep, needswap),
			namlen, dp->v_mount->mnt_flag, entryoffsetinblock,
			dirblksiz);
		goto bad;
	}
	if (ep->d_ino == 0)
		return (0);
	/* No NUL may appear inside the advertised name length... */
	for (i = 0; i < namlen; i++)
		if (ep->d_name[i] == '\0') {
			/*return (1); */
			printf("Second bad\n");
			goto bad;
	}
	/* ...and the byte right after the name must be the NUL. */
	if (ep->d_name[i])
		goto bad;
	return (0);
bad:
	return (1);
}

/*
 * Construct a new directory entry after a call to namei, using the
 * name in the componentname argument cnp. The argument ip is the
 * inode to which the new directory entry will refer.
 *
 * Fills in d_ino, d_namlen, d_name (NUL-terminated) and d_type of
 * *newdirp; d_type is 0 when FSFMT() is true for ip's vnode.  d_reclen
 * is not set here.
 */
void
ulfs_makedirentry(struct inode *ip, struct componentname *cnp,
    struct lfs_direct *newdirp)
{
	newdirp->d_ino = ip->i_number;
	newdirp->d_namlen = cnp->cn_namelen;
	memcpy(newdirp->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen);
	newdirp->d_name[cnp->cn_namelen] = '\0';
	if (FSFMT(ITOV(ip)))
		newdirp->d_type = 0;
	else
		newdirp->d_type = LFS_IFTODT(ip->i_mode);
}

/*
 * Write a directory entry after a call to namei, using the parameters
 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * TVP is not used. (XXX: why is it here? remove it)
 * DIRP is the new directory entry contents.
 * CNP is the componentname from the final lookup step.
 * NEWDIRBP is not used and (XXX) should be removed. The previous
 * comment here said it was used by the now-removed softupdates code.
 *
 * The link count of the target inode is *not* incremented; the
 * caller does that.
 *
 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
 * directory entry. ulr_offset, which is the place to put the entry,
 * should be on a block boundary (and should be at the end of the
 * directory AFAIK) and a fresh block is allocated to put the new
 * directory entry in.
 *
 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
 * the entry into.
This slot ranges from ulr_offset to ulr_offset + 804 * ulr_count. However, this slot may already be partially populated 805 * requiring compaction. See notes below. 806 * 807 * Furthermore, if ulr_count is not zero and ulr_endoff is not the 808 * same as i_size, the directory is truncated to size ulr_endoff. 809 */ 810int 811ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 812 struct vnode *tvp, struct lfs_direct *dirp, 813 struct componentname *cnp, struct buf *newdirbp) 814{ 815 kauth_cred_t cr; 816 struct lwp *l; 817 int newentrysize; 818 struct inode *dp; 819 struct buf *bp; 820 u_int dsize; 821 struct lfs_direct *ep, *nep; 822 int error, ret, lfs_blkoff, loc, spacefree; 823 char *dirbuf; 824 struct timespec ts; 825 struct ulfsmount *ump = VFSTOULFS(dvp->v_mount); 826 const int needswap = ULFS_MPNEEDSWAP(ump); 827 int dirblksiz = ump->um_dirblksiz; 828 829 error = 0; 830 cr = cnp->cn_cred; 831 l = curlwp; 832 833 dp = VTOI(dvp); 834 newentrysize = LFS_DIRSIZ(0, dirp, 0); 835 836 if (ulr->ulr_count == 0) { 837 /* 838 * If ulr_count is 0, then namei could find no 839 * space in the directory. Here, ulr_offset will 840 * be on a directory block boundary and we will write the 841 * new entry into a fresh block. 
842 */ 843 if (ulr->ulr_offset & (dirblksiz - 1)) 844 panic("ulfs_direnter: newblk"); 845 if ((error = ULFS_BALLOC(dvp, (off_t)ulr->ulr_offset, dirblksiz, 846 cr, B_CLRBUF | B_SYNC, &bp)) != 0) { 847 return (error); 848 } 849 dp->i_size = ulr->ulr_offset + dirblksiz; 850 DIP_ASSIGN(dp, size, dp->i_size); 851 dp->i_flag |= IN_CHANGE | IN_UPDATE; 852 uvm_vnp_setsize(dvp, dp->i_size); 853 dirp->d_reclen = ulfs_rw16(dirblksiz, needswap); 854 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 855 if (FSFMT(dvp)) { 856#if (BYTE_ORDER == LITTLE_ENDIAN) 857 if (needswap == 0) { 858#else 859 if (needswap != 0) { 860#endif 861 u_char tmp = dirp->d_namlen; 862 dirp->d_namlen = dirp->d_type; 863 dirp->d_type = tmp; 864 } 865 } 866 lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); 867 memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize); 868#ifdef LFS_DIRHASH 869 if (dp->i_dirhash != NULL) { 870 ulfsdirhash_newblk(dp, ulr->ulr_offset); 871 ulfsdirhash_add(dp, dirp, ulr->ulr_offset); 872 ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff, 873 ulr->ulr_offset); 874 } 875#endif 876 error = VOP_BWRITE(bp->b_vp, bp); 877 vfs_timestamp(&ts); 878 ret = ULFS_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); 879 if (error == 0) 880 return (ret); 881 return (error); 882 } 883 884 /* 885 * If ulr_count is non-zero, then namei found space for the new 886 * entry in the range ulr_offset to ulr_offset + ulr_count 887 * in the directory. To use this space, we may have to compact 888 * the entries located there, by copying them together towards the 889 * beginning of the block, leaving the free space in one usable 890 * chunk at the end. 891 */ 892 893 /* 894 * Increase size of directory if entry eats into new space. 895 * This should never push the size past a new multiple of 896 * DIRBLKSIZ. 897 * 898 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 
899 */ 900 if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { 901#ifdef DIAGNOSTIC 902 printf("ulfs_direnter: reached 4.2-only block, " 903 "not supposed to happen\n"); 904#endif 905 dp->i_size = ulr->ulr_offset + ulr->ulr_count; 906 DIP_ASSIGN(dp, size, dp->i_size); 907 dp->i_flag |= IN_CHANGE | IN_UPDATE; 908 } 909 /* 910 * Get the block containing the space for the new directory entry. 911 */ 912 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); 913 if (error) { 914 return (error); 915 } 916 /* 917 * Find space for the new entry. In the simple case, the entry at 918 * offset base will have the space. If it does not, then namei 919 * arranged that compacting the region ulr_offset to 920 * ulr_offset + ulr_count would yield the space. 921 */ 922 ep = (struct lfs_direct *)dirbuf; 923 dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0; 924 spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize; 925 for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { 926 uint16_t reclen; 927 928 nep = (struct lfs_direct *)(dirbuf + loc); 929 930 /* Trim the existing slot (NB: dsize may be zero). */ 931 ep->d_reclen = ulfs_rw16(dsize, needswap); 932 ep = (struct lfs_direct *)((char *)ep + dsize); 933 934 reclen = ulfs_rw16(nep->d_reclen, needswap); 935 loc += reclen; 936 if (nep->d_ino == 0) { 937 /* 938 * A mid-block unused entry. Such entries are 939 * never created by the kernel, but fsck_ffs 940 * can create them (and it doesn't fix them). 941 * 942 * Add up the free space, and initialise the 943 * relocated entry since we don't memcpy it. 
944 */ 945 spacefree += reclen; 946 ep->d_ino = 0; 947 dsize = 0; 948 continue; 949 } 950 dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap); 951 spacefree += reclen - dsize; 952#ifdef LFS_DIRHASH 953 if (dp->i_dirhash != NULL) 954 ulfsdirhash_move(dp, nep, 955 ulr->ulr_offset + ((char *)nep - dirbuf), 956 ulr->ulr_offset + ((char *)ep - dirbuf)); 957#endif 958 memcpy((void *)ep, (void *)nep, dsize); 959 } 960 /* 961 * Here, `ep' points to a directory entry containing `dsize' in-use 962 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 963 * then the entry is completely unused (dsize == 0). The value 964 * of ep->d_reclen is always indeterminate. 965 * 966 * Update the pointer fields in the previous entry (if any), 967 * copy in the new entry, and write out the block. 968 */ 969 if (ep->d_ino == 0 || 970 (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO && 971 memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 972 if (spacefree + dsize < newentrysize) 973 panic("ulfs_direnter: compact1"); 974 dirp->d_reclen = spacefree + dsize; 975 } else { 976 if (spacefree < newentrysize) 977 panic("ulfs_direnter: compact2"); 978 dirp->d_reclen = spacefree; 979 ep->d_reclen = ulfs_rw16(dsize, needswap); 980 ep = (struct lfs_direct *)((char *)ep + dsize); 981 } 982 dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap); 983 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 984 if (FSFMT(dvp)) { 985#if (BYTE_ORDER == LITTLE_ENDIAN) 986 if (needswap == 0) { 987#else 988 if (needswap != 0) { 989#endif 990 u_char tmp = dirp->d_namlen; 991 dirp->d_namlen = dirp->d_type; 992 dirp->d_type = tmp; 993 } 994 } 995#ifdef LFS_DIRHASH 996 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 997 dirp->d_reclen == spacefree)) 998 ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); 999#endif 1000 memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); 1001#ifdef LFS_DIRHASH 1002 if (dp->i_dirhash != NULL) 1003 ulfsdirhash_checkblock(dp, dirbuf - 1004 (ulr->ulr_offset & 
(dirblksiz - 1)), 1005 ulr->ulr_offset & ~(dirblksiz - 1)); 1006#endif 1007 error = VOP_BWRITE(bp->b_vp, bp); 1008 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1009 /* 1010 * If all went well, and the directory can be shortened, proceed 1011 * with the truncation. Note that we have to unlock the inode for 1012 * the entry that we just entered, as the truncation may need to 1013 * lock other inodes which can lead to deadlock if we also hold a 1014 * lock on the newly entered node. 1015 */ 1016 if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { 1017#ifdef LFS_DIRHASH 1018 if (dp->i_dirhash != NULL) 1019 ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff); 1020#endif 1021 (void) ULFS_TRUNCATE(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); 1022 } 1023 return (error); 1024} 1025 1026/* 1027 * Remove a directory entry after a call to namei, using the 1028 * parameters that ulfs_lookup left in nameidata and in the 1029 * ulfs_lookup_results. 1030 * 1031 * DVP is the directory to be updated. It must be locked. 1032 * ULR is the ulfs_lookup_results structure from the final lookup step. 1033 * IP, if not null, is the inode being unlinked. 1034 * FLAGS may contain DOWHITEOUT. 1035 * ISRMDIR is not used and (XXX) should be removed. 1036 * 1037 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout 1038 * instead of being cleared. 1039 * 1040 * ulr->ulr_offset contains the position of the directory entry 1041 * to be removed. 1042 * 1043 * ulr->ulr_reclen contains the size of the directory entry to be 1044 * removed. 1045 * 1046 * ulr->ulr_count contains the size of the *previous* directory 1047 * entry. This allows finding it, for free space management. If 1048 * ulr_count is 0, the target entry is at the beginning of the 1049 * directory. (Does this ever happen? The first entry should be ".", 1050 * which should only be removed at rmdir time. Does rmdir come here 1051 * to clear out the "." and ".." entries? Perhaps, but I doubt it.) 
1052 * 1053 * The space is marked free by adding it to the record length (not 1054 * name length) of the preceding entry. If the first entry becomes 1055 * free, it is marked free by setting the inode number to 0. 1056 * 1057 * The link count of IP is decremented. Note that this is not the 1058 * inverse behavior of ulfs_direnter, which does not adjust link 1059 * counts. Sigh. 1060 */ 1061int 1062ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 1063 struct inode *ip, int flags, int isrmdir) 1064{ 1065 struct inode *dp = VTOI(dvp); 1066 struct lfs_direct *ep; 1067 struct buf *bp; 1068 int error; 1069#ifdef LFS_EI 1070 const int needswap = ULFS_MPNEEDSWAP(dp->i_ump); 1071#endif 1072 1073 if (flags & DOWHITEOUT) { 1074 /* 1075 * Whiteout entry: set d_ino to ULFS_WINO. 1076 */ 1077 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, 1078 &bp, true); 1079 if (error) 1080 return (error); 1081 ep->d_ino = ulfs_rw32(ULFS_WINO, needswap); 1082 ep->d_type = LFS_DT_WHT; 1083 goto out; 1084 } 1085 1086 if ((error = ulfs_blkatoff(dvp, 1087 (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) 1088 return (error); 1089 1090#ifdef LFS_DIRHASH 1091 /* 1092 * Remove the dirhash entry. This is complicated by the fact 1093 * that `ep' is the previous entry when ulr_count != 0. 1094 */ 1095 if (dp->i_dirhash != NULL) 1096 ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : 1097 (struct lfs_direct *)((char *)ep + 1098 ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); 1099#endif 1100 1101 if (ulr->ulr_count == 0) { 1102 /* 1103 * First entry in block: set d_ino to zero. 1104 */ 1105 ep->d_ino = 0; 1106 } else { 1107 /* 1108 * Collapse new free space into previous entry. 
1109 */ 1110 ep->d_reclen = 1111 ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, 1112 needswap); 1113 } 1114 1115#ifdef LFS_DIRHASH 1116 if (dp->i_dirhash != NULL) { 1117 int dirblksiz = ip->i_ump->um_dirblksiz; 1118 ulfsdirhash_checkblock(dp, (char *)ep - 1119 ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), 1120 ulr->ulr_offset & ~(dirblksiz - 1)); 1121 } 1122#endif 1123 1124out: 1125 if (ip) { 1126 ip->i_nlink--; 1127 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1128 ip->i_flag |= IN_CHANGE; 1129 } 1130 /* 1131 * XXX did it ever occur to anyone that it might be a good 1132 * idea to restore ip->i_nlink if this fails? Or something? 1133 * Currently on error return from this function the state of 1134 * ip->i_nlink depends on what happened, and callers 1135 * definitely do not take this into account. 1136 */ 1137 error = VOP_BWRITE(bp->b_vp, bp); 1138 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1139 /* 1140 * If the last named reference to a snapshot goes away, 1141 * drop its snapshot reference so that it will be reclaimed 1142 * when last open reference goes away. 1143 */ 1144 if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && 1145 ip->i_nlink == 0) 1146 ulfs_snapgone(ip); 1147 return (error); 1148} 1149 1150/* 1151 * Rewrite an existing directory entry to point at the inode supplied. 1152 * 1153 * DP is the directory to update. 1154 * OFFSET is the position of the entry in question. It may come 1155 * from ulr_offset of a ulfs_lookup_results. 1156 * OIP is the old inode the directory previously pointed to. 1157 * NEWINUM is the number of the new inode. 1158 * NEWTYPE is the new value for the type field of the directory entry. 1159 * (This is ignored if the fs doesn't support that.) 1160 * ISRMDIR is not used and (XXX) should be removed. 1161 * IFLAGS are added to DP's inode flags. 1162 * 1163 * The link count of OIP is decremented. Note that the link count of 1164 * the new inode is *not* incremented. Yay for symmetry. 
1165 */ 1166int 1167ulfs_dirrewrite(struct inode *dp, off_t offset, 1168 struct inode *oip, ino_t newinum, int newtype, 1169 int isrmdir, int iflags) 1170{ 1171 struct buf *bp; 1172 struct lfs_direct *ep; 1173 struct vnode *vdp = ITOV(dp); 1174 int error; 1175 1176 error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true); 1177 if (error) 1178 return (error); 1179 ep->d_ino = ulfs_rw32(newinum, ULFS_MPNEEDSWAP(dp->i_ump)); 1180 if (!FSFMT(vdp)) 1181 ep->d_type = newtype; 1182 oip->i_nlink--; 1183 DIP_ASSIGN(oip, nlink, oip->i_nlink); 1184 oip->i_flag |= IN_CHANGE; 1185 error = VOP_BWRITE(bp->b_vp, bp); 1186 dp->i_flag |= iflags; 1187 /* 1188 * If the last named reference to a snapshot goes away, 1189 * drop its snapshot reference so that it will be reclaimed 1190 * when last open reference goes away. 1191 */ 1192 if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) 1193 ulfs_snapgone(oip); 1194 return (error); 1195} 1196 1197/* 1198 * Check if a directory is empty or not. 1199 * Inode supplied must be locked. 1200 * 1201 * Using a struct lfs_dirtemplate here is not precisely 1202 * what we want, but better than using a struct lfs_direct. 1203 * 1204 * NB: does not handle corrupted directories. 1205 */ 1206int 1207ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) 1208{ 1209 doff_t off; 1210 struct lfs_dirtemplate dbuf; 1211 struct lfs_direct *dp = (struct lfs_direct *)&dbuf; 1212 int error, namlen; 1213 size_t count; 1214 const int needswap = ULFS_IPNEEDSWAP(ip); 1215#define MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2) 1216 1217 for (off = 0; off < ip->i_size; 1218 off += ulfs_rw16(dp->d_reclen, needswap)) { 1219 error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off, 1220 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1221 /* 1222 * Since we read MINDIRSIZ, residual must 1223 * be 0 unless we're at end of file. 
1224 */ 1225 if (error || count != 0) 1226 return (0); 1227 /* avoid infinite loops */ 1228 if (dp->d_reclen == 0) 1229 return (0); 1230 /* skip empty entries */ 1231 if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO) 1232 continue; 1233 /* accept only "." and ".." */ 1234#if (BYTE_ORDER == LITTLE_ENDIAN) 1235 if (FSFMT(ITOV(ip)) && needswap == 0) 1236 namlen = dp->d_type; 1237 else 1238 namlen = dp->d_namlen; 1239#else 1240 if (FSFMT(ITOV(ip)) && needswap != 0) 1241 namlen = dp->d_type; 1242 else 1243 namlen = dp->d_namlen; 1244#endif 1245 if (namlen > 2) 1246 return (0); 1247 if (dp->d_name[0] != '.') 1248 return (0); 1249 /* 1250 * At this point namlen must be 1 or 2. 1251 * 1 implies ".", 2 implies ".." if second 1252 * char is also "." 1253 */ 1254 if (namlen == 1 && 1255 ulfs_rw32(dp->d_ino, needswap) == ip->i_number) 1256 continue; 1257 if (dp->d_name[1] == '.' && 1258 ulfs_rw32(dp->d_ino, needswap) == parentino) 1259 continue; 1260 return (0); 1261 } 1262 return (1); 1263} 1264 1265/* 1266 * Check if source directory is in the path of the target directory. 1267 * Target is supplied locked, source is unlocked. 1268 * The target is always vput before returning. 
1269 */ 1270int 1271ulfs_checkpath(struct inode *source, struct inode *target, kauth_cred_t cred) 1272{ 1273 struct vnode *nextvp, *vp; 1274 int error, rootino, namlen; 1275 struct lfs_dirtemplate dirbuf; 1276 const int needswap = ULFS_MPNEEDSWAP(target->i_ump); 1277 1278 vp = ITOV(target); 1279 if (target->i_number == source->i_number) { 1280 error = EEXIST; 1281 goto out; 1282 } 1283 rootino = ULFS_ROOTINO; 1284 error = 0; 1285 if (target->i_number == rootino) 1286 goto out; 1287 1288 for (;;) { 1289 if (vp->v_type != VDIR) { 1290 error = ENOTDIR; 1291 break; 1292 } 1293 error = vn_rdwr(UIO_READ, vp, (void *)&dirbuf, 1294 sizeof (struct lfs_dirtemplate), (off_t)0, UIO_SYSSPACE, 1295 IO_NODELOCKED, cred, NULL, NULL); 1296 if (error != 0) 1297 break; 1298#if (BYTE_ORDER == LITTLE_ENDIAN) 1299 if (FSFMT(vp) && needswap == 0) 1300 namlen = dirbuf.dotdot_type; 1301 else 1302 namlen = dirbuf.dotdot_namlen; 1303#else 1304 if (FSFMT(vp) && needswap != 0) 1305 namlen = dirbuf.dotdot_type; 1306 else 1307 namlen = dirbuf.dotdot_namlen; 1308#endif 1309 if (namlen != 2 || 1310 dirbuf.dotdot_name[0] != '.' || 1311 dirbuf.dotdot_name[1] != '.') { 1312 error = ENOTDIR; 1313 break; 1314 } 1315 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == source->i_number) { 1316 error = EINVAL; 1317 break; 1318 } 1319 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == rootino) 1320 break; 1321 VOP_UNLOCK(vp); 1322 error = VFS_VGET(vp->v_mount, 1323 ulfs_rw32(dirbuf.dotdot_ino, needswap), &nextvp); 1324 vrele(vp); 1325 if (error) { 1326 vp = NULL; 1327 break; 1328 } 1329 vp = nextvp; 1330 } 1331 1332out: 1333 if (error == ENOTDIR) 1334 printf("checkpath: .. not a directory\n"); 1335 if (vp != NULL) 1336 vput(vp); 1337 return (error); 1338} 1339 1340/* 1341 * Extract the inode number of ".." from a directory. 1342 * Helper for ulfs_parentcheck. 
1343 */ 1344static int 1345ulfs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t *result) 1346{ 1347 struct lfs_dirtemplate dirbuf; 1348 int namlen, error; 1349 1350 error = vn_rdwr(UIO_READ, vp, &dirbuf, 1351 sizeof (struct lfs_dirtemplate), (off_t)0, UIO_SYSSPACE, 1352 IO_NODELOCKED, cred, NULL, NULL); 1353 if (error) { 1354 return error; 1355 } 1356 1357#if (BYTE_ORDER == LITTLE_ENDIAN) 1358 if (FSFMT(vp) && needswap == 0) 1359 namlen = dirbuf.dotdot_type; 1360 else 1361 namlen = dirbuf.dotdot_namlen; 1362#else 1363 if (FSFMT(vp) && needswap != 0) 1364 namlen = dirbuf.dotdot_type; 1365 else 1366 namlen = dirbuf.dotdot_namlen; 1367#endif 1368 if (namlen != 2 || 1369 dirbuf.dotdot_name[0] != '.' || 1370 dirbuf.dotdot_name[1] != '.') { 1371 printf("ulfs_readdotdot: directory %llu contains " 1372 "garbage instead of ..\n", 1373 (unsigned long long) VTOI(vp)->i_number); 1374 return ENOTDIR; 1375 } 1376 *result = ulfs_rw32(dirbuf.dotdot_ino, needswap); 1377 return 0; 1378} 1379 1380/* 1381 * Check if LOWER is a descendent of UPPER. If we find UPPER, return 1382 * nonzero in FOUND and return a reference to the immediate descendent 1383 * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we 1384 * reach the volume root and that isn't UPPER), return zero in FOUND 1385 * and null in UPPERCHILD. 1386 * 1387 * Neither UPPER nor LOWER should be locked. 1388 * 1389 * On error (such as a permissions error checking up the directory 1390 * tree) fail entirely. 1391 * 1392 * Note that UPPER and LOWER must be on the same volume, and because 1393 * we inspect only that volume NEEDSWAP can be constant. 
1394 */ 1395int 1396ulfs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred, 1397 int *found_ret, struct vnode **upperchild_ret) 1398{ 1399 const int needswap = ULFS_MPNEEDSWAP(VTOI(lower)->i_ump); 1400 ino_t upper_ino, found_ino; 1401 struct vnode *current, *next; 1402 int error; 1403 1404 if (upper == lower) { 1405 vref(upper); 1406 *found_ret = 1; 1407 *upperchild_ret = upper; 1408 return 0; 1409 } 1410 if (VTOI(lower)->i_number == ULFS_ROOTINO) { 1411 *found_ret = 0; 1412 *upperchild_ret = NULL; 1413 return 0; 1414 } 1415 1416 upper_ino = VTOI(upper)->i_number; 1417 1418 current = lower; 1419 vref(current); 1420 vn_lock(current, LK_EXCLUSIVE | LK_RETRY); 1421 1422 for (;;) { 1423 error = ulfs_readdotdot(current, needswap, cred, &found_ino); 1424 if (error) { 1425 vput(current); 1426 return error; 1427 } 1428 if (found_ino == upper_ino) { 1429 VOP_UNLOCK(current); 1430 *found_ret = 1; 1431 *upperchild_ret = current; 1432 return 0; 1433 } 1434 if (found_ino == ULFS_ROOTINO) { 1435 vput(current); 1436 *found_ret = 0; 1437 *upperchild_ret = NULL; 1438 return 0; 1439 } 1440 VOP_UNLOCK(current); 1441 error = VFS_VGET(current->v_mount, found_ino, &next); 1442 if (error) { 1443 vrele(current); 1444 return error; 1445 } 1446 KASSERT(VOP_ISLOCKED(next)); 1447 if (next->v_type != VDIR) { 1448 printf("ulfs_parentcheck: inode %llu reached via .. of " 1449 "inode %llu is not a directory\n", 1450 (unsigned long long)VTOI(next)->i_number, 1451 (unsigned long long)VTOI(current)->i_number); 1452 vput(next); 1453 vrele(current); 1454 return ENOTDIR; 1455 } 1456 vrele(current); 1457 current = next; 1458 } 1459 1460 return 0; 1461} 1462 1463#define ULFS_DIRRABLKS 0 1464int ulfs_dirrablks = ULFS_DIRRABLKS; 1465 1466/* 1467 * ulfs_blkatoff: Return buffer with the contents of block "offset" from 1468 * the beginning of directory "vp". If "res" is non-NULL, fill it in with 1469 * a pointer to the remaining space in the directory. 
If the caller intends 1470 * to modify the buffer returned, "modify" must be true. 1471 */ 1472 1473int 1474ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp, 1475 bool modify) 1476{ 1477 struct inode *ip; 1478 struct buf *bp; 1479 daddr_t lbn; 1480 const int dirrablks = ulfs_dirrablks; 1481 daddr_t *blks; 1482 int *blksizes; 1483 int run, error; 1484 struct mount *mp = vp->v_mount; 1485 const int bshift = mp->mnt_fs_bshift; 1486 const int bsize = 1 << bshift; 1487 off_t eof; 1488 1489 blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP); 1490 blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP); 1491 ip = VTOI(vp); 1492 KASSERT(vp->v_size == ip->i_size); 1493 GOP_SIZE(vp, vp->v_size, &eof, 0); 1494 lbn = offset >> bshift; 1495 1496 for (run = 0; run <= dirrablks;) { 1497 const off_t curoff = lbn << bshift; 1498 const int size = MIN(eof - curoff, bsize); 1499 1500 if (size == 0) { 1501 break; 1502 } 1503 KASSERT(curoff < eof); 1504 blks[run] = lbn; 1505 blksizes[run] = size; 1506 lbn++; 1507 run++; 1508 if (size != bsize) { 1509 break; 1510 } 1511 } 1512 KASSERT(run >= 1); 1513 error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], 1514 run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); 1515 if (error != 0) { 1516 *bpp = NULL; 1517 goto out; 1518 } 1519 if (res) { 1520 *res = (char *)bp->b_data + (offset & (bsize - 1)); 1521 } 1522 *bpp = bp; 1523 1524 out: 1525 kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t)); 1526 kmem_free(blksizes, (1 + dirrablks) * sizeof(int)); 1527 return error; 1528} 1529