ulfs_lookup.c revision 1.15
1/* $NetBSD: ulfs_lookup.c,v 1.15 2013/07/28 01:10:49 dholland Exp $ */ 2/* from NetBSD: ufs_lookup.c,v 1.122 2013/01/22 09:39:18 dholland Exp */ 3 4/* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_lookup.c 8.9 (Berkeley) 8/11/94 38 */ 39 40#include <sys/cdefs.h> 41__KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.15 2013/07/28 01:10:49 dholland Exp $"); 42 43#ifdef _KERNEL_OPT 44#include "opt_lfs.h" 45#endif 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/namei.h> 50#include <sys/buf.h> 51#include <sys/file.h> 52#include <sys/stat.h> 53#include <sys/mount.h> 54#include <sys/vnode.h> 55#include <sys/kernel.h> 56#include <sys/kauth.h> 57#include <sys/wapbl.h> 58#include <sys/fstrans.h> 59#include <sys/proc.h> 60#include <sys/kmem.h> 61 62#include <ufs/lfs/lfs_extern.h> 63 64#include <ufs/lfs/ulfs_inode.h> 65#ifdef LFS_DIRHASH 66#include <ufs/lfs/ulfs_dirhash.h> 67#endif 68#include <ufs/lfs/ulfsmount.h> 69#include <ufs/lfs/ulfs_extern.h> 70#include <ufs/lfs/ulfs_bswap.h> 71 72#include <miscfs/genfs/genfs.h> 73 74#ifdef DIAGNOSTIC 75int lfs_dirchk = 1; 76#else 77int lfs_dirchk = 0; 78#endif 79 80/* 81 * Convert a component of a pathname into a pointer to a locked inode. 82 * This is a very central and rather complicated routine. 83 * If the file system is not maintained in a strict tree hierarchy, 84 * this can result in a deadlock situation (see comments in code below). 85 * 86 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 87 * on whether the name is to be looked up, created, renamed, or deleted. 88 * When CREATE, RENAME, or DELETE is specified, information usable in 89 * creating, renaming, or deleting a directory entry may be calculated. 90 * If flag has LOCKPARENT or'ed into it and the target of the pathname 91 * exists, lookup returns both the target and its parent directory locked. 92 * When creating or renaming and LOCKPARENT is specified, the target may 93 * not be ".". When deleting and LOCKPARENT is specified, the target may 94 * be "."., but the caller must check to ensure it does an vrele and vput 95 * instead of two vputs. 96 * 97 * Overall outline of ulfs_lookup: 98 * 99 * check accessibility of directory 100 * look for name in cache, if found, then if at end of path 101 * and deleting or creating, drop it, else return name 102 * search for name in directory, to found or notfound 103 * notfound: 104 * if creating, return locked directory, leaving info on available slots 105 * else return error 106 * found: 107 * if at end of path and deleting, return information to allow delete 108 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 109 * inode and return info to allow rewrite 110 * if not at end, add name to cache; if at end and neither creating 111 * nor deleting, add name to cache 112 */ 113int 114ulfs_lookup(void *v) 115{ 116 struct vop_lookup_args /* { 117 struct vnode *a_dvp; 118 struct vnode **a_vpp; 119 struct componentname *a_cnp; 120 } */ *ap = v; 121 struct vnode *vdp = ap->a_dvp; /* vnode for directory being searched */ 122 struct inode *dp = VTOI(vdp); /* inode for directory being searched */ 123 struct buf *bp; /* a buffer of directory entries */ 124 struct lfs_direct *ep; /* the current directory entry */ 125 int entryoffsetinblock; /* offset of ep in bp's buffer */ 126 enum { 127 NONE, /* need to search a slot for our new entry */ 128 COMPACT, /* a compaction can make a slot in the current 129 DIRBLKSIZ block */ 130 FOUND, /* found a slot (or no need to search) */ 131 } slotstatus; 132 doff_t slotoffset; /* offset of area with free space. 133 a special value -1 for invalid */ 134 int slotsize; /* size of area at slotoffset */ 135 int slotfreespace; /* accumulated amount of space free in 136 the current DIRBLKSIZ block */ 137 int slotneeded; /* size of the entry we're seeking */ 138 int numdirpasses; /* strategy for directory search */ 139 doff_t endsearch; /* offset to end directory search */ 140 doff_t prevoff; /* previous value of ulr_offset */ 141 struct vnode *pdp; /* saved dp during symlink work */ 142 struct vnode *tdp; /* returned by VFS_VGET */ 143 doff_t enduseful; /* pointer past last used dir slot. 144 used for directory truncation. */ 145 u_long bmask; /* block offset mask */ 146 int error; 147 struct vnode **vpp = ap->a_vpp; 148 struct componentname *cnp = ap->a_cnp; 149 kauth_cred_t cred = cnp->cn_cred; 150 int flags; 151 int nameiop = cnp->cn_nameiop; 152 struct lfs *fs = dp->i_lfs; 153 const int needswap = ULFS_MPNEEDSWAP(fs); 154 int dirblksiz = fs->um_dirblksiz; 155 ino_t foundino; 156 struct ulfs_lookup_results *results; 157 int iswhiteout; /* temp result from cache_lookup() */ 158 159 flags = cnp->cn_flags; 160 161 bp = NULL; 162 slotoffset = -1; 163 *vpp = NULL; 164 endsearch = 0; /* silence compiler warning */ 165 166 /* 167 * Produce the auxiliary lookup results into i_crap. Increment 168 * its serial number so elsewhere we can tell if we're using 169 * stale results. This should not be done this way. XXX. 170 */ 171 results = &dp->i_crap; 172 dp->i_crapcounter++; 173 174 /* 175 * Check accessiblity of directory. 176 */ 177 if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) 178 return (error); 179 180 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 181 (nameiop == DELETE || nameiop == RENAME)) 182 return (EROFS); 183 184 /* 185 * We now have a segment name to search for, and a directory to search. 186 * 187 * Before tediously performing a linear scan of the directory, 188 * check the name cache to see if the directory/name pair 189 * we are looking for is known already. 190 */ 191 if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen, 192 cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) { 193 if (iswhiteout) { 194 cnp->cn_flags |= ISWHITEOUT; 195 } 196 return *vpp == NULLVP ? ENOENT : 0; 197 } 198 if (iswhiteout) { 199 /* 200 * The namecache set iswhiteout without finding a 201 * cache entry. As of this writing (20121014), this 202 * can happen if there was a whiteout entry that has 203 * been invalidated by the lookup. It is not clear if 204 * it is correct to set ISWHITEOUT in this case or 205 * not; however, doing so retains the prior behavior, 206 * so we'll go with that until some clearer answer 207 * appears. XXX 208 */ 209 cnp->cn_flags |= ISWHITEOUT; 210 } 211 212 fstrans_start(vdp->v_mount, FSTRANS_SHARED); 213 214 /* 215 * Suppress search for slots unless creating 216 * file and at end of pathname, in which case 217 * we watch for a place to put the new file in 218 * case it doesn't already exist. 219 */ 220 slotstatus = FOUND; 221 slotfreespace = slotsize = slotneeded = 0; 222 if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { 223 slotstatus = NONE; 224 slotneeded = LFS_DIRECTSIZ(cnp->cn_namelen); 225 } 226 227 /* 228 * If there is cached information on a previous search of 229 * this directory, pick up where we last left off. 230 * We cache only lookups as these are the most common 231 * and have the greatest payoff. Caching CREATE has little 232 * benefit as it usually must search the entire directory 233 * to determine that the entry does not exist. Caching the 234 * location of the last DELETE or RENAME has not reduced 235 * profiling time and hence has been removed in the interest 236 * of simplicity. 237 */ 238 bmask = vdp->v_mount->mnt_stat.f_iosize - 1; 239 240#ifdef LFS_DIRHASH 241 /* 242 * Use dirhash for fast operations on large directories. The logic 243 * to determine whether to hash the directory is contained within 244 * ulfsdirhash_build(); a zero return means that it decided to hash 245 * this directory and it successfully built up the hash table. 246 */ 247 if (ulfsdirhash_build(dp) == 0) { 248 /* Look for a free slot if needed. */ 249 enduseful = dp->i_size; 250 if (slotstatus != FOUND) { 251 slotoffset = ulfsdirhash_findfree(dp, slotneeded, 252 &slotsize); 253 if (slotoffset >= 0) { 254 slotstatus = COMPACT; 255 enduseful = ulfsdirhash_enduseful(dp); 256 if (enduseful < 0) 257 enduseful = dp->i_size; 258 } 259 } 260 /* Look up the component. */ 261 numdirpasses = 1; 262 entryoffsetinblock = 0; /* silence compiler warning */ 263 switch (ulfsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 264 &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { 265 case 0: 266 ep = (struct lfs_direct *)((char *)bp->b_data + 267 (results->ulr_offset & bmask)); 268 goto foundentry; 269 case ENOENT: 270 results->ulr_offset = roundup(dp->i_size, dirblksiz); 271 goto notfound; 272 default: 273 /* Something failed; just do a linear search. */ 274 break; 275 } 276 } 277#endif /* LFS_DIRHASH */ 278 279 if (nameiop != LOOKUP || results->ulr_diroff == 0 || 280 results->ulr_diroff >= dp->i_size) { 281 entryoffsetinblock = 0; 282 results->ulr_offset = 0; 283 numdirpasses = 1; 284 } else { 285 results->ulr_offset = results->ulr_diroff; 286 if ((entryoffsetinblock = results->ulr_offset & bmask) && 287 (error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset, 288 NULL, &bp, false))) 289 goto out; 290 numdirpasses = 2; 291 nchstats.ncs_2passes++; 292 } 293 prevoff = results->ulr_offset; 294 endsearch = roundup(dp->i_size, dirblksiz); 295 enduseful = 0; 296 297searchloop: 298 while (results->ulr_offset < endsearch) { 299 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 300 preempt(); 301 /* 302 * If necessary, get the next directory block. 303 */ 304 if ((results->ulr_offset & bmask) == 0) { 305 if (bp != NULL) 306 brelse(bp, 0); 307 error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset, 308 NULL, &bp, false); 309 if (error) 310 goto out; 311 entryoffsetinblock = 0; 312 } 313 /* 314 * If still looking for a slot, and at a DIRBLKSIZ 315 * boundary, have to start looking for free space again. 316 */ 317 if (slotstatus == NONE && 318 (entryoffsetinblock & (dirblksiz - 1)) == 0) { 319 slotoffset = -1; 320 slotfreespace = 0; 321 } 322 /* 323 * Get pointer to next entry. 324 * Full validation checks are slow, so we only check 325 * enough to insure forward progress through the 326 * directory. Complete checks can be run by patching 327 * "lfs_dirchk" to be true. 328 */ 329 KASSERT(bp != NULL); 330 ep = (struct lfs_direct *)((char *)bp->b_data + entryoffsetinblock); 331 if (ep->d_reclen == 0 || 332 (lfs_dirchk && ulfs_dirbadentry(vdp, ep, entryoffsetinblock))) { 333 int i; 334 335 ulfs_dirbad(dp, results->ulr_offset, "mangled entry"); 336 i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1)); 337 results->ulr_offset += i; 338 entryoffsetinblock += i; 339 continue; 340 } 341 342 /* 343 * If an appropriate sized slot has not yet been found, 344 * check to see if one is available. Also accumulate space 345 * in the current block so that we can determine if 346 * compaction is viable. 347 */ 348 if (slotstatus != FOUND) { 349 int size = ulfs_rw16(ep->d_reclen, needswap); 350 351 if (ep->d_ino != 0) 352 size -= LFS_DIRSIZ(FSFMT(vdp), ep, needswap); 353 if (size > 0) { 354 if (size >= slotneeded) { 355 slotstatus = FOUND; 356 slotoffset = results->ulr_offset; 357 slotsize = ulfs_rw16(ep->d_reclen, 358 needswap); 359 } else if (slotstatus == NONE) { 360 slotfreespace += size; 361 if (slotoffset == -1) 362 slotoffset = results->ulr_offset; 363 if (slotfreespace >= slotneeded) { 364 slotstatus = COMPACT; 365 slotsize = results->ulr_offset + 366 ulfs_rw16(ep->d_reclen, 367 needswap) - 368 slotoffset; 369 } 370 } 371 } 372 } 373 374 /* 375 * Check for a name match. 376 */ 377 if (ep->d_ino) { 378 int namlen; 379 380#if (BYTE_ORDER == LITTLE_ENDIAN) 381 if (FSFMT(vdp) && needswap == 0) 382 namlen = ep->d_type; 383 else 384 namlen = ep->d_namlen; 385#else 386 if (FSFMT(vdp) && needswap != 0) 387 namlen = ep->d_type; 388 else 389 namlen = ep->d_namlen; 390#endif 391 if (namlen == cnp->cn_namelen && 392 !memcmp(cnp->cn_nameptr, ep->d_name, 393 (unsigned)namlen)) { 394#ifdef LFS_DIRHASH 395foundentry: 396#endif 397 /* 398 * Save directory entry's inode number and 399 * reclen, and release directory buffer. 400 */ 401 if (!FSFMT(vdp) && ep->d_type == LFS_DT_WHT) { 402 slotstatus = FOUND; 403 slotoffset = results->ulr_offset; 404 slotsize = ulfs_rw16(ep->d_reclen, 405 needswap); 406 results->ulr_reclen = slotsize; 407 /* 408 * This is used to set 409 * results->ulr_endoff, 410 * which may be used by ulfs_direnter() 411 * as a length to truncate the 412 * directory to. Therefore, it must 413 * point past the end of the last 414 * non-empty directory entry. We don't 415 * know where that is in this case, so 416 * we effectively disable shrinking by 417 * using the existing size of the 418 * directory. 419 * 420 * Note that we wouldn't expect to 421 * shrink the directory while rewriting 422 * an existing entry anyway. 423 */ 424 enduseful = endsearch; 425 cnp->cn_flags |= ISWHITEOUT; 426 numdirpasses--; 427 goto notfound; 428 } 429 foundino = ulfs_rw32(ep->d_ino, needswap); 430 results->ulr_reclen = 431 ulfs_rw16(ep->d_reclen, needswap); 432 goto found; 433 } 434 } 435 prevoff = results->ulr_offset; 436 results->ulr_offset += ulfs_rw16(ep->d_reclen, needswap); 437 entryoffsetinblock += ulfs_rw16(ep->d_reclen, needswap); 438 if (ep->d_ino) 439 enduseful = results->ulr_offset; 440 } 441notfound: 442 /* 443 * If we started in the middle of the directory and failed 444 * to find our target, we must check the beginning as well. 445 */ 446 if (numdirpasses == 2) { 447 numdirpasses--; 448 results->ulr_offset = 0; 449 endsearch = results->ulr_diroff; 450 goto searchloop; 451 } 452 if (bp != NULL) 453 brelse(bp, 0); 454 /* 455 * If creating, and at end of pathname and current 456 * directory has not been removed, then can consider 457 * allowing file to be created. 458 */ 459 if ((nameiop == CREATE || nameiop == RENAME || 460 (nameiop == DELETE && 461 (cnp->cn_flags & DOWHITEOUT) && 462 (cnp->cn_flags & ISWHITEOUT))) && 463 (flags & ISLASTCN) && dp->i_nlink != 0) { 464 /* 465 * Access for write is interpreted as allowing 466 * creation of files in the directory. 467 */ 468 error = VOP_ACCESS(vdp, VWRITE, cred); 469 if (error) 470 goto out; 471 /* 472 * Return an indication of where the new directory 473 * entry should be put. If we didn't find a slot, 474 * then set results->ulr_count to 0 indicating 475 * that the new slot belongs at the end of the 476 * directory. If we found a slot, then the new entry 477 * can be put in the range from results->ulr_offset to 478 * results->ulr_offset + results->ulr_count. 479 */ 480 if (slotstatus == NONE) { 481 results->ulr_offset = roundup(dp->i_size, dirblksiz); 482 results->ulr_count = 0; 483 enduseful = results->ulr_offset; 484 } else if (nameiop == DELETE) { 485 results->ulr_offset = slotoffset; 486 if ((results->ulr_offset & (dirblksiz - 1)) == 0) 487 results->ulr_count = 0; 488 else 489 results->ulr_count = 490 results->ulr_offset - prevoff; 491 } else { 492 results->ulr_offset = slotoffset; 493 results->ulr_count = slotsize; 494 if (enduseful < slotoffset + slotsize) 495 enduseful = slotoffset + slotsize; 496 } 497 results->ulr_endoff = roundup(enduseful, dirblksiz); 498#if 0 /* commented out by dbj. none of the on disk fields changed */ 499 dp->i_flag |= IN_CHANGE | IN_UPDATE; 500#endif 501 /* 502 * We return with the directory locked, so that 503 * the parameters we set up above will still be 504 * valid if we actually decide to do a direnter(). 505 * We return ni_vp == NULL to indicate that the entry 506 * does not currently exist; we leave a pointer to 507 * the (locked) directory inode in ndp->ni_dvp. 508 * 509 * NB - if the directory is unlocked, then this 510 * information cannot be used. 511 */ 512 error = EJUSTRETURN; 513 goto out; 514 } 515 /* 516 * Insert name into cache (as non-existent) if appropriate. 517 */ 518 if (nameiop != CREATE) { 519 cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, 520 cnp->cn_flags); 521 } 522 error = ENOENT; 523 goto out; 524 525found: 526 if (numdirpasses == 2) 527 nchstats.ncs_pass2++; 528 /* 529 * Check that directory length properly reflects presence 530 * of this entry. 531 */ 532 if (results->ulr_offset + LFS_DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) { 533 ulfs_dirbad(dp, results->ulr_offset, "i_size too small"); 534 dp->i_size = 535 results->ulr_offset + LFS_DIRSIZ(FSFMT(vdp), ep, needswap); 536 DIP_ASSIGN(dp, size, dp->i_size); 537 dp->i_flag |= IN_CHANGE | IN_UPDATE; 538 } 539 brelse(bp, 0); 540 541 /* 542 * Found component in pathname. 543 * If the final component of path name, save information 544 * in the cache as to where the entry was found. 545 */ 546 if ((flags & ISLASTCN) && nameiop == LOOKUP) 547 results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1); 548 549 /* 550 * If deleting, and at end of pathname, return 551 * parameters which can be used to remove file. 552 * Lock the inode, being careful with ".". 553 */ 554 if (nameiop == DELETE && (flags & ISLASTCN)) { 555 /* 556 * Return pointer to current entry in results->ulr_offset, 557 * and distance past previous entry (if there 558 * is a previous entry in this block) in results->ulr_count. 559 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 560 */ 561 if ((results->ulr_offset & (dirblksiz - 1)) == 0) 562 results->ulr_count = 0; 563 else 564 results->ulr_count = results->ulr_offset - prevoff; 565 if (dp->i_number == foundino) { 566 vref(vdp); 567 tdp = vdp; 568 } else { 569 if (flags & ISDOTDOT) 570 VOP_UNLOCK(vdp); /* race to get the inode */ 571 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 572 if (flags & ISDOTDOT) 573 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 574 if (error) 575 goto out; 576 } 577 /* 578 * Write access to directory required to delete files. 579 */ 580 error = VOP_ACCESS(vdp, VWRITE, cred); 581 if (error) { 582 if (dp->i_number == foundino) 583 vrele(tdp); 584 else 585 vput(tdp); 586 goto out; 587 } 588 /* 589 * If directory is "sticky", then user must own 590 * the directory, or the file in it, else she 591 * may not delete it (unless she's root). This 592 * implements append-only directories. 593 */ 594 if (dp->i_mode & ISVTX) { 595 error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE, 596 tdp, vdp, genfs_can_sticky(cred, dp->i_uid, 597 VTOI(tdp)->i_uid)); 598 if (error) { 599 if (dp->i_number == foundino) 600 vrele(tdp); 601 else 602 vput(tdp); 603 error = EPERM; 604 goto out; 605 } 606 } 607 *vpp = tdp; 608 error = 0; 609 goto out; 610 } 611 612 /* 613 * If rewriting (RENAME), return the inode and the 614 * information required to rewrite the present directory 615 * Must get inode of directory entry to verify it's a 616 * regular file, or empty directory. 617 */ 618 if (nameiop == RENAME && (flags & ISLASTCN)) { 619 error = VOP_ACCESS(vdp, VWRITE, cred); 620 if (error) 621 goto out; 622 /* 623 * Careful about locking second inode. 624 * This can only occur if the target is ".". 625 */ 626 if (dp->i_number == foundino) { 627 error = EISDIR; 628 goto out; 629 } 630 if (flags & ISDOTDOT) 631 VOP_UNLOCK(vdp); /* race to get the inode */ 632 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 633 if (flags & ISDOTDOT) 634 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 635 if (error) 636 goto out; 637 *vpp = tdp; 638 error = 0; 639 goto out; 640 } 641 642 /* 643 * Step through the translation in the name. We do not `vput' the 644 * directory because we may need it again if a symbolic link 645 * is relative to the current directory. Instead we save it 646 * unlocked as "pdp". We must get the target inode before unlocking 647 * the directory to insure that the inode will not be removed 648 * before we get it. We prevent deadlock by always fetching 649 * inodes from the root, moving down the directory tree. Thus 650 * when following backward pointers ".." we must unlock the 651 * parent directory before getting the requested directory. 652 * There is a potential race condition here if both the current 653 * and parent directories are removed before the VFS_VGET for the 654 * inode associated with ".." returns. We hope that this occurs 655 * infrequently since we cannot avoid this race condition without 656 * implementing a sophisticated deadlock detection algorithm. 657 * Note also that this simple deadlock detection scheme will not 658 * work if the file system has any hard links other than ".." 659 * that point backwards in the directory structure. 660 */ 661 pdp = vdp; 662 if (flags & ISDOTDOT) { 663 VOP_UNLOCK(pdp); /* race to get the inode */ 664 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 665 vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); 666 if (error) { 667 goto out; 668 } 669 *vpp = tdp; 670 } else if (dp->i_number == foundino) { 671 vref(vdp); /* we want ourself, ie "." */ 672 *vpp = vdp; 673 } else { 674 error = VFS_VGET(vdp->v_mount, foundino, &tdp); 675 if (error) 676 goto out; 677 *vpp = tdp; 678 } 679 680 /* 681 * Insert name into cache if appropriate. 682 */ 683 cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); 684 error = 0; 685 686out: 687 fstrans_done(vdp->v_mount); 688 return error; 689} 690 691void 692ulfs_dirbad(struct inode *ip, doff_t offset, const char *how) 693{ 694 struct mount *mp; 695 696 mp = ITOV(ip)->v_mount; 697 printf("%s: bad dir ino %llu at offset %d: %s\n", 698 mp->mnt_stat.f_mntonname, (unsigned long long)ip->i_number, 699 offset, how); 700 if ((mp->mnt_stat.f_flag & MNT_RDONLY) == 0) 701 panic("bad dir"); 702} 703 704/* 705 * Do consistency checking on a directory entry: 706 * record length must be multiple of 4 707 * entry must fit in rest of its DIRBLKSIZ block 708 * record must be large enough to contain entry 709 * name is not longer than LFS_MAXNAMLEN 710 * name must be as long as advertised, and null terminated 711 */ 712int 713ulfs_dirbadentry(struct vnode *dp, struct lfs_direct *ep, int entryoffsetinblock) 714{ 715 int i; 716 int namlen; 717 struct ulfsmount *ump = VFSTOULFS(dp->v_mount); 718 struct lfs *fs = ump->um_lfs; 719 const int needswap = ULFS_MPNEEDSWAP(fs); 720 int dirblksiz = fs->um_dirblksiz; 721 722#if (BYTE_ORDER == LITTLE_ENDIAN) 723 if (FSFMT(dp) && needswap == 0) 724 namlen = ep->d_type; 725 else 726 namlen = ep->d_namlen; 727#else 728 if (FSFMT(dp) && needswap != 0) 729 namlen = ep->d_type; 730 else 731 namlen = ep->d_namlen; 732#endif 733 if ((ulfs_rw16(ep->d_reclen, needswap) & 0x3) != 0 || 734 ulfs_rw16(ep->d_reclen, needswap) > 735 dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) || 736 ulfs_rw16(ep->d_reclen, needswap) < 737 LFS_DIRSIZ(FSFMT(dp), ep, needswap) || 738 namlen > LFS_MAXNAMLEN) { 739 /*return (1); */ 740 printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, " 741 "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n", 742 ulfs_rw16(ep->d_reclen, needswap), 743 (u_long)LFS_DIRSIZ(FSFMT(dp), ep, needswap), 744 namlen, dp->v_mount->mnt_flag, entryoffsetinblock, 745 dirblksiz); 746 goto bad; 747 } 748 if (ep->d_ino == 0) 749 return (0); 750 for (i = 0; i < namlen; i++) 751 if (ep->d_name[i] == '\0') { 752 /*return (1); */ 753 printf("Second bad\n"); 754 goto bad; 755 } 756 if (ep->d_name[i]) 757 goto bad; 758 return (0); 759bad: 760 return (1); 761} 762 763/* 764 * Construct a new directory entry after a call to namei, using the 765 * name in the componentname argument cnp. The argument ip is the 766 * inode to which the new directory entry will refer. 767 */ 768void 769ulfs_makedirentry(struct inode *ip, struct componentname *cnp, 770 struct lfs_direct *newdirp) 771{ 772 newdirp->d_ino = ip->i_number; 773 newdirp->d_namlen = cnp->cn_namelen; 774 memcpy(newdirp->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen); 775 newdirp->d_name[cnp->cn_namelen] = '\0'; 776 if (FSFMT(ITOV(ip))) 777 newdirp->d_type = 0; 778 else 779 newdirp->d_type = LFS_IFTODT(ip->i_mode); 780} 781 782/* 783 * Write a directory entry after a call to namei, using the parameters 784 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results. 785 * 786 * DVP is the directory to be updated. It must be locked. 787 * ULR is the ulfs_lookup_results structure from the final lookup step. 788 * TVP is not used. (XXX: why is it here? remove it) 789 * DIRP is the new directory entry contents. 790 * CNP is the componentname from the final lookup step. 791 * NEWDIRBP is not used and (XXX) should be removed. The previous 792 * comment here said it was used by the now-removed softupdates code. 793 * 794 * The link count of the target inode is *not* incremented; the 795 * caller does that. 796 * 797 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the 798 * directory entry. ulr_offset, which is the place to put the entry, 799 * should be on a block boundary (and should be at the end of the 800 * directory AFAIK) and a fresh block is allocated to put the new 801 * directory entry in. 802 * 803 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert 804 * the entry into. This slot ranges from ulr_offset to ulr_offset + 805 * ulr_count. However, this slot may already be partially populated 806 * requiring compaction. See notes below. 807 * 808 * Furthermore, if ulr_count is not zero and ulr_endoff is not the 809 * same as i_size, the directory is truncated to size ulr_endoff. 810 */ 811int 812ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 813 struct vnode *tvp, struct lfs_direct *dirp, 814 struct componentname *cnp, struct buf *newdirbp) 815{ 816 kauth_cred_t cr; 817 struct lwp *l; 818 int newentrysize; 819 struct inode *dp; 820 struct buf *bp; 821 u_int dsize; 822 struct lfs_direct *ep, *nep; 823 int error, ret, lfs_blkoff, loc, spacefree; 824 char *dirbuf; 825 struct timespec ts; 826 struct ulfsmount *ump = VFSTOULFS(dvp->v_mount); 827 struct lfs *fs = ump->um_lfs; 828 const int needswap = ULFS_MPNEEDSWAP(fs); 829 int dirblksiz = fs->um_dirblksiz; 830 831 error = 0; 832 cr = cnp->cn_cred; 833 l = curlwp; 834 835 dp = VTOI(dvp); 836 newentrysize = LFS_DIRSIZ(0, dirp, 0); 837 838 if (ulr->ulr_count == 0) { 839 /* 840 * If ulr_count is 0, then namei could find no 841 * space in the directory. Here, ulr_offset will 842 * be on a directory block boundary and we will write the 843 * new entry into a fresh block. 844 */ 845 if (ulr->ulr_offset & (dirblksiz - 1)) 846 panic("ulfs_direnter: newblk"); 847 if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset, dirblksiz, 848 cr, B_CLRBUF | B_SYNC, &bp)) != 0) { 849 return (error); 850 } 851 dp->i_size = ulr->ulr_offset + dirblksiz; 852 DIP_ASSIGN(dp, size, dp->i_size); 853 dp->i_flag |= IN_CHANGE | IN_UPDATE; 854 uvm_vnp_setsize(dvp, dp->i_size); 855 dirp->d_reclen = ulfs_rw16(dirblksiz, needswap); 856 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 857 if (FSFMT(dvp)) { 858#if (BYTE_ORDER == LITTLE_ENDIAN) 859 if (needswap == 0) { 860#else 861 if (needswap != 0) { 862#endif 863 u_char tmp = dirp->d_namlen; 864 dirp->d_namlen = dirp->d_type; 865 dirp->d_type = tmp; 866 } 867 } 868 lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); 869 memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize); 870#ifdef LFS_DIRHASH 871 if (dp->i_dirhash != NULL) { 872 ulfsdirhash_newblk(dp, ulr->ulr_offset); 873 ulfsdirhash_add(dp, dirp, ulr->ulr_offset); 874 ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff, 875 ulr->ulr_offset); 876 } 877#endif 878 error = VOP_BWRITE(bp->b_vp, bp); 879 vfs_timestamp(&ts); 880 ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP); 881 if (error == 0) 882 return (ret); 883 return (error); 884 } 885 886 /* 887 * If ulr_count is non-zero, then namei found space for the new 888 * entry in the range ulr_offset to ulr_offset + ulr_count 889 * in the directory. To use this space, we may have to compact 890 * the entries located there, by copying them together towards the 891 * beginning of the block, leaving the free space in one usable 892 * chunk at the end. 893 */ 894 895 /* 896 * Increase size of directory if entry eats into new space. 897 * This should never push the size past a new multiple of 898 * DIRBLKSIZ. 899 * 900 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 901 */ 902 if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { 903#ifdef DIAGNOSTIC 904 printf("ulfs_direnter: reached 4.2-only block, " 905 "not supposed to happen\n"); 906#endif 907 dp->i_size = ulr->ulr_offset + ulr->ulr_count; 908 DIP_ASSIGN(dp, size, dp->i_size); 909 dp->i_flag |= IN_CHANGE | IN_UPDATE; 910 } 911 /* 912 * Get the block containing the space for the new directory entry. 913 */ 914 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); 915 if (error) { 916 return (error); 917 } 918 /* 919 * Find space for the new entry. In the simple case, the entry at 920 * offset base will have the space. If it does not, then namei 921 * arranged that compacting the region ulr_offset to 922 * ulr_offset + ulr_count would yield the space. 923 */ 924 ep = (struct lfs_direct *)dirbuf; 925 dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0; 926 spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize; 927 for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { 928 uint16_t reclen; 929 930 nep = (struct lfs_direct *)(dirbuf + loc); 931 932 /* Trim the existing slot (NB: dsize may be zero). */ 933 ep->d_reclen = ulfs_rw16(dsize, needswap); 934 ep = (struct lfs_direct *)((char *)ep + dsize); 935 936 reclen = ulfs_rw16(nep->d_reclen, needswap); 937 loc += reclen; 938 if (nep->d_ino == 0) { 939 /* 940 * A mid-block unused entry. Such entries are 941 * never created by the kernel, but fsck_ffs 942 * can create them (and it doesn't fix them). 943 * 944 * Add up the free space, and initialise the 945 * relocated entry since we don't memcpy it. 946 */ 947 spacefree += reclen; 948 ep->d_ino = 0; 949 dsize = 0; 950 continue; 951 } 952 dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap); 953 spacefree += reclen - dsize; 954#ifdef LFS_DIRHASH 955 if (dp->i_dirhash != NULL) 956 ulfsdirhash_move(dp, nep, 957 ulr->ulr_offset + ((char *)nep - dirbuf), 958 ulr->ulr_offset + ((char *)ep - dirbuf)); 959#endif 960 memcpy((void *)ep, (void *)nep, dsize); 961 } 962 /* 963 * Here, `ep' points to a directory entry containing `dsize' in-use 964 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 965 * then the entry is completely unused (dsize == 0). The value 966 * of ep->d_reclen is always indeterminate. 967 * 968 * Update the pointer fields in the previous entry (if any), 969 * copy in the new entry, and write out the block. 970 */ 971 if (ep->d_ino == 0 || 972 (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO && 973 memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 974 if (spacefree + dsize < newentrysize) 975 panic("ulfs_direnter: compact1"); 976 dirp->d_reclen = spacefree + dsize; 977 } else { 978 if (spacefree < newentrysize) 979 panic("ulfs_direnter: compact2"); 980 dirp->d_reclen = spacefree; 981 ep->d_reclen = ulfs_rw16(dsize, needswap); 982 ep = (struct lfs_direct *)((char *)ep + dsize); 983 } 984 dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap); 985 dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); 986 if (FSFMT(dvp)) { 987#if (BYTE_ORDER == LITTLE_ENDIAN) 988 if (needswap == 0) { 989#else 990 if (needswap != 0) { 991#endif 992 u_char tmp = dirp->d_namlen; 993 dirp->d_namlen = dirp->d_type; 994 dirp->d_type = tmp; 995 } 996 } 997#ifdef LFS_DIRHASH 998 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 999 dirp->d_reclen == spacefree)) 1000 ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); 1001#endif 1002 memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); 1003#ifdef LFS_DIRHASH 1004 if (dp->i_dirhash != NULL) 1005 ulfsdirhash_checkblock(dp, dirbuf - 1006 (ulr->ulr_offset & (dirblksiz - 1)), 1007 ulr->ulr_offset & ~(dirblksiz - 1)); 1008#endif 1009 error = VOP_BWRITE(bp->b_vp, bp); 1010 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1011 /* 1012 * If all went well, and the directory can be shortened, proceed 1013 * with the truncation. Note that we have to unlock the inode for 1014 * the entry that we just entered, as the truncation may need to 1015 * lock other inodes which can lead to deadlock if we also hold a 1016 * lock on the newly entered node. 1017 */ 1018 if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { 1019#ifdef LFS_DIRHASH 1020 if (dp->i_dirhash != NULL) 1021 ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff); 1022#endif 1023 (void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); 1024 } 1025 return (error); 1026} 1027 1028/* 1029 * Remove a directory entry after a call to namei, using the 1030 * parameters that ulfs_lookup left in nameidata and in the 1031 * ulfs_lookup_results. 1032 * 1033 * DVP is the directory to be updated. It must be locked. 1034 * ULR is the ulfs_lookup_results structure from the final lookup step. 1035 * IP, if not null, is the inode being unlinked. 1036 * FLAGS may contain DOWHITEOUT. 1037 * ISRMDIR is not used and (XXX) should be removed. 1038 * 1039 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout 1040 * instead of being cleared. 1041 * 1042 * ulr->ulr_offset contains the position of the directory entry 1043 * to be removed. 1044 * 1045 * ulr->ulr_reclen contains the size of the directory entry to be 1046 * removed. 1047 * 1048 * ulr->ulr_count contains the size of the *previous* directory 1049 * entry. This allows finding it, for free space management. If 1050 * ulr_count is 0, the target entry is at the beginning of the 1051 * directory. (Does this ever happen? The first entry should be ".", 1052 * which should only be removed at rmdir time. Does rmdir come here 1053 * to clear out the "." and ".." entries? Perhaps, but I doubt it.) 1054 * 1055 * The space is marked free by adding it to the record length (not 1056 * name length) of the preceding entry. If the first entry becomes 1057 * free, it is marked free by setting the inode number to 0. 1058 * 1059 * The link count of IP is decremented. Note that this is not the 1060 * inverse behavior of ulfs_direnter, which does not adjust link 1061 * counts. Sigh. 1062 */ 1063int 1064ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr, 1065 struct inode *ip, int flags, int isrmdir) 1066{ 1067 struct inode *dp = VTOI(dvp); 1068 struct lfs_direct *ep; 1069 struct buf *bp; 1070 int error; 1071#ifdef LFS_EI 1072 const int needswap = ULFS_MPNEEDSWAP(dp->i_lfs); 1073#endif 1074 1075 if (flags & DOWHITEOUT) { 1076 /* 1077 * Whiteout entry: set d_ino to ULFS_WINO. 1078 */ 1079 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, 1080 &bp, true); 1081 if (error) 1082 return (error); 1083 ep->d_ino = ulfs_rw32(ULFS_WINO, needswap); 1084 ep->d_type = LFS_DT_WHT; 1085 goto out; 1086 } 1087 1088 if ((error = ulfs_blkatoff(dvp, 1089 (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) 1090 return (error); 1091 1092#ifdef LFS_DIRHASH 1093 /* 1094 * Remove the dirhash entry. This is complicated by the fact 1095 * that `ep' is the previous entry when ulr_count != 0. 1096 */ 1097 if (dp->i_dirhash != NULL) 1098 ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : 1099 (struct lfs_direct *)((char *)ep + 1100 ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); 1101#endif 1102 1103 if (ulr->ulr_count == 0) { 1104 /* 1105 * First entry in block: set d_ino to zero. 1106 */ 1107 ep->d_ino = 0; 1108 } else { 1109 /* 1110 * Collapse new free space into previous entry. 1111 */ 1112 ep->d_reclen = 1113 ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, 1114 needswap); 1115 } 1116 1117#ifdef LFS_DIRHASH 1118 if (dp->i_dirhash != NULL) { 1119 int dirblksiz = ip->i_lfs->um_dirblksiz; 1120 ulfsdirhash_checkblock(dp, (char *)ep - 1121 ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), 1122 ulr->ulr_offset & ~(dirblksiz - 1)); 1123 } 1124#endif 1125 1126out: 1127 if (ip) { 1128 ip->i_nlink--; 1129 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1130 ip->i_flag |= IN_CHANGE; 1131 } 1132 /* 1133 * XXX did it ever occur to anyone that it might be a good 1134 * idea to restore ip->i_nlink if this fails? Or something? 1135 * Currently on error return from this function the state of 1136 * ip->i_nlink depends on what happened, and callers 1137 * definitely do not take this into account. 1138 */ 1139 error = VOP_BWRITE(bp->b_vp, bp); 1140 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1141 /* 1142 * If the last named reference to a snapshot goes away, 1143 * drop its snapshot reference so that it will be reclaimed 1144 * when last open reference goes away. 1145 */ 1146 if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && 1147 ip->i_nlink == 0) 1148 ulfs_snapgone(ip); 1149 return (error); 1150} 1151 1152/* 1153 * Rewrite an existing directory entry to point at the inode supplied. 1154 * 1155 * DP is the directory to update. 1156 * OFFSET is the position of the entry in question. It may come 1157 * from ulr_offset of a ulfs_lookup_results. 1158 * OIP is the old inode the directory previously pointed to. 1159 * NEWINUM is the number of the new inode. 1160 * NEWTYPE is the new value for the type field of the directory entry. 1161 * (This is ignored if the fs doesn't support that.) 1162 * ISRMDIR is not used and (XXX) should be removed. 1163 * IFLAGS are added to DP's inode flags. 1164 * 1165 * The link count of OIP is decremented. Note that the link count of 1166 * the new inode is *not* incremented. Yay for symmetry. 1167 */ 1168int 1169ulfs_dirrewrite(struct inode *dp, off_t offset, 1170 struct inode *oip, ino_t newinum, int newtype, 1171 int isrmdir, int iflags) 1172{ 1173 struct buf *bp; 1174 struct lfs_direct *ep; 1175 struct vnode *vdp = ITOV(dp); 1176 int error; 1177 1178 error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true); 1179 if (error) 1180 return (error); 1181 ep->d_ino = ulfs_rw32(newinum, ULFS_IPNEEDSWAP(dp)); 1182 if (!FSFMT(vdp)) 1183 ep->d_type = newtype; 1184 oip->i_nlink--; 1185 DIP_ASSIGN(oip, nlink, oip->i_nlink); 1186 oip->i_flag |= IN_CHANGE; 1187 error = VOP_BWRITE(bp->b_vp, bp); 1188 dp->i_flag |= iflags; 1189 /* 1190 * If the last named reference to a snapshot goes away, 1191 * drop its snapshot reference so that it will be reclaimed 1192 * when last open reference goes away. 1193 */ 1194 if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) 1195 ulfs_snapgone(oip); 1196 return (error); 1197} 1198 1199/* 1200 * Check if a directory is empty or not. 1201 * Inode supplied must be locked. 1202 * 1203 * Using a struct lfs_dirtemplate here is not precisely 1204 * what we want, but better than using a struct lfs_direct. 1205 * 1206 * NB: does not handle corrupted directories. 1207 */ 1208int 1209ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) 1210{ 1211 doff_t off; 1212 struct lfs_dirtemplate dbuf; 1213 struct lfs_direct *dp = (struct lfs_direct *)&dbuf; 1214 int error, namlen; 1215 size_t count; 1216 const int needswap = ULFS_IPNEEDSWAP(ip); 1217#define MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2) 1218 1219 for (off = 0; off < ip->i_size; 1220 off += ulfs_rw16(dp->d_reclen, needswap)) { 1221 error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off, 1222 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1223 /* 1224 * Since we read MINDIRSIZ, residual must 1225 * be 0 unless we're at end of file. 1226 */ 1227 if (error || count != 0) 1228 return (0); 1229 /* avoid infinite loops */ 1230 if (dp->d_reclen == 0) 1231 return (0); 1232 /* skip empty entries */ 1233 if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO) 1234 continue; 1235 /* accept only "." and ".." */ 1236#if (BYTE_ORDER == LITTLE_ENDIAN) 1237 if (FSFMT(ITOV(ip)) && needswap == 0) 1238 namlen = dp->d_type; 1239 else 1240 namlen = dp->d_namlen; 1241#else 1242 if (FSFMT(ITOV(ip)) && needswap != 0) 1243 namlen = dp->d_type; 1244 else 1245 namlen = dp->d_namlen; 1246#endif 1247 if (namlen > 2) 1248 return (0); 1249 if (dp->d_name[0] != '.') 1250 return (0); 1251 /* 1252 * At this point namlen must be 1 or 2. 1253 * 1 implies ".", 2 implies ".." if second 1254 * char is also "." 1255 */ 1256 if (namlen == 1 && 1257 ulfs_rw32(dp->d_ino, needswap) == ip->i_number) 1258 continue; 1259 if (dp->d_name[1] == '.' && 1260 ulfs_rw32(dp->d_ino, needswap) == parentino) 1261 continue; 1262 return (0); 1263 } 1264 return (1); 1265} 1266 1267/* 1268 * Check if source directory is in the path of the target directory. 1269 * Target is supplied locked, source is unlocked. 1270 * The target is always vput before returning. 1271 */ 1272int 1273ulfs_checkpath(struct inode *source, struct inode *target, kauth_cred_t cred) 1274{ 1275 struct vnode *nextvp, *vp; 1276 int error, rootino, namlen; 1277 struct lfs_dirtemplate dirbuf; 1278 const int needswap = ULFS_IPNEEDSWAP(target); 1279 1280 vp = ITOV(target); 1281 if (target->i_number == source->i_number) { 1282 error = EEXIST; 1283 goto out; 1284 } 1285 rootino = ULFS_ROOTINO; 1286 error = 0; 1287 if (target->i_number == rootino) 1288 goto out; 1289 1290 for (;;) { 1291 if (vp->v_type != VDIR) { 1292 error = ENOTDIR; 1293 break; 1294 } 1295 error = vn_rdwr(UIO_READ, vp, (void *)&dirbuf, 1296 sizeof (struct lfs_dirtemplate), (off_t)0, UIO_SYSSPACE, 1297 IO_NODELOCKED, cred, NULL, NULL); 1298 if (error != 0) 1299 break; 1300#if (BYTE_ORDER == LITTLE_ENDIAN) 1301 if (FSFMT(vp) && needswap == 0) 1302 namlen = dirbuf.dotdot_type; 1303 else 1304 namlen = dirbuf.dotdot_namlen; 1305#else 1306 if (FSFMT(vp) && needswap != 0) 1307 namlen = dirbuf.dotdot_type; 1308 else 1309 namlen = dirbuf.dotdot_namlen; 1310#endif 1311 if (namlen != 2 || 1312 dirbuf.dotdot_name[0] != '.' || 1313 dirbuf.dotdot_name[1] != '.') { 1314 error = ENOTDIR; 1315 break; 1316 } 1317 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == source->i_number) { 1318 error = EINVAL; 1319 break; 1320 } 1321 if (ulfs_rw32(dirbuf.dotdot_ino, needswap) == rootino) 1322 break; 1323 VOP_UNLOCK(vp); 1324 error = VFS_VGET(vp->v_mount, 1325 ulfs_rw32(dirbuf.dotdot_ino, needswap), &nextvp); 1326 vrele(vp); 1327 if (error) { 1328 vp = NULL; 1329 break; 1330 } 1331 vp = nextvp; 1332 } 1333 1334out: 1335 if (error == ENOTDIR) 1336 printf("checkpath: .. not a directory\n"); 1337 if (vp != NULL) 1338 vput(vp); 1339 return (error); 1340} 1341 1342/* 1343 * Extract the inode number of ".." from a directory. 1344 * Helper for ulfs_parentcheck. 1345 */ 1346static int 1347ulfs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t *result) 1348{ 1349 struct lfs_dirtemplate dirbuf; 1350 int namlen, error; 1351 1352 error = vn_rdwr(UIO_READ, vp, &dirbuf, 1353 sizeof (struct lfs_dirtemplate), (off_t)0, UIO_SYSSPACE, 1354 IO_NODELOCKED, cred, NULL, NULL); 1355 if (error) { 1356 return error; 1357 } 1358 1359#if (BYTE_ORDER == LITTLE_ENDIAN) 1360 if (FSFMT(vp) && needswap == 0) 1361 namlen = dirbuf.dotdot_type; 1362 else 1363 namlen = dirbuf.dotdot_namlen; 1364#else 1365 if (FSFMT(vp) && needswap != 0) 1366 namlen = dirbuf.dotdot_type; 1367 else 1368 namlen = dirbuf.dotdot_namlen; 1369#endif 1370 if (namlen != 2 || 1371 dirbuf.dotdot_name[0] != '.' || 1372 dirbuf.dotdot_name[1] != '.') { 1373 printf("ulfs_readdotdot: directory %llu contains " 1374 "garbage instead of ..\n", 1375 (unsigned long long) VTOI(vp)->i_number); 1376 return ENOTDIR; 1377 } 1378 *result = ulfs_rw32(dirbuf.dotdot_ino, needswap); 1379 return 0; 1380} 1381 1382/* 1383 * Check if LOWER is a descendent of UPPER. If we find UPPER, return 1384 * nonzero in FOUND and return a reference to the immediate descendent 1385 * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we 1386 * reach the volume root and that isn't UPPER), return zero in FOUND 1387 * and null in UPPERCHILD. 1388 * 1389 * Neither UPPER nor LOWER should be locked. 1390 * 1391 * On error (such as a permissions error checking up the directory 1392 * tree) fail entirely. 1393 * 1394 * Note that UPPER and LOWER must be on the same volume, and because 1395 * we inspect only that volume NEEDSWAP can be constant. 1396 */ 1397int 1398ulfs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred, 1399 int *found_ret, struct vnode **upperchild_ret) 1400{ 1401 const int needswap = ULFS_IPNEEDSWAP(VTOI(lower)); 1402 ino_t upper_ino, found_ino; 1403 struct vnode *current, *next; 1404 int error; 1405 1406 if (upper == lower) { 1407 vref(upper); 1408 *found_ret = 1; 1409 *upperchild_ret = upper; 1410 return 0; 1411 } 1412 if (VTOI(lower)->i_number == ULFS_ROOTINO) { 1413 *found_ret = 0; 1414 *upperchild_ret = NULL; 1415 return 0; 1416 } 1417 1418 upper_ino = VTOI(upper)->i_number; 1419 1420 current = lower; 1421 vref(current); 1422 vn_lock(current, LK_EXCLUSIVE | LK_RETRY); 1423 1424 for (;;) { 1425 error = ulfs_readdotdot(current, needswap, cred, &found_ino); 1426 if (error) { 1427 vput(current); 1428 return error; 1429 } 1430 if (found_ino == upper_ino) { 1431 VOP_UNLOCK(current); 1432 *found_ret = 1; 1433 *upperchild_ret = current; 1434 return 0; 1435 } 1436 if (found_ino == ULFS_ROOTINO) { 1437 vput(current); 1438 *found_ret = 0; 1439 *upperchild_ret = NULL; 1440 return 0; 1441 } 1442 VOP_UNLOCK(current); 1443 error = VFS_VGET(current->v_mount, found_ino, &next); 1444 if (error) { 1445 vrele(current); 1446 return error; 1447 } 1448 KASSERT(VOP_ISLOCKED(next)); 1449 if (next->v_type != VDIR) { 1450 printf("ulfs_parentcheck: inode %llu reached via .. of " 1451 "inode %llu is not a directory\n", 1452 (unsigned long long)VTOI(next)->i_number, 1453 (unsigned long long)VTOI(current)->i_number); 1454 vput(next); 1455 vrele(current); 1456 return ENOTDIR; 1457 } 1458 vrele(current); 1459 current = next; 1460 } 1461 1462 return 0; 1463} 1464 1465#define ULFS_DIRRABLKS 0 1466int ulfs_dirrablks = ULFS_DIRRABLKS; 1467 1468/* 1469 * ulfs_blkatoff: Return buffer with the contents of block "offset" from 1470 * the beginning of directory "vp". If "res" is non-NULL, fill it in with 1471 * a pointer to the remaining space in the directory. If the caller intends 1472 * to modify the buffer returned, "modify" must be true. 1473 */ 1474 1475int 1476ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp, 1477 bool modify) 1478{ 1479 struct inode *ip; 1480 struct buf *bp; 1481 daddr_t lbn; 1482 const int dirrablks = ulfs_dirrablks; 1483 daddr_t *blks; 1484 int *blksizes; 1485 int run, error; 1486 struct mount *mp = vp->v_mount; 1487 const int bshift = mp->mnt_fs_bshift; 1488 const int bsize = 1 << bshift; 1489 off_t eof; 1490 1491 blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP); 1492 blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP); 1493 ip = VTOI(vp); 1494 KASSERT(vp->v_size == ip->i_size); 1495 GOP_SIZE(vp, vp->v_size, &eof, 0); 1496 lbn = offset >> bshift; 1497 1498 for (run = 0; run <= dirrablks;) { 1499 const off_t curoff = lbn << bshift; 1500 const int size = MIN(eof - curoff, bsize); 1501 1502 if (size == 0) { 1503 break; 1504 } 1505 KASSERT(curoff < eof); 1506 blks[run] = lbn; 1507 blksizes[run] = size; 1508 lbn++; 1509 run++; 1510 if (size != bsize) { 1511 break; 1512 } 1513 } 1514 KASSERT(run >= 1); 1515 error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], 1516 run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); 1517 if (error != 0) { 1518 *bpp = NULL; 1519 goto out; 1520 } 1521 if (res) { 1522 *res = (char *)bp->b_data + (offset & (bsize - 1)); 1523 } 1524 *bpp = bp; 1525 1526 out: 1527 kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t)); 1528 kmem_free(blksizes, (1 + dirrablks) * sizeof(int)); 1529 return error; 1530} 1531