1/* $NetBSD$ */ 2 3/* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");

#ifdef _KERNEL_OPT
#include "opt_ffs.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>
#include <sys/proc.h>
#include <sys/kmem.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>

/*
 * When non-zero, ufs_lookup runs ufs_dirbadentry() on every directory
 * entry it scans. Enabled by default only in DIAGNOSTIC kernels.
 */
#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif

/*
 * True when the mount backing vnode `vp' does not have IMNT_DTYPE set.
 * The code in this file then treats the d_type and d_namlen bytes of a
 * directory entry as exchanged (depending on byte order) and stores no
 * file type; presumably this is the old directory format -- confirm
 * against the mount code that sets IMNT_DTYPE.
 */
#define	FSFMT(vp)	(((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0)

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".". When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ufs_lookup:
 *
 *	check accessibility of directory
 *	look for name in cache, if found, then if at end of path
 *	  and deleting or creating, drop it, else return name
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Return values, as produced by the code below:
 *	0		the entry was found; *vpp holds the vnode (for "."
 *			this is the directory itself with an extra vref).
 *	EJUSTRETURN	the name was not found (or is a whiteout) at the last
 *			component of a CREATE/RENAME (or a DOWHITEOUT DELETE
 *			of a whiteout), the directory still has links and is
 *			writable; the lookup results describe where a new
 *			entry can go and *vpp is NULL.
 *	ENOENT		the name was not found in any other case.
 *	EROFS		DELETE or RENAME of the last component on a mount
 *			flagged MNT_RDONLY.
 *	EISDIR		RENAME whose target is the directory itself.
 *	EPERM		DELETE refused by the sticky-directory check.
 *	other		whatever VOP_ACCESS, ufs_blkatoff or VFS_VGET
 *			returned, or any non-negative cache_lookup result.
 */
int
ufs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vdp = ap->a_dvp;	/* vnode for directory being searched */
	struct inode *dp = VTOI(vdp);	/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	kauth_cred_t cred = cnp->cn_cred;
	int flags;
	int nameiop = cnp->cn_nameiop;
	struct ufsmount *ump = dp->i_ump;
	const int needswap = UFS_MPNEEDSWAP(ump);
	int dirblksiz = ump->um_dirblksiz;
	ino_t foundino;
	struct ufs_lookup_results *results;

	flags = cnp->cn_flags;

	bp = NULL;
	slotoffset = -1;
	*vpp = NULL;
	endsearch = 0; /* silence compiler warning */

	/*
	 * Produce the auxiliary lookup results into i_crap. Increment
	 * its serial number so elsewhere we can tell if we're using
	 * stale results. This should not be done this way. XXX.
	 */
	results = &dp->i_crap;
	dp->i_crapcounter++;

	/*
	 * Check accessibility of directory.
	 */
	if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0)
		return (error);

	/* Refuse to set up a delete or rename on a read-only mount. */
	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (nameiop == DELETE || nameiop == RENAME))
		return (EROFS);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 *
	 * Any non-negative cache_lookup result is handed straight back to
	 * the caller; only a negative result falls through to the scan.
	 */
	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) {
		return (error);
	}

	/* From here on every exit must go through `out' (fstrans_done). */
	fstrans_start(vdp->v_mount, FSTRANS_SHARED);

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 *
	 * Starting with slotstatus == FOUND is what disables the slot
	 * bookkeeping in the scan below.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = DIRECTSIZ(cnp->cn_namelen);
	}

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = vdp->v_mount->mnt_stat.f_iosize - 1;

#ifdef UFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ufsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ufsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = dp->i_size;
		if (slotstatus != FOUND) {
			slotoffset = ufsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ufsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = dp->i_size;
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
		case 0:
			/* Hash hit: jump into the middle of the scan loop. */
			ep = (struct direct *)((char *)bp->b_data +
			    (results->ulr_offset & bmask));
			goto foundentry;
		case ENOENT:
			results->ulr_offset = roundup(dp->i_size, dirblksiz);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* UFS_DIRHASH */

	/*
	 * Choose the starting offset: the beginning of the directory, or
	 * (for plain LOOKUPs with a usable cached ulr_diroff) the block
	 * where the previous lookup succeeded, in which case a second pass
	 * over the skipped prefix may be needed.
	 */
	if (nameiop != LOOKUP || results->ulr_diroff == 0 ||
	    results->ulr_diroff >= dp->i_size) {
		entryoffsetinblock = 0;
		results->ulr_offset = 0;
		numdirpasses = 1;
	} else {
		results->ulr_offset = results->ulr_diroff;
		if ((entryoffsetinblock = results->ulr_offset & bmask) &&
		    (error = ufs_blkatoff(vdp, (off_t)results->ulr_offset,
		    NULL, &bp, false)))
			goto out;
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	prevoff = results->ulr_offset;
	endsearch = roundup(dp->i_size, dirblksiz);
	enduseful = 0;

searchloop:
	while (results->ulr_offset < endsearch) {
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt();
		/*
		 * If necessary, get the next directory block.
		 */
		if ((results->ulr_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp, 0);
			error = ufs_blkatoff(vdp, (off_t)results->ulr_offset, NULL,
			    &bp, false);
			if (error)
				goto out;
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZ
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (dirblksiz - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to insure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		KASSERT(bp != NULL);
		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 ||
		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/* Report it, then skip to the next directory block. */
			ufs_dirbad(dp, results->ulr_offset, "mangled entry");
			i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1));
			results->ulr_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ufs_rw16(ep->d_reclen, needswap);

			if (ep->d_ino != 0)
				size -= DIRSIZ(FSFMT(vdp), ep, needswap);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = results->ulr_offset;
					slotsize = ufs_rw16(ep->d_reclen,
					    needswap);
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = results->ulr_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = results->ulr_offset +
						    ufs_rw16(ep->d_reclen,
							     needswap) -
						    slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
			/*
			 * Depending on FSFMT, host byte order and needswap,
			 * the name length is read from the d_type byte
			 * rather than d_namlen.
			 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (FSFMT(vdp) && needswap == 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#else
			if (FSFMT(vdp) && needswap != 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#endif
			if (namlen == cnp->cn_namelen &&
			    !memcmp(cnp->cn_nameptr, ep->d_name,
			    (unsigned)namlen)) {
#ifdef UFS_DIRHASH
foundentry:
#endif
				/*
				 * Save directory entry's inode number and
				 * reclen in ndp->ni_ufs area, and release
				 * directory buffer.
				 */
				if (!FSFMT(vdp) && ep->d_type == DT_WHT) {
					slotstatus = FOUND;
					slotoffset = results->ulr_offset;
					slotsize = ufs_rw16(ep->d_reclen,
					    needswap);
					results->ulr_reclen = slotsize;
					/*
					 * This is used to set results->ulr_endoff,
					 * which may be used by ufs_direnter2()
					 * as a length to truncate the
					 * directory to. Therefore, it must
					 * point past the end of the last
					 * non-empty directory entry. We don't
					 * know where that is in this case, so
					 * we effectively disable shrinking by
					 * using the existing size of the
					 * directory.
					 *
					 * Note that we wouldn't expect to
					 * shrink the directory while rewriting
					 * an existing entry anyway.
					 */
					enduseful = endsearch;
					ap->a_cnp->cn_flags |= ISWHITEOUT;
					numdirpasses--;
					goto notfound;
				}
				foundino = ufs_rw32(ep->d_ino, needswap);
				results->ulr_reclen = ufs_rw16(ep->d_reclen, needswap);
				goto found;
			}
		}
		/* Advance to the next entry in the block. */
		prevoff = results->ulr_offset;
		results->ulr_offset += ufs_rw16(ep->d_reclen, needswap);
		entryoffsetinblock += ufs_rw16(ep->d_reclen, needswap);
		if (ep->d_ino)
			enduseful = results->ulr_offset;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		results->ulr_offset = 0;
		endsearch = results->ulr_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp, 0);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME ||
	     (nameiop == DELETE &&
	      (ap->a_cnp->cn_flags & DOWHITEOUT) &&
	      (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
	    (flags & ISLASTCN) && dp->i_nlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			goto out;
		/*
		 * Return an indication of where the new directory
		 * entry should be put. If we didn't find a slot,
		 * then set results->ulr_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from results->ulr_offset to
		 * results->ulr_offset + results->ulr_count.
		 */
		if (slotstatus == NONE) {
			results->ulr_offset = roundup(dp->i_size, dirblksiz);
			results->ulr_count = 0;
			enduseful = results->ulr_offset;
		} else if (nameiop == DELETE) {
			results->ulr_offset = slotoffset;
			if ((results->ulr_offset & (dirblksiz - 1)) == 0)
				results->ulr_count = 0;
			else
				results->ulr_count = results->ulr_offset - prevoff;
		} else {
			results->ulr_offset = slotoffset;
			results->ulr_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		results->ulr_endoff = roundup(enduseful, dirblksiz);
#if 0 /* commented out by dbj. none of the on disk fields changed */
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
#endif
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		error = EJUSTRETURN;
		goto out;
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 * *vpp is still NULL on this path.
	 */
	if (nameiop != CREATE) {
		cache_enter(vdp, *vpp, cnp);
	}
	error = ENOENT;
	goto out;

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (results->ulr_offset + DIRSIZ(FSFMT(vdp), ep, needswap) > dp->i_size) {
		ufs_dirbad(dp, results->ulr_offset, "i_size too small");
		dp->i_size = results->ulr_offset + DIRSIZ(FSFMT(vdp), ep, needswap);
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP);
	}
	/* ep points into bp and must not be used after this release. */
	brelse(bp, 0);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * Lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			goto out;
		/*
		 * Return pointer to current entry in results->ulr_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in results->ulr_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((results->ulr_offset & (dirblksiz - 1)) == 0)
			results->ulr_count = 0;
		else
			results->ulr_count = results->ulr_offset - prevoff;
		if (dp->i_number == foundino) {
			/* Deleting ".": hand back the directory itself. */
			vref(vdp);
			*vpp = vdp;
			error = 0;
			goto out;
		}
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp); /* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (flags & ISDOTDOT)
			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
		if (error)
			goto out;
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_mode & ISVTX) &&
		    kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
		     NULL) != 0 &&
		    kauth_cred_geteuid(cred) != dp->i_uid &&
		    VTOI(tdp)->i_uid != kauth_cred_geteuid(cred)) {
			vput(tdp);
			error = EPERM;
			goto out;
		}
		*vpp = tdp;
		error = 0;
		goto out;
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && (flags & ISLASTCN)) {
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			goto out;
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == foundino) {
			error = EISDIR;
			goto out;
		}
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp); /* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (flags & ISDOTDOT)
			vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
		if (error)
			goto out;
		*vpp = tdp;
		error = 0;
		goto out;
	}

	/*
	 * Step through the translation in the name. We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory. Instead we save it
	 * unlocked as "pdp". We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it. We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns. We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			goto out;
		}
		*vpp = tdp;
	} else if (dp->i_number == foundino) {
		vref(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
		if (error)
			goto out;
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	cache_enter(vdp, *vpp, cnp);
	error = 0;

out:
	/* Pairs with the fstrans_start() above. */
	fstrans_done(vdp->v_mount);
	return error;
}

/*
 * Report a corrupt directory.
 *
 * Prints the mount point name, the directory's inode number, the byte
 * offset of the problem and the description `how'. If the mount's
 * f_flag does not have MNT_RDONLY set, the kernel then panics.
 */
void
ufs_dirbad(struct inode *ip, doff_t offset, const char *how)
{
	struct mount *mp;

	mp = ITOV(ip)->v_mount;
	printf("%s: bad dir ino %llu at offset %d: %s\n",
	    mp->mnt_stat.f_mntonname, (unsigned long long)ip->i_number,
	    offset, how);
	if ((mp->mnt_stat.f_flag & MNT_RDONLY) == 0)
		panic("bad dir");
}

/*
 * Do consistency checking on a directory entry:
 *	record length must be multiple of 4
 *	entry must fit in rest of its DIRBLKSIZ block
 *	record must be large enough to contain entry
 *	name is not longer than FFS_MAXNAMLEN
 *	name must be as long as advertised, and null terminated
 *
 * dp is the directory vnode (used for its mount), ep the entry to check
 * and entryoffsetinblock the offset of ep within its buffer.
 *
 * Returns 0 if the entry passes (the name checks are skipped for an entry
 * whose d_ino is 0) and 1 if it is bad. Some failures also print a
 * diagnostic line.
 */
int
ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock)
{
	int i;
	int namlen;
	struct ufsmount *ump = VFSTOUFS(dp->v_mount);
	const int needswap = UFS_MPNEEDSWAP(ump);
	int dirblksiz = ump->um_dirblksiz;

	/* Same name-length selection as in ufs_lookup's scan loop. */
#if (BYTE_ORDER == LITTLE_ENDIAN)
	if (FSFMT(dp) && needswap == 0)
		namlen = ep->d_type;
	else
		namlen = ep->d_namlen;
#else
	if (FSFMT(dp) && needswap != 0)
		namlen = ep->d_type;
	else
		namlen = ep->d_namlen;
#endif
	if ((ufs_rw16(ep->d_reclen, needswap) & 0x3) != 0 ||
	    ufs_rw16(ep->d_reclen, needswap) >
		dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) ||
	    ufs_rw16(ep->d_reclen, needswap) <
		DIRSIZ(FSFMT(dp), ep, needswap) ||
	    namlen > FFS_MAXNAMLEN) {
		/*return (1); */
		printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
			"flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
			ufs_rw16(ep->d_reclen, needswap),
			(u_long)DIRSIZ(FSFMT(dp), ep, needswap),
			namlen, dp->v_mount->mnt_flag, entryoffsetinblock,
			dirblksiz);
		goto bad;
	}
	if (ep->d_ino == 0)
		return (0);
	/* The name must not contain a NUL before its advertised length... */
	for (i = 0; i < namlen; i++)
		if (ep->d_name[i] == '\0') {
			/*return (1); */
			printf("Second bad\n");
			goto bad;
		}
	/* ...and must be NUL-terminated exactly at that length. */
	if (ep->d_name[i])
		goto bad;
	return (0);
bad:
	return (1);
}

/*
 * Construct a new directory entry after a call to namei, using the
 * name in the componentname argument cnp. The argument ip is the
 * inode to which the new directory entry will refer.
 *
 * Fills in d_ino, d_namlen, d_name (NUL-terminated) and d_type of
 * newdirp; d_type is 0 when FSFMT() is true for ip's vnode, otherwise it
 * is derived from ip->i_mode. d_reclen is not set here.
 */
void
ufs_makedirentry(struct inode *ip, struct componentname *cnp,
    struct direct *newdirp)
{
	newdirp->d_ino = ip->i_number;
	newdirp->d_namlen = cnp->cn_namelen;
	memcpy(newdirp->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen);
	newdirp->d_name[cnp->cn_namelen] = '\0';
	if (FSFMT(ITOV(ip)))
		newdirp->d_type = 0;
	else
		newdirp->d_type = IFTODT(ip->i_mode);
}

/*
 * Write a directory entry after a call to namei, using the parameters
 * that ufs_lookup left in nameidata and in the ufs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ufs_lookup_results structure from the final lookup step.
 * TVP is not used. (XXX: why is it here?
remove it) 749 * DIRP is the new directory entry contents. 750 * CNP is the componentname from the final lookup step. 751 * NEWDIRBP is not used and (XXX) should be removed. The previous 752 * comment here said it was used by the now-removed softupdates code. 753 * 754 * The link count of the target inode is *not* incremented; the 755 * caller does that. 756 * 757 * If ulr->ulr_count is 0, ufs_lookup did not find space to insert the 758 * directory entry. ulr_offset, which is the place to put the entry, 759 * should be on a block boundary (and should be at the end of the 760 * directory AFAIK) and a fresh block is allocated to put the new 761 * directory entry in. 762 * 763 * If ulr->ulr_count is not zero, ufs_lookup found a slot to insert 764 * the entry into. This slot ranges from ulr_offset to ulr_offset + 765 * ulr_count. However, this slot may already be partially populated 766 * requiring compaction. See notes below. 767 * 768 * Furthermore, if ulr_count is not zero and ulr_endoff is not the 769 * same as i_size, the directory is truncated to size ulr_endoff. 
770 */ 771int 772ufs_direnter(struct vnode *dvp, const struct ufs_lookup_results *ulr, 773 struct vnode *tvp, struct direct *dirp, 774 struct componentname *cnp, struct buf *newdirbp) 775{ 776 kauth_cred_t cr; 777 struct lwp *l; 778 int newentrysize; 779 struct inode *dp; 780 struct buf *bp; 781 u_int dsize; 782 struct direct *ep, *nep; 783 int error, ret, blkoff, loc, spacefree; 784 char *dirbuf; 785 struct timespec ts; 786 struct ufsmount *ump = VFSTOUFS(dvp->v_mount); 787 const int needswap = UFS_MPNEEDSWAP(ump); 788 int dirblksiz = ump->um_dirblksiz; 789 790 UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); 791 792 error = 0; 793 cr = cnp->cn_cred; 794 l = curlwp; 795 796 dp = VTOI(dvp); 797 newentrysize = DIRSIZ(0, dirp, 0); 798 799#if 0 800 struct ufs_lookup_results *ulr; 801 /* XXX should handle this material another way */ 802 ulr = &dp->i_crap; 803 UFS_CHECK_CRAPCOUNTER(dp); 804#endif 805 806 if (ulr->ulr_count == 0) { 807 /* 808 * If ulr_count is 0, then namei could find no 809 * space in the directory. Here, ulr_offset will 810 * be on a directory block boundary and we will write the 811 * new entry into a fresh block. 
812 */ 813 if (ulr->ulr_offset & (dirblksiz - 1)) 814 panic("ufs_direnter: newblk"); 815 if ((error = UFS_BALLOC(dvp, (off_t)ulr->ulr_offset, dirblksiz, 816 cr, B_CLRBUF | B_SYNC, &bp)) != 0) { 817 return (error); 818 } 819 dp->i_size = ulr->ulr_offset + dirblksiz; 820 DIP_ASSIGN(dp, size, dp->i_size); 821 dp->i_flag |= IN_CHANGE | IN_UPDATE; 822 uvm_vnp_setsize(dvp, dp->i_size); 823 dirp->d_reclen = ufs_rw16(dirblksiz, needswap); 824 dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); 825 if (FSFMT(dvp)) { 826#if (BYTE_ORDER == LITTLE_ENDIAN) 827 if (needswap == 0) { 828#else 829 if (needswap != 0) { 830#endif 831 u_char tmp = dirp->d_namlen; 832 dirp->d_namlen = dirp->d_type; 833 dirp->d_type = tmp; 834 } 835 } 836 blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); 837 memcpy((char *)bp->b_data + blkoff, dirp, newentrysize); 838#ifdef UFS_DIRHASH 839 if (dp->i_dirhash != NULL) { 840 ufsdirhash_newblk(dp, ulr->ulr_offset); 841 ufsdirhash_add(dp, dirp, ulr->ulr_offset); 842 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 843 ulr->ulr_offset); 844 } 845#endif 846 error = VOP_BWRITE(bp->b_vp, bp); 847 vfs_timestamp(&ts); 848 ret = UFS_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); 849 if (error == 0) 850 return (ret); 851 return (error); 852 } 853 854 /* 855 * If ulr_count is non-zero, then namei found space for the new 856 * entry in the range ulr_offset to url_offset + url_count 857 * in the directory. To use this space, we may have to compact 858 * the entries located there, by copying them together towards the 859 * beginning of the block, leaving the free space in one usable 860 * chunk at the end. 861 */ 862 863 /* 864 * Increase size of directory if entry eats into new space. 865 * This should never push the size past a new multiple of 866 * DIRBLKSIZ. 867 * 868 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 
869 */ 870 if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { 871#ifdef DIAGNOSTIC 872 printf("ufs_direnter: reached 4.2-only block, " 873 "not supposed to happen\n"); 874#endif 875 dp->i_size = ulr->ulr_offset + ulr->ulr_count; 876 DIP_ASSIGN(dp, size, dp->i_size); 877 dp->i_flag |= IN_CHANGE | IN_UPDATE; 878 UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 879 } 880 /* 881 * Get the block containing the space for the new directory entry. 882 */ 883 error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); 884 if (error) { 885 return (error); 886 } 887 /* 888 * Find space for the new entry. In the simple case, the entry at 889 * offset base will have the space. If it does not, then namei 890 * arranged that compacting the region dp->i_offset to 891 * dp->i_offset + dp->i_count would yield the space. 892 */ 893 ep = (struct direct *)dirbuf; 894 dsize = (ep->d_ino != 0) ? DIRSIZ(FSFMT(dvp), ep, needswap) : 0; 895 spacefree = ufs_rw16(ep->d_reclen, needswap) - dsize; 896 for (loc = ufs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { 897 uint16_t reclen; 898 899 nep = (struct direct *)(dirbuf + loc); 900 901 /* Trim the existing slot (NB: dsize may be zero). */ 902 ep->d_reclen = ufs_rw16(dsize, needswap); 903 ep = (struct direct *)((char *)ep + dsize); 904 905 reclen = ufs_rw16(nep->d_reclen, needswap); 906 loc += reclen; 907 if (nep->d_ino == 0) { 908 /* 909 * A mid-block unused entry. Such entries are 910 * never created by the kernel, but fsck_ffs 911 * can create them (and it doesn't fix them). 912 * 913 * Add up the free space, and initialise the 914 * relocated entry since we don't memcpy it. 
915 */ 916 spacefree += reclen; 917 ep->d_ino = 0; 918 dsize = 0; 919 continue; 920 } 921 dsize = DIRSIZ(FSFMT(dvp), nep, needswap); 922 spacefree += reclen - dsize; 923#ifdef UFS_DIRHASH 924 if (dp->i_dirhash != NULL) 925 ufsdirhash_move(dp, nep, 926 ulr->ulr_offset + ((char *)nep - dirbuf), 927 ulr->ulr_offset + ((char *)ep - dirbuf)); 928#endif 929 memcpy((void *)ep, (void *)nep, dsize); 930 } 931 /* 932 * Here, `ep' points to a directory entry containing `dsize' in-use 933 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 934 * then the entry is completely unused (dsize == 0). The value 935 * of ep->d_reclen is always indeterminate. 936 * 937 * Update the pointer fields in the previous entry (if any), 938 * copy in the new entry, and write out the block. 939 */ 940 if (ep->d_ino == 0 || 941 (ufs_rw32(ep->d_ino, needswap) == WINO && 942 memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 943 if (spacefree + dsize < newentrysize) 944 panic("ufs_direnter: compact1"); 945 dirp->d_reclen = spacefree + dsize; 946 } else { 947 if (spacefree < newentrysize) 948 panic("ufs_direnter: compact2"); 949 dirp->d_reclen = spacefree; 950 ep->d_reclen = ufs_rw16(dsize, needswap); 951 ep = (struct direct *)((char *)ep + dsize); 952 } 953 dirp->d_reclen = ufs_rw16(dirp->d_reclen, needswap); 954 dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); 955 if (FSFMT(dvp)) { 956#if (BYTE_ORDER == LITTLE_ENDIAN) 957 if (needswap == 0) { 958#else 959 if (needswap != 0) { 960#endif 961 u_char tmp = dirp->d_namlen; 962 dirp->d_namlen = dirp->d_type; 963 dirp->d_type = tmp; 964 } 965 } 966#ifdef UFS_DIRHASH 967 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 968 dirp->d_reclen == spacefree)) 969 ufsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); 970#endif 971 memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); 972#ifdef UFS_DIRHASH 973 if (dp->i_dirhash != NULL) 974 ufsdirhash_checkblock(dp, dirbuf - 975 (ulr->ulr_offset & (dirblksiz - 1)), 976 
ulr->ulr_offset & ~(dirblksiz - 1)); 977#endif 978 error = VOP_BWRITE(bp->b_vp, bp); 979 dp->i_flag |= IN_CHANGE | IN_UPDATE; 980 /* 981 * If all went well, and the directory can be shortened, proceed 982 * with the truncation. Note that we have to unlock the inode for 983 * the entry that we just entered, as the truncation may need to 984 * lock other inodes which can lead to deadlock if we also hold a 985 * lock on the newly entered node. 986 */ 987 if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { 988#ifdef UFS_DIRHASH 989 if (dp->i_dirhash != NULL) 990 ufsdirhash_dirtrunc(dp, ulr->ulr_endoff); 991#endif 992 (void) UFS_TRUNCATE(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); 993 } 994 UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 995 return (error); 996} 997 998/* 999 * Remove a directory entry after a call to namei, using the 1000 * parameters that ufs_lookup left in nameidata and in the 1001 * ufs_lookup_results. 1002 * 1003 * DVP is the directory to be updated. It must be locked. 1004 * ULR is the ufs_lookup_results structure from the final lookup step. 1005 * IP, if not null, is the inode being unlinked. 1006 * FLAGS may contain DOWHITEOUT. 1007 * ISRMDIR is not used and (XXX) should be removed. 1008 * 1009 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout 1010 * instead of being cleared. 1011 * 1012 * ulr->ulr_offset contains the position of the directory entry 1013 * to be removed. 1014 * 1015 * ulr->ulr_reclen contains the size of the directory entry to be 1016 * removed. 1017 * 1018 * ulr->ulr_count contains the size of the *previous* directory 1019 * entry. This allows finding it, for free space management. If 1020 * ulr_count is 0, the target entry is at the beginning of the 1021 * directory. (Does this ever happen? The first entry should be ".", 1022 * which should only be removed at rmdir time. Does rmdir come here 1023 * to clear out the "." and ".." entries? Perhaps, but I doubt it.) 
1024 * 1025 * The space is marked free by adding it to the record length (not 1026 * name length) of the preceding entry. If the first entry becomes 1027 * free, it is marked free by setting the inode number to 0. 1028 * 1029 * The link count of IP is decremented. Note that this is not the 1030 * inverse behavior of ufs_direnter, which does not adjust link 1031 * counts. Sigh. 1032 */ 1033int 1034ufs_dirremove(struct vnode *dvp, const struct ufs_lookup_results *ulr, 1035 struct inode *ip, int flags, int isrmdir) 1036{ 1037 struct inode *dp = VTOI(dvp); 1038 struct direct *ep; 1039 struct buf *bp; 1040 int error; 1041#ifdef FFS_EI 1042 const int needswap = UFS_MPNEEDSWAP(dp->i_ump); 1043#endif 1044 1045 UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); 1046 1047 if (flags & DOWHITEOUT) { 1048 /* 1049 * Whiteout entry: set d_ino to WINO. 1050 */ 1051 error = ufs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, 1052 &bp, true); 1053 if (error) 1054 return (error); 1055 ep->d_ino = ufs_rw32(WINO, needswap); 1056 ep->d_type = DT_WHT; 1057 goto out; 1058 } 1059 1060 if ((error = ufs_blkatoff(dvp, 1061 (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) 1062 return (error); 1063 1064#ifdef UFS_DIRHASH 1065 /* 1066 * Remove the dirhash entry. This is complicated by the fact 1067 * that `ep' is the previous entry when dp->i_count != 0. 1068 */ 1069 if (dp->i_dirhash != NULL) 1070 ufsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : 1071 (struct direct *)((char *)ep + 1072 ufs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); 1073#endif 1074 1075 if (ulr->ulr_count == 0) { 1076 /* 1077 * First entry in block: set d_ino to zero. 1078 */ 1079 ep->d_ino = 0; 1080 } else { 1081 /* 1082 * Collapse new free space into previous entry. 
1083 */ 1084 ep->d_reclen = 1085 ufs_rw16(ufs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, 1086 needswap); 1087 } 1088 1089#ifdef UFS_DIRHASH 1090 if (dp->i_dirhash != NULL) { 1091 int dirblksiz = ip->i_ump->um_dirblksiz; 1092 ufsdirhash_checkblock(dp, (char *)ep - 1093 ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), 1094 ulr->ulr_offset & ~(dirblksiz - 1)); 1095 } 1096#endif 1097 1098out: 1099 if (ip) { 1100 ip->i_nlink--; 1101 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1102 ip->i_flag |= IN_CHANGE; 1103 UFS_WAPBL_UPDATE(ITOV(ip), NULL, NULL, 0); 1104 } 1105 error = VOP_BWRITE(bp->b_vp, bp); 1106 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1107 /* 1108 * If the last named reference to a snapshot goes away, 1109 * drop its snapshot reference so that it will be reclaimed 1110 * when last open reference goes away. 1111 */ 1112 if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && 1113 ip->i_nlink == 0) 1114 ffs_snapgone(ip); 1115 UFS_WAPBL_UPDATE(dvp, NULL, NULL, 0); 1116 return (error); 1117} 1118 1119/* 1120 * Rewrite an existing directory entry to point at the inode supplied. 1121 * 1122 * DP is the directory to update. 1123 * OFFSET is the position of the entry in question. It may come 1124 * from ulr_offset of a ufs_lookup_results. 1125 * OIP is the old inode the directory previously pointed to. 1126 * NEWINUM is the number of the new inode. 1127 * NEWTYPE is the new value for the type field of the directory entry. 1128 * (This is ignored if the fs doesn't support that.) 1129 * ISRMDIR is not used and (XXX) should be removed. 1130 * IFLAGS are added to DP's inode flags. 1131 * 1132 * The link count of OIP is decremented. Note that the link count of 1133 * the new inode is *not* incremented. Yay for symmetry. 
1134 */ 1135int 1136ufs_dirrewrite(struct inode *dp, off_t offset, 1137 struct inode *oip, ino_t newinum, int newtype, 1138 int isrmdir, int iflags) 1139{ 1140 struct buf *bp; 1141 struct direct *ep; 1142 struct vnode *vdp = ITOV(dp); 1143 int error; 1144 1145 error = ufs_blkatoff(vdp, offset, (void *)&ep, &bp, true); 1146 if (error) 1147 return (error); 1148 ep->d_ino = ufs_rw32(newinum, UFS_MPNEEDSWAP(dp->i_ump)); 1149 if (!FSFMT(vdp)) 1150 ep->d_type = newtype; 1151 oip->i_nlink--; 1152 DIP_ASSIGN(oip, nlink, oip->i_nlink); 1153 oip->i_flag |= IN_CHANGE; 1154 UFS_WAPBL_UPDATE(ITOV(oip), NULL, NULL, UPDATE_DIROP); 1155 error = VOP_BWRITE(bp->b_vp, bp); 1156 dp->i_flag |= iflags; 1157 /* 1158 * If the last named reference to a snapshot goes away, 1159 * drop its snapshot reference so that it will be reclaimed 1160 * when last open reference goes away. 1161 */ 1162 if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) 1163 ffs_snapgone(oip); 1164 UFS_WAPBL_UPDATE(vdp, NULL, NULL, UPDATE_DIROP); 1165 return (error); 1166} 1167 1168/* 1169 * Check if a directory is empty or not. 1170 * Inode supplied must be locked. 1171 * 1172 * Using a struct dirtemplate here is not precisely 1173 * what we want, but better than using a struct direct. 1174 * 1175 * NB: does not handle corrupted directories. 1176 */ 1177int 1178ufs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) 1179{ 1180 doff_t off; 1181 struct dirtemplate dbuf; 1182 struct direct *dp = (struct direct *)&dbuf; 1183 int error, namlen; 1184 size_t count; 1185 const int needswap = UFS_IPNEEDSWAP(ip); 1186#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1187 1188 for (off = 0; off < ip->i_size; 1189 off += ufs_rw16(dp->d_reclen, needswap)) { 1190 error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off, 1191 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1192 /* 1193 * Since we read MINDIRSIZ, residual must 1194 * be 0 unless we're at end of file. 
1195 */ 1196 if (error || count != 0) 1197 return (0); 1198 /* avoid infinite loops */ 1199 if (dp->d_reclen == 0) 1200 return (0); 1201 /* skip empty entries */ 1202 if (dp->d_ino == 0 || ufs_rw32(dp->d_ino, needswap) == WINO) 1203 continue; 1204 /* accept only "." and ".." */ 1205#if (BYTE_ORDER == LITTLE_ENDIAN) 1206 if (FSFMT(ITOV(ip)) && needswap == 0) 1207 namlen = dp->d_type; 1208 else 1209 namlen = dp->d_namlen; 1210#else 1211 if (FSFMT(ITOV(ip)) && needswap != 0) 1212 namlen = dp->d_type; 1213 else 1214 namlen = dp->d_namlen; 1215#endif 1216 if (namlen > 2) 1217 return (0); 1218 if (dp->d_name[0] != '.') 1219 return (0); 1220 /* 1221 * At this point namlen must be 1 or 2. 1222 * 1 implies ".", 2 implies ".." if second 1223 * char is also "." 1224 */ 1225 if (namlen == 1 && 1226 ufs_rw32(dp->d_ino, needswap) == ip->i_number) 1227 continue; 1228 if (dp->d_name[1] == '.' && 1229 ufs_rw32(dp->d_ino, needswap) == parentino) 1230 continue; 1231 return (0); 1232 } 1233 return (1); 1234} 1235 1236/* 1237 * Check if source directory is in the path of the target directory. 1238 * Target is supplied locked, source is unlocked. 1239 * The target is always vput before returning. 
1240 */ 1241int 1242ufs_checkpath(struct inode *source, struct inode *target, kauth_cred_t cred) 1243{ 1244 struct vnode *nextvp, *vp; 1245 int error, rootino, namlen; 1246 struct dirtemplate dirbuf; 1247 const int needswap = UFS_MPNEEDSWAP(target->i_ump); 1248 1249 vp = ITOV(target); 1250 if (target->i_number == source->i_number) { 1251 error = EEXIST; 1252 goto out; 1253 } 1254 rootino = ROOTINO; 1255 error = 0; 1256 if (target->i_number == rootino) 1257 goto out; 1258 1259 for (;;) { 1260 if (vp->v_type != VDIR) { 1261 error = ENOTDIR; 1262 break; 1263 } 1264 error = vn_rdwr(UIO_READ, vp, (void *)&dirbuf, 1265 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1266 IO_NODELOCKED, cred, NULL, NULL); 1267 if (error != 0) 1268 break; 1269#if (BYTE_ORDER == LITTLE_ENDIAN) 1270 if (FSFMT(vp) && needswap == 0) 1271 namlen = dirbuf.dotdot_type; 1272 else 1273 namlen = dirbuf.dotdot_namlen; 1274#else 1275 if (FSFMT(vp) && needswap != 0) 1276 namlen = dirbuf.dotdot_type; 1277 else 1278 namlen = dirbuf.dotdot_namlen; 1279#endif 1280 if (namlen != 2 || 1281 dirbuf.dotdot_name[0] != '.' || 1282 dirbuf.dotdot_name[1] != '.') { 1283 error = ENOTDIR; 1284 break; 1285 } 1286 if (ufs_rw32(dirbuf.dotdot_ino, needswap) == source->i_number) { 1287 error = EINVAL; 1288 break; 1289 } 1290 if (ufs_rw32(dirbuf.dotdot_ino, needswap) == rootino) 1291 break; 1292 VOP_UNLOCK(vp); 1293 error = VFS_VGET(vp->v_mount, 1294 ufs_rw32(dirbuf.dotdot_ino, needswap), &nextvp); 1295 vrele(vp); 1296 if (error) { 1297 vp = NULL; 1298 break; 1299 } 1300 vp = nextvp; 1301 } 1302 1303out: 1304 if (error == ENOTDIR) 1305 printf("checkpath: .. not a directory\n"); 1306 if (vp != NULL) 1307 vput(vp); 1308 return (error); 1309} 1310 1311/* 1312 * Extract the inode number of ".." from a directory. 1313 * Helper for ufs_parentcheck. 
1314 */ 1315static int 1316ufs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t *result) 1317{ 1318 struct dirtemplate dirbuf; 1319 int namlen, error; 1320 1321 error = vn_rdwr(UIO_READ, vp, &dirbuf, 1322 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1323 IO_NODELOCKED, cred, NULL, NULL); 1324 if (error) { 1325 return error; 1326 } 1327 1328#if (BYTE_ORDER == LITTLE_ENDIAN) 1329 if (FSFMT(vp) && needswap == 0) 1330 namlen = dirbuf.dotdot_type; 1331 else 1332 namlen = dirbuf.dotdot_namlen; 1333#else 1334 if (FSFMT(vp) && needswap != 0) 1335 namlen = dirbuf.dotdot_type; 1336 else 1337 namlen = dirbuf.dotdot_namlen; 1338#endif 1339 if (namlen != 2 || 1340 dirbuf.dotdot_name[0] != '.' || 1341 dirbuf.dotdot_name[1] != '.') { 1342 printf("ufs_readdotdot: directory %llu contains " 1343 "garbage instead of ..\n", 1344 (unsigned long long) VTOI(vp)->i_number); 1345 return ENOTDIR; 1346 } 1347 *result = ufs_rw32(dirbuf.dotdot_ino, needswap); 1348 return 0; 1349} 1350 1351/* 1352 * Check if LOWER is a descendent of UPPER. If we find UPPER, return 1353 * nonzero in FOUND and return a reference to the immediate descendent 1354 * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we 1355 * reach the volume root and that isn't UPPER), return zero in FOUND 1356 * and null in UPPERCHILD. 1357 * 1358 * Neither UPPER nor LOWER should be locked. 1359 * 1360 * On error (such as a permissions error checking up the directory 1361 * tree) fail entirely. 1362 * 1363 * Note that UPPER and LOWER must be on the same volume, and because 1364 * we inspect only that volume NEEDSWAP can be constant. 
1365 */ 1366int 1367ufs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred, 1368 int *found_ret, struct vnode **upperchild_ret) 1369{ 1370 const int needswap = UFS_MPNEEDSWAP(VTOI(lower)->i_ump); 1371 ino_t upper_ino, found_ino; 1372 struct vnode *current, *next; 1373 int error; 1374 1375 if (upper == lower) { 1376 vref(upper); 1377 *found_ret = 1; 1378 *upperchild_ret = upper; 1379 return 0; 1380 } 1381 if (VTOI(lower)->i_number == ROOTINO) { 1382 *found_ret = 0; 1383 *upperchild_ret = NULL; 1384 return 0; 1385 } 1386 1387 upper_ino = VTOI(upper)->i_number; 1388 1389 current = lower; 1390 vref(current); 1391 vn_lock(current, LK_EXCLUSIVE | LK_RETRY); 1392 1393 for (;;) { 1394 error = ufs_readdotdot(current, needswap, cred, &found_ino); 1395 if (error) { 1396 vput(current); 1397 return error; 1398 } 1399 if (found_ino == upper_ino) { 1400 VOP_UNLOCK(current); 1401 *found_ret = 1; 1402 *upperchild_ret = current; 1403 return 0; 1404 } 1405 if (found_ino == ROOTINO) { 1406 vput(current); 1407 *found_ret = 0; 1408 *upperchild_ret = NULL; 1409 return 0; 1410 } 1411 VOP_UNLOCK(current); 1412 error = VFS_VGET(current->v_mount, found_ino, &next); 1413 if (error) { 1414 vrele(current); 1415 return error; 1416 } 1417 KASSERT(VOP_ISLOCKED(next)); 1418 if (next->v_type != VDIR) { 1419 printf("ufs_parentcheck: inode %llu reached via .. of " 1420 "inode %llu is not a directory\n", 1421 (unsigned long long)VTOI(next)->i_number, 1422 (unsigned long long)VTOI(current)->i_number); 1423 vput(next); 1424 vrele(current); 1425 return ENOTDIR; 1426 } 1427 vrele(current); 1428 current = next; 1429 } 1430 1431 return 0; 1432} 1433 1434#define UFS_DIRRABLKS 0 1435int ufs_dirrablks = UFS_DIRRABLKS; 1436 1437/* 1438 * ufs_blkatoff: Return buffer with the contents of block "offset" from 1439 * the beginning of directory "vp". If "res" is non-zero, fill it in with 1440 * a pointer to the remaining space in the directory. 
If the caller intends 1441 * to modify the buffer returned, "modify" must be true. 1442 */ 1443 1444int 1445ufs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp, 1446 bool modify) 1447{ 1448 struct inode *ip; 1449 struct buf *bp; 1450 daddr_t lbn; 1451 const int dirrablks = ufs_dirrablks; 1452 daddr_t *blks; 1453 int *blksizes; 1454 int run, error; 1455 struct mount *mp = vp->v_mount; 1456 const int bshift = mp->mnt_fs_bshift; 1457 const int bsize = 1 << bshift; 1458 off_t eof; 1459 1460 blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP); 1461 blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP); 1462 ip = VTOI(vp); 1463 KASSERT(vp->v_size == ip->i_size); 1464 GOP_SIZE(vp, vp->v_size, &eof, 0); 1465 lbn = offset >> bshift; 1466 1467 for (run = 0; run <= dirrablks;) { 1468 const off_t curoff = lbn << bshift; 1469 const int size = MIN(eof - curoff, bsize); 1470 1471 if (size == 0) { 1472 break; 1473 } 1474 KASSERT(curoff < eof); 1475 blks[run] = lbn; 1476 blksizes[run] = size; 1477 lbn++; 1478 run++; 1479 if (size != bsize) { 1480 break; 1481 } 1482 } 1483 KASSERT(run >= 1); 1484 error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], 1485 run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); 1486 if (error != 0) { 1487 brelse(bp, 0); 1488 *bpp = NULL; 1489 goto out; 1490 } 1491 if (res) { 1492 *res = (char *)bp->b_data + (offset & (bsize - 1)); 1493 } 1494 *bpp = bp; 1495 1496 out: 1497 kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t)); 1498 kmem_free(blksizes, (1 + dirrablks) * sizeof(int)); 1499 return error; 1500} 1501