/* ufs_lookup.c revision 1.36 */
1/* $OpenBSD: ufs_lookup.c,v 1.36 2006/11/17 11:09:00 pedro Exp $ */ 2/* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */ 3 4/* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_lookup.c 8.9 (Berkeley) 8/11/94 38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/namei.h> 44#include <sys/buf.h> 45#include <sys/file.h> 46#include <sys/stat.h> 47#include <sys/mount.h> 48#include <sys/vnode.h> 49 50#include <uvm/uvm_extern.h> 51 52#include <ufs/ufs/quota.h> 53#include <ufs/ufs/inode.h> 54#include <ufs/ufs/dir.h> 55#ifdef UFS_DIRHASH 56#include <ufs/ufs/dirhash.h> 57#endif 58#include <ufs/ufs/ufsmount.h> 59#include <ufs/ufs/ufs_extern.h> 60 61extern struct nchstats nchstats; 62 63#ifdef DIAGNOSTIC 64int dirchk = 1; 65#else 66int dirchk = 0; 67#endif 68 69#define FSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 70 71/* 72 * Convert a component of a pathname into a pointer to a locked inode. 73 * This is a very central and rather complicated routine. 74 * If the file system is not maintained in a strict tree hierarchy, 75 * this can result in a deadlock situation (see comments in code below). 76 * 77 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 78 * on whether the name is to be looked up, created, renamed, or deleted. 79 * When CREATE, RENAME, or DELETE is specified, information usable in 80 * creating, renaming, or deleting a directory entry may be calculated. 81 * If flag has LOCKPARENT or'ed into it and the target of the pathname 82 * exists, lookup returns both the target and its parent directory locked. 
83 * When creating or renaming and LOCKPARENT is specified, the target may 84 * not be ".". When deleting and LOCKPARENT is specified, the target may 85 * be "."., but the caller must check to ensure it does an vrele and vput 86 * instead of two vputs. 87 * 88 * Overall outline of ufs_lookup: 89 * 90 * check accessibility of directory 91 * look for name in cache, if found, then if at end of path 92 * and deleting or creating, drop it, else return name 93 * search for name in directory, to found or notfound 94 * notfound: 95 * if creating, return locked directory, leaving info on available slots 96 * else return error 97 * found: 98 * if at end of path and deleting, return information to allow delete 99 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 100 * inode and return info to allow rewrite 101 * if not at end, add name to cache; if at end and neither creating 102 * nor deleting, add name to cache 103 */ 104int 105ufs_lookup(void *v) 106{ 107 struct vop_lookup_args /* { 108 struct vnode *a_dvp; 109 struct vnode **a_vpp; 110 struct componentname *a_cnp; 111 } */ *ap = v; 112 struct vnode *vdp; /* vnode for directory being searched */ 113 struct inode *dp; /* inode for directory being searched */ 114 struct buf *bp; /* a buffer of directory entries */ 115 struct direct *ep; /* the current directory entry */ 116 int entryoffsetinblock; /* offset of ep in bp's buffer */ 117 enum {NONE, COMPACT, FOUND} slotstatus; 118 doff_t slotoffset; /* offset of area with free space */ 119 int slotsize; /* size of area at slotoffset */ 120 int slotfreespace; /* amount of space free in slot */ 121 int slotneeded; /* size of the entry we're seeking */ 122 int numdirpasses; /* strategy for directory search */ 123 doff_t endsearch; /* offset to end directory search */ 124 doff_t prevoff; /* prev entry dp->i_offset */ 125 struct vnode *pdp; /* saved dp during symlink work */ 126 struct vnode *tdp; /* returned by VFS_VGET */ 127 doff_t enduseful; /* pointer past 
last used dir slot */ 128 u_long bmask; /* block offset mask */ 129 int lockparent; /* 1 => lockparent flag is set */ 130 int wantparent; /* 1 => wantparent or lockparent flag */ 131 int namlen, error; 132 struct vnode **vpp = ap->a_vpp; 133 struct componentname *cnp = ap->a_cnp; 134 struct ucred *cred = cnp->cn_cred; 135 int flags; 136 int nameiop = cnp->cn_nameiop; 137 struct proc *p = cnp->cn_proc; 138 139 cnp->cn_flags &= ~PDIRUNLOCK; 140 flags = cnp->cn_flags; 141 142 bp = NULL; 143 slotoffset = -1; 144 *vpp = NULL; 145 vdp = ap->a_dvp; 146 dp = VTOI(vdp); 147 lockparent = flags & LOCKPARENT; 148 wantparent = flags & (LOCKPARENT|WANTPARENT); 149 150 /* 151 * Check accessiblity of directory. 152 */ 153 if ((DIP(dp, mode) & IFMT) != IFDIR) 154 return (ENOTDIR); 155 if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) 156 return (error); 157 158 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 159 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 160 return (EROFS); 161 162 /* 163 * We now have a segment name to search for, and a directory to search. 164 * 165 * Before tediously performing a linear scan of the directory, 166 * check the name cache to see if the directory/name pair 167 * we are looking for is known already. 168 */ 169 if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) 170 return (error); 171 172 /* 173 * Suppress search for slots unless creating 174 * file and at end of pathname, in which case 175 * we watch for a place to put the new file in 176 * case it doesn't already exist. 177 */ 178 slotstatus = FOUND; 179 slotfreespace = slotsize = slotneeded = 0; 180 if ((nameiop == CREATE || nameiop == RENAME) && 181 (flags & ISLASTCN)) { 182 slotstatus = NONE; 183 slotneeded = (sizeof(struct direct) - MAXNAMLEN + 184 cnp->cn_namelen + 3) &~ 3; 185 } 186 187 /* 188 * If there is cached information on a previous search of 189 * this directory, pick up where we last left off. 
190 * We cache only lookups as these are the most common 191 * and have the greatest payoff. Caching CREATE has little 192 * benefit as it usually must search the entire directory 193 * to determine that the entry does not exist. Caching the 194 * location of the last DELETE or RENAME has not reduced 195 * profiling time and hence has been removed in the interest 196 * of simplicity. 197 */ 198 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 199 200#ifdef UFS_DIRHASH 201 /* 202 * Use dirhash for fast operations on large directories. The logic 203 * to determine whether to hash the directory is contained within 204 * ufsdirhash_build(); a zero return means that it decided to hash 205 * this directory and it successfully built up the hash table. 206 */ 207 if (ufsdirhash_build(dp) == 0) { 208 /* Look for a free slot if needed. */ 209 enduseful = DIP(dp, size); 210 if (slotstatus != FOUND) { 211 slotoffset = ufsdirhash_findfree(dp, slotneeded, 212 &slotsize); 213 if (slotoffset >= 0) { 214 slotstatus = COMPACT; 215 enduseful = ufsdirhash_enduseful(dp); 216 if (enduseful < 0) 217 enduseful = DIP(dp, size); 218 } 219 } 220 /* Look up the component. */ 221 numdirpasses = 1; 222 entryoffsetinblock = 0; /* silence compiler warning */ 223 switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 224 &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { 225 case 0: 226 ep = (struct direct *)((char *)bp->b_data + 227 (dp->i_offset & bmask)); 228 goto foundentry; 229 case ENOENT: 230#define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ 231 dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ); 232 goto notfound; 233 default: 234 /* Something failed; just do a linear search. 
*/ 235 break; 236 } 237 } 238#endif /* UFS_DIRHASH */ 239 240 if (nameiop != LOOKUP || dp->i_diroff == 0 || 241 dp->i_diroff >= DIP(dp, size)) { 242 entryoffsetinblock = 0; 243 dp->i_offset = 0; 244 numdirpasses = 1; 245 } else { 246 dp->i_offset = dp->i_diroff; 247 if ((entryoffsetinblock = dp->i_offset & bmask) && 248 (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp))) 249 return (error); 250 numdirpasses = 2; 251 nchstats.ncs_2passes++; 252 } 253 prevoff = dp->i_offset; 254 endsearch = roundup(DIP(dp, size), DIRBLKSIZ); 255 enduseful = 0; 256 257searchloop: 258 while (dp->i_offset < endsearch) { 259 /* 260 * If necessary, get the next directory block. 261 */ 262 if ((dp->i_offset & bmask) == 0) { 263 if (bp != NULL) 264 brelse(bp); 265 error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, 266 &bp); 267 if (error) 268 return (error); 269 entryoffsetinblock = 0; 270 } 271 /* 272 * If still looking for a slot, and at a DIRBLKSIZE 273 * boundary, have to start looking for free space again. 274 */ 275 if (slotstatus == NONE && 276 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 277 slotoffset = -1; 278 slotfreespace = 0; 279 } 280 /* 281 * Get pointer to next entry. 282 * Full validation checks are slow, so we only check 283 * enough to insure forward progress through the 284 * directory. Complete checks can be run by patching 285 * "dirchk" to be true. 286 */ 287 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 288 if (ep->d_reclen == 0 || 289 (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { 290 int i; 291 292 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 293 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 294 dp->i_offset += i; 295 entryoffsetinblock += i; 296 continue; 297 } 298 299 /* 300 * If an appropriate sized slot has not yet been found, 301 * check to see if one is available. Also accumulate space 302 * in the current block so that we can determine if 303 * compaction is viable. 
304 */ 305 if (slotstatus != FOUND) { 306 int size = ep->d_reclen; 307 308 if (ep->d_ino != 0) 309 size -= DIRSIZ(FSFMT(vdp), ep); 310 if (size > 0) { 311 if (size >= slotneeded) { 312 slotstatus = FOUND; 313 slotoffset = dp->i_offset; 314 slotsize = ep->d_reclen; 315 } else if (slotstatus == NONE) { 316 slotfreespace += size; 317 if (slotoffset == -1) 318 slotoffset = dp->i_offset; 319 if (slotfreespace >= slotneeded) { 320 slotstatus = COMPACT; 321 slotsize = dp->i_offset + 322 ep->d_reclen - slotoffset; 323 } 324 } 325 } 326 } 327 328 /* 329 * Check for a name match. 330 */ 331 if (ep->d_ino) { 332# if (BYTE_ORDER == LITTLE_ENDIAN) 333 if (vdp->v_mount->mnt_maxsymlinklen > 0) 334 namlen = ep->d_namlen; 335 else 336 namlen = ep->d_type; 337# else 338 namlen = ep->d_namlen; 339# endif 340 if (namlen == cnp->cn_namelen && 341 !bcmp(cnp->cn_nameptr, ep->d_name, 342 (unsigned)namlen)) { 343#ifdef UFS_DIRHASH 344foundentry: 345#endif 346 /* 347 * Save directory entry's inode number and 348 * reclen in ndp->ni_ufs area, and release 349 * directory buffer. 350 */ 351 dp->i_ino = ep->d_ino; 352 dp->i_reclen = ep->d_reclen; 353 goto found; 354 } 355 } 356 prevoff = dp->i_offset; 357 dp->i_offset += ep->d_reclen; 358 entryoffsetinblock += ep->d_reclen; 359 if (ep->d_ino) 360 enduseful = dp->i_offset; 361 } 362#ifdef UFS_DIRHASH 363notfound: 364#endif 365 /* 366 * If we started in the middle of the directory and failed 367 * to find our target, we must check the beginning as well. 368 */ 369 if (numdirpasses == 2) { 370 numdirpasses--; 371 dp->i_offset = 0; 372 endsearch = dp->i_diroff; 373 goto searchloop; 374 } 375 if (bp != NULL) 376 brelse(bp); 377 /* 378 * If creating, and at end of pathname and current 379 * directory has not been removed, then can consider 380 * allowing file to be created. 
381 */ 382 if ((nameiop == CREATE || nameiop == RENAME) && 383 (flags & ISLASTCN) && dp->i_effnlink != 0) { 384 /* 385 * Access for write is interpreted as allowing 386 * creation of files in the directory. 387 */ 388 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 389 if (error) 390 return (error); 391 /* 392 * Return an indication of where the new directory 393 * entry should be put. If we didn't find a slot, 394 * then set dp->i_count to 0 indicating 395 * that the new slot belongs at the end of the 396 * directory. If we found a slot, then the new entry 397 * can be put in the range from dp->i_offset to 398 * dp->i_offset + dp->i_count. 399 */ 400 if (slotstatus == NONE) { 401 dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ); 402 dp->i_count = 0; 403 enduseful = dp->i_offset; 404 } else if (nameiop == DELETE) { 405 dp->i_offset = slotoffset; 406 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 407 dp->i_count = 0; 408 else 409 dp->i_count = dp->i_offset - prevoff; 410 } else { 411 dp->i_offset = slotoffset; 412 dp->i_count = slotsize; 413 if (enduseful < slotoffset + slotsize) 414 enduseful = slotoffset + slotsize; 415 } 416 dp->i_endoff = roundup(enduseful, DIRBLKSIZ); 417 /* 418 * We return with the directory locked, so that 419 * the parameters we set up above will still be 420 * valid if we actually decide to do a direnter(). 421 * We return ni_vp == NULL to indicate that the entry 422 * does not currently exist; we leave a pointer to 423 * the (locked) directory inode in ndp->ni_dvp. 424 * The pathname buffer is saved so that the name 425 * can be obtained later. 426 * 427 * NB - if the directory is unlocked, then this 428 * information cannot be used. 429 */ 430 cnp->cn_flags |= SAVENAME; 431 if (!lockparent) { 432 VOP_UNLOCK(vdp, 0, p); 433 cnp->cn_flags |= PDIRUNLOCK; 434 } 435 return (EJUSTRETURN); 436 } 437 /* 438 * Insert name into cache (as non-existent) if appropriate. 
439 */ 440 if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 441 cache_enter(vdp, *vpp, cnp); 442 return (ENOENT); 443 444found: 445 if (numdirpasses == 2) 446 nchstats.ncs_pass2++; 447 /* 448 * Check that directory length properly reflects presence 449 * of this entry. 450 */ 451 if (dp->i_offset + DIRSIZ(FSFMT(vdp), ep) > DIP(dp, size)) { 452 ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small"); 453 DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(FSFMT(vdp), ep)); 454 dp->i_flag |= IN_CHANGE | IN_UPDATE; 455 } 456 brelse(bp); 457 458 /* 459 * Found component in pathname. 460 * If the final component of path name, save information 461 * in the cache as to where the entry was found. 462 */ 463 if ((flags & ISLASTCN) && nameiop == LOOKUP) 464 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 465 466 /* 467 * If deleting, and at end of pathname, return 468 * parameters which can be used to remove file. 469 * If the wantparent flag isn't set, we return only 470 * the directory (in ndp->ni_dvp), otherwise we go 471 * on and lock the inode, being careful with ".". 472 */ 473 if (nameiop == DELETE && (flags & ISLASTCN)) { 474 /* 475 * Write access to directory required to delete files. 476 */ 477 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 478 if (error) 479 return (error); 480 /* 481 * Return pointer to current entry in dp->i_offset, 482 * and distance past previous entry (if there 483 * is a previous entry in this block) in dp->i_count. 484 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 485 */ 486 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 487 dp->i_count = 0; 488 else 489 dp->i_count = dp->i_offset - prevoff; 490 if (dp->i_number == dp->i_ino) { 491 VREF(vdp); 492 *vpp = vdp; 493 return (0); 494 } 495 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 496 if (error) 497 return (error); 498 /* 499 * If directory is "sticky", then user must own 500 * the directory, or the file in it, else she 501 * may not delete it (unless she's root). 
This 502 * implements append-only directories. 503 */ 504 if ((DIP(dp, mode) & ISVTX) && 505 cred->cr_uid != 0 && 506 cred->cr_uid != DIP(dp, uid) && 507 DIP(VTOI(tdp), uid) != cred->cr_uid) { 508 vput(tdp); 509 return (EPERM); 510 } 511 *vpp = tdp; 512 if (!lockparent) { 513 VOP_UNLOCK(vdp, 0, p); 514 cnp->cn_flags |= PDIRUNLOCK; 515 } 516 return (0); 517 } 518 519 /* 520 * If rewriting (RENAME), return the inode and the 521 * information required to rewrite the present directory 522 * Must get inode of directory entry to verify it's a 523 * regular file, or empty directory. 524 */ 525 if (nameiop == RENAME && wantparent && 526 (flags & ISLASTCN)) { 527 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 528 if (error) 529 return (error); 530 /* 531 * Careful about locking second inode. 532 * This can only occur if the target is ".". 533 */ 534 if (dp->i_number == dp->i_ino) 535 return (EISDIR); 536 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 537 if (error) 538 return (error); 539 *vpp = tdp; 540 cnp->cn_flags |= SAVENAME; 541 if (!lockparent) { 542 VOP_UNLOCK(vdp, 0, p); 543 cnp->cn_flags |= PDIRUNLOCK; 544 } 545 return (0); 546 } 547 548 /* 549 * Step through the translation in the name. We do not `vput' the 550 * directory because we may need it again if a symbolic link 551 * is relative to the current directory. Instead we save it 552 * unlocked as "pdp". We must get the target inode before unlocking 553 * the directory to insure that the inode will not be removed 554 * before we get it. We prevent deadlock by always fetching 555 * inodes from the root, moving down the directory tree. Thus 556 * when following backward pointers ".." we must unlock the 557 * parent directory before getting the requested directory. 558 * There is a potential race condition here if both the current 559 * and parent directories are removed before the VFS_VGET for the 560 * inode associated with ".." returns. 
We hope that this occurs 561 * infrequently since we cannot avoid this race condition without 562 * implementing a sophisticated deadlock detection algorithm. 563 * Note also that this simple deadlock detection scheme will not 564 * work if the file system has any hard links other than ".." 565 * that point backwards in the directory structure. 566 */ 567 pdp = vdp; 568 if (flags & ISDOTDOT) { 569 VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ 570 cnp->cn_flags |= PDIRUNLOCK; 571 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 572 if (error) { 573 if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0) 574 cnp->cn_flags &= ~PDIRUNLOCK; 575 return (error); 576 } 577 if (lockparent && (flags & ISLASTCN)) { 578 if ((error = vn_lock(pdp, LK_EXCLUSIVE, p))) { 579 vput(tdp); 580 return (error); 581 } 582 cnp->cn_flags &= ~PDIRUNLOCK; 583 } 584 *vpp = tdp; 585 } else if (dp->i_number == dp->i_ino) { 586 VREF(vdp); /* we want ourself, ie "." */ 587 *vpp = vdp; 588 } else { 589 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 590 if (error) 591 return (error); 592 if (!lockparent || !(flags & ISLASTCN)) { 593 VOP_UNLOCK(pdp, 0, p); 594 cnp->cn_flags |= PDIRUNLOCK; 595 } 596 *vpp = tdp; 597 } 598 599 /* 600 * Insert name into cache if appropriate. 
601 */ 602 if (cnp->cn_flags & MAKEENTRY) 603 cache_enter(vdp, *vpp, cnp); 604 return (0); 605} 606 607void 608ufs_dirbad(struct inode *ip, doff_t offset, char *how) 609{ 610 struct mount *mp; 611 612 mp = ITOV(ip)->v_mount; 613 (void)printf("%s: bad dir ino %d at offset %d: %s\n", 614 mp->mnt_stat.f_mntonname, ip->i_number, offset, how); 615 if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) 616 panic("bad dir"); 617} 618 619/* 620 * Do consistency checking on a directory entry: 621 * record length must be multiple of 4 622 * entry must fit in rest of its DIRBLKSIZ block 623 * record must be large enough to contain entry 624 * name is not longer than MAXNAMLEN 625 * name must be as long as advertised, and null terminated 626 */ 627int 628ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock) 629{ 630 int i; 631 int namlen; 632 633# if (BYTE_ORDER == LITTLE_ENDIAN) 634 if (dp->v_mount->mnt_maxsymlinklen > 0) 635 namlen = ep->d_namlen; 636 else 637 namlen = ep->d_type; 638# else 639 namlen = ep->d_namlen; 640# endif 641 if ((ep->d_reclen & 0x3) != 0 || 642 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 643 ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > MAXNAMLEN) { 644 /*return (1); */ 645 printf("First bad\n"); 646 goto bad; 647 } 648 if (ep->d_ino == 0) 649 return (0); 650 for (i = 0; i < namlen; i++) 651 if (ep->d_name[i] == '\0') { 652 /*return (1); */ 653 printf("Second bad\n"); 654 goto bad; 655 } 656 if (ep->d_name[i]) 657 goto bad; 658 return (0); 659bad: 660 return (1); 661} 662 663/* 664 * Construct a new directory entry after a call to namei, using the 665 * parameters that it left in the componentname argument cnp. The 666 * argument ip is the inode to which the new directory entry will refer. 
667 */ 668void 669ufs_makedirentry(struct inode *ip, struct componentname *cnp, 670 struct direct *newdirp) 671{ 672#ifdef DIAGNOSTIC 673 if ((cnp->cn_flags & SAVENAME) == 0) 674 panic("ufs_makedirentry: missing name"); 675#endif 676 newdirp->d_ino = ip->i_number; 677 newdirp->d_namlen = cnp->cn_namelen; 678 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 679 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 680 newdirp->d_type = IFTODT(DIP(ip, mode)); 681 else { 682 newdirp->d_type = 0; 683# if (BYTE_ORDER == LITTLE_ENDIAN) 684 { u_char tmp = newdirp->d_namlen; 685 newdirp->d_namlen = newdirp->d_type; 686 newdirp->d_type = tmp; } 687# endif 688 } 689} 690 691/* 692 * Write a directory entry after a call to namei, using the parameters 693 * that it left in nameidata. The argument dirp is the new directory 694 * entry contents. Dvp is a pointer to the directory to be written, 695 * which was left locked by namei. Remaining parameters (dp->i_offset, 696 * dp->i_count) indicate how the space for the new entry is to be obtained. 697 * Non-null bp indicates that a directory is being created (for the 698 * soft dependency code). 699 */ 700int 701ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, 702 struct componentname *cnp, struct buf *newdirbp) 703{ 704 struct ucred *cr; 705 struct proc *p; 706 int newentrysize; 707 struct inode *dp; 708 struct buf *bp; 709 u_int dsize; 710 struct direct *ep, *nep; 711 int error, ret, blkoff, loc, spacefree, flags; 712 char *dirbuf; 713 714 error = 0; 715 cr = cnp->cn_cred; 716 p = cnp->cn_proc; 717 dp = VTOI(dvp); 718 newentrysize = DIRSIZ(FSFMT(dvp), dirp); 719 720 if (dp->i_count == 0) { 721 /* 722 * If dp->i_count is 0, then namei could find no 723 * space in the directory. Here, dp->i_offset will 724 * be on a directory block boundary and we will write the 725 * new entry into a fresh block. 
726 */ 727 if (dp->i_offset & (DIRBLKSIZ - 1)) 728 panic("ufs_direnter: newblk"); 729 flags = B_CLRBUF; 730 if (!DOINGSOFTDEP(dvp)) 731 flags |= B_SYNC; 732 if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ, 733 cr, flags, &bp)) != 0) { 734 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 735 bdwrite(newdirbp); 736 return (error); 737 } 738 DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ); 739 dp->i_flag |= IN_CHANGE | IN_UPDATE; 740 uvm_vnp_setsize(dvp, DIP(dp, size)); 741 dirp->d_reclen = DIRBLKSIZ; 742 blkoff = dp->i_offset & 743 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 744 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 745 746#ifdef UFS_DIRHASH 747 if (dp->i_dirhash != NULL) { 748 ufsdirhash_newblk(dp, dp->i_offset); 749 ufsdirhash_add(dp, dirp, dp->i_offset); 750 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 751 dp->i_offset); 752 } 753#endif 754 755 if (DOINGSOFTDEP(dvp)) { 756 /* 757 * Ensure that the entire newly allocated block is a 758 * valid directory so that future growth within the 759 * block does not have to ensure that the block is 760 * written before the inode. 761 */ 762 blkoff += DIRBLKSIZ; 763 while (blkoff < bp->b_bcount) { 764 ((struct direct *) 765 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 766 blkoff += DIRBLKSIZ; 767 } 768 if (softdep_setup_directory_add(bp, dp, dp->i_offset, 769 dirp->d_ino, newdirbp, 1) == 0) { 770 bdwrite(bp); 771 return (UFS_UPDATE(dp, 0)); 772 } 773 /* We have just allocated a directory block in an 774 * indirect block. Rather than tracking when it gets 775 * claimed by the inode, we simply do a VOP_FSYNC 776 * now to ensure that it is there (in case the user 777 * does a future fsync). Note that we have to unlock 778 * the inode for the entry that we just entered, as 779 * the VOP_FSYNC may need to lock other inodes which 780 * can lead to deadlock if we also hold a lock on 781 * the newly entered node. 
782 */ 783 if ((error = VOP_BWRITE(bp))) 784 return (error); 785 if (tvp != NULL) 786 VOP_UNLOCK(tvp, 0, p); 787 error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p); 788 if (tvp != NULL) 789 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); 790 return (error); 791 } 792 error = VOP_BWRITE(bp); 793 ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp)); 794 if (error == 0) 795 return (ret); 796 return (error); 797 } 798 799 /* 800 * If dp->i_count is non-zero, then namei found space for the new 801 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 802 * in the directory. To use this space, we may have to compact 803 * the entries located there, by copying them together towards the 804 * beginning of the block, leaving the free space in one usable 805 * chunk at the end. 806 */ 807 808 /* 809 * Increase size of directory if entry eats into new space. 810 * This should never push the size past a new multiple of 811 * DIRBLKSIZE. 812 * 813 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 814 */ 815 if (dp->i_offset + dp->i_count > DIP(dp, size)) 816 DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count); 817 /* 818 * Get the block containing the space for the new directory entry. 819 */ 820 if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp)) 821 != 0) { 822 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 823 bdwrite(newdirbp); 824 return (error); 825 } 826 /* 827 * Find space for the new entry. In the simple case, the entry at 828 * offset base will have the space. If it does not, then namei 829 * arranged that compacting the region dp->i_offset to 830 * dp->i_offset + dp->i_count would yield the space. 831 */ 832 ep = (struct direct *)dirbuf; 833 dsize = ep->d_ino ? DIRSIZ(FSFMT(dvp), ep) : 0; 834 spacefree = ep->d_reclen - dsize; 835 for (loc = ep->d_reclen; loc < dp->i_count; ) { 836 nep = (struct direct *)(dirbuf + loc); 837 838 /* Trim the existing slot (NB: dsize may be zero). 
*/ 839 ep->d_reclen = dsize; 840 ep = (struct direct *)((char *)ep + dsize); 841 842 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 843 loc += nep->d_reclen; 844 if (nep->d_ino == 0) { 845 /* 846 * A mid-block unused entry. Such entries are 847 * never created by the kernel, but fsck_ffs 848 * can create them (and it doesn't fix them). 849 * 850 * Add up the free space, and initialise the 851 * relocated entry since we don't bcopy it. 852 */ 853 spacefree += nep->d_reclen; 854 ep->d_ino = 0; 855 dsize = 0; 856 continue; 857 } 858 dsize = DIRSIZ(FSFMT(dvp), nep); 859 spacefree += nep->d_reclen - dsize; 860#ifdef UFS_DIRHASH 861 if (dp->i_dirhash != NULL) 862 ufsdirhash_move(dp, nep, 863 dp->i_offset + ((char *)nep - dirbuf), 864 dp->i_offset + ((char *)ep - dirbuf)); 865#endif 866 if (DOINGSOFTDEP(dvp)) 867 softdep_change_directoryentry_offset(dp, dirbuf, 868 (caddr_t)nep, (caddr_t)ep, dsize); 869 else 870 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 871 } 872 /* 873 * Here, `ep' points to a directory entry containing `dsize' in-use 874 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 875 * then the entry is completely unused (dsize == 0). The value 876 * of ep->d_reclen is always indeterminate. 877 * 878 * Update the pointer fields in the previous entry (if any), 879 * copy in the new entry, and write out the block. 
880 */ 881 if (ep->d_ino == 0) { 882 if (spacefree + dsize < newentrysize) 883 panic("ufs_direnter: compact1"); 884 dirp->d_reclen = spacefree + dsize; 885 } else { 886 if (spacefree < newentrysize) 887 panic("ufs_direnter: compact2"); 888 dirp->d_reclen = spacefree; 889 ep->d_reclen = dsize; 890 ep = (struct direct *)((char *)ep + dsize); 891 } 892 893#ifdef UFS_DIRHASH 894 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 895 dirp->d_reclen == spacefree)) 896 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 897#endif 898 bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); 899#ifdef UFS_DIRHASH 900 if (dp->i_dirhash != NULL) 901 ufsdirhash_checkblock(dp, dirbuf - 902 (dp->i_offset & (DIRBLKSIZ - 1)), 903 dp->i_offset & ~(DIRBLKSIZ - 1)); 904#endif 905 906 if (DOINGSOFTDEP(dvp)) { 907 (void)softdep_setup_directory_add(bp, dp, 908 dp->i_offset + (caddr_t)ep - dirbuf, 909 dirp->d_ino, newdirbp, 0); 910 bdwrite(bp); 911 } else { 912 error = VOP_BWRITE(bp); 913 } 914 dp->i_flag |= IN_CHANGE | IN_UPDATE; 915 916 /* 917 * If all went well, and the directory can be shortened, proceed 918 * with the truncation. Note that we have to unlock the inode for 919 * the entry that we just entered, as the truncation may need to 920 * lock other inodes which can lead to deadlock if we also hold a 921 * lock on the newly entered node. 922 */ 923 924 if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) { 925 if (tvp != NULL) 926 VOP_UNLOCK(tvp, 0, p); 927#ifdef UFS_DIRHASH 928 if (dp->i_dirhash != NULL) 929 ufsdirhash_dirtrunc(dp, dp->i_endoff); 930#endif 931 932 933 error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr); 934 935 if (tvp != NULL) 936 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); 937 } 938 return (error); 939} 940 941/* 942 * Remove a directory entry after a call to namei, using 943 * the parameters which it left in nameidata. The entry 944 * dp->i_offset contains the offset into the directory of the 945 * entry to be eliminated. 
The dp->i_count field contains the 946 * size of the previous record in the directory. If this 947 * is 0, the first entry is being deleted, so we need only 948 * zero the inode number to mark the entry as free. If the 949 * entry is not the first in the directory, we must reclaim 950 * the space of the now empty record by adding the record size 951 * to the size of the previous entry. 952 */ 953int 954ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) 955{ 956 struct inode *dp; 957 struct direct *ep; 958 struct buf *bp; 959 int error; 960 961 dp = VTOI(dvp); 962 963 if ((error = UFS_BUFATOFF(dp, 964 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 965 return (error); 966#ifdef UFS_DIRHASH 967 /* 968 * Remove the dirhash entry. This is complicated by the fact 969 * that `ep' is the previous entry when dp->i_count != 0. 970 */ 971 if (dp->i_dirhash != NULL) 972 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 973 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 974#endif 975 976 if (dp->i_count == 0) { 977 /* 978 * First entry in block: set d_ino to zero. 979 */ 980 ep->d_ino = 0; 981 } else { 982 /* 983 * Collapse new free space into previous entry. 
984 */ 985 ep->d_reclen += dp->i_reclen; 986 } 987#ifdef UFS_DIRHASH 988 if (dp->i_dirhash != NULL) 989 ufsdirhash_checkblock(dp, (char *)ep - 990 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 991 dp->i_offset & ~(DIRBLKSIZ - 1)); 992#endif 993 if (DOINGSOFTDEP(dvp)) { 994 if (ip) { 995 ip->i_effnlink--; 996 softdep_change_linkcnt(ip, 0); 997 softdep_setup_remove(bp, dp, ip, isrmdir); 998 } 999 if (softdep_slowdown(dvp)) { 1000 error = bwrite(bp); 1001 } else { 1002 bdwrite(bp); 1003 error = 0; 1004 } 1005 } else { 1006 if (ip) { 1007 ip->i_effnlink--; 1008 DIP_ADD(ip, nlink, -1); 1009 ip->i_flag |= IN_CHANGE; 1010 } 1011 if (DOINGASYNC(dvp) && dp->i_count != 0) { 1012 bdwrite(bp); 1013 error = 0; 1014 } else 1015 error = bwrite(bp); 1016 } 1017 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1018 return (error); 1019} 1020 1021/* 1022 * Rewrite an existing directory entry to point at the inode 1023 * supplied. The parameters describing the directory entry are 1024 * set up by a call to namei. 1025 */ 1026int 1027ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, 1028 int isrmdir) 1029{ 1030 struct buf *bp; 1031 struct direct *ep; 1032 struct vnode *vdp = ITOV(dp); 1033 int error; 1034 1035 error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp); 1036 if (error) 1037 return (error); 1038 ep->d_ino = newinum; 1039 if (vdp->v_mount->mnt_maxsymlinklen > 0) 1040 ep->d_type = newtype; 1041 oip->i_effnlink--; 1042 if (DOINGSOFTDEP(vdp)) { 1043 softdep_change_linkcnt(oip, 0); 1044 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1045 bdwrite(bp); 1046 } else { 1047 DIP_ADD(oip, nlink, -1); 1048 oip->i_flag |= IN_CHANGE; 1049 if (DOINGASYNC(vdp)) { 1050 bdwrite(bp); 1051 error = 0; 1052 } else { 1053 error = VOP_BWRITE(bp); 1054 } 1055 } 1056 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1057 return (error); 1058} 1059 1060/* 1061 * Check if a directory is empty or not. 1062 * Inode supplied must be locked. 
1063 * 1064 * Using a struct dirtemplate here is not precisely 1065 * what we want, but better than using a struct direct. 1066 * 1067 * NB: does not handle corrupted directories. 1068 */ 1069int 1070ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred) 1071{ 1072 off_t off, m; 1073 struct dirtemplate dbuf; 1074 struct direct *dp = (struct direct *)&dbuf; 1075 int error, namlen; 1076 size_t count; 1077#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1078 1079 m = DIP(ip, size); 1080 for (off = 0; off < m; off += dp->d_reclen) { 1081 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1082 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); 1083 /* 1084 * Since we read MINDIRSIZ, residual must 1085 * be 0 unless we're at end of file. 1086 */ 1087 if (error || count != 0) 1088 return (0); 1089 /* avoid infinite loops */ 1090 if (dp->d_reclen == 0) 1091 return (0); 1092 /* skip empty entries */ 1093 if (dp->d_ino == 0) 1094 continue; 1095 /* accept only "." and ".." */ 1096# if (BYTE_ORDER == LITTLE_ENDIAN) 1097 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 1098 namlen = dp->d_namlen; 1099 else 1100 namlen = dp->d_type; 1101# else 1102 namlen = dp->d_namlen; 1103# endif 1104 if (namlen > 2) 1105 return (0); 1106 if (dp->d_name[0] != '.') 1107 return (0); 1108 /* 1109 * At this point namlen must be 1 or 2. 1110 * 1 implies ".", 2 implies ".." if second 1111 * char is also "." 1112 */ 1113 if (namlen == 1 && dp->d_ino == ip->i_number) 1114 continue; 1115 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1116 continue; 1117 return (0); 1118 } 1119 return (1); 1120} 1121 1122/* 1123 * Check if source directory is in the path of the target directory. 1124 * Target is supplied locked, source is unlocked. 1125 * The target is always vput before returning. 
1126 */ 1127int 1128ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) 1129{ 1130 struct vnode *vp; 1131 int error, rootino, namlen; 1132 struct dirtemplate dirbuf; 1133 1134 vp = ITOV(target); 1135 if (target->i_number == source->i_number) { 1136 error = EEXIST; 1137 goto out; 1138 } 1139 rootino = ROOTINO; 1140 error = 0; 1141 if (target->i_number == rootino) 1142 goto out; 1143 1144 for (;;) { 1145 if (vp->v_type != VDIR) { 1146 error = ENOTDIR; 1147 break; 1148 } 1149 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1150 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1151 IO_NODELOCKED, cred, NULL, (struct proc *)0); 1152 if (error != 0) 1153 break; 1154# if (BYTE_ORDER == LITTLE_ENDIAN) 1155 if (vp->v_mount->mnt_maxsymlinklen > 0) 1156 namlen = dirbuf.dotdot_namlen; 1157 else 1158 namlen = dirbuf.dotdot_type; 1159# else 1160 namlen = dirbuf.dotdot_namlen; 1161# endif 1162 if (namlen != 2 || 1163 dirbuf.dotdot_name[0] != '.' || 1164 dirbuf.dotdot_name[1] != '.') { 1165 error = ENOTDIR; 1166 break; 1167 } 1168 if (dirbuf.dotdot_ino == source->i_number) { 1169 error = EINVAL; 1170 break; 1171 } 1172 if (dirbuf.dotdot_ino == rootino) 1173 break; 1174 vput(vp); 1175 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); 1176 if (error) { 1177 vp = NULL; 1178 break; 1179 } 1180 } 1181 1182out: 1183 if (error == ENOTDIR) 1184 printf("checkpath: .. not a directory\n"); 1185 if (vp != NULL) 1186 vput(vp); 1187 return (error); 1188} 1189