ext2fs_lookup.c revision 1.32
1/* $OpenBSD: ext2fs_lookup.c,v 1.32 2014/07/10 09:24:18 pelikan Exp $ */ 2/* $NetBSD: ext2fs_lookup.c,v 1.16 2000/08/03 20:29:26 thorpej Exp $ */ 3 4/* 5 * Modified for NetBSD 1.2E 6 * May 1997, Manuel Bouyer 7 * Laboratoire d'informatique de Paris VI 8 */ 9/* 10 * modified for Lites 1.1 11 * 12 * Aug 1995, Godmar Back (gback@cs.utah.edu) 13 * University of Utah, Department of Computer Science 14 */ 15/* 16 * Copyright (c) 1989, 1993 17 * The Regents of the University of California. All rights reserved. 18 * (c) UNIX System Laboratories, Inc. 19 * All or some portions of this file are derived from material licensed 20 * to the University of California by American Telephone and Telegraph 21 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 22 * the permission of UNIX System Laboratories, Inc. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 3. Neither the name of the University nor the names of its contributors 33 * may be used to endorse or promote products derived from this software 34 * without specific prior written permission. 35 * 36 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 39 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 41 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 42 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 44 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 45 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 * 48 * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94 49 */ 50 51#include <sys/param.h> 52#include <sys/systm.h> 53#include <sys/namei.h> 54#include <sys/buf.h> 55#include <sys/file.h> 56#include <sys/mount.h> 57#include <sys/vnode.h> 58#include <sys/malloc.h> 59#include <sys/dirent.h> 60 61#include <ufs/ufs/quota.h> 62#include <ufs/ufs/inode.h> 63#include <ufs/ufs/ufsmount.h> 64#include <ufs/ufs/ufs_extern.h> 65 66#include <ufs/ext2fs/ext2fs_extern.h> 67#include <ufs/ext2fs/ext2fs_dir.h> 68#include <ufs/ext2fs/ext2fs.h> 69 70extern int dirchk; 71 72static void ext2fs_dirconv2ffs(struct ext2fs_direct *e2dir, 73 struct dirent *ffsdir); 74static int ext2fs_dirbadentry(struct vnode *dp, 75 struct ext2fs_direct *de, 76 int entryoffsetinblock); 77 78/* 79 * the problem that is tackled below is the fact that FFS 80 * includes the terminating zero on disk while EXT2FS doesn't 81 * this implies that we need to introduce some padding. 82 * For instance, a filename "sbin" has normally a reclen 12 83 * in EXT2, but 16 in FFS. 84 * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...' 85 * If it wasn't for that, the complete ufs code for directories would 86 * have worked w/o changes (except for the difference in DIRBLKSIZ) 87 */ 88static void 89ext2fs_dirconv2ffs(struct ext2fs_direct *e2dir, struct dirent *ffsdir) 90{ 91 memset(ffsdir, 0, sizeof(struct dirent)); 92 ffsdir->d_fileno = fs2h32(e2dir->e2d_ino); 93 ffsdir->d_namlen = e2dir->e2d_namlen; 94 95 ffsdir->d_type = DT_UNKNOWN; /* don't know more here */ 96#ifdef DIAGNOSTIC 97 /* 98 * XXX Rigth now this can't happen, but if one day 99 * MAXNAMLEN != E2FS_MAXNAMLEN we should handle this more gracefully ! 100 */ 101 /* XXX: e2d_namlen is to small for such comparison 102 if (e2dir->e2d_namlen > MAXNAMLEN) 103 panic("ext2fs: e2dir->e2d_namlen"); 104 */ 105#endif 106 strncpy(ffsdir->d_name, e2dir->e2d_name, ffsdir->d_namlen); 107 108 /* Godmar thinks: since e2dir->e2d_reclen can be big and means 109 nothing anyway, we compute our own reclen according to what 110 we think is right 111 */ 112 ffsdir->d_reclen = DIRENT_SIZE(ffsdir); 113} 114 115/* 116 * Vnode op for reading directories. 117 * 118 * Convert the on-disk entries to <sys/dirent.h> entries. 119 * the problem is that the conversion will blow up some entries by four bytes, 120 * so it can't be done in place. This is too bad. Right now the conversion is 121 * done entry by entry, the converted entry is sent via uiomove. 122 * 123 * XXX allocate a buffer, convert as many entries as possible, then send 124 * the whole buffer to uiomove 125 */ 126int 127ext2fs_readdir(void *v) 128{ 129 struct vop_readdir_args *ap = v; 130 struct uio *uio = ap->a_uio; 131 int error; 132 size_t e2fs_count, readcnt, entries; 133 struct vnode *vp = ap->a_vp; 134 struct m_ext2fs *fs = VTOI(vp)->i_e2fs; 135 136 struct ext2fs_direct *dp; 137 struct dirent dstd; 138 struct uio auio; 139 struct iovec aiov; 140 caddr_t dirbuf; 141 off_t off = uio->uio_offset; 142 int e2d_reclen; 143 144 if (vp->v_type != VDIR) 145 return (ENOTDIR); 146 147 e2fs_count = uio->uio_resid; 148 entries = (uio->uio_offset + e2fs_count) & (fs->e2fs_bsize - 1); 149 150 /* Make sure we don't return partial entries. */ 151 if (e2fs_count <= entries) 152 return (EINVAL); 153 154 e2fs_count -= entries; 155 auio = *uio; 156 auio.uio_iov = &aiov; 157 auio.uio_iovcnt = 1; 158 auio.uio_segflg = UIO_SYSSPACE; 159 aiov.iov_len = e2fs_count; 160 auio.uio_resid = e2fs_count; 161 dirbuf = malloc(e2fs_count, M_TEMP, M_WAITOK | M_ZERO); 162 aiov.iov_base = dirbuf; 163 164 error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); 165 if (error == 0) { 166 readcnt = e2fs_count - auio.uio_resid; 167 dp = (struct ext2fs_direct *) dirbuf; 168 while ((char *) dp < (char *) dirbuf + readcnt) { 169 e2d_reclen = fs2h16(dp->e2d_reclen); 170 if (e2d_reclen == 0) { 171 error = EIO; 172 break; 173 } 174 ext2fs_dirconv2ffs(dp, &dstd); 175 if(dstd.d_reclen > uio->uio_resid) { 176 break; 177 } 178 dstd.d_off = off + e2d_reclen; 179 if ((error = uiomove((caddr_t)&dstd, dstd.d_reclen, uio)) != 0) { 180 break; 181 } 182 off = off + e2d_reclen; 183 /* advance dp */ 184 dp = (struct ext2fs_direct *) ((char *)dp + e2d_reclen); 185 } 186 /* we need to correct uio_offset */ 187 uio->uio_offset = off; 188 } 189 free(dirbuf, M_TEMP); 190 *ap->a_eofflag = ext2fs_size(VTOI(ap->a_vp)) <= uio->uio_offset; 191 return (error); 192} 193 194/* 195 * Convert a component of a pathname into a pointer to a locked inode. 196 * This is a very central and rather complicated routine. 197 * If the file system is not maintained in a strict tree hierarchy, 198 * this can result in a deadlock situation (see comments in code below). 199 * 200 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 201 * on whether the name is to be looked up, created, renamed, or deleted. 202 * When CREATE, RENAME, or DELETE is specified, information usable in 203 * creating, renaming, or deleting a directory entry may be calculated. 204 * If flag has LOCKPARENT or'ed into it and the target of the pathname 205 * exists, lookup returns both the target and its parent directory locked. 206 * When creating or renaming and LOCKPARENT is specified, the target may 207 * not be ".". When deleting and LOCKPARENT is specified, the target may 208 * be "."., but the caller must check to ensure it does an vrele and vput 209 * instead of two vputs. 210 * 211 * Overall outline of ext2fs_lookup: 212 * 213 * check accessibility of directory 214 * look for name in cache, if found, then if at end of path 215 * and deleting or creating, drop it, else return name 216 * search for name in directory, to found or notfound 217 * notfound: 218 * if creating, return locked directory, leaving info on available slots 219 * else return error 220 * found: 221 * if at end of path and deleting, return information to allow delete 222 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 223 * inode and return info to allow rewrite 224 * if not at end, add name to cache; if at end and neither creating 225 * nor deleting, add name to cache 226 */ 227int 228ext2fs_lookup(void *v) 229{ 230 struct vop_lookup_args *ap = v; 231 struct vnode *vdp; /* vnode for directory being searched */ 232 struct inode *dp; /* inode for directory being searched */ 233 struct buf *bp; /* a buffer of directory entries */ 234 struct ext2fs_direct *ep; /* the current directory entry */ 235 int entryoffsetinblock; /* offset of ep in bp's buffer */ 236 enum {NONE, COMPACT, FOUND} slotstatus; 237 doff_t slotoffset; /* offset of area with free space */ 238 int slotsize; /* size of area at slotoffset */ 239 int slotfreespace; /* amount of space free in slot */ 240 int slotneeded; /* size of the entry we're seeking */ 241 int numdirpasses; /* strategy for directory search */ 242 doff_t endsearch; /* offset to end directory search */ 243 doff_t prevoff; /* prev entry dp->i_offset */ 244 struct vnode *pdp; /* saved dp during symlink work */ 245 struct vnode *tdp; /* returned by VFS_VGET */ 246 doff_t enduseful; /* pointer past last used dir slot */ 247 u_long bmask; /* block offset mask */ 248 int lockparent; /* 1 => lockparent flag is set */ 249 int wantparent; /* 1 => wantparent or lockparent flag */ 250 int namlen, error; 251 struct vnode **vpp = ap->a_vpp; 252 struct componentname *cnp = ap->a_cnp; 253 struct ucred *cred = cnp->cn_cred; 254 int flags = cnp->cn_flags; 255 int nameiop = cnp->cn_nameiop; 256 struct proc *p = cnp->cn_proc; 257 int dirblksize = VTOI(ap->a_dvp)->i_e2fs->e2fs_bsize; 258 259 bp = NULL; 260 slotoffset = -1; 261 *vpp = NULL; 262 vdp = ap->a_dvp; 263 dp = VTOI(vdp); 264 lockparent = flags & LOCKPARENT; 265 wantparent = flags & (LOCKPARENT|WANTPARENT); 266 267 /* 268 * Check accessiblity of directory. 269 */ 270 if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) 271 return (error); 272 273 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 274 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 275 return (EROFS); 276 277 /* 278 * We now have a segment name to search for, and a directory to search. 279 * 280 * Before tediously performing a linear scan of the directory, 281 * check the name cache to see if the directory/name pair 282 * we are looking for is known already. 283 */ 284 if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) 285 return (error); 286 287 /* 288 * Suppress search for slots unless creating 289 * file and at end of pathname, in which case 290 * we watch for a place to put the new file in 291 * case it doesn't already exist. 292 */ 293 slotstatus = FOUND; 294 slotfreespace = slotsize = slotneeded = 0; 295 if ((nameiop == CREATE || nameiop == RENAME) && 296 (flags & ISLASTCN)) { 297 slotstatus = NONE; 298 slotneeded = EXT2FS_DIRSIZ(cnp->cn_namelen); 299 } 300 301 /* 302 * If there is cached information on a previous search of 303 * this directory, pick up where we last left off. 304 * We cache only lookups as these are the most common 305 * and have the greatest payoff. Caching CREATE has little 306 * benefit as it usually must search the entire directory 307 * to determine that the entry does not exist. Caching the 308 * location of the last DELETE or RENAME has not reduced 309 * profiling time and hence has been removed in the interest 310 * of simplicity. 311 */ 312 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 313 if (nameiop != LOOKUP || dp->i_diroff == 0 || 314 dp->i_diroff >ext2fs_size(dp)) { 315 entryoffsetinblock = 0; 316 dp->i_offset = 0; 317 numdirpasses = 1; 318 } else { 319 dp->i_offset = dp->i_diroff; 320 if ((entryoffsetinblock = dp->i_offset & bmask) && 321 (error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, 322 NULL, &bp))) 323 return (error); 324 numdirpasses = 2; 325 } 326 prevoff = dp->i_offset; 327 endsearch = roundup(ext2fs_size(dp), dirblksize); 328 enduseful = 0; 329 330searchloop: 331 while (dp->i_offset < endsearch) { 332 /* 333 * If necessary, get the next directory block. 334 */ 335 if ((dp->i_offset & bmask) == 0) { 336 if (bp != NULL) 337 brelse(bp); 338 339 error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, 340 NULL, &bp); 341 if (error != 0) 342 return (error); 343 entryoffsetinblock = 0; 344 } 345 /* 346 * If still looking for a slot, and at a dirblksize 347 * boundary, have to start looking for free space again. 348 */ 349 if (slotstatus == NONE && 350 (entryoffsetinblock & (dirblksize - 1)) == 0) { 351 slotoffset = -1; 352 slotfreespace = 0; 353 } 354 /* 355 * Get pointer to next entry. 356 * Full validation checks are slow, so we only check 357 * enough to insure forward progress through the 358 * directory. Complete checks can be run by patching 359 * "dirchk" to be true. 360 */ 361 ep = (struct ext2fs_direct *) 362 ((char *)bp->b_data + entryoffsetinblock); 363 if (ep->e2d_reclen == 0 || 364 (dirchk && 365 ext2fs_dirbadentry(vdp, ep, entryoffsetinblock))) { 366 int i; 367 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 368 i = dirblksize - 369 (entryoffsetinblock & (dirblksize - 1)); 370 dp->i_offset += i; 371 entryoffsetinblock += i; 372 continue; 373 } 374 375 /* 376 * If an appropriate sized slot has not yet been found, 377 * check to see if one is available. Also accumulate space 378 * in the current block so that we can determine if 379 * compaction is viable. 380 */ 381 if (slotstatus != FOUND) { 382 int size = fs2h16(ep->e2d_reclen); 383 384 if (ep->e2d_ino != 0) 385 size -= EXT2FS_DIRSIZ(ep->e2d_namlen); 386 if (size > 0) { 387 if (size >= slotneeded) { 388 slotstatus = FOUND; 389 slotoffset = dp->i_offset; 390 slotsize = fs2h16(ep->e2d_reclen); 391 } else if (slotstatus == NONE) { 392 slotfreespace += size; 393 if (slotoffset == -1) 394 slotoffset = dp->i_offset; 395 if (slotfreespace >= slotneeded) { 396 slotstatus = COMPACT; 397 slotsize = dp->i_offset + 398 fs2h16(ep->e2d_reclen) - slotoffset; 399 } 400 } 401 } 402 } 403 404 /* 405 * Check for a name match. 406 */ 407 if (ep->e2d_ino) { 408 namlen = ep->e2d_namlen; 409 if (namlen == cnp->cn_namelen && 410 !memcmp(cnp->cn_nameptr, ep->e2d_name, 411 (unsigned)namlen)) { 412 /* 413 * Save directory entry's inode number and 414 * reclen in ndp->ni_ufs area, and release 415 * directory buffer. 416 */ 417 dp->i_ino = fs2h32(ep->e2d_ino); 418 dp->i_reclen = fs2h16(ep->e2d_reclen); 419 brelse(bp); 420 goto found; 421 } 422 } 423 prevoff = dp->i_offset; 424 dp->i_offset += fs2h16(ep->e2d_reclen); 425 entryoffsetinblock += fs2h16(ep->e2d_reclen); 426 if (ep->e2d_ino) 427 enduseful = dp->i_offset; 428 } 429/* notfound: */ 430 /* 431 * If we started in the middle of the directory and failed 432 * to find our target, we must check the beginning as well. 433 */ 434 if (numdirpasses == 2) { 435 numdirpasses--; 436 dp->i_offset = 0; 437 endsearch = dp->i_diroff; 438 goto searchloop; 439 } 440 if (bp != NULL) 441 brelse(bp); 442 /* 443 * If creating, and at end of pathname and current 444 * directory has not been removed, then can consider 445 * allowing file to be created. 446 */ 447 if ((nameiop == CREATE || nameiop == RENAME) && 448 (flags & ISLASTCN) && dp->i_e2fs_nlink != 0) { 449 /* 450 * Creation of files on a read-only mounted file system 451 * is pointless, so don't proceed any further. 452 */ 453 if (vdp->v_mount->mnt_flag & MNT_RDONLY) 454 return (EROFS); 455 /* 456 * Access for write is interpreted as allowing 457 * creation of files in the directory. 458 */ 459 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0) 460 return (error); 461 /* 462 * Return an indication of where the new directory 463 * entry should be put. If we didn't find a slot, 464 * then set dp->i_count to 0 indicating 465 * that the new slot belongs at the end of the 466 * directory. If we found a slot, then the new entry 467 * can be put in the range from dp->i_offset to 468 * dp->i_offset + dp->i_count. 469 */ 470 if (slotstatus == NONE) { 471 dp->i_offset = roundup(ext2fs_size(dp), dirblksize); 472 dp->i_count = 0; 473 enduseful = dp->i_offset; 474 } else { 475 dp->i_offset = slotoffset; 476 dp->i_count = slotsize; 477 if (enduseful < slotoffset + slotsize) 478 enduseful = slotoffset + slotsize; 479 } 480 dp->i_endoff = roundup(enduseful, dirblksize); 481 dp->i_flag |= IN_CHANGE | IN_UPDATE; 482 /* 483 * We return with the directory locked, so that 484 * the parameters we set up above will still be 485 * valid if we actually decide to do a direnter(). 486 * We return ni_vp == NULL to indicate that the entry 487 * does not currently exist; we leave a pointer to 488 * the (locked) directory inode in ndp->ni_dvp. 489 * The pathname buffer is saved so that the name 490 * can be obtained later. 491 * 492 * NB - if the directory is unlocked, then this 493 * information cannot be used. 494 */ 495 cnp->cn_flags |= SAVENAME; 496 if (!lockparent) { 497 VOP_UNLOCK(vdp, 0, p); 498 cnp->cn_flags |= PDIRUNLOCK; 499 } 500 return (EJUSTRETURN); 501 } 502 /* 503 * Insert name into cache (as non-existent) if appropriate. 504 */ 505 if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 506 cache_enter(vdp, *vpp, cnp); 507 return (ENOENT); 508 509found: 510 /* 511 * Check that directory length properly reflects presence 512 * of this entry. 513 */ 514 if (entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen) 515 > ext2fs_size(dp)) { 516 ufs_dirbad(dp, dp->i_offset, "i_size too small"); 517 error = ext2fs_setsize(dp, 518 entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen)); 519 if (error) { 520 brelse(bp); 521 return(error); 522 } 523 dp->i_flag |= IN_CHANGE | IN_UPDATE; 524 } 525 526 /* 527 * Found component in pathname. 528 * If the final component of path name, save information 529 * in the cache as to where the entry was found. 530 */ 531 if ((flags & ISLASTCN) && nameiop == LOOKUP) 532 dp->i_diroff = dp->i_offset &~ (dirblksize - 1); 533 534 /* 535 * If deleting, and at end of pathname, return 536 * parameters which can be used to remove file. 537 * If the wantparent flag isn't set, we return only 538 * the directory (in ndp->ni_dvp), otherwise we go 539 * on and lock the inode, being careful with ".". 540 */ 541 if (nameiop == DELETE && (flags & ISLASTCN)) { 542 /* 543 * Write access to directory required to delete files. 544 */ 545 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0) 546 return (error); 547 /* 548 * Return pointer to current entry in dp->i_offset, 549 * and distance past previous entry (if there 550 * is a previous entry in this block) in dp->i_count. 551 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 552 */ 553 if ((dp->i_offset & (dirblksize - 1)) == 0) 554 dp->i_count = 0; 555 else 556 dp->i_count = dp->i_offset - prevoff; 557 if (dp->i_number == dp->i_ino) { 558 vref(vdp); 559 *vpp = vdp; 560 return (0); 561 } 562 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) 563 return (error); 564 /* 565 * If directory is "sticky", then user must own 566 * the directory, or the file in it, else she 567 * may not delete it (unless she's root). This 568 * implements append-only directories. 569 */ 570 if ((dp->i_e2fs_mode & ISVTX) && 571 cred->cr_uid != 0 && 572 cred->cr_uid != dp->i_e2fs_uid && 573 VTOI(tdp)->i_e2fs_uid != cred->cr_uid) { 574 vput(tdp); 575 return (EPERM); 576 } 577 *vpp = tdp; 578 if (!lockparent) { 579 VOP_UNLOCK(vdp, 0, p); 580 cnp->cn_flags |= PDIRUNLOCK; 581 } 582 return (0); 583 } 584 585 /* 586 * If rewriting (RENAME), return the inode and the 587 * information required to rewrite the present directory 588 * Must get inode of directory entry to verify it's a 589 * regular file, or empty directory. 590 */ 591 if (nameiop == RENAME && wantparent && 592 (flags & ISLASTCN)) { 593 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0) 594 return (error); 595 /* 596 * Careful about locking second inode. 597 * This can only occur if the target is ".". 598 */ 599 if (dp->i_number == dp->i_ino) 600 return (EISDIR); 601 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) 602 return (error); 603 *vpp = tdp; 604 cnp->cn_flags |= SAVENAME; 605 if (!lockparent) { 606 VOP_UNLOCK(vdp, 0, p); 607 cnp->cn_flags |= PDIRUNLOCK; 608 } 609 return (0); 610 } 611 612 /* 613 * Step through the translation in the name. We do not `vput' the 614 * directory because we may need it again if a symbolic link 615 * is relative to the current directory. Instead we save it 616 * unlocked as "pdp". We must get the target inode before unlocking 617 * the directory to insure that the inode will not be removed 618 * before we get it. We prevent deadlock by always fetching 619 * inodes from the root, moving down the directory tree. Thus 620 * when following backward pointers ".." we must unlock the 621 * parent directory before getting the requested directory. 622 * There is a potential race condition here if both the current 623 * and parent directories are removed before the VFS_VGET for the 624 * inode associated with ".." returns. We hope that this occurs 625 * infrequently since we cannot avoid this race condition without 626 * implementing a sophisticated deadlock detection algorithm. 627 * Note also that this simple deadlock detection scheme will not 628 * work if the file system has any hard links other than ".." 629 * that point backwards in the directory structure. 630 */ 631 pdp = vdp; 632 if (flags & ISDOTDOT) { 633 VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ 634 cnp->cn_flags |= PDIRUNLOCK; 635 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) { 636 if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0) 637 cnp->cn_flags &= ~PDIRUNLOCK; 638 return (error); 639 } 640 if (lockparent && (flags & ISLASTCN)) { 641 if ((error = vn_lock(pdp, LK_EXCLUSIVE, p)) != 0) { 642 vput(tdp); 643 return (error); 644 } 645 cnp->cn_flags &= ~PDIRUNLOCK; 646 } 647 *vpp = tdp; 648 } else if (dp->i_number == dp->i_ino) { 649 vref(vdp); /* we want ourself, ie "." */ 650 *vpp = vdp; 651 } else { 652 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) 653 return (error); 654 if (!lockparent || !(flags & ISLASTCN)) { 655 VOP_UNLOCK(pdp, 0, p); 656 cnp->cn_flags |= PDIRUNLOCK; 657 } 658 *vpp = tdp; 659 } 660 661 /* 662 * Insert name into cache if appropriate. 663 */ 664 if (cnp->cn_flags & MAKEENTRY) 665 cache_enter(vdp, *vpp, cnp); 666 return (0); 667} 668 669/* 670 * Do consistency checking on a directory entry: 671 * record length must be multiple of 4 672 * entry must fit in rest of its dirblksize block 673 * record must be large enough to contain entry 674 * name is not longer than MAXNAMLEN 675 * name must be as long as advertised, and null terminated 676 */ 677/* 678 * changed so that it confirms to ext2fs_check_dir_entry 679 */ 680static int 681ext2fs_dirbadentry(struct vnode *dp, struct ext2fs_direct *de, 682 int entryoffsetinblock) 683{ 684 int dirblksize = VTOI(dp)->i_e2fs->e2fs_bsize; 685 686 char * error_msg = NULL; 687 int reclen = fs2h16(de->e2d_reclen); 688 int namlen = de->e2d_namlen; 689 690 if (reclen < EXT2FS_DIRSIZ(1)) /* e2d_namlen = 1 */ 691 error_msg = "rec_len is smaller than minimal"; 692 else if (reclen % 4 != 0) 693 error_msg = "rec_len % 4 != 0"; 694 else if (reclen < EXT2FS_DIRSIZ(namlen)) 695 error_msg = "reclen is too small for name_len"; 696 else if (entryoffsetinblock + reclen > dirblksize) 697 error_msg = "directory entry across blocks"; 698 else if (fs2h32(de->e2d_ino) > 699 VTOI(dp)->i_e2fs->e2fs.e2fs_icount) 700 error_msg = "inode out of bounds"; 701 702 if (error_msg != NULL) { 703 printf( "bad directory entry: %s\n" 704 "offset=%d, inode=%lu, rec_len=%d, name_len=%d \n", 705 error_msg, entryoffsetinblock, 706 (unsigned long) fs2h32(de->e2d_ino), 707 reclen, namlen); 708 panic("ext2fs_dirbadentry"); 709 } 710 return error_msg == NULL ? 0 : 1; 711} 712 713/* 714 * Write a directory entry after a call to namei, using the parameters 715 * that it left in nameidata. The argument ip is the inode which the new 716 * directory entry will refer to. Dvp is a pointer to the directory to 717 * be written, which was left locked by namei. Remaining parameters 718 * (dp->i_offset, dp->i_count) indicate how the space for the new 719 * entry is to be obtained. 720 */ 721int 722ext2fs_direnter(struct inode *ip, struct vnode *dvp, 723 struct componentname *cnp) 724{ 725 struct ext2fs_direct *ep, *nep; 726 struct inode *dp; 727 struct buf *bp; 728 struct ext2fs_direct newdir; 729 struct iovec aiov; 730 struct uio auio; 731 u_int dsize; 732 int error, loc, newentrysize, spacefree; 733 char *dirbuf; 734 int dirblksize = ip->i_e2fs->e2fs_bsize; 735 736 737#ifdef DIAGNOSTIC 738 if ((cnp->cn_flags & SAVENAME) == 0) 739 panic("direnter: missing name"); 740#endif 741 dp = VTOI(dvp); 742 newdir.e2d_ino = h2fs32(ip->i_number); 743 newdir.e2d_namlen = cnp->cn_namelen; 744 if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 && 745 (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) { 746 newdir.e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode)); 747 } else { 748 newdir.e2d_type = 0; 749 }; 750 memcpy(newdir.e2d_name, cnp->cn_nameptr, (unsigned)cnp->cn_namelen + 1); 751 newentrysize = EXT2FS_DIRSIZ(cnp->cn_namelen); 752 if (dp->i_count == 0) { 753 /* 754 * If dp->i_count is 0, then namei could find no 755 * space in the directory. Here, dp->i_offset will 756 * be on a directory block boundary and we will write the 757 * new entry into a fresh block. 758 */ 759 if (dp->i_offset & (dirblksize - 1)) 760 panic("ext2fs_direnter: newblk"); 761 auio.uio_offset = dp->i_offset; 762 newdir.e2d_reclen = h2fs16(dirblksize); 763 auio.uio_resid = newentrysize; 764 aiov.iov_len = newentrysize; 765 aiov.iov_base = (caddr_t)&newdir; 766 auio.uio_iov = &aiov; 767 auio.uio_iovcnt = 1; 768 auio.uio_rw = UIO_WRITE; 769 auio.uio_segflg = UIO_SYSSPACE; 770 auio.uio_procp = (struct proc *)0; 771 error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); 772 if (dirblksize > 773 VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 774 /* XXX should grow with balloc() */ 775 panic("ext2fs_direnter: frag size"); 776 else if (!error) { 777 error = ext2fs_setsize(dp, 778 roundup(ext2fs_size(dp), dirblksize)); 779 if (error) 780 return (error); 781 dp->i_flag |= IN_CHANGE; 782 } 783 return (error); 784 } 785 786 /* 787 * If dp->i_count is non-zero, then namei found space 788 * for the new entry in the range dp->i_offset to 789 * dp->i_offset + dp->i_count in the directory. 790 * To use this space, we may have to compact the entries located 791 * there, by copying them together towards the beginning of the 792 * block, leaving the free space in one usable chunk at the end. 793 */ 794 795 /* 796 * Get the block containing the space for the new directory entry. 797 */ 798 if ((error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, &dirbuf, &bp)) 799 != 0) 800 return (error); 801 /* 802 * Find space for the new entry. In the simple case, the entry at 803 * offset base will have the space. If it does not, then namei 804 * arranged that compacting the region dp->i_offset to 805 * dp->i_offset + dp->i_count would yield the 806 * space. 807 */ 808 ep = (struct ext2fs_direct *)dirbuf; 809 dsize = EXT2FS_DIRSIZ(ep->e2d_namlen); 810 spacefree = fs2h16(ep->e2d_reclen) - dsize; 811 for (loc = fs2h16(ep->e2d_reclen); loc < dp->i_count; ) { 812 nep = (struct ext2fs_direct *)(dirbuf + loc); 813 if (ep->e2d_ino) { 814 /* trim the existing slot */ 815 ep->e2d_reclen = h2fs16(dsize); 816 ep = (struct ext2fs_direct *)((char *)ep + dsize); 817 } else { 818 /* overwrite; nothing there; header is ours */ 819 spacefree += dsize; 820 } 821 dsize = EXT2FS_DIRSIZ(nep->e2d_namlen); 822 spacefree += fs2h16(nep->e2d_reclen) - dsize; 823 loc += fs2h16(nep->e2d_reclen); 824 memcpy(ep, nep, dsize); 825 } 826 /* 827 * Update the pointer fields in the previous entry (if any), 828 * copy in the new entry, and write out the block. 829 */ 830 if (ep->e2d_ino == 0) { 831#ifdef DIAGNOSTIC 832 if (spacefree + dsize < newentrysize) 833 panic("ext2fs_direnter: compact1"); 834#endif 835 newdir.e2d_reclen = h2fs16(spacefree + dsize); 836 } else { 837#ifdef DIAGNOSTIC 838 if (spacefree < newentrysize) { 839 printf("ext2fs_direnter: compact2 %u %u", 840 (u_int)spacefree, (u_int)newentrysize); 841 panic("ext2fs_direnter: compact2"); 842 } 843#endif 844 newdir.e2d_reclen = h2fs16(spacefree); 845 ep->e2d_reclen = h2fs16(dsize); 846 ep = (struct ext2fs_direct *)((char *)ep + dsize); 847 } 848 memcpy(ep, &newdir, newentrysize); 849 error = VOP_BWRITE(bp); 850 dp->i_flag |= IN_CHANGE | IN_UPDATE; 851 if (!error && dp->i_endoff && dp->i_endoff < ext2fs_size(dp)) 852 error = ext2fs_truncate(dp, (off_t)dp->i_endoff, IO_SYNC, 853 cnp->cn_cred); 854 return (error); 855} 856 857/* 858 * Remove a directory entry after a call to namei, using 859 * the parameters which it left in nameidata. The entry 860 * dp->i_offset contains the offset into the directory of the 861 * entry to be eliminated. The dp->i_count field contains the 862 * size of the previous record in the directory. If this 863 * is 0, the first entry is being deleted, so we need only 864 * zero the inode number to mark the entry as free. If the 865 * entry is not the first in the directory, we must reclaim 866 * the space of the now empty record by adding the record size 867 * to the size of the previous entry. 868 */ 869int 870ext2fs_dirremove(struct vnode *dvp, struct componentname *cnp) 871{ 872 struct inode *dp; 873 struct ext2fs_direct *ep; 874 struct buf *bp; 875 int error; 876 877 dp = VTOI(dvp); 878 if (dp->i_count == 0) { 879 /* 880 * First entry in block: set d_ino to zero. 881 */ 882 error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep, 883 &bp); 884 if (error != 0) 885 return (error); 886 ep->e2d_ino = 0; 887 error = VOP_BWRITE(bp); 888 dp->i_flag |= IN_CHANGE | IN_UPDATE; 889 return (error); 890 } 891 /* 892 * Collapse new free space into previous entry. 893 */ 894 error = ext2fs_bufatoff(dp, (off_t)(dp->i_offset - dp->i_count), 895 (char **)&ep, &bp); 896 if (error != 0) 897 return (error); 898 ep->e2d_reclen = h2fs16(fs2h16(ep->e2d_reclen) + dp->i_reclen); 899 error = VOP_BWRITE(bp); 900 dp->i_flag |= IN_CHANGE | IN_UPDATE; 901 return (error); 902} 903 904/* 905 * Rewrite an existing directory entry to point at the inode 906 * supplied. The parameters describing the directory entry are 907 * set up by a call to namei. 908 */ 909int 910ext2fs_dirrewrite(struct inode *dp, struct inode *ip, 911 struct componentname *cnp) 912{ 913 struct buf *bp; 914 struct ext2fs_direct *ep; 915 int error; 916 917 error = ext2fs_bufatoff(dp, (off_t)dp->i_offset, (char **)&ep, &bp); 918 if (error != 0) 919 return (error); 920 ep->e2d_ino = h2fs32(ip->i_number); 921 if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 && 922 (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) { 923 ep->e2d_type = inot2ext2dt(IFTODT(ip->i_e2fs_mode)); 924 } else { 925 ep->e2d_type = 0; 926 } 927 error = VOP_BWRITE(bp); 928 dp->i_flag |= IN_CHANGE | IN_UPDATE; 929 return (error); 930} 931 932/* 933 * Check if a directory is empty or not. 934 * Inode supplied must be locked. 935 * 936 * Using a struct dirtemplate here is not precisely 937 * what we want, but better than using a struct ext2fs_direct. 938 * 939 * NB: does not handle corrupted directories. 940 */ 941int 942ext2fs_dirempty(struct inode *ip, ufsino_t parentino, struct ucred *cred) 943{ 944 off_t off; 945 struct ext2fs_dirtemplate dbuf; 946 struct ext2fs_direct *dp = (struct ext2fs_direct *)&dbuf; 947 int error, namlen; 948 size_t count; 949 950#define MINDIRSIZ (sizeof (struct ext2fs_dirtemplate) / 2) 951 952 for (off = 0; off < ext2fs_size(ip); off += fs2h16(dp->e2d_reclen)) { 953 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 954 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, curproc); 955 /* 956 * Since we read MINDIRSIZ, residual must 957 * be 0 unless we're at end of file. 958 */ 959 if (error || count != 0) 960 return (0); 961 /* avoid infinite loops */ 962 if (dp->e2d_reclen == 0) 963 return (0); 964 /* skip empty entries */ 965 if (dp->e2d_ino == 0) 966 continue; 967 /* accept only "." and ".." */ 968 namlen = dp->e2d_namlen; 969 if (namlen > 2) 970 return (0); 971 if (dp->e2d_name[0] != '.') 972 return (0); 973 /* 974 * At this point namlen must be 1 or 2. 975 * 1 implies ".", 2 implies ".." if second 976 * char is also "." 977 */ 978 if (namlen == 1) 979 continue; 980 if (dp->e2d_name[1] == '.' && fs2h32(dp->e2d_ino) == parentino) 981 continue; 982 return (0); 983 } 984 return (1); 985} 986 987/* 988 * Check if source directory is in the path of the target directory. 989 * Target is supplied locked, source is unlocked. 990 * The target is always vput before returning. 991 */ 992int 993ext2fs_checkpath(struct inode *source, struct inode *target, 994 struct ucred *cred) 995{ 996 struct vnode *vp; 997 int error, rootino, namlen; 998 struct ext2fs_dirtemplate dirbuf; 999 u_int32_t ino; 1000 1001 vp = ITOV(target); 1002 if (target->i_number == source->i_number) { 1003 error = EEXIST; 1004 goto out; 1005 } 1006 rootino = ROOTINO; 1007 error = 0; 1008 if (target->i_number == rootino) 1009 goto out; 1010 1011 for (;;) { 1012 if (vp->v_type != VDIR) { 1013 error = ENOTDIR; 1014 break; 1015 } 1016 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1017 sizeof (struct ext2fs_dirtemplate), (off_t)0, 1018 UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, 1019 curproc); 1020 if (error != 0) 1021 break; 1022 namlen = dirbuf.dotdot_namlen; 1023 if (namlen != 2 || 1024 dirbuf.dotdot_name[0] != '.' || 1025 dirbuf.dotdot_name[1] != '.') { 1026 error = ENOTDIR; 1027 break; 1028 } 1029 ino = fs2h32(dirbuf.dotdot_ino); 1030 if (ino == source->i_number) { 1031 error = EINVAL; 1032 break; 1033 } 1034 if (ino == rootino) 1035 break; 1036 vput(vp); 1037 error = VFS_VGET(vp->v_mount, ino, &vp); 1038 if (error != 0) { 1039 vp = NULL; 1040 break; 1041 } 1042 } 1043 1044out: 1045 if (error == ENOTDIR) { 1046 printf("checkpath: .. not a directory\n"); 1047 panic("checkpath"); 1048 } 1049 if (vp != NULL) 1050 vput(vp); 1051 return (error); 1052} 1053