ext2_lookup.c revision 111742
1/* 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1993 9 * The Regents of the University of California. All rights reserved. 10 * (c) UNIX System Laboratories, Inc. 11 * All or some portions of this file are derived from material licensed 12 * to the University of California by American Telephone and Telegraph 13 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 14 * the permission of UNIX System Laboratories, Inc. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. All advertising materials mentioning features or use of this software 25 * must display the following acknowledgement: 26 * This product includes software developed by the University of 27 * California, Berkeley and its contributors. 28 * 4. Neither the name of the University nor the names of its contributors 29 * may be used to endorse or promote products derived from this software 30 * without specific prior written permission. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 42 * SUCH DAMAGE. 43 * 44 * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94 45 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_lookup.c 111742 2003-03-02 15:56:49Z des $ 46 */ 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/namei.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/mount.h> 54#include <sys/vnode.h> 55#include <sys/malloc.h> 56#include <sys/dirent.h> 57#include <sys/sysctl.h> 58 59#include <ufs/ufs/dir.h> 60 61#include <gnu/ext2fs/inode.h> 62#include <gnu/ext2fs/ext2_mount.h> 63#include <gnu/ext2fs/ext2_extern.h> 64#include <gnu/ext2fs/ext2_fs.h> 65#include <gnu/ext2fs/ext2_fs_sb.h> 66 67#ifdef DIAGNOSTIC 68static int dirchk = 1; 69#else 70static int dirchk = 0; 71#endif 72 73SYSCTL_NODE(_vfs, OID_AUTO, e2fs, CTLFLAG_RD, 0, "EXT2FS filesystem"); 74SYSCTL_INT(_vfs_e2fs, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); 75 76/* 77 DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512) 78 while it is the native blocksize in ext2fs - thus, a #define 79 is no longer appropriate 80*/ 81#undef DIRBLKSIZ 82 83static u_char ext2_ft_to_dt[] = { 84 DT_UNKNOWN, /* EXT2_FT_UNKNOWN */ 85 DT_REG, /* EXT2_FT_REG_FILE */ 86 DT_DIR, /* EXT2_FT_DIR */ 87 DT_CHR, /* EXT2_FT_CHRDEV */ 88 DT_BLK, /* EXT2_FT_BLKDEV */ 89 DT_FIFO, /* EXT2_FT_FIFO */ 90 DT_SOCK, /* EXT2_FT_SOCK */ 91 DT_LNK, /* EXT2_FT_SYMLINK */ 92}; 93#define FTTODT(ft) \ 94 ((ft) > sizeof(ext2_ft_to_dt) / sizeof(ext2_ft_to_dt[0]) ? \ 95 DT_UNKNOWN : ext2_ft_to_dt[(ft)]) 96 97static u_char dt_to_ext2_ft[] = { 98 EXT2_FT_UNKNOWN, /* DT_UNKNOWN */ 99 EXT2_FT_FIFO, /* DT_FIFO */ 100 EXT2_FT_CHRDEV, /* DT_CHR */ 101 EXT2_FT_UNKNOWN, /* unused */ 102 EXT2_FT_DIR, /* DT_DIR */ 103 EXT2_FT_UNKNOWN, /* unused */ 104 EXT2_FT_BLKDEV, /* DT_BLK */ 105 EXT2_FT_UNKNOWN, /* unused */ 106 EXT2_FT_REG_FILE, /* DT_REG */ 107 EXT2_FT_UNKNOWN, /* unused */ 108 EXT2_FT_SYMLINK, /* DT_LNK */ 109 EXT2_FT_UNKNOWN, /* unused */ 110 EXT2_FT_SOCK, /* DT_SOCK */ 111 EXT2_FT_UNKNOWN, /* unused */ 112 EXT2_FT_UNKNOWN, /* DT_WHT */ 113}; 114#define DTTOFT(dt) \ 115 ((dt) > sizeof(dt_to_ext2_ft) / sizeof(dt_to_ext2_ft[0]) ? \ 116 EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)]) 117 118static int ext2_dirbadentry(struct vnode *dp, struct ext2_dir_entry_2 *de, 119 int entryoffsetinblock); 120 121/* 122 * Vnode op for reading directories. 123 * 124 * The routine below assumes that the on-disk format of a directory 125 * is the same as that defined by <sys/dirent.h>. If the on-disk 126 * format changes, then it will be necessary to do a conversion 127 * from the on-disk format that read returns to the format defined 128 * by <sys/dirent.h>. 129 */ 130/* 131 * this is exactly what we do here - the problem is that the conversion 132 * will blow up some entries by four bytes, so it can't be done in place. 133 * This is too bad. Right now the conversion is done entry by entry, the 134 * converted entry is sent via uiomove. 135 * 136 * XXX allocate a buffer, convert as many entries as possible, then send 137 * the whole buffer to uiomove 138 */ 139int 140ext2_readdir(ap) 141 struct vop_readdir_args /* { 142 struct vnode *a_vp; 143 struct uio *a_uio; 144 struct ucred *a_cred; 145 } */ *ap; 146{ 147 struct uio *uio = ap->a_uio; 148 int count, error; 149 150 struct ext2_dir_entry_2 *edp, *dp; 151 int ncookies; 152 struct dirent dstdp; 153 struct uio auio; 154 struct iovec aiov; 155 caddr_t dirbuf; 156 int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize; 157 int readcnt; 158 off_t startoffset = uio->uio_offset; 159 160 count = uio->uio_resid; 161 /* 162 * Avoid complications for partial directory entries by adjusting 163 * the i/o to end at a block boundary. Don't give up (like ufs 164 * does) if the initial adjustment gives a negative count, since 165 * many callers don't supply a large enough buffer. The correct 166 * size is a little larger than DIRBLKSIZ to allow for expansion 167 * of directory entries, but some callers just use 512. 168 */ 169 count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); 170 if (count <= 0) 171 count += DIRBLKSIZ; 172 173#ifdef EXT2FS_DEBUG 174 printf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n", 175 uio->uio_offset, uio->uio_resid, count); 176#endif 177 178 auio = *uio; 179 auio.uio_iov = &aiov; 180 auio.uio_iovcnt = 1; 181 auio.uio_resid = count; 182 auio.uio_segflg = UIO_SYSSPACE; 183 aiov.iov_len = count; 184 MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); 185 aiov.iov_base = dirbuf; 186 error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); 187 if (error == 0) { 188 readcnt = count - auio.uio_resid; 189 edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt]; 190 ncookies = 0; 191 bzero(&dstdp, offsetof(struct dirent, d_name)); 192 for (dp = (struct ext2_dir_entry_2 *)dirbuf; 193 !error && uio->uio_resid > 0 && dp < edp; ) { 194 /*- 195 * "New" ext2fs directory entries differ in 3 ways 196 * from ufs on-disk ones: 197 * - the name is not necessarily NUL-terminated. 198 * - the file type field always exists and always 199 * follows the name length field. 200 * - the file type is encoded in a different way. 201 * 202 * "Old" ext2fs directory entries need no special 203 * conversions, since they binary compatible with 204 * "new" entries having a file type of 0 (i.e., 205 * EXT2_FT_UNKNOWN). Splitting the old name length 206 * field didn't make a mess like it did in ufs, 207 * because ext2fs uses a machine-dependent disk 208 * layout. 209 */ 210 dstdp.d_fileno = dp->inode; 211 dstdp.d_type = FTTODT(dp->file_type); 212 dstdp.d_namlen = dp->name_len; 213 dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); 214 bcopy(dp->name, dstdp.d_name, dstdp.d_namlen); 215 bzero(dstdp.d_name + dstdp.d_namlen, 216 dstdp.d_reclen - offsetof(struct dirent, d_name) - 217 dstdp.d_namlen); 218 219 if (dp->rec_len > 0) { 220 if(dstdp.d_reclen <= uio->uio_resid) { 221 /* advance dp */ 222 dp = (struct ext2_dir_entry_2 *) 223 ((char *)dp + dp->rec_len); 224 error = 225 uiomove(&dstdp, dstdp.d_reclen, uio); 226 if (!error) 227 ncookies++; 228 } else 229 break; 230 } else { 231 error = EIO; 232 break; 233 } 234 } 235 /* we need to correct uio_offset */ 236 uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; 237 238 if (!error && ap->a_ncookies != NULL) { 239 u_long *cookiep, *cookies, *ecookies; 240 off_t off; 241 242 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 243 panic("ext2fs_readdir: unexpected uio from NFS server"); 244 MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, 245 M_WAITOK); 246 off = startoffset; 247 for (dp = (struct ext2_dir_entry_2 *)dirbuf, 248 cookiep = cookies, ecookies = cookies + ncookies; 249 cookiep < ecookies; 250 dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) { 251 off += dp->rec_len; 252 *cookiep++ = (u_long) off; 253 } 254 *ap->a_ncookies = ncookies; 255 *ap->a_cookies = cookies; 256 } 257 } 258 FREE(dirbuf, M_TEMP); 259 if (ap->a_eofflag) 260 *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; 261 return (error); 262} 263 264/* 265 * Convert a component of a pathname into a pointer to a locked inode. 266 * This is a very central and rather complicated routine. 267 * If the file system is not maintained in a strict tree hierarchy, 268 * this can result in a deadlock situation (see comments in code below). 269 * 270 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 271 * on whether the name is to be looked up, created, renamed, or deleted. 272 * When CREATE, RENAME, or DELETE is specified, information usable in 273 * creating, renaming, or deleting a directory entry may be calculated. 274 * If flag has LOCKPARENT or'ed into it and the target of the pathname 275 * exists, lookup returns both the target and its parent directory locked. 276 * When creating or renaming and LOCKPARENT is specified, the target may 277 * not be ".". When deleting and LOCKPARENT is specified, the target may 278 * be "."., but the caller must check to ensure it does an vrele and vput 279 * instead of two vputs. 280 * 281 * Overall outline of ufs_lookup: 282 * 283 * search for name in directory, to found or notfound 284 * notfound: 285 * if creating, return locked directory, leaving info on available slots 286 * else return error 287 * found: 288 * if at end of path and deleting, return information to allow delete 289 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 290 * inode and return info to allow rewrite 291 * if not at end, add name to cache; if at end and neither creating 292 * nor deleting, add name to cache 293 */ 294int 295ext2_lookup(ap) 296 struct vop_cachedlookup_args /* { 297 struct vnode *a_dvp; 298 struct vnode **a_vpp; 299 struct componentname *a_cnp; 300 } */ *ap; 301{ 302 struct vnode *vdp; /* vnode for directory being searched */ 303 struct inode *dp; /* inode for directory being searched */ 304 struct buf *bp; /* a buffer of directory entries */ 305 struct ext2_dir_entry_2 *ep; /* the current directory entry */ 306 int entryoffsetinblock; /* offset of ep in bp's buffer */ 307 enum {NONE, COMPACT, FOUND} slotstatus; 308 doff_t slotoffset; /* offset of area with free space */ 309 int slotsize; /* size of area at slotoffset */ 310 int slotfreespace; /* amount of space free in slot */ 311 int slotneeded; /* size of the entry we're seeking */ 312 int numdirpasses; /* strategy for directory search */ 313 doff_t endsearch; /* offset to end directory search */ 314 doff_t prevoff; /* prev entry dp->i_offset */ 315 struct vnode *pdp; /* saved dp during symlink work */ 316 struct vnode *tdp; /* returned by VFS_VGET */ 317 doff_t enduseful; /* pointer past last used dir slot */ 318 u_long bmask; /* block offset mask */ 319 int lockparent; /* 1 => lockparent flag is set */ 320 int wantparent; /* 1 => wantparent or lockparent flag */ 321 int namlen, error; 322 struct vnode **vpp = ap->a_vpp; 323 struct componentname *cnp = ap->a_cnp; 324 struct ucred *cred = cnp->cn_cred; 325 int flags = cnp->cn_flags; 326 int nameiop = cnp->cn_nameiop; 327 struct thread *td = cnp->cn_thread; 328 329 int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize; 330 331 bp = NULL; 332 slotoffset = -1; 333 *vpp = NULL; 334 vdp = ap->a_dvp; 335 dp = VTOI(vdp); 336 lockparent = flags & LOCKPARENT; 337 wantparent = flags & (LOCKPARENT|WANTPARENT); 338 339 /* 340 * We now have a segment name to search for, and a directory to search. 341 */ 342 343 /* 344 * Suppress search for slots unless creating 345 * file and at end of pathname, in which case 346 * we watch for a place to put the new file in 347 * case it doesn't already exist. 348 */ 349 slotstatus = FOUND; 350 slotfreespace = slotsize = slotneeded = 0; 351 if ((nameiop == CREATE || nameiop == RENAME) && 352 (flags & ISLASTCN)) { 353 slotstatus = NONE; 354 slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen); 355 /* was 356 slotneeded = (sizeof(struct direct) - MAXNAMLEN + 357 cnp->cn_namelen + 3) &~ 3; */ 358 } 359 360 /* 361 * If there is cached information on a previous search of 362 * this directory, pick up where we last left off. 363 * We cache only lookups as these are the most common 364 * and have the greatest payoff. Caching CREATE has little 365 * benefit as it usually must search the entire directory 366 * to determine that the entry does not exist. Caching the 367 * location of the last DELETE or RENAME has not reduced 368 * profiling time and hence has been removed in the interest 369 * of simplicity. 370 */ 371 bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 372 if (nameiop != LOOKUP || dp->i_diroff == 0 || 373 dp->i_diroff > dp->i_size) { 374 entryoffsetinblock = 0; 375 dp->i_offset = 0; 376 numdirpasses = 1; 377 } else { 378 dp->i_offset = dp->i_diroff; 379 if ((entryoffsetinblock = dp->i_offset & bmask) && 380 (error = ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL, 381 &bp))) 382 return (error); 383 numdirpasses = 2; 384 nchstats.ncs_2passes++; 385 } 386 prevoff = dp->i_offset; 387 endsearch = roundup(dp->i_size, DIRBLKSIZ); 388 enduseful = 0; 389 390searchloop: 391 while (dp->i_offset < endsearch) { 392 /* 393 * If necessary, get the next directory block. 394 */ 395 if ((dp->i_offset & bmask) == 0) { 396 if (bp != NULL) 397 brelse(bp); 398 if ((error = 399 ext2_blkatoff(vdp, (off_t)dp->i_offset, NULL, 400 &bp)) != 0) 401 return (error); 402 entryoffsetinblock = 0; 403 } 404 /* 405 * If still looking for a slot, and at a DIRBLKSIZE 406 * boundary, have to start looking for free space again. 407 */ 408 if (slotstatus == NONE && 409 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 410 slotoffset = -1; 411 slotfreespace = 0; 412 } 413 /* 414 * Get pointer to next entry. 415 * Full validation checks are slow, so we only check 416 * enough to insure forward progress through the 417 * directory. Complete checks can be run by setting 418 * "vfs.e2fs.dirchk" to be true. 419 */ 420 ep = (struct ext2_dir_entry_2 *) 421 ((char *)bp->b_data + entryoffsetinblock); 422 if (ep->rec_len == 0 || 423 (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) { 424 int i; 425 ext2_dirbad(dp, dp->i_offset, "mangled entry"); 426 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 427 dp->i_offset += i; 428 entryoffsetinblock += i; 429 continue; 430 } 431 432 /* 433 * If an appropriate sized slot has not yet been found, 434 * check to see if one is available. Also accumulate space 435 * in the current block so that we can determine if 436 * compaction is viable. 437 */ 438 if (slotstatus != FOUND) { 439 int size = ep->rec_len; 440 441 if (ep->inode != 0) 442 size -= EXT2_DIR_REC_LEN(ep->name_len); 443 if (size > 0) { 444 if (size >= slotneeded) { 445 slotstatus = FOUND; 446 slotoffset = dp->i_offset; 447 slotsize = ep->rec_len; 448 } else if (slotstatus == NONE) { 449 slotfreespace += size; 450 if (slotoffset == -1) 451 slotoffset = dp->i_offset; 452 if (slotfreespace >= slotneeded) { 453 slotstatus = COMPACT; 454 slotsize = dp->i_offset + 455 ep->rec_len - slotoffset; 456 } 457 } 458 } 459 } 460 461 /* 462 * Check for a name match. 463 */ 464 if (ep->inode) { 465 namlen = ep->name_len; 466 if (namlen == cnp->cn_namelen && 467 !bcmp(cnp->cn_nameptr, ep->name, 468 (unsigned)namlen)) { 469 /* 470 * Save directory entry's inode number and 471 * reclen in ndp->ni_ufs area, and release 472 * directory buffer. 473 */ 474 dp->i_ino = ep->inode; 475 dp->i_reclen = ep->rec_len; 476 goto found; 477 } 478 } 479 prevoff = dp->i_offset; 480 dp->i_offset += ep->rec_len; 481 entryoffsetinblock += ep->rec_len; 482 if (ep->inode) 483 enduseful = dp->i_offset; 484 } 485/* notfound: */ 486 /* 487 * If we started in the middle of the directory and failed 488 * to find our target, we must check the beginning as well. 489 */ 490 if (numdirpasses == 2) { 491 numdirpasses--; 492 dp->i_offset = 0; 493 endsearch = dp->i_diroff; 494 goto searchloop; 495 } 496 if (bp != NULL) 497 brelse(bp); 498 /* 499 * If creating, and at end of pathname and current 500 * directory has not been removed, then can consider 501 * allowing file to be created. 502 */ 503 if ((nameiop == CREATE || nameiop == RENAME) && 504 (flags & ISLASTCN) && dp->i_nlink != 0) { 505 /* 506 * Access for write is interpreted as allowing 507 * creation of files in the directory. 508 */ 509 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) 510 return (error); 511 /* 512 * Return an indication of where the new directory 513 * entry should be put. If we didn't find a slot, 514 * then set dp->i_count to 0 indicating 515 * that the new slot belongs at the end of the 516 * directory. If we found a slot, then the new entry 517 * can be put in the range from dp->i_offset to 518 * dp->i_offset + dp->i_count. 519 */ 520 if (slotstatus == NONE) { 521 dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); 522 dp->i_count = 0; 523 enduseful = dp->i_offset; 524 } else { 525 dp->i_offset = slotoffset; 526 dp->i_count = slotsize; 527 if (enduseful < slotoffset + slotsize) 528 enduseful = slotoffset + slotsize; 529 } 530 dp->i_endoff = roundup(enduseful, DIRBLKSIZ); 531 dp->i_flag |= IN_CHANGE | IN_UPDATE; 532 /* 533 * We return with the directory locked, so that 534 * the parameters we set up above will still be 535 * valid if we actually decide to do a direnter(). 536 * We return ni_vp == NULL to indicate that the entry 537 * does not currently exist; we leave a pointer to 538 * the (locked) directory inode in ndp->ni_dvp. 539 * The pathname buffer is saved so that the name 540 * can be obtained later. 541 * 542 * NB - if the directory is unlocked, then this 543 * information cannot be used. 544 */ 545 cnp->cn_flags |= SAVENAME; 546 if (!lockparent) 547 VOP_UNLOCK(vdp, 0, td); 548 return (EJUSTRETURN); 549 } 550 /* 551 * Insert name into cache (as non-existent) if appropriate. 552 */ 553 if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 554 cache_enter(vdp, *vpp, cnp); 555 return (ENOENT); 556 557found: 558 if (numdirpasses == 2) 559 nchstats.ncs_pass2++; 560 /* 561 * Check that directory length properly reflects presence 562 * of this entry. 563 */ 564 if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len) 565 > dp->i_size) { 566 ext2_dirbad(dp, dp->i_offset, "i_size too small"); 567 dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len); 568 dp->i_flag |= IN_CHANGE | IN_UPDATE; 569 } 570 brelse(bp); 571 572 /* 573 * Found component in pathname. 574 * If the final component of path name, save information 575 * in the cache as to where the entry was found. 576 */ 577 if ((flags & ISLASTCN) && nameiop == LOOKUP) 578 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 579 580 /* 581 * If deleting, and at end of pathname, return 582 * parameters which can be used to remove file. 583 * If the wantparent flag isn't set, we return only 584 * the directory (in ndp->ni_dvp), otherwise we go 585 * on and lock the inode, being careful with ".". 586 */ 587 if (nameiop == DELETE && (flags & ISLASTCN)) { 588 /* 589 * Write access to directory required to delete files. 590 */ 591 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) 592 return (error); 593 /* 594 * Return pointer to current entry in dp->i_offset, 595 * and distance past previous entry (if there 596 * is a previous entry in this block) in dp->i_count. 597 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 598 */ 599 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 600 dp->i_count = 0; 601 else 602 dp->i_count = dp->i_offset - prevoff; 603 if (dp->i_number == dp->i_ino) { 604 VREF(vdp); 605 *vpp = vdp; 606 return (0); 607 } 608 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE, 609 &tdp)) != 0) 610 return (error); 611 /* 612 * If directory is "sticky", then user must own 613 * the directory, or the file in it, else she 614 * may not delete it (unless she's root). This 615 * implements append-only directories. 616 */ 617 if ((dp->i_mode & ISVTX) && 618 cred->cr_uid != 0 && 619 cred->cr_uid != dp->i_uid && 620 VTOI(tdp)->i_uid != cred->cr_uid) { 621 vput(tdp); 622 return (EPERM); 623 } 624 *vpp = tdp; 625 if (!lockparent) 626 VOP_UNLOCK(vdp, 0, td); 627 return (0); 628 } 629 630 /* 631 * If rewriting (RENAME), return the inode and the 632 * information required to rewrite the present directory 633 * Must get inode of directory entry to verify it's a 634 * regular file, or empty directory. 635 */ 636 if (nameiop == RENAME && wantparent && 637 (flags & ISLASTCN)) { 638 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) 639 return (error); 640 /* 641 * Careful about locking second inode. 642 * This can only occur if the target is ".". 643 */ 644 if (dp->i_number == dp->i_ino) 645 return (EISDIR); 646 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE, 647 &tdp)) != 0) 648 return (error); 649 *vpp = tdp; 650 cnp->cn_flags |= SAVENAME; 651 if (!lockparent) 652 VOP_UNLOCK(vdp, 0, td); 653 return (0); 654 } 655 656 /* 657 * Step through the translation in the name. We do not `vput' the 658 * directory because we may need it again if a symbolic link 659 * is relative to the current directory. Instead we save it 660 * unlocked as "pdp". We must get the target inode before unlocking 661 * the directory to insure that the inode will not be removed 662 * before we get it. We prevent deadlock by always fetching 663 * inodes from the root, moving down the directory tree. Thus 664 * when following backward pointers ".." we must unlock the 665 * parent directory before getting the requested directory. 666 * There is a potential race condition here if both the current 667 * and parent directories are removed before the VFS_VGET for the 668 * inode associated with ".." returns. We hope that this occurs 669 * infrequently since we cannot avoid this race condition without 670 * implementing a sophisticated deadlock detection algorithm. 671 * Note also that this simple deadlock detection scheme will not 672 * work if the file system has any hard links other than ".." 673 * that point backwards in the directory structure. 674 */ 675 pdp = vdp; 676 if (flags & ISDOTDOT) { 677 VOP_UNLOCK(pdp, 0, td); /* race to get the inode */ 678 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE, 679 &tdp)) != 0) { 680 vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td); 681 return (error); 682 } 683 if (lockparent && (flags & ISLASTCN) && 684 (error = vn_lock(pdp, LK_EXCLUSIVE, td))) { 685 vput(tdp); 686 return (error); 687 } 688 *vpp = tdp; 689 } else if (dp->i_number == dp->i_ino) { 690 VREF(vdp); /* we want ourself, ie "." */ 691 *vpp = vdp; 692 } else { 693 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, LK_EXCLUSIVE, 694 &tdp)) != 0) 695 return (error); 696 if (!lockparent || !(flags & ISLASTCN)) 697 VOP_UNLOCK(pdp, 0, td); 698 *vpp = tdp; 699 } 700 701 /* 702 * Insert name into cache if appropriate. 703 */ 704 if (cnp->cn_flags & MAKEENTRY) 705 cache_enter(vdp, *vpp, cnp); 706 return (0); 707} 708 709void 710ext2_dirbad(ip, offset, how) 711 struct inode *ip; 712 doff_t offset; 713 char *how; 714{ 715 struct mount *mp; 716 717 mp = ITOV(ip)->v_mount; 718 (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", 719 mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how); 720 if ((mp->mnt_flag & MNT_RDONLY) == 0) 721 panic("ext2_dirbad: bad dir"); 722} 723 724/* 725 * Do consistency checking on a directory entry: 726 * record length must be multiple of 4 727 * entry must fit in rest of its DIRBLKSIZ block 728 * record must be large enough to contain entry 729 * name is not longer than MAXNAMLEN 730 * name must be as long as advertised, and null terminated 731 */ 732/* 733 * changed so that it confirms to ext2_check_dir_entry 734 */ 735static int 736ext2_dirbadentry(dp, de, entryoffsetinblock) 737 struct vnode *dp; 738 struct ext2_dir_entry_2 *de; 739 int entryoffsetinblock; 740{ 741 int DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize; 742 743 char * error_msg = NULL; 744 745 if (de->rec_len < EXT2_DIR_REC_LEN(1)) 746 error_msg = "rec_len is smaller than minimal"; 747 else if (de->rec_len % 4 != 0) 748 error_msg = "rec_len % 4 != 0"; 749 else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len)) 750 error_msg = "reclen is too small for name_len"; 751 else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ) 752 error_msg = "directory entry across blocks"; 753 /* else LATER 754 if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count) 755 error_msg = "inode out of bounds"; 756 */ 757 758 if (error_msg != NULL) { 759 printf("bad directory entry: %s\n", error_msg); 760 printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n", 761 entryoffsetinblock, (unsigned long)de->inode, 762 de->rec_len, de->name_len); 763 } 764 return error_msg == NULL ? 0 : 1; 765} 766 767/* 768 * Write a directory entry after a call to namei, using the parameters 769 * that it left in nameidata. The argument ip is the inode which the new 770 * directory entry will refer to. Dvp is a pointer to the directory to 771 * be written, which was left locked by namei. Remaining parameters 772 * (dp->i_offset, dp->i_count) indicate how the space for the new 773 * entry is to be obtained. 774 */ 775int 776ext2_direnter(ip, dvp, cnp) 777 struct inode *ip; 778 struct vnode *dvp; 779 struct componentname *cnp; 780{ 781 struct ext2_dir_entry_2 *ep, *nep; 782 struct inode *dp; 783 struct buf *bp; 784 struct ext2_dir_entry_2 newdir; 785 struct iovec aiov; 786 struct uio auio; 787 u_int dsize; 788 int error, loc, newentrysize, spacefree; 789 char *dirbuf; 790 int DIRBLKSIZ = ip->i_e2fs->s_blocksize; 791 792 793#if DIAGNOSTIC 794 if ((cnp->cn_flags & SAVENAME) == 0) 795 panic("direnter: missing name"); 796#endif 797 dp = VTOI(dvp); 798 newdir.inode = ip->i_number; 799 newdir.name_len = cnp->cn_namelen; 800 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es, 801 EXT2_FEATURE_INCOMPAT_FILETYPE)) 802 newdir.file_type = DTTOFT(IFTODT(ip->i_mode)); 803 else 804 newdir.file_type = EXT2_FT_UNKNOWN; 805 bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1); 806 newentrysize = EXT2_DIR_REC_LEN(newdir.name_len); 807 if (dp->i_count == 0) { 808 /* 809 * If dp->i_count is 0, then namei could find no 810 * space in the directory. Here, dp->i_offset will 811 * be on a directory block boundary and we will write the 812 * new entry into a fresh block. 813 */ 814 if (dp->i_offset & (DIRBLKSIZ - 1)) 815 panic("ext2_direnter: newblk"); 816 auio.uio_offset = dp->i_offset; 817 newdir.rec_len = DIRBLKSIZ; 818 auio.uio_resid = newentrysize; 819 aiov.iov_len = newentrysize; 820 aiov.iov_base = (caddr_t)&newdir; 821 auio.uio_iov = &aiov; 822 auio.uio_iovcnt = 1; 823 auio.uio_rw = UIO_WRITE; 824 auio.uio_segflg = UIO_SYSSPACE; 825 auio.uio_td = (struct thread *)0; 826 error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); 827 if (DIRBLKSIZ > 828 VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) 829 /* XXX should grow with balloc() */ 830 panic("ext2_direnter: frag size"); 831 else if (!error) { 832 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 833 dp->i_flag |= IN_CHANGE; 834 } 835 return (error); 836 } 837 838 /* 839 * If dp->i_count is non-zero, then namei found space 840 * for the new entry in the range dp->i_offset to 841 * dp->i_offset + dp->i_count in the directory. 842 * To use this space, we may have to compact the entries located 843 * there, by copying them together towards the beginning of the 844 * block, leaving the free space in one usable chunk at the end. 845 */ 846 847 /* 848 * Increase size of directory if entry eats into new space. 849 * This should never push the size past a new multiple of 850 * DIRBLKSIZE. 851 * 852 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 853 */ 854 if (dp->i_offset + dp->i_count > dp->i_size) 855 dp->i_size = dp->i_offset + dp->i_count; 856 /* 857 * Get the block containing the space for the new directory entry. 858 */ 859 if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, 860 &bp)) != 0) 861 return (error); 862 /* 863 * Find space for the new entry. In the simple case, the entry at 864 * offset base will have the space. If it does not, then namei 865 * arranged that compacting the region dp->i_offset to 866 * dp->i_offset + dp->i_count would yield the 867 * space. 868 */ 869 ep = (struct ext2_dir_entry_2 *)dirbuf; 870 dsize = EXT2_DIR_REC_LEN(ep->name_len); 871 spacefree = ep->rec_len - dsize; 872 for (loc = ep->rec_len; loc < dp->i_count; ) { 873 nep = (struct ext2_dir_entry_2 *)(dirbuf + loc); 874 if (ep->inode) { 875 /* trim the existing slot */ 876 ep->rec_len = dsize; 877 ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize); 878 } else { 879 /* overwrite; nothing there; header is ours */ 880 spacefree += dsize; 881 } 882 dsize = EXT2_DIR_REC_LEN(nep->name_len); 883 spacefree += nep->rec_len - dsize; 884 loc += nep->rec_len; 885 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 886 } 887 /* 888 * Update the pointer fields in the previous entry (if any), 889 * copy in the new entry, and write out the block. 890 */ 891 if (ep->inode == 0) { 892 if (spacefree + dsize < newentrysize) 893 panic("ext2_direnter: compact1"); 894 newdir.rec_len = spacefree + dsize; 895 } else { 896 if (spacefree < newentrysize) 897 panic("ext2_direnter: compact2"); 898 newdir.rec_len = spacefree; 899 ep->rec_len = dsize; 900 ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize); 901 } 902 bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); 903 error = BUF_WRITE(bp); 904 dp->i_flag |= IN_CHANGE | IN_UPDATE; 905 if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) 906 error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC, 907 cnp->cn_cred, cnp->cn_thread); 908 return (error); 909} 910 911/* 912 * Remove a directory entry after a call to namei, using 913 * the parameters which it left in nameidata. The entry 914 * dp->i_offset contains the offset into the directory of the 915 * entry to be eliminated. The dp->i_count field contains the 916 * size of the previous record in the directory. If this 917 * is 0, the first entry is being deleted, so we need only 918 * zero the inode number to mark the entry as free. If the 919 * entry is not the first in the directory, we must reclaim 920 * the space of the now empty record by adding the record size 921 * to the size of the previous entry. 922 */ 923int 924ext2_dirremove(dvp, cnp) 925 struct vnode *dvp; 926 struct componentname *cnp; 927{ 928 struct inode *dp; 929 struct ext2_dir_entry_2 *ep; 930 struct buf *bp; 931 int error; 932 933 dp = VTOI(dvp); 934 if (dp->i_count == 0) { 935 /* 936 * First entry in block: set d_ino to zero. 937 */ 938 if ((error = 939 ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, 940 &bp)) != 0) 941 return (error); 942 ep->inode = 0; 943 error = BUF_WRITE(bp); 944 dp->i_flag |= IN_CHANGE | IN_UPDATE; 945 return (error); 946 } 947 /* 948 * Collapse new free space into previous entry. 949 */ 950 if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count), 951 (char **)&ep, &bp)) != 0) 952 return (error); 953 ep->rec_len += dp->i_reclen; 954 error = BUF_WRITE(bp); 955 dp->i_flag |= IN_CHANGE | IN_UPDATE; 956 return (error); 957} 958 959/* 960 * Rewrite an existing directory entry to point at the inode 961 * supplied. The parameters describing the directory entry are 962 * set up by a call to namei. 963 */ 964int 965ext2_dirrewrite(dp, ip, cnp) 966 struct inode *dp, *ip; 967 struct componentname *cnp; 968{ 969 struct buf *bp; 970 struct ext2_dir_entry_2 *ep; 971 struct vnode *vdp = ITOV(dp); 972 int error; 973 974 if ((error = ext2_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, 975 &bp)) != 0) 976 return (error); 977 ep->inode = ip->i_number; 978 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es, 979 EXT2_FEATURE_INCOMPAT_FILETYPE)) 980 ep->file_type = DTTOFT(IFTODT(ip->i_mode)); 981 else 982 ep->file_type = EXT2_FT_UNKNOWN; 983 error = BUF_WRITE(bp); 984 dp->i_flag |= IN_CHANGE | IN_UPDATE; 985 return (error); 986} 987 988/* 989 * Check if a directory is empty or not. 990 * Inode supplied must be locked. 991 * 992 * Using a struct dirtemplate here is not precisely 993 * what we want, but better than using a struct direct. 994 * 995 * NB: does not handle corrupted directories. 996 */ 997int 998ext2_dirempty(ip, parentino, cred) 999 struct inode *ip; 1000 ino_t parentino; 1001 struct ucred *cred; 1002{ 1003 off_t off; 1004 struct dirtemplate dbuf; 1005 struct ext2_dir_entry_2 *dp = (struct ext2_dir_entry_2 *)&dbuf; 1006 int error, count, namlen; 1007 1008#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1009 1010 for (off = 0; off < ip->i_size; off += dp->rec_len) { 1011 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, 1012 off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, 1013 NOCRED, &count, (struct thread *)0); 1014 /* 1015 * Since we read MINDIRSIZ, residual must 1016 * be 0 unless we're at end of file. 1017 */ 1018 if (error || count != 0) 1019 return (0); 1020 /* avoid infinite loops */ 1021 if (dp->rec_len == 0) 1022 return (0); 1023 /* skip empty entries */ 1024 if (dp->inode == 0) 1025 continue; 1026 /* accept only "." and ".." */ 1027 namlen = dp->name_len; 1028 if (namlen > 2) 1029 return (0); 1030 if (dp->name[0] != '.') 1031 return (0); 1032 /* 1033 * At this point namlen must be 1 or 2. 1034 * 1 implies ".", 2 implies ".." if second 1035 * char is also "." 1036 */ 1037 if (namlen == 1) 1038 continue; 1039 if (dp->name[1] == '.' && dp->inode == parentino) 1040 continue; 1041 return (0); 1042 } 1043 return (1); 1044} 1045 1046/* 1047 * Check if source directory is in the path of the target directory. 1048 * Target is supplied locked, source is unlocked. 1049 * The target is always vput before returning. 1050 */ 1051int 1052ext2_checkpath(source, target, cred) 1053 struct inode *source, *target; 1054 struct ucred *cred; 1055{ 1056 struct vnode *vp; 1057 int error, rootino, namlen; 1058 struct dirtemplate dirbuf; 1059 1060 vp = ITOV(target); 1061 if (target->i_number == source->i_number) { 1062 error = EEXIST; 1063 goto out; 1064 } 1065 rootino = ROOTINO; 1066 error = 0; 1067 if (target->i_number == rootino) 1068 goto out; 1069 1070 for (;;) { 1071 if (vp->v_type != VDIR) { 1072 error = ENOTDIR; 1073 break; 1074 } 1075 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1076 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1077 IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, (int *)0, 1078 (struct thread *)0); 1079 if (error != 0) 1080 break; 1081 namlen = dirbuf.dotdot_type; /* like ufs little-endian */ 1082 if (namlen != 2 || 1083 dirbuf.dotdot_name[0] != '.' || 1084 dirbuf.dotdot_name[1] != '.') { 1085 error = ENOTDIR; 1086 break; 1087 } 1088 if (dirbuf.dotdot_ino == source->i_number) { 1089 error = EINVAL; 1090 break; 1091 } 1092 if (dirbuf.dotdot_ino == rootino) 1093 break; 1094 vput(vp); 1095 if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, 1096 LK_EXCLUSIVE, &vp)) != 0) { 1097 vp = NULL; 1098 break; 1099 } 1100 } 1101 1102out: 1103 if (error == ENOTDIR) 1104 printf("checkpath: .. not a directory\n"); 1105 if (vp != NULL) 1106 vput(vp); 1107 return (error); 1108} 1109