/* vfs_lookup.c revision 193028 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_lookup.c 193028 2009-05-29 10:02:44Z des $");

#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/uma.h>

/* NAMEI_DIAGNOSTIC is defined and immediately undefined: debug output is off. */
#define	NAMEI_DIAGNOSTIC 1
#undef NAMEI_DIAGNOSTIC

SDT_PROVIDER_DECLARE(vfs);
SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *",
    "unsigned long");
SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *");

/*
 * Allocation zone for namei pathname buffers (MAXPATHLEN bytes each).
 */
uma_zone_t namei_zone;
/*
 * Placeholder vnode for mp traversal; lookup() installs it as ni_dvp
 * while stepping from a covered vnode to the root of the mounted
 * filesystem.
 */
static struct vnode *vp_crossmp;

/*
 * One-time initialization, run through SYSINIT below: create the "NAMEI"
 * zone that backs pathname buffers and allocate the vp_crossmp placeholder
 * vnode (using dead_vnodeops).  Panics if the vnode cannot be allocated.
 */
static void
nameiinit(void *dummy __unused)
{
	int error;

	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	error = getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
	if (error != 0)
		panic("nameiinit: getnewvnode");
	/* NOTE(review): presumably permits shared locking of vp_crossmp. */
	VN_LOCK_ASHARE(vp_crossmp);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);

/*
 * When zero, namei() strips LOCKSHARED from the request and lookup()
 * starts out with exclusive rather than shared directory locks.
 */
static int lookup_shared = 1;
SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0,
    "Enables/Disables shared locks for path name translation");
TUNABLE_INT("vfs.lookup_shared", &lookup_shared);

/*
 * Convert a pathname into a pointer to a locked vnode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Returns 0 on success or an errno value.  On every error return the
 * pathname buffer has been handed back to namei_zone.  On success the
 * buffer is freed unless SAVENAME or SAVESTART was requested, in which
 * case HASBUF is set in cn_flags and the buffer stays in cn_pnbuf.
 *
 * Giant handling on success: without MPSAFE any Giant acquired for the
 * final filesystem is dropped before returning; with MPSAFE it is kept
 * and signalled to the caller through GIANTHELD in cn_flags.
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;	/* pointer to file descriptor state */
	char *cp;		/* pointer into pathname argument */
	struct vnode *dp;	/* the directory we are searching */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	int error, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	struct thread *td = cnp->cn_thread;
	struct proc *p = td->td_proc;
	int vfslocked;

	KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0,
	    ("NOT MPSAFE and Giant not held"));
	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
	    ("namei: nameiop contaminated with flags"));
	KASSERT((cnp->cn_flags & OPMASK) == 0,
	    ("namei: flags contaminated with nameiops"));
	if (!lookup_shared)
		cnp->cn_flags &= ~LOCKSHARED;
	fdp = p->p_fd;

	/* We will set this ourselves if we need it. */
	cnp->cn_flags &= ~TRAILINGSLASH;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.  A caller that passes HASBUF supplies
	 * its own buffer in cn_pnbuf.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);

	/* If we are auditing the kernel pathname, save the user pathname. */
	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1);
	if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2);

	/*
	 * Don't allow empty pathnames.
	 */
	if (!error && *cnp->cn_pnbuf == '\0')
		error = ENOENT;

	if (error) {
		uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
		cnp->cn_pnbuf = NULL;
		cnp->cn_nameptr = NULL;
#endif
		ndp->ni_vp = NULL;
		return (error);
	}
	ndp->ni_loopcnt = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_NAMEI)) {
		KASSERT(cnp->cn_thread == curthread,
		    ("namei not using curthread"));
		ktrnamei(cnp->cn_pnbuf);
	}
#endif
	/*
	 * Get starting point for the translation.
	 *
	 * For a relative path the starting directory is, in order of
	 * preference: the caller-supplied ni_startdir, the vnode behind
	 * ni_dirfd (when it is not AT_FDCWD), or the process's current
	 * directory.  error is known to be 0 on entry to this section.
	 */
	FILEDESC_SLOCK(fdp);
	ndp->ni_rootdir = fdp->fd_rdir;
	ndp->ni_topdir = fdp->fd_jdir;

	dp = NULL;
	if (cnp->cn_pnbuf[0] != '/') {
		if (ndp->ni_startdir != NULL) {
			dp = ndp->ni_startdir;
			error = 0;
		} else if (ndp->ni_dirfd != AT_FDCWD)
			error = fgetvp(td, ndp->ni_dirfd, &dp);
		if (error != 0 || dp != NULL) {
			FILEDESC_SUNLOCK(fdp);
			/* A starting point that is not a directory is rejected. */
			if (error == 0 && dp->v_type != VDIR) {
				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
				vrele(dp);
				VFS_UNLOCK_GIANT(vfslocked);
				error = ENOTDIR;
			}
		}
		if (error) {
			uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
			cnp->cn_pnbuf = NULL;
			cnp->cn_nameptr = NULL;
#endif
			return (error);
		}
	}
	if (dp == NULL) {
		/*
		 * Fall back to the current directory.  This is also reached
		 * for absolute paths, where a caller-supplied ni_startdir is
		 * not used and its reference is dropped here.
		 */
		dp = fdp->fd_cdir;
		VREF(dp);
		FILEDESC_SUNLOCK(fdp);
		if (ndp->ni_startdir != NULL) {
			vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount);
			vrele(ndp->ni_startdir);
			VFS_UNLOCK_GIANT(vfslocked);
		}
	}
	SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
	    cnp->cn_flags, 0, 0);
	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
	for (;;) {
		/*
		 * Check if root directory should replace current directory.
		 * Done at start of translation and after symbolic link.
		 */
		cnp->cn_nameptr = cnp->cn_pnbuf;
		if (*(cnp->cn_nameptr) == '/') {
			vrele(dp);
			VFS_UNLOCK_GIANT(vfslocked);
			while (*(cnp->cn_nameptr) == '/') {
				cnp->cn_nameptr++;
				ndp->ni_pathlen--;
			}
			dp = ndp->ni_rootdir;
			vfslocked = VFS_LOCK_GIANT(dp->v_mount);
			VREF(dp);
		}
		/* Giant state is passed to and from lookup() via GIANTHELD. */
		if (vfslocked)
			ndp->ni_cnd.cn_flags |= GIANTHELD;
		ndp->ni_startdir = dp;
		error = lookup(ndp);
		if (error) {
			uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
			cnp->cn_pnbuf = NULL;
			cnp->cn_nameptr = NULL;
#endif
			SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0,
			    0, 0);
			return (error);
		}
		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
		/*
		 * If not a symbolic link, we're done.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
				uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
				cnp->cn_pnbuf = NULL;
				cnp->cn_nameptr = NULL;
#endif
			} else
				cnp->cn_flags |= HASBUF;

			if ((cnp->cn_flags & MPSAFE) == 0) {
				VFS_UNLOCK_GIANT(vfslocked);
			} else if (vfslocked)
				ndp->ni_cnd.cn_flags |= GIANTHELD;
			SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp,
			    0, 0, 0);
			return (0);
		}
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
#ifdef MAC
		if ((cnp->cn_flags & NOMACCHECK) == 0) {
			error = mac_vnode_check_readlink(td->td_ucred,
			    ndp->ni_vp);
			if (error)
				break;
		}
#endif
		/*
		 * If pathname remains after the link (ni_pathlen > 1), read
		 * the link into a fresh buffer so the remainder can be
		 * appended behind it; otherwise read it straight into the
		 * existing buffer.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = (struct thread *)0;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		/* An empty link target is treated as a missing file. */
		if (linklen == 0) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			error = ENOENT;
			break;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			error = ENAMETOOLONG;
			break;
		}
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		/* Resume the translation from the directory holding the link. */
		dp = ndp->ni_dvp;
	}
	/*
	 * Only reached by breaking out of the symlink handling above:
	 * release the buffer, the link vnode and its parent directory.
	 */
	uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
	cnp->cn_pnbuf = NULL;
	cnp->cn_nameptr = NULL;
#endif
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	vrele(ndp->ni_dvp);
	VFS_UNLOCK_GIANT(vfslocked);
	SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0);
	return (error);
}

/*
 * Adjust vnode lock flags for a given mount: a shared lock request is
 * converted to an exclusive one when there is no mount point, or when
 * the mount does not advertise MNTK_LOOKUP_SHARED.  Note that a NULL mp
 * forces LK_EXCLUSIVE regardless of the incoming flags.
 */
static int
compute_cn_lkflags(struct mount *mp, int lkflags)
{

	if (mp == NULL ||
	    ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
		lkflags &= ~LK_SHARED;
		lkflags |= LK_EXCLUSIVE;
	}
	return (lkflags);
}

/*
 * Decide whether the vnode about to be looked up must be locked
 * exclusively.  'flags' is the componentname cn_flags word.  Returns 1
 * when an exclusive lock is required and 0 when a shared lock suffices.
 */
static __inline int
needs_exclusive_leaf(struct mount *mp, int flags)
{

	/*
	 * Intermediate nodes can use shared locks, we only need to
	 * force an exclusive lock for leaf nodes.
	 */
	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
		return (0);

	/* Always use exclusive locks if LOCKSHARED isn't set. */
	if (!(flags & LOCKSHARED))
		return (1);

	/*
	 * For lookups during open(), if the mount point supports
	 * extended shared operations, then use a shared lock for the
	 * leaf node, otherwise use an exclusive lock.
	 */
	if (flags & ISOPEN) {
		if (mp != NULL &&
		    (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED))
			return (0);
		else
			return (1);
	}

	/*
	 * Lookup requests outside of open() that specify LOCKSHARED
	 * only need a shared lock on the leaf vnode.
	 */
	return (0);
}

/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by cnp->cn_nameptr and ni_pathlen holds the
 * remaining length.  The starting directory is taken from ni_startdir
 * (which is reset to NULLVP).  The pathname is descended until done, or a
 * symbolic link is encountered.  The ISSYMLINK flag in cn_flags is clear
 * if the path is completed; it is set if a symbolic link needing
 * interpretation is encountered.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked.  If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked.  Otherwise the parent directory is not returned.  If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Giant state for the starting directory is passed in through GIANTHELD
 * in cn_flags; on success GIANTHELD is set again if Giant is still held
 * for the result, and on failure it is left clear.
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at cnp->cn_nameptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
 *	if result vnode is mounted on and crossing mount points,
 *	    find mounted on vnode
 *	if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 */
int
lookup(struct nameidata *ndp)
{
	char *cp;		/* pointer into pathname argument */
	struct vnode *dp = 0;	/* the directory we are searching */
	struct vnode *tdp;		/* saved dp */
	struct mount *mp;		/* mount table entry */
	struct prison *pr;
	int docache;			/* == 0 do not cache last component */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int trailing_slash;
	int error = 0;
	int dpunlocked = 0;		/* dp has already been unlocked */
	struct componentname *cnp = &ndp->ni_cnd;
	int vfslocked;			/* VFS Giant state for child */
	int dvfslocked;			/* VFS Giant state for parent */
	int tvfslocked;
	int lkflags_save;
#ifdef AUDIT
	struct thread *td = curthread;
#endif

	/*
	 * Setup: break out flag bits into variables.
	 */
	dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
	vfslocked = 0;
	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
	/* docache is non-zero exactly when NOCACHE is clear. */
	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE ||
	    (wantparent && cnp->cn_nameiop != CREATE &&
	     cnp->cn_nameiop != LOOKUP))
		docache = 0;
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	ndp->ni_dvp = NULL;
	/*
	 * We use shared locks until we hit the parent of the last cn then
	 * we adjust based on the requesting flags.
	 */
	if (lookup_shared)
		cnp->cn_lkflags = LK_SHARED;
	else
		cnp->cn_lkflags = LK_EXCLUSIVE;
	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;
	vn_lock(dp,
	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY));

dirloop:
	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
	cnp->cn_consume = 0;
	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
		continue;
	cnp->cn_namelen = cp - cnp->cn_nameptr;
	if (cnp->cn_namelen > NAME_MAX) {
		error = ENAMETOOLONG;
		goto bad;
	}
#ifdef NAMEI_DIAGNOSTIC
	{ char c = *cp;
	*cp = '\0';
	printf("{%s}: ", cnp->cn_nameptr);
	*cp = c; }
#endif
	ndp->ni_pathlen -= cnp->cn_namelen;
	ndp->ni_next = cp;

	/*
	 * Replace multiple slashes by a single slash and trailing slashes
	 * by a null.  This must be done before VOP_LOOKUP() because some
	 * fs's don't know about trailing slashes.  Remember if there were
	 * trailing slashes to handle symlinks, existing non-directories
	 * and non-existing files that won't be directories specially later.
	 */
	trailing_slash = 0;
	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
		cp++;
		ndp->ni_pathlen--;
		if (*cp == '\0') {
			trailing_slash = 1;
			*ndp->ni_next = '\0';	/* XXX for direnter() ... */
			cnp->cn_flags |= TRAILINGSLASH;
		}
	}
	ndp->ni_next = cp;

	cnp->cn_flags |= MAKEENTRY;
	if (*cp == '\0' && docache == 0)
		cnp->cn_flags &= ~MAKEENTRY;
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
		cnp->cn_flags |= ISDOTDOT;
	else
		cnp->cn_flags &= ~ISDOTDOT;
	if (*ndp->ni_next == 0)
		cnp->cn_flags |= ISLASTCN;
	else
		cnp->cn_flags &= ~ISLASTCN;


	/*
	 * Check for degenerate name (e.g. / or "")
	 * which is a way of talking about a directory,
	 * e.g. like "/." or ".".
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (cnp->cn_nameiop != LOOKUP) {
			error = EISDIR;
			goto bad;
		}
		if (wantparent) {
			ndp->ni_dvp = dp;
			VREF(dp);
		}
		ndp->ni_vp = dp;

		if (cnp->cn_flags & AUDITVNODE1)
			AUDIT_ARG(vnode, dp, ARG_VNODE1);
		else if (cnp->cn_flags & AUDITVNODE2)
			AUDIT_ARG(vnode, dp, ARG_VNODE2);

		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
			VOP_UNLOCK(dp, 0);
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		goto success;
	}

	/*
	 * Handle "..": four special cases.
	 * 1. Return an error if this is the last component of
	 *    the name and the operation is DELETE or RENAME.
	 * 2. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 3. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other filesystem.
	 * 4. If the vnode is the top directory of
	 *    the jail or chroot, don't let them out.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		if ((cnp->cn_flags & ISLASTCN) != 0 &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			error = EINVAL;
			goto bad;
		}
		for (;;) {
			/*
			 * Walk the prison hierarchy; pr is left non-NULL if
			 * dp is the root of this or any ancestor prison.
			 */
			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
			     pr = pr->pr_parent)
				if (dp == pr->pr_root)
					break;
			if (dp == ndp->ni_rootdir ||
			    dp == ndp->ni_topdir ||
			    dp == rootvnode ||
			    pr != NULL ||
			    ((dp->v_vflag & VV_ROOT) != 0 &&
			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
				/* ".." resolves to dp itself here. */
				ndp->ni_dvp = dp;
				ndp->ni_vp = dp;
				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
				VREF(dp);
				goto nextname;
			}
			if ((dp->v_vflag & VV_ROOT) == 0)
				break;
			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
				error = ENOENT;
				goto bad;
			}
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			tvfslocked = dvfslocked;
			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
			VREF(dp);
			vput(tdp);
			VFS_UNLOCK_GIANT(tvfslocked);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY));
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
unionlookup:
#ifdef MAC
	if ((cnp->cn_flags & NOMACCHECK) == 0) {
		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
		    cnp);
		if (error)
			goto bad;
	}
#endif
	ndp->ni_dvp = dp;
	ndp->ni_vp = NULL;
	ASSERT_VOP_LOCKED(dp, "lookup");
	VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked));
	/*
	 * If we have a shared lock we may need to upgrade the lock for the
	 * last operation.
	 */
	if (dp != vp_crossmp &&
	    VOP_ISLOCKED(dp) == LK_SHARED &&
	    (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
		vn_lock(dp, LK_UPGRADE|LK_RETRY);
	/*
	 * If we're looking up the last component and we need an exclusive
	 * lock, adjust our lkflags.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
		cnp->cn_lkflags = LK_EXCLUSIVE;
#ifdef NAMEI_DIAGNOSTIC
	vprint("lookup in", dp);
#endif
	/*
	 * The per-mount adjustment of cn_lkflags only applies to this
	 * VOP_LOOKUP() call; the saved value is restored afterwards.
	 */
	lkflags_save = cnp->cn_lkflags;
	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
		cnp->cn_lkflags = lkflags_save;
		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif
		/*
		 * Not found at the root of a union mount: retry the same
		 * component in the directory that the mount covers.
		 */
		if ((error == ENOENT) &&
		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			tvfslocked = dvfslocked;
			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
			VREF(dp);
			vput(tdp);
			VFS_UNLOCK_GIANT(tvfslocked);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY));
			goto unionlookup;
		}

		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * If creating and at end of pathname, then can consider
		 * allowing file to be created.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		if (*cp == '\0' && trailing_slash &&
		     !(cnp->cn_flags & WILLBEDIR)) {
			error = ENOENT;
			goto bad;
		}
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * This is a temporary assert to make sure I know what the
		 * behavior here was.
		 */
		KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0,
		   ("lookup: Unhandled case."));
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * (possibly locked) directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			ndp->ni_startdir = ndp->ni_dvp;
			VREF(ndp->ni_startdir);
		}
		goto success;
	} else
		cnp->cn_lkflags = lkflags_save;
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif
	/*
	 * Take into account any additional components consumed by
	 * the underlying filesystem.
	 */
	if (cnp->cn_consume > 0) {
		cnp->cn_nameptr += cnp->cn_consume;
		ndp->ni_next += cnp->cn_consume;
		ndp->ni_pathlen -= cnp->cn_consume;
		cnp->cn_consume = 0;
	}

	dp = ndp->ni_vp;
	vfslocked = VFS_LOCK_GIANT(dp->v_mount);

	/*
	 * Check to see if the vnode has been mounted on;
	 * if so find the root of the mounted filesystem.
	 */
	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		/* If the mount cannot be busied, re-evaluate and retry. */
		if (vfs_busy(mp, 0))
			continue;
		vput(dp);
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = VFS_LOCK_GIANT(mp);
		if (dp != ndp->ni_dvp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		VFS_UNLOCK_GIANT(dvfslocked);
		dvfslocked = 0;
		/* The placeholder vnode stands in for the parent directory. */
		vref(vp_crossmp);
		ndp->ni_dvp = vp_crossmp;
		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags),
		    &tdp);
		vfs_unbusy(mp);
		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
			panic("vp_crossmp exclusively locked or reclaimed");
		if (error) {
			/* dp was already released above; bad must not vput it. */
			dpunlocked = 1;
			goto bad2;
		}
		ndp->ni_vp = dp = tdp;
	}

	/*
	 * Check for symbolic link
	 */
	if ((dp->v_type == VLNK) &&
	    ((cnp->cn_flags & FOLLOW) || trailing_slash ||
	     *ndp->ni_next == '/')) {
		cnp->cn_flags |= ISSYMLINK;
		if (dp->v_iflag & VI_DOOMED) {
			/*
			 * We can't know whether the directory was mounted with
			 * NOSYMFOLLOW, so we can't follow safely.
			 */
			error = ENOENT;
			goto bad2;
		}
		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
			error = EACCES;
			goto bad2;
		}
		/*
		 * Symlink code always expects an unlocked dvp.
		 */
		if (ndp->ni_dvp != ndp->ni_vp)
			VOP_UNLOCK(ndp->ni_dvp, 0);
		goto success;
	}

nextname:
	/*
	 * Not a symbolic link.  If more pathname,
	 * continue at next component, else return.
	 */
	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
	    ("lookup: invalid path state."));
	if (*ndp->ni_next == '/') {
		cnp->cn_nameptr = ndp->ni_next;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		VFS_UNLOCK_GIANT(dvfslocked);
		dvfslocked = vfslocked;	/* dp becomes dvp in dirloop */
		vfslocked = 0;
		goto dirloop;
	}
	/*
	 * If we're processing a path with a trailing slash,
	 * check that the end result is a directory.
	 */
	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad2;
	}
	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto bad2;
	}
	if (cnp->cn_flags & SAVESTART) {
		ndp->ni_startdir = ndp->ni_dvp;
		VREF(ndp->ni_startdir);
	}
	if (!wantparent) {
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		VFS_UNLOCK_GIANT(dvfslocked);
		dvfslocked = 0;
	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp)
		VOP_UNLOCK(ndp->ni_dvp, 0);

	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG(vnode, dp, ARG_VNODE1);
	else if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG(vnode, dp, ARG_VNODE2);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
success:
	/*
	 * Because of lookup_shared we may have the vnode shared locked, but
	 * the caller may want it to be exclusively locked.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
		vn_lock(dp, LK_UPGRADE | LK_RETRY);
		if (dp->v_iflag & VI_DOOMED) {
			error = ENOENT;
			goto bad2;
		}
	}
	if (vfslocked && dvfslocked)
		VFS_UNLOCK_GIANT(dvfslocked);	/* Only need one */
	if (vfslocked || dvfslocked)
		ndp->ni_cnd.cn_flags |= GIANTHELD;
	return (0);

	/*
	 * Error exits: bad2 additionally releases the parent in ni_dvp;
	 * bad releases dp (unless already done) and both Giant states.
	 */
bad2:
	if (dp != ndp->ni_dvp)
		vput(ndp->ni_dvp);
	else
		vrele(ndp->ni_dvp);
bad:
	if (!dpunlocked)
		vput(dp);
	VFS_UNLOCK_GIANT(vfslocked);
	VFS_UNLOCK_GIANT(dvfslocked);
	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
	ndp->ni_vp = NULL;
	return (error);
}

/*
 * relookup - lookup a path name component
 *    Used by lookup to re-acquire things.
 *
 * Looks up the single, final component described by cnp in directory dvp
 * and stores the result in *vpp.  cnp must have ISLASTCN set together
 * with LOCKPARENT or WANTPARENT (both are asserted), and ".." is not
 * accepted (panic).  dvp is locked exclusively here.
 *
 * Returns 0 or an errno value.  On the error exit dvp is unlocked and
 * released (vput) and *vpp is set to NULL.  A return of 0 after
 * VOP_LOOKUP() reported EJUSTRETURN means the entry does not exist but
 * may be created.
 */
int
relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct vnode *dp = 0;		/* the directory we are searching */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int error = 0;

	KASSERT(cnp->cn_flags & ISLASTCN,
	    ("relookup: Not given last component."));
	/*
	 * Setup: break out flag bits into variables.
	 */
	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
	KASSERT(wantparent, ("relookup: parent not wanted."));
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	dp = dvp;
	cnp->cn_lkflags = LK_EXCLUSIVE;
	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
#ifdef NAMEI_DIAGNOSTIC
	printf("{%s}: ", cnp->cn_nameptr);
#endif

	/*
	 * Check for degenerate name (e.g. / or "")
	 * which is a way of talking about a directory,
	 * e.g. like "/." or ".".
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		if (cnp->cn_nameiop != LOOKUP || wantparent) {
			error = EISDIR;
			goto bad;
		}
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (!(cnp->cn_flags & LOCKLEAF))
			VOP_UNLOCK(dp, 0);
		*vpp = dp;
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		return (0);
	}

	if (cnp->cn_flags & ISDOTDOT)
		panic ("relookup: lookup on dot-dot");

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
#ifdef NAMEI_DIAGNOSTIC
	vprint("search in:", dp);
#endif
	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
		KASSERT(*vpp == NULL, ("leaf should be empty"));
		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * If creating and at end of pathname, then can consider
		 * allowing file to be created.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		/* ASSERT(dvp == ndp->ni_startdir) */
		if (cnp->cn_flags & SAVESTART)
			VREF(dvp);
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * This is a temporary assert to make sure I know what the
		 * behavior here was.
		 */
		KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0,
		   ("relookup: Unhandled case."));
		/*
		 * We return with *vpp NULL to indicate that the entry
		 * doesn't currently exist, leaving the (possibly locked)
		 * directory vnode dvp with the caller.
		 */
		return (0);
	}

	dp = *vpp;

	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		/* Release the parent here; the bad label releases dp. */
		if (dvp == dp)
			vrele(dvp);
		else
			vput(dvp);
		error = EROFS;
		goto bad;
	}
	/*
	 * Set the parent lock/ref state to the requested state.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
		if (wantparent)
			VOP_UNLOCK(dvp, 0);
		else
			vput(dvp);
	} else if (!wantparent)
		vrele(dvp);
	/*
	 * Check for symbolic link
	 */
	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
	    ("relookup: symlink found.\n"));

	/* ASSERT(dvp == ndp->ni_startdir) */
	if (cnp->cn_flags & SAVESTART)
		VREF(dvp);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
	return (0);
bad:
	vput(dp);
	*vpp = NULL;
	return (error);
}

/*
 * Free data allocated by namei(); see namei(9) for details.
 *
 * Each NDF_NO_* bit in 'flags' suppresses one cleanup step; with none set:
 *  - the pathname buffer is freed if HASBUF is set (NDF_NO_FREE_PNBUF);
 *  - ni_vp is unlocked if LOCKLEAF was requested (NDF_NO_VP_UNLOCK) and
 *    released (NDF_NO_VP_RELE);
 *  - ni_dvp is unlocked if LOCKPARENT was requested and it differs from
 *    ni_vp (NDF_NO_DVP_UNLOCK), and released if LOCKPARENT or WANTPARENT
 *    was requested (NDF_NO_DVP_RELE);
 *  - ni_startdir is released if SAVESTART was requested
 *    (NDF_NO_STARTDIR_RELE).
 * Released vnode pointers are reset to NULL.
 */
void
NDFREE(struct nameidata *ndp, const u_int flags)
{
	int unlock_dvp;
	int unlock_vp;

	unlock_dvp = 0;
	unlock_vp = 0;

	if (!(flags & NDF_NO_FREE_PNBUF) &&
	    (ndp->ni_cnd.cn_flags & HASBUF)) {
		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
		ndp->ni_cnd.cn_flags &= ~HASBUF;
	}
	if (!(flags & NDF_NO_VP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
		unlock_vp = 1;
	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
		/* vput() combines the unlock and the release. */
		if (unlock_vp) {
			vput(ndp->ni_vp);
			unlock_vp = 0;
		} else
			vrele(ndp->ni_vp);
		ndp->ni_vp = NULL;
	}
	/* Unlock only: the reference is being kept by the caller. */
	if (unlock_vp)
		VOP_UNLOCK(ndp->ni_vp, 0);
	if (!(flags & NDF_NO_DVP_UNLOCK) &&
	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
	    ndp->ni_dvp != ndp->ni_vp)
		unlock_dvp = 1;
	if (!(flags & NDF_NO_DVP_RELE) &&
	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
		if (unlock_dvp) {
			vput(ndp->ni_dvp);
			unlock_dvp = 0;
		} else
			vrele(ndp->ni_dvp);
		ndp->ni_dvp = NULL;
	}
	if (unlock_dvp)
		VOP_UNLOCK(ndp->ni_dvp, 0);
	if (!(flags & NDF_NO_STARTDIR_RELE) &&
	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
		vrele(ndp->ni_startdir);
		ndp->ni_startdir = NULL;
	}
}

/*
 * Determine if there is a suitable alternate filename under the specified
 * prefix for the specified path.  If the create flag is set, then the
 * alternate prefix will be used so long as the parent directory exists.
 * This is used by the various compatibility ABIs so that Linux binaries prefer
 * files under /compat/linux for example.  The chosen path (whether under
 * the prefix or under /) is returned in a kernel malloc'd buffer pointed
 * to by pathbuf.  The caller is responsible for free'ing the buffer from
 * the M_TEMP bucket if one is returned.
 *
 * Return values:
 *  - 0: *pathbuf holds the prefixed path, or the original path when
 *    dirfd is not AT_FDCWD.
 *  - non-zero with *pathbuf == NULL: the prefix did not fit or the path
 *    could not be copied; the buffer has already been freed.
 *  - non-zero with *pathbuf != NULL: no usable alternate exists (EINVAL
 *    for a relative path, ENOENT when the alternate resolves to the
 *    prefix itself, or the namei() error); the buffer holds the original
 *    path.
 */
int
kern_alternate_path(struct thread *td, const char *prefix, const char *path,
    enum uio_seg pathseg, char **pathbuf, int create, int dirfd)
{
	struct nameidata nd, ndroot;
	char *ptr, *buf, *cp;
	size_t len, sz;
	int error;

	buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	*pathbuf = buf;

	/* Copy the prefix into the new pathname as a starting point. */
	len = strlcpy(buf, prefix, MAXPATHLEN);
	if (len >= MAXPATHLEN) {
		*pathbuf = NULL;
		free(buf, M_TEMP);
		return (EINVAL);
	}
	sz = MAXPATHLEN - len;
	ptr = buf + len;

	/*
	 * Append the filename to the prefix.  From here on, ptr points at
	 * the original path inside buf and len is the length reported by
	 * the copy routine.
	 */
	if (pathseg == UIO_SYSSPACE)
		error = copystr(path, ptr, sz, &len);
	else
		error = copyinstr(path, ptr, sz, &len);

	if (error) {
		*pathbuf = NULL;
		free(buf, M_TEMP);
		return (error);
	}

	/* Only use a prefix with absolute pathnames. */
	if (*ptr != '/') {
		error = EINVAL;
		goto keeporig;
	}

	if (dirfd != AT_FDCWD) {
		/*
		 * We want the original because the "prefix" is
		 * included in the already opened dirfd.
		 */
		bcopy(ptr, buf, len);
		return (0);
	}

	/*
	 * We know that there is a / somewhere in this pathname.
	 * Search backwards for it, to find the file's parent dir
	 * to see if it exists in the alternate tree.  If it does,
	 * and we want to create a file (create is set), we don't
	 * need to worry about the root comparison in this case.
	 */

	if (create) {
		/* Temporarily cut the path at its last '/' for the lookup. */
		for (cp = &ptr[len] - 1; *cp != '/'; cp--);
		*cp = '\0';

		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);
		error = namei(&nd);
		*cp = '/';
		if (error != 0)
			goto keeporig;
	} else {
		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);

		error = namei(&nd);
		if (error != 0)
			goto keeporig;

		/*
		 * We now compare the vnode of the prefix to the one
		 * vnode asked.  If they resolve to be the same, then we
		 * ignore the match so that the real root gets used.
		 * This avoids the problem of traversing "../.." to find the
		 * root directory and never finding it, because "/" resolves
		 * to the emulation root directory.  This is expensive :-(
		 */
		NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix,
		    td);

		/* We shouldn't ever get an error from this namei(). */
		error = namei(&ndroot);
		if (error == 0) {
			if (nd.ni_vp == ndroot.ni_vp)
				error = ENOENT;

			NDFREE(&ndroot, NDF_ONLY_PNBUF);
			vrele(ndroot.ni_vp);
			VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot));
		}
	}

	NDFREE(&nd, NDF_ONLY_PNBUF);
	vrele(nd.ni_vp);
	VFS_UNLOCK_GIANT(NDHASGIANT(&nd));

keeporig:
	/* If there was an error, use the original path name. */
	if (error)
		bcopy(ptr, buf, len);
	return (error);
}