1/*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD$"); 41 42#include "opt_capsicum.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/kernel.h> 48#include <sys/capsicum.h> 49#include <sys/fcntl.h> 50#include <sys/jail.h> 51#include <sys/lock.h> 52#include <sys/mutex.h> 53#include <sys/namei.h> 54#include <sys/vnode.h> 55#include <sys/mount.h> 56#include <sys/filedesc.h> 57#include <sys/proc.h> 58#include <sys/sdt.h> 59#include <sys/syscallsubr.h> 60#include <sys/sysctl.h> 61#ifdef KTRACE 62#include <sys/ktrace.h> 63#endif 64 65#include <security/audit/audit.h> 66#include <security/mac/mac_framework.h> 67 68#include <vm/uma.h> 69 70#define NAMEI_DIAGNOSTIC 1 71#undef NAMEI_DIAGNOSTIC 72 73SDT_PROVIDER_DECLARE(vfs); 74SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *", 75 "unsigned long"); 76SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *"); 77 78/* Allocation zone for namei. */ 79uma_zone_t namei_zone; 80 81/* Placeholder vnode for mp traversal. */ 82static struct vnode *vp_crossmp; 83 84static int 85crossmp_vop_islocked(struct vop_islocked_args *ap) 86{ 87 88 return (LK_SHARED); 89} 90 91static int 92crossmp_vop_lock1(struct vop_lock1_args *ap) 93{ 94 struct vnode *vp; 95 struct lock *lk __unused; 96 const char *file __unused; 97 int flags, line __unused; 98 99 vp = ap->a_vp; 100 lk = vp->v_vnlock; 101 flags = ap->a_flags; 102 file = ap->a_file; 103 line = ap->a_line; 104 105 if ((flags & LK_SHARED) == 0) 106 panic("invalid lock request for crossmp"); 107 108 WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, 109 flags & LK_INTERLOCK ? 
&VI_MTX(vp)->lock_object : NULL); 110 WITNESS_LOCK(&lk->lock_object, 0, file, line); 111 if ((flags & LK_INTERLOCK) != 0) 112 VI_UNLOCK(vp); 113 LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line); 114 return (0); 115} 116 117static int 118crossmp_vop_unlock(struct vop_unlock_args *ap) 119{ 120 struct vnode *vp; 121 struct lock *lk __unused; 122 int flags; 123 124 vp = ap->a_vp; 125 lk = vp->v_vnlock; 126 flags = ap->a_flags; 127 128 if ((flags & LK_INTERLOCK) != 0) 129 VI_UNLOCK(vp); 130 WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE); 131 LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE, 132 LOCK_LINE); 133 return (0); 134} 135 136static struct vop_vector crossmp_vnodeops = { 137 .vop_default = &default_vnodeops, 138 .vop_islocked = crossmp_vop_islocked, 139 .vop_lock1 = crossmp_vop_lock1, 140 .vop_unlock = crossmp_vop_unlock, 141}; 142 143struct nameicap_tracker { 144 struct vnode *dp; 145 TAILQ_ENTRY(nameicap_tracker) nm_link; 146}; 147 148/* Zone for cap mode tracker elements used for dotdot capability checks. 
*/ 149static uma_zone_t nt_zone; 150 151static void 152nameiinit(void *dummy __unused) 153{ 154 155 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 156 UMA_ALIGN_PTR, 0); 157 nt_zone = uma_zcreate("rentr", sizeof(struct nameicap_tracker), 158 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 159 getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp); 160} 161SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 162 163static int lookup_cap_dotdot = 1; 164SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN, 165 &lookup_cap_dotdot, 0, 166 "enables \"..\" components in path lookup in capability mode"); 167static int lookup_cap_dotdot_nonlocal = 1; 168SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, 169 &lookup_cap_dotdot_nonlocal, 0, 170 "enables \"..\" components in path lookup in capability mode " 171 "on non-local mount"); 172 173static void 174nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) 175{ 176 struct nameicap_tracker *nt; 177 struct componentname *cnp; 178 179 if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) 180 return; 181 cnp = &ndp->ni_cnd; 182 nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); 183 if (nt != NULL && nt->dp == dp) 184 return; 185 nt = uma_zalloc(nt_zone, M_WAITOK); 186 vhold(dp); 187 nt->dp = dp; 188 TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); 189} 190 191static void 192nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) 193{ 194 struct nameicap_tracker *nt, *nt1; 195 196 nt = first; 197 TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { 198 TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); 199 vdrop(nt->dp); 200 uma_zfree(nt_zone, nt); 201 } 202} 203 204static void 205nameicap_cleanup(struct nameidata *ndp) 206{ 207 KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || 208 (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); 209 nameicap_cleanup_from(ndp, NULL); 210} 211 212/* 213 * For 
dotdot lookups in capability mode, only allow the component 214 * lookup to succeed if the resulting directory was already traversed 215 * during the operation. This catches situations where already 216 * traversed directory is moved to different parent, and then we walk 217 * over it with dotdots. 218 * 219 * Also allow to force failure of dotdot lookups for non-local 220 * filesystems, where external agents might assist local lookups to 221 * escape the compartment. 222 */ 223static int 224nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) 225{ 226 struct nameicap_tracker *nt; 227 struct mount *mp; 228 229 if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & 230 NI_LCF_STRICTRELATIVE) == 0) 231 return (0); 232 if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) 233 return (ENOTCAPABLE); 234 mp = dp->v_mount; 235 if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && 236 (mp->mnt_flag & MNT_LOCAL) == 0) 237 return (ENOTCAPABLE); 238 TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, 239 nm_link) { 240 if (dp == nt->dp) { 241 nt = TAILQ_NEXT(nt, nm_link); 242 if (nt != NULL) 243 nameicap_cleanup_from(ndp, nt); 244 return (0); 245 } 246 } 247 return (ENOTCAPABLE); 248} 249 250static void 251namei_cleanup_cnp(struct componentname *cnp) 252{ 253 254 uma_zfree(namei_zone, cnp->cn_pnbuf); 255#ifdef DIAGNOSTIC 256 cnp->cn_pnbuf = NULL; 257 cnp->cn_nameptr = NULL; 258#endif 259} 260 261static int 262namei_handle_root(struct nameidata *ndp, struct vnode **dpp) 263{ 264 struct componentname *cnp; 265 266 cnp = &ndp->ni_cnd; 267 if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { 268#ifdef KTRACE 269 if (KTRPOINT(curthread, KTR_CAPFAIL)) 270 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 271#endif 272 return (ENOTCAPABLE); 273 } 274 while (*(cnp->cn_nameptr) == '/') { 275 cnp->cn_nameptr++; 276 ndp->ni_pathlen--; 277 } 278 *dpp = ndp->ni_rootdir; 279 vrefact(*dpp); 280 return (0); 281} 282 283/* 284 * Convert a pathname into a pointer to a locked vnode. 
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Returns 0 on success or an errno value.  On success the pathname buffer
 * is freed unless SAVENAME or SAVESTART is set, in which case HASBUF is
 * set in cn_flags instead.  On every error return the buffer is freed.
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;	/* pointer to file descriptor state */
	char *cp;		/* pointer into pathname argument */
	struct vnode *dp;	/* the directory we are searching */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct componentname *cnp;
	struct file *dfp;
	struct thread *td;
	struct proc *p;
	cap_rights_t rights;
	struct uio auio;
	int error, linklen, startdir_used;

	cnp = &ndp->ni_cnd;
	td = cnp->cn_thread;
	p = td->td_proc;
	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
	    ("namei: nameiop contaminated with flags"));
	KASSERT((cnp->cn_flags & OPMASK) == 0,
	    ("namei: flags contaminated with nameiops"));
	MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
	    ndp->ni_startdir->v_type == VBAD);
	fdp = p->p_fd;
	TAILQ_INIT(&ndp->ni_cap_tracker);
	ndp->ni_lcf = 0;

	/* We will set this ourselves if we need it. */
	cnp->cn_flags &= ~TRAILINGSLASH;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
		    &ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
		    &ndp->ni_pathlen);

	/*
	 * Don't allow empty pathnames.
	 */
	if (error == 0 && *cnp->cn_pnbuf == '\0')
		error = ENOENT;

#ifdef CAPABILITY_MODE
	/*
	 * In capability mode, lookups must be restricted to happen in
	 * the subtree with the root specified by the file descriptor:
	 * - The root must be real file descriptor, not the pseudo-descriptor
	 *   AT_FDCWD.
	 * - The passed path must be relative and not absolute.
	 * - If lookup_cap_dotdot is disabled, path must not contain the
	 *   '..' components.
	 * - If lookup_cap_dotdot is enabled, we verify that all '..'
	 *   components lookups result in the directories which were
	 *   previously walked by us, which prevents an escape from
	 *   the relative root.
	 */
	if (error == 0 && IN_CAPABILITY_MODE(td) &&
	    (cnp->cn_flags & NOCAPCHECK) == 0) {
		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
		ndp->ni_resflags |= NIRES_STRICTREL;
		if (ndp->ni_dirfd == AT_FDCWD) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
			error = ECAPMODE;
		}
	}
#endif
	/*
	 * Early failure: no vnode references have been taken yet, so only
	 * the pathname buffer has to be released.
	 */
	if (error != 0) {
		namei_cleanup_cnp(cnp);
		ndp->ni_vp = NULL;
		return (error);
	}
	ndp->ni_loopcnt = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_NAMEI)) {
		KASSERT(cnp->cn_thread == curthread,
		    ("namei not using curthread"));
		ktrnamei(cnp->cn_pnbuf);
	}
#endif
	/*
	 * Get starting point for the translation.
	 */
	FILEDESC_SLOCK(fdp);
	ndp->ni_rootdir = fdp->fd_rdir;
	vrefact(ndp->ni_rootdir);
	ndp->ni_topdir = fdp->fd_jdir;

	/*
	 * If we are auditing the kernel pathname, save the user pathname.
	 */
	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
	if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);

	/*
	 * Pick the starting directory: the root for an absolute path,
	 * otherwise the caller-supplied ni_startdir, the current working
	 * directory (AT_FDCWD), or the vnode behind ni_dirfd.
	 */
	startdir_used = 0;
	dp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	if (cnp->cn_pnbuf[0] == '/') {
		ndp->ni_resflags |= NIRES_ABS;
		error = namei_handle_root(ndp, &dp);
	} else {
		if (ndp->ni_startdir != NULL) {
			/* No new reference is taken on ni_startdir here. */
			dp = ndp->ni_startdir;
			startdir_used = 1;
		} else if (ndp->ni_dirfd == AT_FDCWD) {
			dp = fdp->fd_cdir;
			vrefact(dp);
		} else {
			rights = ndp->ni_rightsneeded;
			cap_rights_set(&rights, CAP_LOOKUP);

			if (cnp->cn_flags & AUDITVNODE1)
				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
			if (cnp->cn_flags & AUDITVNODE2)
				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
			/*
			 * Effectively inlined fgetvp_rights, because we need to
			 * inspect the file as well as grabbing the vnode.
			 */
			error = fget_cap_locked(fdp, ndp->ni_dirfd, &rights,
			    &dfp, &ndp->ni_filecaps);
			if (error != 0) {
				/*
				 * Preserve the error; it should either be EBADF
				 * or capability-related, both of which can be
				 * safely returned to the caller.
				 */
			} else if (dfp->f_ops == &badfileops) {
				error = EBADF;
			} else if (dfp->f_vnode == NULL) {
				error = ENOTDIR;
			} else {
				dp = dfp->f_vnode;
				vrefact(dp);

				if ((dfp->f_flag & FSEARCH) != 0)
					cnp->cn_flags |= NOEXECCHECK;
			}
#ifdef CAPABILITIES
			/*
			 * If file descriptor doesn't have all rights,
			 * all lookups relative to it must also be
			 * strictly relative.
			 */
			CAP_ALL(&rights);
			if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights,
			    &rights) ||
			    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
			    ndp->ni_filecaps.fc_nioctls != -1) {
				ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
				ndp->ni_resflags |= NIRES_STRICTREL;
			}
#endif
		}
		if (error == 0 && dp->v_type != VDIR)
			error = ENOTDIR;
	}
	FILEDESC_SUNLOCK(fdp);

	/*
	 * RBENEATH: absolute paths are refused; a relative lookup that is
	 * not already strictly relative becomes one, with ".." tracking.
	 */
	if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) {
		if (cnp->cn_pnbuf[0] == '/') {
			error = ENOTCAPABLE;
		} else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) {
			ndp->ni_lcf |= NI_LCF_STRICTRELATIVE |
			    NI_LCF_CAP_DOTDOT;
		}
	}

	/* Release a supplied start directory that was not used as dp. */
	if (ndp->ni_startdir != NULL && !startdir_used)
		vrele(ndp->ni_startdir);
	if (error != 0) {
		if (dp != NULL)
			vrele(dp);
		goto out;
	}
	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
	    lookup_cap_dotdot != 0)
		ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
	SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
	    cnp->cn_flags);
	for (;;) {
		ndp->ni_startdir = dp;
		error = lookup(ndp);
		if (error != 0)
			goto out;

		/*
		 * If not a symbolic link, we're done.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			vrele(ndp->ni_rootdir);
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
				namei_cleanup_cnp(cnp);
			} else
				cnp->cn_flags |= HASBUF;
			nameicap_cleanup(ndp);
			SDT_PROBE2(vfs, namei, lookup, return, error,
			    (error == 0 ? ndp->ni_vp : NULL));
			return (error);
		}
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
#ifdef MAC
		if ((cnp->cn_flags & NOMACCHECK) == 0) {
			error = mac_vnode_check_readlink(td->td_ucred,
			    ndp->ni_vp);
			if (error != 0)
				break;
		}
#endif
		/*
		 * When ni_pathlen > 1 the link target is read into a fresh
		 * buffer so the unparsed remainder at ni_next can be
		 * appended to it; otherwise the existing buffer is reused.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = td;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error != 0) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			error = ENOENT;
			break;
		}
		if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			error = ENAMETOOLONG;
			break;
		}
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		dp = ndp->ni_dvp;
		/*
		 * Check if root directory should replace current directory.
		 */
		cnp->cn_nameptr = cnp->cn_pnbuf;
		if (*(cnp->cn_nameptr) == '/') {
			vrele(dp);
			error = namei_handle_root(ndp, &dp);
			if (error != 0)
				goto out;
		}
	}
	/* Reached by 'break' while the symlink vnode is still held. */
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	vrele(ndp->ni_dvp);
out:
	/* Common error exit: drop the root reference, buffer and tracker. */
	vrele(ndp->ni_rootdir);
	MPASS(error != 0);
	namei_cleanup_cnp(cnp);
	nameicap_cleanup(ndp);
	SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
	return (error);
}

/*
 * Adjust the lock flags for locking a vnode on mount mp.
 *
 * A shared request is turned into an exclusive one when mp lacks
 * MNTK_LOOKUP_SHARED, or when the component is ".." and mp has
 * MNTK_LOOKUP_EXCL_DOTDOT.  A NULL mp always yields LK_EXCLUSIVE.
 * LK_NODDLKTREAT is added in every case.
 */
static int
compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
{

	if (mp == NULL || ((lkflags & LK_SHARED) &&
	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
	    ((cnflags & ISDOTDOT) &&
	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
		lkflags &= ~LK_SHARED;
		lkflags |= LK_EXCLUSIVE;
	}
	lkflags |= LK_NODDLKTREAT;
	return (lkflags);
}

/*
 * Return non-zero when the vnode for the current component must be
 * locked exclusively, given the componentname flags and the mount.
 */
static __inline int
needs_exclusive_leaf(struct mount *mp, int flags)
{

	/*
	 * Intermediate nodes can use shared locks, we only need to
	 * force an exclusive lock for leaf nodes.
	 */
	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
		return (0);

	/* Always use exclusive locks if LOCKSHARED isn't set. */
	if (!(flags & LOCKSHARED))
		return (1);

	/*
	 * For lookups during open(), if the mount point supports
	 * extended shared operations, then use a shared lock for the
	 * leaf node, otherwise use an exclusive lock.
	 */
	if ((flags & ISOPEN) != 0)
		return (!MNT_EXTENDED_SHARED(mp));

	/*
	 * Lookup requests outside of open() that specify LOCKSHARED
	 * only need a shared lock on the leaf vnode.
	 */
	return (0);
}

/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is taken from ni_startdir. The pathname is
 * descended until done, or a symbolic link is encountered.
 * The variable
 * ni_more is clear if the path is completed; it is set to one if a
 * symbolic link needing interpretation is encountered.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked. Otherwise the parent directory is not returned. If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at ndp->ni_ptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
 *	if result vnode is mounted on and crossing mount points,
 *	    find mounted on vnode
 *	if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 *
 * Returns 0 on success or an errno value; on error ni_vp is set to NULL.
 * ISSYMLINK is set in cn_flags when the walk stopped at a symbolic link
 * that has to be followed.
 */
int
lookup(struct nameidata *ndp)
{
	char *cp;			/* pointer into pathname argument */
	char *prev_ni_next;		/* saved ndp->ni_next */
	struct vnode *dp = NULL;	/* the directory we are searching */
	struct vnode *tdp;		/* saved dp */
	struct mount *mp;		/* mount table entry */
	struct prison *pr;
	size_t prev_ni_pathlen;		/* saved ndp->ni_pathlen */
	int docache;			/* == 0 do not cache last component */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int error = 0;
	int dpunlocked = 0;		/* dp has already been unlocked */
	int relookup = 0;		/* do not consume the path component */
	struct componentname *cnp = &ndp->ni_cnd;
	int lkflags_save;
	/*
	 * State of ni_dvp for the bad2 exit: 0 = still locked,
	 * 1 = unlocked but still referenced, 2 = already released.
	 */
	int ni_dvp_unlocked;

	/*
	 * Setup: break out flag bits into variables.
	 */
	ni_dvp_unlocked = 0;
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
	/*
	 * When set to zero, docache causes the last component of the
	 * pathname to be deleted from the cache and the full lookup
	 * of the name to be done (via VOP_CACHEDLOOKUP()). Often
	 * filesystems need some pre-computed values that are made
	 * during the full lookup, for instance UFS sets dp->i_offset.
	 *
	 * The docache variable is set to zero when requested by the
	 * NOCACHE flag and for all modifying operations except CREATE.
	 */
	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE ||
	    (wantparent && cnp->cn_nameiop != CREATE &&
	     cnp->cn_nameiop != LOOKUP))
		docache = 0;
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	ndp->ni_dvp = NULL;
	/*
	 * We use shared locks until we hit the parent of the last cn then
	 * we adjust based on the requesting flags.
	 */
	cnp->cn_lkflags = LK_SHARED;
	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;
	vn_lock(dp,
	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
	    cnp->cn_flags));

dirloop:
	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
		continue;
	cnp->cn_namelen = cp - cnp->cn_nameptr;
	if (cnp->cn_namelen > NAME_MAX) {
		error = ENAMETOOLONG;
		goto bad;
	}
#ifdef NAMEI_DIAGNOSTIC
	{ char c = *cp;
	*cp = '\0';
	printf("{%s}: ", cnp->cn_nameptr);
	*cp = c; }
#endif
	/* Saved so that an ERELOOKUP retry can restore both values. */
	prev_ni_pathlen = ndp->ni_pathlen;
	ndp->ni_pathlen -= cnp->cn_namelen;
	KASSERT(ndp->ni_pathlen <= PATH_MAX,
	    ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen));
	prev_ni_next = ndp->ni_next;
	ndp->ni_next = cp;

	/*
	 * Replace multiple slashes by a single slash and trailing slashes
	 * by a null.  This must be done before VOP_LOOKUP() because some
	 * fs's don't know about trailing slashes.  Remember if there were
	 * trailing slashes to handle symlinks, existing non-directories
	 * and non-existing files that won't be directories specially later.
	 */
	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
		cp++;
		ndp->ni_pathlen--;
		if (*cp == '\0') {
			*ndp->ni_next = '\0';
			cnp->cn_flags |= TRAILINGSLASH;
		}
	}
	ndp->ni_next = cp;

	cnp->cn_flags |= MAKEENTRY;
	if (*cp == '\0' && docache == 0)
		cnp->cn_flags &= ~MAKEENTRY;
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
		cnp->cn_flags |= ISDOTDOT;
	else
		cnp->cn_flags &= ~ISDOTDOT;
	if (*ndp->ni_next == 0)
		cnp->cn_flags |= ISLASTCN;
	else
		cnp->cn_flags &= ~ISLASTCN;

	/* A final "." component cannot be the target of DELETE or RENAME. */
	if ((cnp->cn_flags & ISLASTCN) != 0 &&
	    cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EINVAL;
		goto bad;
	}

	nameicap_tracker_add(ndp, dp);

	/*
	 * Check for degenerate name (e.g. / or "")
	 * which is a way of talking about a directory,
	 * e.g. like "/." or ".".
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (cnp->cn_nameiop != LOOKUP) {
			error = EISDIR;
			goto bad;
		}
		if (wantparent) {
			ndp->ni_dvp = dp;
			VREF(dp);
		}
		ndp->ni_vp = dp;

		if (cnp->cn_flags & AUDITVNODE1)
			AUDIT_ARG_VNODE1(dp);
		else if (cnp->cn_flags & AUDITVNODE2)
			AUDIT_ARG_VNODE2(dp);

		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
			VOP_UNLOCK(dp, 0);
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		goto success;
	}

	/*
	 * Handle "..": five special cases.
	 * 0. If doing a capability lookup and lookup_cap_dotdot is
	 *    disabled, return ENOTCAPABLE.
	 * 1. Return an error if this is the last component of
	 *    the name and the operation is DELETE or RENAME.
	 * 2. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 3. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other filesystem.
	 * 4. If the vnode is the top directory of
	 *    the jail or chroot, don't let them out.
	 * 5. If doing a capability lookup and lookup_cap_dotdot is
	 *    enabled, return ENOTCAPABLE if the lookup would escape
	 *    from the initial file descriptor directory.  Checks are
	 *    done by ensuring that namei() already traversed the
	 *    result of dotdot lookup.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
		    == NI_LCF_STRICTRELATIVE) {
#ifdef KTRACE
			if (KTRPOINT(curthread, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
			error = ENOTCAPABLE;
			goto bad;
		}
		if ((cnp->cn_flags & ISLASTCN) != 0 &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			error = EINVAL;
			goto bad;
		}
		for (;;) {
			/* pr ends up non-NULL iff dp is a prison root. */
			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
			     pr = pr->pr_parent)
				if (dp == pr->pr_root)
					break;
			if (dp == ndp->ni_rootdir ||
			    dp == ndp->ni_topdir ||
			    dp == rootvnode ||
			    pr != NULL ||
			    ((dp->v_vflag & VV_ROOT) != 0 &&
			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
				/* ".." resolves to dp itself. */
				ndp->ni_dvp = dp;
				ndp->ni_vp = dp;
				VREF(dp);
				goto nextname;
			}
			if ((dp->v_vflag & VV_ROOT) == 0)
				break;
			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
				error = ENOENT;
				goto bad;
			}
			/* Step from the mount root to the covered vnode. */
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, ISDOTDOT));
			error = nameicap_check_dotdot(ndp, dp);
			if (error != 0) {
#ifdef KTRACE
				if (KTRPOINT(curthread, KTR_CAPFAIL))
					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
				goto bad;
			}
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
unionlookup:
#ifdef MAC
	if ((cnp->cn_flags & NOMACCHECK) == 0) {
		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
		    cnp);
		if (error)
			goto bad;
	}
#endif
	ndp->ni_dvp = dp;
	ndp->ni_vp = NULL;
	ASSERT_VOP_LOCKED(dp, "lookup");
	/*
	 * If we have a shared lock we may need to upgrade the lock for the
	 * last operation.
	 */
	if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) &&
	    dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED)
		vn_lock(dp, LK_UPGRADE|LK_RETRY);
	if ((dp->v_iflag & VI_DOOMED) != 0) {
		error = ENOENT;
		goto bad;
	}
	/*
	 * If we're looking up the last component and we need an exclusive
	 * lock, adjust our lkflags.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
		cnp->cn_lkflags = LK_EXCLUSIVE;
#ifdef NAMEI_DIAGNOSTIC
	vn_printf(dp, "lookup in ");
#endif
	/* The per-mount adjusted flags apply to this VOP_LOOKUP only. */
	lkflags_save = cnp->cn_lkflags;
	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
	    cnp->cn_flags);
	error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp);
	cnp->cn_lkflags = lkflags_save;
	if (error != 0) {
		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif
		/*
		 * ENOENT at the root of a MNT_UNION mount: retry the same
		 * component in the covered directory.
		 */
		if ((error == ENOENT) &&
		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, cnp->cn_flags));
			nameicap_tracker_add(ndp, dp);
			goto unionlookup;
		}

		/*
		 * ERELOOKUP: treat dp as the result, with an extra
		 * reference, so that the code at nextname restarts
		 * dirloop on the same component.
		 */
		if (error == ERELOOKUP) {
			vref(dp);
			ndp->ni_vp = dp;
			error = 0;
			relookup = 1;
			goto good;
		}

		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * At this point, we know we're at the end of the
		 * pathname.  If creating / renaming, we can consider
		 * allowing the file or directory to be created / renamed,
		 * provided we're not on a read-only filesystem.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		/* trailing slash only allowed for directories */
		if ((cnp->cn_flags & TRAILINGSLASH) &&
		    !(cnp->cn_flags & WILLBEDIR)) {
			error = ENOENT;
			goto bad;
		}
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * (possibly locked) directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			ndp->ni_startdir = ndp->ni_dvp;
			VREF(ndp->ni_startdir);
		}
		goto success;
	}

good:
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif
	dp = ndp->ni_vp;

	/*
	 * Check to see if the vnode has been mounted on;
	 * if so find the root of the mounted filesystem.
	 */
	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		if (vfs_busy(mp, 0))
			continue;
		vput(dp);
		if (dp != ndp->ni_dvp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		/* The crossmp placeholder stands in as the parent vnode. */
		vrefact(vp_crossmp);
		ndp->ni_dvp = vp_crossmp;
		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
		    cnp->cn_flags), &tdp);
		vfs_unbusy(mp);
		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
			panic("vp_crossmp exclusively locked or reclaimed");
		if (error) {
			/* dp was already released by the vput() above. */
			dpunlocked = 1;
			goto bad2;
		}
		ndp->ni_vp = dp = tdp;
	}

	/*
	 * Check for symbolic link
	 */
	if ((dp->v_type == VLNK) &&
	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
	     *ndp->ni_next == '/')) {
		cnp->cn_flags |= ISSYMLINK;
		if (dp->v_iflag & VI_DOOMED) {
			/*
			 * We can't know whether the directory was mounted with
			 * NOSYMFOLLOW, so we can't follow safely.
			 */
			error = ENOENT;
			goto bad2;
		}
		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
			error = EACCES;
			goto bad2;
		}
		/*
		 * Symlink code always expects an unlocked dvp.
		 */
		if (ndp->ni_dvp != ndp->ni_vp) {
			VOP_UNLOCK(ndp->ni_dvp, 0);
			ni_dvp_unlocked = 1;
		}
		goto success;
	}

nextname:
	/*
	 * Not a symbolic link that we will follow.  Continue with the
	 * next component if there is any; otherwise, we're done.
	 */
	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
	    ("lookup: invalid path state."));
	if (relookup) {
		/*
		 * Undo the consumption of this component and parse it
		 * again from cn_nameptr, which was not advanced.
		 */
		relookup = 0;
		ndp->ni_pathlen = prev_ni_pathlen;
		ndp->ni_next = prev_ni_next;
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		goto dirloop;
	}
	if (cnp->cn_flags & ISDOTDOT) {
		error = nameicap_check_dotdot(ndp, ndp->ni_vp);
		if (error != 0) {
#ifdef KTRACE
			if (KTRPOINT(curthread, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
			goto bad2;
		}
	}
	if (*ndp->ni_next == '/') {
		/* More components follow: skip slashes and descend. */
		cnp->cn_nameptr = ndp->ni_next;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		goto dirloop;
	}
	/*
	 * If we're processing a path with a trailing slash,
	 * check that the end result is a directory.
	 */
	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad2;
	}
	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto bad2;
	}
	if (cnp->cn_flags & SAVESTART) {
		ndp->ni_startdir = ndp->ni_dvp;
		VREF(ndp->ni_startdir);
	}
	if (!wantparent) {
		/* The caller does not want the parent: release it now. */
		ni_dvp_unlocked = 2;
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
		VOP_UNLOCK(ndp->ni_dvp, 0);
		ni_dvp_unlocked = 1;
	}

	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG_VNODE1(dp);
	else if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG_VNODE2(dp);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
success:
	/*
	 * Because of shared lookup we may have the vnode shared locked, but
	 * the caller may want it to be exclusively locked.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
		vn_lock(dp, LK_UPGRADE | LK_RETRY);
		if (dp->v_iflag & VI_DOOMED) {
			error = ENOENT;
			goto bad2;
		}
	}
	if (ndp->ni_vp != NULL && ndp->ni_vp->v_type == VDIR) {
		if ((cnp->cn_flags & ISDOTDOT) == 0)
			nameicap_tracker_add(ndp, ndp->ni_vp);
	}
	return (0);

bad2:
	/*
	 * Error exit while ni_dvp is set: release it according to
	 * ni_dvp_unlocked, then fall through to release dp.
	 */
	if (ni_dvp_unlocked != 2) {
		if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
	}
bad:
	/* Error exit: release dp unless that was already done. */
	if (!dpunlocked)
		vput(dp);
	ndp->ni_vp = NULL;
	return (error);
}

/*
 * relookup - lookup a path name component
 *    Used by lookup to re-acquire things.
1183 */ 1184int 1185relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 1186{ 1187 struct vnode *dp = NULL; /* the directory we are searching */ 1188 int wantparent; /* 1 => wantparent or lockparent flag */ 1189 int rdonly; /* lookup read-only flag bit */ 1190 int error = 0; 1191 1192 KASSERT(cnp->cn_flags & ISLASTCN, 1193 ("relookup: Not given last component.")); 1194 /* 1195 * Setup: break out flag bits into variables. 1196 */ 1197 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 1198 KASSERT(wantparent, ("relookup: parent not wanted.")); 1199 rdonly = cnp->cn_flags & RDONLY; 1200 cnp->cn_flags &= ~ISSYMLINK; 1201 dp = dvp; 1202 cnp->cn_lkflags = LK_EXCLUSIVE; 1203 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 1204 1205 /* 1206 * Search a new directory. 1207 * 1208 * The last component of the filename is left accessible via 1209 * cnp->cn_nameptr for callers that need the name. Callers needing 1210 * the name set the SAVENAME flag. When done, they assume 1211 * responsibility for freeing the pathname buffer. 1212 */ 1213#ifdef NAMEI_DIAGNOSTIC 1214 printf("{%s}: ", cnp->cn_nameptr); 1215#endif 1216 1217 /* 1218 * Check for "" which represents the root directory after slash 1219 * removal. 1220 */ 1221 if (cnp->cn_nameptr[0] == '\0') { 1222 /* 1223 * Support only LOOKUP for "/" because lookup() 1224 * can't succeed for CREATE, DELETE and RENAME. 1225 */ 1226 KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); 1227 KASSERT(dp->v_type == VDIR, ("dp is not a directory")); 1228 1229 if (!(cnp->cn_flags & LOCKLEAF)) 1230 VOP_UNLOCK(dp, 0); 1231 *vpp = dp; 1232 /* XXX This should probably move to the top of function. */ 1233 if (cnp->cn_flags & SAVESTART) 1234 panic("lookup: SAVESTART"); 1235 return (0); 1236 } 1237 1238 if (cnp->cn_flags & ISDOTDOT) 1239 panic ("relookup: lookup on dot-dot"); 1240 1241 /* 1242 * We now have a segment name to search for, and a directory to search. 
1243 */ 1244#ifdef NAMEI_DIAGNOSTIC 1245 vn_printf(dp, "search in "); 1246#endif 1247 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 1248 KASSERT(*vpp == NULL, ("leaf should be empty")); 1249 if (error != EJUSTRETURN) 1250 goto bad; 1251 /* 1252 * If creating and at end of pathname, then can consider 1253 * allowing file to be created. 1254 */ 1255 if (rdonly) { 1256 error = EROFS; 1257 goto bad; 1258 } 1259 /* ASSERT(dvp == ndp->ni_startdir) */ 1260 if (cnp->cn_flags & SAVESTART) 1261 VREF(dvp); 1262 if ((cnp->cn_flags & LOCKPARENT) == 0) 1263 VOP_UNLOCK(dp, 0); 1264 /* 1265 * We return with ni_vp NULL to indicate that the entry 1266 * doesn't currently exist, leaving a pointer to the 1267 * (possibly locked) directory vnode in ndp->ni_dvp. 1268 */ 1269 return (0); 1270 } 1271 1272 dp = *vpp; 1273 1274 /* 1275 * Disallow directory write attempts on read-only filesystems. 1276 */ 1277 if (rdonly && 1278 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1279 if (dvp == dp) 1280 vrele(dvp); 1281 else 1282 vput(dvp); 1283 error = EROFS; 1284 goto bad; 1285 } 1286 /* 1287 * Set the parent lock/ref state to the requested state. 
1288 */ 1289 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 1290 if (wantparent) 1291 VOP_UNLOCK(dvp, 0); 1292 else 1293 vput(dvp); 1294 } else if (!wantparent) 1295 vrele(dvp); 1296 /* 1297 * Check for symbolic link 1298 */ 1299 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 1300 ("relookup: symlink found.\n")); 1301 1302 /* ASSERT(dvp == ndp->ni_startdir) */ 1303 if (cnp->cn_flags & SAVESTART) 1304 VREF(dvp); 1305 1306 if ((cnp->cn_flags & LOCKLEAF) == 0) 1307 VOP_UNLOCK(dp, 0); 1308 return (0); 1309bad: 1310 vput(dp); 1311 *vpp = NULL; 1312 return (error); 1313} 1314 1315void 1316NDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg, 1317 const char *namep, int dirfd, struct vnode *startdir, cap_rights_t *rightsp, 1318 struct thread *td) 1319{ 1320 1321 ndp->ni_cnd.cn_nameiop = op; 1322 ndp->ni_cnd.cn_flags = flags; 1323 ndp->ni_segflg = segflg; 1324 ndp->ni_dirp = namep; 1325 ndp->ni_dirfd = dirfd; 1326 ndp->ni_startdir = startdir; 1327 ndp->ni_resflags = 0; 1328 if (rightsp != NULL) 1329 ndp->ni_rightsneeded = *rightsp; 1330 else 1331 cap_rights_init(&ndp->ni_rightsneeded); 1332 filecaps_init(&ndp->ni_filecaps); 1333 ndp->ni_cnd.cn_thread = td; 1334} 1335 1336/* 1337 * Free data allocated by namei(); see namei(9) for details. 
1338 */ 1339void 1340NDFREE(struct nameidata *ndp, const u_int flags) 1341{ 1342 int unlock_dvp; 1343 int unlock_vp; 1344 1345 unlock_dvp = 0; 1346 unlock_vp = 0; 1347 1348 if (!(flags & NDF_NO_FREE_PNBUF) && 1349 (ndp->ni_cnd.cn_flags & HASBUF)) { 1350 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1351 ndp->ni_cnd.cn_flags &= ~HASBUF; 1352 } 1353 if (!(flags & NDF_NO_VP_UNLOCK) && 1354 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1355 unlock_vp = 1; 1356 if (!(flags & NDF_NO_DVP_UNLOCK) && 1357 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1358 ndp->ni_dvp != ndp->ni_vp) 1359 unlock_dvp = 1; 1360 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1361 if (unlock_vp) { 1362 vput(ndp->ni_vp); 1363 unlock_vp = 0; 1364 } else 1365 vrele(ndp->ni_vp); 1366 ndp->ni_vp = NULL; 1367 } 1368 if (unlock_vp) 1369 VOP_UNLOCK(ndp->ni_vp, 0); 1370 if (!(flags & NDF_NO_DVP_RELE) && 1371 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1372 if (unlock_dvp) { 1373 vput(ndp->ni_dvp); 1374 unlock_dvp = 0; 1375 } else 1376 vrele(ndp->ni_dvp); 1377 ndp->ni_dvp = NULL; 1378 } 1379 if (unlock_dvp) 1380 VOP_UNLOCK(ndp->ni_dvp, 0); 1381 if (!(flags & NDF_NO_STARTDIR_RELE) && 1382 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1383 vrele(ndp->ni_startdir); 1384 ndp->ni_startdir = NULL; 1385 } 1386} 1387 1388/* 1389 * Determine if there is a suitable alternate filename under the specified 1390 * prefix for the specified path. If the create flag is set, then the 1391 * alternate prefix will be used so long as the parent directory exists. 1392 * This is used by the various compatibility ABIs so that Linux binaries prefer 1393 * files under /compat/linux for example. The chosen path (whether under 1394 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1395 * to by pathbuf. The caller is responsible for free'ing the buffer from 1396 * the M_TEMP bucket if one is returned. 
1397 */ 1398int 1399kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1400 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1401{ 1402 struct nameidata nd, ndroot; 1403 char *ptr, *buf, *cp; 1404 size_t len, sz; 1405 int error; 1406 1407 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1408 *pathbuf = buf; 1409 1410 /* Copy the prefix into the new pathname as a starting point. */ 1411 len = strlcpy(buf, prefix, MAXPATHLEN); 1412 if (len >= MAXPATHLEN) { 1413 *pathbuf = NULL; 1414 free(buf, M_TEMP); 1415 return (EINVAL); 1416 } 1417 sz = MAXPATHLEN - len; 1418 ptr = buf + len; 1419 1420 /* Append the filename to the prefix. */ 1421 if (pathseg == UIO_SYSSPACE) 1422 error = copystr(path, ptr, sz, &len); 1423 else 1424 error = copyinstr(path, ptr, sz, &len); 1425 1426 if (error) { 1427 *pathbuf = NULL; 1428 free(buf, M_TEMP); 1429 return (error); 1430 } 1431 1432 /* Only use a prefix with absolute pathnames. */ 1433 if (*ptr != '/') { 1434 error = EINVAL; 1435 goto keeporig; 1436 } 1437 1438 if (dirfd != AT_FDCWD) { 1439 /* 1440 * We want the original because the "prefix" is 1441 * included in the already opened dirfd. 1442 */ 1443 bcopy(ptr, buf, len); 1444 return (0); 1445 } 1446 1447 /* 1448 * We know that there is a / somewhere in this pathname. 1449 * Search backwards for it, to find the file's parent dir 1450 * to see if it exists in the alternate tree. If it does, 1451 * and we want to create a file (cflag is set). We don't 1452 * need to worry about the root comparison in this case. 
1453 */ 1454 1455 if (create) { 1456 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1457 *cp = '\0'; 1458 1459 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); 1460 error = namei(&nd); 1461 *cp = '/'; 1462 if (error != 0) 1463 goto keeporig; 1464 } else { 1465 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); 1466 1467 error = namei(&nd); 1468 if (error != 0) 1469 goto keeporig; 1470 1471 /* 1472 * We now compare the vnode of the prefix to the one 1473 * vnode asked. If they resolve to be the same, then we 1474 * ignore the match so that the real root gets used. 1475 * This avoids the problem of traversing "../.." to find the 1476 * root directory and never finding it, because "/" resolves 1477 * to the emulation root directory. This is expensive :-( 1478 */ 1479 NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, 1480 td); 1481 1482 /* We shouldn't ever get an error from this namei(). */ 1483 error = namei(&ndroot); 1484 if (error == 0) { 1485 if (nd.ni_vp == ndroot.ni_vp) 1486 error = ENOENT; 1487 1488 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1489 vrele(ndroot.ni_vp); 1490 } 1491 } 1492 1493 NDFREE(&nd, NDF_ONLY_PNBUF); 1494 vrele(nd.ni_vp); 1495 1496keeporig: 1497 /* If there was an error, use the original path name. */ 1498 if (error) 1499 bcopy(ptr, buf, len); 1500 return (error); 1501} 1502