vfs_lookup.c revision 273415
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: releng/10.0/sys/kern/vfs_lookup.c 273415 2014-10-21 20:21:10Z delphij $"); 39 40#include "opt_capsicum.h" 41#include "opt_kdtrace.h" 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/capability.h> 48#include <sys/fcntl.h> 49#include <sys/jail.h> 50#include <sys/lock.h> 51#include <sys/mutex.h> 52#include <sys/namei.h> 53#include <sys/vnode.h> 54#include <sys/mount.h> 55#include <sys/filedesc.h> 56#include <sys/proc.h> 57#include <sys/sdt.h> 58#include <sys/syscallsubr.h> 59#include <sys/sysctl.h> 60#ifdef KTRACE 61#include <sys/ktrace.h> 62#endif 63 64#include <security/audit/audit.h> 65#include <security/mac/mac_framework.h> 66 67#include <vm/uma.h> 68 69#define NAMEI_DIAGNOSTIC 1 70#undef NAMEI_DIAGNOSTIC 71 72SDT_PROVIDER_DECLARE(vfs); 73SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, entry, "struct vnode *", "char *", 74 "unsigned long"); 75SDT_PROBE_DEFINE2(vfs, namei, lookup, return, return, "int", "struct vnode *"); 76 77/* 78 * Allocation zone for namei 79 */ 80uma_zone_t namei_zone; 81/* 82 * Placeholder vnode for mp traversal 83 */ 84static struct vnode *vp_crossmp; 85 86static void 87nameiinit(void *dummy __unused) 88{ 89 90 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 91 UMA_ALIGN_PTR, 0); 92 getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 93 vn_lock(vp_crossmp, LK_EXCLUSIVE); 94 VN_LOCK_ASHARE(vp_crossmp); 95 VOP_UNLOCK(vp_crossmp, 0); 96} 97SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 98 99static int lookup_shared = 1; 100SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 101 "Enables/Disables shared locks for path name translation"); 102TUNABLE_INT("vfs.lookup_shared", &lookup_shared); 103 104/* 105 * Convert a pathname into a pointer to a locked vnode. 106 * 107 * The FOLLOW flag is set when symbolic links are to be followed 108 * when they occur at the end of the name translation process. 109 * Symbolic links are always followed for all other pathname 110 * components other than the last. 111 * 112 * The segflg defines whether the name is to be copied from user 113 * space or kernel space. 114 * 115 * Overall outline of namei: 116 * 117 * copy in name 118 * get starting directory 119 * while (!done && !error) { 120 * call lookup to search path. 121 * if symbolic link, massage name in buffer and continue 122 * } 123 */ 124static void 125namei_cleanup_cnp(struct componentname *cnp) 126{ 127 uma_zfree(namei_zone, cnp->cn_pnbuf); 128#ifdef DIAGNOSTIC 129 cnp->cn_pnbuf = NULL; 130 cnp->cn_nameptr = NULL; 131#endif 132} 133 134int 135namei(struct nameidata *ndp) 136{ 137 struct filedesc *fdp; /* pointer to file descriptor state */ 138 char *cp; /* pointer into pathname argument */ 139 struct vnode *dp; /* the directory we are searching */ 140 struct iovec aiov; /* uio for reading symbolic links */ 141 struct uio auio; 142 int error, linklen; 143 struct componentname *cnp = &ndp->ni_cnd; 144 struct thread *td = cnp->cn_thread; 145 struct proc *p = td->td_proc; 146 147 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 148 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 149 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 150 ("namei: nameiop contaminated with flags")); 151 KASSERT((cnp->cn_flags & OPMASK) == 0, 152 ("namei: flags contaminated with nameiops")); 153 if (!lookup_shared) 154 cnp->cn_flags &= ~LOCKSHARED; 155 fdp = p->p_fd; 156 157 /* We will set this ourselves if we need it. */ 158 cnp->cn_flags &= ~TRAILINGSLASH; 159 160 /* 161 * Get a buffer for the name to be translated, and copy the 162 * name into the buffer. 163 */ 164 if ((cnp->cn_flags & HASBUF) == 0) 165 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 166 if (ndp->ni_segflg == UIO_SYSSPACE) 167 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 168 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 169 else 170 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 171 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 172 173 /* 174 * Don't allow empty pathnames. 175 */ 176 if (!error && *cnp->cn_pnbuf == '\0') 177 error = ENOENT; 178 179#ifdef CAPABILITY_MODE 180 /* 181 * In capability mode, lookups must be "strictly relative" (i.e. 182 * not an absolute path, and not containing '..' components) to 183 * a real file descriptor, not the pseudo-descriptor AT_FDCWD. 184 */ 185 if (error == 0 && IN_CAPABILITY_MODE(td) && 186 (cnp->cn_flags & NOCAPCHECK) == 0) { 187 ndp->ni_strictrelative = 1; 188 if (ndp->ni_dirfd == AT_FDCWD) { 189#ifdef KTRACE 190 if (KTRPOINT(td, KTR_CAPFAIL)) 191 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 192#endif 193 error = ECAPMODE; 194 } 195 } 196#endif 197 if (error) { 198 namei_cleanup_cnp(cnp); 199 ndp->ni_vp = NULL; 200 return (error); 201 } 202 ndp->ni_loopcnt = 0; 203#ifdef KTRACE 204 if (KTRPOINT(td, KTR_NAMEI)) { 205 KASSERT(cnp->cn_thread == curthread, 206 ("namei not using curthread")); 207 ktrnamei(cnp->cn_pnbuf); 208 } 209#endif 210 /* 211 * Get starting point for the translation. 212 */ 213 FILEDESC_SLOCK(fdp); 214 ndp->ni_rootdir = fdp->fd_rdir; 215 ndp->ni_topdir = fdp->fd_jdir; 216 217 /* 218 * If we are auditing the kernel pathname, save the user pathname. 219 */ 220 if (cnp->cn_flags & AUDITVNODE1) 221 AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf); 222 if (cnp->cn_flags & AUDITVNODE2) 223 AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf); 224 225 dp = NULL; 226 if (cnp->cn_pnbuf[0] != '/') { 227 if (ndp->ni_startdir != NULL) { 228 dp = ndp->ni_startdir; 229 error = 0; 230 } else if (ndp->ni_dirfd != AT_FDCWD) { 231 cap_rights_t rights; 232 233 rights = ndp->ni_rightsneeded; 234 cap_rights_set(&rights, CAP_LOOKUP); 235 236 if (cnp->cn_flags & AUDITVNODE1) 237 AUDIT_ARG_ATFD1(ndp->ni_dirfd); 238 if (cnp->cn_flags & AUDITVNODE2) 239 AUDIT_ARG_ATFD2(ndp->ni_dirfd); 240 error = fgetvp_rights(td, ndp->ni_dirfd, 241 &rights, &ndp->ni_filecaps, &dp); 242#ifdef CAPABILITIES 243 /* 244 * If file descriptor doesn't have all rights, 245 * all lookups relative to it must also be 246 * strictly relative. 247 */ 248 CAP_ALL(&rights); 249 if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, 250 &rights) || 251 ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || 252 ndp->ni_filecaps.fc_nioctls != -1) { 253 ndp->ni_strictrelative = 1; 254 } 255#endif 256 } 257 if (error != 0 || dp != NULL) { 258 FILEDESC_SUNLOCK(fdp); 259 if (error == 0 && dp->v_type != VDIR) { 260 vrele(dp); 261 error = ENOTDIR; 262 } 263 } 264 if (error) { 265 namei_cleanup_cnp(cnp); 266 return (error); 267 } 268 } 269 if (dp == NULL) { 270 dp = fdp->fd_cdir; 271 VREF(dp); 272 FILEDESC_SUNLOCK(fdp); 273 if (ndp->ni_startdir != NULL) 274 vrele(ndp->ni_startdir); 275 } 276 SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, 277 cnp->cn_flags, 0, 0); 278 for (;;) { 279 /* 280 * Check if root directory should replace current directory. 281 * Done at start of translation and after symbolic link. 282 */ 283 cnp->cn_nameptr = cnp->cn_pnbuf; 284 if (*(cnp->cn_nameptr) == '/') { 285 vrele(dp); 286 if (ndp->ni_strictrelative != 0) { 287#ifdef KTRACE 288 if (KTRPOINT(curthread, KTR_CAPFAIL)) 289 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 290#endif 291 namei_cleanup_cnp(cnp); 292 return (ENOTCAPABLE); 293 } 294 while (*(cnp->cn_nameptr) == '/') { 295 cnp->cn_nameptr++; 296 ndp->ni_pathlen--; 297 } 298 dp = ndp->ni_rootdir; 299 VREF(dp); 300 } 301 ndp->ni_startdir = dp; 302 error = lookup(ndp); 303 if (error) { 304 namei_cleanup_cnp(cnp); 305 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 306 0, 0); 307 return (error); 308 } 309 /* 310 * If not a symbolic link, we're done. 311 */ 312 if ((cnp->cn_flags & ISSYMLINK) == 0) { 313 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 314 namei_cleanup_cnp(cnp); 315 } else 316 cnp->cn_flags |= HASBUF; 317 318 SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp, 319 0, 0, 0); 320 return (0); 321 } 322 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 323 error = ELOOP; 324 break; 325 } 326#ifdef MAC 327 if ((cnp->cn_flags & NOMACCHECK) == 0) { 328 error = mac_vnode_check_readlink(td->td_ucred, 329 ndp->ni_vp); 330 if (error) 331 break; 332 } 333#endif 334 if (ndp->ni_pathlen > 1) 335 cp = uma_zalloc(namei_zone, M_WAITOK); 336 else 337 cp = cnp->cn_pnbuf; 338 aiov.iov_base = cp; 339 aiov.iov_len = MAXPATHLEN; 340 auio.uio_iov = &aiov; 341 auio.uio_iovcnt = 1; 342 auio.uio_offset = 0; 343 auio.uio_rw = UIO_READ; 344 auio.uio_segflg = UIO_SYSSPACE; 345 auio.uio_td = td; 346 auio.uio_resid = MAXPATHLEN; 347 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 348 if (error) { 349 if (ndp->ni_pathlen > 1) 350 uma_zfree(namei_zone, cp); 351 break; 352 } 353 linklen = MAXPATHLEN - auio.uio_resid; 354 if (linklen == 0) { 355 if (ndp->ni_pathlen > 1) 356 uma_zfree(namei_zone, cp); 357 error = ENOENT; 358 break; 359 } 360 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 361 if (ndp->ni_pathlen > 1) 362 uma_zfree(namei_zone, cp); 363 error = ENAMETOOLONG; 364 break; 365 } 366 if (ndp->ni_pathlen > 1) { 367 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 368 uma_zfree(namei_zone, cnp->cn_pnbuf); 369 cnp->cn_pnbuf = cp; 370 } else 371 cnp->cn_pnbuf[linklen] = '\0'; 372 ndp->ni_pathlen += linklen; 373 vput(ndp->ni_vp); 374 dp = ndp->ni_dvp; 375 } 376 namei_cleanup_cnp(cnp); 377 vput(ndp->ni_vp); 378 ndp->ni_vp = NULL; 379 vrele(ndp->ni_dvp); 380 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0); 381 return (error); 382} 383 384static int 385compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) 386{ 387 388 if (mp == NULL || ((lkflags & LK_SHARED) && 389 (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || 390 ((cnflags & ISDOTDOT) && 391 (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { 392 lkflags &= ~LK_SHARED; 393 lkflags |= LK_EXCLUSIVE; 394 } 395 return (lkflags); 396} 397 398static __inline int 399needs_exclusive_leaf(struct mount *mp, int flags) 400{ 401 402 /* 403 * Intermediate nodes can use shared locks, we only need to 404 * force an exclusive lock for leaf nodes. 405 */ 406 if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) 407 return (0); 408 409 /* Always use exclusive locks if LOCKSHARED isn't set. */ 410 if (!(flags & LOCKSHARED)) 411 return (1); 412 413 /* 414 * For lookups during open(), if the mount point supports 415 * extended shared operations, then use a shared lock for the 416 * leaf node, otherwise use an exclusive lock. 417 */ 418 if (flags & ISOPEN) { 419 if (mp != NULL && 420 (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED)) 421 return (0); 422 else 423 return (1); 424 } 425 426 /* 427 * Lookup requests outside of open() that specify LOCKSHARED 428 * only need a shared lock on the leaf vnode. 429 */ 430 return (0); 431} 432 433/* 434 * Search a pathname. 435 * This is a very central and rather complicated routine. 436 * 437 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 438 * The starting directory is taken from ni_startdir. The pathname is 439 * descended until done, or a symbolic link is encountered. The variable 440 * ni_more is clear if the path is completed; it is set to one if a 441 * symbolic link needing interpretation is encountered. 442 * 443 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 444 * whether the name is to be looked up, created, renamed, or deleted. 445 * When CREATE, RENAME, or DELETE is specified, information usable in 446 * creating, renaming, or deleting a directory entry may be calculated. 447 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 448 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 449 * returned unlocked. Otherwise the parent directory is not returned. If 450 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 451 * the target is returned locked, otherwise it is returned unlocked. 452 * When creating or renaming and LOCKPARENT is specified, the target may not 453 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 454 * 455 * Overall outline of lookup: 456 * 457 * dirloop: 458 * identify next component of name at ndp->ni_ptr 459 * handle degenerate case where name is null string 460 * if .. and crossing mount points and on mounted filesys, find parent 461 * call VOP_LOOKUP routine for next component name 462 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 463 * component vnode returned in ni_vp (if it exists), locked. 464 * if result vnode is mounted on and crossing mount points, 465 * find mounted on vnode 466 * if more components of name, do next level at dirloop 467 * return the answer in ni_vp, locked if LOCKLEAF set 468 * if LOCKPARENT set, return locked parent in ni_dvp 469 * if WANTPARENT set, return unlocked parent in ni_dvp 470 */ 471int 472lookup(struct nameidata *ndp) 473{ 474 char *cp; /* pointer into pathname argument */ 475 struct vnode *dp = 0; /* the directory we are searching */ 476 struct vnode *tdp; /* saved dp */ 477 struct mount *mp; /* mount table entry */ 478 struct prison *pr; 479 int docache; /* == 0 do not cache last component */ 480 int wantparent; /* 1 => wantparent or lockparent flag */ 481 int rdonly; /* lookup read-only flag bit */ 482 int error = 0; 483 int dpunlocked = 0; /* dp has already been unlocked */ 484 struct componentname *cnp = &ndp->ni_cnd; 485 int lkflags_save; 486 int ni_dvp_unlocked; 487 488 /* 489 * Setup: break out flag bits into variables. 490 */ 491 ni_dvp_unlocked = 0; 492 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 493 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 494 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 495 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 496 if (cnp->cn_nameiop == DELETE || 497 (wantparent && cnp->cn_nameiop != CREATE && 498 cnp->cn_nameiop != LOOKUP)) 499 docache = 0; 500 rdonly = cnp->cn_flags & RDONLY; 501 cnp->cn_flags &= ~ISSYMLINK; 502 ndp->ni_dvp = NULL; 503 /* 504 * We use shared locks until we hit the parent of the last cn then 505 * we adjust based on the requesting flags. 506 */ 507 if (lookup_shared) 508 cnp->cn_lkflags = LK_SHARED; 509 else 510 cnp->cn_lkflags = LK_EXCLUSIVE; 511 dp = ndp->ni_startdir; 512 ndp->ni_startdir = NULLVP; 513 vn_lock(dp, 514 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, 515 cnp->cn_flags)); 516 517dirloop: 518 /* 519 * Search a new directory. 520 * 521 * The last component of the filename is left accessible via 522 * cnp->cn_nameptr for callers that need the name. Callers needing 523 * the name set the SAVENAME flag. When done, they assume 524 * responsibility for freeing the pathname buffer. 525 */ 526 cnp->cn_consume = 0; 527 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 528 continue; 529 cnp->cn_namelen = cp - cnp->cn_nameptr; 530 if (cnp->cn_namelen > NAME_MAX) { 531 error = ENAMETOOLONG; 532 goto bad; 533 } 534#ifdef NAMEI_DIAGNOSTIC 535 { char c = *cp; 536 *cp = '\0'; 537 printf("{%s}: ", cnp->cn_nameptr); 538 *cp = c; } 539#endif 540 ndp->ni_pathlen -= cnp->cn_namelen; 541 ndp->ni_next = cp; 542 543 /* 544 * Replace multiple slashes by a single slash and trailing slashes 545 * by a null. This must be done before VOP_LOOKUP() because some 546 * fs's don't know about trailing slashes. Remember if there were 547 * trailing slashes to handle symlinks, existing non-directories 548 * and non-existing files that won't be directories specially later. 549 */ 550 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 551 cp++; 552 ndp->ni_pathlen--; 553 if (*cp == '\0') { 554 *ndp->ni_next = '\0'; 555 cnp->cn_flags |= TRAILINGSLASH; 556 } 557 } 558 ndp->ni_next = cp; 559 560 cnp->cn_flags |= MAKEENTRY; 561 if (*cp == '\0' && docache == 0) 562 cnp->cn_flags &= ~MAKEENTRY; 563 if (cnp->cn_namelen == 2 && 564 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 565 cnp->cn_flags |= ISDOTDOT; 566 else 567 cnp->cn_flags &= ~ISDOTDOT; 568 if (*ndp->ni_next == 0) 569 cnp->cn_flags |= ISLASTCN; 570 else 571 cnp->cn_flags &= ~ISLASTCN; 572 573 if ((cnp->cn_flags & ISLASTCN) != 0 && 574 cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && 575 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 576 error = EINVAL; 577 goto bad; 578 } 579 580 /* 581 * Check for degenerate name (e.g. / or "") 582 * which is a way of talking about a directory, 583 * e.g. like "/." or ".". 584 */ 585 if (cnp->cn_nameptr[0] == '\0') { 586 if (dp->v_type != VDIR) { 587 error = ENOTDIR; 588 goto bad; 589 } 590 if (cnp->cn_nameiop != LOOKUP) { 591 error = EISDIR; 592 goto bad; 593 } 594 if (wantparent) { 595 ndp->ni_dvp = dp; 596 VREF(dp); 597 } 598 ndp->ni_vp = dp; 599 600 if (cnp->cn_flags & AUDITVNODE1) 601 AUDIT_ARG_VNODE1(dp); 602 else if (cnp->cn_flags & AUDITVNODE2) 603 AUDIT_ARG_VNODE2(dp); 604 605 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 606 VOP_UNLOCK(dp, 0); 607 /* XXX This should probably move to the top of function. */ 608 if (cnp->cn_flags & SAVESTART) 609 panic("lookup: SAVESTART"); 610 goto success; 611 } 612 613 /* 614 * Handle "..": five special cases. 615 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a 616 * fairly conservative design choice, but it's the only one that we 617 * are satisfied guarantees the property we're looking for). 618 * 1. Return an error if this is the last component of 619 * the name and the operation is DELETE or RENAME. 620 * 2. If at root directory (e.g. after chroot) 621 * or at absolute root directory 622 * then ignore it so can't get out. 623 * 3. If this vnode is the root of a mounted 624 * filesystem, then replace it with the 625 * vnode which was mounted on so we take the 626 * .. in the other filesystem. 627 * 4. If the vnode is the top directory of 628 * the jail or chroot, don't let them out. 629 */ 630 if (cnp->cn_flags & ISDOTDOT) { 631 if (ndp->ni_strictrelative != 0) { 632#ifdef KTRACE 633 if (KTRPOINT(curthread, KTR_CAPFAIL)) 634 ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); 635#endif 636 error = ENOTCAPABLE; 637 goto bad; 638 } 639 if ((cnp->cn_flags & ISLASTCN) != 0 && 640 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 641 error = EINVAL; 642 goto bad; 643 } 644 for (;;) { 645 for (pr = cnp->cn_cred->cr_prison; pr != NULL; 646 pr = pr->pr_parent) 647 if (dp == pr->pr_root) 648 break; 649 if (dp == ndp->ni_rootdir || 650 dp == ndp->ni_topdir || 651 dp == rootvnode || 652 pr != NULL || 653 ((dp->v_vflag & VV_ROOT) != 0 && 654 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 655 ndp->ni_dvp = dp; 656 ndp->ni_vp = dp; 657 VREF(dp); 658 goto nextname; 659 } 660 if ((dp->v_vflag & VV_ROOT) == 0) 661 break; 662 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 663 error = ENOENT; 664 goto bad; 665 } 666 tdp = dp; 667 dp = dp->v_mount->mnt_vnodecovered; 668 VREF(dp); 669 vput(tdp); 670 vn_lock(dp, 671 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 672 LK_RETRY, ISDOTDOT)); 673 } 674 } 675 676 /* 677 * We now have a segment name to search for, and a directory to search. 678 */ 679unionlookup: 680#ifdef MAC 681 if ((cnp->cn_flags & NOMACCHECK) == 0) { 682 error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, 683 cnp); 684 if (error) 685 goto bad; 686 } 687#endif 688 ndp->ni_dvp = dp; 689 ndp->ni_vp = NULL; 690 ASSERT_VOP_LOCKED(dp, "lookup"); 691 /* 692 * If we have a shared lock we may need to upgrade the lock for the 693 * last operation. 694 */ 695 if (dp != vp_crossmp && 696 VOP_ISLOCKED(dp) == LK_SHARED && 697 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 698 vn_lock(dp, LK_UPGRADE|LK_RETRY); 699 if ((dp->v_iflag & VI_DOOMED) != 0) { 700 error = ENOENT; 701 goto bad; 702 } 703 /* 704 * If we're looking up the last component and we need an exclusive 705 * lock, adjust our lkflags. 706 */ 707 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) 708 cnp->cn_lkflags = LK_EXCLUSIVE; 709#ifdef NAMEI_DIAGNOSTIC 710 vprint("lookup in", dp); 711#endif 712 lkflags_save = cnp->cn_lkflags; 713 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, 714 cnp->cn_flags); 715 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 716 cnp->cn_lkflags = lkflags_save; 717 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 718#ifdef NAMEI_DIAGNOSTIC 719 printf("not found\n"); 720#endif 721 if ((error == ENOENT) && 722 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 723 (dp->v_mount->mnt_flag & MNT_UNION)) { 724 tdp = dp; 725 dp = dp->v_mount->mnt_vnodecovered; 726 VREF(dp); 727 vput(tdp); 728 vn_lock(dp, 729 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 730 LK_RETRY, cnp->cn_flags)); 731 goto unionlookup; 732 } 733 734 if (error != EJUSTRETURN) 735 goto bad; 736 /* 737 * At this point, we know we're at the end of the 738 * pathname. If creating / renaming, we can consider 739 * allowing the file or directory to be created / renamed, 740 * provided we're not on a read-only filesystem. 741 */ 742 if (rdonly) { 743 error = EROFS; 744 goto bad; 745 } 746 /* trailing slash only allowed for directories */ 747 if ((cnp->cn_flags & TRAILINGSLASH) && 748 !(cnp->cn_flags & WILLBEDIR)) { 749 error = ENOENT; 750 goto bad; 751 } 752 if ((cnp->cn_flags & LOCKPARENT) == 0) 753 VOP_UNLOCK(dp, 0); 754 /* 755 * We return with ni_vp NULL to indicate that the entry 756 * doesn't currently exist, leaving a pointer to the 757 * (possibly locked) directory vnode in ndp->ni_dvp. 758 */ 759 if (cnp->cn_flags & SAVESTART) { 760 ndp->ni_startdir = ndp->ni_dvp; 761 VREF(ndp->ni_startdir); 762 } 763 goto success; 764 } else 765 cnp->cn_lkflags = lkflags_save; 766#ifdef NAMEI_DIAGNOSTIC 767 printf("found\n"); 768#endif 769 /* 770 * Take into account any additional components consumed by 771 * the underlying filesystem. 772 */ 773 if (cnp->cn_consume > 0) { 774 cnp->cn_nameptr += cnp->cn_consume; 775 ndp->ni_next += cnp->cn_consume; 776 ndp->ni_pathlen -= cnp->cn_consume; 777 cnp->cn_consume = 0; 778 } 779 780 dp = ndp->ni_vp; 781 782 /* 783 * Check to see if the vnode has been mounted on; 784 * if so find the root of the mounted filesystem. 785 */ 786 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 787 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 788 if (vfs_busy(mp, 0)) 789 continue; 790 vput(dp); 791 if (dp != ndp->ni_dvp) 792 vput(ndp->ni_dvp); 793 else 794 vrele(ndp->ni_dvp); 795 vref(vp_crossmp); 796 ndp->ni_dvp = vp_crossmp; 797 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags, 798 cnp->cn_flags), &tdp); 799 vfs_unbusy(mp); 800 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) 801 panic("vp_crossmp exclusively locked or reclaimed"); 802 if (error) { 803 dpunlocked = 1; 804 goto bad2; 805 } 806 ndp->ni_vp = dp = tdp; 807 } 808 809 /* 810 * Check for symbolic link 811 */ 812 if ((dp->v_type == VLNK) && 813 ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || 814 *ndp->ni_next == '/')) { 815 cnp->cn_flags |= ISSYMLINK; 816 if (dp->v_iflag & VI_DOOMED) { 817 /* 818 * We can't know whether the directory was mounted with 819 * NOSYMFOLLOW, so we can't follow safely. 820 */ 821 error = ENOENT; 822 goto bad2; 823 } 824 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 825 error = EACCES; 826 goto bad2; 827 } 828 /* 829 * Symlink code always expects an unlocked dvp. 830 */ 831 if (ndp->ni_dvp != ndp->ni_vp) { 832 VOP_UNLOCK(ndp->ni_dvp, 0); 833 ni_dvp_unlocked = 1; 834 } 835 goto success; 836 } 837 838nextname: 839 /* 840 * Not a symbolic link that we will follow. Continue with the 841 * next component if there is any; otherwise, we're done. 842 */ 843 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 844 ("lookup: invalid path state.")); 845 if (*ndp->ni_next == '/') { 846 cnp->cn_nameptr = ndp->ni_next; 847 while (*cnp->cn_nameptr == '/') { 848 cnp->cn_nameptr++; 849 ndp->ni_pathlen--; 850 } 851 if (ndp->ni_dvp != dp) 852 vput(ndp->ni_dvp); 853 else 854 vrele(ndp->ni_dvp); 855 goto dirloop; 856 } 857 /* 858 * If we're processing a path with a trailing slash, 859 * check that the end result is a directory. 860 */ 861 if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { 862 error = ENOTDIR; 863 goto bad2; 864 } 865 /* 866 * Disallow directory write attempts on read-only filesystems. 867 */ 868 if (rdonly && 869 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 870 error = EROFS; 871 goto bad2; 872 } 873 if (cnp->cn_flags & SAVESTART) { 874 ndp->ni_startdir = ndp->ni_dvp; 875 VREF(ndp->ni_startdir); 876 } 877 if (!wantparent) { 878 ni_dvp_unlocked = 2; 879 if (ndp->ni_dvp != dp) 880 vput(ndp->ni_dvp); 881 else 882 vrele(ndp->ni_dvp); 883 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) { 884 VOP_UNLOCK(ndp->ni_dvp, 0); 885 ni_dvp_unlocked = 1; 886 } 887 888 if (cnp->cn_flags & AUDITVNODE1) 889 AUDIT_ARG_VNODE1(dp); 890 else if (cnp->cn_flags & AUDITVNODE2) 891 AUDIT_ARG_VNODE2(dp); 892 893 if ((cnp->cn_flags & LOCKLEAF) == 0) 894 VOP_UNLOCK(dp, 0); 895success: 896 /* 897 * Because of lookup_shared we may have the vnode shared locked, but 898 * the caller may want it to be exclusively locked. 899 */ 900 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && 901 VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { 902 vn_lock(dp, LK_UPGRADE | LK_RETRY); 903 if (dp->v_iflag & VI_DOOMED) { 904 error = ENOENT; 905 goto bad2; 906 } 907 } 908 return (0); 909 910bad2: 911 if (ni_dvp_unlocked != 2) { 912 if (dp != ndp->ni_dvp && !ni_dvp_unlocked) 913 vput(ndp->ni_dvp); 914 else 915 vrele(ndp->ni_dvp); 916 } 917bad: 918 if (!dpunlocked) 919 vput(dp); 920 ndp->ni_vp = NULL; 921 return (error); 922} 923 924/* 925 * relookup - lookup a path name component 926 * Used by lookup to re-acquire things. 927 */ 928int 929relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 930{ 931 struct vnode *dp = 0; /* the directory we are searching */ 932 int wantparent; /* 1 => wantparent or lockparent flag */ 933 int rdonly; /* lookup read-only flag bit */ 934 int error = 0; 935 936 KASSERT(cnp->cn_flags & ISLASTCN, 937 ("relookup: Not given last component.")); 938 /* 939 * Setup: break out flag bits into variables. 940 */ 941 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 942 KASSERT(wantparent, ("relookup: parent not wanted.")); 943 rdonly = cnp->cn_flags & RDONLY; 944 cnp->cn_flags &= ~ISSYMLINK; 945 dp = dvp; 946 cnp->cn_lkflags = LK_EXCLUSIVE; 947 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 948 949 /* 950 * Search a new directory. 951 * 952 * The last component of the filename is left accessible via 953 * cnp->cn_nameptr for callers that need the name. Callers needing 954 * the name set the SAVENAME flag. When done, they assume 955 * responsibility for freeing the pathname buffer. 956 */ 957#ifdef NAMEI_DIAGNOSTIC 958 printf("{%s}: ", cnp->cn_nameptr); 959#endif 960 961 /* 962 * Check for "" which represents the root directory after slash 963 * removal. 964 */ 965 if (cnp->cn_nameptr[0] == '\0') { 966 /* 967 * Support only LOOKUP for "/" because lookup() 968 * can't succeed for CREATE, DELETE and RENAME. 969 */ 970 KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); 971 KASSERT(dp->v_type == VDIR, ("dp is not a directory")); 972 973 if (!(cnp->cn_flags & LOCKLEAF)) 974 VOP_UNLOCK(dp, 0); 975 *vpp = dp; 976 /* XXX This should probably move to the top of function. */ 977 if (cnp->cn_flags & SAVESTART) 978 panic("lookup: SAVESTART"); 979 return (0); 980 } 981 982 if (cnp->cn_flags & ISDOTDOT) 983 panic ("relookup: lookup on dot-dot"); 984 985 /* 986 * We now have a segment name to search for, and a directory to search. 987 */ 988#ifdef NAMEI_DIAGNOSTIC 989 vprint("search in:", dp); 990#endif 991 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 992 KASSERT(*vpp == NULL, ("leaf should be empty")); 993 if (error != EJUSTRETURN) 994 goto bad; 995 /* 996 * If creating and at end of pathname, then can consider 997 * allowing file to be created. 998 */ 999 if (rdonly) { 1000 error = EROFS; 1001 goto bad; 1002 } 1003 /* ASSERT(dvp == ndp->ni_startdir) */ 1004 if (cnp->cn_flags & SAVESTART) 1005 VREF(dvp); 1006 if ((cnp->cn_flags & LOCKPARENT) == 0) 1007 VOP_UNLOCK(dp, 0); 1008 /* 1009 * We return with ni_vp NULL to indicate that the entry 1010 * doesn't currently exist, leaving a pointer to the 1011 * (possibly locked) directory vnode in ndp->ni_dvp. 1012 */ 1013 return (0); 1014 } 1015 1016 dp = *vpp; 1017 1018 /* 1019 * Disallow directory write attempts on read-only filesystems. 1020 */ 1021 if (rdonly && 1022 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1023 if (dvp == dp) 1024 vrele(dvp); 1025 else 1026 vput(dvp); 1027 error = EROFS; 1028 goto bad; 1029 } 1030 /* 1031 * Set the parent lock/ref state to the requested state. 1032 */ 1033 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 1034 if (wantparent) 1035 VOP_UNLOCK(dvp, 0); 1036 else 1037 vput(dvp); 1038 } else if (!wantparent) 1039 vrele(dvp); 1040 /* 1041 * Check for symbolic link 1042 */ 1043 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 1044 ("relookup: symlink found.\n")); 1045 1046 /* ASSERT(dvp == ndp->ni_startdir) */ 1047 if (cnp->cn_flags & SAVESTART) 1048 VREF(dvp); 1049 1050 if ((cnp->cn_flags & LOCKLEAF) == 0) 1051 VOP_UNLOCK(dp, 0); 1052 return (0); 1053bad: 1054 vput(dp); 1055 *vpp = NULL; 1056 return (error); 1057} 1058 1059void 1060NDINIT_ALL(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg, 1061 const char *namep, int dirfd, struct vnode *startdir, cap_rights_t *rightsp, 1062 struct thread *td) 1063{ 1064 1065 ndp->ni_cnd.cn_nameiop = op; 1066 ndp->ni_cnd.cn_flags = flags; 1067 ndp->ni_segflg = segflg; 1068 ndp->ni_dirp = namep; 1069 ndp->ni_dirfd = dirfd; 1070 ndp->ni_startdir = startdir; 1071 ndp->ni_strictrelative = 0; 1072 if (rightsp != NULL) 1073 ndp->ni_rightsneeded = *rightsp; 1074 else 1075 cap_rights_init(&ndp->ni_rightsneeded); 1076 filecaps_init(&ndp->ni_filecaps); 1077 ndp->ni_cnd.cn_thread = td; 1078} 1079 1080/* 1081 * Free data allocated by namei(); see namei(9) for details. 1082 */ 1083void 1084NDFREE(struct nameidata *ndp, const u_int flags) 1085{ 1086 int unlock_dvp; 1087 int unlock_vp; 1088 1089 unlock_dvp = 0; 1090 unlock_vp = 0; 1091 1092 if (!(flags & NDF_NO_FREE_PNBUF) && 1093 (ndp->ni_cnd.cn_flags & HASBUF)) { 1094 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1095 ndp->ni_cnd.cn_flags &= ~HASBUF; 1096 } 1097 if (!(flags & NDF_NO_VP_UNLOCK) && 1098 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1099 unlock_vp = 1; 1100 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1101 if (unlock_vp) { 1102 vput(ndp->ni_vp); 1103 unlock_vp = 0; 1104 } else 1105 vrele(ndp->ni_vp); 1106 ndp->ni_vp = NULL; 1107 } 1108 if (unlock_vp) 1109 VOP_UNLOCK(ndp->ni_vp, 0); 1110 if (!(flags & NDF_NO_DVP_UNLOCK) && 1111 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1112 ndp->ni_dvp != ndp->ni_vp) 1113 unlock_dvp = 1; 1114 if (!(flags & NDF_NO_DVP_RELE) && 1115 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1116 if (unlock_dvp) { 1117 vput(ndp->ni_dvp); 1118 unlock_dvp = 0; 1119 } else 1120 vrele(ndp->ni_dvp); 1121 ndp->ni_dvp = NULL; 1122 } 1123 if (unlock_dvp) 1124 VOP_UNLOCK(ndp->ni_dvp, 0); 1125 if (!(flags & NDF_NO_STARTDIR_RELE) && 1126 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1127 vrele(ndp->ni_startdir); 1128 ndp->ni_startdir = NULL; 1129 } 1130} 1131 1132/* 1133 * Determine if there is a suitable alternate filename under the specified 1134 * prefix for the specified path. If the create flag is set, then the 1135 * alternate prefix will be used so long as the parent directory exists. 1136 * This is used by the various compatiblity ABIs so that Linux binaries prefer 1137 * files under /compat/linux for example. The chosen path (whether under 1138 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1139 * to by pathbuf. The caller is responsible for free'ing the buffer from 1140 * the M_TEMP bucket if one is returned. 1141 */ 1142int 1143kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1144 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1145{ 1146 struct nameidata nd, ndroot; 1147 char *ptr, *buf, *cp; 1148 size_t len, sz; 1149 int error; 1150 1151 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1152 *pathbuf = buf; 1153 1154 /* Copy the prefix into the new pathname as a starting point. */ 1155 len = strlcpy(buf, prefix, MAXPATHLEN); 1156 if (len >= MAXPATHLEN) { 1157 *pathbuf = NULL; 1158 free(buf, M_TEMP); 1159 return (EINVAL); 1160 } 1161 sz = MAXPATHLEN - len; 1162 ptr = buf + len; 1163 1164 /* Append the filename to the prefix. */ 1165 if (pathseg == UIO_SYSSPACE) 1166 error = copystr(path, ptr, sz, &len); 1167 else 1168 error = copyinstr(path, ptr, sz, &len); 1169 1170 if (error) { 1171 *pathbuf = NULL; 1172 free(buf, M_TEMP); 1173 return (error); 1174 } 1175 1176 /* Only use a prefix with absolute pathnames. */ 1177 if (*ptr != '/') { 1178 error = EINVAL; 1179 goto keeporig; 1180 } 1181 1182 if (dirfd != AT_FDCWD) { 1183 /* 1184 * We want the original because the "prefix" is 1185 * included in the already opened dirfd. 1186 */ 1187 bcopy(ptr, buf, len); 1188 return (0); 1189 } 1190 1191 /* 1192 * We know that there is a / somewhere in this pathname. 1193 * Search backwards for it, to find the file's parent dir 1194 * to see if it exists in the alternate tree. If it does, 1195 * and we want to create a file (cflag is set). We don't 1196 * need to worry about the root comparison in this case. 1197 */ 1198 1199 if (create) { 1200 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1201 *cp = '\0'; 1202 1203 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td); 1204 error = namei(&nd); 1205 *cp = '/'; 1206 if (error != 0) 1207 goto keeporig; 1208 } else { 1209 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, td); 1210 1211 error = namei(&nd); 1212 if (error != 0) 1213 goto keeporig; 1214 1215 /* 1216 * We now compare the vnode of the prefix to the one 1217 * vnode asked. If they resolve to be the same, then we 1218 * ignore the match so that the real root gets used. 1219 * This avoids the problem of traversing "../.." to find the 1220 * root directory and never finding it, because "/" resolves 1221 * to the emulation root directory. This is expensive :-( 1222 */ 1223 NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, 1224 td); 1225 1226 /* We shouldn't ever get an error from this namei(). */ 1227 error = namei(&ndroot); 1228 if (error == 0) { 1229 if (nd.ni_vp == ndroot.ni_vp) 1230 error = ENOENT; 1231 1232 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1233 vrele(ndroot.ni_vp); 1234 } 1235 } 1236 1237 NDFREE(&nd, NDF_ONLY_PNBUF); 1238 vrele(nd.ni_vp); 1239 1240keeporig: 1241 /* If there was an error, use the original path name. */ 1242 if (error) 1243 bcopy(ptr, buf, len); 1244 return (error); 1245} 1246