vfs_lookup.c revision 193557
1139804Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 4. Neither the name of the University nor the names of its contributors
191541Srgrimes *    may be used to endorse or promote products derived from this software
201541Srgrimes *    without specific prior written permission.
211541Srgrimes *
221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
251541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
321541Srgrimes * SUCH DAMAGE.
331541Srgrimes *
341541Srgrimes *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
351541Srgrimes */
361541Srgrimes
37116182Sobrien#include <sys/cdefs.h>
38116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/vfs_lookup.c 193557 2009-06-06 00:49:49Z des $");
39116182Sobrien
40190759Srwatson#include "opt_kdtrace.h"
4113203Swollman#include "opt_ktrace.h"
4213203Swollman
431541Srgrimes#include <sys/param.h>
442112Swollman#include <sys/systm.h>
4569664Speter#include <sys/kernel.h>
46177785Skib#include <sys/fcntl.h>
47192895Sjamie#include <sys/jail.h>
4876166Smarkm#include <sys/lock.h>
4989316Salfred#include <sys/mutex.h>
501541Srgrimes#include <sys/namei.h>
511541Srgrimes#include <sys/vnode.h>
521541Srgrimes#include <sys/mount.h>
531541Srgrimes#include <sys/filedesc.h>
541541Srgrimes#include <sys/proc.h>
55190759Srwatson#include <sys/sdt.h>
56141471Sjhb#include <sys/syscallsubr.h>
57144613Sjeff#include <sys/sysctl.h>
581541Srgrimes#ifdef KTRACE
591541Srgrimes#include <sys/ktrace.h>
601541Srgrimes#endif
611541Srgrimes
62155334Srwatson#include <security/audit/audit.h>
63163606Srwatson#include <security/mac/mac_framework.h>
64155334Srwatson
6592751Sjeff#include <vm/uma.h>
6632011Sbde
67155168Sjeff#define	NAMEI_DIAGNOSTIC 1
68138345Sphk#undef NAMEI_DIAGNOSTIC
69138345Sphk
70190759SrwatsonSDT_PROVIDER_DECLARE(vfs);
71190759SrwatsonSDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *",
72190759Srwatson    "unsigned long");
73190759SrwatsonSDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *");
74190759Srwatson
751541Srgrimes/*
7669664Speter * Allocation zone for namei
7769664Speter */
7892751Sjeffuma_zone_t namei_zone;
79166167Skib/*
80166167Skib * Placeholder vnode for mp traversal
81166167Skib */
82166167Skibstatic struct vnode *vp_crossmp;
8369664Speter
8469664Speterstatic void
8569664Speternameiinit(void *dummy __unused)
8669664Speter{
87168138Srwatson	int error;
88168138Srwatson
8992654Sjeff	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
9092654Sjeff	    UMA_ALIGN_PTR, 0);
91168138Srwatson	error = getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
92168138Srwatson	if (error != 0)
93168138Srwatson		panic("nameiinit: getnewvnode");
94176519Sattilio	VN_LOCK_ASHARE(vp_crossmp);
9569664Speter}
96177253SrwatsonSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
9769664Speter
98183520Sjhbstatic int lookup_shared = 1;
99144613SjeffSYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0,
100144613Sjeff    "Enables/Disables shared locks for path name translation");
101183519SjhbTUNABLE_INT("vfs.lookup_shared", &lookup_shared);
102144613Sjeff
10369664Speter/*
104161010Srwatson * Convert a pathname into a pointer to a locked vnode.
1051541Srgrimes *
1061541Srgrimes * The FOLLOW flag is set when symbolic links are to be followed
1071541Srgrimes * when they occur at the end of the name translation process.
1081541Srgrimes * Symbolic links are always followed for all other pathname
1091541Srgrimes * components other than the last.
1101541Srgrimes *
1111541Srgrimes * The segflg defines whether the name is to be copied from user
1121541Srgrimes * space or kernel space.
1131541Srgrimes *
1141541Srgrimes * Overall outline of namei:
1151541Srgrimes *
1161541Srgrimes *	copy in name
1171541Srgrimes *	get starting directory
1181541Srgrimes *	while (!done && !error) {
1191541Srgrimes *		call lookup to search path.
1201541Srgrimes *		if symbolic link, massage name in buffer and continue
1211541Srgrimes *	}
1221541Srgrimes */
1231541Srgrimesint
124161011Srwatsonnamei(struct nameidata *ndp)
1251541Srgrimes{
126161011Srwatson	struct filedesc *fdp;	/* pointer to file descriptor state */
127161011Srwatson	char *cp;		/* pointer into pathname argument */
128161011Srwatson	struct vnode *dp;	/* the directory we are searching */
1291541Srgrimes	struct iovec aiov;		/* uio for reading symbolic links */
1301541Srgrimes	struct uio auio;
1311541Srgrimes	int error, linklen;
1321541Srgrimes	struct componentname *cnp = &ndp->ni_cnd;
13383366Sjulian	struct thread *td = cnp->cn_thread;
13483366Sjulian	struct proc *p = td->td_proc;
135140714Sjeff	int vfslocked;
1361541Srgrimes
137150164Scsjp	KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0,
138150164Scsjp	    ("NOT MPSAFE and Giant not held"));
13991419Sjhb	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
14083366Sjulian	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
14142408Seivind	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
14242453Seivind	    ("namei: nameiop contaminated with flags"));
14342408Seivind	KASSERT((cnp->cn_flags & OPMASK) == 0,
14442453Seivind	    ("namei: flags contaminated with nameiops"));
145144613Sjeff	if (!lookup_shared)
146144613Sjeff		cnp->cn_flags &= ~LOCKSHARED;
14783366Sjulian	fdp = p->p_fd;
1481541Srgrimes
149193028Sdes	/* We will set this ourselves if we need it. */
150193028Sdes	cnp->cn_flags &= ~TRAILINGSLASH;
151193028Sdes
1521541Srgrimes	/*
1531541Srgrimes	 * Get a buffer for the name to be translated, and copy the
1541541Srgrimes	 * name into the buffer.
1551541Srgrimes	 */
1561541Srgrimes	if ((cnp->cn_flags & HASBUF) == 0)
157111119Simp		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
1581541Srgrimes	if (ndp->ni_segflg == UIO_SYSSPACE)
1591541Srgrimes		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
16036735Sdfr			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
1611541Srgrimes	else
1621541Srgrimes		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
16336735Sdfr			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
16420069Sbde
165155334Srwatson	/* If we are auditing the kernel pathname, save the user pathname. */
166155334Srwatson	if (cnp->cn_flags & AUDITVNODE1)
167155334Srwatson		AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1);
168155334Srwatson	if (cnp->cn_flags & AUDITVNODE2)
169155334Srwatson		AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2);
170155334Srwatson
17120069Sbde	/*
17220069Sbde	 * Don't allow empty pathnames.
17320069Sbde	 */
17420069Sbde	if (!error && *cnp->cn_pnbuf == '\0')
17520069Sbde		error = ENOENT;
17620069Sbde
1771541Srgrimes	if (error) {
17892751Sjeff		uma_zfree(namei_zone, cnp->cn_pnbuf);
179100613Srwatson#ifdef DIAGNOSTIC
180100613Srwatson		cnp->cn_pnbuf = NULL;
181100613Srwatson		cnp->cn_nameptr = NULL;
182100613Srwatson#endif
1831541Srgrimes		ndp->ni_vp = NULL;
1841541Srgrimes		return (error);
1851541Srgrimes	}
1861541Srgrimes	ndp->ni_loopcnt = 0;
1871541Srgrimes#ifdef KTRACE
18897994Sjhb	if (KTRPOINT(td, KTR_NAMEI)) {
18997994Sjhb		KASSERT(cnp->cn_thread == curthread,
19097994Sjhb		    ("namei not using curthread"));
19197994Sjhb		ktrnamei(cnp->cn_pnbuf);
19297994Sjhb	}
1931541Srgrimes#endif
1941541Srgrimes	/*
1951541Srgrimes	 * Get starting point for the translation.
1961541Srgrimes	 */
197168355Srwatson	FILEDESC_SLOCK(fdp);
19833360Sdyson	ndp->ni_rootdir = fdp->fd_rdir;
19951649Sphk	ndp->ni_topdir = fdp->fd_jdir;
20033360Sdyson
201185029Spjd	dp = NULL;
202185029Spjd	if (cnp->cn_pnbuf[0] != '/') {
203185029Spjd		if (ndp->ni_startdir != NULL) {
204185029Spjd			dp = ndp->ni_startdir;
205185029Spjd			error = 0;
206185029Spjd		} else if (ndp->ni_dirfd != AT_FDCWD)
207185029Spjd			error = fgetvp(td, ndp->ni_dirfd, &dp);
208185029Spjd		if (error != 0 || dp != NULL) {
209185029Spjd			FILEDESC_SUNLOCK(fdp);
210185029Spjd			if (error == 0 && dp->v_type != VDIR) {
211185029Spjd				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
212185029Spjd				vrele(dp);
213185029Spjd				VFS_UNLOCK_GIANT(vfslocked);
214185029Spjd				error = ENOTDIR;
215185029Spjd			}
216177785Skib		}
217177785Skib		if (error) {
218177785Skib			uma_zfree(namei_zone, cnp->cn_pnbuf);
219177785Skib#ifdef DIAGNOSTIC
220177785Skib			cnp->cn_pnbuf = NULL;
221177785Skib			cnp->cn_nameptr = NULL;
222177785Skib#endif
223177785Skib			return (error);
224177785Skib		}
225185029Spjd	}
226185029Spjd	if (dp == NULL) {
227177785Skib		dp = fdp->fd_cdir;
228177785Skib		VREF(dp);
229177785Skib		FILEDESC_SUNLOCK(fdp);
230185029Spjd		if (ndp->ni_startdir != NULL) {
231185029Spjd			vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount);
232185029Spjd			vrele(ndp->ni_startdir);
233185029Spjd			VFS_UNLOCK_GIANT(vfslocked);
234185029Spjd		}
235177785Skib	}
236190759Srwatson	SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
237190759Srwatson	    cnp->cn_flags, 0, 0);
238140714Sjeff	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
2391541Srgrimes	for (;;) {
2401541Srgrimes		/*
2411541Srgrimes		 * Check if root directory should replace current directory.
2421541Srgrimes		 * Done at start of translation and after symbolic link.
2431541Srgrimes		 */
2441541Srgrimes		cnp->cn_nameptr = cnp->cn_pnbuf;
2451541Srgrimes		if (*(cnp->cn_nameptr) == '/') {
2461541Srgrimes			vrele(dp);
247140714Sjeff			VFS_UNLOCK_GIANT(vfslocked);
2481541Srgrimes			while (*(cnp->cn_nameptr) == '/') {
2491541Srgrimes				cnp->cn_nameptr++;
2501541Srgrimes				ndp->ni_pathlen--;
2511541Srgrimes			}
2521541Srgrimes			dp = ndp->ni_rootdir;
253140714Sjeff			vfslocked = VFS_LOCK_GIANT(dp->v_mount);
2541541Srgrimes			VREF(dp);
2551541Srgrimes		}
256140714Sjeff		if (vfslocked)
257140714Sjeff			ndp->ni_cnd.cn_flags |= GIANTHELD;
2581541Srgrimes		ndp->ni_startdir = dp;
2593148Sphk		error = lookup(ndp);
2603148Sphk		if (error) {
26192751Sjeff			uma_zfree(namei_zone, cnp->cn_pnbuf);
262100613Srwatson#ifdef DIAGNOSTIC
263100613Srwatson			cnp->cn_pnbuf = NULL;
264100613Srwatson			cnp->cn_nameptr = NULL;
265100613Srwatson#endif
266190759Srwatson			SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0,
267190759Srwatson			    0, 0);
2681541Srgrimes			return (error);
2691541Srgrimes		}
270140714Sjeff		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
271140714Sjeff		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
2721541Srgrimes		/*
273193027Sdes		 * If not a symbolic link, we're done.
2741541Srgrimes		 */
2751541Srgrimes		if ((cnp->cn_flags & ISSYMLINK) == 0) {
276100613Srwatson			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
27792751Sjeff				uma_zfree(namei_zone, cnp->cn_pnbuf);
278100613Srwatson#ifdef DIAGNOSTIC
279100613Srwatson				cnp->cn_pnbuf = NULL;
280100613Srwatson				cnp->cn_nameptr = NULL;
281100613Srwatson#endif
282100613Srwatson			} else
2831541Srgrimes				cnp->cn_flags |= HASBUF;
28432286Sdyson
285140714Sjeff			if ((cnp->cn_flags & MPSAFE) == 0) {
286140714Sjeff				VFS_UNLOCK_GIANT(vfslocked);
287140714Sjeff			} else if (vfslocked)
288140714Sjeff				ndp->ni_cnd.cn_flags |= GIANTHELD;
289190759Srwatson			SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp,
290190759Srwatson			    0, 0, 0);
2911541Srgrimes			return (0);
2921541Srgrimes		}
2931541Srgrimes		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
2941541Srgrimes			error = ELOOP;
2951541Srgrimes			break;
2961541Srgrimes		}
297101127Srwatson#ifdef MAC
298105479Srwatson		if ((cnp->cn_flags & NOMACCHECK) == 0) {
299172930Srwatson			error = mac_vnode_check_readlink(td->td_ucred,
300105479Srwatson			    ndp->ni_vp);
301105479Srwatson			if (error)
302105479Srwatson				break;
303105479Srwatson		}
304101127Srwatson#endif
3051541Srgrimes		if (ndp->ni_pathlen > 1)
306111119Simp			cp = uma_zalloc(namei_zone, M_WAITOK);
3071541Srgrimes		else
3081541Srgrimes			cp = cnp->cn_pnbuf;
3091541Srgrimes		aiov.iov_base = cp;
3101541Srgrimes		aiov.iov_len = MAXPATHLEN;
3111541Srgrimes		auio.uio_iov = &aiov;
3121541Srgrimes		auio.uio_iovcnt = 1;
3131541Srgrimes		auio.uio_offset = 0;
3141541Srgrimes		auio.uio_rw = UIO_READ;
3151541Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
31683366Sjulian		auio.uio_td = (struct thread *)0;
3171541Srgrimes		auio.uio_resid = MAXPATHLEN;
3183148Sphk		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
3193148Sphk		if (error) {
3201541Srgrimes			if (ndp->ni_pathlen > 1)
32192751Sjeff				uma_zfree(namei_zone, cp);
3221541Srgrimes			break;
3231541Srgrimes		}
3241541Srgrimes		linklen = MAXPATHLEN - auio.uio_resid;
32578692Sdillon		if (linklen == 0) {
32678692Sdillon			if (ndp->ni_pathlen > 1)
32792751Sjeff				uma_zfree(namei_zone, cp);
32878692Sdillon			error = ENOENT;
32978692Sdillon			break;
33078692Sdillon		}
3311541Srgrimes		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
3321541Srgrimes			if (ndp->ni_pathlen > 1)
33392751Sjeff				uma_zfree(namei_zone, cp);
3341541Srgrimes			error = ENAMETOOLONG;
3351541Srgrimes			break;
3361541Srgrimes		}
3371541Srgrimes		if (ndp->ni_pathlen > 1) {
3381541Srgrimes			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
33992751Sjeff			uma_zfree(namei_zone, cnp->cn_pnbuf);
3401541Srgrimes			cnp->cn_pnbuf = cp;
3411541Srgrimes		} else
3421541Srgrimes			cnp->cn_pnbuf[linklen] = '\0';
3431541Srgrimes		ndp->ni_pathlen += linklen;
3441541Srgrimes		vput(ndp->ni_vp);
3451541Srgrimes		dp = ndp->ni_dvp;
3461541Srgrimes	}
34792751Sjeff	uma_zfree(namei_zone, cnp->cn_pnbuf);
348100613Srwatson#ifdef DIAGNOSTIC
349100613Srwatson	cnp->cn_pnbuf = NULL;
350100613Srwatson	cnp->cn_nameptr = NULL;
351100613Srwatson#endif
352144833Sjeff	vput(ndp->ni_vp);
353144833Sjeff	ndp->ni_vp = NULL;
3541541Srgrimes	vrele(ndp->ni_dvp);
355140714Sjeff	VFS_UNLOCK_GIANT(vfslocked);
356190759Srwatson	SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0);
3571541Srgrimes	return (error);
3581541Srgrimes}
3591541Srgrimes
360162288Smohansstatic int
361162288Smohanscompute_cn_lkflags(struct mount *mp, int lkflags)
362162288Smohans{
363184597Sjhb
364162310Smohans	if (mp == NULL ||
365162310Smohans	    ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
366162288Smohans		lkflags &= ~LK_SHARED;
367162288Smohans		lkflags |= LK_EXCLUSIVE;
368162288Smohans	}
369184597Sjhb	return (lkflags);
370162288Smohans}
371162288Smohans
372189696Sjhbstatic __inline int
373189696Sjhbneeds_exclusive_leaf(struct mount *mp, int flags)
374189696Sjhb{
375189696Sjhb
376189696Sjhb	/*
377189696Sjhb	 * Intermediate nodes can use shared locks, we only need to
378189696Sjhb	 * force an exclusive lock for leaf nodes.
379189696Sjhb	 */
380189696Sjhb	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
381189696Sjhb		return (0);
382189696Sjhb
383189696Sjhb	/* Always use exclusive locks if LOCKSHARED isn't set. */
384189696Sjhb	if (!(flags & LOCKSHARED))
385189696Sjhb		return (1);
386189696Sjhb
387189696Sjhb	/*
388189696Sjhb	 * For lookups during open(), if the mount point supports
389189696Sjhb	 * extended shared operations, then use a shared lock for the
390189696Sjhb	 * leaf node, otherwise use an exclusive lock.
391189696Sjhb	 */
392189696Sjhb	if (flags & ISOPEN) {
393189696Sjhb		if (mp != NULL &&
394189696Sjhb		    (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED))
395189696Sjhb			return (0);
396189696Sjhb		else
397189696Sjhb			return (1);
398189696Sjhb	}
399189696Sjhb
400189696Sjhb	/*
401189696Sjhb	 * Lookup requests outside of open() that specify LOCKSHARED
402189696Sjhb	 * only need a shared lock on the leaf vnode.
403189696Sjhb	 */
404189697Sjhb	return (0);
405189696Sjhb}
406189696Sjhb
4071541Srgrimes/*
4081541Srgrimes * Search a pathname.
4091541Srgrimes * This is a very central and rather complicated routine.
4101541Srgrimes *
 * The pathname is pointed to by cnp->cn_nameptr and is of length
 * ni_pathlen.  The starting directory is taken from ni_startdir.  The
 * pathname is descended until done, or a symbolic link is encountered.
 * The ISSYMLINK flag in cn_flags is cleared on entry; namei() tests it
 * after lookup() returns to decide whether a symbolic link still needs
 * interpretation.
4161541Srgrimes *
4171541Srgrimes * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
4181541Srgrimes * whether the name is to be looked up, created, renamed, or deleted.
4191541Srgrimes * When CREATE, RENAME, or DELETE is specified, information usable in
4201541Srgrimes * creating, renaming, or deleting a directory entry may be calculated.
4211541Srgrimes * If flag has LOCKPARENT or'ed into it, the parent directory is returned
4221541Srgrimes * locked. If flag has WANTPARENT or'ed into it, the parent directory is
4231541Srgrimes * returned unlocked. Otherwise the parent directory is not returned. If
4241541Srgrimes * the target of the pathname exists and LOCKLEAF is or'ed into the flag
4251541Srgrimes * the target is returned locked, otherwise it is returned unlocked.
4261541Srgrimes * When creating or renaming and LOCKPARENT is specified, the target may not
4271541Srgrimes * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
4288876Srgrimes *
4291541Srgrimes * Overall outline of lookup:
4301541Srgrimes *
4311541Srgrimes * dirloop:
4321541Srgrimes *	identify next component of name at ndp->ni_ptr
4331541Srgrimes *	handle degenerate case where name is null string
4341541Srgrimes *	if .. and crossing mount points and on mounted filesys, find parent
4351541Srgrimes *	call VOP_LOOKUP routine for next component name
4361541Srgrimes *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
4371541Srgrimes *	    component vnode returned in ni_vp (if it exists), locked.
4381541Srgrimes *	if result vnode is mounted on and crossing mount points,
4391541Srgrimes *	    find mounted on vnode
4401541Srgrimes *	if more components of name, do next level at dirloop
4411541Srgrimes *	return the answer in ni_vp, locked if LOCKLEAF set
4421541Srgrimes *	    if LOCKPARENT set, return locked parent in ni_dvp
4431541Srgrimes *	    if WANTPARENT set, return unlocked parent in ni_dvp
4441541Srgrimes */
4451541Srgrimesint
446161011Srwatsonlookup(struct nameidata *ndp)
4471541Srgrimes{
448161011Srwatson	char *cp;		/* pointer into pathname argument */
449161011Srwatson	struct vnode *dp = 0;	/* the directory we are searching */
4501541Srgrimes	struct vnode *tdp;		/* saved dp */
4511541Srgrimes	struct mount *mp;		/* mount table entry */
452192895Sjamie	struct prison *pr;
4531541Srgrimes	int docache;			/* == 0 do not cache last component */
4541541Srgrimes	int wantparent;			/* 1 => wantparent or lockparent flag */
4551541Srgrimes	int rdonly;			/* lookup read-only flag bit */
4561541Srgrimes	int error = 0;
45765805Sbp	int dpunlocked = 0;		/* dp has already been unlocked */
4581541Srgrimes	struct componentname *cnp = &ndp->ni_cnd;
459158094Sjeff	int vfslocked;			/* VFS Giant state for child */
460158094Sjeff	int dvfslocked;			/* VFS Giant state for parent */
461140714Sjeff	int tvfslocked;
462162288Smohans	int lkflags_save;
463191991Sattilio#ifdef AUDIT
464191991Sattilio	struct thread *td = curthread;
465191991Sattilio#endif
466162288Smohans
4671541Srgrimes	/*
4681541Srgrimes	 * Setup: break out flag bits into variables.
4691541Srgrimes	 */
470158094Sjeff	dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
471158094Sjeff	vfslocked = 0;
472140714Sjeff	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
4731541Srgrimes	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
474144229Sjeff	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
475144229Sjeff	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
4761541Srgrimes	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
4771541Srgrimes	if (cnp->cn_nameiop == DELETE ||
47822874Sbde	    (wantparent && cnp->cn_nameiop != CREATE &&
47922874Sbde	     cnp->cn_nameiop != LOOKUP))
4801541Srgrimes		docache = 0;
4811541Srgrimes	rdonly = cnp->cn_flags & RDONLY;
482144286Sjeff	cnp->cn_flags &= ~ISSYMLINK;
4831541Srgrimes	ndp->ni_dvp = NULL;
484144286Sjeff	/*
485144286Sjeff	 * We use shared locks until we hit the parent of the last cn then
486144286Sjeff	 * we adjust based on the requesting flags.
487144286Sjeff	 */
488144613Sjeff	if (lookup_shared)
489144613Sjeff		cnp->cn_lkflags = LK_SHARED;
490144613Sjeff	else
491144613Sjeff		cnp->cn_lkflags = LK_EXCLUSIVE;
4921541Srgrimes	dp = ndp->ni_startdir;
4931541Srgrimes	ndp->ni_startdir = NULLVP;
494175202Sattilio	vn_lock(dp,
495175202Sattilio	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY));
4961541Srgrimes
4971541Srgrimesdirloop:
4981541Srgrimes	/*
4991541Srgrimes	 * Search a new directory.
5001541Srgrimes	 *
5011541Srgrimes	 * The last component of the filename is left accessible via
5021541Srgrimes	 * cnp->cn_nameptr for callers that need the name. Callers needing
5031541Srgrimes	 * the name set the SAVENAME flag. When done, they assume
5041541Srgrimes	 * responsibility for freeing the pathname buffer.
5051541Srgrimes	 */
5061541Srgrimes	cnp->cn_consume = 0;
5071541Srgrimes	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
50851906Sphk		continue;
5091541Srgrimes	cnp->cn_namelen = cp - cnp->cn_nameptr;
5101541Srgrimes	if (cnp->cn_namelen > NAME_MAX) {
5111541Srgrimes		error = ENAMETOOLONG;
5121541Srgrimes		goto bad;
5131541Srgrimes	}
5141541Srgrimes#ifdef NAMEI_DIAGNOSTIC
5151541Srgrimes	{ char c = *cp;
5161541Srgrimes	*cp = '\0';
5171541Srgrimes	printf("{%s}: ", cnp->cn_nameptr);
5181541Srgrimes	*cp = c; }
5191541Srgrimes#endif
5201541Srgrimes	ndp->ni_pathlen -= cnp->cn_namelen;
5211541Srgrimes	ndp->ni_next = cp;
5229804Sbde
5239804Sbde	/*
5249804Sbde	 * Replace multiple slashes by a single slash and trailing slashes
5259804Sbde	 * by a null.  This must be done before VOP_LOOKUP() because some
5269804Sbde	 * fs's don't know about trailing slashes.  Remember if there were
5279804Sbde	 * trailing slashes to handle symlinks, existing non-directories
5289804Sbde	 * and non-existing files that won't be directories specially later.
5299804Sbde	 */
5309804Sbde	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
5319804Sbde		cp++;
5329804Sbde		ndp->ni_pathlen--;
5339804Sbde		if (*cp == '\0') {
534193557Sdes			*ndp->ni_next = '\0';
535193028Sdes			cnp->cn_flags |= TRAILINGSLASH;
5369804Sbde		}
5379804Sbde	}
5389804Sbde	ndp->ni_next = cp;
5399804Sbde
5401541Srgrimes	cnp->cn_flags |= MAKEENTRY;
5411541Srgrimes	if (*cp == '\0' && docache == 0)
5421541Srgrimes		cnp->cn_flags &= ~MAKEENTRY;
5431541Srgrimes	if (cnp->cn_namelen == 2 &&
5441541Srgrimes	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
5451541Srgrimes		cnp->cn_flags |= ISDOTDOT;
5461541Srgrimes	else
5471541Srgrimes		cnp->cn_flags &= ~ISDOTDOT;
5481541Srgrimes	if (*ndp->ni_next == 0)
5491541Srgrimes		cnp->cn_flags |= ISLASTCN;
5501541Srgrimes	else
5511541Srgrimes		cnp->cn_flags &= ~ISLASTCN;
5521541Srgrimes
5531541Srgrimes
5541541Srgrimes	/*
5551541Srgrimes	 * Check for degenerate name (e.g. / or "")
5561541Srgrimes	 * which is a way of talking about a directory,
5571541Srgrimes	 * e.g. like "/." or ".".
5581541Srgrimes	 */
5591541Srgrimes	if (cnp->cn_nameptr[0] == '\0') {
56022521Sdyson		if (dp->v_type != VDIR) {
56122521Sdyson			error = ENOTDIR;
56222521Sdyson			goto bad;
56322521Sdyson		}
5641541Srgrimes		if (cnp->cn_nameiop != LOOKUP) {
5651541Srgrimes			error = EISDIR;
5661541Srgrimes			goto bad;
5671541Srgrimes		}
5681541Srgrimes		if (wantparent) {
5691541Srgrimes			ndp->ni_dvp = dp;
5701541Srgrimes			VREF(dp);
5711541Srgrimes		}
5721541Srgrimes		ndp->ni_vp = dp;
573155334Srwatson
574155334Srwatson		if (cnp->cn_flags & AUDITVNODE1)
575155334Srwatson			AUDIT_ARG(vnode, dp, ARG_VNODE1);
576155334Srwatson		else if (cnp->cn_flags & AUDITVNODE2)
577155334Srwatson			AUDIT_ARG(vnode, dp, ARG_VNODE2);
578155334Srwatson
5791541Srgrimes		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
580175294Sattilio			VOP_UNLOCK(dp, 0);
58154655Seivind		/* XXX This should probably move to the top of function. */
5821541Srgrimes		if (cnp->cn_flags & SAVESTART)
5831541Srgrimes			panic("lookup: SAVESTART");
584140714Sjeff		goto success;
5851541Srgrimes	}
5861541Srgrimes
5871541Srgrimes	/*
588154649Struckman	 * Handle "..": four special cases.
589154649Struckman	 * 1. Return an error if this is the last component of
590154649Struckman	 *    the name and the operation is DELETE or RENAME.
591154649Struckman	 * 2. If at root directory (e.g. after chroot)
5921541Srgrimes	 *    or at absolute root directory
5931541Srgrimes	 *    then ignore it so can't get out.
594154649Struckman	 * 3. If this vnode is the root of a mounted
5951541Srgrimes	 *    filesystem, then replace it with the
5961541Srgrimes	 *    vnode which was mounted on so we take the
59796755Strhodes	 *    .. in the other filesystem.
598154649Struckman	 * 4. If the vnode is the top directory of
59951649Sphk	 *    the jail or chroot, don't let them out.
6001541Srgrimes	 */
6011541Srgrimes	if (cnp->cn_flags & ISDOTDOT) {
602154649Struckman		if ((cnp->cn_flags & ISLASTCN) != 0 &&
603154649Struckman		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
604154690Struckman			error = EINVAL;
605154649Struckman			goto bad;
606154649Struckman		}
6071541Srgrimes		for (;;) {
608192895Sjamie			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
609192895Sjamie			     pr = pr->pr_parent)
610192895Sjamie				if (dp == pr->pr_root)
611192895Sjamie					break;
61251649Sphk			if (dp == ndp->ni_rootdir ||
61351649Sphk			    dp == ndp->ni_topdir ||
614166744Skib			    dp == rootvnode ||
615192895Sjamie			    pr != NULL ||
616166744Skib			    ((dp->v_vflag & VV_ROOT) != 0 &&
617166744Skib			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
6181541Srgrimes				ndp->ni_dvp = dp;
6191541Srgrimes				ndp->ni_vp = dp;
620158142Skris				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
6211541Srgrimes				VREF(dp);
6221541Srgrimes				goto nextname;
6231541Srgrimes			}
624166744Skib			if ((dp->v_vflag & VV_ROOT) == 0)
6251541Srgrimes				break;
626155385Sjeff			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
627190387Sjhb				error = ENOENT;
62869405Salfred				goto bad;
62969405Salfred			}
6301541Srgrimes			tdp = dp;
631144833Sjeff			dp = dp->v_mount->mnt_vnodecovered;
632158094Sjeff			tvfslocked = dvfslocked;
633158094Sjeff			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
634144833Sjeff			VREF(dp);
6351541Srgrimes			vput(tdp);
636140714Sjeff			VFS_UNLOCK_GIANT(tvfslocked);
637175202Sattilio			vn_lock(dp,
638175202Sattilio			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
639175202Sattilio			    LK_RETRY));
6401541Srgrimes		}
6411541Srgrimes	}
6421541Srgrimes
6431541Srgrimes	/*
6441541Srgrimes	 * We now have a segment name to search for, and a directory to search.
6451541Srgrimes	 */
6461541Srgrimesunionlookup:
647101127Srwatson#ifdef MAC
648105479Srwatson	if ((cnp->cn_flags & NOMACCHECK) == 0) {
649191990Sattilio		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
650191990Sattilio		    cnp);
651105479Srwatson		if (error)
652105479Srwatson			goto bad;
653105479Srwatson	}
654101127Srwatson#endif
6551541Srgrimes	ndp->ni_dvp = dp;
65622521Sdyson	ndp->ni_vp = NULL;
65724624Sdfr	ASSERT_VOP_LOCKED(dp, "lookup");
658158094Sjeff	VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked));
659144286Sjeff	/*
660144286Sjeff	 * If we have a shared lock we may need to upgrade the lock for the
661144286Sjeff	 * last operation.
662144286Sjeff	 */
663166167Skib	if (dp != vp_crossmp &&
664176559Sattilio	    VOP_ISLOCKED(dp) == LK_SHARED &&
665144286Sjeff	    (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
666175202Sattilio		vn_lock(dp, LK_UPGRADE|LK_RETRY);
667144286Sjeff	/*
668144286Sjeff	 * If we're looking up the last component and we need an exclusive
669144286Sjeff	 * lock, adjust our lkflags.
670144286Sjeff	 */
671189696Sjhb	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
672144286Sjeff		cnp->cn_lkflags = LK_EXCLUSIVE;
673138345Sphk#ifdef NAMEI_DIAGNOSTIC
674138345Sphk	vprint("lookup in", dp);
675138345Sphk#endif
676162288Smohans	lkflags_save = cnp->cn_lkflags;
677162288Smohans	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
67843301Sdillon	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
679162288Smohans		cnp->cn_lkflags = lkflags_save;
68042408Seivind		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
6811541Srgrimes#ifdef NAMEI_DIAGNOSTIC
6821541Srgrimes		printf("not found\n");
6831541Srgrimes#endif
6841541Srgrimes		if ((error == ENOENT) &&
685101308Sjeff		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
6861541Srgrimes		    (dp->v_mount->mnt_flag & MNT_UNION)) {
6871541Srgrimes			tdp = dp;
688144833Sjeff			dp = dp->v_mount->mnt_vnodecovered;
689158094Sjeff			tvfslocked = dvfslocked;
690158094Sjeff			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
691144833Sjeff			VREF(dp);
692144203Sjeff			vput(tdp);
693140714Sjeff			VFS_UNLOCK_GIANT(tvfslocked);
694175202Sattilio			vn_lock(dp,
695175202Sattilio			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
696175202Sattilio			    LK_RETRY));
6971541Srgrimes			goto unionlookup;
6981541Srgrimes		}
6991541Srgrimes
7001541Srgrimes		if (error != EJUSTRETURN)
7011541Srgrimes			goto bad;
7021541Srgrimes		/*
703193557Sdes		 * At this point, we know we're at the end of the
704193557Sdes		 * pathname.  If creating / renaming, we can consider
705193557Sdes		 * allowing the file or directory to be created / renamed,
706193557Sdes		 * provided we're not on a read-only filesystem.
7071541Srgrimes		 */
70811644Sdg		if (rdonly) {
7091541Srgrimes			error = EROFS;
7101541Srgrimes			goto bad;
7111541Srgrimes		}
712193557Sdes		/* trailing slash only allowed for directories */
713193557Sdes		if ((cnp->cn_flags & TRAILINGSLASH) &&
714193557Sdes		    !(cnp->cn_flags & WILLBEDIR)) {
7159804Sbde			error = ENOENT;
7169804Sbde			goto bad;
7179804Sbde		}
718144203Sjeff		if ((cnp->cn_flags & LOCKPARENT) == 0)
719175294Sattilio			VOP_UNLOCK(dp, 0);
7201541Srgrimes		/*
7211541Srgrimes		 * We return with ni_vp NULL to indicate that the entry
7221541Srgrimes		 * doesn't currently exist, leaving a pointer to the
723161010Srwatson		 * (possibly locked) directory vnode in ndp->ni_dvp.
7241541Srgrimes		 */
7251541Srgrimes		if (cnp->cn_flags & SAVESTART) {
7261541Srgrimes			ndp->ni_startdir = ndp->ni_dvp;
7271541Srgrimes			VREF(ndp->ni_startdir);
7281541Srgrimes		}
729140714Sjeff		goto success;
730162288Smohans	} else
731162288Smohans		cnp->cn_lkflags = lkflags_save;
7321541Srgrimes#ifdef NAMEI_DIAGNOSTIC
7331541Srgrimes	printf("found\n");
7341541Srgrimes#endif
735144203Sjeff	/*
7361541Srgrimes	 * Take into account any additional components consumed by
7371541Srgrimes	 * the underlying filesystem.
7381541Srgrimes	 */
7391541Srgrimes	if (cnp->cn_consume > 0) {
7401541Srgrimes		cnp->cn_nameptr += cnp->cn_consume;
7411541Srgrimes		ndp->ni_next += cnp->cn_consume;
7421541Srgrimes		ndp->ni_pathlen -= cnp->cn_consume;
7431541Srgrimes		cnp->cn_consume = 0;
7441541Srgrimes	}
7451541Srgrimes
7461541Srgrimes	dp = ndp->ni_vp;
747158094Sjeff	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
7481541Srgrimes
7491541Srgrimes	/*
7501541Srgrimes	 * Check to see if the vnode has been mounted on;
75196755Strhodes	 * if so find the root of the mounted filesystem.
7521541Srgrimes	 */
7531541Srgrimes	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
7541541Srgrimes	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
755184554Sattilio		if (vfs_busy(mp, 0))
7561541Srgrimes			continue;
757144833Sjeff		vput(dp);
758158094Sjeff		VFS_UNLOCK_GIANT(vfslocked);
759155168Sjeff		vfslocked = VFS_LOCK_GIANT(mp);
760158094Sjeff		if (dp != ndp->ni_dvp)
761166167Skib			vput(ndp->ni_dvp);
762166167Skib		else
763166167Skib			vrele(ndp->ni_dvp);
764166167Skib		VFS_UNLOCK_GIANT(dvfslocked);
765166167Skib		dvfslocked = 0;
766166167Skib		vref(vp_crossmp);
767166167Skib		ndp->ni_dvp = vp_crossmp;
768191990Sattilio		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags),
769191990Sattilio		    &tdp);
770182542Sattilio		vfs_unbusy(mp);
771175202Sattilio		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
772166167Skib			panic("vp_crossmp exclusively locked or reclaimed");
77365805Sbp		if (error) {
77465805Sbp			dpunlocked = 1;
7751541Srgrimes			goto bad2;
77665805Sbp		}
7771541Srgrimes		ndp->ni_vp = dp = tdp;
7781541Srgrimes	}
7791541Srgrimes
78010219Sdfr	/*
78110219Sdfr	 * Check for symbolic link
78210219Sdfr	 */
78310219Sdfr	if ((dp->v_type == VLNK) &&
784193557Sdes	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
78510219Sdfr	     *ndp->ni_next == '/')) {
78610219Sdfr		cnp->cn_flags |= ISSYMLINK;
787155385Sjeff		if (dp->v_iflag & VI_DOOMED) {
788190387Sjhb			/*
789190387Sjhb			 * We can't know whether the directory was mounted with
790190387Sjhb			 * NOSYMFOLLOW, so we can't follow safely.
791190387Sjhb			 */
792190387Sjhb			error = ENOENT;
79369405Salfred			goto bad2;
79469405Salfred		}
79535105Swosch		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
79635105Swosch			error = EACCES;
79735105Swosch			goto bad2;
79835105Swosch		}
799144833Sjeff		/*
800144833Sjeff		 * Symlink code always expects an unlocked dvp.
801144833Sjeff		 */
802144833Sjeff		if (ndp->ni_dvp != ndp->ni_vp)
803175294Sattilio			VOP_UNLOCK(ndp->ni_dvp, 0);
804140714Sjeff		goto success;
80510219Sdfr	}
80610219Sdfr
8071541Srgrimesnextname:
8081541Srgrimes	/*
809193557Sdes	 * Not a symbolic link that we will follow.  Continue with the
810193557Sdes	 * next component if there is any; otherwise, we're done.
8111541Srgrimes	 */
812144203Sjeff	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
813144203Sjeff	    ("lookup: invalid path state."));
8141541Srgrimes	if (*ndp->ni_next == '/') {
8151541Srgrimes		cnp->cn_nameptr = ndp->ni_next;
8161541Srgrimes		while (*cnp->cn_nameptr == '/') {
8171541Srgrimes			cnp->cn_nameptr++;
8181541Srgrimes			ndp->ni_pathlen--;
8191541Srgrimes		}
820144833Sjeff		if (ndp->ni_dvp != dp)
821144833Sjeff			vput(ndp->ni_dvp);
822144833Sjeff		else
823144833Sjeff			vrele(ndp->ni_dvp);
824155168Sjeff		VFS_UNLOCK_GIANT(dvfslocked);
825158094Sjeff		dvfslocked = vfslocked;	/* dp becomes dvp in dirloop */
826158094Sjeff		vfslocked = 0;
8271541Srgrimes		goto dirloop;
8281541Srgrimes	}
8291541Srgrimes	/*
830193028Sdes	 * If we're processing a path with a trailing slash,
831193028Sdes	 * check that the end result is a directory.
832193028Sdes	 */
833193028Sdes	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
834193028Sdes		error = ENOTDIR;
835193028Sdes		goto bad2;
836193028Sdes	}
837193028Sdes	/*
83896755Strhodes	 * Disallow directory write attempts on read-only filesystems.
8391541Srgrimes	 */
84011644Sdg	if (rdonly &&
84111644Sdg	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
84211644Sdg		error = EROFS;
84311644Sdg		goto bad2;
8441541Srgrimes	}
8451541Srgrimes	if (cnp->cn_flags & SAVESTART) {
8461541Srgrimes		ndp->ni_startdir = ndp->ni_dvp;
8471541Srgrimes		VREF(ndp->ni_startdir);
8481541Srgrimes	}
849144833Sjeff	if (!wantparent) {
850144833Sjeff		if (ndp->ni_dvp != dp)
851144833Sjeff			vput(ndp->ni_dvp);
852144833Sjeff		else
853144833Sjeff			vrele(ndp->ni_dvp);
854155168Sjeff		VFS_UNLOCK_GIANT(dvfslocked);
855155168Sjeff		dvfslocked = 0;
856144833Sjeff	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp)
857175294Sattilio		VOP_UNLOCK(ndp->ni_dvp, 0);
85832071Sdyson
859155334Srwatson	if (cnp->cn_flags & AUDITVNODE1)
860155334Srwatson		AUDIT_ARG(vnode, dp, ARG_VNODE1);
861155334Srwatson	else if (cnp->cn_flags & AUDITVNODE2)
862155334Srwatson		AUDIT_ARG(vnode, dp, ARG_VNODE2);
863155334Srwatson
8641541Srgrimes	if ((cnp->cn_flags & LOCKLEAF) == 0)
865175294Sattilio		VOP_UNLOCK(dp, 0);
866140714Sjeffsuccess:
867172274Spjd	/*
868172274Spjd	 * Because of lookup_shared we may have the vnode shared locked, but
869172274Spjd	 * the caller may want it to be exclusively locked.
870172274Spjd	 */
871189696Sjhb	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
872189696Sjhb	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
873175202Sattilio		vn_lock(dp, LK_UPGRADE | LK_RETRY);
874186276Skib		if (dp->v_iflag & VI_DOOMED) {
875186276Skib			error = ENOENT;
876186276Skib			goto bad2;
877186276Skib		}
878172274Spjd	}
879155168Sjeff	if (vfslocked && dvfslocked)
880155168Sjeff		VFS_UNLOCK_GIANT(dvfslocked);	/* Only need one */
881155168Sjeff	if (vfslocked || dvfslocked)
882140714Sjeff		ndp->ni_cnd.cn_flags |= GIANTHELD;
8831541Srgrimes	return (0);
8841541Srgrimes
8851541Srgrimesbad2:
886144833Sjeff	if (dp != ndp->ni_dvp)
887144203Sjeff		vput(ndp->ni_dvp);
888144203Sjeff	else
889144203Sjeff		vrele(ndp->ni_dvp);
8901541Srgrimesbad:
891144833Sjeff	if (!dpunlocked)
89265805Sbp		vput(dp);
893140714Sjeff	VFS_UNLOCK_GIANT(vfslocked);
894155168Sjeff	VFS_UNLOCK_GIANT(dvfslocked);
895140714Sjeff	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
8961541Srgrimes	ndp->ni_vp = NULL;
8971541Srgrimes	return (error);
8981541Srgrimes}
8991541Srgrimes
9003148Sphk/*
9013148Sphk * relookup - lookup a path name component
902170035Srwatson *    Used by lookup to re-acquire things.
9033148Sphk */
9043148Sphkint
905161011Srwatsonrelookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
9063148Sphk{
90722521Sdyson	struct vnode *dp = 0;		/* the directory we are searching */
9083148Sphk	int wantparent;			/* 1 => wantparent or lockparent flag */
9093148Sphk	int rdonly;			/* lookup read-only flag bit */
9103148Sphk	int error = 0;
9111541Srgrimes
912144203Sjeff	KASSERT(cnp->cn_flags & ISLASTCN,
913144203Sjeff	    ("relookup: Not given last component."));
9143148Sphk	/*
9153148Sphk	 * Setup: break out flag bits into variables.
9163148Sphk	 */
9173148Sphk	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
918145004Sjeff	KASSERT(wantparent, ("relookup: parent not wanted."));
9193148Sphk	rdonly = cnp->cn_flags & RDONLY;
9203148Sphk	cnp->cn_flags &= ~ISSYMLINK;
9213148Sphk	dp = dvp;
922144286Sjeff	cnp->cn_lkflags = LK_EXCLUSIVE;
923175202Sattilio	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
9243148Sphk
9253148Sphk	/*
9263148Sphk	 * Search a new directory.
9273148Sphk	 *
9283148Sphk	 * The last component of the filename is left accessible via
9293148Sphk	 * cnp->cn_nameptr for callers that need the name. Callers needing
9303148Sphk	 * the name set the SAVENAME flag. When done, they assume
9313148Sphk	 * responsibility for freeing the pathname buffer.
9323148Sphk	 */
9333148Sphk#ifdef NAMEI_DIAGNOSTIC
9343148Sphk	printf("{%s}: ", cnp->cn_nameptr);
9353148Sphk#endif
9363148Sphk
9373148Sphk	/*
9383148Sphk	 * Check for degenerate name (e.g. / or "")
9393148Sphk	 * which is a way of talking about a directory,
9403148Sphk	 * e.g. like "/." or ".".
9413148Sphk	 */
9423148Sphk	if (cnp->cn_nameptr[0] == '\0') {
9433148Sphk		if (cnp->cn_nameiop != LOOKUP || wantparent) {
9443148Sphk			error = EISDIR;
9453148Sphk			goto bad;
9463148Sphk		}
9473148Sphk		if (dp->v_type != VDIR) {
9483148Sphk			error = ENOTDIR;
9493148Sphk			goto bad;
9503148Sphk		}
9513148Sphk		if (!(cnp->cn_flags & LOCKLEAF))
952175294Sattilio			VOP_UNLOCK(dp, 0);
9533148Sphk		*vpp = dp;
95454655Seivind		/* XXX This should probably move to the top of function. */
9553148Sphk		if (cnp->cn_flags & SAVESTART)
9563148Sphk			panic("lookup: SAVESTART");
9573148Sphk		return (0);
9583148Sphk	}
9593148Sphk
9603148Sphk	if (cnp->cn_flags & ISDOTDOT)
9613148Sphk		panic ("relookup: lookup on dot-dot");
9623148Sphk
9633148Sphk	/*
9643148Sphk	 * We now have a segment name to search for, and a directory to search.
9653148Sphk	 */
966138345Sphk#ifdef NAMEI_DIAGNOSTIC
967138345Sphk	vprint("search in:", dp);
968138345Sphk#endif
96943311Sdillon	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
97042408Seivind		KASSERT(*vpp == NULL, ("leaf should be empty"));
9713148Sphk		if (error != EJUSTRETURN)
9723148Sphk			goto bad;
9733148Sphk		/*
9743148Sphk		 * If creating and at end of pathname, then can consider
9753148Sphk		 * allowing file to be created.
9763148Sphk		 */
97711644Sdg		if (rdonly) {
9783148Sphk			error = EROFS;
9793148Sphk			goto bad;
9803148Sphk		}
9813148Sphk		/* ASSERT(dvp == ndp->ni_startdir) */
9823148Sphk		if (cnp->cn_flags & SAVESTART)
9833148Sphk			VREF(dvp);
984144203Sjeff		if ((cnp->cn_flags & LOCKPARENT) == 0)
985175294Sattilio			VOP_UNLOCK(dp, 0);
9863148Sphk		/*
9873148Sphk		 * We return with ni_vp NULL to indicate that the entry
9883148Sphk		 * doesn't currently exist, leaving a pointer to the
989161010Srwatson		 * (possibly locked) directory vnode in ndp->ni_dvp.
9903148Sphk		 */
9913148Sphk		return (0);
9923148Sphk	}
993162288Smohans
9943148Sphk	dp = *vpp;
9953148Sphk
9963148Sphk	/*
99796755Strhodes	 * Disallow directory write attempts on read-only filesystems.
9983148Sphk	 */
99911644Sdg	if (rdonly &&
100011644Sdg	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
1001145004Sjeff		if (dvp == dp)
1002145004Sjeff			vrele(dvp);
1003145004Sjeff		else
1004145004Sjeff			vput(dvp);
100511644Sdg		error = EROFS;
1006145004Sjeff		goto bad;
10073148Sphk	}
1008145004Sjeff	/*
1009145004Sjeff	 * Set the parent lock/ref state to the requested state.
1010145004Sjeff	 */
1011145004Sjeff	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
1012145004Sjeff		if (wantparent)
1013175294Sattilio			VOP_UNLOCK(dvp, 0);
1014145004Sjeff		else
1015145004Sjeff			vput(dvp);
1016145004Sjeff	} else if (!wantparent)
1017145004Sjeff		vrele(dvp);
1018145004Sjeff	/*
1019145004Sjeff	 * Check for symbolic link
1020145004Sjeff	 */
1021145004Sjeff	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
1022145004Sjeff	    ("relookup: symlink found.\n"));
1023145004Sjeff
10243148Sphk	/* ASSERT(dvp == ndp->ni_startdir) */
10253148Sphk	if (cnp->cn_flags & SAVESTART)
10263148Sphk		VREF(dvp);
102722521Sdyson
10283148Sphk	if ((cnp->cn_flags & LOCKLEAF) == 0)
1029175294Sattilio		VOP_UNLOCK(dp, 0);
10303148Sphk	return (0);
10313148Sphkbad:
10323148Sphk	vput(dp);
10333148Sphk	*vpp = NULL;
10343148Sphk	return (error);
10353148Sphk}
1036141471Sjhb
1037141471Sjhb/*
1038144661Sjeff * Free data allocated by namei(); see namei(9) for details.
1039144661Sjeff */
1040144661Sjeffvoid
1041161011SrwatsonNDFREE(struct nameidata *ndp, const u_int flags)
1042144661Sjeff{
1043144833Sjeff	int unlock_dvp;
1044144833Sjeff	int unlock_vp;
1045144661Sjeff
1046144833Sjeff	unlock_dvp = 0;
1047144833Sjeff	unlock_vp = 0;
1048144833Sjeff
1049144661Sjeff	if (!(flags & NDF_NO_FREE_PNBUF) &&
1050144661Sjeff	    (ndp->ni_cnd.cn_flags & HASBUF)) {
1051144661Sjeff		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
1052144661Sjeff		ndp->ni_cnd.cn_flags &= ~HASBUF;
1053144661Sjeff	}
1054144833Sjeff	if (!(flags & NDF_NO_VP_UNLOCK) &&
1055144833Sjeff	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
1056144833Sjeff		unlock_vp = 1;
1057144833Sjeff	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
1058144833Sjeff		if (unlock_vp) {
1059144833Sjeff			vput(ndp->ni_vp);
1060144833Sjeff			unlock_vp = 0;
1061144833Sjeff		} else
1062144833Sjeff			vrele(ndp->ni_vp);
1063144833Sjeff		ndp->ni_vp = NULL;
1064144833Sjeff	}
1065144833Sjeff	if (unlock_vp)
1066175294Sattilio		VOP_UNLOCK(ndp->ni_vp, 0);
1067144661Sjeff	if (!(flags & NDF_NO_DVP_UNLOCK) &&
1068144661Sjeff	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
1069144661Sjeff	    ndp->ni_dvp != ndp->ni_vp)
1070144833Sjeff		unlock_dvp = 1;
1071144661Sjeff	if (!(flags & NDF_NO_DVP_RELE) &&
1072144661Sjeff	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
1073144833Sjeff		if (unlock_dvp) {
1074144833Sjeff			vput(ndp->ni_dvp);
1075144833Sjeff			unlock_dvp = 0;
1076144833Sjeff		} else
1077144833Sjeff			vrele(ndp->ni_dvp);
1078144661Sjeff		ndp->ni_dvp = NULL;
1079144661Sjeff	}
1080144833Sjeff	if (unlock_dvp)
1081175294Sattilio		VOP_UNLOCK(ndp->ni_dvp, 0);
1082144661Sjeff	if (!(flags & NDF_NO_STARTDIR_RELE) &&
1083144661Sjeff	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
1084144661Sjeff		vrele(ndp->ni_startdir);
1085144661Sjeff		ndp->ni_startdir = NULL;
1086144661Sjeff	}
1087144661Sjeff}
1088144661Sjeff
1089144661Sjeff/*
1090141471Sjhb * Determine if there is a suitable alternate filename under the specified
1091141471Sjhb * prefix for the specified path.  If the create flag is set, then the
1092141471Sjhb * alternate prefix will be used so long as the parent directory exists.
1093141471Sjhb * This is used by the various compatiblity ABIs so that Linux binaries prefer
1094141471Sjhb * files under /compat/linux for example.  The chosen path (whether under
1095141471Sjhb * the prefix or under /) is returned in a kernel malloc'd buffer pointed
1096141471Sjhb * to by pathbuf.  The caller is responsible for free'ing the buffer from
1097141471Sjhb * the M_TEMP bucket if one is returned.
1098141471Sjhb */
1099141471Sjhbint
1100177997Skibkern_alternate_path(struct thread *td, const char *prefix, const char *path,
1101177997Skib    enum uio_seg pathseg, char **pathbuf, int create, int dirfd)
1102141471Sjhb{
1103141471Sjhb	struct nameidata nd, ndroot;
1104141471Sjhb	char *ptr, *buf, *cp;
1105141471Sjhb	size_t len, sz;
1106141471Sjhb	int error;
1107141471Sjhb
1108141471Sjhb	buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1109141471Sjhb	*pathbuf = buf;
1110141471Sjhb
1111141471Sjhb	/* Copy the prefix into the new pathname as a starting point. */
1112141471Sjhb	len = strlcpy(buf, prefix, MAXPATHLEN);
1113141471Sjhb	if (len >= MAXPATHLEN) {
1114141471Sjhb		*pathbuf = NULL;
1115141471Sjhb		free(buf, M_TEMP);
1116141471Sjhb		return (EINVAL);
1117141471Sjhb	}
1118141471Sjhb	sz = MAXPATHLEN - len;
1119141471Sjhb	ptr = buf + len;
1120141471Sjhb
1121141471Sjhb	/* Append the filename to the prefix. */
1122141471Sjhb	if (pathseg == UIO_SYSSPACE)
1123141471Sjhb		error = copystr(path, ptr, sz, &len);
1124141471Sjhb	else
1125141471Sjhb		error = copyinstr(path, ptr, sz, &len);
1126141471Sjhb
1127141471Sjhb	if (error) {
1128141471Sjhb		*pathbuf = NULL;
1129141471Sjhb		free(buf, M_TEMP);
1130141471Sjhb		return (error);
1131141471Sjhb	}
1132141471Sjhb
1133141471Sjhb	/* Only use a prefix with absolute pathnames. */
1134141471Sjhb	if (*ptr != '/') {
1135141471Sjhb		error = EINVAL;
1136141471Sjhb		goto keeporig;
1137141471Sjhb	}
1138141471Sjhb
1139177997Skib	if (dirfd != AT_FDCWD) {
1140177997Skib		/*
1141177997Skib		 * We want the original because the "prefix" is
1142177997Skib		 * included in the already opened dirfd.
1143177997Skib		 */
1144177997Skib		bcopy(ptr, buf, len);
1145177997Skib		return (0);
1146177997Skib	}
1147177997Skib
1148141471Sjhb	/*
1149141471Sjhb	 * We know that there is a / somewhere in this pathname.
1150141471Sjhb	 * Search backwards for it, to find the file's parent dir
1151141471Sjhb	 * to see if it exists in the alternate tree. If it does,
1152141471Sjhb	 * and we want to create a file (cflag is set). We don't
1153141471Sjhb	 * need to worry about the root comparison in this case.
1154141471Sjhb	 */
1155141471Sjhb
1156141471Sjhb	if (create) {
1157141471Sjhb		for (cp = &ptr[len] - 1; *cp != '/'; cp--);
1158141471Sjhb		*cp = '\0';
1159141471Sjhb
1160150431Sjhb		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);
1161141471Sjhb		error = namei(&nd);
1162141471Sjhb		*cp = '/';
1163141471Sjhb		if (error != 0)
1164150431Sjhb			goto keeporig;
1165141471Sjhb	} else {
1166150431Sjhb		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);
1167141471Sjhb
1168141471Sjhb		error = namei(&nd);
1169141471Sjhb		if (error != 0)
1170150431Sjhb			goto keeporig;
1171141471Sjhb
1172141471Sjhb		/*
1173141471Sjhb		 * We now compare the vnode of the prefix to the one
1174141471Sjhb		 * vnode asked. If they resolve to be the same, then we
1175141471Sjhb		 * ignore the match so that the real root gets used.
1176141471Sjhb		 * This avoids the problem of traversing "../.." to find the
1177141471Sjhb		 * root directory and never finding it, because "/" resolves
1178141471Sjhb		 * to the emulation root directory. This is expensive :-(
1179141471Sjhb		 */
1180150431Sjhb		NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix,
1181150431Sjhb		    td);
1182141471Sjhb
1183141471Sjhb		/* We shouldn't ever get an error from this namei(). */
1184141471Sjhb		error = namei(&ndroot);
1185141471Sjhb		if (error == 0) {
1186141471Sjhb			if (nd.ni_vp == ndroot.ni_vp)
1187141471Sjhb				error = ENOENT;
1188141471Sjhb
1189141471Sjhb			NDFREE(&ndroot, NDF_ONLY_PNBUF);
1190141471Sjhb			vrele(ndroot.ni_vp);
1191150431Sjhb			VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot));
1192141471Sjhb		}
1193141471Sjhb	}
1194141471Sjhb
1195141471Sjhb	NDFREE(&nd, NDF_ONLY_PNBUF);
1196141471Sjhb	vrele(nd.ni_vp);
1197150431Sjhb	VFS_UNLOCK_GIANT(NDHASGIANT(&nd));
1198141471Sjhb
1199141471Sjhbkeeporig:
1200141471Sjhb	/* If there was an error, use the original path name. */
1201141471Sjhb	if (error)
1202141471Sjhb		bcopy(ptr, buf, len);
1203141471Sjhb	return (error);
1204141471Sjhb}
1205