vfs_lookup.c revision 273415
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: releng/9.3/sys/kern/vfs_lookup.c 273415 2014-10-21 20:21:10Z delphij $");
39
40#include "opt_capsicum.h"
41#include "opt_kdtrace.h"
42#include "opt_ktrace.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/capability.h>
48#include <sys/fcntl.h>
49#include <sys/jail.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/namei.h>
53#include <sys/vnode.h>
54#include <sys/mount.h>
55#include <sys/filedesc.h>
56#include <sys/proc.h>
57#include <sys/sdt.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysctl.h>
60#ifdef KTRACE
61#include <sys/ktrace.h>
62#endif
63
64#include <security/audit/audit.h>
65#include <security/mac/mac_framework.h>
66
67#include <vm/uma.h>
68
69#define	NAMEI_DIAGNOSTIC 1
70#undef NAMEI_DIAGNOSTIC
71
/*
 * Static tracing probes for namei().  namei() fires the entry probe with
 * the starting directory vnode, the pathname buffer and cn_flags, and the
 * return probe with the error code and the resulting vnode (NULL whenever
 * an error is being reported).
 */
72SDT_PROVIDER_DECLARE(vfs);
73SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, "struct vnode *", "char *",
74    "unsigned long");
75SDT_PROBE_DEFINE2(vfs, namei, lookup, return, "int", "struct vnode *");
76
77/*
78 * Allocation zone for namei
 *
 * Created by nameiinit() with MAXPATHLEN-byte items.  namei() allocates
 * its pathname buffers (cn_pnbuf and the temporary symlink buffer) from
 * this zone and namei_cleanup_cnp() returns cn_pnbuf to it.
79 */
80uma_zone_t namei_zone;
81/*
82 * Placeholder vnode for mp traversal
 *
 * Created by nameiinit() with dead_vnodeops.  lookup() installs it as
 * ni_dvp (taking a reference and a shared lock) when it replaces a
 * mounted-on directory with the root of the filesystem mounted there.
83 */
84static struct vnode *vp_crossmp;
85
86static void
87nameiinit(void *dummy __unused)
88{
89
90	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
91	    UMA_ALIGN_PTR, 0);
92	getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp);
93	vn_lock(vp_crossmp, LK_EXCLUSIVE);
94	VN_LOCK_ASHARE(vp_crossmp);
95	VOP_UNLOCK(vp_crossmp, 0);
96}
97SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
98
/*
 * Switch for shared vnode locking during path name translation (default
 * on).  When it is zero, namei() strips LOCKSHARED from cn_flags and
 * lookup() starts with LK_EXCLUSIVE instead of LK_SHARED.  It is exported
 * read-write as the sysctl vfs.lookup_shared and registered under the same
 * name with TUNABLE_INT.
 */
99static int lookup_shared = 1;
100SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0,
101    "Enables/Disables shared locks for path name translation");
102TUNABLE_INT("vfs.lookup_shared", &lookup_shared);
103
104/*
105 * Convert a pathname into a pointer to a locked vnode.
106 *
107 * The FOLLOW flag is set when symbolic links are to be followed
108 * when they occur at the end of the name translation process.
109 * Symbolic links are always followed for all other pathname
110 * components other than the last.
111 *
112 * The segflg defines whether the name is to be copied from user
113 * space or kernel space.
114 *
115 * Overall outline of namei:
116 *
117 *	copy in name
118 *	get starting directory
119 *	while (!done && !error) {
120 *		call lookup to search path.
121 *		if symbolic link, massage name in buffer and continue
122 *	}
123 */
124static void
125namei_cleanup_cnp(struct componentname *cnp)
126{
127	uma_zfree(namei_zone, cnp->cn_pnbuf);
128#ifdef DIAGNOSTIC
129	cnp->cn_pnbuf = NULL;
130	cnp->cn_nameptr = NULL;
131#endif
132}
133
/*
 * namei: translate the pathname described by ndp into vnode(s).
 *
 * The path named by ni_dirp is copied into a MAXPATHLEN buffer (kernel or
 * user copy, selected by ni_segflg), a starting directory is chosen, and
 * lookup() is called repeatedly; each time lookup() reports a symbolic
 * link through ISSYMLINK the link text is read and spliced in front of the
 * unresolved remainder of the path.
 *
 * Returns 0 on success with the result in ndp->ni_vp.  The pathname buffer
 * is kept (and HASBUF set) only when SAVENAME or SAVESTART was requested;
 * otherwise, and on every error return, it is freed here.  Errors
 * generated directly by this function are ENOENT (empty path or empty link
 * target), ECAPMODE, ENOTCAPABLE, ENOTDIR, ELOOP and ENAMETOOLONG; errors
 * from the copy routines, fgetvp_rights(), lookup(), the MAC readlink
 * check and VOP_READLINK() are passed through.
 *
 * Callers that do not set MPSAFE must hold Giant (asserted below).  For
 * MPSAFE callers the Giant state is reported through GIANTHELD in cn_flags.
 */
134int
135namei(struct nameidata *ndp)
136{
137	struct filedesc *fdp;	/* pointer to file descriptor state */
138	char *cp;		/* pointer into pathname argument */
139	struct vnode *dp;	/* the directory we are searching */
140	struct iovec aiov;		/* uio for reading symbolic links */
141	struct uio auio;
142	int error, linklen;
143	struct componentname *cnp = &ndp->ni_cnd;
144	struct thread *td = cnp->cn_thread;
145	struct proc *p = td->td_proc;
146	int vfslocked;
147
148	KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0,
149	    ("NOT MPSAFE and Giant not held"));
150	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
151	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
152	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
153	    ("namei: nameiop contaminated with flags"));
154	KASSERT((cnp->cn_flags & OPMASK) == 0,
155	    ("namei: flags contaminated with nameiops"));
	/* With shared lookups switched off, ignore the caller's LOCKSHARED. */
156	if (!lookup_shared)
157		cnp->cn_flags &= ~LOCKSHARED;
158	fdp = p->p_fd;
159
160	/* We will set this ourselves if we need it. */
161	cnp->cn_flags &= ~TRAILINGSLASH;
162
163	/*
164	 * Get a buffer for the name to be translated, and copy the
165	 * name into the buffer.
166	 */
167	if ((cnp->cn_flags & HASBUF) == 0)
168		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
169	if (ndp->ni_segflg == UIO_SYSSPACE)
170		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
171			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
172	else
173		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
174			    MAXPATHLEN, (size_t *)&ndp->ni_pathlen);
175
176	/*
177	 * Don't allow empty pathnames.
178	 */
179	if (!error && *cnp->cn_pnbuf == '\0')
180		error = ENOENT;
181
182#ifdef CAPABILITY_MODE
183	/*
184	 * In capability mode, lookups must be "strictly relative" (i.e.
185	 * not an absolute path, and not containing '..' components) to
186	 * a real file descriptor, not the pseudo-descriptor AT_FDCWD.
187	 */
188	if (error == 0 && IN_CAPABILITY_MODE(td)) {
189		ndp->ni_strictrelative = 1;
190		if (ndp->ni_dirfd == AT_FDCWD)
191			error = ECAPMODE;
192	}
193#endif
	/*
	 * Any failure so far: free the pathname buffer and report that no
	 * vnode was found.
	 */
194	if (error) {
195		namei_cleanup_cnp(cnp);
196		ndp->ni_vp = NULL;
197		return (error);
198	}
199	ndp->ni_loopcnt = 0;
200#ifdef KTRACE
201	if (KTRPOINT(td, KTR_NAMEI)) {
202		KASSERT(cnp->cn_thread == curthread,
203		    ("namei not using curthread"));
204		ktrnamei(cnp->cn_pnbuf);
205	}
206#endif
207	/*
208	 * Get starting point for the translation.
209	 */
210	FILEDESC_SLOCK(fdp);
211	ndp->ni_rootdir = fdp->fd_rdir;
212	ndp->ni_topdir = fdp->fd_jdir;
213
214	/*
215	 * If we are auditing the kernel pathname, save the user pathname.
216	 */
217	if (cnp->cn_flags & AUDITVNODE1)
218		AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
219	if (cnp->cn_flags & AUDITVNODE2)
220		AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);
221
	/*
	 * For a relative path the starting directory is, in order of
	 * preference, the caller-supplied ni_startdir or the vnode behind
	 * the descriptor ni_dirfd.  If neither applies dp stays NULL and
	 * the current working directory is used further down.
	 */
222	dp = NULL;
223	if (cnp->cn_pnbuf[0] != '/') {
224		if (ndp->ni_startdir != NULL) {
225			dp = ndp->ni_startdir;
226			error = 0;
227		} else if (ndp->ni_dirfd != AT_FDCWD) {
228			if (cnp->cn_flags & AUDITVNODE1)
229				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
230			if (cnp->cn_flags & AUDITVNODE2)
231				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
232			error = fgetvp_rights(td, ndp->ni_dirfd,
233			    ndp->ni_rightsneeded | CAP_LOOKUP,
234			    &(ndp->ni_baserights), &dp);
235#ifdef CAPABILITIES
236			/*
237			 * Lookups relative to a capability must also be
238			 * strictly relative.
239			 *
240			 * Note that a capability with rights CAP_MASK_VALID
241			 * is treated exactly like a regular file descriptor.
242			 */
243			if (ndp->ni_baserights != CAP_MASK_VALID)
244				ndp->ni_strictrelative = 1;
245#endif
246		}
		/*
		 * The filedesc lock is dropped here only if a start vnode
		 * was obtained or an error occurred; otherwise it stays held
		 * for the fd_cdir access below.  A start vnode that is not a
		 * directory is released and turned into ENOTDIR.
		 */
247		if (error != 0 || dp != NULL) {
248			FILEDESC_SUNLOCK(fdp);
249			if (error == 0 && dp->v_type != VDIR) {
250				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
251				vrele(dp);
252				VFS_UNLOCK_GIANT(vfslocked);
253				error = ENOTDIR;
254			}
255		}
256		if (error) {
257			namei_cleanup_cnp(cnp);
258			return (error);
259		}
260	}
	/*
	 * No start vnode yet (absolute path, or relative to AT_FDCWD):
	 * reference the current working directory.  A caller-supplied
	 * ni_startdir can only reach this point for an absolute path; it is
	 * not used and its reference is dropped.
	 */
261	if (dp == NULL) {
262		dp = fdp->fd_cdir;
263		VREF(dp);
264		FILEDESC_SUNLOCK(fdp);
265		if (ndp->ni_startdir != NULL) {
266			vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount);
267			vrele(ndp->ni_startdir);
268			VFS_UNLOCK_GIANT(vfslocked);
269		}
270	}
271	SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
272	    cnp->cn_flags, 0, 0);
273	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
	/*
	 * One iteration per lookup() call: the first for the original path,
	 * then one more for every symbolic link that has to be followed.
	 */
274	for (;;) {
275		/*
276		 * Check if root directory should replace current directory.
277		 * Done at start of translation and after symbolic link.
278		 */
279		cnp->cn_nameptr = cnp->cn_pnbuf;
280		if (*(cnp->cn_nameptr) == '/') {
			/*
			 * Absolute path or absolute link target: give up dp
			 * and restart from ni_rootdir, skipping the leading
			 * slashes.  This is refused with ENOTCAPABLE for
			 * strictly relative lookups.
			 */
281			vrele(dp);
282			VFS_UNLOCK_GIANT(vfslocked);
283			if (ndp->ni_strictrelative != 0) {
284				namei_cleanup_cnp(cnp);
285				return (ENOTCAPABLE);
286			}
287			while (*(cnp->cn_nameptr) == '/') {
288				cnp->cn_nameptr++;
289				ndp->ni_pathlen--;
290			}
291			dp = ndp->ni_rootdir;
292			vfslocked = VFS_LOCK_GIANT(dp->v_mount);
293			VREF(dp);
294		}
		/*
		 * The Giant state is handed to lookup() through GIANTHELD
		 * and read back from the same flag after it returns.
		 */
295		if (vfslocked)
296			ndp->ni_cnd.cn_flags |= GIANTHELD;
297		ndp->ni_startdir = dp;
298		error = lookup(ndp);
299		if (error) {
300			namei_cleanup_cnp(cnp);
301			SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0,
302			    0, 0);
303			return (error);
304		}
305		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
306		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
307		/*
308		 * If not a symbolic link, we're done.
309		 */
310		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			/*
			 * Keep the pathname buffer only for callers that
			 * asked for it, and tell them through HASBUF.
			 */
311			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
312				namei_cleanup_cnp(cnp);
313			} else
314				cnp->cn_flags |= HASBUF;
315
			/*
			 * Non-MPSAFE callers get the Giant acquisition made
			 * here undone; MPSAFE callers are told about it via
			 * GIANTHELD.
			 */
316			if ((cnp->cn_flags & MPSAFE) == 0) {
317				VFS_UNLOCK_GIANT(vfslocked);
318			} else if (vfslocked)
319				ndp->ni_cnd.cn_flags |= GIANTHELD;
320			SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp,
321			    0, 0, 0);
322			return (0);
323		}
		/* Bound the number of links followed in one translation. */
324		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
325			error = ELOOP;
326			break;
327		}
328#ifdef MAC
329		if ((cnp->cn_flags & NOMACCHECK) == 0) {
330			error = mac_vnode_check_readlink(td->td_ucred,
331			    ndp->ni_vp);
332			if (error)
333				break;
334		}
335#endif
		/*
		 * If more path follows the link, the link is read into a
		 * fresh buffer so the remainder can be appended to it;
		 * otherwise it is read straight into cn_pnbuf.  Every error
		 * exit below frees the fresh buffer under the same
		 * ni_pathlen > 1 test.
		 */
336		if (ndp->ni_pathlen > 1)
337			cp = uma_zalloc(namei_zone, M_WAITOK);
338		else
339			cp = cnp->cn_pnbuf;
340		aiov.iov_base = cp;
341		aiov.iov_len = MAXPATHLEN;
342		auio.uio_iov = &aiov;
343		auio.uio_iovcnt = 1;
344		auio.uio_offset = 0;
345		auio.uio_rw = UIO_READ;
346		auio.uio_segflg = UIO_SYSSPACE;
347		auio.uio_td = td;
348		auio.uio_resid = MAXPATHLEN;
349		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
350		if (error) {
351			if (ndp->ni_pathlen > 1)
352				uma_zfree(namei_zone, cp);
353			break;
354		}
		/* Bytes actually read; an empty link cannot be resolved. */
355		linklen = MAXPATHLEN - auio.uio_resid;
356		if (linklen == 0) {
357			if (ndp->ni_pathlen > 1)
358				uma_zfree(namei_zone, cp);
359			error = ENOENT;
360			break;
361		}
		/* Link text plus the unresolved remainder must fit. */
362		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
363			if (ndp->ni_pathlen > 1)
364				uma_zfree(namei_zone, cp);
365			error = ENAMETOOLONG;
366			break;
367		}
		/*
		 * Build the new path: link text followed by the remainder
		 * starting at ni_next.  In the fresh-buffer case the old
		 * buffer is freed and replaced; otherwise the link text
		 * already sits in cn_pnbuf and only needs terminating.
		 */
368		if (ndp->ni_pathlen > 1) {
369			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
370			uma_zfree(namei_zone, cnp->cn_pnbuf);
371			cnp->cn_pnbuf = cp;
372		} else
373			cnp->cn_pnbuf[linklen] = '\0';
374		ndp->ni_pathlen += linklen;
		/*
		 * Done with the link vnode; the next lookup() starts in the
		 * directory that contained the link.
		 */
375		vput(ndp->ni_vp);
376		dp = ndp->ni_dvp;
377	}
	/*
	 * Reached only by the "break" statements above, i.e. on an error
	 * detected after lookup() returned a symbolic link: free the path
	 * buffer and release both the link vnode and its parent.
	 */
378	namei_cleanup_cnp(cnp);
379	vput(ndp->ni_vp);
380	ndp->ni_vp = NULL;
381	vrele(ndp->ni_dvp);
382	VFS_UNLOCK_GIANT(vfslocked);
383	SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0);
384	return (error);
385}
386
387static int
388compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
389{
390
391	if (mp == NULL || ((lkflags & LK_SHARED) &&
392	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
393	    ((cnflags & ISDOTDOT) &&
394	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
395		lkflags &= ~LK_SHARED;
396		lkflags |= LK_EXCLUSIVE;
397	}
398	return (lkflags);
399}
400
401static __inline int
402needs_exclusive_leaf(struct mount *mp, int flags)
403{
404
405	/*
406	 * Intermediate nodes can use shared locks, we only need to
407	 * force an exclusive lock for leaf nodes.
408	 */
409	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
410		return (0);
411
412	/* Always use exclusive locks if LOCKSHARED isn't set. */
413	if (!(flags & LOCKSHARED))
414		return (1);
415
416	/*
417	 * For lookups during open(), if the mount point supports
418	 * extended shared operations, then use a shared lock for the
419	 * leaf node, otherwise use an exclusive lock.
420	 */
421	if (flags & ISOPEN) {
422		if (mp != NULL &&
423		    (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED))
424			return (0);
425		else
426			return (1);
427	}
428
429	/*
430	 * Lookup requests outside of open() that specify LOCKSHARED
431	 * only need a shared lock on the leaf vnode.
432	 */
433	return (0);
434}
435
436/*
437 * Search a pathname.
438 * This is a very central and rather complicated routine.
439 *
440 * The pathname is pointed to by cn_nameptr and is of length ni_pathlen.
441 * The starting directory is taken from ni_startdir. The pathname is
442 * descended until done, or a symbolic link is encountered. The ISSYMLINK
443 * flag in cn_flags is clear if the path is completed; it is set if a
444 * symbolic link needing interpretation is encountered.
445 *
446 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
447 * whether the name is to be looked up, created, renamed, or deleted.
448 * When CREATE, RENAME, or DELETE is specified, information usable in
449 * creating, renaming, or deleting a directory entry may be calculated.
450 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
451 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
452 * returned unlocked. Otherwise the parent directory is not returned. If
453 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
454 * the target is returned locked, otherwise it is returned unlocked.
455 * When creating or renaming and LOCKPARENT is specified, the target may not
456 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
457 *
458 * Overall outline of lookup:
459 *
460 * dirloop:
461 *	identify next component of name at cnp->cn_nameptr
462 *	handle degenerate case where name is null string
463 *	if .. and crossing mount points and on mounted filesys, find parent
464 *	call VOP_LOOKUP routine for next component name
465 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
466 *	    component vnode returned in ni_vp (if it exists), locked.
467 *	if result vnode is mounted on and crossing mount points,
468 *	    find mounted on vnode
469 *	if more components of name, do next level at dirloop
470 *	return the answer in ni_vp, locked if LOCKLEAF set
471 *	    if LOCKPARENT set, return locked parent in ni_dvp
472 *	    if WANTPARENT set, return unlocked parent in ni_dvp
473 */
/*
 * Returns 0 on success or an errno value on failure.  On failure the
 * vnodes still held here are released, ni_vp is set to NULL and GIANTHELD
 * is cleared.  The Giant state of the starting directory is taken from
 * GIANTHELD in cn_flags on entry, and GIANTHELD is set again on success if
 * Giant is held for either the result or its parent.
 */
474int
475lookup(struct nameidata *ndp)
476{
477	char *cp;		/* pointer into pathname argument */
478	struct vnode *dp = 0;	/* the directory we are searching */
479	struct vnode *tdp;		/* saved dp */
480	struct mount *mp;		/* mount table entry */
481	struct prison *pr;
482	int docache;			/* == 0 do not cache last component */
483	int wantparent;			/* 1 => wantparent or lockparent flag */
484	int rdonly;			/* lookup read-only flag bit */
485	int error = 0;
486	int dpunlocked = 0;		/* dp has already been unlocked */
487	struct componentname *cnp = &ndp->ni_cnd;
488	int vfslocked;			/* VFS Giant state for child */
489	int dvfslocked;			/* VFS Giant state for parent */
490	int tvfslocked;
491	int lkflags_save;
	/*
	 * State of ni_dvp on the late exit paths, as interpreted at bad2:
	 * 0 = still locked, 1 = unlocked but still referenced,
	 * 2 = already released completely.
	 */
492	int ni_dvp_unlocked;
493
494	/*
495	 * Setup: break out flag bits into variables.
496	 */
497	dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
498	vfslocked = 0;
499	ni_dvp_unlocked = 0;
500	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
501	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
502	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
503	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
	/*
	 * docache is non-zero exactly when NOCACHE is clear.  It is then
	 * forced off for DELETE, and for any operation other than LOOKUP
	 * and CREATE when the parent is wanted.
	 */
504	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
505	if (cnp->cn_nameiop == DELETE ||
506	    (wantparent && cnp->cn_nameiop != CREATE &&
507	     cnp->cn_nameiop != LOOKUP))
508		docache = 0;
509	rdonly = cnp->cn_flags & RDONLY;
510	cnp->cn_flags &= ~ISSYMLINK;
511	ndp->ni_dvp = NULL;
512	/*
513	 * We use shared locks until we hit the parent of the last cn then
514	 * we adjust based on the requesting flags.
515	 */
516	if (lookup_shared)
517		cnp->cn_lkflags = LK_SHARED;
518	else
519		cnp->cn_lkflags = LK_EXCLUSIVE;
	/* Take over the starting directory from ni_startdir and lock it. */
520	dp = ndp->ni_startdir;
521	ndp->ni_startdir = NULLVP;
522	vn_lock(dp,
523	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
524	    cnp->cn_flags));
525
526dirloop:
527	/*
528	 * Search a new directory.
529	 *
530	 * The last component of the filename is left accessible via
531	 * cnp->cn_nameptr for callers that need the name. Callers needing
532	 * the name set the SAVENAME flag. When done, they assume
533	 * responsibility for freeing the pathname buffer.
534	 */
535	cnp->cn_consume = 0;
	/* Find the end of the current component (next '/' or NUL). */
536	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
537		continue;
538	cnp->cn_namelen = cp - cnp->cn_nameptr;
539	if (cnp->cn_namelen > NAME_MAX) {
540		error = ENAMETOOLONG;
541		goto bad;
542	}
543#ifdef NAMEI_DIAGNOSTIC
544	{ char c = *cp;
545	*cp = '\0';
546	printf("{%s}: ", cnp->cn_nameptr);
547	*cp = c; }
548#endif
549	ndp->ni_pathlen -= cnp->cn_namelen;
550	ndp->ni_next = cp;
551
552	/*
553	 * Replace multiple slashes by a single slash and trailing slashes
554	 * by a null.  This must be done before VOP_LOOKUP() because some
555	 * fs's don't know about trailing slashes.  Remember if there were
556	 * trailing slashes to handle symlinks, existing non-directories
557	 * and non-existing files that won't be directories specially later.
558	 */
559	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
560		cp++;
561		ndp->ni_pathlen--;
562		if (*cp == '\0') {
563			*ndp->ni_next = '\0';
564			cnp->cn_flags |= TRAILINGSLASH;
565		}
566	}
567	ndp->ni_next = cp;
568
	/*
	 * Derive the per-component flags: MAKEENTRY (cleared only for the
	 * last component when caching was ruled out above), ISDOTDOT and
	 * ISLASTCN.
	 */
569	cnp->cn_flags |= MAKEENTRY;
570	if (*cp == '\0' && docache == 0)
571		cnp->cn_flags &= ~MAKEENTRY;
572	if (cnp->cn_namelen == 2 &&
573	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
574		cnp->cn_flags |= ISDOTDOT;
575	else
576		cnp->cn_flags &= ~ISDOTDOT;
577	if (*ndp->ni_next == 0)
578		cnp->cn_flags |= ISLASTCN;
579	else
580		cnp->cn_flags &= ~ISLASTCN;
581
	/* "." as the final component may not be deleted or renamed. */
582	if ((cnp->cn_flags & ISLASTCN) != 0 &&
583	    cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
584	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
585		error = EINVAL;
586		goto bad;
587	}
588
589	/*
590	 * Check for degenerate name (e.g. / or "")
591	 * which is a way of talking about a directory,
592	 * e.g. like "/." or ".".
593	 */
594	if (cnp->cn_nameptr[0] == '\0') {
595		if (dp->v_type != VDIR) {
596			error = ENOTDIR;
597			goto bad;
598		}
599		if (cnp->cn_nameiop != LOOKUP) {
600			error = EISDIR;
601			goto bad;
602		}
		/*
		 * dp is the answer; when a parent is wanted it also serves
		 * as the parent and gets an extra reference for that role.
		 */
603		if (wantparent) {
604			ndp->ni_dvp = dp;
605			VREF(dp);
606		}
607		ndp->ni_vp = dp;
608
609		if (cnp->cn_flags & AUDITVNODE1)
610			AUDIT_ARG_VNODE1(dp);
611		else if (cnp->cn_flags & AUDITVNODE2)
612			AUDIT_ARG_VNODE2(dp);
613
614		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
615			VOP_UNLOCK(dp, 0);
616		/* XXX This should probably move to the top of function. */
617		if (cnp->cn_flags & SAVESTART)
618			panic("lookup: SAVESTART");
619		goto success;
620	}
621
622	/*
623	 * Handle "..": five special cases.
624	 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
625	 *    fairly conservative design choice, but it's the only one that we
626	 *    are satisfied guarantees the property we're looking for).
627	 * 1. Return an error if this is the last component of
628	 *    the name and the operation is DELETE or RENAME.
629	 * 2. If at root directory (e.g. after chroot)
630	 *    or at absolute root directory
631	 *    then ignore it so can't get out.
632	 * 3. If this vnode is the root of a mounted
633	 *    filesystem, then replace it with the
634	 *    vnode which was mounted on so we take the
635	 *    .. in the other filesystem.
636	 * 4. If the vnode is the top directory of
637	 *    the jail or chroot, don't let them out.
638	 */
639	if (cnp->cn_flags & ISDOTDOT) {
640		if (ndp->ni_strictrelative != 0) {
641			error = ENOTCAPABLE;
642			goto bad;
643		}
644		if ((cnp->cn_flags & ISLASTCN) != 0 &&
645		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
646			error = EINVAL;
647			goto bad;
648		}
649		for (;;) {
			/*
			 * pr ends up non-NULL iff dp is the root of the
			 * credential's prison or of one of its ancestors.
			 */
650			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
651			     pr = pr->pr_parent)
652				if (dp == pr->pr_root)
653					break;
			/*
			 * At a boundary that may not be crossed, ".."
			 * resolves to dp itself: dp becomes both parent and
			 * result and gets a second reference.
			 */
654			if (dp == ndp->ni_rootdir ||
655			    dp == ndp->ni_topdir ||
656			    dp == rootvnode ||
657			    pr != NULL ||
658			    ((dp->v_vflag & VV_ROOT) != 0 &&
659			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
660				ndp->ni_dvp = dp;
661				ndp->ni_vp = dp;
662				vfslocked = VFS_LOCK_GIANT(dp->v_mount);
663				VREF(dp);
664				goto nextname;
665			}
			/* Not a filesystem root: let VOP_LOOKUP handle "..". */
666			if ((dp->v_vflag & VV_ROOT) == 0)
667				break;
668			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
669				error = ENOENT;
670				goto bad;
671			}
			/*
			 * dp is the root of a mounted filesystem: step down
			 * to the vnode it covers, reference and lock that,
			 * release the old dp, and re-check.
			 */
672			tdp = dp;
673			dp = dp->v_mount->mnt_vnodecovered;
674			tvfslocked = dvfslocked;
675			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
676			VREF(dp);
677			vput(tdp);
678			VFS_UNLOCK_GIANT(tvfslocked);
679			vn_lock(dp,
680			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
681			    LK_RETRY, ISDOTDOT));
682		}
683	}
684
685	/*
686	 * We now have a segment name to search for, and a directory to search.
687	 */
688unionlookup:
689#ifdef MAC
690	if ((cnp->cn_flags & NOMACCHECK) == 0) {
691		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
692		    cnp);
693		if (error)
694			goto bad;
695	}
696#endif
697	ndp->ni_dvp = dp;
698	ndp->ni_vp = NULL;
699	ASSERT_VOP_LOCKED(dp, "lookup");
700	VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked));
701	/*
702	 * If we have a shared lock we may need to upgrade the lock for the
703	 * last operation.
704	 */
705	if (dp != vp_crossmp &&
706	    VOP_ISLOCKED(dp) == LK_SHARED &&
707	    (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT))
708		vn_lock(dp, LK_UPGRADE|LK_RETRY);
	/* The directory may have been reclaimed in the meantime. */
709	if ((dp->v_iflag & VI_DOOMED) != 0) {
710		error = ENOENT;
711		goto bad;
712	}
713	/*
714	 * If we're looking up the last component and we need an exclusive
715	 * lock, adjust our lkflags.
716	 */
717	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
718		cnp->cn_lkflags = LK_EXCLUSIVE;
719#ifdef NAMEI_DIAGNOSTIC
720	vprint("lookup in", dp);
721#endif
	/*
	 * cn_lkflags is adjusted for this mount only for the duration of
	 * VOP_LOOKUP(); the saved value is restored on both outcomes.
	 */
722	lkflags_save = cnp->cn_lkflags;
723	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
724	    cnp->cn_flags);
725	if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
726		cnp->cn_lkflags = lkflags_save;
727		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
728#ifdef NAMEI_DIAGNOSTIC
729		printf("not found\n");
730#endif
		/*
		 * Not found in the root of a union mount: switch dp to the
		 * covered vnode and retry the same component there.
		 */
731		if ((error == ENOENT) &&
732		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
733		    (dp->v_mount->mnt_flag & MNT_UNION)) {
734			tdp = dp;
735			dp = dp->v_mount->mnt_vnodecovered;
736			tvfslocked = dvfslocked;
737			dvfslocked = VFS_LOCK_GIANT(dp->v_mount);
738			VREF(dp);
739			vput(tdp);
740			VFS_UNLOCK_GIANT(tvfslocked);
741			vn_lock(dp,
742			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
743			    LK_RETRY, cnp->cn_flags));
744			goto unionlookup;
745		}
746
		/* Only EJUSTRETURN is turned into a successful return. */
747		if (error != EJUSTRETURN)
748			goto bad;
749		/*
750		 * At this point, we know we're at the end of the
751		 * pathname.  If creating / renaming, we can consider
752		 * allowing the file or directory to be created / renamed,
753		 * provided we're not on a read-only filesystem.
754		 */
755		if (rdonly) {
756			error = EROFS;
757			goto bad;
758		}
759		/* trailing slash only allowed for directories */
760		if ((cnp->cn_flags & TRAILINGSLASH) &&
761		    !(cnp->cn_flags & WILLBEDIR)) {
762			error = ENOENT;
763			goto bad;
764		}
765		if ((cnp->cn_flags & LOCKPARENT) == 0)
766			VOP_UNLOCK(dp, 0);
767		/*
768		 * We return with ni_vp NULL to indicate that the entry
769		 * doesn't currently exist, leaving a pointer to the
770		 * (possibly locked) directory vnode in ndp->ni_dvp.
771		 */
772		if (cnp->cn_flags & SAVESTART) {
773			ndp->ni_startdir = ndp->ni_dvp;
774			VREF(ndp->ni_startdir);
775		}
776		goto success;
777	} else
778		cnp->cn_lkflags = lkflags_save;
779#ifdef NAMEI_DIAGNOSTIC
780	printf("found\n");
781#endif
782	/*
783	 * Take into account any additional components consumed by
784	 * the underlying filesystem.
785	 */
786	if (cnp->cn_consume > 0) {
787		cnp->cn_nameptr += cnp->cn_consume;
788		ndp->ni_next += cnp->cn_consume;
789		ndp->ni_pathlen -= cnp->cn_consume;
790		cnp->cn_consume = 0;
791	}
792
	/* From here on dp is the vnode just found; ni_dvp is its parent. */
793	dp = ndp->ni_vp;
794	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
795
796	/*
797	 * Check to see if the vnode has been mounted on;
798	 * if so find the root of the mounted filesystem.
799	 */
800	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
801	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		/* Could not busy the mount: re-evaluate the loop condition. */
802		if (vfs_busy(mp, 0))
803			continue;
		/*
		 * Release the covered vnode and the real parent, install
		 * vp_crossmp as the parent, then fetch the root vnode of
		 * the mounted filesystem.
		 */
804		vput(dp);
805		VFS_UNLOCK_GIANT(vfslocked);
806		vfslocked = VFS_LOCK_GIANT(mp);
807		if (dp != ndp->ni_dvp)
808			vput(ndp->ni_dvp);
809		else
810			vrele(ndp->ni_dvp);
811		VFS_UNLOCK_GIANT(dvfslocked);
812		dvfslocked = 0;
813		vref(vp_crossmp);
814		ndp->ni_dvp = vp_crossmp;
815		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
816		    cnp->cn_flags), &tdp);
817		vfs_unbusy(mp);
818		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
819			panic("vp_crossmp exclusively locked or reclaimed");
		/*
		 * dp was already released by the vput() above, so the error
		 * path must not vput() it a second time.
		 */
820		if (error) {
821			dpunlocked = 1;
822			goto bad2;
823		}
824		ndp->ni_vp = dp = tdp;
825	}
826
827	/*
828	 * Check for symbolic link
829	 */
830	if ((dp->v_type == VLNK) &&
831	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
832	     *ndp->ni_next == '/')) {
833		cnp->cn_flags |= ISSYMLINK;
834		if (dp->v_iflag & VI_DOOMED) {
835			/*
836			 * We can't know whether the directory was mounted with
837			 * NOSYMFOLLOW, so we can't follow safely.
838			 */
839			error = ENOENT;
840			goto bad2;
841		}
842		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
843			error = EACCES;
844			goto bad2;
845		}
846		/*
847		 * Symlink code always expects an unlocked dvp.
848		 */
849		if (ndp->ni_dvp != ndp->ni_vp) {
850			VOP_UNLOCK(ndp->ni_dvp, 0);
851			ni_dvp_unlocked = 1;
852		}
853		goto success;
854	}
855
856nextname:
857	/*
858	 * Not a symbolic link that we will follow.  Continue with the
859	 * next component if there is any; otherwise, we're done.
860	 */
861	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
862	    ("lookup: invalid path state."));
863	if (*ndp->ni_next == '/') {
		/*
		 * More components follow: skip the slashes, release the
		 * parent and loop with dp as the new directory to search.
		 */
864		cnp->cn_nameptr = ndp->ni_next;
865		while (*cnp->cn_nameptr == '/') {
866			cnp->cn_nameptr++;
867			ndp->ni_pathlen--;
868		}
869		if (ndp->ni_dvp != dp)
870			vput(ndp->ni_dvp);
871		else
872			vrele(ndp->ni_dvp);
873		VFS_UNLOCK_GIANT(dvfslocked);
874		dvfslocked = vfslocked;	/* dp becomes dvp in dirloop */
875		vfslocked = 0;
876		goto dirloop;
877	}
878	/*
879	 * If we're processing a path with a trailing slash,
880	 * check that the end result is a directory.
881	 */
882	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
883		error = ENOTDIR;
884		goto bad2;
885	}
886	/*
887	 * Disallow directory write attempts on read-only filesystems.
888	 */
889	if (rdonly &&
890	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
891		error = EROFS;
892		goto bad2;
893	}
894	if (cnp->cn_flags & SAVESTART) {
895		ndp->ni_startdir = ndp->ni_dvp;
896		VREF(ndp->ni_startdir);
897	}
	/*
	 * Bring the parent into the state the caller asked for: released
	 * entirely if it is not wanted, merely unlocked for WANTPARENT
	 * without LOCKPARENT, left locked otherwise.
	 */
898	if (!wantparent) {
899		ni_dvp_unlocked = 2;
900		if (ndp->ni_dvp != dp)
901			vput(ndp->ni_dvp);
902		else
903			vrele(ndp->ni_dvp);
904		VFS_UNLOCK_GIANT(dvfslocked);
905		dvfslocked = 0;
906	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
907		VOP_UNLOCK(ndp->ni_dvp, 0);
908		ni_dvp_unlocked = 1;
909	}
910
911	if (cnp->cn_flags & AUDITVNODE1)
912		AUDIT_ARG_VNODE1(dp);
913	else if (cnp->cn_flags & AUDITVNODE2)
914		AUDIT_ARG_VNODE2(dp);
915
916	if ((cnp->cn_flags & LOCKLEAF) == 0)
917		VOP_UNLOCK(dp, 0);
918success:
919	/*
920	 * Because of lookup_shared we may have the vnode shared locked, but
921	 * the caller may want it to be exclusively locked.
922	 */
923	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
924	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
925		vn_lock(dp, LK_UPGRADE | LK_RETRY);
926		if (dp->v_iflag & VI_DOOMED) {
927			error = ENOENT;
928			goto bad2;
929		}
930	}
	/*
	 * Collapse the two Giant acquisitions into at most one and report
	 * it to the caller through GIANTHELD.
	 */
931	if (vfslocked && dvfslocked)
932		VFS_UNLOCK_GIANT(dvfslocked);	/* Only need one */
933	if (vfslocked || dvfslocked)
934		ndp->ni_cnd.cn_flags |= GIANTHELD;
935	return (0);
936
	/*
	 * bad2: error exit once ni_dvp has been set up; releases the parent
	 * according to ni_dvp_unlocked and then falls into bad.
	 * bad:  error exit that releases dp (unless already done) and both
	 * Giant acquisitions.
	 */
937bad2:
938	if (ni_dvp_unlocked != 2) {
939		if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
940			vput(ndp->ni_dvp);
941		else
942			vrele(ndp->ni_dvp);
943	}
944bad:
945	if (!dpunlocked)
946		vput(dp);
947	VFS_UNLOCK_GIANT(vfslocked);
948	VFS_UNLOCK_GIANT(dvfslocked);
949	ndp->ni_cnd.cn_flags &= ~GIANTHELD;
950	ndp->ni_vp = NULL;
951	return (error);
952}
953
954/*
955 * relookup - lookup a path name component
956 *    Used by lookup to re-acquire things.
957 */
/*
 * Look up the single component described by cnp in directory dvp and
 * return the result through *vpp.
 *
 * cnp must have ISLASTCN and at least one of LOCKPARENT/WANTPARENT set
 * (both asserted), and must not be a ".." component (panics).  dvp is
 * locked exclusively here and the component lookup itself is done with
 * cn_lkflags set to LK_EXCLUSIVE.
 *
 * Returns 0 on success.  When VOP_LOOKUP() answers EJUSTRETURN and the
 * lookup is not read-only, 0 is returned with dvp left locked only if
 * LOCKPARENT is set; the assertion after VOP_LOOKUP() expects *vpp to be
 * NULL in that case.  On any other failure an errno value is returned and
 * *vpp is set to NULL.
 */
958int
959relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
960{
961	struct vnode *dp = 0;		/* the directory we are searching */
962	int wantparent;			/* 1 => wantparent or lockparent flag */
963	int rdonly;			/* lookup read-only flag bit */
964	int error = 0;
965
966	KASSERT(cnp->cn_flags & ISLASTCN,
967	    ("relookup: Not given last component."));
968	/*
969	 * Setup: break out flag bits into variables.
970	 */
971	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
972	KASSERT(wantparent, ("relookup: parent not wanted."));
973	rdonly = cnp->cn_flags & RDONLY;
974	cnp->cn_flags &= ~ISSYMLINK;
975	dp = dvp;
976	cnp->cn_lkflags = LK_EXCLUSIVE;
977	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
978
979	/*
980	 * Search a new directory.
981	 *
982	 * The last component of the filename is left accessible via
983	 * cnp->cn_nameptr for callers that need the name. Callers needing
984	 * the name set the SAVENAME flag. When done, they assume
985	 * responsibility for freeing the pathname buffer.
986	 */
987#ifdef NAMEI_DIAGNOSTIC
988	printf("{%s}: ", cnp->cn_nameptr);
989#endif
990
991	/*
992	 * Check for "" which represents the root directory after slash
993	 * removal.
994	 */
995	if (cnp->cn_nameptr[0] == '\0') {
996		/*
997		 * Support only LOOKUP for "/" because lookup()
998		 * can't succeed for CREATE, DELETE and RENAME.
999		 */
1000		KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
1001		KASSERT(dp->v_type == VDIR, ("dp is not a directory"));
1002
		/* The directory itself is the result. */
1003		if (!(cnp->cn_flags & LOCKLEAF))
1004			VOP_UNLOCK(dp, 0);
1005		*vpp = dp;
1006		/* XXX This should probably move to the top of function. */
1007		if (cnp->cn_flags & SAVESTART)
1008			panic("lookup: SAVESTART");
1009		return (0);
1010	}
1011
1012	if (cnp->cn_flags & ISDOTDOT)
1013		panic ("relookup: lookup on dot-dot");
1014
1015	/*
1016	 * We now have a segment name to search for, and a directory to search.
1017	 */
1018#ifdef NAMEI_DIAGNOSTIC
1019	vprint("search in:", dp);
1020#endif
1021	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
1022		KASSERT(*vpp == NULL, ("leaf should be empty"));
		/* Only EJUSTRETURN is turned into a successful return. */
1023		if (error != EJUSTRETURN)
1024			goto bad;
1025		/*
1026		 * If creating and at end of pathname, then can consider
1027		 * allowing file to be created.
1028		 */
1029		if (rdonly) {
1030			error = EROFS;
1031			goto bad;
1032		}
1033		/* ASSERT(dvp == ndp->ni_startdir) */
1034		if (cnp->cn_flags & SAVESTART)
1035			VREF(dvp);
1036		if ((cnp->cn_flags & LOCKPARENT) == 0)
1037			VOP_UNLOCK(dp, 0);
1038		/*
1039		 * We return with ni_vp NULL to indicate that the entry
1040		 * doesn't currently exist, leaving a pointer to the
1041		 * (possibly locked) directory vnode in ndp->ni_dvp.
1042		 */
1043		return (0);
1044	}
1045
	/* From here on dp is the vnode found; dvp remains its parent. */
1046	dp = *vpp;
1047
1048	/*
1049	 * Disallow directory write attempts on read-only filesystems.
1050	 */
1051	if (rdonly &&
1052	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		/*
		 * Release the parent here; the result vnode is released at
		 * bad.  When both are the same vnode only the reference is
		 * dropped here and the lock is released by the vput() there.
		 */
1053		if (dvp == dp)
1054			vrele(dvp);
1055		else
1056			vput(dvp);
1057		error = EROFS;
1058		goto bad;
1059	}
1060	/*
1061	 * Set the parent lock/ref state to the requested state.
1062	 */
1063	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) {
1064		if (wantparent)
1065			VOP_UNLOCK(dvp, 0);
1066		else
1067			vput(dvp);
1068	} else if (!wantparent)
1069		vrele(dvp);
1070	/*
1071	 * Check for symbolic link
1072	 */
1073	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
1074	    ("relookup: symlink found.\n"));
1075
1076	/* ASSERT(dvp == ndp->ni_startdir) */
1077	if (cnp->cn_flags & SAVESTART)
1078		VREF(dvp);
1079
1080	if ((cnp->cn_flags & LOCKLEAF) == 0)
1081		VOP_UNLOCK(dp, 0);
1082	return (0);
	/* Error exit: unlock and release dp, and hand back no vnode. */
1083bad:
1084	vput(dp);
1085	*vpp = NULL;
1086	return (error);
1087}
1088
1089/*
1090 * Free data allocated by namei(); see namei(9) for details.
1091 */
1092void
1093NDFREE(struct nameidata *ndp, const u_int flags)
1094{
1095	int unlock_dvp;
1096	int unlock_vp;
1097
1098	unlock_dvp = 0;
1099	unlock_vp = 0;
1100
1101	if (!(flags & NDF_NO_FREE_PNBUF) &&
1102	    (ndp->ni_cnd.cn_flags & HASBUF)) {
1103		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
1104		ndp->ni_cnd.cn_flags &= ~HASBUF;
1105	}
1106	if (!(flags & NDF_NO_VP_UNLOCK) &&
1107	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
1108		unlock_vp = 1;
1109	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
1110		if (unlock_vp) {
1111			vput(ndp->ni_vp);
1112			unlock_vp = 0;
1113		} else
1114			vrele(ndp->ni_vp);
1115		ndp->ni_vp = NULL;
1116	}
1117	if (unlock_vp)
1118		VOP_UNLOCK(ndp->ni_vp, 0);
1119	if (!(flags & NDF_NO_DVP_UNLOCK) &&
1120	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
1121	    ndp->ni_dvp != ndp->ni_vp)
1122		unlock_dvp = 1;
1123	if (!(flags & NDF_NO_DVP_RELE) &&
1124	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
1125		if (unlock_dvp) {
1126			vput(ndp->ni_dvp);
1127			unlock_dvp = 0;
1128		} else
1129			vrele(ndp->ni_dvp);
1130		ndp->ni_dvp = NULL;
1131	}
1132	if (unlock_dvp)
1133		VOP_UNLOCK(ndp->ni_dvp, 0);
1134	if (!(flags & NDF_NO_STARTDIR_RELE) &&
1135	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
1136		vrele(ndp->ni_startdir);
1137		ndp->ni_startdir = NULL;
1138	}
1139}
1140
1141/*
1142 * Determine if there is a suitable alternate filename under the specified
1143 * prefix for the specified path.  If the create flag is set, then the
1144 * alternate prefix will be used so long as the parent directory exists.
1145 * This is used by the various compatiblity ABIs so that Linux binaries prefer
1146 * files under /compat/linux for example.  The chosen path (whether under
1147 * the prefix or under /) is returned in a kernel malloc'd buffer pointed
1148 * to by pathbuf.  The caller is responsible for free'ing the buffer from
1149 * the M_TEMP bucket if one is returned.
1150 */
1151int
1152kern_alternate_path(struct thread *td, const char *prefix, const char *path,
1153    enum uio_seg pathseg, char **pathbuf, int create, int dirfd)
1154{
1155	struct nameidata nd, ndroot;
1156	char *ptr, *buf, *cp;
1157	size_t len, sz;
1158	int error;
1159
1160	buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1161	*pathbuf = buf;
1162
1163	/* Copy the prefix into the new pathname as a starting point. */
1164	len = strlcpy(buf, prefix, MAXPATHLEN);
1165	if (len >= MAXPATHLEN) {
1166		*pathbuf = NULL;
1167		free(buf, M_TEMP);
1168		return (EINVAL);
1169	}
1170	sz = MAXPATHLEN - len;
1171	ptr = buf + len;
1172
1173	/* Append the filename to the prefix. */
1174	if (pathseg == UIO_SYSSPACE)
1175		error = copystr(path, ptr, sz, &len);
1176	else
1177		error = copyinstr(path, ptr, sz, &len);
1178
1179	if (error) {
1180		*pathbuf = NULL;
1181		free(buf, M_TEMP);
1182		return (error);
1183	}
1184
1185	/* Only use a prefix with absolute pathnames. */
1186	if (*ptr != '/') {
1187		error = EINVAL;
1188		goto keeporig;
1189	}
1190
1191	if (dirfd != AT_FDCWD) {
1192		/*
1193		 * We want the original because the "prefix" is
1194		 * included in the already opened dirfd.
1195		 */
1196		bcopy(ptr, buf, len);
1197		return (0);
1198	}
1199
1200	/*
1201	 * We know that there is a / somewhere in this pathname.
1202	 * Search backwards for it, to find the file's parent dir
1203	 * to see if it exists in the alternate tree. If it does,
1204	 * and we want to create a file (cflag is set). We don't
1205	 * need to worry about the root comparison in this case.
1206	 */
1207
1208	if (create) {
1209		for (cp = &ptr[len] - 1; *cp != '/'; cp--);
1210		*cp = '\0';
1211
1212		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);
1213		error = namei(&nd);
1214		*cp = '/';
1215		if (error != 0)
1216			goto keeporig;
1217	} else {
1218		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td);
1219
1220		error = namei(&nd);
1221		if (error != 0)
1222			goto keeporig;
1223
1224		/*
1225		 * We now compare the vnode of the prefix to the one
1226		 * vnode asked. If they resolve to be the same, then we
1227		 * ignore the match so that the real root gets used.
1228		 * This avoids the problem of traversing "../.." to find the
1229		 * root directory and never finding it, because "/" resolves
1230		 * to the emulation root directory. This is expensive :-(
1231		 */
1232		NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix,
1233		    td);
1234
1235		/* We shouldn't ever get an error from this namei(). */
1236		error = namei(&ndroot);
1237		if (error == 0) {
1238			if (nd.ni_vp == ndroot.ni_vp)
1239				error = ENOENT;
1240
1241			NDFREE(&ndroot, NDF_ONLY_PNBUF);
1242			vrele(ndroot.ni_vp);
1243			VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot));
1244		}
1245	}
1246
1247	NDFREE(&nd, NDF_ONLY_PNBUF);
1248	vrele(nd.ni_vp);
1249	VFS_UNLOCK_GIANT(NDHASGIANT(&nd));
1250
1251keeporig:
1252	/* If there was an error, use the original path name. */
1253	if (error)
1254		bcopy(ptr, buf, len);
1255	return (error);
1256}
1257