1/*	$NetBSD: vfs_lookup.c,v 1.234 2023/05/01 05:12:44 mlelstv Exp $	*/
2
3/*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)vfs_lookup.c	8.10 (Berkeley) 5/27/95
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.234 2023/05/01 05:12:44 mlelstv Exp $");
41
42#ifdef _KERNEL_OPT
43#include "opt_magiclinks.h"
44#endif
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/syslimits.h>
50#include <sys/time.h>
51#include <sys/namei.h>
52#include <sys/vnode.h>
53#include <sys/vnode_impl.h>
54#include <sys/fstrans.h>
55#include <sys/mount.h>
56#include <sys/errno.h>
57#include <sys/filedesc.h>
58#include <sys/hash.h>
59#include <sys/proc.h>
60#include <sys/syslog.h>
61#include <sys/kauth.h>
62#include <sys/ktrace.h>
63#include <sys/dirent.h>
64
65#ifndef MAGICLINKS
66#define MAGICLINKS 0
67#endif
68
69int vfs_magiclinks = MAGICLINKS;
70
71__CTASSERT(MAXNAMLEN == NAME_MAX);
72
73/*
74 * Substitute replacement text for 'magic' strings in symlinks.
75 * Returns 0 if successful, and returns non-zero if an error
76 * occurs.  (Currently, the only possible error is running out
77 * of temporary pathname space.)
78 *
79 * Looks for "@<string>" and "@<string>/", where <string> is a
80 * recognized 'magic' string.  Replaces the "@<string>" with the
81 * appropriate replacement text.  (Note that in some cases the
82 * replacement text may have zero length.)
83 *
84 * This would have been table driven, but the variance in
85 * replacement strings (and replacement string lengths) made
86 * that impractical.
87 */
88#define	VNL(x)							\
89	(sizeof(x) - 1)
90
91#define	VO	'{'
92#define	VC	'}'
93
94#define	MATCH(str)						\
95	((termchar == '/' && i + VNL(str) == *len) ||		\
96	 (i + VNL(str) < *len &&				\
97	  cp[i + VNL(str)] == termchar)) &&			\
98	!strncmp((str), &cp[i], VNL(str))
99
100#define	SUBSTITUTE(m, s, sl)					\
101	if ((newlen + (sl)) >= MAXPATHLEN)			\
102		return 1;					\
103	i += VNL(m);						\
104	if (termchar != '/')					\
105		i++;						\
106	(void)memcpy(&tmp[newlen], (s), (sl));			\
107	newlen += (sl);						\
108	change = 1;						\
109	termchar = '/';
110
111static int
112symlink_magic(struct proc *p, char *cp, size_t *len)
113{
114	char *tmp;
115	size_t change, i, newlen, slen;
116	char termchar = '/';
117	char idtmp[11]; /* enough for 32 bit *unsigned* integer */
118
119
120	tmp = PNBUF_GET();
121	for (change = i = newlen = 0; i < *len; ) {
122		if (cp[i] != '@') {
123			tmp[newlen++] = cp[i++];
124			continue;
125		}
126
127		i++;
128
129		/* Check for @{var} syntax. */
130		if (cp[i] == VO) {
131			termchar = VC;
132			i++;
133		}
134
135		/*
136		 * The following checks should be ordered according
137		 * to frequency of use.
138		 */
139		if (MATCH("machine_arch")) {
140			slen = strlen(PROC_MACHINE_ARCH(p));
141			SUBSTITUTE("machine_arch", PROC_MACHINE_ARCH(p), slen);
142		} else if (MATCH("machine")) {
143			slen = VNL(MACHINE);
144			SUBSTITUTE("machine", MACHINE, slen);
145		} else if (MATCH("hostname")) {
146			SUBSTITUTE("hostname", hostname, hostnamelen);
147		} else if (MATCH("osrelease")) {
148			slen = strlen(osrelease);
149			SUBSTITUTE("osrelease", osrelease, slen);
150		} else if (MATCH("emul")) {
151			slen = strlen(p->p_emul->e_name);
152			SUBSTITUTE("emul", p->p_emul->e_name, slen);
153		} else if (MATCH("kernel_ident")) {
154			slen = strlen(kernel_ident);
155			SUBSTITUTE("kernel_ident", kernel_ident, slen);
156		} else if (MATCH("domainname")) {
157			SUBSTITUTE("domainname", domainname, domainnamelen);
158		} else if (MATCH("ostype")) {
159			slen = strlen(ostype);
160			SUBSTITUTE("ostype", ostype, slen);
161		} else if (MATCH("uid")) {
162			slen = snprintf(idtmp, sizeof(idtmp), "%u",
163			    kauth_cred_geteuid(kauth_cred_get()));
164			SUBSTITUTE("uid", idtmp, slen);
165		} else if (MATCH("ruid")) {
166			slen = snprintf(idtmp, sizeof(idtmp), "%u",
167			    kauth_cred_getuid(kauth_cred_get()));
168			SUBSTITUTE("ruid", idtmp, slen);
169		} else if (MATCH("gid")) {
170			slen = snprintf(idtmp, sizeof(idtmp), "%u",
171			    kauth_cred_getegid(kauth_cred_get()));
172			SUBSTITUTE("gid", idtmp, slen);
173		} else if (MATCH("rgid")) {
174			slen = snprintf(idtmp, sizeof(idtmp), "%u",
175			    kauth_cred_getgid(kauth_cred_get()));
176			SUBSTITUTE("rgid", idtmp, slen);
177		} else {
178			tmp[newlen++] = '@';
179			if (termchar == VC)
180				tmp[newlen++] = VO;
181		}
182	}
183
184	if (change) {
185		(void)memcpy(cp, tmp, newlen);
186		*len = newlen;
187	}
188	PNBUF_PUT(tmp);
189
190	return 0;
191}
192
193#undef VNL
194#undef VO
195#undef VC
196#undef MATCH
197#undef SUBSTITUTE
198
199////////////////////////////////////////////////////////////
200
201/*
202 * Determine the namei hash (for the namecache) for name.
203 * If *ep != NULL, hash from name to ep-1.
204 * If *ep == NULL, hash from name until the first NUL or '/', and
205 * return the location of this termination character in *ep.
206 *
207 * This function returns an equivalent hash to the MI hash32_strn().
208 * The latter isn't used because in the *ep == NULL case, determining
209 * the length of the string to the first NUL or `/' and then calling
210 * hash32_strn() involves unnecessary double-handling of the data.
211 */
212uint32_t
213namei_hash(const char *name, const char **ep)
214{
215	uint32_t	hash;
216
217	hash = HASH32_STR_INIT;
218	if (*ep != NULL) {
219		for (; name < *ep; name++)
220			hash = hash * 33 + *(const uint8_t *)name;
221	} else {
222		for (; *name != '\0' && *name != '/'; name++)
223			hash = hash * 33 + *(const uint8_t *)name;
224		*ep = name;
225	}
226	return (hash + (hash >> 5));
227}
228
229////////////////////////////////////////////////////////////
230
231/*
232 * Sealed abstraction for pathnames.
233 *
234 * System-call-layer level code that is going to call namei should
235 * first create a pathbuf and adjust all the bells and whistles on it
236 * as needed by context.
237 */
238
struct pathbuf {
	/* Pathname buffer holding the path string handed to namei. */
	char *pb_path;
	/*
	 * Saved copy of the path made by pathbuf_stringcopy_get();
	 * NULL while no copy is outstanding.
	 */
	char *pb_pathcopy;
	/* Number of pathbuf_stringcopy_get() calls not yet put back. */
	unsigned pb_pathcopyuses;
};
244
245static struct pathbuf *
246pathbuf_create_raw(void)
247{
248	struct pathbuf *pb;
249
250	pb = kmem_alloc(sizeof(*pb), KM_SLEEP);
251	pb->pb_path = PNBUF_GET();
252	if (pb->pb_path == NULL) {
253		kmem_free(pb, sizeof(*pb));
254		return NULL;
255	}
256	pb->pb_pathcopy = NULL;
257	pb->pb_pathcopyuses = 0;
258	return pb;
259}
260
261void
262pathbuf_destroy(struct pathbuf *pb)
263{
264	KASSERT(pb->pb_pathcopyuses == 0);
265	KASSERT(pb->pb_pathcopy == NULL);
266	PNBUF_PUT(pb->pb_path);
267	kmem_free(pb, sizeof(*pb));
268}
269
270struct pathbuf *
271pathbuf_assimilate(char *pnbuf)
272{
273	struct pathbuf *pb;
274
275	pb = kmem_alloc(sizeof(*pb), KM_SLEEP);
276	pb->pb_path = pnbuf;
277	pb->pb_pathcopy = NULL;
278	pb->pb_pathcopyuses = 0;
279	return pb;
280}
281
282struct pathbuf *
283pathbuf_create(const char *path)
284{
285	struct pathbuf *pb;
286	int error;
287
288	pb = pathbuf_create_raw();
289	if (pb == NULL) {
290		return NULL;
291	}
292	error = copystr(path, pb->pb_path, PATH_MAX, NULL);
293	if (error != 0) {
294		KASSERT(!"kernel path too long in pathbuf_create");
295		/* make sure it's null-terminated, just in case */
296		pb->pb_path[PATH_MAX-1] = '\0';
297	}
298	return pb;
299}
300
301int
302pathbuf_copyin(const char *userpath, struct pathbuf **ret)
303{
304	struct pathbuf *pb;
305	int error;
306
307	pb = pathbuf_create_raw();
308	if (pb == NULL) {
309		return ENOMEM;
310	}
311	error = copyinstr(userpath, pb->pb_path, PATH_MAX, NULL);
312	if (error) {
313		pathbuf_destroy(pb);
314		return error;
315	}
316	*ret = pb;
317	return 0;
318}
319
320/*
321 * XXX should not exist:
322 *   1. whether a pointer is kernel or user should be statically checkable.
323 *   2. copyin should be handled by the upper part of the syscall layer,
324 *      not in here.
325 */
326int
327pathbuf_maybe_copyin(const char *path, enum uio_seg seg, struct pathbuf **ret)
328{
329	if (seg == UIO_USERSPACE) {
330		return pathbuf_copyin(path, ret);
331	} else {
332		*ret = pathbuf_create(path);
333		if (*ret == NULL) {
334			return ENOMEM;
335		}
336		return 0;
337	}
338}
339
340/*
341 * Get a copy of the path buffer as it currently exists. If this is
342 * called after namei starts the results may be arbitrary.
343 */
344void
345pathbuf_copystring(const struct pathbuf *pb, char *buf, size_t maxlen)
346{
347	strlcpy(buf, pb->pb_path, maxlen);
348}
349
350/*
351 * These two functions allow access to a saved copy of the original
352 * path string. The first copy should be gotten before namei is
353 * called. Each copy that is gotten should be put back.
354 */
355
356const char *
357pathbuf_stringcopy_get(struct pathbuf *pb)
358{
359	if (pb->pb_pathcopyuses == 0) {
360		pb->pb_pathcopy = PNBUF_GET();
361		strcpy(pb->pb_pathcopy, pb->pb_path);
362	}
363	pb->pb_pathcopyuses++;
364	return pb->pb_pathcopy;
365}
366
367void
368pathbuf_stringcopy_put(struct pathbuf *pb, const char *str)
369{
370	KASSERT(str == pb->pb_pathcopy);
371	KASSERT(pb->pb_pathcopyuses > 0);
372	pb->pb_pathcopyuses--;
373	if (pb->pb_pathcopyuses == 0) {
374		PNBUF_PUT(pb->pb_pathcopy);
375		pb->pb_pathcopy = NULL;
376	}
377}
378
379
380////////////////////////////////////////////////////////////
381
382/*
383 * namei: convert a pathname into a pointer to a (maybe-locked) vnode,
384 * and maybe also its parent directory vnode, and assorted other guff.
385 * See namei(9) for the interface documentation.
386 *
387 *
388 * The FOLLOW flag is set when symbolic links are to be followed
389 * when they occur at the end of the name translation process.
390 * Symbolic links are always followed for all other pathname
391 * components other than the last.
392 *
393 * The segflg defines whether the name is to be copied from user
394 * space or kernel space.
395 *
396 * Overall outline of namei:
397 *
398 *	copy in name
399 *	get starting directory
400 *	while (!done && !error) {
401 *		call lookup to search path.
402 *		if symbolic link, massage name in buffer and continue
403 *	}
404 */
405
406/*
407 * Search a pathname.
408 * This is a very central and rather complicated routine.
409 *
410 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
411 * The starting directory is passed in. The pathname is descended
412 * until done, or a symbolic link is encountered. The variable ni_more
413 * is clear if the path is completed; it is set to one if a symbolic
414 * link needing interpretation is encountered.
415 *
416 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
417 * whether the name is to be looked up, created, renamed, or deleted.
418 * When CREATE, RENAME, or DELETE is specified, information usable in
419 * creating, renaming, or deleting a directory entry may be calculated.
420 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
421 * locked.  Otherwise the parent directory is not returned. If the target
422 * of the pathname exists and LOCKLEAF is or'ed into the flag the target
423 * is returned locked, otherwise it is returned unlocked.  When creating
424 * or renaming and LOCKPARENT is specified, the target may not be ".".
425 * When deleting and LOCKPARENT is specified, the target may be ".".
426 *
427 * Overall outline of lookup:
428 *
429 * dirloop:
430 *	identify next component of name at ndp->ni_ptr
431 *	handle degenerate case where name is null string
432 *	if .. and crossing mount points and on mounted filesys, find parent
433 *	call VOP_LOOKUP routine for next component name
434 *	    directory vnode returned in ni_dvp, locked.
435 *	    component vnode returned in ni_vp (if it exists), locked.
436 *	if result vnode is mounted on and crossing mount points,
437 *	    find mounted on vnode
438 *	if more components of name, do next level at dirloop
439 *	return the answer in ni_vp, locked if LOCKLEAF set
440 *	    if LOCKPARENT set, return locked parent in ni_dvp
441 */
442
443
444/*
445 * Internal state for a namei operation.
446 *
 * cnp is always equal to &ndp->ni_cnd.
448 */
struct namei_state {
	struct nameidata *ndp;		/* the nameidata being worked on */
	struct componentname *cnp;	/* its embedded componentname */

	int docache;			/* == 0 do not cache last component */
	int rdonly;			/* lookup read-only flag bit */
	int slashes;			/* count of '/' characters following
					     the current component; set by
					     lookup_parsepath() */

	unsigned attempt_retry:1;	/* true if error allows emul retry */
	unsigned root_referenced:1;	/* true if ndp->ni_rootdir and
					     ndp->ni_erootdir were referenced */
};
461
462
463/*
464 * Initialize the namei working state.
465 */
466static void
467namei_init(struct namei_state *state, struct nameidata *ndp)
468{
469
470	state->ndp = ndp;
471	state->cnp = &ndp->ni_cnd;
472
473	state->docache = 0;
474	state->rdonly = 0;
475	state->slashes = 0;
476
477	state->root_referenced = 0;
478
479	KASSERTMSG((state->cnp->cn_cred != NULL), "namei: bad cred/proc");
480	KASSERTMSG(((state->cnp->cn_nameiop & (~OPMASK)) == 0),
481	    "namei: nameiop contaminated with flags: %08"PRIx32,
482	    state->cnp->cn_nameiop);
483	KASSERTMSG(((state->cnp->cn_flags & OPMASK) == 0),
484	    "name: flags contaminated with nameiops: %08"PRIx32,
485	    state->cnp->cn_flags);
486
487	/*
488	 * The buffer for name translation shall be the one inside the
489	 * pathbuf.
490	 */
491	state->ndp->ni_pnbuf = state->ndp->ni_pathbuf->pb_path;
492}
493
494/*
495 * Clean up the working namei state, leaving things ready for return
496 * from namei.
497 */
498static void
499namei_cleanup(struct namei_state *state)
500{
501	KASSERT(state->cnp == &state->ndp->ni_cnd);
502
503	if (state->root_referenced) {
504		if (state->ndp->ni_rootdir != NULL)
505			vrele(state->ndp->ni_rootdir);
506		if (state->ndp->ni_erootdir != NULL)
507			vrele(state->ndp->ni_erootdir);
508	}
509}
510
511//////////////////////////////
512
513/*
514 * Get the directory context.
515 * Initializes the rootdir and erootdir state and returns a reference
516 * to the starting dir.
517 */
518static struct vnode *
519namei_getstartdir(struct namei_state *state)
520{
521	struct nameidata *ndp = state->ndp;
522	struct componentname *cnp = state->cnp;
523	struct cwdinfo *cwdi;		/* pointer to cwd state */
524	struct lwp *self = curlwp;	/* thread doing namei() */
525	struct vnode *rootdir, *erootdir, *curdir, *startdir;
526
527	if (state->root_referenced) {
528		if (state->ndp->ni_rootdir != NULL)
529			vrele(state->ndp->ni_rootdir);
530		if (state->ndp->ni_erootdir != NULL)
531			vrele(state->ndp->ni_erootdir);
532		state->root_referenced = 0;
533	}
534
535	cwdi = self->l_proc->p_cwdi;
536	rw_enter(&cwdi->cwdi_lock, RW_READER);
537
538	/* root dir */
539	if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) {
540		rootdir = rootvnode;
541	} else {
542		rootdir = cwdi->cwdi_rdir;
543	}
544
545	/* emulation root dir, if any */
546	if ((cnp->cn_flags & TRYEMULROOT) == 0) {
547		/* if we don't want it, don't fetch it */
548		erootdir = NULL;
549	} else if (cnp->cn_flags & EMULROOTSET) {
550		/* explicitly set emulroot; "/../" doesn't override this */
551		erootdir = ndp->ni_erootdir;
552	} else if (!strncmp(ndp->ni_pnbuf, "/../", 4)) {
553		/* explicit reference to real rootdir */
554		erootdir = NULL;
555	} else {
556		/* may be null */
557		erootdir = cwdi->cwdi_edir;
558	}
559
560	/* current dir */
561	curdir = cwdi->cwdi_cdir;
562
563	if (ndp->ni_pnbuf[0] != '/') {
564		if (ndp->ni_atdir != NULL) {
565			startdir = ndp->ni_atdir;
566		} else {
567			startdir = curdir;
568		}
569		erootdir = NULL;
570	} else if (cnp->cn_flags & TRYEMULROOT && erootdir != NULL) {
571		startdir = erootdir;
572	} else {
573		startdir = rootdir;
574		erootdir = NULL;
575	}
576
577	state->ndp->ni_rootdir = rootdir;
578	state->ndp->ni_erootdir = erootdir;
579
580	/*
581	 * Get a reference to the start dir so we can safely unlock cwdi.
582	 *
583	 * Must hold references to rootdir and erootdir while we're running.
584	 * A multithreaded process may chroot during namei.
585	 */
586	if (startdir != NULL)
587		vref(startdir);
588	if (state->ndp->ni_rootdir != NULL)
589		vref(state->ndp->ni_rootdir);
590	if (state->ndp->ni_erootdir != NULL)
591		vref(state->ndp->ni_erootdir);
592	state->root_referenced = 1;
593
594	rw_exit(&cwdi->cwdi_lock);
595	return startdir;
596}
597
598/*
599 * Get the directory context for the nfsd case, in parallel to
600 * getstartdir. Initializes the rootdir and erootdir state and
601 * returns a reference to the passed-in starting dir.
602 */
603static struct vnode *
604namei_getstartdir_for_nfsd(struct namei_state *state)
605{
606	KASSERT(state->ndp->ni_atdir != NULL);
607
608	/* always use the real root, and never set an emulation root */
609	if (rootvnode == NULL) {
610		return NULL;
611	}
612	state->ndp->ni_rootdir = rootvnode;
613	state->ndp->ni_erootdir = NULL;
614
615	vref(state->ndp->ni_atdir);
616	KASSERT(! state->root_referenced);
617	vref(state->ndp->ni_rootdir);
618	state->root_referenced = 1;
619	return state->ndp->ni_atdir;
620}
621
622
623/*
624 * Ktrace the namei operation.
625 */
626static void
627namei_ktrace(struct namei_state *state)
628{
629	struct nameidata *ndp = state->ndp;
630	struct componentname *cnp = state->cnp;
631	struct lwp *self = curlwp;	/* thread doing namei() */
632	const char *emul_path;
633
634	if (ktrpoint(KTR_NAMEI)) {
635		if (ndp->ni_erootdir != NULL) {
636			/*
637			 * To make any sense, the trace entry need to have the
638			 * text of the emulation path prepended.
639			 * Usually we can get this from the current process,
640			 * but when called from emul_find_interp() it is only
641			 * in the exec_package - so we get it passed in ni_next
642			 * (this is a hack).
643			 */
644			if (cnp->cn_flags & EMULROOTSET)
645				emul_path = ndp->ni_next;
646			else
647				emul_path = self->l_proc->p_emul->e_path;
648			ktrnamei2(emul_path, strlen(emul_path),
649			    ndp->ni_pnbuf, ndp->ni_pathlen);
650		} else
651			ktrnamei(ndp->ni_pnbuf, ndp->ni_pathlen);
652	}
653}
654
655/*
656 * Start up namei. Find the root dir and cwd, establish the starting
657 * directory for lookup, and lock it. Also calls ktrace when
658 * appropriate.
659 */
660static int
661namei_start(struct namei_state *state, int isnfsd,
662	    struct vnode **startdir_ret)
663{
664	struct nameidata *ndp = state->ndp;
665	struct vnode *startdir;
666
667	/* length includes null terminator (was originally from copyinstr) */
668	ndp->ni_pathlen = strlen(ndp->ni_pnbuf) + 1;
669
670	/*
671	 * POSIX.1 requirement: "" is not a valid file name.
672	 */
673	if (ndp->ni_pathlen == 1) {
674		ndp->ni_erootdir = NULL;
675		return ENOENT;
676	}
677
678	ndp->ni_loopcnt = 0;
679
680	/* Get starting directory, set up root, and ktrace. */
681	if (isnfsd) {
682		startdir = namei_getstartdir_for_nfsd(state);
683		/* no ktrace */
684	} else {
685		startdir = namei_getstartdir(state);
686		namei_ktrace(state);
687	}
688
689	if (startdir == NULL) {
690		return ENOENT;
691	}
692
693	/* NDAT may feed us with a non directory namei_getstartdir */
694	if (startdir->v_type != VDIR) {
695		vrele(startdir);
696		return ENOTDIR;
697	}
698
699	*startdir_ret = startdir;
700	return 0;
701}
702
703/*
704 * Check for being at a symlink that we're going to follow.
705 */
706static inline int
707namei_atsymlink(struct namei_state *state, struct vnode *foundobj)
708{
709	return (foundobj->v_type == VLNK) &&
710		(state->cnp->cn_flags & (FOLLOW|REQUIREDIR));
711}
712
713/*
714 * Follow a symlink.
715 *
716 * Updates searchdir. inhibitmagic causes magic symlinks to not be
717 * interpreted; this is used by nfsd.
718 *
719 * Unlocks foundobj on success (ugh)
720 */
721static inline int
722namei_follow(struct namei_state *state, int inhibitmagic,
723	     struct vnode *searchdir, struct vnode *foundobj,
724	     struct vnode **newsearchdir_ret)
725{
726	struct nameidata *ndp = state->ndp;
727	struct componentname *cnp = state->cnp;
728
729	struct lwp *self = curlwp;	/* thread doing namei() */
730	struct iovec aiov;		/* uio for reading symbolic links */
731	struct uio auio;
732	char *cp;			/* pointer into pathname argument */
733	size_t linklen;
734	int error;
735
736	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
737		return ELOOP;
738	}
739
740	vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
741	if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) {
742		error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred);
743		if (error != 0) {
744			VOP_UNLOCK(foundobj);
745			return error;
746		}
747	}
748
749	/* FUTURE: fix this to not use a second buffer */
750	cp = PNBUF_GET();
751	aiov.iov_base = cp;
752	aiov.iov_len = MAXPATHLEN;
753	auio.uio_iov = &aiov;
754	auio.uio_iovcnt = 1;
755	auio.uio_offset = 0;
756	auio.uio_rw = UIO_READ;
757	auio.uio_resid = MAXPATHLEN;
758	UIO_SETUP_SYSSPACE(&auio);
759	error = VOP_READLINK(foundobj, &auio, cnp->cn_cred);
760	VOP_UNLOCK(foundobj);
761	if (error) {
762		PNBUF_PUT(cp);
763		return error;
764	}
765	linklen = MAXPATHLEN - auio.uio_resid;
766	if (linklen == 0) {
767		PNBUF_PUT(cp);
768		return ENOENT;
769	}
770
771	/*
772	 * Do symlink substitution, if appropriate, and
773	 * check length for potential overflow.
774	 *
775	 * Inhibit symlink substitution for nfsd.
776	 * XXX: This is how it was before; is that a bug or a feature?
777	 */
778	if ((!inhibitmagic && vfs_magiclinks &&
779	     symlink_magic(self->l_proc, cp, &linklen)) ||
780	    (linklen + ndp->ni_pathlen >= MAXPATHLEN)) {
781		PNBUF_PUT(cp);
782		return ENAMETOOLONG;
783	}
784	if (ndp->ni_pathlen > 1) {
785		/* includes a null-terminator */
786		memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen);
787	} else {
788		cp[linklen] = '\0';
789	}
790	ndp->ni_pathlen += linklen;
791	memcpy(ndp->ni_pnbuf, cp, ndp->ni_pathlen);
792	PNBUF_PUT(cp);
793
794	/* we're now starting from the beginning of the buffer again */
795	cnp->cn_nameptr = ndp->ni_pnbuf;
796
797	/*
798	 * Check if root directory should replace current directory.
799	 */
800	if (ndp->ni_pnbuf[0] == '/') {
801		vrele(searchdir);
802		/* Keep absolute symbolic links inside emulation root */
803		searchdir = ndp->ni_erootdir;
804		if (searchdir == NULL ||
805		    (ndp->ni_pnbuf[1] == '.'
806		     && ndp->ni_pnbuf[2] == '.'
807		     && ndp->ni_pnbuf[3] == '/')) {
808			ndp->ni_erootdir = NULL;
809			searchdir = ndp->ni_rootdir;
810		}
811		vref(searchdir);
812		while (cnp->cn_nameptr[0] == '/') {
813			cnp->cn_nameptr++;
814			ndp->ni_pathlen--;
815		}
816	}
817
818	*newsearchdir_ret = searchdir;
819	return 0;
820}
821
822//////////////////////////////
823
824/*
825 * Inspect the leading path component and update the state accordingly.
826 */
827static int
828lookup_parsepath(struct namei_state *state, struct vnode *searchdir)
829{
830	const char *cp;			/* pointer into pathname argument */
831	int error;
832
833	struct componentname *cnp = state->cnp;
834	struct nameidata *ndp = state->ndp;
835
836	KASSERT(cnp == &ndp->ni_cnd);
837
838	/*
839	 * Search a new directory.
840	 *
841	 * The last component of the filename is left accessible via
842	 * cnp->cn_nameptr for callers that need the name. Callers needing
843	 * the name set the SAVENAME flag. When done, they assume
844	 * responsibility for freeing the pathname buffer.
845	 *
846	 * At this point, our only vnode state is that the search dir
847	 * is held.
848	 */
849	error = VOP_PARSEPATH(searchdir, cnp->cn_nameptr, &cnp->cn_namelen);
850	if (error) {
851		return error;
852	}
853	cp = cnp->cn_nameptr + cnp->cn_namelen;
854	if (cnp->cn_namelen > KERNEL_NAME_MAX) {
855		return ENAMETOOLONG;
856	}
857#ifdef NAMEI_DIAGNOSTIC
858	{ char c = *cp;
859	*(char *)cp = '\0';
860	printf("{%s}: ", cnp->cn_nameptr);
861	*(char *)cp = c; }
862#endif /* NAMEI_DIAGNOSTIC */
863	ndp->ni_pathlen -= cnp->cn_namelen;
864	ndp->ni_next = cp;
865	/*
866	 * If this component is followed by a slash, then move the pointer to
867	 * the next component forward, and remember that this component must be
868	 * a directory.
869	 */
870	if (*cp == '/') {
871		do {
872			cp++;
873		} while (*cp == '/');
874		state->slashes = cp - ndp->ni_next;
875		ndp->ni_pathlen -= state->slashes;
876		ndp->ni_next = cp;
877		cnp->cn_flags |= REQUIREDIR;
878	} else {
879		state->slashes = 0;
880		cnp->cn_flags &= ~REQUIREDIR;
881	}
882	/*
883	 * We do special processing on the last component, whether or not it's
884	 * a directory.  Cache all intervening lookups, but not the final one.
885	 */
886	if (*cp == '\0') {
887		if (state->docache)
888			cnp->cn_flags |= MAKEENTRY;
889		else
890			cnp->cn_flags &= ~MAKEENTRY;
891		cnp->cn_flags |= ISLASTCN;
892	} else {
893		cnp->cn_flags |= MAKEENTRY;
894		cnp->cn_flags &= ~ISLASTCN;
895	}
896	if (cnp->cn_namelen == 2 &&
897	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
898		cnp->cn_flags |= ISDOTDOT;
899	else
900		cnp->cn_flags &= ~ISDOTDOT;
901
902	return 0;
903}
904
/*
 * Take care of crossing a mounted-on vnode: while the found object is
 * a directory with a file system mounted on it, replace it with the
 * root vnode of that file system.
 *
 * searchdir_ret (in/out): the search directory.  It is released and
 *	set to NULL if the object found after crossing is a directory.
 * foundobj_ret (in/out): the object found; replaced by the root of
 *	the mounted file system.  On error the reference on it has been
 *	dropped and NULL is stored.
 * searchdir_locked (in/out): whether searchdir is locked.  If it is
 *	locked on entry it is unlocked first, and locked again with the
 *	same lock type before a successful return unless it was
 *	released in the meantime.  On error it is not re-locked.
 *
 * Returns 0 or the error from VFS_ROOT().
 */
static int
lookup_crossmount(struct namei_state *state,
		  struct vnode **searchdir_ret,
		  struct vnode **foundobj_ret,
		  bool *searchdir_locked)
{
	struct componentname *cnp = state->cnp;
	struct vnode *foundobj, *vp;
	struct vnode *searchdir;
	struct mount *mp;
	int error, lktype;

	searchdir = *searchdir_ret;
	foundobj = *foundobj_ret;
	error = 0;

	KASSERT((cnp->cn_flags & NOCROSSMOUNT) == 0);

	/*
	 * First, unlock searchdir (oof).  Remember how it was locked so
	 * that the same kind of lock can be taken again on the way out.
	 */
	if (*searchdir_locked) {
		KASSERT(searchdir != NULL);
		lktype = VOP_ISLOCKED(searchdir);
		VOP_UNLOCK(searchdir);
		*searchdir_locked = false;
	} else {
		lktype = LK_NONE;
	}

	/*
	 * Do an unlocked check to see if the vnode has been mounted on; if
	 * so find the root of the mounted file system.
	 */
	while (foundobj->v_type == VDIR &&
	    (mp = foundobj->v_mountedhere) != NULL &&
	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		/*
		 * Try the namecache first.  If that doesn't work, do
		 * it the hard way.
		 */
		if (cache_lookup_mount(foundobj, &vp)) {
			vrele(foundobj);
			foundobj = vp;
		} else {
			/*
			 * First get the vnodes mount stable: retry until
			 * an fstrans is held on a mount that is still the
			 * one mounted here.
			 */
			while ((mp = foundobj->v_mountedhere) != NULL) {
				fstrans_start(mp);
				if (fstrans_held(mp) &&
				    mp == foundobj->v_mountedhere) {
					break;
				}
				fstrans_done(mp);
			}
			if (mp == NULL) {
				/* Nothing is mounted here any more. */
				break;
			}

			/*
			 * Now get a reference on the root vnode.
			 * XXX Future - maybe allow only VDIR here.
			 */
			error = VFS_ROOT(mp, LK_NONE, &vp);

			/*
			 * If successful, enter it into the cache while
			 * holding the mount busy (competing with unmount).
			 */
			if (error == 0) {
				cache_enter_mount(foundobj, vp);
			}

			/* Finally, drop references to foundobj & mountpoint. */
			vrele(foundobj);
			fstrans_done(mp);
			if (error) {
				/* The caller gets no found object back. */
				foundobj = NULL;
				break;
			}
			foundobj = vp;
		}

		/*
		 * Avoid locking vnodes from two filesystems because
		 * it's prone to deadlock, e.g. when using puffs.
		 * Also, it isn't a good idea to propagate slowness of
		 * a filesystem up to the root directory. For now,
		 * only handle the common case, where foundobj is
		 * VDIR.
		 *
		 * In this case set searchdir to null to avoid using
		 * it again. It is not correct to set searchdir ==
		 * foundobj here as that will confuse the caller.
		 * (See PR 40740.)
		 */
		if (searchdir == NULL) {
			/* already been here once; do nothing further */
		} else if (foundobj->v_type == VDIR) {
			vrele(searchdir);
			*searchdir_ret = searchdir = NULL;
			/* Nothing left to re-lock on the way out. */
			lktype = LK_NONE;
		}
	}

	/* If searchdir is still around, re-lock it. */
 	if (error == 0 && lktype != LK_NONE) {
		vn_lock(searchdir, lktype | LK_RETRY);
		*searchdir_locked = true;
	}
	*foundobj_ret = foundobj;
	return error;
}
1019
1020/*
1021 * Determine the desired locking mode for the directory of a lookup.
1022 */
1023static int
1024lookup_lktype(struct vnode *searchdir, struct componentname *cnp)
1025{
1026
1027	/*
1028	 * If the file system supports VOP_LOOKUP() with a shared lock, and
1029	 * we are not making any modifications (nameiop LOOKUP) or this is
1030	 * not the last component then get a shared lock.  Where we can't do
1031	 * fast-forwarded lookups (for example with layered file systems)
1032	 * then this is the fallback for reducing lock contention.
1033	 */
1034	if ((searchdir->v_mount->mnt_iflag & IMNT_SHRLOOKUP) != 0 &&
1035	    (cnp->cn_nameiop == LOOKUP || (cnp->cn_flags & ISLASTCN) == 0)) {
1036		return LK_SHARED;
1037	} else {
1038		return LK_EXCLUSIVE;
1039	}
1040}
1041
/*
 * Call VOP_LOOKUP for a single lookup; return a new search directory
 * (used when crossing mountpoints up or searching union mounts down) and
 * the found object, which for create operations may be NULL on success.
 *
 * Note that the new search directory may be null, which means the
 * searchdir was unlocked and released. This happens in the common case
 * when crossing a mount point downwards, in order to avoid coupling
 * locks between different file system volumes. Importantly, this can
 * happen even if the call fails. (XXX: this is gross and should be
 * tidied somehow.)
 *
 * NOTE(review): nothing in this function body assigns NULL to
 * *newsearchdir_ret; the paragraph above presumably describes the
 * combination with lookup_crossmount() -- confirm.
 *
 * Arguments:
 *	state			lookup state; supplies cnp, ndp and rdonly.
 *	searchdir		directory to search.  It is locked here
 *				(not by the caller) before VOP_LOOKUP runs.
 *	newsearchdir_ret	receives the directory the lookup ended up
 *				using; starts out as searchdir and changes
 *				when ".." walks up over a mount point, when
 *				a chroot escape is detected, or when a union
 *				mount is searched downwards.
 *	foundobj_ret		receives the result vnode.  It is assigned
 *				only on paths that return 0, and is set to
 *				NULL when the name does not exist but may
 *				be created.
 *	newsearchdir_locked_ret	always assigned before returning; tells the
 *				caller whether the search directory was left
 *				locked.
 *
 * Returns 0 on success, otherwise the error from VOP_LOOKUP, or ENOENT /
 * EROFS when a missing final name may not be created.
 */
static int
lookup_once(struct namei_state *state,
	    struct vnode *searchdir,
	    struct vnode **newsearchdir_ret,
	    struct vnode **foundobj_ret,
	    bool *newsearchdir_locked_ret)
{
	struct vnode *tmpvn;		/* scratch vnode */
	struct vnode *foundobj;		/* result */
	struct lwp *l = curlwp;
	bool searchdir_locked = false;
	int error, lktype;

	struct componentname *cnp = state->cnp;
	struct nameidata *ndp = state->ndp;

	KASSERT(cnp == &ndp->ni_cnd);
	/* Unless changed below, the caller keeps searching from searchdir. */
	*newsearchdir_ret = searchdir;

	/*
	 * Handle "..": two special cases.
	 * 1. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 1a. If at the root of the emulation filesystem go to the real
	 *    root. So "/../<path>" is always absolute.
	 * 1b. If we have somehow gotten out of a jail, warn
	 *    and also ignore it so we can't get farther out.
	 * 2. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other file system.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		struct proc *p = l->l_proc;

		for (;;) {
			if (searchdir == ndp->ni_rootdir ||
			    searchdir == rootvnode) {
				/*
				 * Case 1: ".." at the root is the root
				 * itself.  Return an extra reference to
				 * searchdir as the found object, without
				 * calling VOP_LOOKUP at all.
				 */
				foundobj = searchdir;
				vref(foundobj);
				*foundobj_ret = foundobj;
				/* Lock the parent only if the caller wants it. */
				if (cnp->cn_flags & LOCKPARENT) {
					lktype = lookup_lktype(searchdir, cnp);
					vn_lock(searchdir, lktype | LK_RETRY);
					searchdir_locked = true;
				}
				error = 0;
				goto done;
			}
			if (ndp->ni_rootdir != rootvnode) {
				int retval;

				/* Chrooted: verify we are still below the root. */
				retval = vn_isunder(searchdir, ndp->ni_rootdir, l);
				if (!retval) {
				    /* Oops! We got out of jail! */
				    log(LOG_WARNING,
					"chrooted pid %d uid %d (%s) "
					"detected outside of its chroot\n",
					p->p_pid, kauth_cred_geteuid(l->l_cred),
					p->p_comm);
				    /* Put us at the jail root. */
				    vrele(searchdir);
				    searchdir = NULL;
				    foundobj = ndp->ni_rootdir;
				    /*
				     * Two references: one travels with
				     * *newsearchdir_ret, the other with
				     * *foundobj_ret.
				     */
				    vref(foundobj);
				    vref(foundobj);
				    *newsearchdir_ret = foundobj;
				    *foundobj_ret = foundobj;
				    error = 0;
				    goto done;
				}
			}
			/*
			 * Not at the root of a mount (or not allowed to
			 * cross): do an ordinary ".." lookup below.
			 */
			if ((searchdir->v_vflag & VV_ROOT) == 0 ||
			    (cnp->cn_flags & NOCROSSMOUNT))
				break;
			/*
			 * Case 2: step from this mount's root to the vnode
			 * it covers, moving our reference along, and go
			 * around again since that may itself be a root.
			 */
			tmpvn = searchdir;
			searchdir = searchdir->v_mount->mnt_vnodecovered;
			vref(searchdir);
			vrele(tmpvn);
			*newsearchdir_ret = searchdir;
		}
	}

	lktype = lookup_lktype(searchdir, cnp);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 * Our vnode state here is that "searchdir" is held.
	 *
	 * The label below is re-entered with a stronger lock type (after
	 * ENOLCK) or with a different directory (union mount fallthrough).
	 */
unionlookup:
	foundobj = NULL;
	if (!searchdir_locked) {
		vn_lock(searchdir, lktype | LK_RETRY);
		searchdir_locked = true;
	}
	error = VOP_LOOKUP(searchdir, &foundobj, cnp);

	if (error != 0) {
		KASSERTMSG((foundobj == NULL),
		    "leaf `%s' should be empty but is %p",
		    cnp->cn_nameptr, foundobj);
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif /* NAMEI_DIAGNOSTIC */

		/*
		 * If ENOLCK, the file system needs us to retry the lookup
		 * with an exclusive lock.  It's likely nothing was found in
		 * cache and/or modifications need to be made.
		 */
		if (error == ENOLCK) {
			KASSERT(VOP_ISLOCKED(searchdir) == LK_SHARED);
			KASSERT(searchdir_locked);
			/*
			 * Try a non-blocking upgrade first.  If that fails,
			 * drop the shared lock and let the retry take the
			 * exclusive lock from scratch.
			 */
			if (vn_lock(searchdir, LK_UPGRADE | LK_NOWAIT)) {
				VOP_UNLOCK(searchdir);
				searchdir_locked = false;
			}
			lktype = LK_EXCLUSIVE;
			goto unionlookup;
		}

		/*
		 * Name not present in the root directory of a union mount:
		 * retry in the directory that the mount covers.
		 */
		if ((error == ENOENT) &&
		    (searchdir->v_vflag & VV_ROOT) &&
		    (searchdir->v_mount->mnt_flag & MNT_UNION)) {
			tmpvn = searchdir;
			searchdir = searchdir->v_mount->mnt_vnodecovered;
			vref(searchdir);
			/* The upper directory was locked; vput it, not vrele. */
			vput(tmpvn);
			searchdir_locked = false;
			*newsearchdir_ret = searchdir;
			goto unionlookup;
		}

		/*
		 * Every error except EJUSTRETURN is final.  EJUSTRETURN is
		 * handled below as "name absent, creation may be allowed".
		 */
		if (error != EJUSTRETURN)
			goto done;

		/*
		 * If this was not the last component, or there were trailing
		 * slashes, and we are not going to create a directory,
		 * then the name must exist.
		 */
		if ((cnp->cn_flags & (REQUIREDIR | CREATEDIR)) == REQUIREDIR) {
			error = ENOENT;
			goto done;
		}

		/*
		 * If creating and at end of pathname, then can consider
		 * allowing file to be created.
		 */
		if (state->rdonly) {
			error = EROFS;
			goto done;
		}

		/*
		 * We return success and a NULL foundobj to indicate
		 * that the entry doesn't currently exist, leaving a
		 * pointer to the (normally, locked) directory vnode
		 * as searchdir.
		 */
		*foundobj_ret = NULL;
		error = 0;
		goto done;
	}
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif /* NAMEI_DIAGNOSTIC */

	/*
	 * Unlock, unless the caller needs the parent locked.
	 *
	 * The directory stays locked only for the last component of a
	 * LOCKPARENT lookup.
	 *
	 * NOTE(review): searchdir was dereferenced by the VOP_LOOKUP call
	 * above, so the NULL arm below looks unreachable from here --
	 * confirm before relying on it.
	 */
	if (searchdir != NULL) {
		KASSERT(searchdir_locked);
		if ((cnp->cn_flags & (ISLASTCN | LOCKPARENT)) !=
		    (ISLASTCN | LOCKPARENT)) {
		    	VOP_UNLOCK(searchdir);
		    	searchdir_locked = false;
		}
	} else {
		KASSERT(!searchdir_locked);
	}

	*foundobj_ret = foundobj;
	error = 0;
done:
	/* Common exit: always report the final lock state to the caller. */
	*newsearchdir_locked_ret = searchdir_locked;
	return error;
}
1242
/*
 * Parse out the first path name component that we need to consider.
 *
 * While doing this, attempt to use the name cache to fast-forward through
 * as many "easy" to find components of the path as possible.
 *
 * We use the namecache's node locks to form a chain, and avoid as many
 * vnode references and locks as possible.  In the ideal case, only the
 * final vnode will have its reference count adjusted and lock taken.
 *
 * Arguments:
 *	state		lookup state; supplies cnp and ndp.
 *	searchdir_ret	in: the referenced directory to start from.
 *			out: the directory the caller should continue
 *			with.  It is replaced (old reference released, new
 *			one taken) when components were skipped, and set
 *			to NULL when the walk ended on the last component
 *			("terminal" below).
 *	foundobj_ret	out: the vnode found, with a reference, when 0 is
 *			returned; NULL for every non-zero return.
 *
 * Returns:
 *	0		an object was found and referenced.
 *	ENOENT		the namecache holds a negative entry for the name.
 *	EOPNOTSUPP	the namecache could not answer; cnp describes the
 *			component at which the caller should fall back to
 *			lookup_once().
 *	other		error returned by lookup_parsepath().
 */
static int
lookup_fastforward(struct namei_state *state, struct vnode **searchdir_ret,
		   struct vnode **foundobj_ret)
{
	struct componentname *cnp = state->cnp;
	struct nameidata *ndp = state->ndp;
	krwlock_t *plock;
	struct vnode *foundobj, *searchdir;
	int error, error2;
	size_t oldpathlen;
	const char *oldnameptr;
	bool terminal;

	/*
	 * Eat as many path name components as possible before giving up and
	 * letting lookup_once() handle it.  Remember the starting point in
	 * case we can't get vnode references and need to roll back.
	 */
	plock = NULL;
	searchdir = *searchdir_ret;
	oldnameptr = cnp->cn_nameptr;
	oldpathlen = ndp->ni_pathlen;
	terminal = false;
	for (;;) {
		foundobj = NULL;

		/*
		 * Get the next component name.  There should be no slashes
		 * here, and we shouldn't have looped around if we were
		 * done.
		 */
		KASSERT(cnp->cn_nameptr[0] != '/');
		KASSERT(cnp->cn_nameptr[0] != '\0');
		if ((error = lookup_parsepath(state, searchdir)) != 0) {
			break;
		}

		/*
		 * Can't deal with DOTDOT lookups if NOCROSSMOUNT or the
		 * lookup is chrooted.
		 */
		if ((cnp->cn_flags & ISDOTDOT) != 0) {
			if ((searchdir->v_vflag & VV_ROOT) != 0 &&
			    (cnp->cn_flags & NOCROSSMOUNT)) {
			    	error = EOPNOTSUPP;
				break;
			}
			if (ndp->ni_rootdir != rootvnode) {
			    	error = EOPNOTSUPP;
				break;
			}
		}

		/*
		 * Can't deal with last component when modifying; this needs
		 * searchdir locked and VOP_LOOKUP() called (which can and
		 * does modify state, despite the name).  NB: this case means
		 * terminal is never set true when LOCKPARENT.
		 */
		if ((cnp->cn_flags & ISLASTCN) != 0) {
			if (cnp->cn_nameiop != LOOKUP ||
			    (cnp->cn_flags & LOCKPARENT) != 0) {
				error = EOPNOTSUPP;
				break;
			}
		}

		/*
		 * Good, now look for it in cache.  cache_lookup_linked()
		 * will fail if there's nothing there, or if there's no
		 * ownership info for the directory, or if the user doesn't
		 * have permission to look up files in this directory.
		 */
		if (!cache_lookup_linked(searchdir, cnp->cn_nameptr,
		    cnp->cn_namelen, &foundobj, &plock, cnp->cn_cred)) {
			error = EOPNOTSUPP;
			break;
		}
		KASSERT(plock != NULL);
		KASSERT(rw_lock_held(plock));

		/*
		 * Scored a hit.  Negative is good too (ENOENT).  If there's
		 * a '-o union' mount here, punt and let lookup_once() deal
		 * with it.
		 */
		if (foundobj == NULL) {
			if ((searchdir->v_vflag & VV_ROOT) != 0 &&
			    (searchdir->v_mount->mnt_flag & MNT_UNION) != 0) {
			    	error = EOPNOTSUPP;
			} else {
				error = ENOENT;
				/* A miss on the final name needs no parent. */
				terminal = ((cnp->cn_flags & ISLASTCN) != 0);
			}
			break;
		}

		/*
		 * Stop and get a hold on the vnode if we've encountered
		 * something other than a directory.
		 */
		if (foundobj->v_type != VDIR) {
			error = vcache_tryvget(foundobj);
			if (error != 0) {
				/* No reference obtained: fall back. */
				foundobj = NULL;
				error = EOPNOTSUPP;
			} else {
				/*
				 * A symlink still needs its parent so the
				 * caller can follow it; anything else on the
				 * last component does not.
				 */
				terminal = (foundobj->v_type != VLNK &&
				    (cnp->cn_flags & ISLASTCN) != 0);
			}
			break;
		}

		/*
		 * Try to cross mountpoints, bearing in mind that they can
		 * be stacked.  If at any point we can't go further, stop
		 * and try to get a reference on the vnode.  If we are able
		 * to get a ref then lookup_crossmount() will take care of
		 * it, otherwise we'll fall through to lookup_once().
		 */
		if (foundobj->v_mountedhere != NULL) {
			while (foundobj->v_mountedhere != NULL &&
			    (cnp->cn_flags & NOCROSSMOUNT) == 0 &&
			    cache_cross_mount(&foundobj, &plock)) {
				KASSERT(foundobj != NULL);
				KASSERT(foundobj->v_type == VDIR);
			}
			if (foundobj->v_mountedhere != NULL) {
				/* Still mounted on: could not cross here. */
				error = vcache_tryvget(foundobj);
				if (error != 0) {
					foundobj = NULL;
					error = EOPNOTSUPP;
				}
				break;
			} else {
				/*
				 * Crossed onto another mount; the old
				 * searchdir is no longer the parent of
				 * foundobj, so forget it.
				 */
				searchdir = NULL;
			}
		}

		/*
		 * Time to stop if we found the last component & traversed
		 * all mounts.
		 */
		if ((cnp->cn_flags & ISLASTCN) != 0) {
			error = vcache_tryvget(foundobj);
			if (error != 0) {
				foundobj = NULL;
				error = EOPNOTSUPP;
			} else {
				terminal = (foundobj->v_type != VLNK);
			}
			break;
		}

		/*
		 * Otherwise, we're still in business.  Set the found VDIR
		 * vnode as the search dir for the next component and
		 * continue on to it.
		 */
		cnp->cn_nameptr = ndp->ni_next;
		searchdir = foundobj;
	}

	if (terminal) {
		/*
		 * If we exited the loop above having successfully located
		 * the last component with a zero error code, and it's not a
		 * symbolic link, then the parent directory is not needed.
		 * Release reference to the starting parent and make the
		 * terminal parent disappear into thin air.
		 */
		KASSERT(plock != NULL);
		rw_exit(plock);
		vrele(*searchdir_ret);
		*searchdir_ret = NULL;
	} else if (searchdir != *searchdir_ret) {
		/*
		 * Otherwise we need to return the parent.  If we ended up
		 * with a new search dir, ref it before dropping the
		 * namecache's lock.  The lock prevents both searchdir and
		 * foundobj from disappearing.  If we can't ref the new
		 * searchdir, we have a bit of a problem.  Roll back the
		 * fastforward to the beginning and let lookup_once() take
		 * care of it.
		 */
		if (searchdir == NULL) {
			/*
			 * It's possible for searchdir to be NULL in the
			 * case of a root vnode being reclaimed while
			 * trying to cross a mount.
			 */
			error2 = EOPNOTSUPP;
		} else {
			error2 = vcache_tryvget(searchdir);
		}
		KASSERT(plock != NULL);
		rw_exit(plock);
		if (__predict_true(error2 == 0)) {
			/* Returning new searchdir, and maybe new foundobj. */
			vrele(*searchdir_ret);
			*searchdir_ret = searchdir;
		} else {
			/* Returning nothing. */
			if (foundobj != NULL) {
				vrele(foundobj);
				foundobj = NULL;
			}
			/*
			 * Rewind to the first component and re-parse it
			 * against the original directory, so cnp again
			 * describes what lookup_once() must look up.
			 */
			cnp->cn_nameptr = oldnameptr;
			ndp->ni_pathlen = oldpathlen;
			error = lookup_parsepath(state, *searchdir_ret);
			if (error == 0) {
				error = EOPNOTSUPP;
			}
		}
	} else if (plock != NULL) {
		/* Drop any namecache lock still held. */
		rw_exit(plock);
	}

	/* Success always comes with an object; failure never does. */
	KASSERT(error == 0 ? foundobj != NULL : foundobj == NULL);
	*foundobj_ret = foundobj;
	return error;
}
1476
1477//////////////////////////////
1478
/*
 * Do a complete path search from a single root directory.
 * (This is called up to twice if TRYEMULROOT is in effect.)
 *
 * Arguments:
 *	state		lookup state; supplies ndp and cnp, and receives
 *			docache, rdonly and attempt_retry.
 *	neverfollow	if nonzero, reaching a symlink that would be
 *			followed fails with EINVAL instead.
 *	inhibitmagic	passed through to namei_follow().
 *	isnfsd		passed through to namei_start().
 *
 * On success returns 0 with ndp->ni_vp set to the object found (or NULL
 * if it does not exist and may be created) and ndp->ni_dvp set to the
 * parent directory if LOCKPARENT was requested, else NULL.  On failure
 * returns an errno with both ni_dvp and ni_vp set to NULL.
 *
 * state->attempt_retry is set to 1 on most, but not all, failure paths;
 * failures while starting up or while following a symlink leave it alone.
 */
static int
namei_oneroot(struct namei_state *state,
	 int neverfollow, int inhibitmagic, int isnfsd)
{
	struct nameidata *ndp = state->ndp;
	struct componentname *cnp = state->cnp;
	struct vnode *searchdir, *foundobj;
	bool searchdir_locked = false;
	int error;

	error = namei_start(state, isnfsd, &searchdir);
	if (error) {
		ndp->ni_dvp = NULL;
		ndp->ni_vp = NULL;
		return error;
	}
	KASSERT(searchdir->v_type == VDIR);

	/*
	 * Setup: break out flag bits into variables.
	 */
	/* docache is nonzero exactly when NOCACHE is clear... */
	state->docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	/* ...and is always off for DELETE. */
	if (cnp->cn_nameiop == DELETE)
		state->docache = 0;
	state->rdonly = cnp->cn_flags & RDONLY;

	/*
	 * Keep going until we run out of path components.
	 */
	cnp->cn_nameptr = ndp->ni_pnbuf;

	/* drop leading slashes (already used them to choose startdir) */
	while (cnp->cn_nameptr[0] == '/') {
		cnp->cn_nameptr++;
		ndp->ni_pathlen--;
	}
	/* was it just "/"? */
	if (cnp->cn_nameptr[0] == '\0') {
		/* The start directory is the answer and has no parent. */
		foundobj = searchdir;
		searchdir = NULL;
		cnp->cn_flags |= ISLASTCN;

		/* bleh */
		goto skiploop;
	}

	/*
	 * Main loop.  Each pass starts with searchdir referenced but
	 * unlocked, and resolves at least one path component.
	 */
	for (;;) {
		KASSERT(searchdir != NULL);
		KASSERT(!searchdir_locked);

		/*
		 * Parse out the first path name component that we need to
		 * consider.  While doing this, attempt to use the name
		 * cache to fast-forward through as many "easy" to find
		 * components of the path as possible.
		 */
		error = lookup_fastforward(state, &searchdir, &foundobj);

		/*
		 * If we didn't get a good answer from the namecache, then
		 * go directly to the file system.
		 */
		if (error == EOPNOTSUPP) {
			error = lookup_once(state, searchdir, &searchdir,
			    &foundobj, &searchdir_locked);
		}

		/*
		 * If the vnode we found is mounted on, then cross the mount
		 * and get the root vnode in foundobj.  If this encounters
		 * an error, it will dispose of foundobj, but searchdir is
		 * untouched.
		 */
		if (error == 0 && foundobj != NULL &&
		    foundobj->v_type == VDIR &&
		    foundobj->v_mountedhere != NULL &&
		    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		    	error = lookup_crossmount(state, &searchdir,
		    	    &foundobj, &searchdir_locked);
		}

		if (error) {
			/* Drop searchdir the right way for its lock state. */
			if (searchdir != NULL) {
				if (searchdir_locked) {
					searchdir_locked = false;
					vput(searchdir);
				} else {
					vrele(searchdir);
				}
			}
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			/*
			 * Note that if we're doing TRYEMULROOT we can
			 * retry with the normal root. Where this is
			 * currently set matches previous practice,
			 * but the previous practice didn't make much
			 * sense and somebody should sit down and
			 * figure out which cases should cause retry
			 * and which shouldn't. XXX.
			 */
			state->attempt_retry = 1;
			return (error);
		}

		if (foundobj == NULL) {
			/*
			 * Success with no object returned means we're
			 * creating something and it isn't already
			 * there. Break out of the main loop now so
			 * the code below doesn't have to test for
			 * foundobj == NULL.
			 */
			/* lookup_once can't have dropped the searchdir */
			KASSERT(searchdir != NULL ||
			    (cnp->cn_flags & ISLASTCN) != 0);
			break;
		}

		/*
		 * Check for symbolic link. If we've reached one,
		 * follow it, unless we aren't supposed to. Back up
		 * over any slashes that we skipped, as we will need
		 * them again.
		 */
		if (namei_atsymlink(state, foundobj)) {
			/* Don't need searchdir locked any more. */
			if (searchdir_locked) {
				searchdir_locked = false;
				VOP_UNLOCK(searchdir);
			}
			ndp->ni_pathlen += state->slashes;
			ndp->ni_next -= state->slashes;
			if (neverfollow) {
				error = EINVAL;
			} else if (searchdir == NULL) {
				/*
				 * dholland 20160410: lookup_once only
				 * drops searchdir if it crossed a
				 * mount point. Therefore, if we get
				 * here it means we crossed a mount
				 * point to a mounted filesystem whose
				 * root vnode is a symlink. In theory
				 * we could continue at this point by
				 * using the pre-crossing searchdir
				 * (e.g. just take out an extra
				 * reference on it before calling
				 * lookup_once so we still have it),
				 * but this will make an ugly mess and
				 * it should never happen in practice
				 * as only badly broken filesystems
				 * have non-directory root vnodes. (I
				 * have seen this sort of thing with
				 * NFS occasionally but even then it
				 * means something's badly wrong.)
				 */
				error = ENOTDIR;
			} else {
				/*
				 * dholland 20110410: if we're at a
				 * union mount it might make sense to
				 * use the top of the union stack here
				 * rather than the layer we found the
				 * symlink in. (FUTURE)
				 */
				error = namei_follow(state, inhibitmagic,
						     searchdir, foundobj,
						     &searchdir);
			}
			if (error) {
				/*
				 * searchdir was unlocked above, so a plain
				 * vrele is enough.  Note that attempt_retry
				 * is not set on this path.
				 */
				KASSERT(searchdir != foundobj);
				if (searchdir != NULL) {
					vrele(searchdir);
				}
				vrele(foundobj);
				ndp->ni_dvp = NULL;
				ndp->ni_vp = NULL;
				return error;
			}
			/* Done with the symlink vnode itself. */
			vrele(foundobj);
			foundobj = NULL;

			/*
			 * If we followed a symlink to `/' and there
			 * are no more components after the symlink,
			 * we're done with the loop and what we found
			 * is the searchdir.
			 */
			if (cnp->cn_nameptr[0] == '\0') {
				KASSERT(searchdir != NULL);
				foundobj = searchdir;
				searchdir = NULL;
				cnp->cn_flags |= ISLASTCN;
				break;
			}

			/* Resolve the symlink's contents from searchdir. */
			continue;
		}

		/*
		 * Not a symbolic link.
		 *
		 * Check for directory, if the component was
		 * followed by a series of slashes.
		 */
		if ((foundobj->v_type != VDIR) &&
		    (cnp->cn_flags & REQUIREDIR)) {
			KASSERT(foundobj != searchdir);
			if (searchdir) {
				if (searchdir_locked) {
					searchdir_locked = false;
					vput(searchdir);
				} else {
					vrele(searchdir);
				}
			} else {
				KASSERT(!searchdir_locked);
			}
			vrele(foundobj);
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return ENOTDIR;
		}

		/*
		 * Stop if we've reached the last component.
		 */
		if (cnp->cn_flags & ISLASTCN) {
			break;
		}

		/*
		 * Continue with the next component.
		 */
		cnp->cn_nameptr = ndp->ni_next;
		if (searchdir != NULL) {
			if (searchdir_locked) {
				searchdir_locked = false;
				vput(searchdir);
			} else {
				vrele(searchdir);
			}
		}
		/* What we found becomes the directory to search next. */
		searchdir = foundobj;
		foundobj = NULL;
	}

	/* A LOCKPARENT caller must get an exclusively locked parent, if any. */
	KASSERT((cnp->cn_flags & LOCKPARENT) == 0 || searchdir == NULL ||
	    VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE);

 skiploop:

	if (foundobj != NULL) {
		if (foundobj == ndp->ni_erootdir) {
			/*
			 * We are about to return the emulation root.
			 * This isn't a good idea because code might
			 * repeatedly lookup ".." until the file
			 * matches that returned for "/" and loop
			 * forever.  So convert it to the real root.
			 */
			if (searchdir != NULL) {
				if (searchdir_locked) {
					vput(searchdir);
					searchdir_locked = false;
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			/* Swap our reference over to the real root. */
			vrele(foundobj);
			foundobj = ndp->ni_rootdir;
			vref(foundobj);
		}

		/*
		 * If the caller requested the parent node (i.e. it's
		 * a CREATE, DELETE, or RENAME), and we don't have one
		 * (because this is the root directory, or we crossed
		 * a mount point), then we must fail.
		 *
		 * 20210604 dholland when NONEXCLHACK is set (open
		 * with O_CREAT but not O_EXCL) skip this logic. Since
		 * we have a foundobj, open will not be creating, so
		 * it doesn't actually need or use the searchdir, so
		 * it's ok to return it even if it's on a different
		 * volume, and it's also ok to return NULL; by setting
		 * NONEXCLHACK the open code promises to cope with
		 * those cases correctly. (That is, it should do what
		 * it would do anyway, that is, just release the
		 * searchdir, except not crash if it's null.) This is
		 * needed because otherwise opening mountpoints with
		 * O_CREAT but not O_EXCL fails... which is a silly
		 * thing to do but ought to work. (This whole issue
		 * came to light because 3rd party code wanted to open
		 * certain procfs nodes with O_CREAT for some 3rd
		 * party reason, and it failed.)
		 *
		 * Note that NONEXCLHACK is properly a different
		 * nameiop (it is partway between LOOKUP and CREATE)
		 * but it was stuffed in as a flag instead to make the
		 * resulting patch less invasive for pullup. Blah.
		 */
		if (cnp->cn_nameiop != LOOKUP &&
		    (searchdir == NULL ||
		     searchdir->v_mount != foundobj->v_mount) &&
		    (cnp->cn_flags & NONEXCLHACK) == 0) {
			if (searchdir) {
				if (searchdir_locked) {
					vput(searchdir);
					searchdir_locked = false;
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			vrele(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;

			/* LOOKUP was excluded above; anything else is a bug. */
			switch (cnp->cn_nameiop) {
			    case CREATE:
				return EEXIST;
			    case DELETE:
			    case RENAME:
				return EBUSY;
			    default:
				break;
			}
			panic("Invalid nameiop\n");
		}

		/*
		 * Disallow directory write attempts on read-only lookups.
		 * Prefers EEXIST over EROFS for the CREATE case.
		 */
		if (state->rdonly &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			if (searchdir) {
				if (searchdir_locked) {
					vput(searchdir);
					searchdir_locked = false;
				} else {
					vrele(searchdir);
				}
				searchdir = NULL;
			}
			vrele(foundobj);
			foundobj = NULL;
			ndp->ni_dvp = NULL;
			ndp->ni_vp = NULL;
			state->attempt_retry = 1;
			return EROFS;
		}

		/* Lock the leaf node if requested. */
		if ((cnp->cn_flags & (LOCKLEAF | LOCKPARENT)) == LOCKPARENT &&
		    searchdir == foundobj) {
			/*
			 * Note: if LOCKPARENT but not LOCKLEAF is
			 * set, and searchdir == foundobj, this code
			 * necessarily unlocks the parent as well as
			 * the leaf. That is, just because you specify
			 * LOCKPARENT doesn't mean you necessarily get
			 * a locked parent vnode. The code in
			 * vfs_syscalls.c, and possibly elsewhere,
			 * that uses this combination "knows" this, so
			 * it can't be safely changed. Feh. XXX
			 */
			KASSERT(searchdir_locked);
		    	VOP_UNLOCK(searchdir);
		    	searchdir_locked = false;
		} else if ((cnp->cn_flags & LOCKLEAF) != 0 &&
		    (searchdir != foundobj ||
		    (cnp->cn_flags & LOCKPARENT) == 0)) {
			/*
			 * Take the leaf lock unless the leaf is also the
			 * parent being returned under LOCKPARENT.
			 * LOCKSHARED selects a shared leaf lock.
			 */
			const int lktype = (cnp->cn_flags & LOCKSHARED) != 0 ?
			    LK_SHARED : LK_EXCLUSIVE;
			vn_lock(foundobj, lktype | LK_RETRY);
		}
	}

	/*
	 * Done.
	 */

	/*
	 * If LOCKPARENT is not set, the parent directory isn't returned.
	 */
	if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) {
		vrele(searchdir);
		searchdir = NULL;
	}

	/* Hand the remaining references to the caller via ndp. */
	ndp->ni_dvp = searchdir;
	ndp->ni_vp = foundobj;
	return 0;
}
1883
1884/*
1885 * Do namei; wrapper layer that handles TRYEMULROOT.
1886 */
1887static int
1888namei_tryemulroot(struct namei_state *state,
1889	 int neverfollow, int inhibitmagic, int isnfsd)
1890{
1891	int error;
1892
1893	struct nameidata *ndp = state->ndp;
1894	struct componentname *cnp = state->cnp;
1895	const char *savepath = NULL;
1896
1897	KASSERT(cnp == &ndp->ni_cnd);
1898
1899	if (cnp->cn_flags & TRYEMULROOT) {
1900		savepath = pathbuf_stringcopy_get(ndp->ni_pathbuf);
1901	}
1902
1903    emul_retry:
1904	state->attempt_retry = 0;
1905
1906	error = namei_oneroot(state, neverfollow, inhibitmagic, isnfsd);
1907	if (error) {
1908		/*
1909		 * Once namei has started up, the existence of ni_erootdir
1910		 * tells us whether we're working from an emulation root.
1911		 * The TRYEMULROOT flag isn't necessarily authoritative.
1912		 */
1913		if (ndp->ni_erootdir != NULL && state->attempt_retry) {
1914			/* Retry the whole thing using the normal root */
1915			cnp->cn_flags &= ~TRYEMULROOT;
1916			state->attempt_retry = 0;
1917
1918			/* kinda gross */
1919			strcpy(ndp->ni_pathbuf->pb_path, savepath);
1920			pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
1921			savepath = NULL;
1922
1923			goto emul_retry;
1924		}
1925	}
1926	if (savepath != NULL) {
1927		pathbuf_stringcopy_put(ndp->ni_pathbuf, savepath);
1928	}
1929	return error;
1930}
1931
1932/*
1933 * External interface.
1934 */
1935int
1936namei(struct nameidata *ndp)
1937{
1938	struct namei_state state;
1939	int error;
1940
1941	namei_init(&state, ndp);
1942	error = namei_tryemulroot(&state,
1943				  0/*!neverfollow*/, 0/*!inhibitmagic*/,
1944				  0/*isnfsd*/);
1945	namei_cleanup(&state);
1946
1947	if (error) {
1948		/* make sure no stray refs leak out */
1949		KASSERT(ndp->ni_dvp == NULL);
1950		KASSERT(ndp->ni_vp == NULL);
1951	}
1952
1953	return error;
1954}
1955
1956////////////////////////////////////////////////////////////
1957
1958/*
1959 * External interface used by nfsd. This is basically different from
1960 * namei only in that it has the ability to pass in the "current
1961 * directory", and uses an extra flag "neverfollow" for which there's
1962 * no physical flag defined in namei.h. (There used to be a cut&paste
1963 * copy of about half of namei in nfsd to allow these minor
1964 * adjustments to exist.)
1965 *
1966 * XXX: the namei interface should be adjusted so nfsd can just use
1967 * ordinary namei().
1968 */
1969int
1970lookup_for_nfsd(struct nameidata *ndp, struct vnode *forcecwd, int neverfollow)
1971{
1972	struct namei_state state;
1973	int error;
1974
1975	KASSERT(ndp->ni_atdir == NULL);
1976	ndp->ni_atdir = forcecwd;
1977
1978	namei_init(&state, ndp);
1979	error = namei_tryemulroot(&state,
1980				  neverfollow, 1/*inhibitmagic*/, 1/*isnfsd*/);
1981	namei_cleanup(&state);
1982
1983	if (error) {
1984		/* make sure no stray refs leak out */
1985		KASSERT(ndp->ni_dvp == NULL);
1986		KASSERT(ndp->ni_vp == NULL);
1987	}
1988
1989	return error;
1990}
1991
1992/*
1993 * A second external interface used by nfsd. This turns out to be a
1994 * single lookup used by the WebNFS code (ha!) to get "index.html" or
1995 * equivalent when asked for a directory. It should eventually evolve
1996 * into some kind of namei_once() call; for the time being it's kind
1997 * of a mess. XXX.
1998 *
1999 * dholland 20110109: I don't think it works, and I don't think it
2000 * worked before I started hacking and slashing either, and I doubt
2001 * anyone will ever notice.
2002 */
2003
2004/*
2005 * Internals. This calls lookup_once() after setting up the assorted
2006 * pieces of state the way they ought to be.
2007 */
2008static int
2009do_lookup_for_nfsd_index(struct namei_state *state)
2010{
2011	int error;
2012
2013	struct componentname *cnp = state->cnp;
2014	struct nameidata *ndp = state->ndp;
2015	struct vnode *startdir;
2016	struct vnode *foundobj;
2017	bool startdir_locked;
2018	const char *cp;			/* pointer into pathname argument */
2019
2020	KASSERT(cnp == &ndp->ni_cnd);
2021
2022	startdir = state->ndp->ni_atdir;
2023
2024	cnp->cn_nameptr = ndp->ni_pnbuf;
2025	state->docache = 1;
2026	state->rdonly = cnp->cn_flags & RDONLY;
2027	ndp->ni_dvp = NULL;
2028
2029	error = VOP_PARSEPATH(startdir, cnp->cn_nameptr, &cnp->cn_namelen);
2030	if (error) {
2031		return error;
2032	}
2033
2034	cp = cnp->cn_nameptr + cnp->cn_namelen;
2035	KASSERT(cnp->cn_namelen <= KERNEL_NAME_MAX);
2036	ndp->ni_pathlen -= cnp->cn_namelen;
2037	ndp->ni_next = cp;
2038	state->slashes = 0;
2039	cnp->cn_flags &= ~REQUIREDIR;
2040	cnp->cn_flags |= MAKEENTRY|ISLASTCN;
2041
2042	if (cnp->cn_namelen == 2 &&
2043	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
2044		cnp->cn_flags |= ISDOTDOT;
2045	else
2046		cnp->cn_flags &= ~ISDOTDOT;
2047
2048	/*
2049	 * Because lookup_once can change the startdir, we need our
2050	 * own reference to it to avoid consuming the caller's.
2051	 */
2052	vref(startdir);
2053	error = lookup_once(state, startdir, &startdir, &foundobj,
2054	    &startdir_locked);
2055
2056	KASSERT((cnp->cn_flags & LOCKPARENT) == 0);
2057	if (startdir_locked) {
2058		VOP_UNLOCK(startdir);
2059		startdir_locked = false;
2060	}
2061
2062	/*
2063	 * If the vnode we found is mounted on, then cross the mount and get
2064	 * the root vnode in foundobj.  If this encounters an error, it will
2065	 * dispose of foundobj, but searchdir is untouched.
2066	 */
2067	if (error == 0 && foundobj != NULL &&
2068	    foundobj->v_type == VDIR &&
2069	    foundobj->v_mountedhere != NULL &&
2070	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
2071		error = lookup_crossmount(state, &startdir, &foundobj,
2072		    &startdir_locked);
2073	}
2074
2075	/* Now toss startdir and see if we have an error. */
2076	if (startdir != NULL)
2077		vrele(startdir);
2078	if (error)
2079		foundobj = NULL;
2080	else if (foundobj != NULL && (cnp->cn_flags & LOCKLEAF) != 0)
2081		vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
2082
2083	ndp->ni_vp = foundobj;
2084	return (error);
2085}
2086
2087/*
2088 * External interface. The partitioning between this function and the
2089 * above isn't very clear - the above function exists mostly so code
2090 * that uses "state->" can be shuffled around without having to change
2091 * it to "state.".
2092 */
2093int
2094lookup_for_nfsd_index(struct nameidata *ndp, struct vnode *startdir)
2095{
2096	struct namei_state state;
2097	int error;
2098
2099	KASSERT(ndp->ni_atdir == NULL);
2100	ndp->ni_atdir = startdir;
2101
2102	/*
2103	 * Note: the name sent in here (is not|should not be) allowed
2104	 * to contain a slash.
2105	 */
2106	if (strlen(ndp->ni_pathbuf->pb_path) > KERNEL_NAME_MAX) {
2107		return ENAMETOOLONG;
2108	}
2109	if (strchr(ndp->ni_pathbuf->pb_path, '/')) {
2110		return EINVAL;
2111	}
2112
2113	ndp->ni_pathlen = strlen(ndp->ni_pathbuf->pb_path) + 1;
2114	ndp->ni_pnbuf = NULL;
2115	ndp->ni_cnd.cn_nameptr = NULL;
2116
2117	namei_init(&state, ndp);
2118	error = do_lookup_for_nfsd_index(&state);
2119	namei_cleanup(&state);
2120
2121	return error;
2122}
2123
2124////////////////////////////////////////////////////////////
2125
2126/*
2127 * Reacquire a path name component.
2128 * dvp is locked on entry and exit.
2129 * *vpp is locked on exit unless it's NULL.
2130 */
2131int
2132relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int dummy)
2133{
2134	int rdonly;			/* lookup read-only flag bit */
2135	int error = 0;
2136#ifdef DEBUG
2137	size_t newlen;			/* DEBUG: check name len */
2138	const char *cp;			/* DEBUG: check name ptr */
2139#endif /* DEBUG */
2140
2141	(void)dummy;
2142
2143	/*
2144	 * Setup: break out flag bits into variables.
2145	 */
2146	rdonly = cnp->cn_flags & RDONLY;
2147
2148	/*
2149	 * Search a new directory.
2150	 *
2151	 * The cn_hash value is for use by vfs_cache.
2152	 * The last component of the filename is left accessible via
2153	 * cnp->cn_nameptr for callers that need the name. Callers needing
2154	 * the name set the SAVENAME flag. When done, they assume
2155	 * responsibility for freeing the pathname buffer.
2156	 */
2157#ifdef DEBUG
2158#if 0
2159	cp = NULL;
2160	newhash = namei_hash(cnp->cn_nameptr, &cp);
2161	if ((uint32_t)newhash != (uint32_t)cnp->cn_hash)
2162		panic("relookup: bad hash");
2163#endif
2164	error = VOP_PARSEPATH(dvp, cnp->cn_nameptr, &newlen);
2165	if (error) {
2166		panic("relookup: parsepath failed with error %d", error);
2167	}
2168	if (cnp->cn_namelen != newlen)
2169		panic("relookup: bad len");
2170	cp = cnp->cn_nameptr + cnp->cn_namelen;
2171	while (*cp == '/')
2172		cp++;
2173	if (*cp != 0)
2174		panic("relookup: not last component");
2175#endif /* DEBUG */
2176
2177	/*
2178	 * Check for degenerate name (e.g. / or "")
2179	 * which is a way of talking about a directory,
2180	 * e.g. like "/." or ".".
2181	 */
2182	if (cnp->cn_nameptr[0] == '\0')
2183		panic("relookup: null name");
2184
2185	if (cnp->cn_flags & ISDOTDOT)
2186		panic("relookup: lookup on dot-dot");
2187
2188	/*
2189	 * We now have a segment name to search for, and a directory to search.
2190	 */
2191	*vpp = NULL;
2192	error = VOP_LOOKUP(dvp, vpp, cnp);
2193	if ((error) != 0) {
2194		KASSERTMSG((*vpp == NULL),
2195		    "leaf `%s' should be empty but is %p",
2196		    cnp->cn_nameptr, *vpp);
2197		if (error != EJUSTRETURN)
2198			goto bad;
2199	}
2200
2201	/*
2202	 * Check for symbolic link
2203	 */
2204	KASSERTMSG((*vpp == NULL || (*vpp)->v_type != VLNK ||
2205		(cnp->cn_flags & FOLLOW) == 0),
2206	    "relookup: symlink found");
2207
2208	/*
2209	 * Check for read-only lookups.
2210	 */
2211	if (rdonly && cnp->cn_nameiop != LOOKUP) {
2212		error = EROFS;
2213		if (*vpp) {
2214			vrele(*vpp);
2215		}
2216		goto bad;
2217	}
2218	/*
2219	 * Lock result.
2220	 */
2221	if (*vpp && *vpp != dvp) {
2222		error = vn_lock(*vpp, LK_EXCLUSIVE);
2223		if (error != 0) {
2224			vrele(*vpp);
2225			goto bad;
2226		}
2227	}
2228	return (0);
2229
2230bad:
2231	*vpp = NULL;
2232	return (error);
2233}
2234
2235/*
2236 * namei_simple - simple forms of namei.
2237 *
2238 * These are wrappers to allow the simple case callers of namei to be
2239 * left alone while everything else changes under them.
2240 */
2241
2242/* Flags */
2243struct namei_simple_flags_type {
2244	int dummy;
2245};
2246static const struct namei_simple_flags_type ns_nn, ns_nt, ns_fn, ns_ft;
2247const namei_simple_flags_t NSM_NOFOLLOW_NOEMULROOT = &ns_nn;
2248const namei_simple_flags_t NSM_NOFOLLOW_TRYEMULROOT = &ns_nt;
2249const namei_simple_flags_t NSM_FOLLOW_NOEMULROOT = &ns_fn;
2250const namei_simple_flags_t NSM_FOLLOW_TRYEMULROOT = &ns_ft;
2251
2252static
2253int
2254namei_simple_convert_flags(namei_simple_flags_t sflags)
2255{
2256	if (sflags == NSM_NOFOLLOW_NOEMULROOT)
2257		return NOFOLLOW | 0;
2258	if (sflags == NSM_NOFOLLOW_TRYEMULROOT)
2259		return NOFOLLOW | TRYEMULROOT;
2260	if (sflags == NSM_FOLLOW_NOEMULROOT)
2261		return FOLLOW | 0;
2262	if (sflags == NSM_FOLLOW_TRYEMULROOT)
2263		return FOLLOW | TRYEMULROOT;
2264	panic("namei_simple_convert_flags: bogus sflags\n");
2265	return 0;
2266}
2267
2268int
2269namei_simple_kernel(const char *path, namei_simple_flags_t sflags,
2270	struct vnode **vp_ret)
2271{
2272	return nameiat_simple_kernel(NULL, path, sflags, vp_ret);
2273}
2274
2275int
2276nameiat_simple_kernel(struct vnode *dvp, const char *path,
2277	namei_simple_flags_t sflags, struct vnode **vp_ret)
2278{
2279	struct nameidata nd;
2280	struct pathbuf *pb;
2281	int err;
2282
2283	pb = pathbuf_create(path);
2284	if (pb == NULL) {
2285		return ENOMEM;
2286	}
2287
2288	NDINIT(&nd,
2289		LOOKUP,
2290		namei_simple_convert_flags(sflags),
2291		pb);
2292
2293	if (dvp != NULL)
2294		NDAT(&nd, dvp);
2295
2296	err = namei(&nd);
2297	if (err != 0) {
2298		pathbuf_destroy(pb);
2299		return err;
2300	}
2301	*vp_ret = nd.ni_vp;
2302	pathbuf_destroy(pb);
2303	return 0;
2304}
2305
2306int
2307namei_simple_user(const char *path, namei_simple_flags_t sflags,
2308	struct vnode **vp_ret)
2309{
2310	return nameiat_simple_user(NULL, path, sflags, vp_ret);
2311}
2312
2313int
2314nameiat_simple_user(struct vnode *dvp, const char *path,
2315	namei_simple_flags_t sflags, struct vnode **vp_ret)
2316{
2317	struct pathbuf *pb;
2318	struct nameidata nd;
2319	int err;
2320
2321	err = pathbuf_copyin(path, &pb);
2322	if (err) {
2323		return err;
2324	}
2325
2326	NDINIT(&nd,
2327		LOOKUP,
2328		namei_simple_convert_flags(sflags),
2329		pb);
2330
2331	if (dvp != NULL)
2332		NDAT(&nd, dvp);
2333
2334	err = namei(&nd);
2335	if (err != 0) {
2336		pathbuf_destroy(pb);
2337		return err;
2338	}
2339	*vp_ret = nd.ni_vp;
2340	pathbuf_destroy(pb);
2341	return 0;
2342}
2343