vfs_cache.c revision 211616
1139804Simp/*-
222521Sdyson * Copyright (c) 1989, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
522521Sdyson * This code is derived from software contributed to Berkeley by
622521Sdyson * Poul-Henning Kamp of the FreeBSD Project.
722521Sdyson *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 4. Neither the name of the University nor the names of its contributors
171541Srgrimes *    may be used to endorse or promote products derived from this software
181541Srgrimes *    without specific prior written permission.
191541Srgrimes *
201541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301541Srgrimes * SUCH DAMAGE.
311541Srgrimes *
3223521Sbde *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
331541Srgrimes */
341541Srgrimes
35116182Sobrien#include <sys/cdefs.h>
36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 211616 2010-08-22 11:18:57Z rpaulo $");
37116182Sobrien
38190829Srwatson#include "opt_kdtrace.h"
39190141Skib#include "opt_ktrace.h"
40190141Skib
411541Srgrimes#include <sys/param.h>
42183155Sjhb#include <sys/filedesc.h>
43183155Sjhb#include <sys/fnv_hash.h>
4412820Sphk#include <sys/kernel.h>
4576166Smarkm#include <sys/lock.h>
46183155Sjhb#include <sys/malloc.h>
47183155Sjhb#include <sys/mount.h>
481541Srgrimes#include <sys/namei.h>
49183155Sjhb#include <sys/proc.h>
50187839Sjhb#include <sys/rwlock.h>
51190829Srwatson#include <sys/sdt.h>
52102870Siedowse#include <sys/syscallsubr.h>
53183155Sjhb#include <sys/sysctl.h>
5451906Sphk#include <sys/sysproto.h>
55183155Sjhb#include <sys/systm.h>
56183155Sjhb#include <sys/vnode.h>
57190141Skib#ifdef KTRACE
58190141Skib#include <sys/ktrace.h>
59190141Skib#endif
601541Srgrimes
61116289Sdes#include <vm/uma.h>
62116289Sdes
63190829SrwatsonSDT_PROVIDER_DECLARE(vfs);
64211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, enter, done, done, "struct vnode *", "char *",
65190829Srwatson    "struct vnode *");
66211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *",
67190829Srwatson    "char *");
68211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *");
69211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *",
70190829Srwatson    "struct char *", "struct vnode *");
71211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *");
72211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int",
73211616Srpaulo    "struct vnode *", "struct char *");
74211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *",
75190829Srwatson    "struct vnode *");
76211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative,
77211616Srpaulo    "struct vnode *", "char *");
78211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, miss, "struct vnode *",
79190829Srwatson    "char *");
80211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purge, done, done, "struct vnode *");
81211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, done, "struct vnode *");
82211616SrpauloSDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, done, "struct mount *");
83211616SrpauloSDT_PROBE_DEFINE3(vfs, namecache, zap, done, done, "struct vnode *", "char *",
84190829Srwatson    "struct vnode *");
85211616SrpauloSDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, done, "struct vnode *",
86190829Srwatson    "char *");
87190829Srwatson
8851906Sphk/*
8959652Sgreen * This structure describes the elements in the cache of recent
9059652Sgreen * names looked up by namei.
9159652Sgreen */
9259652Sgreen
9359652Sgreenstruct	namecache {
9460938Sjake	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
9560938Sjake	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
9660938Sjake	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
9759652Sgreen	struct	vnode *nc_dvp;		/* vnode of parent of name */
9859652Sgreen	struct	vnode *nc_vp;		/* vnode the name refers to */
9959652Sgreen	u_char	nc_flag;		/* flag bits */
10059652Sgreen	u_char	nc_nlen;		/* length of name */
101190829Srwatson	char	nc_name[0];		/* segment name + nul */
10259652Sgreen};
10359652Sgreen
10459652Sgreen/*
1051541Srgrimes * Name caching works as follows:
1061541Srgrimes *
1071541Srgrimes * Names found by directory scans are retained in a cache
1081541Srgrimes * for future reference.  It is managed LRU, so frequently
1091541Srgrimes * used names will hang around.  Cache is indexed by hash value
1101541Srgrimes * obtained from (vp, name) where vp refers to the directory
1111541Srgrimes * containing name.
1121541Srgrimes *
11322521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to
11422521Sdyson * exist) the vnode pointer will be NULL.
1156968Sphk *
1161541Srgrimes * Upon reaching the last segment of a path, if the reference
1171541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the
1181541Srgrimes * name is located in the cache, it will be dropped.
1191541Srgrimes */
1201541Srgrimes
/*
 * Structures associated with name caching.
 */
/* Map a hash value to its chain head; nchash is used as a bit mask. */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU queue of negative entries */
static u_long	nchash;			/* hash table mask (max index, not count) */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct	nchstats nchstats;		/* cache effectiveness statistics */
1431541Srgrimes
/*
 * A single reader/writer lock covers the name cache.  Lookups start with
 * the read lock and switch to the write lock only when they need to modify
 * the cache.
 */
static struct rwlock cache_lock;
RW_SYSINIT(vfscache, &cache_lock, "Name Cache");

/* Non-blocking read -> write upgrade attempt; the result is tested by callers. */
#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
152120792Sjeff
153116289Sdes/*
154116289Sdes * UMA zones for the VFS cache.
155116289Sdes *
156116289Sdes * The small cache is used for entries with short names, which are the
157116289Sdes * most common.  The large cache is used for entries which are too big to
158116289Sdes * fit in the small cache.
159116289Sdes */
160116289Sdesstatic uma_zone_t cache_zone_small;
161116289Sdesstatic uma_zone_t cache_zone_large;
162116289Sdes
163190829Srwatson#define	CACHE_PATH_CUTOFF	35
164190829Srwatson#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF \
165190829Srwatson				    + 1)
166190829Srwatson#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX + 1)
167116289Sdes
168116289Sdes#define cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
169116289Sdes	cache_zone_small : cache_zone_large, M_WAITOK)
170116289Sdes#define cache_free(ncp)		do { \
171116289Sdes	if (ncp != NULL) \
172116289Sdes		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
173116289Sdes		    cache_zone_small : cache_zone_large, (ncp)); \
174116289Sdes} while (0)
175116289Sdes
static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");

/* Export size information to userland */
SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
	sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
/* Publish one u_long counter under vfs.cache with the given access mode. */
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
/* Lookup passes started (counted again when a lookup is retried). */
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
/* Lookups of "." */
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
/* Lookups of ".." */
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
/* Hash chain entries examined during lookups */
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
/* Misses with MAKEENTRY set */
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
/* Misses with MAKEENTRY clear */
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
/* Entries found but dropped because MAKEENTRY was clear */
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
/* Positive hits returned to the caller */
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
/* Negative entries dropped because the caller is doing a CREATE */
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
/* Negative hits returned to the caller */
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
/* Times a lookup had to drop the read lock and take the write lock */
static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
	&nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
20529788Sphk
20668922Srwatson
20768922Srwatson
/* Forward declarations for helpers defined later in this file. */
static void cache_zap(struct namecache *ncp);
static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
    u_int *buflen);
static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
    char *buf, char **retbuf, u_int buflen);

/* Malloc type passed to hashinit() for the name cache hash table. */
static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
21551906Sphk
21622521Sdyson/*
21725453Sphk * Flags in namecache.nc_flag
21825453Sphk */
#define NCF_WHITE	0x01	/* negative entry entered with ISWHITEOUT set */
#define NCF_ISDOTDOT	0x02	/* entry caches ".." of nc_dvp */
22175402Speter
222189593Sjhb#ifdef DIAGNOSTIC
22325453Sphk/*
22475402Speter * Grab an atomic snapshot of the name cache hash chain lengths
22575402Speter */
22675402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
22775402Speter
22875402Speterstatic int
22975402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
23075402Speter{
23175402Speter	int error;
23275402Speter	struct nchashhead *ncpp;
23375402Speter	struct namecache *ncp;
23475402Speter	int n_nchash;
23575402Speter	int count;
23675402Speter
23775402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
23875402Speter	if (!req->oldptr)
23975402Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
24075402Speter
24175402Speter	/* Scan hash tables for applicable entries */
24275402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
243187839Sjhb		CACHE_RLOCK();
24475402Speter		count = 0;
24575402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
24675402Speter			count++;
24775402Speter		}
248187839Sjhb		CACHE_RUNLOCK();
24998994Salfred		error = SYSCTL_OUT(req, &count, sizeof(count));
25075402Speter		if (error)
25175402Speter			return (error);
25275402Speter	}
25375402Speter	return (0);
25475402Speter}
255187658SjhbSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
256187658Sjhb	CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
257187658Sjhb	"nchash chain lengths");
25875402Speter
25975402Speterstatic int
26075402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
26175402Speter{
26275402Speter	int error;
26375402Speter	struct nchashhead *ncpp;
26475402Speter	struct namecache *ncp;
26575402Speter	int n_nchash;
26675402Speter	int count, maxlength, used, pct;
26775402Speter
26875402Speter	if (!req->oldptr)
26975402Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
27075402Speter
27175402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
27275402Speter	used = 0;
27375402Speter	maxlength = 0;
27475402Speter
27575402Speter	/* Scan hash tables for applicable entries */
27675402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
27775402Speter		count = 0;
278187839Sjhb		CACHE_RLOCK();
27975402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
28075402Speter			count++;
28175402Speter		}
282187839Sjhb		CACHE_RUNLOCK();
28375402Speter		if (count)
28475402Speter			used++;
28575402Speter		if (maxlength < count)
28675402Speter			maxlength = count;
28775402Speter	}
28875402Speter	n_nchash = nchash + 1;
28975402Speter	pct = (used * 100 * 100) / n_nchash;
29098994Salfred	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
29175402Speter	if (error)
29275402Speter		return (error);
29398994Salfred	error = SYSCTL_OUT(req, &used, sizeof(used));
29475402Speter	if (error)
29575402Speter		return (error);
29698994Salfred	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
29775402Speter	if (error)
29875402Speter		return (error);
29998994Salfred	error = SYSCTL_OUT(req, &pct, sizeof(pct));
30075402Speter	if (error)
30175402Speter		return (error);
30275402Speter	return (0);
30375402Speter}
304187658SjhbSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
305187658Sjhb	CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
306187658Sjhb	"nchash chain lengths");
307189593Sjhb#endif
30875402Speter
30975402Speter/*
310110952Sarr * cache_zap():
311110952Sarr *
312110952Sarr *   Removes a namecache entry from cache, whether it contains an actual
313110952Sarr *   pointer to a vnode or if it is just a negative cache entry.
31422521Sdyson */
31525453Sphkstatic void
316140712Sjeffcache_zap(ncp)
31725453Sphk	struct namecache *ncp;
31825453Sphk{
319120792Sjeff	struct vnode *vp;
320120792Sjeff
321187839Sjhb	rw_assert(&cache_lock, RA_WLOCKED);
322147326Sjeff	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
323190829Srwatson#ifdef KDTRACE_HOOKS
324190829Srwatson	if (ncp->nc_vp != NULL) {
325190829Srwatson		SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp,
326190829Srwatson		    ncp->nc_name, ncp->nc_vp, 0, 0);
327190829Srwatson	} else {
328190829Srwatson		SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp,
329190829Srwatson		    ncp->nc_name, 0, 0, 0);
330190829Srwatson	}
331190829Srwatson#endif
332120792Sjeff	vp = NULL;
33325453Sphk	LIST_REMOVE(ncp, nc_hash);
334190533Skan	if (ncp->nc_flag & NCF_ISDOTDOT) {
335190533Skan		if (ncp == ncp->nc_dvp->v_cache_dd)
336190533Skan			ncp->nc_dvp->v_cache_dd = NULL;
337190533Skan	} else {
338190533Skan		LIST_REMOVE(ncp, nc_src);
339190533Skan		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
340190533Skan			vp = ncp->nc_dvp;
341190533Skan			numcachehv--;
342190533Skan		}
34375654Stanimura	}
34425453Sphk	if (ncp->nc_vp) {
34525453Sphk		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
346190533Skan		if (ncp == ncp->nc_vp->v_cache_dd)
347190533Skan			ncp->nc_vp->v_cache_dd = NULL;
34825453Sphk	} else {
34925453Sphk		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
35025453Sphk		numneg--;
35125453Sphk	}
35225453Sphk	numcache--;
353116289Sdes	cache_free(ncp);
354120792Sjeff	if (vp)
355120792Sjeff		vdrop(vp);
35622521Sdyson}
3576968Sphk
/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.  If the directory vnode is
 * recycled out from under us due to a forced unmount, a status of
 * ENOENT is returned.
 *
 * *vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
 * temporarily unlocked while *vpp is acquired and is relocked before
 * returning.  If we're looking up . an extra ref is taken, but the lock is
 * not recursively acquired.
 */
3746968Sphk
/*
 * Returns 0 on a miss (or when an entry was deliberately dropped), -1 on
 * a positive hit with *vpp set, and ENOENT on a negative hit or when a
 * vnode was found doomed after relocking.
 *
 * Locking: the lookup starts under the cache read lock.  Any path that
 * must modify the cache first tries a non-blocking upgrade; if that fails
 * it jumps to 'wlock', which drops the read lock, takes the write lock and
 * re-runs the lookup from 'retry_wlocked'.
 */
int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	uint32_t hash;
	int error, ltype, wlocked;

	/* Cache disabled: report a miss and tell the caller not to enter. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}
retry:
	CACHE_RLOCK();
	wlocked = 0;	/* set to 1 once the write lock is held instead */
	numcalls++;
	error = 0;

retry_wlocked:
	if (cnp->cn_nameptr[0] == '.') {
		/* "." resolves to the directory itself. */
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
			    dvp, cnp->cn_nameptr);
			dothits++;
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".",
			    *vpp, 0, 0);
			goto success;
		}
		/* ".." is answered from dvp->v_cache_dd, not the hash table. */
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_cache_dd == NULL) {
				SDT_PROBE(vfs, namecache, lookup, miss, dvp,
				    "..", NULL, 0, 0);
				goto unlock;
			}
			/* Caller does not want ".." cached: drop it. */
			if ((cnp->cn_flags & MAKEENTRY) == 0) {
				if (!wlocked && !CACHE_UPGRADE_LOCK())
					goto wlock;
				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
					cache_zap(dvp->v_cache_dd);
				dvp->v_cache_dd = NULL;
				CACHE_WUNLOCK();
				return (0);
			}
			/*
			 * v_cache_dd is either a dedicated ".." entry, whose
			 * target (nc_vp) is the parent, or another entry
			 * whose nc_dvp is taken as the parent.
			 */
			if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
				*vpp = dvp->v_cache_dd->nc_vp;
			else
				*vpp = dvp->v_cache_dd->nc_dvp;
			/* Return failure if negative entry was found. */
			if (*vpp == NULL) {
				ncp = dvp->v_cache_dd;
				goto negative_success;
			}
			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
			    dvp, cnp->cn_nameptr, *vpp);
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..",
			    *vpp, 0, 0);
			goto success;
		}
	}

	/* The hash covers the name bytes followed by the dvp pointer value. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == NULL) {
		SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
		    NULL, 0, 0);
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		goto unlock;
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
		    dvp, cnp->cn_nameptr, *vpp, ncp);
		SDT_PROBE(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
		    *vpp, 0, 0);
		goto success;
	}

negative_success:
	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* Requeueing the entry below modifies ncneg, so the write lock is needed. */
	if (!wlocked && !CACHE_UPGRADE_LOCK())
		goto wlock;
	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, ncp->nc_name,
	    0, 0, 0);
	CACHE_WUNLOCK();
	return (ENOENT);

wlock:
	/*
	 * We need to update the cache after our lookup, so upgrade to
	 * a write lock and retry the operation.
	 */
	CACHE_RUNLOCK();
	CACHE_WLOCK();
	numupgrades++;
	wlocked = 1;
	goto retry_wlocked;

success:
	/*
	 * On success we return a locked and ref'd vnode as per the lookup
	 * protocol.
	 */
	if (dvp == *vpp) {   /* lookup on "." */
		VREF(*vpp);
		if (wlocked)
			CACHE_WUNLOCK();
		else
			CACHE_RUNLOCK();
		/*
		 * When we lookup "." we still can be asked to lock it
		 * differently...
		 */
		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(*vpp)) {
			if (ltype == LK_EXCLUSIVE) {
				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
				if ((*vpp)->v_iflag & VI_DOOMED) {
					/* forced unmount */
					vrele(*vpp);
					*vpp = NULL;
					return (ENOENT);
				}
			} else
				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
		}
		return (-1);
	}
	ltype = 0;	/* silence gcc warning */
	/* For "..", remember dvp's lock type and unlock it before vget(). */
	if (cnp->cn_flags & ISDOTDOT) {
		ltype = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
	}
	/* Take the vnode interlock before dropping the cache lock. */
	VI_LOCK(*vpp);
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
	if (cnp->cn_flags & ISDOTDOT) {
		/* Relock dvp; it may have been doomed while unlocked. */
		vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0)
				vput(*vpp);
			*vpp = NULL;
			return (ENOENT);
		}
	}
	/* vget() failed: start the whole lookup over. */
	if (error) {
		*vpp = NULL;
		goto retry;
	}
	if ((cnp->cn_flags & ISLASTCN) &&
	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
	}
	return (-1);

unlock:
	/* Miss: release whichever lock is held and report 0. */
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	return (0);
}
5931541Srgrimes
5941541Srgrimes/*
5956968Sphk * Add an entry to the cache.
5961541Srgrimes */
5971549Srgrimesvoid
5981541Srgrimescache_enter(dvp, vp, cnp)
5991541Srgrimes	struct vnode *dvp;
6001541Srgrimes	struct vnode *vp;
6011541Srgrimes	struct componentname *cnp;
6021541Srgrimes{
603185557Skib	struct namecache *ncp, *n2;
60451906Sphk	struct nchashhead *ncpp;
605209390Sed	uint32_t hash;
606190533Skan	int flag;
607120792Sjeff	int hold;
608120792Sjeff	int zap;
60951906Sphk	int len;
6101541Srgrimes
611147326Sjeff	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
612147296Sjeff	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
613206671Skib	    ("cache_enter: Adding a doomed vnode"));
614206894Skib	VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
615206894Skib	    ("cache_enter: Doomed vnode used as src"));
616147296Sjeff
6171541Srgrimes	if (!doingcache)
6181541Srgrimes		return;
6196968Sphk
620187460Smckay	/*
621187460Smckay	 * Avoid blowout in namecache entries.
622187460Smckay	 */
623187460Smckay	if (numcache >= desiredvnodes * 2)
624187460Smckay		return;
625187460Smckay
626190533Skan	flag = 0;
62725453Sphk	if (cnp->cn_nameptr[0] == '.') {
628190533Skan		if (cnp->cn_namelen == 1)
62925453Sphk			return;
63025453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
631187839Sjhb			CACHE_WLOCK();
632190533Skan			/*
633190533Skan			 * If dotdot entry already exists, just retarget it
634190533Skan			 * to new parent vnode, otherwise continue with new
635190533Skan			 * namecache entry allocation.
636190533Skan			 */
637191218Skan			if ((ncp = dvp->v_cache_dd) != NULL &&
638191218Skan			    ncp->nc_flag & NCF_ISDOTDOT) {
639191218Skan				KASSERT(ncp->nc_dvp == dvp,
640191218Skan				    ("wrong isdotdot parent"));
641191218Skan				if (ncp->nc_vp != NULL)
642190533Skan					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
643190533Skan					    ncp, nc_dst);
644191218Skan				else
645191218Skan					TAILQ_REMOVE(&ncneg, ncp, nc_dst);
646191218Skan				if (vp != NULL)
647190533Skan					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
648190533Skan					    ncp, nc_dst);
649191218Skan				else
650191218Skan					TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
651191218Skan				ncp->nc_vp = vp;
652191218Skan				CACHE_WUNLOCK();
653191218Skan				return;
654190533Skan			}
655190533Skan			dvp->v_cache_dd = NULL;
656190829Srwatson			SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp,
657190829Srwatson			    0, 0);
658187839Sjhb			CACHE_WUNLOCK();
659190533Skan			flag = NCF_ISDOTDOT;
66025453Sphk		}
6616968Sphk	}
662116201Sdes
663120792Sjeff	hold = 0;
664120792Sjeff	zap = 0;
665182061Sjhb
666182061Sjhb	/*
667182061Sjhb	 * Calculate the hash key and setup as much of the new
668182061Sjhb	 * namecache entry as possible before acquiring the lock.
669182061Sjhb	 */
670116289Sdes	ncp = cache_alloc(cnp->cn_namelen);
671182061Sjhb	ncp->nc_vp = vp;
672182061Sjhb	ncp->nc_dvp = dvp;
673190533Skan	ncp->nc_flag = flag;
674182061Sjhb	len = ncp->nc_nlen = cnp->cn_namelen;
675182061Sjhb	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
676190829Srwatson	strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1);
677182061Sjhb	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
678187839Sjhb	CACHE_WLOCK();
679182061Sjhb
680182061Sjhb	/*
681186600Skib	 * See if this vnode or negative entry is already in the cache
682186600Skib	 * with this name.  This can happen with concurrent lookups of
683186600Skib	 * the same path name.
684182061Sjhb	 */
685186600Skib	ncpp = NCHHASH(hash);
686186600Skib	LIST_FOREACH(n2, ncpp, nc_hash) {
687186600Skib		if (n2->nc_dvp == dvp &&
688186600Skib		    n2->nc_nlen == cnp->cn_namelen &&
689186600Skib		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
690187839Sjhb			CACHE_WUNLOCK();
691186600Skib			cache_free(ncp);
692186600Skib			return;
693182061Sjhb		}
694185557Skib	}
695182061Sjhb
696190945Skan	if (flag == NCF_ISDOTDOT) {
697190945Skan		/*
698190945Skan		 * See if we are trying to add .. entry, but some other lookup
699190945Skan		 * has populated v_cache_dd pointer already.
700190945Skan		 */
701190945Skan		if (dvp->v_cache_dd != NULL) {
702190945Skan		    CACHE_WUNLOCK();
703190945Skan		    cache_free(ncp);
704190945Skan		    return;
705190945Skan		}
706190945Skan		KASSERT(vp == NULL || vp->v_type == VDIR,
707190945Skan		    ("wrong vnode type %p", vp));
708190945Skan		dvp->v_cache_dd = ncp;
709190533Skan	}
710190533Skan
71125453Sphk	numcache++;
71228954Sphk	if (!vp) {
71325453Sphk		numneg++;
714190533Skan		if (cnp->cn_flags & ISWHITEOUT)
715190533Skan			ncp->nc_flag |= NCF_WHITE;
71629071Sphk	} else if (vp->v_type == VDIR) {
717190945Skan		if (flag != NCF_ISDOTDOT) {
718190533Skan			if ((n2 = vp->v_cache_dd) != NULL &&
719190533Skan			    (n2->nc_flag & NCF_ISDOTDOT) != 0)
720190533Skan				cache_zap(n2);
721190533Skan			vp->v_cache_dd = ncp;
722190533Skan		}
723144319Sdas	} else {
724190533Skan		vp->v_cache_dd = NULL;
72528954Sphk	}
72623521Sbde
72722521Sdyson	/*
728182061Sjhb	 * Insert the new namecache entry into the appropriate chain
729182061Sjhb	 * within the cache entries table.
73022521Sdyson	 */
7316928Sphk	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
732190533Skan	if (flag != NCF_ISDOTDOT) {
733190533Skan		if (LIST_EMPTY(&dvp->v_cache_src)) {
734190533Skan			hold = 1;
735190533Skan			numcachehv++;
736190533Skan		}
737190533Skan		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
73875654Stanimura	}
739190533Skan
740110967Sarr	/*
741110967Sarr	 * If the entry is "negative", we place it into the
742110967Sarr	 * "negative" cache queue, otherwise, we place it into the
743110967Sarr	 * destination vnode's cache entries queue.
744110967Sarr	 */
74525453Sphk	if (vp) {
74625453Sphk		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
747190829Srwatson		SDT_PROBE(vfs, namecache, enter, done, dvp, ncp->nc_name, vp,
748190829Srwatson		    0, 0);
74925453Sphk	} else {
75025453Sphk		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
751190829Srwatson		SDT_PROBE(vfs, namecache, enter_negative, done, dvp,
752190829Srwatson		    ncp->nc_name, 0, 0, 0);
75325453Sphk	}
75451906Sphk	if (numneg * ncnegfactor > numcache) {
75525453Sphk		ncp = TAILQ_FIRST(&ncneg);
756120792Sjeff		zap = 1;
75725453Sphk	}
758120792Sjeff	if (hold)
759120792Sjeff		vhold(dvp);
760120792Sjeff	if (zap)
761140712Sjeff		cache_zap(ncp);
762187839Sjhb	CACHE_WUNLOCK();
7631541Srgrimes}
7641541Srgrimes
7651541Srgrimes/*
7661541Srgrimes * Name cache initialization, from vfs_init() when we are booting
7671541Srgrimes */
76869664Speterstatic void
76969664Speternchinit(void *dummy __unused)
7701541Srgrimes{
77123521Sbde
77225453Sphk	TAILQ_INIT(&ncneg);
773116289Sdes
774116289Sdes	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
775116289Sdes	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
776116289Sdes	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
777116289Sdes	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
778116289Sdes
77969664Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
7801541Srgrimes}
781177253SrwatsonSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
7821541Srgrimes
78369664Speter
7841541Srgrimes/*
78546011Sphk * Invalidate all entries to a particular vnode.
7861541Srgrimes */
7871549Srgrimesvoid
7881541Srgrimescache_purge(vp)
7891541Srgrimes	struct vnode *vp;
7901541Srgrimes{
7911541Srgrimes
792147326Sjeff	CTR1(KTR_VFS, "cache_purge(%p)", vp);
793190829Srwatson	SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0);
794187839Sjhb	CACHE_WLOCK();
795147331Sjeff	while (!LIST_EMPTY(&vp->v_cache_src))
796147331Sjeff		cache_zap(LIST_FIRST(&vp->v_cache_src));
797116201Sdes	while (!TAILQ_EMPTY(&vp->v_cache_dst))
798140712Sjeff		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
799190533Skan	if (vp->v_cache_dd != NULL) {
800190533Skan		KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
801190533Skan		   ("lost dotdot link"));
802190533Skan		cache_zap(vp->v_cache_dd);
803190533Skan	}
804190533Skan	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
805187839Sjhb	CACHE_WUNLOCK();
8061541Srgrimes}
8071541Srgrimes
8081541Srgrimes/*
809188833Sjhb * Invalidate all negative entries for a particular directory vnode.
810188833Sjhb */
811188833Sjhbvoid
812188833Sjhbcache_purge_negative(vp)
813188833Sjhb	struct vnode *vp;
814188833Sjhb{
815188833Sjhb	struct namecache *cp, *ncp;
816188833Sjhb
817188833Sjhb	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
818190829Srwatson	SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0);
819188833Sjhb	CACHE_WLOCK();
820188833Sjhb	LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
821188833Sjhb		if (cp->nc_vp == NULL)
822188833Sjhb			cache_zap(cp);
823188833Sjhb	}
824188833Sjhb	CACHE_WUNLOCK();
825188833Sjhb}
826188833Sjhb
827188833Sjhb/*
8286968Sphk * Flush all entries referencing a particular filesystem.
8291541Srgrimes */
8301549Srgrimesvoid
8311541Srgrimescache_purgevfs(mp)
8321541Srgrimes	struct mount *mp;
8331541Srgrimes{
8346968Sphk	struct nchashhead *ncpp;
83522521Sdyson	struct namecache *ncp, *nnp;
8361541Srgrimes
8376968Sphk	/* Scan hash tables for applicable entries */
838190829Srwatson	SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0);
839187839Sjhb	CACHE_WLOCK();
84029071Sphk	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
841169999Spjd		LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
842169999Spjd			if (ncp->nc_dvp->v_mount == mp)
843169999Spjd				cache_zap(ncp);
8441541Srgrimes		}
8451541Srgrimes	}
846187839Sjhb	CACHE_WUNLOCK();
8471541Srgrimes}
84828787Sphk
84928787Sphk/*
85028787Sphk * Perform canonical checks and cache lookup and pass on to filesystem
85128787Sphk * through the vop_cachedlookup only if needed.
85228787Sphk */
85328787Sphk
85428787Sphkint
85528787Sphkvfs_cache_lookup(ap)
85628787Sphk	struct vop_lookup_args /* {
85728787Sphk		struct vnode *a_dvp;
85828787Sphk		struct vnode **a_vpp;
85928787Sphk		struct componentname *a_cnp;
86028787Sphk	} */ *ap;
86128787Sphk{
862144296Sjeff	struct vnode *dvp;
86328787Sphk	int error;
86428787Sphk	struct vnode **vpp = ap->a_vpp;
86528787Sphk	struct componentname *cnp = ap->a_cnp;
86628787Sphk	struct ucred *cred = cnp->cn_cred;
86728787Sphk	int flags = cnp->cn_flags;
86883366Sjulian	struct thread *td = cnp->cn_thread;
86928787Sphk
87028787Sphk	*vpp = NULL;
87165665Sbp	dvp = ap->a_dvp;
87228787Sphk
87365665Sbp	if (dvp->v_type != VDIR)
874116201Sdes		return (ENOTDIR);
87528787Sphk
87665665Sbp	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
87728787Sphk	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
87828787Sphk		return (EROFS);
87928787Sphk
88083366Sjulian	error = VOP_ACCESS(dvp, VEXEC, cred, td);
88128787Sphk	if (error)
88228787Sphk		return (error);
88328787Sphk
88465665Sbp	error = cache_lookup(dvp, vpp, cnp);
885144296Sjeff	if (error == 0)
886144287Sjeff		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
887183330Sjhb	if (error == -1)
888183330Sjhb		return (0);
889183330Sjhb	return (error);
89028787Sphk}
89151906Sphk
89251906Sphk
89351906Sphk#ifndef _SYS_SYSPROTO_H_
89451906Sphkstruct  __getcwd_args {
89551906Sphk	u_char	*buf;
89651906Sphk	u_int	buflen;
89751906Sphk};
89851906Sphk#endif
89951906Sphk
90091690Seivind/*
90191690Seivind * XXX All of these sysctls would probably be more productive dead.
90291690Seivind */
90351906Sphkstatic int disablecwd;
90491690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
90591690Seivind   "Disable the getcwd syscall");
90651906Sphk
907167232Srwatson/* Implementation of the getcwd syscall. */
90851906Sphkint
90983366Sjulian__getcwd(td, uap)
91083366Sjulian	struct thread *td;
91151906Sphk	struct __getcwd_args *uap;
91251906Sphk{
913112430Sphk
914102870Siedowse	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
915102870Siedowse}
916102870Siedowse
917102870Siedowseint
918112430Sphkkern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
919102870Siedowse{
920102870Siedowse	char *bp, *tmpbuf;
92151906Sphk	struct filedesc *fdp;
922185298Smarcus	struct vnode *cdir, *rdir;
923185298Smarcus	int error, vfslocked;
92451906Sphk
925112430Sphk	if (disablecwd)
92651906Sphk		return (ENODEV);
927102870Siedowse	if (buflen < 2)
92851906Sphk		return (EINVAL);
929102870Siedowse	if (buflen > MAXPATHLEN)
930102870Siedowse		buflen = MAXPATHLEN;
931144318Sdas
932144318Sdas	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
933144318Sdas	fdp = td->td_proc->p_fd;
934168355Srwatson	FILEDESC_SLOCK(fdp);
935185298Smarcus	cdir = fdp->fd_cdir;
936185298Smarcus	VREF(cdir);
937185298Smarcus	rdir = fdp->fd_rdir;
938185298Smarcus	VREF(rdir);
939168355Srwatson	FILEDESC_SUNLOCK(fdp);
940185298Smarcus	error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
941185298Smarcus	vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
942185298Smarcus	vrele(rdir);
943185298Smarcus	VFS_UNLOCK_GIANT(vfslocked);
944185298Smarcus	vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
945185298Smarcus	vrele(cdir);
946185298Smarcus	VFS_UNLOCK_GIANT(vfslocked);
947144318Sdas
948144318Sdas	if (!error) {
949144318Sdas		if (bufseg == UIO_SYSSPACE)
950144318Sdas			bcopy(bp, buf, strlen(bp) + 1);
951144318Sdas		else
952144318Sdas			error = copyout(bp, buf, strlen(bp) + 1);
953190141Skib#ifdef KTRACE
954190141Skib	if (KTRPOINT(curthread, KTR_NAMEI))
955190141Skib		ktrnamei(bp);
956190141Skib#endif
957144318Sdas	}
958102870Siedowse	free(tmpbuf, M_TEMP);
95951906Sphk	return (error);
96051906Sphk}
96151906Sphk
96259652Sgreen/*
96359652Sgreen * Thus begins the fullpath magic.
96459652Sgreen */
96559652Sgreen
96659652Sgreen#undef STATNODE
96759652Sgreen#define STATNODE(name)							\
96859652Sgreen	static u_int name;						\
96962622Sjhb	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
97059652Sgreen
97159652Sgreenstatic int disablefullpath;
97291690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
97391690Seivind	"Disable the vn_fullpath function");
97459652Sgreen
975144318Sdas/* These count for kern___getcwd(), too. */
97659652SgreenSTATNODE(numfullpathcalls);
97759652SgreenSTATNODE(numfullpathfail1);
97859652SgreenSTATNODE(numfullpathfail2);
97959652SgreenSTATNODE(numfullpathfail4);
98059652SgreenSTATNODE(numfullpathfound);
98159652Sgreen
98291690Seivind/*
98391690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name
98491690Seivind * cache (if available)
98591690Seivind */
98659652Sgreenint
98785287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
98885287Sdes{
989144318Sdas	char *buf;
99059652Sgreen	struct filedesc *fdp;
991185298Smarcus	struct vnode *rdir;
992185298Smarcus	int error, vfslocked;
99359652Sgreen
99459652Sgreen	if (disablefullpath)
99559652Sgreen		return (ENODEV);
99685287Sdes	if (vn == NULL)
99759652Sgreen		return (EINVAL);
998144318Sdas
999111119Simp	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1000144318Sdas	fdp = td->td_proc->p_fd;
1001168355Srwatson	FILEDESC_SLOCK(fdp);
1002185298Smarcus	rdir = fdp->fd_rdir;
1003185298Smarcus	VREF(rdir);
1004168355Srwatson	FILEDESC_SUNLOCK(fdp);
1005185298Smarcus	error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
1006185298Smarcus	vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
1007185298Smarcus	vrele(rdir);
1008185298Smarcus	VFS_UNLOCK_GIANT(vfslocked);
1009144318Sdas
1010144318Sdas	if (!error)
1011144318Sdas		*freebuf = buf;
1012144318Sdas	else
1013144318Sdas		free(buf, M_TEMP);
1014144318Sdas	return (error);
1015144318Sdas}
1016144318Sdas
1017144318Sdas/*
1018181060Scsjp * This function is similar to vn_fullpath, but it attempts to lookup the
1019181060Scsjp * pathname relative to the global root mount point.  This is required for the
1020181060Scsjp * auditing sub-system, as audited pathnames must be absolute, relative to the
1021181060Scsjp * global root mount point.
1022181060Scsjp */
1023181060Scsjpint
1024181060Scsjpvn_fullpath_global(struct thread *td, struct vnode *vn,
1025181060Scsjp    char **retbuf, char **freebuf)
1026181060Scsjp{
1027181060Scsjp	char *buf;
1028181060Scsjp	int error;
1029181060Scsjp
1030181060Scsjp	if (disablefullpath)
1031181060Scsjp		return (ENODEV);
1032181060Scsjp	if (vn == NULL)
1033181060Scsjp		return (EINVAL);
1034181060Scsjp	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1035181060Scsjp	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
1036181060Scsjp	if (!error)
1037181060Scsjp		*freebuf = buf;
1038181060Scsjp	else
1039181060Scsjp		free(buf, M_TEMP);
1040181060Scsjp	return (error);
1041181060Scsjp}
1042181060Scsjp
1043193174Skibint
1044194601Skibvn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
1045193174Skib{
1046193174Skib	int error;
1047193174Skib
1048193174Skib	CACHE_RLOCK();
1049194601Skib	error = vn_vptocnp_locked(vp, cred, buf, buflen);
1050193174Skib	if (error == 0) {
1051193174Skib		/*
1052193174Skib		 * vn_vptocnp_locked() dropped hold acquired by
1053193174Skib		 * VOP_VPTOCNP immediately after locking the
1054193174Skib		 * cache. Since we are going to drop the cache rlock,
1055193174Skib		 * re-hold the result.
1056193174Skib		 */
1057193174Skib		vhold(*vp);
1058193174Skib		CACHE_RUNLOCK();
1059193174Skib	}
1060193174Skib	return (error);
1061193174Skib}
1062193174Skib
1063185956Smarcusstatic int
1064194601Skibvn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
1065194601Skib    u_int *buflen)
1066185956Smarcus{
1067185956Smarcus	struct vnode *dvp;
1068193174Skib	struct namecache *ncp;
1069185956Smarcus	int error, vfslocked;
1070185956Smarcus
1071193174Skib	TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
1072193174Skib		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1073193174Skib			break;
1074193174Skib	}
1075193174Skib	if (ncp != NULL) {
1076193174Skib		if (*buflen < ncp->nc_nlen) {
1077193174Skib			CACHE_RUNLOCK();
1078193174Skib			numfullpathfail4++;
1079193174Skib			error = ENOMEM;
1080193174Skib			SDT_PROBE(vfs, namecache, fullpath, return, error,
1081193186Skib			    vp, NULL, 0, 0);
1082193174Skib			return (error);
1083193174Skib		}
1084193174Skib		*buflen -= ncp->nc_nlen;
1085193174Skib		memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
1086193174Skib		SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp,
1087193174Skib		    ncp->nc_name, vp, 0, 0);
1088193174Skib		*vp = ncp->nc_dvp;
1089193174Skib		return (0);
1090193174Skib	}
1091193174Skib	SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0);
1092193174Skib
1093185956Smarcus	vhold(*vp);
1094187839Sjhb	CACHE_RUNLOCK();
1095185956Smarcus	vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1096185956Smarcus	vn_lock(*vp, LK_SHARED | LK_RETRY);
1097194601Skib	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
1098185956Smarcus	VOP_UNLOCK(*vp, 0);
1099186455Skib	vdrop(*vp);
1100185956Smarcus	VFS_UNLOCK_GIANT(vfslocked);
1101185956Smarcus	if (error) {
1102185956Smarcus		numfullpathfail2++;
1103193186Skib		SDT_PROBE(vfs, namecache, fullpath, return,  error, vp,
1104193174Skib		    NULL, 0, 0);
1105185956Smarcus		return (error);
1106185956Smarcus	}
1107193174Skib
1108185956Smarcus	*vp = dvp;
1109187839Sjhb	CACHE_RLOCK();
1110185956Smarcus	if ((*vp)->v_iflag & VI_DOOMED) {
1111185956Smarcus		/* forced unmount */
1112190697Skan		CACHE_RUNLOCK();
1113185956Smarcus		vdrop(*vp);
1114193174Skib		error = ENOENT;
1115193186Skib		SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
1116193174Skib		    NULL, 0, 0);
1117193174Skib		return (error);
1118185956Smarcus	}
1119185956Smarcus	vdrop(*vp);
1120185956Smarcus
1121185956Smarcus	return (0);
1122185956Smarcus}
1123185956Smarcus
1124181060Scsjp/*
1125144318Sdas * The magic behind kern___getcwd() and vn_fullpath().
1126144318Sdas */
1127144318Sdasstatic int
1128144318Sdasvn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
1129144318Sdas    char *buf, char **retbuf, u_int buflen)
1130144318Sdas{
1131193174Skib	int error, slash_prefixed;
1132190829Srwatson#ifdef KDTRACE_HOOKS
1133190829Srwatson	struct vnode *startvp = vp;
1134190829Srwatson#endif
1135144318Sdas
1136185956Smarcus	buflen--;
1137193174Skib	buf[buflen] = '\0';
1138144318Sdas	error = 0;
113959652Sgreen	slash_prefixed = 0;
1140144318Sdas
1141190829Srwatson	SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0);
1142193174Skib	numfullpathcalls++;
1143187839Sjhb	CACHE_RLOCK();
1144144318Sdas	if (vp->v_type != VDIR) {
1145194601Skib		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1146193174Skib		if (error)
1147190829Srwatson			return (error);
1148193518Smarcus		if (buflen == 0) {
1149193518Smarcus			CACHE_RUNLOCK();
1150193174Skib			return (ENOMEM);
1151193518Smarcus		}
1152193174Skib		buf[--buflen] = '/';
1153144318Sdas		slash_prefixed = 1;
1154144318Sdas	}
1155144318Sdas	while (vp != rdir && vp != rootvnode) {
1156101308Sjeff		if (vp->v_vflag & VV_ROOT) {
1157155385Sjeff			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
1158187839Sjhb				CACHE_RUNLOCK();
1159190387Sjhb				error = ENOENT;
1160193186Skib				SDT_PROBE(vfs, namecache, fullpath, return,
1161193186Skib				    error, vp, NULL, 0, 0);
1162144318Sdas				break;
116359652Sgreen			}
116459652Sgreen			vp = vp->v_mount->mnt_vnodecovered;
116559652Sgreen			continue;
116659652Sgreen		}
1167185956Smarcus		if (vp->v_type != VDIR) {
1168193174Skib			CACHE_RUNLOCK();
116959652Sgreen			numfullpathfail1++;
1170144318Sdas			error = ENOTDIR;
1171193186Skib			SDT_PROBE(vfs, namecache, fullpath, return,
1172193186Skib			    error, vp, NULL, 0, 0);
1173144318Sdas			break;
117459652Sgreen		}
1175194601Skib		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1176193174Skib		if (error)
1177193174Skib			break;
1178193174Skib		if (buflen == 0) {
1179193518Smarcus			CACHE_RUNLOCK();
1180144318Sdas			error = ENOMEM;
1181193186Skib			SDT_PROBE(vfs, namecache, fullpath, return, error,
1182193186Skib			    startvp, NULL, 0, 0);
1183144318Sdas			break;
118459652Sgreen		}
1185193174Skib		buf[--buflen] = '/';
118659652Sgreen		slash_prefixed = 1;
1187144318Sdas	}
1188193174Skib	if (error)
1189144318Sdas		return (error);
119059652Sgreen	if (!slash_prefixed) {
1191193174Skib		if (buflen == 0) {
1192193174Skib			CACHE_RUNLOCK();
1193120792Sjeff			numfullpathfail4++;
1194193186Skib			SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM,
1195193186Skib			    startvp, NULL, 0, 0);
119659652Sgreen			return (ENOMEM);
1197193174Skib		}
1198193174Skib		buf[--buflen] = '/';
119959652Sgreen	}
120059652Sgreen	numfullpathfound++;
1201187839Sjhb	CACHE_RUNLOCK();
1202144318Sdas
1203193186Skib	SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen,
1204193174Skib	    0, 0);
1205193174Skib	*retbuf = buf + buflen;
120659652Sgreen	return (0);
120759652Sgreen}
1208177782Skib
1209177782Skibint
1210177782Skibvn_commname(struct vnode *vp, char *buf, u_int buflen)
1211177782Skib{
1212177782Skib	struct namecache *ncp;
1213177782Skib	int l;
1214177782Skib
1215187839Sjhb	CACHE_RLOCK();
1216190533Skan	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
1217190533Skan		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1218190533Skan			break;
1219190533Skan	if (ncp == NULL) {
1220187839Sjhb		CACHE_RUNLOCK();
1221177782Skib		return (ENOENT);
1222177782Skib	}
1223177782Skib	l = min(ncp->nc_nlen, buflen - 1);
1224177782Skib	memcpy(buf, ncp->nc_name, l);
1225187839Sjhb	CACHE_RUNLOCK();
1226177782Skib	buf[l] = '\0';
1227177782Skib	return (0);
1228177782Skib}
1229