vfs_cache.c revision 139804
1139804Simp/*-
222521Sdyson * Copyright (c) 1989, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
522521Sdyson * This code is derived from software contributed to Berkeley by
622521Sdyson * Poul-Henning Kamp of the FreeBSD Project.
722521Sdyson *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 4. Neither the name of the University nor the names of its contributors
171541Srgrimes *    may be used to endorse or promote products derived from this software
181541Srgrimes *    without specific prior written permission.
191541Srgrimes *
201541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301541Srgrimes * SUCH DAMAGE.
311541Srgrimes *
3223521Sbde *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
331541Srgrimes */
341541Srgrimes
35116182Sobrien#include <sys/cdefs.h>
36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 139804 2005-01-06 23:35:40Z imp $");
37116182Sobrien
381541Srgrimes#include <sys/param.h>
391541Srgrimes#include <sys/systm.h>
4012820Sphk#include <sys/kernel.h>
4176166Smarkm#include <sys/lock.h>
4289316Salfred#include <sys/mutex.h>
4312820Sphk#include <sys/sysctl.h>
441541Srgrimes#include <sys/mount.h>
451541Srgrimes#include <sys/vnode.h>
461541Srgrimes#include <sys/namei.h>
471541Srgrimes#include <sys/malloc.h>
48102870Siedowse#include <sys/syscallsubr.h>
4951906Sphk#include <sys/sysproto.h>
5051906Sphk#include <sys/proc.h>
5151906Sphk#include <sys/filedesc.h>
5274384Speter#include <sys/fnv_hash.h>
531541Srgrimes
54116289Sdes#include <vm/uma.h>
55116289Sdes
5651906Sphk/*
5759652Sgreen * This structure describes the elements in the cache of recent
5859652Sgreen * names looked up by namei.
5959652Sgreen */
6059652Sgreen
6159652Sgreenstruct	namecache {
6260938Sjake	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
6360938Sjake	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
6460938Sjake	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
6559652Sgreen	struct	vnode *nc_dvp;		/* vnode of parent of name */
6659652Sgreen	struct	vnode *nc_vp;		/* vnode the name refers to */
6759652Sgreen	u_char	nc_flag;		/* flag bits */
6859652Sgreen	u_char	nc_nlen;		/* length of name */
6959652Sgreen	char	nc_name[0];		/* segment name */
7059652Sgreen};
7159652Sgreen
7259652Sgreen/*
731541Srgrimes * Name caching works as follows:
741541Srgrimes *
751541Srgrimes * Names found by directory scans are retained in a cache
761541Srgrimes * for future reference.  It is managed LRU, so frequently
771541Srgrimes * used names will hang around.  Cache is indexed by hash value
781541Srgrimes * obtained from (vp, name) where vp refers to the directory
791541Srgrimes * containing name.
801541Srgrimes *
8122521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to
8222521Sdyson * exist) the vnode pointer will be NULL.
836968Sphk *
841541Srgrimes * Upon reaching the last segment of a path, if the reference
851541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the
861541Srgrimes * name is located in the cache, it will be dropped.
871541Srgrimes */
881541Srgrimes
891541Srgrimes/*
901541Srgrimes * Structures associated with name cacheing.
911541Srgrimes */
9274501Speter#define NCHHASH(hash) \
9374501Speter	(&nchashtbl[(hash) & nchash])
9460938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
9560938Sjakestatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
9623521Sbdestatic u_long	nchash;			/* size of hash table */
9762622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
9825453Sphkstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
9962622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
10091690Seivindstatic u_long	numneg;			/* number of cache entries allocated */
10162622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
10223521Sbdestatic u_long	numcache;		/* number of cache entries allocated */
10362622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
10475654Stanimurastatic u_long	numcachehv;		/* number of cache entries with vnodes held */
10575654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
10684249Sdillon#if 0
10775654Stanimurastatic u_long	numcachepl;		/* number of cache purge for leaf entries */
10875654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
10984249Sdillon#endif
11022521Sdysonstruct	nchstats nchstats;		/* cache effectiveness statistics */
1111541Srgrimes
112120792Sjeffstruct mtx cache_lock;
113120792SjeffMTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);
114120792Sjeff
115120792Sjeff#define	CACHE_LOCK()	mtx_lock(&cache_lock)
116120792Sjeff#define	CACHE_UNLOCK()	mtx_unlock(&cache_lock)
117120792Sjeff
118116289Sdes/*
119116289Sdes * UMA zones for the VFS cache.
120116289Sdes *
121116289Sdes * The small cache is used for entries with short names, which are the
122116289Sdes * most common.  The large cache is used for entries which are too big to
123116289Sdes * fit in the small cache.
124116289Sdes */
125116289Sdesstatic uma_zone_t cache_zone_small;
126116289Sdesstatic uma_zone_t cache_zone_large;
127116289Sdes
128116289Sdes#define	CACHE_PATH_CUTOFF	32
129116289Sdes#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
130116289Sdes#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)
131116289Sdes
132116289Sdes#define cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
133116289Sdes	cache_zone_small : cache_zone_large, M_WAITOK)
134116289Sdes#define cache_free(ncp)		do { \
135116289Sdes	if (ncp != NULL) \
136116289Sdes		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
137116289Sdes		    cache_zone_small : cache_zone_large, (ncp)); \
138116289Sdes} while (0)
139116289Sdes
14023521Sbdestatic int	doingcache = 1;		/* 1 => enable the cache */
14123521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
14291690Seivind
14391690Seivind/* Export size information to userland */
14425453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
14525453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
14623521Sbde
14729788Sphk/*
14829788Sphk * The new name cache statistics
14929788Sphk */
15038984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
15129788Sphk#define STATNODE(mode, name, var) \
15262622Sjhb	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
15329788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg);
15429788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache);
15529788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
15629788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
15729788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
15829788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
15929788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
16029804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
16129788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
16229788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
16329788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
16429788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
16529788Sphk
16668922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
167116201Sdes	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
16829788Sphk
16968922Srwatson
17068922Srwatson
171120792Sjeffstatic void cache_zap(struct namecache *ncp, int locked);
1726968Sphk
17369774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
17451906Sphk
17522521Sdyson/*
17625453Sphk * Flags in namecache.nc_flag
17725453Sphk */
17825453Sphk#define NCF_WHITE	1
17975402Speter
18025453Sphk/*
18175402Speter * Grab an atomic snapshot of the name cache hash chain lengths
18275402Speter */
18375402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
18475402Speter
18575402Speterstatic int
18675402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
18775402Speter{
18875402Speter	int error;
18975402Speter	struct nchashhead *ncpp;
19075402Speter	struct namecache *ncp;
19175402Speter	int n_nchash;
19275402Speter	int count;
19375402Speter
19475402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
19575402Speter	if (!req->oldptr)
19675402Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
19775402Speter
19875402Speter	/* Scan hash tables for applicable entries */
19975402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
20075402Speter		count = 0;
20175402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
20275402Speter			count++;
20375402Speter		}
20498994Salfred		error = SYSCTL_OUT(req, &count, sizeof(count));
20575402Speter		if (error)
20675402Speter			return (error);
20775402Speter	}
20875402Speter	return (0);
20975402Speter}
21075402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
21175402Speter	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
21275402Speter
21375402Speterstatic int
21475402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
21575402Speter{
21675402Speter	int error;
21775402Speter	struct nchashhead *ncpp;
21875402Speter	struct namecache *ncp;
21975402Speter	int n_nchash;
22075402Speter	int count, maxlength, used, pct;
22175402Speter
22275402Speter	if (!req->oldptr)
22375402Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
22475402Speter
22575402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
22675402Speter	used = 0;
22775402Speter	maxlength = 0;
22875402Speter
22975402Speter	/* Scan hash tables for applicable entries */
23075402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
23175402Speter		count = 0;
23275402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
23375402Speter			count++;
23475402Speter		}
23575402Speter		if (count)
23675402Speter			used++;
23775402Speter		if (maxlength < count)
23875402Speter			maxlength = count;
23975402Speter	}
24075402Speter	n_nchash = nchash + 1;
24175402Speter	pct = (used * 100 * 100) / n_nchash;
24298994Salfred	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
24375402Speter	if (error)
24475402Speter		return (error);
24598994Salfred	error = SYSCTL_OUT(req, &used, sizeof(used));
24675402Speter	if (error)
24775402Speter		return (error);
24898994Salfred	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
24975402Speter	if (error)
25075402Speter		return (error);
25198994Salfred	error = SYSCTL_OUT(req, &pct, sizeof(pct));
25275402Speter	if (error)
25375402Speter		return (error);
25475402Speter	return (0);
25575402Speter}
25675402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
25775402Speter	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
25875402Speter
25975402Speter/*
260110952Sarr * cache_zap():
261110952Sarr *
262110952Sarr *   Removes a namecache entry from cache, whether it contains an actual
263110952Sarr *   pointer to a vnode or if it is just a negative cache entry.
26422521Sdyson */
26525453Sphkstatic void
266120792Sjeffcache_zap(ncp, locked)
26725453Sphk	struct namecache *ncp;
268120792Sjeff	int locked;
26925453Sphk{
270120792Sjeff	struct vnode *vp;
271120792Sjeff
272120792Sjeff	vp = NULL;
273120792Sjeff	if (!locked)
274120792Sjeff		CACHE_LOCK();
27525453Sphk	LIST_REMOVE(ncp, nc_hash);
27625453Sphk	LIST_REMOVE(ncp, nc_src);
27775654Stanimura	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
278120792Sjeff		vp = ncp->nc_dvp;
27975654Stanimura		numcachehv--;
28075654Stanimura	}
28125453Sphk	if (ncp->nc_vp) {
28225453Sphk		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
28325453Sphk	} else {
28425453Sphk		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
28525453Sphk		numneg--;
28625453Sphk	}
28725453Sphk	numcache--;
288120792Sjeff	CACHE_UNLOCK();
289116289Sdes	cache_free(ncp);
290120792Sjeff	if (vp)
291120792Sjeff		vdrop(vp);
292120792Sjeff	if (locked)
293120792Sjeff		CACHE_LOCK();
29422521Sdyson}
2956968Sphk
29622521Sdyson/*
29784249Sdillon * cache_leaf_test()
298116201Sdes *
29984249Sdillon *      Test whether this (directory) vnode's namei cache entry contains
30084249Sdillon *      subdirectories or not.  Used to determine whether the directory is
301116201Sdes *      a leaf in the namei cache or not.  Note: the directory may still
30284249Sdillon *      contain files in the namei cache.
30384249Sdillon *
30484249Sdillon *      Returns 0 if the directory is a leaf, -1 if it isn't.
30584249Sdillon */
30684249Sdillonint
30784249Sdilloncache_leaf_test(struct vnode *vp)
30884249Sdillon{
30984249Sdillon	struct namecache *ncpc;
310120792Sjeff	int leaf;
31184249Sdillon
312120792Sjeff	leaf = 0;
313120792Sjeff	CACHE_LOCK();
31484249Sdillon	for (ncpc = LIST_FIRST(&vp->v_cache_src);
31584249Sdillon	     ncpc != NULL;
31684249Sdillon	     ncpc = LIST_NEXT(ncpc, nc_src)
317120792Sjeff	 ) {
318120792Sjeff		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
319120792Sjeff			leaf = -1;
320120792Sjeff			break;
321120792Sjeff		}
32284249Sdillon	}
323120792Sjeff	CACHE_UNLOCK();
324120792Sjeff	return (leaf);
32584249Sdillon}
32684249Sdillon
32784249Sdillon/*
32823521Sbde * Lookup an entry in the cache
3296968Sphk *
3306968Sphk * Lookup is called with dvp pointing to the directory to search,
33122521Sdyson * cnp pointing to the name of the entry being sought. If the lookup
33222521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is
33322521Sdyson * returned. If the lookup determines that the name does not exist
33422521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup
33522521Sdyson * fails, a status of zero is returned.
3361541Srgrimes */
3376968Sphk
3381541Srgrimesint
3391541Srgrimescache_lookup(dvp, vpp, cnp)
3401541Srgrimes	struct vnode *dvp;
3411541Srgrimes	struct vnode **vpp;
3421541Srgrimes	struct componentname *cnp;
3431541Srgrimes{
34451906Sphk	struct namecache *ncp;
34574384Speter	u_int32_t hash;
3461541Srgrimes
3476928Sphk	if (!doingcache) {
3486928Sphk		cnp->cn_flags &= ~MAKEENTRY;
3491541Srgrimes		return (0);
3506928Sphk	}
35125453Sphk
352120792Sjeff	CACHE_LOCK();
35329788Sphk	numcalls++;
35429788Sphk
35525453Sphk	if (cnp->cn_nameptr[0] == '.') {
35625453Sphk		if (cnp->cn_namelen == 1) {
35725453Sphk			*vpp = dvp;
35829788Sphk			dothits++;
359120792Sjeff			CACHE_UNLOCK();
36025453Sphk			return (-1);
36125453Sphk		}
36225453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
36329788Sphk			dotdothits++;
36425453Sphk			if (dvp->v_dd->v_id != dvp->v_ddid ||
36525453Sphk			    (cnp->cn_flags & MAKEENTRY) == 0) {
36625453Sphk				dvp->v_ddid = 0;
367120792Sjeff				CACHE_UNLOCK();
36825453Sphk				return (0);
36925453Sphk			}
37025453Sphk			*vpp = dvp->v_dd;
371120792Sjeff			CACHE_UNLOCK();
37225453Sphk			return (-1);
37325453Sphk		}
3741541Srgrimes	}
3756968Sphk
37674501Speter	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
37774501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
37874501Speter	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
37929788Sphk		numchecks++;
38025453Sphk		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
38131879Sbde		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
38222521Sdyson			break;
3831541Srgrimes	}
3846968Sphk
38522521Sdyson	/* We failed to find an entry */
38622521Sdyson	if (ncp == 0) {
38729804Sphk		if ((cnp->cn_flags & MAKEENTRY) == 0) {
38829804Sphk			nummisszap++;
38929804Sphk		} else {
39029804Sphk			nummiss++;
39129804Sphk		}
39222521Sdyson		nchstats.ncs_miss++;
393120792Sjeff		CACHE_UNLOCK();
39422521Sdyson		return (0);
39522521Sdyson	}
39622521Sdyson
3976968Sphk	/* We don't want to have an entry, so dump it */
3986928Sphk	if ((cnp->cn_flags & MAKEENTRY) == 0) {
39929788Sphk		numposzaps++;
4001541Srgrimes		nchstats.ncs_badhits++;
401120792Sjeff		CACHE_UNLOCK();
402120792Sjeff		cache_zap(ncp, 0);
4036968Sphk		return (0);
40423521Sbde	}
4056968Sphk
4066968Sphk	/* We found a "positive" match, return the vnode */
407116201Sdes	if (ncp->nc_vp) {
40829788Sphk		numposhits++;
4091541Srgrimes		nchstats.ncs_goodhits++;
4101541Srgrimes		*vpp = ncp->nc_vp;
411120792Sjeff		CACHE_UNLOCK();
4121541Srgrimes		return (-1);
4131541Srgrimes	}
4141541Srgrimes
4156968Sphk	/* We found a negative match, and want to create it, so purge */
4166968Sphk	if (cnp->cn_nameiop == CREATE) {
41729788Sphk		numnegzaps++;
4187013Sphk		nchstats.ncs_badhits++;
419120792Sjeff		CACHE_UNLOCK();
420120792Sjeff		cache_zap(ncp, 0);
4216968Sphk		return (0);
4226968Sphk	}
4236968Sphk
42429788Sphk	numneghits++;
42522521Sdyson	/*
426110967Sarr	 * We found a "negative" match, so we shift it to the end of
427110967Sarr	 * the "negative" cache entries queue to satisfy LRU.  Also,
428110967Sarr	 * check to see if the entry is a whiteout; indicate this to
429110967Sarr	 * the componentname, if so.
43022521Sdyson	 */
43125453Sphk	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
43225453Sphk	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
4336968Sphk	nchstats.ncs_neghits++;
43425453Sphk	if (ncp->nc_flag & NCF_WHITE)
43525453Sphk		cnp->cn_flags |= ISWHITEOUT;
436120792Sjeff	CACHE_UNLOCK();
4376968Sphk	return (ENOENT);
4381541Srgrimes}
4391541Srgrimes
4401541Srgrimes/*
4416968Sphk * Add an entry to the cache.
4421541Srgrimes */
4431549Srgrimesvoid
4441541Srgrimescache_enter(dvp, vp, cnp)
4451541Srgrimes	struct vnode *dvp;
4461541Srgrimes	struct vnode *vp;
4471541Srgrimes	struct componentname *cnp;
4481541Srgrimes{
44951906Sphk	struct namecache *ncp;
45051906Sphk	struct nchashhead *ncpp;
45174384Speter	u_int32_t hash;
452120792Sjeff	int hold;
453120792Sjeff	int zap;
45451906Sphk	int len;
4551541Srgrimes
4561541Srgrimes	if (!doingcache)
4571541Srgrimes		return;
4586968Sphk
45925453Sphk	if (cnp->cn_nameptr[0] == '.') {
46025453Sphk		if (cnp->cn_namelen == 1) {
46125453Sphk			return;
4626928Sphk		}
46325453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
46425453Sphk			if (vp) {
46525453Sphk				dvp->v_dd = vp;
46625453Sphk				dvp->v_ddid = vp->v_id;
46725453Sphk			} else {
46825453Sphk				dvp->v_dd = dvp;
46925453Sphk				dvp->v_ddid = 0;
47025453Sphk			}
47125453Sphk			return;
47225453Sphk		}
4736968Sphk	}
474116201Sdes
475120792Sjeff	hold = 0;
476120792Sjeff	zap = 0;
477116289Sdes	ncp = cache_alloc(cnp->cn_namelen);
478120792Sjeff	CACHE_LOCK();
47925453Sphk	numcache++;
48028954Sphk	if (!vp) {
48125453Sphk		numneg++;
48228954Sphk		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
48329071Sphk	} else if (vp->v_type == VDIR) {
48429071Sphk		vp->v_dd = dvp;
48529071Sphk		vp->v_ddid = dvp->v_id;
48628954Sphk	}
48723521Sbde
48822521Sdyson	/*
489110967Sarr	 * Set the rest of the namecache entry elements, calculate it's
490110967Sarr	 * hash key and insert it into the appropriate chain within
491110967Sarr	 * the cache entries table.
49222521Sdyson	 */
4931541Srgrimes	ncp->nc_vp = vp;
4941541Srgrimes	ncp->nc_dvp = dvp;
49551906Sphk	len = ncp->nc_nlen = cnp->cn_namelen;
49674501Speter	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
49774384Speter	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
49874501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
49974501Speter	ncpp = NCHHASH(hash);
5006928Sphk	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
50175654Stanimura	if (LIST_EMPTY(&dvp->v_cache_src)) {
502120792Sjeff		hold = 1;
50375654Stanimura		numcachehv++;
50475654Stanimura	}
50525453Sphk	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
506110967Sarr	/*
507110967Sarr	 * If the entry is "negative", we place it into the
508110967Sarr	 * "negative" cache queue, otherwise, we place it into the
509110967Sarr	 * destination vnode's cache entries queue.
510110967Sarr	 */
51125453Sphk	if (vp) {
51225453Sphk		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
51325453Sphk	} else {
51425453Sphk		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
51525453Sphk	}
51651906Sphk	if (numneg * ncnegfactor > numcache) {
51725453Sphk		ncp = TAILQ_FIRST(&ncneg);
518120792Sjeff		zap = 1;
51925453Sphk	}
520120792Sjeff	CACHE_UNLOCK();
521120792Sjeff	if (hold)
522120792Sjeff		vhold(dvp);
523120792Sjeff	if (zap)
524120792Sjeff		cache_zap(ncp, 0);
5251541Srgrimes}
5261541Srgrimes
5271541Srgrimes/*
5281541Srgrimes * Name cache initialization, from vfs_init() when we are booting
5291541Srgrimes */
53069664Speterstatic void
53169664Speternchinit(void *dummy __unused)
5321541Srgrimes{
53323521Sbde
53425453Sphk	TAILQ_INIT(&ncneg);
535116289Sdes
536116289Sdes	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
537116289Sdes	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
538116289Sdes	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
539116289Sdes	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
540116289Sdes
54169664Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
5421541Srgrimes}
54369664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
5441541Srgrimes
54569664Speter
5461541Srgrimes/*
54746011Sphk * Invalidate all entries to a particular vnode.
54823521Sbde *
54946011Sphk * Remove all entries in the namecache relating to this vnode and
55046011Sphk * change the v_id.  We take the v_id from a global counter, since
55146011Sphk * it becomes a handy sequence number in crash-dumps that way.
55246011Sphk * No valid vnode will ever have (v_id == 0).
55346011Sphk *
55446011Sphk * XXX: Only time and the size of v_id prevents this from failing:
55546011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id)
55646011Sphk * XXX: soft references and nuke them, at least on the global
55746011Sphk * XXX: v_id wraparound.  The period of resistance can be extended
55846011Sphk * XXX: by incrementing each vnodes v_id individually instead of
55946011Sphk * XXX: using the global v_id.
5601541Srgrimes */
56146011Sphk
562120792Sjeff/*
563120792Sjeff * XXX This is sometimes called when a vnode may still be re-used, in which
564120792Sjeff * case v_dd may be invalid.  Need to look this up.
565120792Sjeff */
5661549Srgrimesvoid
5671541Srgrimescache_purge(vp)
5681541Srgrimes	struct vnode *vp;
5691541Srgrimes{
57029094Sphk	static u_long nextid;
5711541Srgrimes
572120792Sjeff	CACHE_LOCK();
573116201Sdes	while (!LIST_EMPTY(&vp->v_cache_src))
574120792Sjeff		cache_zap(LIST_FIRST(&vp->v_cache_src), 1);
575116201Sdes	while (!TAILQ_EMPTY(&vp->v_cache_dst))
576120792Sjeff		cache_zap(TAILQ_FIRST(&vp->v_cache_dst), 1);
57725453Sphk
57846011Sphk	do
57946011Sphk		nextid++;
58046011Sphk	while (nextid == vp->v_id || !nextid);
58129094Sphk	vp->v_id = nextid;
58225453Sphk	vp->v_dd = vp;
58325453Sphk	vp->v_ddid = 0;
584120792Sjeff	CACHE_UNLOCK();
5851541Srgrimes}
5861541Srgrimes
5871541Srgrimes/*
5886968Sphk * Flush all entries referencing a particular filesystem.
5891541Srgrimes *
5906968Sphk * Since we need to check it anyway, we will flush all the invalid
59112968Sphk * entries at the same time.
5921541Srgrimes */
5931549Srgrimesvoid
5941541Srgrimescache_purgevfs(mp)
5951541Srgrimes	struct mount *mp;
5961541Srgrimes{
5976968Sphk	struct nchashhead *ncpp;
59822521Sdyson	struct namecache *ncp, *nnp;
599120792Sjeff	struct nchashhead mplist;
6001541Srgrimes
601120792Sjeff	LIST_INIT(&mplist);
602120792Sjeff	ncp = NULL;
603120792Sjeff
6046968Sphk	/* Scan hash tables for applicable entries */
605120792Sjeff	CACHE_LOCK();
60629071Sphk	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
60725453Sphk		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
60825453Sphk			nnp = LIST_NEXT(ncp, nc_hash);
60925453Sphk			if (ncp->nc_dvp->v_mount == mp) {
610120792Sjeff				LIST_REMOVE(ncp, nc_hash);
611120792Sjeff				LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
6126968Sphk			}
6131541Srgrimes		}
6141541Srgrimes	}
615120792Sjeff	CACHE_UNLOCK();
616120792Sjeff	while (!LIST_EMPTY(&mplist))
617120792Sjeff		cache_zap(LIST_FIRST(&mplist), 0);
6181541Srgrimes}
61928787Sphk
62028787Sphk/*
62128787Sphk * Perform canonical checks and cache lookup and pass on to filesystem
62228787Sphk * through the vop_cachedlookup only if needed.
62328787Sphk */
62428787Sphk
62528787Sphkint
62628787Sphkvfs_cache_lookup(ap)
62728787Sphk	struct vop_lookup_args /* {
62828787Sphk		struct vnode *a_dvp;
62928787Sphk		struct vnode **a_vpp;
63028787Sphk		struct componentname *a_cnp;
63128787Sphk	} */ *ap;
63228787Sphk{
63365665Sbp	struct vnode *dvp, *vp;
63465665Sbp	int lockparent;
63528787Sphk	int error;
63628787Sphk	struct vnode **vpp = ap->a_vpp;
63728787Sphk	struct componentname *cnp = ap->a_cnp;
63828787Sphk	struct ucred *cred = cnp->cn_cred;
63928787Sphk	int flags = cnp->cn_flags;
64083366Sjulian	struct thread *td = cnp->cn_thread;
64128787Sphk	u_long vpid;	/* capability number of vnode */
64228787Sphk
64328787Sphk	*vpp = NULL;
64465665Sbp	dvp = ap->a_dvp;
64528787Sphk	lockparent = flags & LOCKPARENT;
64628787Sphk
64765665Sbp	if (dvp->v_type != VDIR)
648116201Sdes		return (ENOTDIR);
64928787Sphk
65065665Sbp	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
65128787Sphk	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
65228787Sphk		return (EROFS);
65328787Sphk
65483366Sjulian	error = VOP_ACCESS(dvp, VEXEC, cred, td);
65528787Sphk
65628787Sphk	if (error)
65728787Sphk		return (error);
65828787Sphk
65965665Sbp	error = cache_lookup(dvp, vpp, cnp);
66028787Sphk
66196616Sjeff#ifdef LOOKUP_SHARED
66292130Sjeff	if (!error) {
66392130Sjeff		/* We do this because the rest of the system now expects to get
66492130Sjeff		 * a shared lock, which is later upgraded if LOCKSHARED is not
66592130Sjeff		 * set.  We have so many cases here because of bugs that yield
66692130Sjeff		 * inconsistant lock states.  This all badly needs to be fixed
66792130Sjeff		 */
66892130Sjeff		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
66992130Sjeff		if (!error) {
67092130Sjeff			int flock;
67192130Sjeff
67292130Sjeff			flock = VOP_ISLOCKED(*vpp, td);
67392130Sjeff			if (flock != LK_EXCLUSIVE) {
67492130Sjeff				if (flock == 0) {
67592130Sjeff					if ((flags & ISLASTCN) &&
67692130Sjeff					    (flags & LOCKSHARED))
67792130Sjeff						VOP_LOCK(*vpp, LK_SHARED, td);
67892130Sjeff					else
67992130Sjeff						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
68092130Sjeff				}
68192130Sjeff			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
68292130Sjeff				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
68392130Sjeff		}
68492130Sjeff		return (error);
68592130Sjeff	}
68692130Sjeff#else
687116201Sdes	if (!error)
68865665Sbp		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
68992130Sjeff#endif
69028787Sphk
69128787Sphk	if (error == ENOENT)
69228787Sphk		return (error);
69328787Sphk
69465665Sbp	vp = *vpp;
69565665Sbp	vpid = vp->v_id;
69665973Sbp	cnp->cn_flags &= ~PDIRUNLOCK;
69765665Sbp	if (dvp == vp) {   /* lookup on "." */
69865665Sbp		VREF(vp);
69928787Sphk		error = 0;
70028787Sphk	} else if (flags & ISDOTDOT) {
70183366Sjulian		VOP_UNLOCK(dvp, 0, td);
70265973Sbp		cnp->cn_flags |= PDIRUNLOCK;
70396616Sjeff#ifdef LOOKUP_SHARED
70492130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
70592130Sjeff			error = vget(vp, LK_SHARED, td);
70692130Sjeff		else
70792130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
70892130Sjeff#else
70983366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
71092130Sjeff#endif
71192130Sjeff
71265973Sbp		if (!error && lockparent && (flags & ISLASTCN)) {
71383366Sjulian			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
71465973Sbp				cnp->cn_flags &= ~PDIRUNLOCK;
71565973Sbp		}
71628787Sphk	} else {
71796616Sjeff#ifdef LOOKUP_SHARED
71892130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
71992130Sjeff			error = vget(vp, LK_SHARED, td);
72092130Sjeff		else
72192130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
72292130Sjeff#else
72383366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
72492130Sjeff#endif
72565973Sbp		if (!lockparent || error || !(flags & ISLASTCN)) {
72683366Sjulian			VOP_UNLOCK(dvp, 0, td);
72765973Sbp			cnp->cn_flags |= PDIRUNLOCK;
72865973Sbp		}
72928787Sphk	}
73028787Sphk	/*
73128787Sphk	 * Check that the capability number did not change
73228787Sphk	 * while we were waiting for the lock.
73328787Sphk	 */
73428787Sphk	if (!error) {
73565665Sbp		if (vpid == vp->v_id)
73628787Sphk			return (0);
73765665Sbp		vput(vp);
73865973Sbp		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
73983366Sjulian			VOP_UNLOCK(dvp, 0, td);
74065973Sbp			cnp->cn_flags |= PDIRUNLOCK;
74165973Sbp		}
74228787Sphk	}
74365973Sbp	if (cnp->cn_flags & PDIRUNLOCK) {
74483366Sjulian		error = vn_lock(dvp, LK_EXCLUSIVE, td);
74565973Sbp		if (error)
74665973Sbp			return (error);
74765973Sbp		cnp->cn_flags &= ~PDIRUNLOCK;
74865973Sbp	}
74996616Sjeff#ifdef LOOKUP_SHARED
75092130Sjeff	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
75192130Sjeff
75292130Sjeff	if (!error) {
75392130Sjeff		int flock = 0;
75492130Sjeff
75592130Sjeff		flock = VOP_ISLOCKED(*vpp, td);
75692130Sjeff		if (flock != LK_EXCLUSIVE) {
75792130Sjeff			if (flock == 0) {
75892130Sjeff				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
75992130Sjeff					VOP_LOCK(*vpp, LK_SHARED, td);
76092130Sjeff				else
76192130Sjeff					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
76292130Sjeff			}
76392130Sjeff		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
76492130Sjeff			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
76592130Sjeff	}
76692130Sjeff
76792130Sjeff	return (error);
76892130Sjeff#else
76965665Sbp	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
77092130Sjeff#endif
77128787Sphk}
77251906Sphk
77351906Sphk
77451906Sphk#ifndef _SYS_SYSPROTO_H_
77551906Sphkstruct  __getcwd_args {
77651906Sphk	u_char	*buf;
77751906Sphk	u_int	buflen;
77851906Sphk};
77951906Sphk#endif
78051906Sphk
78191690Seivind/*
78291690Seivind * XXX All of these sysctls would probably be more productive dead.
78391690Seivind */
78451906Sphkstatic int disablecwd;
78591690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
78691690Seivind   "Disable the getcwd syscall");
78751906Sphk
78891690Seivind/* Various statistics for the getcwd syscall */
78951906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
79051906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
79151906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
79251906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
79351906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
79451906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
79591690Seivind
79691690Seivind/* Implementation of the getcwd syscall */
79751906Sphkint
79883366Sjulian__getcwd(td, uap)
79983366Sjulian	struct thread *td;
80051906Sphk	struct __getcwd_args *uap;
80151906Sphk{
802112430Sphk
803102870Siedowse	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
804102870Siedowse}
805102870Siedowse
806102870Siedowseint
807112430Sphkkern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
808102870Siedowse{
809102870Siedowse	char *bp, *tmpbuf;
81051906Sphk	int error, i, slash_prefixed;
81151906Sphk	struct filedesc *fdp;
81251906Sphk	struct namecache *ncp;
81351906Sphk	struct vnode *vp;
81451906Sphk
81551906Sphk	numcwdcalls++;
816112430Sphk	if (disablecwd)
81751906Sphk		return (ENODEV);
818102870Siedowse	if (buflen < 2)
81951906Sphk		return (EINVAL);
820102870Siedowse	if (buflen > MAXPATHLEN)
821102870Siedowse		buflen = MAXPATHLEN;
822102870Siedowse	error = 0;
823111119Simp	tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
824102870Siedowse	bp += buflen - 1;
82551906Sphk	*bp = '\0';
82683366Sjulian	fdp = td->td_proc->p_fd;
82751906Sphk	slash_prefixed = 0;
82889306Salfred	FILEDESC_LOCK(fdp);
82951906Sphk	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
830101308Sjeff		if (vp->v_vflag & VV_ROOT) {
83183000Siedowse			if (vp->v_mount == NULL) {	/* forced unmount */
83289306Salfred				FILEDESC_UNLOCK(fdp);
833102870Siedowse				free(tmpbuf, M_TEMP);
83457199Speter				return (EBADF);
83583000Siedowse			}
83651906Sphk			vp = vp->v_mount->mnt_vnodecovered;
83751906Sphk			continue;
83851906Sphk		}
83951906Sphk		if (vp->v_dd->v_id != vp->v_ddid) {
84089306Salfred			FILEDESC_UNLOCK(fdp);
84151906Sphk			numcwdfail1++;
842102870Siedowse			free(tmpbuf, M_TEMP);
84351906Sphk			return (ENOTDIR);
84451906Sphk		}
845120792Sjeff		CACHE_LOCK();
84651906Sphk		ncp = TAILQ_FIRST(&vp->v_cache_dst);
84751906Sphk		if (!ncp) {
848120792Sjeff			numcwdfail2++;
849120792Sjeff			CACHE_UNLOCK();
85089306Salfred			FILEDESC_UNLOCK(fdp);
851102870Siedowse			free(tmpbuf, M_TEMP);
85251906Sphk			return (ENOENT);
85351906Sphk		}
85451906Sphk		if (ncp->nc_dvp != vp->v_dd) {
855120792Sjeff			numcwdfail3++;
856120792Sjeff			CACHE_UNLOCK();
85789306Salfred			FILEDESC_UNLOCK(fdp);
858102870Siedowse			free(tmpbuf, M_TEMP);
85951906Sphk			return (EBADF);
86051906Sphk		}
86151906Sphk		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
862102870Siedowse			if (bp == tmpbuf) {
863120792Sjeff				numcwdfail4++;
864120792Sjeff				CACHE_UNLOCK();
86589306Salfred				FILEDESC_UNLOCK(fdp);
866102870Siedowse				free(tmpbuf, M_TEMP);
86751906Sphk				return (ENOMEM);
86851906Sphk			}
86951906Sphk			*--bp = ncp->nc_name[i];
87051906Sphk		}
871102870Siedowse		if (bp == tmpbuf) {
872120792Sjeff			numcwdfail4++;
873120792Sjeff			CACHE_UNLOCK();
87489306Salfred			FILEDESC_UNLOCK(fdp);
875102870Siedowse			free(tmpbuf, M_TEMP);
87651906Sphk			return (ENOMEM);
87751906Sphk		}
87851906Sphk		*--bp = '/';
87951906Sphk		slash_prefixed = 1;
88051906Sphk		vp = vp->v_dd;
881120792Sjeff		CACHE_UNLOCK();
88251906Sphk	}
88389306Salfred	FILEDESC_UNLOCK(fdp);
88451906Sphk	if (!slash_prefixed) {
885102870Siedowse		if (bp == tmpbuf) {
88651906Sphk			numcwdfail4++;
887102870Siedowse			free(tmpbuf, M_TEMP);
88851906Sphk			return (ENOMEM);
88951906Sphk		}
89051906Sphk		*--bp = '/';
89151906Sphk	}
89251906Sphk	numcwdfound++;
893102870Siedowse	if (bufseg == UIO_SYSSPACE)
894102870Siedowse		bcopy(bp, buf, strlen(bp) + 1);
895102870Siedowse	else
896102870Siedowse		error = copyout(bp, buf, strlen(bp) + 1);
897102870Siedowse	free(tmpbuf, M_TEMP);
89851906Sphk	return (error);
89951906Sphk}
90051906Sphk
90159652Sgreen/*
90259652Sgreen * Thus begins the fullpath magic.
90359652Sgreen */
90459652Sgreen
90559652Sgreen#undef STATNODE
90659652Sgreen#define STATNODE(name)							\
90759652Sgreen	static u_int name;						\
90862622Sjhb	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
90959652Sgreen
91059652Sgreenstatic int disablefullpath;
91191690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
91291690Seivind	"Disable the vn_fullpath function");
91359652Sgreen
91459652SgreenSTATNODE(numfullpathcalls);
91559652SgreenSTATNODE(numfullpathfail1);
91659652SgreenSTATNODE(numfullpathfail2);
91759652SgreenSTATNODE(numfullpathfail3);
91859652SgreenSTATNODE(numfullpathfail4);
91959652SgreenSTATNODE(numfullpathfound);
92059652Sgreen
92191690Seivind/*
92291690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name
92391690Seivind * cache (if available)
92491690Seivind */
92559652Sgreenint
92685287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
92785287Sdes{
92859652Sgreen	char *bp, *buf;
92959652Sgreen	int i, slash_prefixed;
93059652Sgreen	struct filedesc *fdp;
93159652Sgreen	struct namecache *ncp;
93285287Sdes	struct vnode *vp;
93359652Sgreen
93459652Sgreen	numfullpathcalls++;
93559652Sgreen	if (disablefullpath)
93659652Sgreen		return (ENODEV);
93785287Sdes	if (vn == NULL)
93859652Sgreen		return (EINVAL);
939111119Simp	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
94059652Sgreen	bp = buf + MAXPATHLEN - 1;
94159652Sgreen	*bp = '\0';
94285287Sdes	fdp = td->td_proc->p_fd;
94359652Sgreen	slash_prefixed = 0;
944120792Sjeff	ASSERT_VOP_LOCKED(vn, "vn_fullpath");
94589306Salfred	FILEDESC_LOCK(fdp);
94685287Sdes	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
947101308Sjeff		if (vp->v_vflag & VV_ROOT) {
94859652Sgreen			if (vp->v_mount == NULL) {	/* forced unmount */
94989306Salfred				FILEDESC_UNLOCK(fdp);
95059652Sgreen				free(buf, M_TEMP);
95159652Sgreen				return (EBADF);
95259652Sgreen			}
95359652Sgreen			vp = vp->v_mount->mnt_vnodecovered;
95459652Sgreen			continue;
95559652Sgreen		}
95685287Sdes		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
95789306Salfred			FILEDESC_UNLOCK(fdp);
958120792Sjeff			free(buf, M_TEMP);
95959652Sgreen			numfullpathfail1++;
96059652Sgreen			return (ENOTDIR);
96159652Sgreen		}
962120792Sjeff		CACHE_LOCK();
96359652Sgreen		ncp = TAILQ_FIRST(&vp->v_cache_dst);
96459652Sgreen		if (!ncp) {
965120792Sjeff			numfullpathfail2++;
966120792Sjeff			CACHE_UNLOCK();
96789306Salfred			FILEDESC_UNLOCK(fdp);
96859652Sgreen			free(buf, M_TEMP);
96959652Sgreen			return (ENOENT);
97059652Sgreen		}
97185287Sdes		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
972120792Sjeff			numfullpathfail3++;
973120792Sjeff			CACHE_UNLOCK();
97489306Salfred			FILEDESC_UNLOCK(fdp);
97559652Sgreen			free(buf, M_TEMP);
97659652Sgreen			return (EBADF);
97759652Sgreen		}
97859652Sgreen		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
97959652Sgreen			if (bp == buf) {
980120792Sjeff				numfullpathfail4++;
981120792Sjeff				CACHE_UNLOCK();
98289306Salfred				FILEDESC_UNLOCK(fdp);
98359652Sgreen				free(buf, M_TEMP);
98459652Sgreen				return (ENOMEM);
98559652Sgreen			}
98659652Sgreen			*--bp = ncp->nc_name[i];
98759652Sgreen		}
98859652Sgreen		if (bp == buf) {
989120792Sjeff			numfullpathfail4++;
990120792Sjeff			CACHE_UNLOCK();
99189306Salfred			FILEDESC_UNLOCK(fdp);
99259652Sgreen			free(buf, M_TEMP);
99359652Sgreen			return (ENOMEM);
99459652Sgreen		}
99559652Sgreen		*--bp = '/';
99659652Sgreen		slash_prefixed = 1;
99759652Sgreen		vp = ncp->nc_dvp;
998120792Sjeff		CACHE_UNLOCK();
99959652Sgreen	}
100059652Sgreen	if (!slash_prefixed) {
100159652Sgreen		if (bp == buf) {
1002120792Sjeff			numfullpathfail4++;
100389306Salfred			FILEDESC_UNLOCK(fdp);
100459652Sgreen			free(buf, M_TEMP);
100559652Sgreen			return (ENOMEM);
100659652Sgreen		}
100759652Sgreen		*--bp = '/';
100859652Sgreen	}
100989306Salfred	FILEDESC_UNLOCK(fdp);
101059652Sgreen	numfullpathfound++;
1011116201Sdes	*retbuf = bp;
101285287Sdes	*freebuf = buf;
101359652Sgreen	return (0);
101459652Sgreen}
1015