vfs_cache.c revision 92130
11541Srgrimes/*
222521Sdyson * Copyright (c) 1989, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
522521Sdyson * This code is derived from software contributed to Berkeley by
622521Sdyson * Poul-Henning Kamp of the FreeBSD Project.
722521Sdyson *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes *
3623521Sbde *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
3750477Speter * $FreeBSD: head/sys/kern/vfs_cache.c 92130 2002-03-12 04:00:11Z jeff $
381541Srgrimes */
391541Srgrimes
401541Srgrimes#include <sys/param.h>
411541Srgrimes#include <sys/systm.h>
4212820Sphk#include <sys/kernel.h>
4376166Smarkm#include <sys/lock.h>
4489316Salfred#include <sys/mutex.h>
4512820Sphk#include <sys/sysctl.h>
461541Srgrimes#include <sys/mount.h>
471541Srgrimes#include <sys/vnode.h>
481541Srgrimes#include <sys/namei.h>
491541Srgrimes#include <sys/malloc.h>
5051906Sphk#include <sys/sysproto.h>
5151906Sphk#include <sys/proc.h>
5251906Sphk#include <sys/filedesc.h>
5374384Speter#include <sys/fnv_hash.h>
541541Srgrimes
5551906Sphk/*
5659652Sgreen * This structure describes the elements in the cache of recent
5759652Sgreen * names looked up by namei.
5859652Sgreen */
5959652Sgreen
6059652Sgreenstruct	namecache {
6160938Sjake	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
6260938Sjake	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
6360938Sjake	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
6459652Sgreen	struct	vnode *nc_dvp;		/* vnode of parent of name */
6559652Sgreen	struct	vnode *nc_vp;		/* vnode the name refers to */
6659652Sgreen	u_char	nc_flag;		/* flag bits */
6759652Sgreen	u_char	nc_nlen;		/* length of name */
6859652Sgreen	char	nc_name[0];		/* segment name */
6959652Sgreen};
7059652Sgreen
7159652Sgreen/*
721541Srgrimes * Name caching works as follows:
731541Srgrimes *
741541Srgrimes * Names found by directory scans are retained in a cache
751541Srgrimes * for future reference.  It is managed LRU, so frequently
761541Srgrimes * used names will hang around.  Cache is indexed by hash value
771541Srgrimes * obtained from (vp, name) where vp refers to the directory
781541Srgrimes * containing name.
791541Srgrimes *
8022521Sdyson * If it is a "negative" entry, (i.e. for a name that is known NOT to
8122521Sdyson * exist) the vnode pointer will be NULL.
826968Sphk *
831541Srgrimes * Upon reaching the last segment of a path, if the reference
841541Srgrimes * is for DELETE, or NOCACHE is set (rewrite), and the
851541Srgrimes * name is located in the cache, it will be dropped.
861541Srgrimes */
871541Srgrimes
881541Srgrimes/*
891541Srgrimes * Structures associated with name cacheing.
901541Srgrimes */
9174501Speter#define NCHHASH(hash) \
9274501Speter	(&nchashtbl[(hash) & nchash])
9360938Sjakestatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
9460938Sjakestatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
9523521Sbdestatic u_long	nchash;			/* size of hash table */
9662622SjhbSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
9725453Sphkstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
9862622SjhbSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
9991690Seivindstatic u_long	numneg;			/* number of cache entries allocated */
10062622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
10123521Sbdestatic u_long	numcache;		/* number of cache entries allocated */
10262622SjhbSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
10375654Stanimurastatic u_long	numcachehv;		/* number of cache entries with vnodes held */
10475654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
10584249Sdillon#if 0
10675654Stanimurastatic u_long	numcachepl;		/* number of cache purge for leaf entries */
10775654StanimuraSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
10884249Sdillon#endif
10922521Sdysonstruct	nchstats nchstats;		/* cache effectiveness statistics */
1101541Srgrimes
11123521Sbdestatic int	doingcache = 1;		/* 1 => enable the cache */
11223521SbdeSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
11391690Seivind
11491690Seivind/* Export size information to userland */
11525453SphkSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
11625453SphkSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
11723521Sbde
11829788Sphk/*
11929788Sphk * The new name cache statistics
12029788Sphk */
12138984SbdeSYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
12229788Sphk#define STATNODE(mode, name, var) \
12362622Sjhb	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
12429788SphkSTATNODE(CTLFLAG_RD, numneg, &numneg);
12529788SphkSTATNODE(CTLFLAG_RD, numcache, &numcache);
12629788Sphkstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
12729788Sphkstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
12829788Sphkstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
12929788Sphkstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
13029788Sphkstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
13129804Sphkstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
13229788Sphkstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
13329788Sphkstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
13429788Sphkstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
13529788Sphkstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
13629788Sphk
13768922SrwatsonSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
13868922Srwatson        sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
13929788Sphk
14068922Srwatson
14168922Srwatson
14225453Sphkstatic void cache_zap __P((struct namecache *ncp));
1436968Sphk
14469774Sphkstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
14551906Sphk
14622521Sdyson/*
14725453Sphk * Flags in namecache.nc_flag
14825453Sphk */
14925453Sphk#define NCF_WHITE	1
15075402Speter
15125453Sphk/*
15275402Speter * Grab an atomic snapshot of the name cache hash chain lengths
15375402Speter */
15475402SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
15575402Speter
15675402Speterstatic int
15775402Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
15875402Speter{
15975402Speter	int error;
16075402Speter	struct nchashhead *ncpp;
16175402Speter	struct namecache *ncp;
16275402Speter	int n_nchash;
16375402Speter	int count;
16475402Speter
16575402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
16675402Speter	if (!req->oldptr)
16775402Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
16875402Speter
16975402Speter	/* Scan hash tables for applicable entries */
17075402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
17175402Speter		count = 0;
17275402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
17375402Speter			count++;
17475402Speter		}
17575402Speter		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
17675402Speter		if (error)
17775402Speter			return (error);
17875402Speter	}
17975402Speter	return (0);
18075402Speter}
18175402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
18275402Speter	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
18375402Speter
18475402Speterstatic int
18575402Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
18675402Speter{
18775402Speter	int error;
18875402Speter	struct nchashhead *ncpp;
18975402Speter	struct namecache *ncp;
19075402Speter	int n_nchash;
19175402Speter	int count, maxlength, used, pct;
19275402Speter
19375402Speter	if (!req->oldptr)
19475402Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
19575402Speter
19675402Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
19775402Speter	used = 0;
19875402Speter	maxlength = 0;
19975402Speter
20075402Speter	/* Scan hash tables for applicable entries */
20175402Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
20275402Speter		count = 0;
20375402Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
20475402Speter			count++;
20575402Speter		}
20675402Speter		if (count)
20775402Speter			used++;
20875402Speter		if (maxlength < count)
20975402Speter			maxlength = count;
21075402Speter	}
21175402Speter	n_nchash = nchash + 1;
21275402Speter	pct = (used * 100 * 100) / n_nchash;
21375402Speter	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
21475402Speter	if (error)
21575402Speter		return (error);
21675402Speter	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
21775402Speter	if (error)
21875402Speter		return (error);
21975402Speter	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
22075402Speter	if (error)
22175402Speter		return (error);
22275402Speter	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
22375402Speter	if (error)
22475402Speter		return (error);
22575402Speter	return (0);
22675402Speter}
22775402SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
22875402Speter	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
22975402Speter
23075402Speter/*
23122521Sdyson * Delete an entry from its hash list and move it to the front
23222521Sdyson * of the LRU list for immediate reuse.
23322521Sdyson */
23425453Sphkstatic void
23525453Sphkcache_zap(ncp)
23625453Sphk	struct namecache *ncp;
23725453Sphk{
23825453Sphk	LIST_REMOVE(ncp, nc_hash);
23925453Sphk	LIST_REMOVE(ncp, nc_src);
24075654Stanimura	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
24128954Sphk		vdrop(ncp->nc_dvp);
24275654Stanimura		numcachehv--;
24375654Stanimura	}
24425453Sphk	if (ncp->nc_vp) {
24525453Sphk		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
24625453Sphk	} else {
24725453Sphk		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
24825453Sphk		numneg--;
24925453Sphk	}
25025453Sphk	numcache--;
25151906Sphk	free(ncp, M_VFSCACHE);
25222521Sdyson}
2536968Sphk
25422521Sdyson/*
25584249Sdillon * cache_leaf_test()
25684249Sdillon *
25784249Sdillon *      Test whether this (directory) vnode's namei cache entry contains
25884249Sdillon *      subdirectories or not.  Used to determine whether the directory is
25984249Sdillon *      a leaf in the namei cache or not.  Note: the directory may still
26084249Sdillon *      contain files in the namei cache.
26184249Sdillon *
26284249Sdillon *      Returns 0 if the directory is a leaf, -1 if it isn't.
26384249Sdillon */
26484249Sdillonint
26584249Sdilloncache_leaf_test(struct vnode *vp)
26684249Sdillon{
26784249Sdillon	struct namecache *ncpc;
26884249Sdillon
26984249Sdillon	for (ncpc = LIST_FIRST(&vp->v_cache_src);
27084249Sdillon	     ncpc != NULL;
27184249Sdillon	     ncpc = LIST_NEXT(ncpc, nc_src)
27284249Sdillon	) {
27384249Sdillon		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
27484249Sdillon			return(-1);
27584249Sdillon	}
27684249Sdillon	return(0);
27784249Sdillon}
27884249Sdillon
27984249Sdillon/*
28023521Sbde * Lookup an entry in the cache
2816968Sphk *
28223521Sbde * We don't do this if the segment name is long, simply so the cache
2836968Sphk * can avoid holding long names (which would either waste space, or
2841541Srgrimes * add greatly to the complexity).
2851541Srgrimes *
2866968Sphk * Lookup is called with dvp pointing to the directory to search,
28722521Sdyson * cnp pointing to the name of the entry being sought. If the lookup
28822521Sdyson * succeeds, the vnode is returned in *vpp, and a status of -1 is
28922521Sdyson * returned. If the lookup determines that the name does not exist
29022521Sdyson * (negative cacheing), a status of ENOENT is returned. If the lookup
29122521Sdyson * fails, a status of zero is returned.
2921541Srgrimes */
2936968Sphk
2941541Srgrimesint
2951541Srgrimescache_lookup(dvp, vpp, cnp)
2961541Srgrimes	struct vnode *dvp;
2971541Srgrimes	struct vnode **vpp;
2981541Srgrimes	struct componentname *cnp;
2991541Srgrimes{
30051906Sphk	struct namecache *ncp;
30174384Speter	u_int32_t hash;
3021541Srgrimes
3036928Sphk	if (!doingcache) {
3046928Sphk		cnp->cn_flags &= ~MAKEENTRY;
3051541Srgrimes		return (0);
3066928Sphk	}
30725453Sphk
30829788Sphk	numcalls++;
30929788Sphk
31025453Sphk	if (cnp->cn_nameptr[0] == '.') {
31125453Sphk		if (cnp->cn_namelen == 1) {
31225453Sphk			*vpp = dvp;
31329788Sphk			dothits++;
31425453Sphk			return (-1);
31525453Sphk		}
31625453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
31729788Sphk			dotdothits++;
31825453Sphk			if (dvp->v_dd->v_id != dvp->v_ddid ||
31925453Sphk			    (cnp->cn_flags & MAKEENTRY) == 0) {
32025453Sphk				dvp->v_ddid = 0;
32125453Sphk				return (0);
32225453Sphk			}
32325453Sphk			*vpp = dvp->v_dd;
32425453Sphk			return (-1);
32525453Sphk		}
3261541Srgrimes	}
3276968Sphk
32874501Speter	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
32974501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
33074501Speter	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
33129788Sphk		numchecks++;
33225453Sphk		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
33331879Sbde		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
33422521Sdyson			break;
3351541Srgrimes	}
3366968Sphk
33722521Sdyson	/* We failed to find an entry */
33822521Sdyson	if (ncp == 0) {
33929804Sphk		if ((cnp->cn_flags & MAKEENTRY) == 0) {
34029804Sphk			nummisszap++;
34129804Sphk		} else {
34229804Sphk			nummiss++;
34329804Sphk		}
34422521Sdyson		nchstats.ncs_miss++;
34522521Sdyson		return (0);
34622521Sdyson	}
34722521Sdyson
3486968Sphk	/* We don't want to have an entry, so dump it */
3496928Sphk	if ((cnp->cn_flags & MAKEENTRY) == 0) {
35029788Sphk		numposzaps++;
3511541Srgrimes		nchstats.ncs_badhits++;
35225453Sphk		cache_zap(ncp);
3536968Sphk		return (0);
35423521Sbde	}
3556968Sphk
3566968Sphk	/* We found a "positive" match, return the vnode */
35722521Sdyson        if (ncp->nc_vp) {
35829788Sphk		numposhits++;
3591541Srgrimes		nchstats.ncs_goodhits++;
3601541Srgrimes		*vpp = ncp->nc_vp;
3611541Srgrimes		return (-1);
3621541Srgrimes	}
3631541Srgrimes
3646968Sphk	/* We found a negative match, and want to create it, so purge */
3656968Sphk	if (cnp->cn_nameiop == CREATE) {
36629788Sphk		numnegzaps++;
3677013Sphk		nchstats.ncs_badhits++;
36825453Sphk		cache_zap(ncp);
3696968Sphk		return (0);
3706968Sphk	}
3716968Sphk
37229788Sphk	numneghits++;
37322521Sdyson	/*
37422521Sdyson	 * We found a "negative" match, ENOENT notifies client of this match.
37522521Sdyson	 * The nc_vpid field records whether this is a whiteout.
37622521Sdyson	 */
37725453Sphk	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
37825453Sphk	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
3796968Sphk	nchstats.ncs_neghits++;
38025453Sphk	if (ncp->nc_flag & NCF_WHITE)
38125453Sphk		cnp->cn_flags |= ISWHITEOUT;
3826968Sphk	return (ENOENT);
3831541Srgrimes}
3841541Srgrimes
3851541Srgrimes/*
3866968Sphk * Add an entry to the cache.
3871541Srgrimes */
3881549Srgrimesvoid
3891541Srgrimescache_enter(dvp, vp, cnp)
3901541Srgrimes	struct vnode *dvp;
3911541Srgrimes	struct vnode *vp;
3921541Srgrimes	struct componentname *cnp;
3931541Srgrimes{
39451906Sphk	struct namecache *ncp;
39551906Sphk	struct nchashhead *ncpp;
39674384Speter	u_int32_t hash;
39751906Sphk	int len;
3981541Srgrimes
3991541Srgrimes	if (!doingcache)
4001541Srgrimes		return;
4016968Sphk
40225453Sphk	if (cnp->cn_nameptr[0] == '.') {
40325453Sphk		if (cnp->cn_namelen == 1) {
40425453Sphk			return;
4056928Sphk		}
40625453Sphk		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
40725453Sphk			if (vp) {
40825453Sphk				dvp->v_dd = vp;
40925453Sphk				dvp->v_ddid = vp->v_id;
41025453Sphk			} else {
41125453Sphk				dvp->v_dd = dvp;
41225453Sphk				dvp->v_ddid = 0;
41325453Sphk			}
41425453Sphk			return;
41525453Sphk		}
4166968Sphk	}
41725453Sphk
41825453Sphk	ncp = (struct namecache *)
41951906Sphk		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
42025453Sphk	bzero((char *)ncp, sizeof *ncp);
42125453Sphk	numcache++;
42228954Sphk	if (!vp) {
42325453Sphk		numneg++;
42428954Sphk		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
42529071Sphk	} else if (vp->v_type == VDIR) {
42629071Sphk		vp->v_dd = dvp;
42729071Sphk		vp->v_ddid = dvp->v_id;
42828954Sphk	}
42923521Sbde
43022521Sdyson	/*
43122521Sdyson	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
43222521Sdyson	 * For negative entries, we have to record whether it is a whiteout.
43322521Sdyson	 * the whiteout flag is stored in the nc_vpid field which is
43422521Sdyson	 * otherwise unused.
43522521Sdyson	 */
4361541Srgrimes	ncp->nc_vp = vp;
4371541Srgrimes	ncp->nc_dvp = dvp;
43851906Sphk	len = ncp->nc_nlen = cnp->cn_namelen;
43974501Speter	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
44074384Speter	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
44174501Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
44274501Speter	ncpp = NCHHASH(hash);
4436928Sphk	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
44475654Stanimura	if (LIST_EMPTY(&dvp->v_cache_src)) {
44528954Sphk		vhold(dvp);
44675654Stanimura		numcachehv++;
44775654Stanimura	}
44825453Sphk	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
44925453Sphk	if (vp) {
45025453Sphk		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
45125453Sphk	} else {
45225453Sphk		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
45325453Sphk	}
45451906Sphk	if (numneg * ncnegfactor > numcache) {
45525453Sphk		ncp = TAILQ_FIRST(&ncneg);
45625453Sphk		cache_zap(ncp);
45725453Sphk	}
4581541Srgrimes}
4591541Srgrimes
4601541Srgrimes/*
4611541Srgrimes * Name cache initialization, from vfs_init() when we are booting
4621541Srgrimes */
46369664Speterstatic void
46469664Speternchinit(void *dummy __unused)
4651541Srgrimes{
46623521Sbde
46725453Sphk	TAILQ_INIT(&ncneg);
46869664Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
4691541Srgrimes}
47069664SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
4711541Srgrimes
47269664Speter
4731541Srgrimes/*
47446011Sphk * Invalidate all entries to a particular vnode.
47523521Sbde *
47646011Sphk * Remove all entries in the namecache relating to this vnode and
47746011Sphk * change the v_id.  We take the v_id from a global counter, since
47846011Sphk * it becomes a handy sequence number in crash-dumps that way.
47946011Sphk * No valid vnode will ever have (v_id == 0).
48046011Sphk *
48146011Sphk * XXX: Only time and the size of v_id prevents this from failing:
48246011Sphk * XXX: In theory we should hunt down all (struct vnode*, v_id)
48346011Sphk * XXX: soft references and nuke them, at least on the global
48446011Sphk * XXX: v_id wraparound.  The period of resistance can be extended
48546011Sphk * XXX: by incrementing each vnodes v_id individually instead of
48646011Sphk * XXX: using the global v_id.
4871541Srgrimes */
48846011Sphk
4891549Srgrimesvoid
4901541Srgrimescache_purge(vp)
4911541Srgrimes	struct vnode *vp;
4921541Srgrimes{
49329094Sphk	static u_long nextid;
4941541Srgrimes
49525453Sphk	while (!LIST_EMPTY(&vp->v_cache_src))
49625453Sphk		cache_zap(LIST_FIRST(&vp->v_cache_src));
49725453Sphk	while (!TAILQ_EMPTY(&vp->v_cache_dst))
49825453Sphk		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
49925453Sphk
50046011Sphk	do
50146011Sphk		nextid++;
50246011Sphk	while (nextid == vp->v_id || !nextid);
50329094Sphk	vp->v_id = nextid;
50425453Sphk	vp->v_dd = vp;
50525453Sphk	vp->v_ddid = 0;
5061541Srgrimes}
5071541Srgrimes
5081541Srgrimes/*
5096968Sphk * Flush all entries referencing a particular filesystem.
5101541Srgrimes *
5116968Sphk * Since we need to check it anyway, we will flush all the invalid
51212968Sphk * entries at the same time.
5131541Srgrimes */
5141549Srgrimesvoid
5151541Srgrimescache_purgevfs(mp)
5161541Srgrimes	struct mount *mp;
5171541Srgrimes{
5186968Sphk	struct nchashhead *ncpp;
51922521Sdyson	struct namecache *ncp, *nnp;
5201541Srgrimes
5216968Sphk	/* Scan hash tables for applicable entries */
52229071Sphk	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
52325453Sphk		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
52425453Sphk			nnp = LIST_NEXT(ncp, nc_hash);
52525453Sphk			if (ncp->nc_dvp->v_mount == mp) {
52625453Sphk				cache_zap(ncp);
5276968Sphk			}
5281541Srgrimes		}
5291541Srgrimes	}
5301541Srgrimes}
53128787Sphk
53228787Sphk/*
53328787Sphk * Perform canonical checks and cache lookup and pass on to filesystem
53428787Sphk * through the vop_cachedlookup only if needed.
53528787Sphk */
53628787Sphk
53728787Sphkint
53828787Sphkvfs_cache_lookup(ap)
53928787Sphk	struct vop_lookup_args /* {
54028787Sphk		struct vnode *a_dvp;
54128787Sphk		struct vnode **a_vpp;
54228787Sphk		struct componentname *a_cnp;
54328787Sphk	} */ *ap;
54428787Sphk{
54565665Sbp	struct vnode *dvp, *vp;
54665665Sbp	int lockparent;
54728787Sphk	int error;
54828787Sphk	struct vnode **vpp = ap->a_vpp;
54928787Sphk	struct componentname *cnp = ap->a_cnp;
55028787Sphk	struct ucred *cred = cnp->cn_cred;
55128787Sphk	int flags = cnp->cn_flags;
55283366Sjulian	struct thread *td = cnp->cn_thread;
55328787Sphk	u_long vpid;	/* capability number of vnode */
55428787Sphk
55528787Sphk	*vpp = NULL;
55665665Sbp	dvp = ap->a_dvp;
55728787Sphk	lockparent = flags & LOCKPARENT;
55828787Sphk
55965665Sbp	if (dvp->v_type != VDIR)
56028787Sphk                return (ENOTDIR);
56128787Sphk
56265665Sbp	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
56328787Sphk	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
56428787Sphk		return (EROFS);
56528787Sphk
56683366Sjulian	error = VOP_ACCESS(dvp, VEXEC, cred, td);
56728787Sphk
56828787Sphk	if (error)
56928787Sphk		return (error);
57028787Sphk
57165665Sbp	error = cache_lookup(dvp, vpp, cnp);
57228787Sphk
57392130Sjeff#ifdef LOOKUP_SHARED
57492130Sjeff	if (!error) {
57592130Sjeff		/* We do this because the rest of the system now expects to get
57692130Sjeff		 * a shared lock, which is later upgraded if LOCKSHARED is not
57792130Sjeff		 * set.  We have so many cases here because of bugs that yield
57892130Sjeff		 * inconsistant lock states.  This all badly needs to be fixed
57992130Sjeff		 */
58092130Sjeff		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
58192130Sjeff		if (!error) {
58292130Sjeff			int flock;
58392130Sjeff
58492130Sjeff			flock = VOP_ISLOCKED(*vpp, td);
58592130Sjeff			if (flock != LK_EXCLUSIVE) {
58692130Sjeff				if (flock == 0) {
58792130Sjeff					if ((flags & ISLASTCN) &&
58892130Sjeff					    (flags & LOCKSHARED))
58992130Sjeff						VOP_LOCK(*vpp, LK_SHARED, td);
59092130Sjeff					else
59192130Sjeff						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
59292130Sjeff				}
59392130Sjeff			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
59492130Sjeff				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
59592130Sjeff		}
59692130Sjeff		return (error);
59792130Sjeff	}
59892130Sjeff#else
59928787Sphk	if (!error)
60065665Sbp		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
60192130Sjeff#endif
60228787Sphk
60328787Sphk	if (error == ENOENT)
60428787Sphk		return (error);
60528787Sphk
60665665Sbp	vp = *vpp;
60765665Sbp	vpid = vp->v_id;
60865973Sbp	cnp->cn_flags &= ~PDIRUNLOCK;
60965665Sbp	if (dvp == vp) {   /* lookup on "." */
61065665Sbp		VREF(vp);
61128787Sphk		error = 0;
61228787Sphk	} else if (flags & ISDOTDOT) {
61383366Sjulian		VOP_UNLOCK(dvp, 0, td);
61465973Sbp		cnp->cn_flags |= PDIRUNLOCK;
61592130Sjeff#ifdef LOOKUP_SHARED
61692130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
61792130Sjeff			error = vget(vp, LK_SHARED, td);
61892130Sjeff		else
61992130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
62092130Sjeff#else
62183366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
62292130Sjeff#endif
62392130Sjeff
62465973Sbp		if (!error && lockparent && (flags & ISLASTCN)) {
62583366Sjulian			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
62665973Sbp				cnp->cn_flags &= ~PDIRUNLOCK;
62765973Sbp		}
62828787Sphk	} else {
62992130Sjeff#ifdef LOOKUP_SHARED
63092130Sjeff		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
63192130Sjeff			error = vget(vp, LK_SHARED, td);
63292130Sjeff		else
63392130Sjeff			error = vget(vp, LK_EXCLUSIVE, td);
63492130Sjeff#else
63583366Sjulian		error = vget(vp, LK_EXCLUSIVE, td);
63692130Sjeff#endif
63765973Sbp		if (!lockparent || error || !(flags & ISLASTCN)) {
63883366Sjulian			VOP_UNLOCK(dvp, 0, td);
63965973Sbp			cnp->cn_flags |= PDIRUNLOCK;
64065973Sbp		}
64128787Sphk	}
64228787Sphk	/*
64328787Sphk	 * Check that the capability number did not change
64428787Sphk	 * while we were waiting for the lock.
64528787Sphk	 */
64628787Sphk	if (!error) {
64765665Sbp		if (vpid == vp->v_id)
64828787Sphk			return (0);
64965665Sbp		vput(vp);
65065973Sbp		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
65183366Sjulian			VOP_UNLOCK(dvp, 0, td);
65265973Sbp			cnp->cn_flags |= PDIRUNLOCK;
65365973Sbp		}
65428787Sphk	}
65565973Sbp	if (cnp->cn_flags & PDIRUNLOCK) {
65683366Sjulian		error = vn_lock(dvp, LK_EXCLUSIVE, td);
65765973Sbp		if (error)
65865973Sbp			return (error);
65965973Sbp		cnp->cn_flags &= ~PDIRUNLOCK;
66065973Sbp	}
66192130Sjeff#ifdef LOOKUP_SHARED
66292130Sjeff	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
66392130Sjeff
66492130Sjeff	if (!error) {
66592130Sjeff		int flock = 0;
66692130Sjeff
66792130Sjeff		flock = VOP_ISLOCKED(*vpp, td);
66892130Sjeff		if (flock != LK_EXCLUSIVE) {
66992130Sjeff			if (flock == 0) {
67092130Sjeff				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
67192130Sjeff					VOP_LOCK(*vpp, LK_SHARED, td);
67292130Sjeff				else
67392130Sjeff					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
67492130Sjeff			}
67592130Sjeff		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
67692130Sjeff			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
67792130Sjeff	}
67892130Sjeff
67992130Sjeff	return (error);
68092130Sjeff#else
68165665Sbp	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
68292130Sjeff#endif
68328787Sphk}
68451906Sphk
68551906Sphk
68651906Sphk#ifndef _SYS_SYSPROTO_H_
68751906Sphkstruct  __getcwd_args {
68851906Sphk	u_char	*buf;
68951906Sphk	u_int	buflen;
69051906Sphk};
69151906Sphk#endif
69251906Sphk
69391690Seivind/*
69491690Seivind * XXX All of these sysctls would probably be more productive dead.
69591690Seivind */
69651906Sphkstatic int disablecwd;
69791690SeivindSYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
69891690Seivind   "Disable the getcwd syscall");
69951906Sphk
70091690Seivind/* Various statistics for the getcwd syscall */
70151906Sphkstatic u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
70251906Sphkstatic u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
70351906Sphkstatic u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
70451906Sphkstatic u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
70551906Sphkstatic u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
70651906Sphkstatic u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
70791690Seivind
70891690Seivind/* Implementation of the getcwd syscall */
70951906Sphkint
71083366Sjulian__getcwd(td, uap)
71183366Sjulian	struct thread *td;
71251906Sphk	struct __getcwd_args *uap;
71351906Sphk{
71451906Sphk	char *bp, *buf;
71551906Sphk	int error, i, slash_prefixed;
71651906Sphk	struct filedesc *fdp;
71751906Sphk	struct namecache *ncp;
71851906Sphk	struct vnode *vp;
71951906Sphk
72051906Sphk	numcwdcalls++;
72151906Sphk	if (disablecwd)
72251906Sphk		return (ENODEV);
72351906Sphk	if (uap->buflen < 2)
72451906Sphk		return (EINVAL);
72551906Sphk	if (uap->buflen > MAXPATHLEN)
72651906Sphk		uap->buflen = MAXPATHLEN;
72751906Sphk	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
72851906Sphk	bp += uap->buflen - 1;
72951906Sphk	*bp = '\0';
73083366Sjulian	fdp = td->td_proc->p_fd;
73151906Sphk	slash_prefixed = 0;
73289306Salfred	FILEDESC_LOCK(fdp);
73351906Sphk	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
73451906Sphk		if (vp->v_flag & VROOT) {
73583000Siedowse			if (vp->v_mount == NULL) {	/* forced unmount */
73689306Salfred				FILEDESC_UNLOCK(fdp);
73783000Siedowse				free(buf, M_TEMP);
73857199Speter				return (EBADF);
73983000Siedowse			}
74051906Sphk			vp = vp->v_mount->mnt_vnodecovered;
74151906Sphk			continue;
74251906Sphk		}
74351906Sphk		if (vp->v_dd->v_id != vp->v_ddid) {
74489306Salfred			FILEDESC_UNLOCK(fdp);
74551906Sphk			numcwdfail1++;
74651906Sphk			free(buf, M_TEMP);
74751906Sphk			return (ENOTDIR);
74851906Sphk		}
74951906Sphk		ncp = TAILQ_FIRST(&vp->v_cache_dst);
75051906Sphk		if (!ncp) {
75189306Salfred			FILEDESC_UNLOCK(fdp);
75251906Sphk			numcwdfail2++;
75351906Sphk			free(buf, M_TEMP);
75451906Sphk			return (ENOENT);
75551906Sphk		}
75651906Sphk		if (ncp->nc_dvp != vp->v_dd) {
75789306Salfred			FILEDESC_UNLOCK(fdp);
75851906Sphk			numcwdfail3++;
75951906Sphk			free(buf, M_TEMP);
76051906Sphk			return (EBADF);
76151906Sphk		}
76251906Sphk		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
76351906Sphk			if (bp == buf) {
76489306Salfred				FILEDESC_UNLOCK(fdp);
76551906Sphk				numcwdfail4++;
76651906Sphk				free(buf, M_TEMP);
76751906Sphk				return (ENOMEM);
76851906Sphk			}
76951906Sphk			*--bp = ncp->nc_name[i];
77051906Sphk		}
77151906Sphk		if (bp == buf) {
77289306Salfred			FILEDESC_UNLOCK(fdp);
77351906Sphk			numcwdfail4++;
77451906Sphk			free(buf, M_TEMP);
77551906Sphk			return (ENOMEM);
77651906Sphk		}
77751906Sphk		*--bp = '/';
77851906Sphk		slash_prefixed = 1;
77951906Sphk		vp = vp->v_dd;
78051906Sphk	}
78189306Salfred	FILEDESC_UNLOCK(fdp);
78251906Sphk	if (!slash_prefixed) {
78351906Sphk		if (bp == buf) {
78451906Sphk			numcwdfail4++;
78551906Sphk			free(buf, M_TEMP);
78651906Sphk			return (ENOMEM);
78751906Sphk		}
78851906Sphk		*--bp = '/';
78951906Sphk	}
79051906Sphk	numcwdfound++;
79151906Sphk	error = copyout(bp, uap->buf, strlen(bp) + 1);
79251906Sphk	free(buf, M_TEMP);
79351906Sphk	return (error);
79451906Sphk}
79551906Sphk
79659652Sgreen/*
79759652Sgreen * Thus begins the fullpath magic.
79859652Sgreen */
79959652Sgreen
80059652Sgreen#undef STATNODE
80159652Sgreen#define STATNODE(name)							\
80259652Sgreen	static u_int name;						\
80362622Sjhb	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
80459652Sgreen
80559652Sgreenstatic int disablefullpath;
80691690SeivindSYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
80791690Seivind	"Disable the vn_fullpath function");
80859652Sgreen
80959652SgreenSTATNODE(numfullpathcalls);
81059652SgreenSTATNODE(numfullpathfail1);
81159652SgreenSTATNODE(numfullpathfail2);
81259652SgreenSTATNODE(numfullpathfail3);
81359652SgreenSTATNODE(numfullpathfail4);
81459652SgreenSTATNODE(numfullpathfound);
81559652Sgreen
81691690Seivind/*
81791690Seivind * Retrieve the full filesystem path that correspond to a vnode from the name
81891690Seivind * cache (if available)
81991690Seivind */
82059652Sgreenint
82185287Sdesvn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
82285287Sdes{
82359652Sgreen	char *bp, *buf;
82459652Sgreen	int i, slash_prefixed;
82559652Sgreen	struct filedesc *fdp;
82659652Sgreen	struct namecache *ncp;
82785287Sdes	struct vnode *vp;
82859652Sgreen
82959652Sgreen	numfullpathcalls++;
83059652Sgreen	if (disablefullpath)
83159652Sgreen		return (ENODEV);
83285287Sdes	if (vn == NULL)
83359652Sgreen		return (EINVAL);
83459652Sgreen	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
83559652Sgreen	bp = buf + MAXPATHLEN - 1;
83659652Sgreen	*bp = '\0';
83785287Sdes	fdp = td->td_proc->p_fd;
83859652Sgreen	slash_prefixed = 0;
83989306Salfred	FILEDESC_LOCK(fdp);
84085287Sdes	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
84159652Sgreen		if (vp->v_flag & VROOT) {
84259652Sgreen			if (vp->v_mount == NULL) {	/* forced unmount */
84389306Salfred				FILEDESC_UNLOCK(fdp);
84459652Sgreen				free(buf, M_TEMP);
84559652Sgreen				return (EBADF);
84659652Sgreen			}
84759652Sgreen			vp = vp->v_mount->mnt_vnodecovered;
84859652Sgreen			continue;
84959652Sgreen		}
85085287Sdes		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
85189306Salfred			FILEDESC_UNLOCK(fdp);
85259652Sgreen			numfullpathfail1++;
85359652Sgreen			free(buf, M_TEMP);
85459652Sgreen			return (ENOTDIR);
85559652Sgreen		}
85659652Sgreen		ncp = TAILQ_FIRST(&vp->v_cache_dst);
85759652Sgreen		if (!ncp) {
85889306Salfred			FILEDESC_UNLOCK(fdp);
85959652Sgreen			numfullpathfail2++;
86059652Sgreen			free(buf, M_TEMP);
86159652Sgreen			return (ENOENT);
86259652Sgreen		}
86385287Sdes		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
86489306Salfred			FILEDESC_UNLOCK(fdp);
86559652Sgreen			numfullpathfail3++;
86659652Sgreen			free(buf, M_TEMP);
86759652Sgreen			return (EBADF);
86859652Sgreen		}
86959652Sgreen		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
87059652Sgreen			if (bp == buf) {
87189306Salfred				FILEDESC_UNLOCK(fdp);
87259652Sgreen				numfullpathfail4++;
87359652Sgreen				free(buf, M_TEMP);
87459652Sgreen				return (ENOMEM);
87559652Sgreen			}
87659652Sgreen			*--bp = ncp->nc_name[i];
87759652Sgreen		}
87859652Sgreen		if (bp == buf) {
87989306Salfred			FILEDESC_UNLOCK(fdp);
88059652Sgreen			numfullpathfail4++;
88159652Sgreen			free(buf, M_TEMP);
88259652Sgreen			return (ENOMEM);
88359652Sgreen		}
88459652Sgreen		*--bp = '/';
88559652Sgreen		slash_prefixed = 1;
88659652Sgreen		vp = ncp->nc_dvp;
88759652Sgreen	}
88859652Sgreen	if (!slash_prefixed) {
88959652Sgreen		if (bp == buf) {
89089306Salfred			FILEDESC_UNLOCK(fdp);
89159652Sgreen			numfullpathfail4++;
89259652Sgreen			free(buf, M_TEMP);
89359652Sgreen			return (ENOMEM);
89459652Sgreen		}
89559652Sgreen		*--bp = '/';
89659652Sgreen	}
89789306Salfred	FILEDESC_UNLOCK(fdp);
89859652Sgreen	numfullpathfound++;
89959652Sgreen	*retbuf = bp;
90085287Sdes	*freebuf = buf;
90159652Sgreen	return (0);
90259652Sgreen}
903