vfs_cache.c revision 144318
1251881Speter/*-
2251881Speter * Copyright (c) 1989, 1993, 1995
3251881Speter *	The Regents of the University of California.  All rights reserved.
4251881Speter *
5251881Speter * This code is derived from software contributed to Berkeley by
6251881Speter * Poul-Henning Kamp of the FreeBSD Project.
7251881Speter *
8251881Speter * Redistribution and use in source and binary forms, with or without
9251881Speter * modification, are permitted provided that the following conditions
10251881Speter * are met:
11251881Speter * 1. Redistributions of source code must retain the above copyright
12251881Speter *    notice, this list of conditions and the following disclaimer.
13251881Speter * 2. Redistributions in binary form must reproduce the above copyright
14251881Speter *    notice, this list of conditions and the following disclaimer in the
15251881Speter *    documentation and/or other materials provided with the distribution.
16251881Speter * 4. Neither the name of the University nor the names of its contributors
17251881Speter *    may be used to endorse or promote products derived from this software
18251881Speter *    without specific prior written permission.
19251881Speter *
20251881Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21251881Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22251881Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23251881Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24251881Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25251881Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26251881Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27251881Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28251881Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29251881Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30251881Speter * SUCH DAMAGE.
31251881Speter *
32251881Speter *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
33251881Speter */
34251881Speter
35251881Speter#include <sys/cdefs.h>
36251881Speter__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 144318 2005-03-30 02:59:32Z das $");
37251881Speter
38251881Speter#include <sys/param.h>
39251881Speter#include <sys/systm.h>
40251881Speter#include <sys/kernel.h>
41251881Speter#include <sys/lock.h>
42251881Speter#include <sys/mutex.h>
43251881Speter#include <sys/sysctl.h>
44251881Speter#include <sys/mount.h>
45251881Speter#include <sys/vnode.h>
46251881Speter#include <sys/namei.h>
47251881Speter#include <sys/malloc.h>
48251881Speter#include <sys/syscallsubr.h>
49251881Speter#include <sys/sysproto.h>
50251881Speter#include <sys/proc.h>
51251881Speter#include <sys/filedesc.h>
52251881Speter#include <sys/fnv_hash.h>
53251881Speter
54251881Speter#include <vm/uma.h>
55251881Speter
56251881Speter/*
57251881Speter * This structure describes the elements in the cache of recent
58251881Speter * names looked up by namei.
59251881Speter */
60251881Speter
61251881Speterstruct	namecache {
62251881Speter	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
63251881Speter	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
64251881Speter	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
65251881Speter	struct	vnode *nc_dvp;		/* vnode of parent of name */
66251881Speter	struct	vnode *nc_vp;		/* vnode the name refers to */
67251881Speter	u_char	nc_flag;		/* flag bits */
68251881Speter	u_char	nc_nlen;		/* length of name */
69251881Speter	char	nc_name[0];		/* segment name */
70251881Speter};
71251881Speter
72251881Speter/*
73251881Speter * Name caching works as follows:
74251881Speter *
75251881Speter * Names found by directory scans are retained in a cache
76251881Speter * for future reference.  It is managed LRU, so frequently
77251881Speter * used names will hang around.  Cache is indexed by hash value
78251881Speter * obtained from (vp, name) where vp refers to the directory
79251881Speter * containing name.
80251881Speter *
81251881Speter * If it is a "negative" entry, (i.e. for a name that is known NOT to
82251881Speter * exist) the vnode pointer will be NULL.
83251881Speter *
84251881Speter * Upon reaching the last segment of a path, if the reference
85251881Speter * is for DELETE, or NOCACHE is set (rewrite), and the
86251881Speter * name is located in the cache, it will be dropped.
87251881Speter */
88251881Speter
89251881Speter/*
90251881Speter * Structures associated with name cacheing.
91251881Speter */
92251881Speter#define NCHHASH(hash) \
93251881Speter	(&nchashtbl[(hash) & nchash])
94251881Speterstatic LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
95251881Speterstatic TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
96251881Speterstatic u_long	nchash;			/* size of hash table */
97251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
98251881Speterstatic u_long	ncnegfactor = 16;	/* ratio of negative entries */
99251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
100251881Speterstatic u_long	numneg;			/* number of cache entries allocated */
101251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
102251881Speterstatic u_long	numcache;		/* number of cache entries allocated */
103251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
104251881Speterstatic u_long	numcachehv;		/* number of cache entries with vnodes held */
105251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
106251881Speter#if 0
107251881Speterstatic u_long	numcachepl;		/* number of cache purge for leaf entries */
108251881SpeterSYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
109251881Speter#endif
110251881Speterstruct	nchstats nchstats;		/* cache effectiveness statistics */
111251881Speter
112251881Speterstatic struct mtx cache_lock;
113251881SpeterMTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);
114251881Speter
115251881Speter#define	CACHE_LOCK()	mtx_lock(&cache_lock)
116251881Speter#define	CACHE_UNLOCK()	mtx_unlock(&cache_lock)
117251881Speter
118251881Speter/*
119251881Speter * UMA zones for the VFS cache.
120251881Speter *
121251881Speter * The small cache is used for entries with short names, which are the
122251881Speter * most common.  The large cache is used for entries which are too big to
123251881Speter * fit in the small cache.
124251881Speter */
125251881Speterstatic uma_zone_t cache_zone_small;
126251881Speterstatic uma_zone_t cache_zone_large;
127251881Speter
128251881Speter#define	CACHE_PATH_CUTOFF	32
129251881Speter#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
130251881Speter#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)
131251881Speter
132251881Speter#define cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
133251881Speter	cache_zone_small : cache_zone_large, M_WAITOK)
134251881Speter#define cache_free(ncp)		do { \
135251881Speter	if (ncp != NULL) \
136251881Speter		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
137251881Speter		    cache_zone_small : cache_zone_large, (ncp)); \
138251881Speter} while (0)
139251881Speter
140251881Speterstatic int	doingcache = 1;		/* 1 => enable the cache */
141251881SpeterSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
142251881Speter
143251881Speter/* Export size information to userland */
144251881SpeterSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
145251881SpeterSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
146251881Speter
147251881Speter/*
148251881Speter * The new name cache statistics
149251881Speter */
150251881Speterstatic SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
151251881Speter#define STATNODE(mode, name, var) \
152251881Speter	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
153251881SpeterSTATNODE(CTLFLAG_RD, numneg, &numneg);
154251881SpeterSTATNODE(CTLFLAG_RD, numcache, &numcache);
155251881Speterstatic u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
156251881Speterstatic u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
157251881Speterstatic u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
158251881Speterstatic u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
159251881Speterstatic u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
160251881Speterstatic u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
161251881Speterstatic u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
162251881Speterstatic u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
163251881Speterstatic u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
164251881Speterstatic u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
165251881Speter
166251881SpeterSYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
167251881Speter	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
168251881Speter
169251881Speter
170251881Speter
171251881Speterstatic void cache_zap(struct namecache *ncp);
172251881Speterstatic int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
173251881Speter    char *buf, char **retbuf, u_int buflen);
174251881Speter
175251881Speterstatic MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
176251881Speter
177251881Speter/*
178251881Speter * Flags in namecache.nc_flag
179251881Speter */
180251881Speter#define NCF_WHITE	1
181251881Speter
182251881Speter/*
183251881Speter * Grab an atomic snapshot of the name cache hash chain lengths
184251881Speter */
185251881SpeterSYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
186251881Speter
187251881Speterstatic int
188251881Spetersysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
189251881Speter{
190251881Speter	int error;
191251881Speter	struct nchashhead *ncpp;
192251881Speter	struct namecache *ncp;
193251881Speter	int n_nchash;
194251881Speter	int count;
195251881Speter
196251881Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
197251881Speter	if (!req->oldptr)
198251881Speter		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
199251881Speter
200251881Speter	/* Scan hash tables for applicable entries */
201251881Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
202251881Speter		count = 0;
203251881Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
204251881Speter			count++;
205251881Speter		}
206251881Speter		error = SYSCTL_OUT(req, &count, sizeof(count));
207251881Speter		if (error)
208251881Speter			return (error);
209251881Speter	}
210251881Speter	return (0);
211251881Speter}
212251881SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
213251881Speter	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
214251881Speter
215251881Speterstatic int
216251881Spetersysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
217251881Speter{
218251881Speter	int error;
219251881Speter	struct nchashhead *ncpp;
220251881Speter	struct namecache *ncp;
221251881Speter	int n_nchash;
222251881Speter	int count, maxlength, used, pct;
223251881Speter
224251881Speter	if (!req->oldptr)
225251881Speter		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
226251881Speter
227251881Speter	n_nchash = nchash + 1;	/* nchash is max index, not count */
228251881Speter	used = 0;
229251881Speter	maxlength = 0;
230251881Speter
231251881Speter	/* Scan hash tables for applicable entries */
232251881Speter	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
233251881Speter		count = 0;
234251881Speter		LIST_FOREACH(ncp, ncpp, nc_hash) {
235251881Speter			count++;
236251881Speter		}
237251881Speter		if (count)
238251881Speter			used++;
239251881Speter		if (maxlength < count)
240251881Speter			maxlength = count;
241251881Speter	}
242251881Speter	n_nchash = nchash + 1;
243251881Speter	pct = (used * 100 * 100) / n_nchash;
244251881Speter	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
245251881Speter	if (error)
246251881Speter		return (error);
247251881Speter	error = SYSCTL_OUT(req, &used, sizeof(used));
248251881Speter	if (error)
249251881Speter		return (error);
250251881Speter	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
251251881Speter	if (error)
252251881Speter		return (error);
253251881Speter	error = SYSCTL_OUT(req, &pct, sizeof(pct));
254251881Speter	if (error)
255251881Speter		return (error);
256251881Speter	return (0);
257251881Speter}
258251881SpeterSYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
259251881Speter	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
260251881Speter
261251881Speter/*
262251881Speter * cache_zap():
263251881Speter *
264251881Speter *   Removes a namecache entry from cache, whether it contains an actual
265251881Speter *   pointer to a vnode or if it is just a negative cache entry.
266251881Speter */
267251881Speterstatic void
268251881Spetercache_zap(ncp)
269251881Speter	struct namecache *ncp;
270251881Speter{
271251881Speter	struct vnode *vp;
272251881Speter
273251881Speter	mtx_assert(&cache_lock, MA_OWNED);
274251881Speter	vp = NULL;
275251881Speter	LIST_REMOVE(ncp, nc_hash);
276251881Speter	LIST_REMOVE(ncp, nc_src);
277251881Speter	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
278251881Speter		vp = ncp->nc_dvp;
279251881Speter		numcachehv--;
280251881Speter	}
281251881Speter	if (ncp->nc_vp) {
282251881Speter		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
283251881Speter	} else {
284251881Speter		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
285251881Speter		numneg--;
286251881Speter	}
287251881Speter	numcache--;
288251881Speter	cache_free(ncp);
289251881Speter	if (vp)
290251881Speter		vdrop(vp);
291251881Speter}
292251881Speter
293251881Speter/*
294251881Speter * cache_leaf_test()
295251881Speter *
296251881Speter *      Test whether this (directory) vnode's namei cache entry contains
297251881Speter *      subdirectories or not.  Used to determine whether the directory is
298251881Speter *      a leaf in the namei cache or not.  Note: the directory may still
299251881Speter *      contain files in the namei cache.
300251881Speter *
301251881Speter *      Returns 0 if the directory is a leaf, -1 if it isn't.
302251881Speter */
303251881Speterint
304251881Spetercache_leaf_test(struct vnode *vp)
305251881Speter{
306251881Speter	struct namecache *ncpc;
307251881Speter	int leaf;
308251881Speter
309251881Speter	leaf = 0;
310251881Speter	CACHE_LOCK();
311251881Speter	for (ncpc = LIST_FIRST(&vp->v_cache_src);
312251881Speter	     ncpc != NULL;
313251881Speter	     ncpc = LIST_NEXT(ncpc, nc_src)
314251881Speter	 ) {
315251881Speter		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
316251881Speter			leaf = -1;
317251881Speter			break;
318251881Speter		}
319251881Speter	}
320251881Speter	CACHE_UNLOCK();
321251881Speter	return (leaf);
322251881Speter}
323251881Speter
324251881Speter/*
325251881Speter * Lookup an entry in the cache
326251881Speter *
327251881Speter * Lookup is called with dvp pointing to the directory to search,
328251881Speter * cnp pointing to the name of the entry being sought. If the lookup
329251881Speter * succeeds, the vnode is returned in *vpp, and a status of -1 is
330251881Speter * returned. If the lookup determines that the name does not exist
331251881Speter * (negative cacheing), a status of ENOENT is returned. If the lookup
332251881Speter * fails, a status of zero is returned.
333251881Speter *
334251881Speter * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
335251881Speter * unlocked.  If we're looking up . an extra ref is taken, but the lock is
336251881Speter * not recursively acquired.
337251881Speter */
338251881Speter
339251881Speterint
340251881Spetercache_lookup(dvp, vpp, cnp)
341251881Speter	struct vnode *dvp;
342251881Speter	struct vnode **vpp;
343251881Speter	struct componentname *cnp;
344251881Speter{
345251881Speter	struct namecache *ncp;
346251881Speter	u_int32_t hash;
347251881Speter
348251881Speter	if (!doingcache) {
349251881Speter		cnp->cn_flags &= ~MAKEENTRY;
350251881Speter		return (0);
351251881Speter	}
352251881Speterretry:
353251881Speter	CACHE_LOCK();
354251881Speter	numcalls++;
355251881Speter
356251881Speter	if (cnp->cn_nameptr[0] == '.') {
357251881Speter		if (cnp->cn_namelen == 1) {
358251881Speter			*vpp = dvp;
359251881Speter			dothits++;
360251881Speter			goto success;
361251881Speter		}
362251881Speter		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
363251881Speter			dotdothits++;
364251881Speter			if (dvp->v_dd->v_id != dvp->v_ddid ||
365251881Speter			    (cnp->cn_flags & MAKEENTRY) == 0) {
366251881Speter				dvp->v_ddid = 0;
367251881Speter				CACHE_UNLOCK();
368251881Speter				return (0);
369251881Speter			}
370251881Speter			*vpp = dvp->v_dd;
371251881Speter			goto success;
372251881Speter		}
373251881Speter	}
374251881Speter
375251881Speter	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
376251881Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
377251881Speter	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
378251881Speter		numchecks++;
379251881Speter		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
380251881Speter		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
381251881Speter			break;
382251881Speter	}
383251881Speter
384251881Speter	/* We failed to find an entry */
385251881Speter	if (ncp == 0) {
386251881Speter		if ((cnp->cn_flags & MAKEENTRY) == 0) {
387251881Speter			nummisszap++;
388251881Speter		} else {
389251881Speter			nummiss++;
390251881Speter		}
391251881Speter		nchstats.ncs_miss++;
392251881Speter		CACHE_UNLOCK();
393251881Speter		return (0);
394251881Speter	}
395251881Speter
396251881Speter	/* We don't want to have an entry, so dump it */
397251881Speter	if ((cnp->cn_flags & MAKEENTRY) == 0) {
398251881Speter		numposzaps++;
399251881Speter		nchstats.ncs_badhits++;
400251881Speter		cache_zap(ncp);
401251881Speter		CACHE_UNLOCK();
402251881Speter		return (0);
403251881Speter	}
404251881Speter
405251881Speter	/* We found a "positive" match, return the vnode */
406251881Speter	if (ncp->nc_vp) {
407251881Speter		numposhits++;
408251881Speter		nchstats.ncs_goodhits++;
409251881Speter		*vpp = ncp->nc_vp;
410251881Speter		goto success;
411251881Speter	}
412251881Speter
413251881Speter	/* We found a negative match, and want to create it, so purge */
414251881Speter	if (cnp->cn_nameiop == CREATE) {
415251881Speter		numnegzaps++;
416251881Speter		nchstats.ncs_badhits++;
417251881Speter		cache_zap(ncp);
418251881Speter		CACHE_UNLOCK();
419251881Speter		return (0);
420251881Speter	}
421251881Speter
422251881Speter	numneghits++;
423251881Speter	/*
424251881Speter	 * We found a "negative" match, so we shift it to the end of
425251881Speter	 * the "negative" cache entries queue to satisfy LRU.  Also,
426251881Speter	 * check to see if the entry is a whiteout; indicate this to
427251881Speter	 * the componentname, if so.
428251881Speter	 */
429251881Speter	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
430251881Speter	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
431251881Speter	nchstats.ncs_neghits++;
432251881Speter	if (ncp->nc_flag & NCF_WHITE)
433251881Speter		cnp->cn_flags |= ISWHITEOUT;
434251881Speter	CACHE_UNLOCK();
435251881Speter	return (ENOENT);
436251881Speter
437251881Spetersuccess:
438251881Speter	/*
439251881Speter	 * On success we return a locked and ref'd vnode as per the lookup
440251881Speter	 * protocol.
441251881Speter	 */
442251881Speter	if (dvp == *vpp) {   /* lookup on "." */
443251881Speter		VREF(*vpp);
444251881Speter		CACHE_UNLOCK();
445251881Speter		return (-1);
446251881Speter	}
447251881Speter	if (cnp->cn_flags & ISDOTDOT)
448251881Speter		VOP_UNLOCK(dvp, 0, cnp->cn_thread);
449251881Speter	VI_LOCK(*vpp);
450251881Speter	CACHE_UNLOCK();
451251881Speter	if (vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread)) {
452251881Speter		if (cnp->cn_flags & ISDOTDOT)
453251881Speter			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
454251881Speter		*vpp = NULL;
455251881Speter		goto retry;
456251881Speter	}
457251881Speter	return (-1);
458251881Speter}
459251881Speter
460251881Speter/*
461251881Speter * Add an entry to the cache.
462251881Speter */
463251881Spetervoid
464251881Spetercache_enter(dvp, vp, cnp)
465251881Speter	struct vnode *dvp;
466251881Speter	struct vnode *vp;
467251881Speter	struct componentname *cnp;
468251881Speter{
469251881Speter	struct namecache *ncp;
470251881Speter	struct nchashhead *ncpp;
471251881Speter	u_int32_t hash;
472251881Speter	int hold;
473251881Speter	int zap;
474251881Speter	int len;
475251881Speter
476251881Speter	if (!doingcache)
477251881Speter		return;
478251881Speter
479251881Speter	if (cnp->cn_nameptr[0] == '.') {
480251881Speter		if (cnp->cn_namelen == 1) {
481251881Speter			return;
482251881Speter		}
483251881Speter		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
484251881Speter			if (vp) {
485251881Speter				dvp->v_dd = vp;
486251881Speter				dvp->v_ddid = vp->v_id;
487251881Speter			} else {
488251881Speter				dvp->v_dd = dvp;
489251881Speter				dvp->v_ddid = 0;
490251881Speter			}
491251881Speter			return;
492251881Speter		}
493251881Speter	}
494251881Speter
495251881Speter	hold = 0;
496251881Speter	zap = 0;
497251881Speter	ncp = cache_alloc(cnp->cn_namelen);
498251881Speter	CACHE_LOCK();
499251881Speter	numcache++;
500251881Speter	if (!vp) {
501251881Speter		numneg++;
502251881Speter		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
503251881Speter	} else if (vp->v_type == VDIR) {
504251881Speter		vp->v_dd = dvp;
505251881Speter		vp->v_ddid = dvp->v_id;
506251881Speter	}
507251881Speter
508251881Speter	/*
509251881Speter	 * Set the rest of the namecache entry elements, calculate it's
510251881Speter	 * hash key and insert it into the appropriate chain within
511251881Speter	 * the cache entries table.
512251881Speter	 */
513251881Speter	ncp->nc_vp = vp;
514251881Speter	ncp->nc_dvp = dvp;
515251881Speter	len = ncp->nc_nlen = cnp->cn_namelen;
516251881Speter	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
517251881Speter	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
518251881Speter	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
519251881Speter	ncpp = NCHHASH(hash);
520251881Speter	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
521251881Speter	if (LIST_EMPTY(&dvp->v_cache_src)) {
522251881Speter		hold = 1;
523251881Speter		numcachehv++;
524251881Speter	}
525251881Speter	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
526251881Speter	/*
527251881Speter	 * If the entry is "negative", we place it into the
528251881Speter	 * "negative" cache queue, otherwise, we place it into the
529251881Speter	 * destination vnode's cache entries queue.
530251881Speter	 */
531251881Speter	if (vp) {
532251881Speter		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
533251881Speter	} else {
534251881Speter		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
535251881Speter	}
536251881Speter	if (numneg * ncnegfactor > numcache) {
537251881Speter		ncp = TAILQ_FIRST(&ncneg);
538251881Speter		zap = 1;
539251881Speter	}
540251881Speter	if (hold)
541251881Speter		vhold(dvp);
542251881Speter	if (zap)
543251881Speter		cache_zap(ncp);
544251881Speter	CACHE_UNLOCK();
545251881Speter}
546251881Speter
547251881Speter/*
548251881Speter * Name cache initialization, from vfs_init() when we are booting
549251881Speter */
550251881Speterstatic void
551251881Speternchinit(void *dummy __unused)
552251881Speter{
553251881Speter
554251881Speter	TAILQ_INIT(&ncneg);
555251881Speter
556251881Speter	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
557251881Speter	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
558251881Speter	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
559251881Speter	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
560251881Speter
561251881Speter	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
562251881Speter}
563251881SpeterSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)
564251881Speter
565251881Speter
566251881Speter/*
567251881Speter * Invalidate all entries to a particular vnode.
568251881Speter *
569251881Speter * Remove all entries in the namecache relating to this vnode and
570251881Speter * change the v_id.  We take the v_id from a global counter, since
571251881Speter * it becomes a handy sequence number in crash-dumps that way.
572251881Speter * No valid vnode will ever have (v_id == 0).
573251881Speter *
574251881Speter * XXX: Only time and the size of v_id prevents this from failing:
575251881Speter * XXX: In theory we should hunt down all (struct vnode*, v_id)
576251881Speter * XXX: soft references and nuke them, at least on the global
577251881Speter * XXX: v_id wraparound.  The period of resistance can be extended
578251881Speter * XXX: by incrementing each vnodes v_id individually instead of
579251881Speter * XXX: using the global v_id.
580251881Speter */
581251881Spetervoid
582251881Spetercache_purge(vp)
583251881Speter	struct vnode *vp;
584251881Speter{
585251881Speter	struct namecache *ncp;
586251881Speter	static u_long nextid;
587251881Speter
588251881Speter	CACHE_LOCK();
589251881Speter	while (!LIST_EMPTY(&vp->v_cache_src)) {
590251881Speter		struct vnode *cvp;
591251881Speter
592251881Speter		ncp = LIST_FIRST(&vp->v_cache_src);
593251881Speter		/*
594251881Speter		 * We must reset v_dd of any children so they don't
595251881Speter		 * continue to point to us.
596251881Speter		 */
597251881Speter		if ((cvp = ncp->nc_vp) && cvp->v_dd == vp) {
598251881Speter			cvp->v_dd = cvp;
599251881Speter			cvp->v_ddid = 0;
600251881Speter		}
601251881Speter		cache_zap(ncp);
602251881Speter	}
603251881Speter	while (!TAILQ_EMPTY(&vp->v_cache_dst))
604251881Speter		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
605	do
606		nextid++;
607	while (nextid == vp->v_id || !nextid);
608	vp->v_id = nextid;
609	vp->v_dd = vp;
610	vp->v_ddid = 0;
611	CACHE_UNLOCK();
612}
613
614/*
615 * Flush all entries referencing a particular filesystem.
616 *
617 * Since we need to check it anyway, we will flush all the invalid
618 * entries at the same time.
619 */
620void
621cache_purgevfs(mp)
622	struct mount *mp;
623{
624	struct nchashhead *ncpp;
625	struct namecache *ncp, *nnp;
626	struct nchashhead mplist;
627
628	LIST_INIT(&mplist);
629	ncp = NULL;
630
631	/* Scan hash tables for applicable entries */
632	CACHE_LOCK();
633	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
634		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
635			nnp = LIST_NEXT(ncp, nc_hash);
636			if (ncp->nc_dvp->v_mount == mp) {
637				LIST_REMOVE(ncp, nc_hash);
638				LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
639			}
640		}
641	}
642	while (!LIST_EMPTY(&mplist))
643		cache_zap(LIST_FIRST(&mplist));
644	CACHE_UNLOCK();
645}
646
647/*
648 * Perform canonical checks and cache lookup and pass on to filesystem
649 * through the vop_cachedlookup only if needed.
650 */
651
652int
653vfs_cache_lookup(ap)
654	struct vop_lookup_args /* {
655		struct vnode *a_dvp;
656		struct vnode **a_vpp;
657		struct componentname *a_cnp;
658	} */ *ap;
659{
660	struct vnode *dvp;
661	int error;
662	struct vnode **vpp = ap->a_vpp;
663	struct componentname *cnp = ap->a_cnp;
664	struct ucred *cred = cnp->cn_cred;
665	int flags = cnp->cn_flags;
666	struct thread *td = cnp->cn_thread;
667
668	*vpp = NULL;
669	dvp = ap->a_dvp;
670
671	if (dvp->v_type != VDIR)
672		return (ENOTDIR);
673
674	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
675	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
676		return (EROFS);
677
678	error = VOP_ACCESS(dvp, VEXEC, cred, td);
679	if (error)
680		return (error);
681
682	error = cache_lookup(dvp, vpp, cnp);
683	if (error == 0)
684		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
685	if (error == ENOENT)
686		return (error);
687	return (0);
688}
689
690
691#ifndef _SYS_SYSPROTO_H_
692struct  __getcwd_args {
693	u_char	*buf;
694	u_int	buflen;
695};
696#endif
697
698/*
699 * XXX All of these sysctls would probably be more productive dead.
700 */
701static int disablecwd;
702SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
703   "Disable the getcwd syscall");
704
705/* Implementation of the getcwd syscall */
706int
707__getcwd(td, uap)
708	struct thread *td;
709	struct __getcwd_args *uap;
710{
711
712	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
713}
714
715int
716kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
717{
718	char *bp, *tmpbuf;
719	struct filedesc *fdp;
720	int error;
721
722	if (disablecwd)
723		return (ENODEV);
724	if (buflen < 2)
725		return (EINVAL);
726	if (buflen > MAXPATHLEN)
727		buflen = MAXPATHLEN;
728
729	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
730	fdp = td->td_proc->p_fd;
731	mtx_lock(&Giant);
732	FILEDESC_LOCK(fdp);
733	error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf,
734	    &bp, buflen);
735	FILEDESC_UNLOCK(fdp);
736	mtx_unlock(&Giant);
737
738	if (!error) {
739		if (bufseg == UIO_SYSSPACE)
740			bcopy(bp, buf, strlen(bp) + 1);
741		else
742			error = copyout(bp, buf, strlen(bp) + 1);
743	}
744	free(tmpbuf, M_TEMP);
745	return (error);
746}
747
748/*
749 * Thus begins the fullpath magic.
750 */
751
752#undef STATNODE
753#define STATNODE(name)							\
754	static u_int name;						\
755	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
756
757static int disablefullpath;
758SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
759	"Disable the vn_fullpath function");
760
761/* These count for kern___getcwd(), too. */
762STATNODE(numfullpathcalls);
763STATNODE(numfullpathfail1);
764STATNODE(numfullpathfail2);
765STATNODE(numfullpathfail4);
766STATNODE(numfullpathfound);
767
768/*
769 * Retrieve the full filesystem path that correspond to a vnode from the name
770 * cache (if available)
771 */
772int
773vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
774{
775	char *buf;
776	struct filedesc *fdp;
777	int error;
778
779	if (disablefullpath)
780		return (ENODEV);
781	if (vn == NULL)
782		return (EINVAL);
783
784	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
785	fdp = td->td_proc->p_fd;
786	mtx_lock(&Giant);
787	FILEDESC_LOCK(fdp);
788	error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN);
789	FILEDESC_UNLOCK(fdp);
790	mtx_unlock(&Giant);
791
792	if (!error)
793		*freebuf = buf;
794	else
795		free(buf, M_TEMP);
796	return (error);
797}
798
799/*
800 * The magic behind kern___getcwd() and vn_fullpath().
801 */
802static int
803vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
804    char *buf, char **retbuf, u_int buflen)
805{
806	char *bp;
807	int error, i, slash_prefixed;
808	struct namecache *ncp;
809
810	mtx_assert(&Giant, MA_OWNED);
811
812	bp = buf + buflen - 1;
813	*bp = '\0';
814	error = 0;
815	slash_prefixed = 0;
816
817	CACHE_LOCK();
818	numfullpathcalls++;
819	if (vp->v_type != VDIR) {
820		ncp = TAILQ_FIRST(&vp->v_cache_dst);
821		if (!ncp) {
822			numfullpathfail2++;
823			CACHE_UNLOCK();
824			return (ENOENT);
825		}
826		for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--)
827			*--bp = ncp->nc_name[i];
828		if (bp == buf) {
829			numfullpathfail4++;
830			CACHE_UNLOCK();
831			return (ENOMEM);
832		}
833		*--bp = '/';
834		slash_prefixed = 1;
835		vp = ncp->nc_dvp;
836	}
837	while (vp != rdir && vp != rootvnode) {
838		if (vp->v_vflag & VV_ROOT) {
839			if (vp->v_mount == NULL) {	/* forced unmount */
840				error = EBADF;
841				break;
842			}
843			vp = vp->v_mount->mnt_vnodecovered;
844			continue;
845		}
846		if (vp->v_dd->v_id != vp->v_ddid) {
847			numfullpathfail1++;
848			error = ENOTDIR;
849			break;
850		}
851		ncp = TAILQ_FIRST(&vp->v_cache_dst);
852		if (!ncp) {
853			numfullpathfail2++;
854			error = ENOENT;
855			break;
856		}
857		MPASS(ncp->nc_dvp == vp->v_dd);
858		for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--)
859			*--bp = ncp->nc_name[i];
860		if (bp == buf) {
861			numfullpathfail4++;
862			error = ENOMEM;
863			break;
864		}
865		*--bp = '/';
866		slash_prefixed = 1;
867		vp = ncp->nc_dvp;
868	}
869	if (error) {
870		CACHE_UNLOCK();
871		return (error);
872	}
873	if (!slash_prefixed) {
874		if (bp == buf) {
875			numfullpathfail4++;
876			CACHE_UNLOCK();
877			return (ENOMEM);
878		} else {
879			*--bp = '/';
880		}
881	}
882	numfullpathfound++;
883	CACHE_UNLOCK();
884
885	*retbuf = bp;
886	return (0);
887}
888