vfs_cache.c (188833 → 189593)
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 188833 2009-02-19 22:28:48Z jhb $");
36__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 189593 2009-03-09 19:04:53Z jhb $");
37
38#include <sys/param.h>
39#include <sys/filedesc.h>
40#include <sys/fnv_hash.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/rwlock.h>
48#include <sys/syscallsubr.h>
49#include <sys/sysctl.h>
50#include <sys/sysproto.h>
51#include <sys/systm.h>
52#include <sys/vnode.h>
53
54#include <vm/uma.h>
55
56/*
57 * This structure describes the elements in the cache of recent
58 * names looked up by namei.
59 */
60
61struct namecache {
62 LIST_ENTRY(namecache) nc_hash; /* hash chain */
63 LIST_ENTRY(namecache) nc_src; /* source vnode list */
64 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
65 struct vnode *nc_dvp; /* vnode of parent of name */
66 struct vnode *nc_vp; /* vnode the name refers to */
67 u_char nc_flag; /* flag bits */
68 u_char nc_nlen; /* length of name */
69 char nc_name[0]; /* segment name */
70};
71
72/*
73 * Name caching works as follows:
74 *
75 * Names found by directory scans are retained in a cache
76 * for future reference. It is managed LRU, so frequently
77 * used names will hang around. Cache is indexed by hash value
78 * obtained from (vp, name) where vp refers to the directory
79 * containing name.
80 *
81 * If it is a "negative" entry, (i.e. for a name that is known NOT to
82 * exist) the vnode pointer will be NULL.
83 *
84 * Upon reaching the last segment of a path, if the reference
85 * is for DELETE, or NOCACHE is set (rewrite), and the
86 * name is located in the cache, it will be dropped.
87 */
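/*
 * A minimal sketch of the lookup key, as computed in cache_lookup() and
 * cache_enter() below: the component name and the directory vnode
 * pointer are folded through the FNV hash and masked into a chain,
 *
 *	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 *	ncpp = NCHHASH(hash);	   i.e. &nchashtbl[hash & nchash]
 *
 * so the same name under two different directories lands in different
 * chains and a lookup only walks a single chain.
 */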
88
89/*
 90 * Structures associated with name caching.
91 */
92#define NCHHASH(hash) \
93 (&nchashtbl[(hash) & nchash])
94static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
 95static TAILQ_HEAD(, namecache) ncneg; /* Negative entry LRU list */
96static u_long nchash; /* size of hash table */
97SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
98static u_long ncnegfactor = 16; /* ratio of negative entries */
99SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
 100static u_long numneg; /* number of negative cache entries allocated */
101SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
102static u_long numcache; /* number of cache entries allocated */
103SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
104static u_long numcachehv; /* number of cache entries with vnodes held */
105SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
106#if 0
107static u_long numcachepl; /* number of cache purge for leaf entries */
108SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
109#endif
110struct nchstats nchstats; /* cache effectiveness statistics */
111
112static struct rwlock cache_lock;
113RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
114
115#define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock)
116#define CACHE_RLOCK() rw_rlock(&cache_lock)
117#define CACHE_RUNLOCK() rw_runlock(&cache_lock)
118#define CACHE_WLOCK() rw_wlock(&cache_lock)
119#define CACHE_WUNLOCK() rw_wunlock(&cache_lock)
120
121/*
122 * UMA zones for the VFS cache.
123 *
124 * The small cache is used for entries with short names, which are the
125 * most common. The large cache is used for entries which are too big to
126 * fit in the small cache.
127 */
128static uma_zone_t cache_zone_small;
129static uma_zone_t cache_zone_large;
130
131#define CACHE_PATH_CUTOFF 32
132#define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF)
133#define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX)
134
135#define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
136 cache_zone_small : cache_zone_large, M_WAITOK)
137#define cache_free(ncp) do { \
138 if (ncp != NULL) \
139 uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
140 cache_zone_small : cache_zone_large, (ncp)); \
141} while (0)
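/*
 * For example, with CACHE_PATH_CUTOFF at 32 an entry for the 7-byte
 * name "libexec" is allocated from cache_zone_small, whose items are
 * sizeof(struct namecache) + 32 bytes, while a 40-byte name falls
 * through to cache_zone_large, which is sized for NAME_MAX.
 */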
142
143static int doingcache = 1; /* 1 => enable the cache */
144SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
145
146/* Export size information to userland */
147SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
148 sizeof(struct namecache), "");
149
150/*
151 * The new name cache statistics
152 */
153static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
154#define STATNODE(mode, name, var) \
155 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
156STATNODE(CTLFLAG_RD, numneg, &numneg);
157STATNODE(CTLFLAG_RD, numcache, &numcache);
158static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
159static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
160static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
161static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
162static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
163static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
164static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
165static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
166static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
167static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
168static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades);
169
170SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
171 &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics");
172
173
174
175static void cache_zap(struct namecache *ncp);
176static int vn_vptocnp(struct vnode **vp, char **bp, char *buf, u_int *buflen);
177static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
178 char *buf, char **retbuf, u_int buflen);
179
180static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
181
182/*
183 * Flags in namecache.nc_flag
184 */
185#define NCF_WHITE 1
186
187#ifdef DIAGNOSTIC
187/*
188 * Grab an atomic snapshot of the name cache hash chain lengths
189 */
190SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");
191
192static int
193sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
194{
195 int error;
196 struct nchashhead *ncpp;
197 struct namecache *ncp;
198 int n_nchash;
199 int count;
200
201 n_nchash = nchash + 1; /* nchash is max index, not count */
202 if (!req->oldptr)
203 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
204
205 /* Scan hash tables for applicable entries */
206 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
207 CACHE_RLOCK();
208 count = 0;
209 LIST_FOREACH(ncp, ncpp, nc_hash) {
210 count++;
211 }
212 CACHE_RUNLOCK();
213 error = SYSCTL_OUT(req, &count, sizeof(count));
214 if (error)
215 return (error);
216 }
217 return (0);
218}
219SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
220 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
221 "nchash chain lengths");
222
223static int
224sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
225{
226 int error;
227 struct nchashhead *ncpp;
228 struct namecache *ncp;
229 int n_nchash;
230 int count, maxlength, used, pct;
231
232 if (!req->oldptr)
233 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
234
235 n_nchash = nchash + 1; /* nchash is max index, not count */
236 used = 0;
237 maxlength = 0;
238
239 /* Scan hash tables for applicable entries */
240 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
241 count = 0;
242 CACHE_RLOCK();
243 LIST_FOREACH(ncp, ncpp, nc_hash) {
244 count++;
245 }
246 CACHE_RUNLOCK();
247 if (count)
248 used++;
249 if (maxlength < count)
250 maxlength = count;
251 }
252 n_nchash = nchash + 1;
253 pct = (used * 100 * 100) / n_nchash;
254 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
255 if (error)
256 return (error);
257 error = SYSCTL_OUT(req, &used, sizeof(used));
258 if (error)
259 return (error);
260 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
261 if (error)
262 return (error);
263 error = SYSCTL_OUT(req, &pct, sizeof(pct));
264 if (error)
265 return (error);
266 return (0);
267}
268SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
269 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
270 "nchash chain lengths");
272#endif
271
272/*
273 * cache_zap():
274 *
275 * Removes a namecache entry from cache, whether it contains an actual
276 * pointer to a vnode or if it is just a negative cache entry.
277 */
278static void
279cache_zap(ncp)
280 struct namecache *ncp;
281{
282 struct vnode *vp;
283
284 rw_assert(&cache_lock, RA_WLOCKED);
285 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
286 vp = NULL;
287 LIST_REMOVE(ncp, nc_hash);
288 LIST_REMOVE(ncp, nc_src);
289 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
290 vp = ncp->nc_dvp;
291 numcachehv--;
292 }
293 if (ncp->nc_vp) {
294 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
295 ncp->nc_vp->v_dd = NULL;
296 } else {
297 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
298 numneg--;
299 }
300 numcache--;
301 cache_free(ncp);
302 if (vp)
303 vdrop(vp);
304}
305
306/*
307 * Lookup an entry in the cache
308 *
309 * Lookup is called with dvp pointing to the directory to search,
310 * cnp pointing to the name of the entry being sought. If the lookup
311 * succeeds, the vnode is returned in *vpp, and a status of -1 is
312 * returned. If the lookup determines that the name does not exist
313 * (negative cacheing), a status of ENOENT is returned. If the lookup
314 * fails, a status of zero is returned. If the directory vnode is
315 * recycled out from under us due to a forced unmount, a status of
316 * EBADF is returned.
317 *
318 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is
319 * unlocked. If we're looking up . an extra ref is taken, but the lock is
320 * not recursively acquired.
321 */
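/*
 * In caller terms (vfs_cache_lookup() below is the canonical consumer),
 * the three-way status maps onto a lookup roughly like this:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == 0)			(miss: ask the filesystem)
 *		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 *	if (error == -1)		(positive hit: *vpp is locked and ref'd)
 *		return (0);
 *	return (error);			(ENOENT negative hit, or EBADF)
 */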
322
323int
324cache_lookup(dvp, vpp, cnp)
325 struct vnode *dvp;
326 struct vnode **vpp;
327 struct componentname *cnp;
328{
329 struct namecache *ncp;
330 u_int32_t hash;
331 int error, ltype, wlocked;
332
333 if (!doingcache) {
334 cnp->cn_flags &= ~MAKEENTRY;
335 return (0);
336 }
337retry:
338 CACHE_RLOCK();
339 wlocked = 0;
340 numcalls++;
341 error = 0;
342
343retry_wlocked:
344 if (cnp->cn_nameptr[0] == '.') {
345 if (cnp->cn_namelen == 1) {
346 *vpp = dvp;
347 CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
348 dvp, cnp->cn_nameptr);
349 dothits++;
350 goto success;
351 }
352 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
353 dotdothits++;
354 if (dvp->v_dd == NULL ||
355 (cnp->cn_flags & MAKEENTRY) == 0) {
356 goto unlock;
357 }
358 *vpp = dvp->v_dd;
359 CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
360 dvp, cnp->cn_nameptr, *vpp);
361 goto success;
362 }
363 }
364
365 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
366 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
367 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
368 numchecks++;
369 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
370 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
371 break;
372 }
373
374 /* We failed to find an entry */
375 if (ncp == NULL) {
376 if ((cnp->cn_flags & MAKEENTRY) == 0) {
377 nummisszap++;
378 } else {
379 nummiss++;
380 }
381 nchstats.ncs_miss++;
382 goto unlock;
383 }
384
385 /* We don't want to have an entry, so dump it */
386 if ((cnp->cn_flags & MAKEENTRY) == 0) {
387 numposzaps++;
388 nchstats.ncs_badhits++;
389 if (!wlocked && !CACHE_UPGRADE_LOCK())
390 goto wlock;
391 cache_zap(ncp);
392 CACHE_WUNLOCK();
393 return (0);
394 }
395
396 /* We found a "positive" match, return the vnode */
397 if (ncp->nc_vp) {
398 numposhits++;
399 nchstats.ncs_goodhits++;
400 *vpp = ncp->nc_vp;
401 CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
402 dvp, cnp->cn_nameptr, *vpp, ncp);
403 goto success;
404 }
405
406 /* We found a negative match, and want to create it, so purge */
407 if (cnp->cn_nameiop == CREATE) {
408 numnegzaps++;
409 nchstats.ncs_badhits++;
410 if (!wlocked && !CACHE_UPGRADE_LOCK())
411 goto wlock;
412 cache_zap(ncp);
413 CACHE_WUNLOCK();
414 return (0);
415 }
416
417 if (!wlocked && !CACHE_UPGRADE_LOCK())
418 goto wlock;
419 numneghits++;
420 /*
421 * We found a "negative" match, so we shift it to the end of
422 * the "negative" cache entries queue to satisfy LRU. Also,
423 * check to see if the entry is a whiteout; indicate this to
424 * the componentname, if so.
425 */
426 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
427 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
428 nchstats.ncs_neghits++;
429 if (ncp->nc_flag & NCF_WHITE)
430 cnp->cn_flags |= ISWHITEOUT;
431 CACHE_WUNLOCK();
432 return (ENOENT);
433
434wlock:
435 /*
436 * We need to update the cache after our lookup, so upgrade to
437 * a write lock and retry the operation.
438 */
439 CACHE_RUNLOCK();
440 CACHE_WLOCK();
441 numupgrades++;
442 wlocked = 1;
443 goto retry_wlocked;
444
445success:
446 /*
447 * On success we return a locked and ref'd vnode as per the lookup
448 * protocol.
449 */
450 if (dvp == *vpp) { /* lookup on "." */
451 VREF(*vpp);
452 if (wlocked)
453 CACHE_WUNLOCK();
454 else
455 CACHE_RUNLOCK();
456 /*
457 * When we lookup "." we still can be asked to lock it
458 * differently...
459 */
460 ltype = cnp->cn_lkflags & LK_TYPE_MASK;
461 if (ltype != VOP_ISLOCKED(*vpp)) {
462 if (ltype == LK_EXCLUSIVE) {
463 vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
464 if ((*vpp)->v_iflag & VI_DOOMED) {
465 /* forced unmount */
466 vrele(*vpp);
467 *vpp = NULL;
468 return (EBADF);
469 }
470 } else
471 vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
472 }
473 return (-1);
474 }
475 ltype = 0; /* silence gcc warning */
476 if (cnp->cn_flags & ISDOTDOT) {
477 ltype = VOP_ISLOCKED(dvp);
478 VOP_UNLOCK(dvp, 0);
479 }
480 VI_LOCK(*vpp);
481 if (wlocked)
482 CACHE_WUNLOCK();
483 else
484 CACHE_RUNLOCK();
485 error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
486 if (cnp->cn_flags & ISDOTDOT)
487 vn_lock(dvp, ltype | LK_RETRY);
488 if (error) {
489 *vpp = NULL;
490 goto retry;
491 }
492 if ((cnp->cn_flags & ISLASTCN) &&
493 (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
494 ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
495 }
496 return (-1);
497
498unlock:
499 if (wlocked)
500 CACHE_WUNLOCK();
501 else
502 CACHE_RUNLOCK();
503 return (0);
504}
505
506/*
507 * Add an entry to the cache.
508 */
509void
510cache_enter(dvp, vp, cnp)
511 struct vnode *dvp;
512 struct vnode *vp;
513 struct componentname *cnp;
514{
515 struct namecache *ncp, *n2;
516 struct nchashhead *ncpp;
517 u_int32_t hash;
518 int hold;
519 int zap;
520 int len;
521
522 CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
523 VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
524 ("cahe_enter: Adding a doomed vnode"));
525
526 if (!doingcache)
527 return;
528
529 /*
530 * Avoid blowout in namecache entries.
531 */
532 if (numcache >= desiredvnodes * 2)
533 return;
534
535 if (cnp->cn_nameptr[0] == '.') {
536 if (cnp->cn_namelen == 1) {
537 return;
538 }
539 /*
540 * For dotdot lookups only cache the v_dd pointer if the
541 * directory has a link back to its parent via v_cache_dst.
542 * Without this an unlinked directory would keep a soft
543 * reference to its parent which could not be NULLd at
544 * cache_purge() time.
545 */
546 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
547 CACHE_WLOCK();
548 if (!TAILQ_EMPTY(&dvp->v_cache_dst))
549 dvp->v_dd = vp;
550 CACHE_WUNLOCK();
551 return;
552 }
553 }
554
555 hold = 0;
556 zap = 0;
557
558 /*
559 * Calculate the hash key and setup as much of the new
560 * namecache entry as possible before acquiring the lock.
561 */
562 ncp = cache_alloc(cnp->cn_namelen);
563 ncp->nc_vp = vp;
564 ncp->nc_dvp = dvp;
565 len = ncp->nc_nlen = cnp->cn_namelen;
566 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
567 bcopy(cnp->cn_nameptr, ncp->nc_name, len);
568 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
569 CACHE_WLOCK();
570
571 /*
572 * See if this vnode or negative entry is already in the cache
573 * with this name. This can happen with concurrent lookups of
574 * the same path name.
575 */
576 ncpp = NCHHASH(hash);
577 LIST_FOREACH(n2, ncpp, nc_hash) {
578 if (n2->nc_dvp == dvp &&
579 n2->nc_nlen == cnp->cn_namelen &&
580 !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
581 CACHE_WUNLOCK();
582 cache_free(ncp);
583 return;
584 }
585 }
586
587 numcache++;
588 if (!vp) {
589 numneg++;
590 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
591 } else if (vp->v_type == VDIR) {
592 vp->v_dd = dvp;
593 } else {
594 vp->v_dd = NULL;
595 }
596
597 /*
598 * Insert the new namecache entry into the appropriate chain
599 * within the cache entries table.
600 */
601 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
602 if (LIST_EMPTY(&dvp->v_cache_src)) {
603 hold = 1;
604 numcachehv++;
605 }
606 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
607 /*
608 * If the entry is "negative", we place it into the
609 * "negative" cache queue, otherwise, we place it into the
610 * destination vnode's cache entries queue.
611 */
612 if (vp) {
613 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
614 } else {
615 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
616 }
617 if (numneg * ncnegfactor > numcache) {
618 ncp = TAILQ_FIRST(&ncneg);
619 zap = 1;
620 }
621 if (hold)
622 vhold(dvp);
623 if (zap)
624 cache_zap(ncp);
625 CACHE_WUNLOCK();
626}
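/*
 * A rough feel for the negative-entry cap enforced above: with the
 * default ncnegfactor of 16, the oldest entry on ncneg (its head, since
 * hits are re-queued at the tail in cache_lookup()) is zapped once
 * negative entries would exceed about 1/16 of numcache, e.g. roughly
 * 1000 negatives in a cache of 16000 entries.
 */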
627
628/*
629 * Name cache initialization, from vfs_init() when we are booting
630 */
631static void
632nchinit(void *dummy __unused)
633{
634
635 TAILQ_INIT(&ncneg);
636
637 cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
638 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
639 cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
640 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
641
642 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
643}
644SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
645
646
647/*
648 * Invalidate all entries to a particular vnode.
649 */
650void
651cache_purge(vp)
652 struct vnode *vp;
653{
654
655 CTR1(KTR_VFS, "cache_purge(%p)", vp);
656 CACHE_WLOCK();
657 while (!LIST_EMPTY(&vp->v_cache_src))
658 cache_zap(LIST_FIRST(&vp->v_cache_src));
659 while (!TAILQ_EMPTY(&vp->v_cache_dst))
660 cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
661 vp->v_dd = NULL;
662 CACHE_WUNLOCK();
663}
664
665/*
666 * Invalidate all negative entries for a particular directory vnode.
667 */
668void
669cache_purge_negative(vp)
670 struct vnode *vp;
671{
672 struct namecache *cp, *ncp;
673
674 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
675 CACHE_WLOCK();
676 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
677 if (cp->nc_vp == NULL)
678 cache_zap(cp);
679 }
680 CACHE_WUNLOCK();
681}
682
683/*
684 * Flush all entries referencing a particular filesystem.
685 */
686void
687cache_purgevfs(mp)
688 struct mount *mp;
689{
690 struct nchashhead *ncpp;
691 struct namecache *ncp, *nnp;
692
693 /* Scan hash tables for applicable entries */
694 CACHE_WLOCK();
695 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
696 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
697 if (ncp->nc_dvp->v_mount == mp)
698 cache_zap(ncp);
699 }
700 }
701 CACHE_WUNLOCK();
702}
703
704/*
705 * Perform canonical checks and cache lookup and pass on to filesystem
706 * through the vop_cachedlookup only if needed.
707 */
708
709int
710vfs_cache_lookup(ap)
711 struct vop_lookup_args /* {
712 struct vnode *a_dvp;
713 struct vnode **a_vpp;
714 struct componentname *a_cnp;
715 } */ *ap;
716{
717 struct vnode *dvp;
718 int error;
719 struct vnode **vpp = ap->a_vpp;
720 struct componentname *cnp = ap->a_cnp;
721 struct ucred *cred = cnp->cn_cred;
722 int flags = cnp->cn_flags;
723 struct thread *td = cnp->cn_thread;
724
725 *vpp = NULL;
726 dvp = ap->a_dvp;
727
728 if (dvp->v_type != VDIR)
729 return (ENOTDIR);
730
731 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
732 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
733 return (EROFS);
734
735 error = VOP_ACCESS(dvp, VEXEC, cred, td);
736 if (error)
737 return (error);
738
739 error = cache_lookup(dvp, vpp, cnp);
740 if (error == 0)
741 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
742 if (error == -1)
743 return (0);
744 return (error);
745}
746
747
748#ifndef _SYS_SYSPROTO_H_
749struct __getcwd_args {
750 u_char *buf;
751 u_int buflen;
752};
753#endif
754
755/*
756 * XXX All of these sysctls would probably be more productive dead.
757 */
758static int disablecwd;
759SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
760 "Disable the getcwd syscall");
761
762/* Implementation of the getcwd syscall. */
763int
764__getcwd(td, uap)
765 struct thread *td;
766 struct __getcwd_args *uap;
767{
768
769 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
770}
771
772int
773kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
774{
775 char *bp, *tmpbuf;
776 struct filedesc *fdp;
777 struct vnode *cdir, *rdir;
778 int error, vfslocked;
779
780 if (disablecwd)
781 return (ENODEV);
782 if (buflen < 2)
783 return (EINVAL);
784 if (buflen > MAXPATHLEN)
785 buflen = MAXPATHLEN;
786
787 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
788 fdp = td->td_proc->p_fd;
789 FILEDESC_SLOCK(fdp);
790 cdir = fdp->fd_cdir;
791 VREF(cdir);
792 rdir = fdp->fd_rdir;
793 VREF(rdir);
794 FILEDESC_SUNLOCK(fdp);
795 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
796 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
797 vrele(rdir);
798 VFS_UNLOCK_GIANT(vfslocked);
799 vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
800 vrele(cdir);
801 VFS_UNLOCK_GIANT(vfslocked);
802
803 if (!error) {
804 if (bufseg == UIO_SYSSPACE)
805 bcopy(bp, buf, strlen(bp) + 1);
806 else
807 error = copyout(bp, buf, strlen(bp) + 1);
808 }
809 free(tmpbuf, M_TEMP);
810 return (error);
811}
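/*
 * Sketch of the consumer side, assuming the stock libc wrapper: a
 * userland getcwd(3) call such as
 *
 *	char cwd[PATH_MAX];
 *	if (getcwd(cwd, sizeof(cwd)) == NULL)
 *		err(1, "getcwd");
 *
 * issues the __getcwd(2) system call, which arrives at kern___getcwd()
 * above and resolves the path from the name cache via vn_fullpath1();
 * libc may fall back to walking ".." entries itself if that fails.
 */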
812
813/*
814 * Thus begins the fullpath magic.
815 */
816
817#undef STATNODE
818#define STATNODE(name) \
819 static u_int name; \
820 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
821
822static int disablefullpath;
823SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
824 "Disable the vn_fullpath function");
825
826/* These count for kern___getcwd(), too. */
827STATNODE(numfullpathcalls);
828STATNODE(numfullpathfail1);
829STATNODE(numfullpathfail2);
830STATNODE(numfullpathfail4);
831STATNODE(numfullpathfound);
832
833/*
 834 * Retrieve the full filesystem path that corresponds to a vnode from the name
835 * cache (if available)
836 */
837int
838vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
839{
840 char *buf;
841 struct filedesc *fdp;
842 struct vnode *rdir;
843 int error, vfslocked;
844
845 if (disablefullpath)
846 return (ENODEV);
847 if (vn == NULL)
848 return (EINVAL);
849
850 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
851 fdp = td->td_proc->p_fd;
852 FILEDESC_SLOCK(fdp);
853 rdir = fdp->fd_rdir;
854 VREF(rdir);
855 FILEDESC_SUNLOCK(fdp);
856 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
857 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
858 vrele(rdir);
859 VFS_UNLOCK_GIANT(vfslocked);
860
861 if (!error)
862 *freebuf = buf;
863 else
864 free(buf, M_TEMP);
865 return (error);
866}
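/*
 * Typical use of the retbuf/freebuf protocol (a sketch; in-kernel
 * callers vary):
 *
 *	char *fullpath, *freepath;
 *
 *	if (vn_fullpath(curthread, vp, &fullpath, &freepath) == 0) {
 *		printf("vnode %p resolves to %s\n", vp, fullpath);
 *		free(freepath, M_TEMP);
 *	}
 *
 * On success *retbuf points into the M_TEMP buffer returned through
 * *freebuf, so only the freebuf pointer is passed to free().
 */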
867
868/*
 869 * This function is similar to vn_fullpath, but it attempts to look up the
870 * pathname relative to the global root mount point. This is required for the
871 * auditing sub-system, as audited pathnames must be absolute, relative to the
872 * global root mount point.
873 */
874int
875vn_fullpath_global(struct thread *td, struct vnode *vn,
876 char **retbuf, char **freebuf)
877{
878 char *buf;
879 int error;
880
881 if (disablefullpath)
882 return (ENODEV);
883 if (vn == NULL)
884 return (EINVAL);
885 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
886 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
887 if (!error)
888 *freebuf = buf;
889 else
890 free(buf, M_TEMP);
891 return (error);
892}
893
894static int
895vn_vptocnp(struct vnode **vp, char **bp, char *buf, u_int *buflen)
896{
897 struct vnode *dvp;
898 int error, vfslocked;
899
900 vhold(*vp);
901 CACHE_RUNLOCK();
902 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
903 vn_lock(*vp, LK_SHARED | LK_RETRY);
904 error = VOP_VPTOCNP(*vp, &dvp, buf, buflen);
905 VOP_UNLOCK(*vp, 0);
906 vdrop(*vp);
907 VFS_UNLOCK_GIANT(vfslocked);
908 if (error) {
909 numfullpathfail2++;
910 return (error);
911 }
912 *bp = buf + *buflen;
913 *vp = dvp;
914 CACHE_RLOCK();
915 if ((*vp)->v_iflag & VI_DOOMED) {
916 /* forced unmount */
917 CACHE_RUNLOCK();
918 vdrop(*vp);
919 return (ENOENT);
920 }
921 vdrop(*vp);
922
923 return (0);
924}
925
926/*
927 * The magic behind kern___getcwd() and vn_fullpath().
928 */
929static int
930vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
931 char *buf, char **retbuf, u_int buflen)
932{
933 char *bp;
934 int error, i, slash_prefixed;
935 struct namecache *ncp;
936
937 buflen--;
938 bp = buf + buflen;
939 *bp = '\0';
940 error = 0;
941 slash_prefixed = 0;
942
943 CACHE_RLOCK();
944 numfullpathcalls++;
945 if (vp->v_type != VDIR) {
946 ncp = TAILQ_FIRST(&vp->v_cache_dst);
947 if (ncp != NULL) {
948 for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--)
949 *--bp = ncp->nc_name[i];
950 if (bp == buf) {
951 numfullpathfail4++;
952 CACHE_RUNLOCK();
953 return (ENOMEM);
954 }
955 vp = ncp->nc_dvp;
956 } else {
957 error = vn_vptocnp(&vp, &bp, buf, &buflen);
958 if (error) {
959 return (error);
960 }
961 }
962 *--bp = '/';
963 buflen--;
964 if (buflen < 0) {
965 numfullpathfail4++;
966 CACHE_RUNLOCK();
967 return (ENOMEM);
968 }
969 slash_prefixed = 1;
970 }
971 while (vp != rdir && vp != rootvnode) {
972 if (vp->v_vflag & VV_ROOT) {
973 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */
974 CACHE_RUNLOCK();
975 error = EBADF;
976 break;
977 }
978 vp = vp->v_mount->mnt_vnodecovered;
979 continue;
980 }
981 if (vp->v_type != VDIR) {
982 numfullpathfail1++;
983 CACHE_RUNLOCK();
984 error = ENOTDIR;
985 break;
986 }
987 ncp = TAILQ_FIRST(&vp->v_cache_dst);
988 if (ncp != NULL) {
989 MPASS(vp->v_dd == NULL || ncp->nc_dvp == vp->v_dd);
990 buflen -= ncp->nc_nlen - 1;
991 for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--)
992 *--bp = ncp->nc_name[i];
993 if (bp == buf) {
994 numfullpathfail4++;
995 CACHE_RUNLOCK();
996 error = ENOMEM;
997 break;
998 }
999 vp = ncp->nc_dvp;
1000 } else {
1001 error = vn_vptocnp(&vp, &bp, buf, &buflen);
1002 if (error) {
1003 break;
1004 }
1005 }
1006 *--bp = '/';
1007 buflen--;
1008 if (buflen < 0) {
1009 numfullpathfail4++;
1010 CACHE_RUNLOCK();
1011 error = ENOMEM;
1012 break;
1013 }
1014 slash_prefixed = 1;
1015 }
1016 if (error)
1017 return (error);
1018 if (!slash_prefixed) {
1019 if (bp == buf) {
1020 numfullpathfail4++;
1021 CACHE_RUNLOCK();
1022 return (ENOMEM);
1023 } else {
1024 *--bp = '/';
1025 }
1026 }
1027 numfullpathfound++;
1028 CACHE_RUNLOCK();
1029
1030 *retbuf = bp;
1031 return (0);
1032}
1033
1034int
1035vn_commname(struct vnode *vp, char *buf, u_int buflen)
1036{
1037 struct namecache *ncp;
1038 int l;
1039
1040 CACHE_RLOCK();
1041 ncp = TAILQ_FIRST(&vp->v_cache_dst);
1042 if (!ncp) {
1043 CACHE_RUNLOCK();
1044 return (ENOENT);
1045 }
1046 l = min(ncp->nc_nlen, buflen - 1);
1047 memcpy(buf, ncp->nc_name, l);
1048 CACHE_RUNLOCK();
1049 buf[l] = '\0';
1050 return (0);
1051}
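/*
 * The counters published through the vfs.cache sysctl tree above can be
 * read from userland; a minimal sketch using the standard sysctl(3)
 * interface:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		u_long numcache, numneg;
 *		size_t len;
 *
 *		len = sizeof(numcache);
 *		sysctlbyname("vfs.cache.numcache", &numcache, &len, NULL, 0);
 *		len = sizeof(numneg);
 *		sysctlbyname("vfs.cache.numneg", &numneg, &len, NULL, 0);
 *		printf("%lu entries, %lu negative\n", numcache, numneg);
 *		return (0);
 *	}
 */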