--- vfs_cache.c (revision 230143)
+++ vfs_cache.c (revision 230394)
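
This revision teaches the name cache to carry timestamps: struct namecache
gains nc_time and nc_ticks, cache_lookup() is renamed cache_lookup_times()
with two optional out parameters, and cache_enter() becomes cache_enter_time()
with an optional timespec supplied by the filesystem. Deleted lines below are
marked "-", added lines "+"; everything else is common to both revisions. The
new entry points, restated as ANSI prototypes for reference (the diff itself
keeps the K&R definitions):

	int cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
	    struct componentname *cnp, struct timespec *tsp, int *ticksp);
	void cache_enter_time(struct vnode *dvp, struct vnode *vp,
	    struct componentname *cnp, struct timespec *tsp);
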
1/*-
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Poul-Henning Kamp of the FreeBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
33 */
34
35#include <sys/cdefs.h>
- 36__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 230143 2012-01-15 18:08:15Z mm $");
+ 36__FBSDID("$FreeBSD: head/sys/kern/vfs_cache.c 230394 2012-01-20 20:02:01Z jhb $");
37
38#include "opt_kdtrace.h"
39#include "opt_ktrace.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/filedesc.h>
44#include <sys/fnv_hash.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/fcntl.h>
49#include <sys/mount.h>
50#include <sys/namei.h>
51#include <sys/proc.h>
52#include <sys/rwlock.h>
53#include <sys/sdt.h>
54#include <sys/syscallsubr.h>
55#include <sys/sysctl.h>
56#include <sys/sysproto.h>
57#include <sys/vnode.h>
58#ifdef KTRACE
59#include <sys/ktrace.h>
60#endif
61
62#include <vm/uma.h>
63
64SDT_PROVIDER_DECLARE(vfs);
65SDT_PROBE_DEFINE3(vfs, namecache, enter, done, done, "struct vnode *", "char *",
66 "struct vnode *");
67SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *",
68 "char *");
69SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *");
70SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *",
71 "struct char *", "struct vnode *");
72SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *");
73SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int",
74 "struct vnode *", "struct char *");
75SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *",
76 "struct vnode *");
77SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative,
78 "struct vnode *", "char *");
79SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, miss, "struct vnode *",
80 "char *");
81SDT_PROBE_DEFINE1(vfs, namecache, purge, done, done, "struct vnode *");
82SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, done, "struct vnode *");
83SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, done, "struct mount *");
84SDT_PROBE_DEFINE3(vfs, namecache, zap, done, done, "struct vnode *", "char *",
85 "struct vnode *");
86SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, done, "struct vnode *",
87 "char *");
88
89/*
90 * This structure describes the elements in the cache of recent
91 * names looked up by namei.
92 */
93
94struct namecache {
95 LIST_ENTRY(namecache) nc_hash; /* hash chain */
96 LIST_ENTRY(namecache) nc_src; /* source vnode list */
97 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
98 struct vnode *nc_dvp; /* vnode of parent of name */
99 struct vnode *nc_vp; /* vnode the name refers to */
+ 100 struct timespec nc_time; /* timespec provided by fs */
+ 101 int nc_ticks; /* ticks value when entry was added */
100 u_char nc_flag; /* flag bits */
101 u_char nc_nlen; /* length of name */
102 char nc_name[0]; /* segment name + nul */
103};
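/*
 * nc_time and nc_ticks above are the fields this revision adds:
 * cache_enter_time() stamps them and cache_lookup_times() hands them
 * back to the caller. Illustrative consumer sketch (an assumption,
 * not part of this diff) aging an entry via the global ticks counter:
 *
 *	if (ticks - returned_ticks > timeout * hz)
 *		revalidate the cached name with the filesystem;
 */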
104
105/*
106 * Name caching works as follows:
107 *
108 * Names found by directory scans are retained in a cache
109 * for future reference. It is managed LRU, so frequently
110 * used names will hang around. Cache is indexed by hash value
111 * obtained from (vp, name) where vp refers to the directory
112 * containing name.
113 *
114 * If it is a "negative" entry, (i.e. for a name that is known NOT to
115 * exist) the vnode pointer will be NULL.
116 *
117 * Upon reaching the last segment of a path, if the reference
118 * is for DELETE, or NOCACHE is set (rewrite), and the
119 * name is located in the cache, it will be dropped.
120 */
121
122/*
 123 * Structures associated with name caching.
124 */
125#define NCHHASH(hash) \
126 (&nchashtbl[(hash) & nchash])
127static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
 128static TAILQ_HEAD(, namecache) ncneg; /* Negative entry LRU list */
129static u_long nchash; /* size of hash table */
130SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
131 "Size of namecache hash table");
132static u_long ncnegfactor = 16; /* ratio of negative entries */
133SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
134 "Ratio of negative namecache entries");
135static u_long numneg; /* number of negative entries allocated */
136SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
137 "Number of negative entries in namecache");
138static u_long numcache; /* number of cache entries allocated */
139SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
140 "Number of namecache entries");
141static u_long numcachehv; /* number of cache entries with vnodes held */
142SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
143 "Number of namecache entries with vnodes held");
144static u_int ncsizefactor = 2;
145SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
146 "Size factor for namecache");
147
148struct nchstats nchstats; /* cache effectiveness statistics */
149
150static struct rwlock cache_lock;
151RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
152
153#define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock)
154#define CACHE_RLOCK() rw_rlock(&cache_lock)
155#define CACHE_RUNLOCK() rw_runlock(&cache_lock)
156#define CACHE_WLOCK() rw_wlock(&cache_lock)
157#define CACHE_WUNLOCK() rw_wunlock(&cache_lock)
158
159/*
160 * UMA zones for the VFS cache.
161 *
162 * The small cache is used for entries with short names, which are the
163 * most common. The large cache is used for entries which are too big to
164 * fit in the small cache.
165 */
166static uma_zone_t cache_zone_small;
167static uma_zone_t cache_zone_large;
168
169#define CACHE_PATH_CUTOFF 35
170#define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF \
171 + 1)
172#define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX + 1)
173
174#define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
175 cache_zone_small : cache_zone_large, M_WAITOK)
176#define cache_free(ncp) do { \
177 if (ncp != NULL) \
178 uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
179 cache_zone_small : cache_zone_large, (ncp)); \
180} while (0)
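/*
 * Worked example of the zone split (illustrative): with
 * CACHE_PATH_CUTOFF at 35, an entry for "kernel" (nc_nlen == 6) is
 * allocated from cache_zone_small, sizeof(struct namecache) + 36
 * bytes, while a 60-character name falls through to cache_zone_large,
 * sized for NAME_MAX (255) plus the terminating nul.
 */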
181
182static int doingcache = 1; /* 1 => enable the cache */
183SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
184 "VFS namecache enabled");
185
186/* Export size information to userland */
187SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
188 sizeof(struct namecache), "sizeof(struct namecache)");
189
190/*
191 * The new name cache statistics
192 */
193static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
194 "Name cache statistics");
195#define STATNODE(mode, name, var, descr) \
196 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr);
197STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries");
198STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries");
199static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls,
200 "Number of cache lookups");
201static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits,
202 "Number of '.' hits");
203static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits,
204 "Number of '..' hits");
205static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks,
206 "Number of checks in lookup");
207static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss,
208 "Number of cache misses");
209static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap,
210 "Number of cache misses we do not want to cache");
211static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps,
212 "Number of cache hits (positive) we do not want to cache");
213static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits,
214 "Number of cache hits (positive)");
215static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps,
216 "Number of cache hits (negative) we do not want to cache");
217static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits,
218 "Number of cache hits (negative)");
219static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades,
220 "Number of updates of the cache after lookup (write lock + retry)");
221
222SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
223 &nchstats, sizeof(nchstats), "LU",
224 "VFS cache effectiveness statistics");
225
226
227
228static void cache_zap(struct namecache *ncp);
229static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
230 u_int *buflen);
231static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
232 char *buf, char **retbuf, u_int buflen);
233
234static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
235
236/*
237 * Flags in namecache.nc_flag
238 */
239#define NCF_WHITE 0x01
240#define NCF_ISDOTDOT 0x02
241
242#ifdef DIAGNOSTIC
243/*
244 * Grab an atomic snapshot of the name cache hash chain lengths
245 */
246static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
247 "hash table stats");
248
249static int
250sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
251{
252 int error;
253 struct nchashhead *ncpp;
254 struct namecache *ncp;
255 int n_nchash;
256 int count;
257
258 n_nchash = nchash + 1; /* nchash is max index, not count */
259 if (!req->oldptr)
260 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
261
262 /* Scan hash tables for applicable entries */
263 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
264 CACHE_RLOCK();
265 count = 0;
266 LIST_FOREACH(ncp, ncpp, nc_hash) {
267 count++;
268 }
269 CACHE_RUNLOCK();
270 error = SYSCTL_OUT(req, &count, sizeof(count));
271 if (error)
272 return (error);
273 }
274 return (0);
275}
276SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
277 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
278 "nchash chain lengths");
279
280static int
281sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
282{
283 int error;
284 struct nchashhead *ncpp;
285 struct namecache *ncp;
286 int n_nchash;
287 int count, maxlength, used, pct;
288
289 if (!req->oldptr)
290 return SYSCTL_OUT(req, 0, 4 * sizeof(int));
291
292 n_nchash = nchash + 1; /* nchash is max index, not count */
293 used = 0;
294 maxlength = 0;
295
296 /* Scan hash tables for applicable entries */
297 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
298 count = 0;
299 CACHE_RLOCK();
300 LIST_FOREACH(ncp, ncpp, nc_hash) {
301 count++;
302 }
303 CACHE_RUNLOCK();
304 if (count)
305 used++;
306 if (maxlength < count)
307 maxlength = count;
308 }
309 n_nchash = nchash + 1;
310 pct = (used * 100 * 100) / n_nchash;
311 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
312 if (error)
313 return (error);
314 error = SYSCTL_OUT(req, &used, sizeof(used));
315 if (error)
316 return (error);
317 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
318 if (error)
319 return (error);
320 error = SYSCTL_OUT(req, &pct, sizeof(pct));
321 if (error)
322 return (error);
323 return (0);
324}
325SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
326 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
327 "nchash chain lengths");
328#endif
329
330/*
331 * cache_zap():
332 *
333 * Removes a namecache entry from cache, whether it contains an actual
334 * pointer to a vnode or if it is just a negative cache entry.
335 */
336static void
337cache_zap(ncp)
338 struct namecache *ncp;
339{
340 struct vnode *vp;
341
342 rw_assert(&cache_lock, RA_WLOCKED);
343 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
344#ifdef KDTRACE_HOOKS
345 if (ncp->nc_vp != NULL) {
346 SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp,
347 ncp->nc_name, ncp->nc_vp, 0, 0);
348 } else {
349 SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp,
350 ncp->nc_name, 0, 0, 0);
351 }
352#endif
353 vp = NULL;
354 LIST_REMOVE(ncp, nc_hash);
355 if (ncp->nc_flag & NCF_ISDOTDOT) {
356 if (ncp == ncp->nc_dvp->v_cache_dd)
357 ncp->nc_dvp->v_cache_dd = NULL;
358 } else {
359 LIST_REMOVE(ncp, nc_src);
360 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
361 vp = ncp->nc_dvp;
362 numcachehv--;
363 }
364 }
365 if (ncp->nc_vp) {
366 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
367 if (ncp == ncp->nc_vp->v_cache_dd)
368 ncp->nc_vp->v_cache_dd = NULL;
369 } else {
370 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
371 numneg--;
372 }
373 numcache--;
374 cache_free(ncp);
375 if (vp)
376 vdrop(vp);
377}
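/*
 * Note the deferred vdrop(): the parent directory was vhold()ed when
 * its v_cache_src list first became non-empty (see the hold/vhold(dvp)
 * logic in the enter path below), so the hold is released here only
 * when the last entry naming a child of that directory goes away.
 */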
378
379/*
380 * Lookup an entry in the cache
381 *
382 * Lookup is called with dvp pointing to the directory to search,
383 * cnp pointing to the name of the entry being sought. If the lookup
384 * succeeds, the vnode is returned in *vpp, and a status of -1 is
385 * returned. If the lookup determines that the name does not exist
 386 * (negative caching), a status of ENOENT is returned. If the lookup
387 * fails, a status of zero is returned. If the directory vnode is
388 * recycled out from under us due to a forced unmount, a status of
389 * ENOENT is returned.
390 *
391 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is
392 * unlocked. If we're looking up . an extra ref is taken, but the lock is
393 * not recursively acquired.
394 */
395
396int
- 397cache_lookup(dvp, vpp, cnp)
- 398 struct vnode *dvp;
- 399 struct vnode **vpp;
- 400 struct componentname *cnp;
+ 399cache_lookup_times(dvp, vpp, cnp, tsp, ticksp)
+ 400 struct vnode *dvp;
+ 401 struct vnode **vpp;
+ 402 struct componentname *cnp;
+ 403 struct timespec *tsp;
+ 404 int *ticksp;
401{
402 struct namecache *ncp;
403 uint32_t hash;
404 int error, ltype, wlocked;
405
406 if (!doingcache) {
407 cnp->cn_flags &= ~MAKEENTRY;
408 return (0);
409 }
410retry:
411 CACHE_RLOCK();
412 wlocked = 0;
413 numcalls++;
414 error = 0;
415
416retry_wlocked:
417 if (cnp->cn_nameptr[0] == '.') {
418 if (cnp->cn_namelen == 1) {
419 *vpp = dvp;
420 CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
421 dvp, cnp->cn_nameptr);
422 dothits++;
423 SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".",
424 *vpp, 0, 0);
+ 429 if (tsp != NULL)
+ 430 timespecclear(tsp);
+ 431 if (ticksp != NULL)
+ 432 *ticksp = ticks;
425 goto success;
426 }
427 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
428 dotdothits++;
429 if (dvp->v_cache_dd == NULL) {
430 SDT_PROBE(vfs, namecache, lookup, miss, dvp,
431 "..", NULL, 0, 0);
432 goto unlock;
433 }
434 if ((cnp->cn_flags & MAKEENTRY) == 0) {
435 if (!wlocked && !CACHE_UPGRADE_LOCK())
436 goto wlock;
437 if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
438 cache_zap(dvp->v_cache_dd);
439 dvp->v_cache_dd = NULL;
440 CACHE_WUNLOCK();
441 return (0);
442 }
- 443 if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
- 444 *vpp = dvp->v_cache_dd->nc_vp;
- 445 else
- 446 *vpp = dvp->v_cache_dd->nc_dvp;
- 447 /* Return failure if negative entry was found. */
- 448 if (*vpp == NULL) {
- 449 ncp = dvp->v_cache_dd;
- 450 goto negative_success;
- 451 }
+ 451 ncp = dvp->v_cache_dd;
+ 452 if (ncp->nc_flag & NCF_ISDOTDOT)
+ 453 *vpp = ncp->nc_vp;
+ 454 else
+ 455 *vpp = ncp->nc_dvp;
+ 456 /* Return failure if negative entry was found. */
+ 457 if (*vpp == NULL)
+ 458 goto negative_success;
452 CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
453 dvp, cnp->cn_nameptr, *vpp);
454 SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..",
455 *vpp, 0, 0);
+ 463 if (tsp != NULL)
+ 464 *tsp = ncp->nc_time;
+ 465 if (ticksp != NULL)
+ 466 *ticksp = ncp->nc_ticks;
456 goto success;
457 }
458 }
459
460 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
461 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
462 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
463 numchecks++;
464 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
465 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
466 break;
467 }
468
469 /* We failed to find an entry */
470 if (ncp == NULL) {
471 SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
472 NULL, 0, 0);
473 if ((cnp->cn_flags & MAKEENTRY) == 0) {
474 nummisszap++;
475 } else {
476 nummiss++;
477 }
478 nchstats.ncs_miss++;
479 goto unlock;
480 }
481
482 /* We don't want to have an entry, so dump it */
483 if ((cnp->cn_flags & MAKEENTRY) == 0) {
484 numposzaps++;
485 nchstats.ncs_badhits++;
486 if (!wlocked && !CACHE_UPGRADE_LOCK())
487 goto wlock;
488 cache_zap(ncp);
489 CACHE_WUNLOCK();
490 return (0);
491 }
492
493 /* We found a "positive" match, return the vnode */
494 if (ncp->nc_vp) {
495 numposhits++;
496 nchstats.ncs_goodhits++;
497 *vpp = ncp->nc_vp;
498 CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
499 dvp, cnp->cn_nameptr, *vpp, ncp);
500 SDT_PROBE(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
501 *vpp, 0, 0);
+ 513 if (tsp != NULL)
+ 514 *tsp = ncp->nc_time;
+ 515 if (ticksp != NULL)
+ 516 *ticksp = ncp->nc_ticks;
502 goto success;
503 }
504
505negative_success:
506 /* We found a negative match, and want to create it, so purge */
507 if (cnp->cn_nameiop == CREATE) {
508 numnegzaps++;
509 nchstats.ncs_badhits++;
510 if (!wlocked && !CACHE_UPGRADE_LOCK())
511 goto wlock;
512 cache_zap(ncp);
513 CACHE_WUNLOCK();
514 return (0);
515 }
516
517 if (!wlocked && !CACHE_UPGRADE_LOCK())
518 goto wlock;
519 numneghits++;
520 /*
521 * We found a "negative" match, so we shift it to the end of
522 * the "negative" cache entries queue to satisfy LRU. Also,
523 * check to see if the entry is a whiteout; indicate this to
524 * the componentname, if so.
525 */
526 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
527 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
528 nchstats.ncs_neghits++;
529 if (ncp->nc_flag & NCF_WHITE)
530 cnp->cn_flags |= ISWHITEOUT;
531 SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, ncp->nc_name,
532 0, 0, 0);
+ 548 if (tsp != NULL)
+ 549 *tsp = ncp->nc_time;
+ 550 if (ticksp != NULL)
+ 551 *ticksp = ncp->nc_ticks;
533 CACHE_WUNLOCK();
534 return (ENOENT);
535
536wlock:
537 /*
538 * We need to update the cache after our lookup, so upgrade to
539 * a write lock and retry the operation.
540 */
541 CACHE_RUNLOCK();
542 CACHE_WLOCK();
543 numupgrades++;
544 wlocked = 1;
545 goto retry_wlocked;
546
547success:
548 /*
549 * On success we return a locked and ref'd vnode as per the lookup
550 * protocol.
551 */
552 if (dvp == *vpp) { /* lookup on "." */
553 VREF(*vpp);
554 if (wlocked)
555 CACHE_WUNLOCK();
556 else
557 CACHE_RUNLOCK();
558 /*
559 * When we lookup "." we still can be asked to lock it
560 * differently...
561 */
562 ltype = cnp->cn_lkflags & LK_TYPE_MASK;
563 if (ltype != VOP_ISLOCKED(*vpp)) {
564 if (ltype == LK_EXCLUSIVE) {
565 vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
566 if ((*vpp)->v_iflag & VI_DOOMED) {
567 /* forced unmount */
568 vrele(*vpp);
569 *vpp = NULL;
570 return (ENOENT);
571 }
572 } else
573 vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
574 }
575 return (-1);
576 }
577 ltype = 0; /* silence gcc warning */
578 if (cnp->cn_flags & ISDOTDOT) {
579 ltype = VOP_ISLOCKED(dvp);
580 VOP_UNLOCK(dvp, 0);
581 }
582 VI_LOCK(*vpp);
583 if (wlocked)
584 CACHE_WUNLOCK();
585 else
586 CACHE_RUNLOCK();
587 error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
588 if (cnp->cn_flags & ISDOTDOT) {
589 vn_lock(dvp, ltype | LK_RETRY);
590 if (dvp->v_iflag & VI_DOOMED) {
591 if (error == 0)
592 vput(*vpp);
593 *vpp = NULL;
594 return (ENOENT);
595 }
596 }
597 if (error) {
598 *vpp = NULL;
599 goto retry;
600 }
601 if ((cnp->cn_flags & ISLASTCN) &&
602 (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
603 ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
604 }
605 return (-1);
606
607unlock:
608 if (wlocked)
609 CACHE_WUNLOCK();
610 else
611 CACHE_RUNLOCK();
612 return (0);
613}
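/*
 * Caller sketch for the return protocol above (illustrative only):
 *
 *	struct timespec ts;
 *	int t, rc;
 *
 *	rc = cache_lookup_times(dvp, &vp, cnp, &ts, &t);
 *	if (rc == -1)
 *		hit: vp is locked and referenced, ts/t are valid;
 *	else if (rc == ENOENT)
 *		negative hit: the name is known not to exist;
 *	else
 *		miss (rc == 0): fall back to VOP_CACHEDLOOKUP();
 */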
614
615/*
616 * Add an entry to the cache.
617 */
618void
- 619cache_enter(dvp, vp, cnp)
- 620 struct vnode *dvp;
- 621 struct vnode *vp;
- 622 struct componentname *cnp;
+ 638cache_enter_time(dvp, vp, cnp, tsp)
+ 639 struct vnode *dvp;
+ 640 struct vnode *vp;
+ 641 struct componentname *cnp;
+ 642 struct timespec *tsp;
623{
624 struct namecache *ncp, *n2;
625 struct nchashhead *ncpp;
626 uint32_t hash;
627 int flag;
628 int hold;
629 int zap;
630 int len;
631
632 CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
633 VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
634 ("cache_enter: Adding a doomed vnode"));
635 VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
636 ("cache_enter: Doomed vnode used as src"));
637
638 if (!doingcache)
639 return;
640
641 /*
642 * Avoid blowout in namecache entries.
643 */
644 if (numcache >= desiredvnodes * ncsizefactor)
645 return;
646
647 flag = 0;
648 if (cnp->cn_nameptr[0] == '.') {
649 if (cnp->cn_namelen == 1)
650 return;
651 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
652 CACHE_WLOCK();
653 /*
654 * If dotdot entry already exists, just retarget it
655 * to new parent vnode, otherwise continue with new
656 * namecache entry allocation.
657 */
658 if ((ncp = dvp->v_cache_dd) != NULL &&
659 ncp->nc_flag & NCF_ISDOTDOT) {
660 KASSERT(ncp->nc_dvp == dvp,
661 ("wrong isdotdot parent"));
662 if (ncp->nc_vp != NULL)
663 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
664 ncp, nc_dst);
665 else
666 TAILQ_REMOVE(&ncneg, ncp, nc_dst);
667 if (vp != NULL)
668 TAILQ_INSERT_HEAD(&vp->v_cache_dst,
669 ncp, nc_dst);
670 else
671 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
672 ncp->nc_vp = vp;
673 CACHE_WUNLOCK();
674 return;
675 }
676 dvp->v_cache_dd = NULL;
677 SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp,
678 0, 0);
679 CACHE_WUNLOCK();
680 flag = NCF_ISDOTDOT;
681 }
682 }
683
684 hold = 0;
685 zap = 0;
686
687 /*
688 * Calculate the hash key and setup as much of the new
689 * namecache entry as possible before acquiring the lock.
690 */
691 ncp = cache_alloc(cnp->cn_namelen);
692 ncp->nc_vp = vp;
693 ncp->nc_dvp = dvp;
694 ncp->nc_flag = flag;
+ 715 if (tsp != NULL)
+ 716 ncp->nc_time = *tsp;
+ 717 else
+ 718 timespecclear(&ncp->nc_time);
+ 719 ncp->nc_ticks = ticks;
695 len = ncp->nc_nlen = cnp->cn_namelen;
696 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
697 strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1);
698 hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
699 CACHE_WLOCK();
700
701 /*
702 * See if this vnode or negative entry is already in the cache
703 * with this name. This can happen with concurrent lookups of
704 * the same path name.
705 */
706 ncpp = NCHHASH(hash);
707 LIST_FOREACH(n2, ncpp, nc_hash) {
708 if (n2->nc_dvp == dvp &&
709 n2->nc_nlen == cnp->cn_namelen &&
710 !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
+ 736 n2->nc_time = ncp->nc_time;
+ 737 n2->nc_ticks = ncp->nc_ticks;
711 CACHE_WUNLOCK();
712 cache_free(ncp);
713 return;
714 }
715 }
716
717 if (flag == NCF_ISDOTDOT) {
718 /*
719 * See if we are trying to add .. entry, but some other lookup
720 * has populated v_cache_dd pointer already.
721 */
722 if (dvp->v_cache_dd != NULL) {
723 CACHE_WUNLOCK();
724 cache_free(ncp);
725 return;
726 }
727 KASSERT(vp == NULL || vp->v_type == VDIR,
728 ("wrong vnode type %p", vp));
729 dvp->v_cache_dd = ncp;
730 }
731
732 numcache++;
733 if (!vp) {
734 numneg++;
735 if (cnp->cn_flags & ISWHITEOUT)
736 ncp->nc_flag |= NCF_WHITE;
737 } else if (vp->v_type == VDIR) {
738 if (flag != NCF_ISDOTDOT) {
739 if ((n2 = vp->v_cache_dd) != NULL &&
740 (n2->nc_flag & NCF_ISDOTDOT) != 0)
741 cache_zap(n2);
742 vp->v_cache_dd = ncp;
743 }
744 } else {
745 vp->v_cache_dd = NULL;
746 }
747
748 /*
749 * Insert the new namecache entry into the appropriate chain
750 * within the cache entries table.
751 */
752 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
753 if (flag != NCF_ISDOTDOT) {
754 if (LIST_EMPTY(&dvp->v_cache_src)) {
755 hold = 1;
756 numcachehv++;
757 }
758 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
759 }
760
761 /*
762 * If the entry is "negative", we place it into the
763 * "negative" cache queue, otherwise, we place it into the
764 * destination vnode's cache entries queue.
765 */
766 if (vp) {
767 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
768 SDT_PROBE(vfs, namecache, enter, done, dvp, ncp->nc_name, vp,
769 0, 0);
770 } else {
771 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
772 SDT_PROBE(vfs, namecache, enter_negative, done, dvp,
773 ncp->nc_name, 0, 0, 0);
774 }
775 if (numneg * ncnegfactor > numcache) {
776 ncp = TAILQ_FIRST(&ncneg);
777 zap = 1;
778 }
779 if (hold)
780 vhold(dvp);
781 if (zap)
782 cache_zap(ncp);
783 CACHE_WUNLOCK();
784}
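/*
 * Usage sketch for the timed variant (illustrative; vfs_timestamp() is
 * the kernel's standard helper for generating filesystem timestamps):
 *
 *	struct timespec ts;
 *
 *	vfs_timestamp(&ts);
 *	cache_enter_time(dvp, vp, cnp, &ts);
 *
 * Passing tsp == NULL preserves the old cache_enter() behaviour:
 * nc_time is cleared and only nc_ticks is recorded.
 */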
785
786/*
787 * Name cache initialization, from vfs_init() when we are booting
788 */
789static void
790nchinit(void *dummy __unused)
791{
792
793 TAILQ_INIT(&ncneg);
794
795 cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
796 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
797 cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
798 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
799
800 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
801}
802SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
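/*
 * hashinit() sizes the table to a power of two (roughly
 * desiredvnodes * 2) and stores size - 1 in nchash, which is why
 * NCHHASH() above can mask with "& nchash" instead of taking a
 * modulus.
 */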
803
804
805/*
806 * Invalidate all entries to a particular vnode.
807 */
808void
809cache_purge(vp)
810 struct vnode *vp;
811{
812
813 CTR1(KTR_VFS, "cache_purge(%p)", vp);
814 SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0);
815 CACHE_WLOCK();
816 while (!LIST_EMPTY(&vp->v_cache_src))
817 cache_zap(LIST_FIRST(&vp->v_cache_src));
818 while (!TAILQ_EMPTY(&vp->v_cache_dst))
819 cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
820 if (vp->v_cache_dd != NULL) {
821 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
822 ("lost dotdot link"));
823 cache_zap(vp->v_cache_dd);
824 }
825 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
826 CACHE_WUNLOCK();
827}
828
829/*
830 * Invalidate all negative entries for a particular directory vnode.
831 */
832void
833cache_purge_negative(vp)
834 struct vnode *vp;
835{
836 struct namecache *cp, *ncp;
837
838 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
839 SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0);
840 CACHE_WLOCK();
841 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
842 if (cp->nc_vp == NULL)
843 cache_zap(cp);
844 }
845 CACHE_WUNLOCK();
846}
847
848/*
849 * Flush all entries referencing a particular filesystem.
850 */
851void
852cache_purgevfs(mp)
853 struct mount *mp;
854{
855 struct nchashhead *ncpp;
856 struct namecache *ncp, *nnp;
857
858 /* Scan hash tables for applicable entries */
859 SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0);
860 CACHE_WLOCK();
861 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
862 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
863 if (ncp->nc_dvp->v_mount == mp)
864 cache_zap(ncp);
865 }
866 }
867 CACHE_WUNLOCK();
868}
869
870/*
871 * Perform canonical checks and cache lookup and pass on to filesystem
872 * through the vop_cachedlookup only if needed.
873 */
874
875int
876vfs_cache_lookup(ap)
877 struct vop_lookup_args /* {
878 struct vnode *a_dvp;
879 struct vnode **a_vpp;
880 struct componentname *a_cnp;
881 } */ *ap;
882{
883 struct vnode *dvp;
884 int error;
885 struct vnode **vpp = ap->a_vpp;
886 struct componentname *cnp = ap->a_cnp;
887 struct ucred *cred = cnp->cn_cred;
888 int flags = cnp->cn_flags;
889 struct thread *td = cnp->cn_thread;
890
891 *vpp = NULL;
892 dvp = ap->a_dvp;
893
894 if (dvp->v_type != VDIR)
895 return (ENOTDIR);
896
897 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
898 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
899 return (EROFS);
900
901 error = VOP_ACCESS(dvp, VEXEC, cred, td);
902 if (error)
903 return (error);
904
905 error = cache_lookup(dvp, vpp, cnp);
906 if (error == 0)
907 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
908 if (error == -1)
909 return (0);
910 return (error);
911}
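/*
 * vfs_cache_lookup() above still calls plain cache_lookup(); this diff
 * does not touch the callers, so presumably the old names survive as
 * compatibility wrappers outside this file (an assumption; something
 * along the lines of):
 *
 *	#define cache_lookup(dvp, vpp, cnp) \
 *		cache_lookup_times((dvp), (vpp), (cnp), NULL, NULL)
 */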
912
913
914#ifndef _SYS_SYSPROTO_H_
915struct __getcwd_args {
916 u_char *buf;
917 u_int buflen;
918};
919#endif
920
921/*
922 * XXX All of these sysctls would probably be more productive dead.
923 */
924static int disablecwd;
925SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
926 "Disable the getcwd syscall");
927
928/* Implementation of the getcwd syscall. */
929int
930sys___getcwd(td, uap)
931 struct thread *td;
932 struct __getcwd_args *uap;
933{
934
935 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
936}
937
938int
939kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
940{
941 char *bp, *tmpbuf;
942 struct filedesc *fdp;
943 struct vnode *cdir, *rdir;
944 int error, vfslocked;
945
946 if (disablecwd)
947 return (ENODEV);
948 if (buflen < 2)
949 return (EINVAL);
950 if (buflen > MAXPATHLEN)
951 buflen = MAXPATHLEN;
952
953 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
954 fdp = td->td_proc->p_fd;
955 FILEDESC_SLOCK(fdp);
956 cdir = fdp->fd_cdir;
957 VREF(cdir);
958 rdir = fdp->fd_rdir;
959 VREF(rdir);
960 FILEDESC_SUNLOCK(fdp);
961 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
962 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
963 vrele(rdir);
964 VFS_UNLOCK_GIANT(vfslocked);
965 vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
966 vrele(cdir);
967 VFS_UNLOCK_GIANT(vfslocked);
968
969 if (!error) {
970 if (bufseg == UIO_SYSSPACE)
971 bcopy(bp, buf, strlen(bp) + 1);
972 else
973 error = copyout(bp, buf, strlen(bp) + 1);
974#ifdef KTRACE
975 if (KTRPOINT(curthread, KTR_NAMEI))
976 ktrnamei(bp);
977#endif
978 }
979 free(tmpbuf, M_TEMP);
980 return (error);
981}
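/*
 * Illustrative flow: a getcwd(3) call with a 4 KB user buffer arrives
 * here with buflen clamped to MAXPATHLEN (1024); vn_fullpath1()
 * assembles the path at the tail of tmpbuf, and only strlen(bp) + 1
 * bytes are copied out to userspace.
 */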
982
983/*
984 * Thus begins the fullpath magic.
985 */
986
987#undef STATNODE
988#define STATNODE(name, descr) \
989 static u_int name; \
990 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr)
991
992static int disablefullpath;
993SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
994 "Disable the vn_fullpath function");
995
996/* These count for kern___getcwd(), too. */
997STATNODE(numfullpathcalls, "Number of fullpath search calls");
998STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
999STATNODE(numfullpathfail2,
1000 "Number of fullpath search errors (VOP_VPTOCNP failures)");
1001STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
1002STATNODE(numfullpathfound, "Number of successful fullpath calls");
1003
1004/*
 1005 * Retrieve the full filesystem path that corresponds to a vnode from the name
1006 * cache (if available)
1007 */
1008int
1009vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
1010{
1011 char *buf;
1012 struct filedesc *fdp;
1013 struct vnode *rdir;
1014 int error, vfslocked;
1015
1016 if (disablefullpath)
1017 return (ENODEV);
1018 if (vn == NULL)
1019 return (EINVAL);
1020
1021 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1022 fdp = td->td_proc->p_fd;
1023 FILEDESC_SLOCK(fdp);
1024 rdir = fdp->fd_rdir;
1025 VREF(rdir);
1026 FILEDESC_SUNLOCK(fdp);
1027 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
1028 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
1029 vrele(rdir);
1030 VFS_UNLOCK_GIANT(vfslocked);
1031
1032 if (!error)
1033 *freebuf = buf;
1034 else
1035 free(buf, M_TEMP);
1036 return (error);
1037}
1038
1039/*
1040 * This function is similar to vn_fullpath, but it attempts to lookup the
1041 * pathname relative to the global root mount point. This is required for the
1042 * auditing sub-system, as audited pathnames must be absolute, relative to the
1043 * global root mount point.
1044 */
1045int
1046vn_fullpath_global(struct thread *td, struct vnode *vn,
1047 char **retbuf, char **freebuf)
1048{
1049 char *buf;
1050 int error;
1051
1052 if (disablefullpath)
1053 return (ENODEV);
1054 if (vn == NULL)
1055 return (EINVAL);
1056 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1057 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
1058 if (!error)
1059 *freebuf = buf;
1060 else
1061 free(buf, M_TEMP);
1062 return (error);
1063}
1064
1065int
1066vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
1067{
1068 int error;
1069
1070 CACHE_RLOCK();
1071 error = vn_vptocnp_locked(vp, cred, buf, buflen);
1072 if (error == 0)
1073 CACHE_RUNLOCK();
1074 return (error);
1075}
1076
1077static int
1078vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
1079 u_int *buflen)
1080{
1081 struct vnode *dvp;
1082 struct namecache *ncp;
1083 int error, vfslocked;
1084
1085 TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
1086 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1087 break;
1088 }
1089 if (ncp != NULL) {
1090 if (*buflen < ncp->nc_nlen) {
1091 CACHE_RUNLOCK();
1092 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1093 vrele(*vp);
1094 VFS_UNLOCK_GIANT(vfslocked);
1095 numfullpathfail4++;
1096 error = ENOMEM;
1097 SDT_PROBE(vfs, namecache, fullpath, return, error,
1098 vp, NULL, 0, 0);
1099 return (error);
1100 }
1101 *buflen -= ncp->nc_nlen;
1102 memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
1103 SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp,
1104 ncp->nc_name, vp, 0, 0);
1105 dvp = *vp;
1106 *vp = ncp->nc_dvp;
1107 vref(*vp);
1108 CACHE_RUNLOCK();
1109 vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1110 vrele(dvp);
1111 VFS_UNLOCK_GIANT(vfslocked);
1112 CACHE_RLOCK();
1113 return (0);
1114 }
1115 SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0);
1116
1117 CACHE_RUNLOCK();
1118 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1119 vn_lock(*vp, LK_SHARED | LK_RETRY);
1120 error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
1121 vput(*vp);
1122 VFS_UNLOCK_GIANT(vfslocked);
1123 if (error) {
1124 numfullpathfail2++;
1125 SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
1126 NULL, 0, 0);
1127 return (error);
1128 }
1129
1130 *vp = dvp;
1131 CACHE_RLOCK();
1132 if (dvp->v_iflag & VI_DOOMED) {
1133 /* forced unmount */
1134 CACHE_RUNLOCK();
1135 vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1136 vrele(dvp);
1137 VFS_UNLOCK_GIANT(vfslocked);
1138 error = ENOENT;
1139 SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
1140 NULL, 0, 0);
1141 return (error);
1142 }
1143 /*
1144 * *vp has its use count incremented still.
1145 */
1146
1147 return (0);
1148}
1149
1150/*
1151 * The magic behind kern___getcwd() and vn_fullpath().
1152 */
1153static int
1154vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
1155 char *buf, char **retbuf, u_int buflen)
1156{
1157 int error, slash_prefixed, vfslocked;
1158#ifdef KDTRACE_HOOKS
1159 struct vnode *startvp = vp;
1160#endif
1161 struct vnode *vp1;
1162
1163 buflen--;
1164 buf[buflen] = '\0';
1165 error = 0;
1166 slash_prefixed = 0;
1167
1168 SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0);
1169 numfullpathcalls++;
1170 vref(vp);
1171 CACHE_RLOCK();
1172 if (vp->v_type != VDIR) {
1173 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1174 if (error)
1175 return (error);
1176 if (buflen == 0) {
1177 CACHE_RUNLOCK();
1178 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1179 vrele(vp);
1180 VFS_UNLOCK_GIANT(vfslocked);
1181 return (ENOMEM);
1182 }
1183 buf[--buflen] = '/';
1184 slash_prefixed = 1;
1185 }
1186 while (vp != rdir && vp != rootvnode) {
1187 if (vp->v_vflag & VV_ROOT) {
1188 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */
1189 CACHE_RUNLOCK();
1190 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1191 vrele(vp);
1192 VFS_UNLOCK_GIANT(vfslocked);
1193 error = ENOENT;
1194 SDT_PROBE(vfs, namecache, fullpath, return,
1195 error, vp, NULL, 0, 0);
1196 break;
1197 }
1198 vp1 = vp->v_mount->mnt_vnodecovered;
1199 vref(vp1);
1200 CACHE_RUNLOCK();
1201 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1202 vrele(vp);
1203 VFS_UNLOCK_GIANT(vfslocked);
1204 vp = vp1;
1205 CACHE_RLOCK();
1206 continue;
1207 }
1208 if (vp->v_type != VDIR) {
1209 CACHE_RUNLOCK();
1210 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1211 vrele(vp);
1212 VFS_UNLOCK_GIANT(vfslocked);
1213 numfullpathfail1++;
1214 error = ENOTDIR;
1215 SDT_PROBE(vfs, namecache, fullpath, return,
1216 error, vp, NULL, 0, 0);
1217 break;
1218 }
1219 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1220 if (error)
1221 break;
1222 if (buflen == 0) {
1223 CACHE_RUNLOCK();
1224 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1225 vrele(vp);
1226 VFS_UNLOCK_GIANT(vfslocked);
1227 error = ENOMEM;
1228 SDT_PROBE(vfs, namecache, fullpath, return, error,
1229 startvp, NULL, 0, 0);
1230 break;
1231 }
1232 buf[--buflen] = '/';
1233 slash_prefixed = 1;
1234 }
1235 if (error)
1236 return (error);
1237 if (!slash_prefixed) {
1238 if (buflen == 0) {
1239 CACHE_RUNLOCK();
1240 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1241 vrele(vp);
1242 VFS_UNLOCK_GIANT(vfslocked);
1243 numfullpathfail4++;
1244 SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM,
1245 startvp, NULL, 0, 0);
1246 return (ENOMEM);
1247 }
1248 buf[--buflen] = '/';
1249 }
1250 numfullpathfound++;
1251 CACHE_RUNLOCK();
1252 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1253 vrele(vp);
1254 VFS_UNLOCK_GIANT(vfslocked);
1255
1256 SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen,
1257 0, 0);
1258 *retbuf = buf + buflen;
1259 return (0);
1260}
1261
1262int
1263vn_commname(struct vnode *vp, char *buf, u_int buflen)
1264{
1265 struct namecache *ncp;
1266 int l;
1267
1268 CACHE_RLOCK();
1269 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
1270 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1271 break;
1272 if (ncp == NULL) {
1273 CACHE_RUNLOCK();
1274 return (ENOENT);
1275 }
1276 l = min(ncp->nc_nlen, buflen - 1);
1277 memcpy(buf, ncp->nc_name, l);
1278 CACHE_RUNLOCK();
1279 buf[l] = '\0';
1280 return (0);
1281}
1282
738 CACHE_WUNLOCK();
739 cache_free(ncp);
740 return;
741 }
742 }
743
744 if (flag == NCF_ISDOTDOT) {
745 /*
746		 * See if we are trying to add a .. entry, but some other
747		 * lookup has already populated the v_cache_dd pointer.
748 */
749 if (dvp->v_cache_dd != NULL) {
750 CACHE_WUNLOCK();
751 cache_free(ncp);
752 return;
753 }
754 KASSERT(vp == NULL || vp->v_type == VDIR,
755 ("wrong vnode type %p", vp));
756 dvp->v_cache_dd = ncp;
757 }
758
759 numcache++;
760 if (!vp) {
761 numneg++;
762 if (cnp->cn_flags & ISWHITEOUT)
763 ncp->nc_flag |= NCF_WHITE;
764 } else if (vp->v_type == VDIR) {
765 if (flag != NCF_ISDOTDOT) {
766 if ((n2 = vp->v_cache_dd) != NULL &&
767 (n2->nc_flag & NCF_ISDOTDOT) != 0)
768 cache_zap(n2);
769 vp->v_cache_dd = ncp;
770 }
771 } else {
772 vp->v_cache_dd = NULL;
773 }
774
775 /*
776 * Insert the new namecache entry into the appropriate chain
777 * within the cache entries table.
778 */
779 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
780 if (flag != NCF_ISDOTDOT) {
781 if (LIST_EMPTY(&dvp->v_cache_src)) {
782 hold = 1;
783 numcachehv++;
784 }
785 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
786 }
787
788 /*
789 * If the entry is "negative", we place it into the
790 * "negative" cache queue, otherwise, we place it into the
791 * destination vnode's cache entries queue.
792 */
793 if (vp) {
794 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
795 SDT_PROBE(vfs, namecache, enter, done, dvp, ncp->nc_name, vp,
796 0, 0);
797 } else {
798 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
799 SDT_PROBE(vfs, namecache, enter_negative, done, dvp,
800 ncp->nc_name, 0, 0, 0);
801 }
802 if (numneg * ncnegfactor > numcache) {
803 ncp = TAILQ_FIRST(&ncneg);
804 zap = 1;
805 }
806 if (hold)
807 vhold(dvp);
808 if (zap)
809 cache_zap(ncp);
810 CACHE_WUNLOCK();
811}
812
813/*
814 * Name cache initialization, from vfs_init() when we are booting.
815 */
816static void
817nchinit(void *dummy __unused)
818{
819
820 TAILQ_INIT(&ncneg);
821
822 cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
823 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
824 cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
825 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
826
827 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
828}
829SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
830
831
832/*
833 * Invalidate all entries to a particular vnode.
834 */
835void
836cache_purge(vp)
837 struct vnode *vp;
838{
839
840 CTR1(KTR_VFS, "cache_purge(%p)", vp);
841 SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0);
842 CACHE_WLOCK();
843 while (!LIST_EMPTY(&vp->v_cache_src))
844 cache_zap(LIST_FIRST(&vp->v_cache_src));
845 while (!TAILQ_EMPTY(&vp->v_cache_dst))
846 cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
847 if (vp->v_cache_dd != NULL) {
848 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
849 ("lost dotdot link"));
850 cache_zap(vp->v_cache_dd);
851 }
852 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
853 CACHE_WUNLOCK();
854}
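
#ifdef EXAMPLE_USAGE
/*
 * Illustrative sketch, not part of the original file: a filesystem's
 * remove path would typically purge the victim vnode's cached name
 * translations once its directory entry is gone.  fs_remove_dirent() is
 * a hypothetical helper standing in for the fs-specific directory update.
 */
static int
example_remove(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	int error;

	error = fs_remove_dirent(dvp, vp, cnp);		/* hypothetical */
	if (error == 0)
		cache_purge(vp);
	return (error);
}
#endif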
855
856/*
857 * Invalidate all negative entries for a particular directory vnode.
858 */
859void
860cache_purge_negative(vp)
861 struct vnode *vp;
862{
863 struct namecache *cp, *ncp;
864
865 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
866 SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0);
867 CACHE_WLOCK();
868 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
869 if (cp->nc_vp == NULL)
870 cache_zap(cp);
871 }
872 CACHE_WUNLOCK();
873}
874
875/*
876 * Flush all entries referencing a particular filesystem.
877 */
878void
879cache_purgevfs(mp)
880 struct mount *mp;
881{
882 struct nchashhead *ncpp;
883 struct namecache *ncp, *nnp;
884
885 /* Scan hash tables for applicable entries */
886 SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0);
887 CACHE_WLOCK();
888 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
889 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
890 if (ncp->nc_dvp->v_mount == mp)
891 cache_zap(ncp);
892 }
893 }
894 CACHE_WUNLOCK();
895}
896
897/*
898 * Perform canonical checks and the cache lookup, and pass on to the
899 * filesystem through vop_cachedlookup only if needed.
900 */
901
902int
903vfs_cache_lookup(ap)
904 struct vop_lookup_args /* {
905 struct vnode *a_dvp;
906 struct vnode **a_vpp;
907 struct componentname *a_cnp;
908 } */ *ap;
909{
910 struct vnode *dvp;
911 int error;
912 struct vnode **vpp = ap->a_vpp;
913 struct componentname *cnp = ap->a_cnp;
914 struct ucred *cred = cnp->cn_cred;
915 int flags = cnp->cn_flags;
916 struct thread *td = cnp->cn_thread;
917
918 *vpp = NULL;
919 dvp = ap->a_dvp;
920
921 if (dvp->v_type != VDIR)
922 return (ENOTDIR);
923
924 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
925 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
926 return (EROFS);
927
928 error = VOP_ACCESS(dvp, VEXEC, cred, td);
929 if (error)
930 return (error);
931
932 error = cache_lookup(dvp, vpp, cnp);
933 if (error == 0)
934 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
935 if (error == -1)
936 return (0);
937 return (error);
938}
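
/*
 * Illustrative sketch, not part of the original file: a filesystem opts in
 * to this fast path by installing vfs_cache_lookup() as its vop_lookup and
 * supplying the real directory scan via vop_cachedlookup.  UFS, for
 * example, wires it up roughly like this:
 *
 *	struct vop_vector ufs_vnodeops = {
 *		.vop_default =		&default_vnodeops,
 *		.vop_lookup =		vfs_cache_lookup,
 *		.vop_cachedlookup =	ufs_lookup,
 *	};
 */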
939
940
941#ifndef _SYS_SYSPROTO_H_
942struct __getcwd_args {
943 u_char *buf;
944 u_int buflen;
945};
946#endif
947
948/*
949 * XXX All of these sysctls would probably be more productive dead.
950 */
951static int disablecwd;
952SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
953 "Disable the getcwd syscall");
954
955/* Implementation of the getcwd syscall. */
956int
957sys___getcwd(td, uap)
958 struct thread *td;
959 struct __getcwd_args *uap;
960{
961
962 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
963}
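
/*
 * Illustrative note, not part of the original file: userland reaches this
 * through the __getcwd(2) system call; libc's getcwd(3) tries it first and
 * falls back to walking ".." entries by hand when it fails, e.g. when
 * debug.disablecwd is set or the name cache cannot reconstruct the path.
 */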
964
965int
966kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
967{
968 char *bp, *tmpbuf;
969 struct filedesc *fdp;
970 struct vnode *cdir, *rdir;
971 int error, vfslocked;
972
973 if (disablecwd)
974 return (ENODEV);
975 if (buflen < 2)
976 return (EINVAL);
977 if (buflen > MAXPATHLEN)
978 buflen = MAXPATHLEN;
979
980 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
981 fdp = td->td_proc->p_fd;
982 FILEDESC_SLOCK(fdp);
983 cdir = fdp->fd_cdir;
984 VREF(cdir);
985 rdir = fdp->fd_rdir;
986 VREF(rdir);
987 FILEDESC_SUNLOCK(fdp);
988 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
989 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
990 vrele(rdir);
991 VFS_UNLOCK_GIANT(vfslocked);
992 vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
993 vrele(cdir);
994 VFS_UNLOCK_GIANT(vfslocked);
995
996 if (!error) {
997 if (bufseg == UIO_SYSSPACE)
998 bcopy(bp, buf, strlen(bp) + 1);
999 else
1000 error = copyout(bp, buf, strlen(bp) + 1);
1001#ifdef KTRACE
1002 if (KTRPOINT(curthread, KTR_NAMEI))
1003 ktrnamei(bp);
1004#endif
1005 }
1006 free(tmpbuf, M_TEMP);
1007 return (error);
1008}
1009
1010/*
1011 * Thus begins the fullpath magic.
1012 */
1013
1014#undef STATNODE
1015#define STATNODE(name, descr) \
1016 static u_int name; \
1017 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr)
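
/*
 * Illustrative note, not part of the original file: each STATNODE(name,
 * descr) below declares a static u_int counter and exports it read-only
 * under the vfs.cache sysctl tree, so e.g. numfullpathcalls can be
 * inspected with:
 *
 *	sysctl vfs.cache.numfullpathcalls
 */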
1018
1019static int disablefullpath;
1020SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
1021 "Disable the vn_fullpath function");
1022
1023/* These count for kern___getcwd(), too. */
1024STATNODE(numfullpathcalls, "Number of fullpath search calls");
1025STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
1026STATNODE(numfullpathfail2,
1027 "Number of fullpath search errors (VOP_VPTOCNP failures)");
1028STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
1029STATNODE(numfullpathfound, "Number of successful fullpath calls");
1030
1031/*
1032 * Retrieve the full filesystem path that corresponds to a vnode from the
1033 * name cache (if available).
1034 */
1035int
1036vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
1037{
1038 char *buf;
1039 struct filedesc *fdp;
1040 struct vnode *rdir;
1041 int error, vfslocked;
1042
1043 if (disablefullpath)
1044 return (ENODEV);
1045 if (vn == NULL)
1046 return (EINVAL);
1047
1048 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1049 fdp = td->td_proc->p_fd;
1050 FILEDESC_SLOCK(fdp);
1051 rdir = fdp->fd_rdir;
1052 VREF(rdir);
1053 FILEDESC_SUNLOCK(fdp);
1054 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
1055 vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
1056 vrele(rdir);
1057 VFS_UNLOCK_GIANT(vfslocked);
1058
1059 if (!error)
1060 *freebuf = buf;
1061 else
1062 free(buf, M_TEMP);
1063 return (error);
1064}
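
#ifdef EXAMPLE_USAGE
/*
 * Illustrative sketch, not part of the original file: callers read the
 * resulting path through *retbuf but must release the underlying storage
 * through *freebuf, since the string is assembled at the tail of the
 * allocated buffer.
 */
static void
example_log_vnode_path(struct thread *td, struct vnode *vp)
{
	char *fullpath, *freepath;

	if (vn_fullpath(td, vp, &fullpath, &freepath) == 0) {
		printf("vnode %p resolves to %s\n", vp, fullpath);
		free(freepath, M_TEMP);
	}
}
#endif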
1065
1066/*
1067 * This function is similar to vn_fullpath, but it attempts to look up the
1068 * pathname relative to the global root mount point.  This is required for the
1069 * auditing sub-system, as audited pathnames must be absolute, relative to the
1070 * global root mount point.
1071 */
1072int
1073vn_fullpath_global(struct thread *td, struct vnode *vn,
1074 char **retbuf, char **freebuf)
1075{
1076 char *buf;
1077 int error;
1078
1079 if (disablefullpath)
1080 return (ENODEV);
1081 if (vn == NULL)
1082 return (EINVAL);
1083 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1084 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
1085 if (!error)
1086 *freebuf = buf;
1087 else
1088 free(buf, M_TEMP);
1089 return (error);
1090}
1091
1092int
1093vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
1094{
1095 int error;
1096
1097 CACHE_RLOCK();
1098 error = vn_vptocnp_locked(vp, cred, buf, buflen);
1099 if (error == 0)
1100 CACHE_RUNLOCK();
1101 return (error);
1102}
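
#ifdef EXAMPLE_USAGE
/*
 * Illustrative sketch, not part of the original file: vn_vptocnp() writes
 * the component name at the *end* of the buffer (without NUL termination),
 * shrinks *buflen accordingly, and replaces *vp with a referenced parent
 * vnode, consuming the caller's reference on the original vnode.
 */
static int
example_parent_name(struct thread *td, struct vnode *vp, char *name,
    size_t namesize)
{
	char buf[MAXPATHLEN];
	u_int buflen;
	int error;

	buflen = sizeof(buf) - 1;
	buf[sizeof(buf) - 1] = '\0';	/* terminate the unterminated name */
	vref(vp);			/* reference is consumed below */
	error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
	if (error == 0) {
		strlcpy(name, buf + buflen, namesize);
		vrele(vp);		/* drop the parent's reference */
	}
	return (error);
}
#endif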
1103
1104static int
1105vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
1106 u_int *buflen)
1107{
1108 struct vnode *dvp;
1109 struct namecache *ncp;
1110 int error, vfslocked;
1111
1112 TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
1113 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1114 break;
1115 }
1116 if (ncp != NULL) {
1117 if (*buflen < ncp->nc_nlen) {
1118 CACHE_RUNLOCK();
1119 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1120 vrele(*vp);
1121 VFS_UNLOCK_GIANT(vfslocked);
1122 numfullpathfail4++;
1123 error = ENOMEM;
1124 SDT_PROBE(vfs, namecache, fullpath, return, error,
1125 vp, NULL, 0, 0);
1126 return (error);
1127 }
1128 *buflen -= ncp->nc_nlen;
1129 memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
1130 SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp,
1131 ncp->nc_name, vp, 0, 0);
1132 dvp = *vp;
1133 *vp = ncp->nc_dvp;
1134 vref(*vp);
1135 CACHE_RUNLOCK();
1136 vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1137 vrele(dvp);
1138 VFS_UNLOCK_GIANT(vfslocked);
1139 CACHE_RLOCK();
1140 return (0);
1141 }
1142 SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0);
1143
1144 CACHE_RUNLOCK();
1145 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1146 vn_lock(*vp, LK_SHARED | LK_RETRY);
1147 error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
1148 vput(*vp);
1149 VFS_UNLOCK_GIANT(vfslocked);
1150 if (error) {
1151 numfullpathfail2++;
1152 SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
1153 NULL, 0, 0);
1154 return (error);
1155 }
1156
1157 *vp = dvp;
1158 CACHE_RLOCK();
1159 if (dvp->v_iflag & VI_DOOMED) {
1160 /* forced unmount */
1161 CACHE_RUNLOCK();
1162 vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1163 vrele(dvp);
1164 VFS_UNLOCK_GIANT(vfslocked);
1165 error = ENOENT;
1166 SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
1167 NULL, 0, 0);
1168 return (error);
1169 }
1170 /*
1171	 * On success, *vp is returned with its use count still held.
1172 */
1173
1174 return (0);
1175}
1176
1177/*
1178 * The magic behind kern___getcwd() and vn_fullpath().
1179 */
1180static int
1181vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
1182 char *buf, char **retbuf, u_int buflen)
1183{
1184 int error, slash_prefixed, vfslocked;
1185#ifdef KDTRACE_HOOKS
1186 struct vnode *startvp = vp;
1187#endif
1188 struct vnode *vp1;
1189
1190 buflen--;
1191 buf[buflen] = '\0';
1192 error = 0;
1193 slash_prefixed = 0;
1194
1195 SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0);
1196 numfullpathcalls++;
1197 vref(vp);
1198 CACHE_RLOCK();
1199 if (vp->v_type != VDIR) {
1200 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1201 if (error)
1202 return (error);
1203 if (buflen == 0) {
1204 CACHE_RUNLOCK();
1205 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1206 vrele(vp);
1207 VFS_UNLOCK_GIANT(vfslocked);
1208 return (ENOMEM);
1209 }
1210 buf[--buflen] = '/';
1211 slash_prefixed = 1;
1212 }
1213 while (vp != rdir && vp != rootvnode) {
1214 if (vp->v_vflag & VV_ROOT) {
1215 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */
1216 CACHE_RUNLOCK();
1217 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1218 vrele(vp);
1219 VFS_UNLOCK_GIANT(vfslocked);
1220 error = ENOENT;
1221 SDT_PROBE(vfs, namecache, fullpath, return,
1222 error, vp, NULL, 0, 0);
1223 break;
1224 }
1225 vp1 = vp->v_mount->mnt_vnodecovered;
1226 vref(vp1);
1227 CACHE_RUNLOCK();
1228 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1229 vrele(vp);
1230 VFS_UNLOCK_GIANT(vfslocked);
1231 vp = vp1;
1232 CACHE_RLOCK();
1233 continue;
1234 }
1235 if (vp->v_type != VDIR) {
1236 CACHE_RUNLOCK();
1237 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1238 vrele(vp);
1239 VFS_UNLOCK_GIANT(vfslocked);
1240 numfullpathfail1++;
1241 error = ENOTDIR;
1242 SDT_PROBE(vfs, namecache, fullpath, return,
1243 error, vp, NULL, 0, 0);
1244 break;
1245 }
1246 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1247 if (error)
1248 break;
1249 if (buflen == 0) {
1250 CACHE_RUNLOCK();
1251 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1252 vrele(vp);
1253 VFS_UNLOCK_GIANT(vfslocked);
1254 error = ENOMEM;
1255 SDT_PROBE(vfs, namecache, fullpath, return, error,
1256 startvp, NULL, 0, 0);
1257 break;
1258 }
1259 buf[--buflen] = '/';
1260 slash_prefixed = 1;
1261 }
1262 if (error)
1263 return (error);
1264 if (!slash_prefixed) {
1265 if (buflen == 0) {
1266 CACHE_RUNLOCK();
1267 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1268 vrele(vp);
1269 VFS_UNLOCK_GIANT(vfslocked);
1270 numfullpathfail4++;
1271 SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM,
1272 startvp, NULL, 0, 0);
1273 return (ENOMEM);
1274 }
1275 buf[--buflen] = '/';
1276 }
1277 numfullpathfound++;
1278 CACHE_RUNLOCK();
1279 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1280 vrele(vp);
1281 VFS_UNLOCK_GIANT(vfslocked);
1282
1283 SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen,
1284 0, 0);
1285 *retbuf = buf + buflen;
1286 return (0);
1287}
1288
1289int
1290vn_commname(struct vnode *vp, char *buf, u_int buflen)
1291{
1292 struct namecache *ncp;
1293 int l;
1294
1295 CACHE_RLOCK();
1296 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
1297 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1298 break;
1299 if (ncp == NULL) {
1300 CACHE_RUNLOCK();
1301 return (ENOENT);
1302 }
1303 l = min(ncp->nc_nlen, buflen - 1);
1304 memcpy(buf, ncp->nc_name, l);
1305 CACHE_RUNLOCK();
1306 buf[l] = '\0';
1307 return (0);
1308}
1309
1310/* ABI compat shims for old kernel modules. */
1311#undef cache_enter
1312#undef cache_lookup
1313
1314void cache_enter(struct vnode *dvp, struct vnode *vp,
1315 struct componentname *cnp);
1316int cache_lookup(struct vnode *dvp, struct vnode **vpp,
1317 struct componentname *cnp);
1318
1319void
1320cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1321{
1322
1323 cache_enter_time(dvp, vp, cnp, NULL);
1324}
1325
1326int
1327cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1328{
1329
1330 return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL));
1331}
1332
1333/*
1334 * This function updates the path string to the vnode's full global path
1335 * and checks the size of the new path string against the pathlen argument.
1336 *
1337 * Requires a locked, referenced vnode with the Giant lock held.
1338 * The vnode is re-locked on success or ENODEV; otherwise it is unlocked.
1339 *
1340 * If the sysctl debug.disablefullpath is set, ENODEV is returned, the
1341 * vnode is left locked and the path remains untouched.
1342 *
1343 * If vp is a directory, the call to vn_fullpath_global() always succeeds
1344 * because it falls back to the ".." lookup if the namecache lookup fails.
1345 */
1346int
1347vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
1348 u_int pathlen)
1349{
1350 struct nameidata nd;
1351 struct vnode *vp1;
1352 char *rpath, *fbuf;
1353 int error, vfslocked;
1354
1355 VFS_ASSERT_GIANT(vp->v_mount);
1356 ASSERT_VOP_ELOCKED(vp, __func__);
1357
1358 /* Return ENODEV if sysctl debug.disablefullpath==1 */
1359 if (disablefullpath)
1360 return (ENODEV);
1361
1362 /* Construct global filesystem path from vp. */
1363 VOP_UNLOCK(vp, 0);
1364 error = vn_fullpath_global(td, vp, &rpath, &fbuf);
1365
1366 if (error != 0) {
1367 vrele(vp);
1368 return (error);
1369 }
1370
1371 if (strlen(rpath) >= pathlen) {
1372 vrele(vp);
1373 error = ENAMETOOLONG;
1374 goto out;
1375 }
1376
1377 /*
1378 * Re-lookup the vnode by path to detect a possible rename.
1379 * As a side effect, the vnode is relocked.
1380 * If vnode was renamed, return ENOENT.
1381 */
1382 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1383 UIO_SYSSPACE, path, td);
1384 error = namei(&nd);
1385 if (error != 0) {
1386 vrele(vp);
1387 goto out;
1388 }
1389 vfslocked = NDHASGIANT(&nd);
1390 NDFREE(&nd, NDF_ONLY_PNBUF);
1391 vp1 = nd.ni_vp;
1392 vrele(vp);
1393 if (vp1 == vp)
1394 strcpy(path, rpath);
1395 else {
1396 vput(vp1);
1397 error = ENOENT;
1398 }
1399 VFS_UNLOCK_GIANT(vfslocked);
1400
1401out:
1402 free(fbuf, M_TEMP);
1403 return (error);
1404}
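
#ifdef EXAMPLE_USAGE
/*
 * Illustrative sketch, not part of the original file: path must point at a
 * buffer of at least MAXPATHLEN bytes holding the original path string.
 */
static int
example_globalize(struct thread *td, struct vnode *vp, char *path)
{
	int error;

	/*
	 * vp must be exclusively locked and referenced on entry; it comes
	 * back locked on success or ENODEV and unlocked on other errors.
	 */
	error = vn_path_to_global_path(td, vp, path, MAXPATHLEN);
	return (error);
}
#endif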