vfs_cache.c revision 29094
168651Skris/* 268651Skris * Copyright (c) 1989, 1993, 1995 368651Skris * The Regents of the University of California. All rights reserved. 468651Skris * 5290207Sjkim * This code is derived from software contributed to Berkeley by 668651Skris * Poul-Henning Kamp of the FreeBSD Project. 768651Skris * 868651Skris * Redistribution and use in source and binary forms, with or without 968651Skris * modification, are permitted provided that the following conditions 1068651Skris * are met: 1168651Skris * 1. Redistributions of source code must retain the above copyright 1268651Skris * notice, this list of conditions and the following disclaimer. 1368651Skris * 2. Redistributions in binary form must reproduce the above copyright 1468651Skris * notice, this list of conditions and the following disclaimer in the 1568651Skris * documentation and/or other materials provided with the distribution. 16109998Smarkm * 3. All advertising materials mentioning features or use of this software 1768651Skris * must display the following acknowledgement: 18109998Smarkm * This product includes software developed by the University of 19109998Smarkm * California, Berkeley and its contributors. 20109998Smarkm * 4. Neither the name of the University nor the names of its contributors 21109998Smarkm * may be used to endorse or promote products derived from this software 22109998Smarkm * without specific prior written permission. 23109998Smarkm * 24295009Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25295009Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2668651Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27109998Smarkm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2868651Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29109998Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30109998Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3168651Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3268651Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3368651Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3468651Skris * SUCH DAMAGE. 3568651Skris * 36109998Smarkm * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 3768651Skris * $Id: vfs_cache.c,v 1.30 1997/09/03 09:20:17 phk Exp $ 3868651Skris */ 3968651Skris 4068651Skris#include <sys/param.h> 4168651Skris#include <sys/systm.h> 4268651Skris#include <sys/kernel.h> 4368651Skris#include <sys/sysctl.h> 4468651Skris#include <sys/mount.h> 4568651Skris#include <sys/vnode.h> 4668651Skris#include <sys/namei.h> 4768651Skris#include <sys/malloc.h> 4868651Skris 4968651Skris 5068651Skris/* 5168651Skris * Name caching works as follows: 5268651Skris * 5368651Skris * Names found by directory scans are retained in a cache 5468651Skris * for future reference. It is managed LRU, so frequently 5568651Skris * used names will hang around. Cache is indexed by hash value 5668651Skris * obtained from (vp, name) where vp refers to the directory 5768651Skris * containing name. 5868651Skris * 59109998Smarkm * If it is a "negative" entry, (i.e. for a name that is known NOT to 6068651Skris * exist) the vnode pointer will be NULL. 6168651Skris * 6268651Skris * Upon reaching the last segment of a path, if the reference 6368651Skris * is for DELETE, or NOCACHE is set (rewrite), and the 6468651Skris * name is located in the cache, it will be dropped. 6568651Skris */ 6668651Skris 6768651Skris/* 6868651Skris * Structures associated with name cacheing. 6968651Skris */ 7068651Skris#define NCHHASH(dvp, cnp) \ 7168651Skris (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash]) 7268651Skrisstatic LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 7368651Skrisstatic TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 7468651Skrisstatic u_long nchash; /* size of hash table */ 7568651SkrisSYSCTL_INT(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 7668651Skrisstatic u_long ncnegfactor = 16; /* ratio of negative entries */ 7768651SkrisSYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 7868651Skrisstatic u_long numneg; /* number of cache entries allocated */ 7968651SkrisSYSCTL_INT(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 8068651Skrisstatic u_long numcache; /* number of cache entries allocated */ 8168651SkrisSYSCTL_INT(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 8268651Skrisstruct nchstats nchstats; /* cache effectiveness statistics */ 8368651Skris 8468651Skrisstatic int doingcache = 1; /* 1 => enable the cache */ 8568651SkrisSYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 8668651SkrisSYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 8768651SkrisSYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 8868651Skris 8968651Skrisstatic void cache_zap __P((struct namecache *ncp)); 9068651Skris 9168651Skris/* 92109998Smarkm * Flags in namecache.nc_flag 93109998Smarkm */ 94109998Smarkm#define NCF_WHITE 1 9568651Skris/* 9668651Skris * Delete an entry from its hash list and move it to the front 9768651Skris * of the LRU list for immediate reuse. 9868651Skris */ 9968651Skrisstatic void 10068651Skriscache_zap(ncp) 10168651Skris struct namecache *ncp; 10268651Skris{ 10368651Skris LIST_REMOVE(ncp, nc_hash); 10468651Skris LIST_REMOVE(ncp, nc_src); 10568651Skris if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) 10668651Skris vdrop(ncp->nc_dvp); 10768651Skris if (ncp->nc_vp) { 10868651Skris TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 10968651Skris } else { 11068651Skris TAILQ_REMOVE(&ncneg, ncp, nc_dst); 11168651Skris numneg--; 11268651Skris } 11368651Skris numcache--; 11468651Skris free(ncp, M_CACHE); 11568651Skris} 11668651Skris 11768651Skris/* 11868651Skris * Lookup an entry in the cache 11968651Skris * 12068651Skris * We don't do this if the segment name is long, simply so the cache 12168651Skris * can avoid holding long names (which would either waste space, or 12268651Skris * add greatly to the complexity). 12368651Skris * 12468651Skris * Lookup is called with dvp pointing to the directory to search, 12568651Skris * cnp pointing to the name of the entry being sought. If the lookup 12668651Skris * succeeds, the vnode is returned in *vpp, and a status of -1 is 12768651Skris * returned. If the lookup determines that the name does not exist 12868651Skris * (negative cacheing), a status of ENOENT is returned. If the lookup 12968651Skris * fails, a status of zero is returned. 13068651Skris */ 131109998Smarkm 132109998Smarkmint 133109998Smarkmcache_lookup(dvp, vpp, cnp) 134109998Smarkm struct vnode *dvp; 135109998Smarkm struct vnode **vpp; 13668651Skris struct componentname *cnp; 13768651Skris{ 13868651Skris register struct namecache *ncp, *nnp; 13968651Skris register struct nchashhead *ncpp; 14068651Skris 14168651Skris if (!doingcache) { 14268651Skris cnp->cn_flags &= ~MAKEENTRY; 14368651Skris return (0); 14468651Skris } 14568651Skris 14668651Skris if (cnp->cn_nameptr[0] == '.') { 14768651Skris if (cnp->cn_namelen == 1) { 14868651Skris *vpp = dvp; 14968651Skris return (-1); 15068651Skris } 15168651Skris if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 15268651Skris if (dvp->v_dd->v_id != dvp->v_ddid || 15368651Skris (cnp->cn_flags & MAKEENTRY) == 0) { 15468651Skris dvp->v_ddid = 0; 15568651Skris return (0); 15668651Skris } 15768651Skris *vpp = dvp->v_dd; 15868651Skris return (-1); 15968651Skris } 16068651Skris } 16168651Skris 16268651Skris LIST_FOREACH(ncp, (NCHHASH(dvp, cnp)), nc_hash) { 16368651Skris if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 16468651Skris !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) 16568651Skris break; 16668651Skris } 16768651Skris 16868651Skris /* We failed to find an entry */ 16968651Skris if (ncp == 0) { 17068651Skris nchstats.ncs_miss++; 17168651Skris return (0); 17268651Skris } 17368651Skris 17468651Skris /* We don't want to have an entry, so dump it */ 17568651Skris if ((cnp->cn_flags & MAKEENTRY) == 0) { 17668651Skris nchstats.ncs_badhits++; 17768651Skris cache_zap(ncp); 17868651Skris return (0); 17968651Skris } 18068651Skris 18168651Skris /* We found a "positive" match, return the vnode */ 18268651Skris if (ncp->nc_vp) { 18368651Skris nchstats.ncs_goodhits++; 18468651Skris *vpp = ncp->nc_vp; 18568651Skris return (-1); 18668651Skris } 18768651Skris 18868651Skris /* We found a negative match, and want to create it, so purge */ 18968651Skris if (cnp->cn_nameiop == CREATE) { 19068651Skris nchstats.ncs_badhits++; 19168651Skris cache_zap(ncp); 19268651Skris return (0); 193 } 194 195 /* 196 * We found a "negative" match, ENOENT notifies client of this match. 197 * The nc_vpid field records whether this is a whiteout. 198 */ 199 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 200 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 201 nchstats.ncs_neghits++; 202 if (ncp->nc_flag & NCF_WHITE) 203 cnp->cn_flags |= ISWHITEOUT; 204 return (ENOENT); 205} 206 207/* 208 * Add an entry to the cache. 209 */ 210void 211cache_enter(dvp, vp, cnp) 212 struct vnode *dvp; 213 struct vnode *vp; 214 struct componentname *cnp; 215{ 216 register struct namecache *ncp; 217 register struct nchashhead *ncpp; 218 219 if (!doingcache) 220 return; 221 222 if (cnp->cn_nameptr[0] == '.') { 223 if (cnp->cn_namelen == 1) { 224 return; 225 } 226 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 227 if (vp) { 228 dvp->v_dd = vp; 229 dvp->v_ddid = vp->v_id; 230 } else { 231 dvp->v_dd = dvp; 232 dvp->v_ddid = 0; 233 } 234 return; 235 } 236 } 237 238 ncp = (struct namecache *) 239 malloc(sizeof *ncp + cnp->cn_namelen, M_CACHE, M_WAITOK); 240 bzero((char *)ncp, sizeof *ncp); 241 numcache++; 242 if (!vp) { 243 numneg++; 244 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 245 } else if (vp->v_type == VDIR) { 246 vp->v_dd = dvp; 247 vp->v_ddid = dvp->v_id; 248 } 249 250 /* 251 * Fill in cache info, if vp is NULL this is a "negative" cache entry. 252 * For negative entries, we have to record whether it is a whiteout. 253 * the whiteout flag is stored in the nc_vpid field which is 254 * otherwise unused. 255 */ 256 ncp->nc_vp = vp; 257 ncp->nc_dvp = dvp; 258 ncp->nc_nlen = cnp->cn_namelen; 259 bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen); 260 ncpp = NCHHASH(dvp, cnp); 261 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 262 if (LIST_EMPTY(&dvp->v_cache_src)) 263 vhold(dvp); 264 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 265 if (vp) { 266 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 267 } else { 268 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 269 } 270 if (numneg*ncnegfactor > numcache) { 271 ncp = TAILQ_FIRST(&ncneg); 272 cache_zap(ncp); 273 } 274} 275 276/* 277 * Name cache initialization, from vfs_init() when we are booting 278 */ 279void 280nchinit() 281{ 282 283 TAILQ_INIT(&ncneg); 284 nchashtbl = hashinit(desiredvnodes*2, M_CACHE, &nchash); 285} 286 287/* 288 * Invalidate all entries to particular vnode. 289 * 290 * We actually just increment the v_id, that will do it. The stale entries 291 * will be purged by lookup as they get found. If the v_id wraps around, we 292 * need to ditch the entire cache, to avoid confusion. No valid vnode will 293 * ever have (v_id == 0). 294 */ 295void 296cache_purge(vp) 297 struct vnode *vp; 298{ 299 struct namecache *ncp; 300 struct nchashhead *ncpp; 301 static u_long nextid; 302 303 while (!LIST_EMPTY(&vp->v_cache_src)) 304 cache_zap(LIST_FIRST(&vp->v_cache_src)); 305 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 306 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 307 308 nextid++; 309 while (nextid == vp->v_id || !nextid) 310 continue; 311 vp->v_id = nextid; 312 vp->v_dd = vp; 313 vp->v_ddid = 0; 314} 315 316/* 317 * Flush all entries referencing a particular filesystem. 318 * 319 * Since we need to check it anyway, we will flush all the invalid 320 * entries at the same time. 321 */ 322void 323cache_purgevfs(mp) 324 struct mount *mp; 325{ 326 struct nchashhead *ncpp; 327 struct namecache *ncp, *nnp; 328 329 /* Scan hash tables for applicable entries */ 330 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 331 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 332 nnp = LIST_NEXT(ncp, nc_hash); 333 if (ncp->nc_dvp->v_mount == mp) { 334 cache_zap(ncp); 335 } 336 } 337 } 338} 339 340/* 341 * Perform canonical checks and cache lookup and pass on to filesystem 342 * through the vop_cachedlookup only if needed. 343 */ 344 345int 346vfs_cache_lookup(ap) 347 struct vop_lookup_args /* { 348 struct vnode *a_dvp; 349 struct vnode **a_vpp; 350 struct componentname *a_cnp; 351 } */ *ap; 352{ 353 struct vnode *vdp; 354 struct vnode *pdp; 355 int lockparent; 356 int error; 357 struct vnode **vpp = ap->a_vpp; 358 struct componentname *cnp = ap->a_cnp; 359 struct ucred *cred = cnp->cn_cred; 360 int flags = cnp->cn_flags; 361 struct proc *p = cnp->cn_proc; 362 u_long vpid; /* capability number of vnode */ 363 364 *vpp = NULL; 365 vdp = ap->a_dvp; 366 lockparent = flags & LOCKPARENT; 367 368 if (vdp->v_type != VDIR) 369 return (ENOTDIR); 370 371 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 372 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 373 return (EROFS); 374 375 error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc); 376 377 if (error) 378 return (error); 379 380 error = cache_lookup(vdp, vpp, cnp); 381 382 if (!error) 383 return (VCALL(vdp, VOFFSET(vop_cachedlookup), 384 (struct vop_cachedlookup_args *)ap)); 385 386 if (error == ENOENT) 387 return (error); 388 389 pdp = vdp; 390 vdp = *vpp; 391 vpid = vdp->v_id; 392 if (pdp == vdp) { /* lookup on "." */ 393 VREF(vdp); 394 error = 0; 395 } else if (flags & ISDOTDOT) { 396 VOP_UNLOCK(pdp, 0, p); 397 error = vget(vdp, LK_EXCLUSIVE, p); 398 if (!error && lockparent && (flags & ISLASTCN)) 399 error = vn_lock(pdp, LK_EXCLUSIVE, p); 400 } else { 401 error = vget(vdp, LK_EXCLUSIVE, p); 402 if (!lockparent || error || !(flags & ISLASTCN)) 403 VOP_UNLOCK(pdp, 0, p); 404 } 405 /* 406 * Check that the capability number did not change 407 * while we were waiting for the lock. 408 */ 409 if (!error) { 410 if (vpid == vdp->v_id) 411 return (0); 412 vput(vdp); 413 if (lockparent && pdp != vdp && (flags & ISLASTCN)) 414 VOP_UNLOCK(pdp, 0, p); 415 } 416 error = vn_lock(pdp, LK_EXCLUSIVE, p); 417 if (error) 418 return (error); 419 return (VCALL(vdp, VOFFSET(vop_cachedlookup), 420 (struct vop_cachedlookup_args *)ap)); 421} 422