/* pseudofs_vncache.c revision 168764 */
175295Sdes/*- 275295Sdes * Copyright (c) 2001 Dag-Erling Co�dan Sm�rgrav 375295Sdes * All rights reserved. 475295Sdes * 575295Sdes * Redistribution and use in source and binary forms, with or without 675295Sdes * modification, are permitted provided that the following conditions 775295Sdes * are met: 875295Sdes * 1. Redistributions of source code must retain the above copyright 975295Sdes * notice, this list of conditions and the following disclaimer 1075295Sdes * in this position and unchanged. 1175295Sdes * 2. Redistributions in binary form must reproduce the above copyright 1275295Sdes * notice, this list of conditions and the following disclaimer in the 1375295Sdes * documentation and/or other materials provided with the distribution. 1475295Sdes * 3. The name of the author may not be used to endorse or promote products 1575295Sdes * derived from this software without specific prior written permission. 1675295Sdes * 1775295Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1875295Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1975295Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2075295Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2175295Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2275295Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2375295Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2475295Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2575295Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2675295Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2775295Sdes */ 2875295Sdes 29143592Sdes#include <sys/cdefs.h> 30143592Sdes__FBSDID("$FreeBSD: head/sys/fs/pseudofs/pseudofs_vncache.c 168764 2007-04-15 17:10:01Z des $"); 31143592Sdes 32143592Sdes#include "opt_pseudofs.h" 33143592Sdes 3475295Sdes#include <sys/param.h> 3575295Sdes#include <sys/kernel.h> 3675295Sdes#include <sys/systm.h> 37112564Sjhb#include <sys/eventhandler.h> 3878073Sdes#include <sys/lock.h> 3975295Sdes#include <sys/malloc.h> 4077965Sdes#include <sys/mutex.h> 4184246Sdes#include <sys/proc.h> 4275295Sdes#include <sys/sysctl.h> 4375295Sdes#include <sys/vnode.h> 4475295Sdes 4575295Sdes#include <fs/pseudofs/pseudofs.h> 4675295Sdes#include <fs/pseudofs/pseudofs_internal.h> 4775295Sdes 4877998Sdesstatic MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache"); 4975295Sdes 5075295Sdesstatic struct mtx pfs_vncache_mutex; 5189071Smsmithstatic struct pfs_vdata *pfs_vncache; 52112564Sjhbstatic eventhandler_tag pfs_exit_tag; 53112564Sjhbstatic void pfs_exit(void *arg, struct proc *p); 5475295Sdes 5575295SdesSYSCTL_NODE(_vfs_pfs, OID_AUTO, vncache, CTLFLAG_RW, 0, 5675295Sdes "pseudofs vnode cache"); 5775295Sdes 5884246Sdesstatic int pfs_vncache_entries; 5984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, entries, CTLFLAG_RD, 6084246Sdes &pfs_vncache_entries, 0, 6184246Sdes "number of entries in the vnode cache"); 6284246Sdes 6384246Sdesstatic int pfs_vncache_maxentries; 6484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, maxentries, CTLFLAG_RD, 6584246Sdes &pfs_vncache_maxentries, 0, 6684246Sdes "highest number of entries in the vnode cache"); 6784246Sdes 6875295Sdesstatic int pfs_vncache_hits; 6984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, hits, CTLFLAG_RD, 7084246Sdes &pfs_vncache_hits, 0, 7175295Sdes "number of cache hits since initialization"); 7275295Sdes 7375295Sdesstatic int pfs_vncache_misses; 7484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, misses, CTLFLAG_RD, 7584246Sdes &pfs_vncache_misses, 0, 7675295Sdes "number of cache misses since 
initialization"); 7775295Sdes 78138290Sphkextern struct vop_vector pfs_vnodeops; /* XXX -> .h file */ 7975295Sdes 8075295Sdes/* 8175295Sdes * Initialize vnode cache 8275295Sdes */ 8375295Sdesvoid 8475295Sdespfs_vncache_load(void) 8575295Sdes{ 86168720Sdes 87168720Sdes mtx_assert(&Giant, MA_OWNED); 88168720Sdes mtx_init(&pfs_vncache_mutex, "pfs_vncache", NULL, MTX_DEF); 89112564Sjhb pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL, 90112564Sjhb EVENTHANDLER_PRI_ANY); 9175295Sdes} 9275295Sdes 9375295Sdes/* 9475295Sdes * Tear down vnode cache 9575295Sdes */ 9675295Sdesvoid 9775295Sdespfs_vncache_unload(void) 9875295Sdes{ 99168720Sdes 100168720Sdes mtx_assert(&Giant, MA_OWNED); 101112564Sjhb EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag); 102168720Sdes KASSERT(pfs_vncache_entries == 0, 103168720Sdes ("%d vncache entries remaining", pfs_vncache_entries)); 10475295Sdes mtx_destroy(&pfs_vncache_mutex); 10575295Sdes} 10675295Sdes 10775295Sdes/* 10875295Sdes * Allocate a vnode 10975295Sdes */ 11075295Sdesint 11177998Sdespfs_vncache_alloc(struct mount *mp, struct vnode **vpp, 11277998Sdes struct pfs_node *pn, pid_t pid) 11375295Sdes{ 11477998Sdes struct pfs_vdata *pvd; 115165737Sjhb struct vnode *vp; 11675295Sdes int error; 11788234Sdillon 11888234Sdillon /* 11997940Sdes * See if the vnode is in the cache. 12088234Sdillon * XXX linear search is not very efficient. 
12188234Sdillon */ 122165737Sjhbretry: 12375295Sdes mtx_lock(&pfs_vncache_mutex); 12484246Sdes for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) { 125109969Stjr if (pvd->pvd_pn == pn && pvd->pvd_pid == pid && 126109969Stjr pvd->pvd_vnode->v_mount == mp) { 127165737Sjhb vp = pvd->pvd_vnode; 128165737Sjhb VI_LOCK(vp); 129165737Sjhb mtx_unlock(&pfs_vncache_mutex); 130165737Sjhb if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) { 13175295Sdes ++pfs_vncache_hits; 132165737Sjhb *vpp = vp; 133168637Sdes /* 134168637Sdes * Some callers cache_enter(vp) later, so 135168637Sdes * we have to make sure it's not in the 136168637Sdes * VFS cache so it doesn't get entered 137168637Sdes * twice. A better solution would be to 138168637Sdes * make pfs_vncache_alloc() responsible 139168637Sdes * for entering the vnode in the VFS 140168637Sdes * cache. 141168637Sdes */ 142168637Sdes cache_purge(vp); 14375295Sdes return (0); 14475295Sdes } 145165737Sjhb goto retry; 14677998Sdes } 14777998Sdes } 14877998Sdes mtx_unlock(&pfs_vncache_mutex); 14975295Sdes ++pfs_vncache_misses; 15075295Sdes 15175295Sdes /* nope, get a new one */ 152111119Simp MALLOC(pvd, struct pfs_vdata *, sizeof *pvd, M_PFSVNCACHE, M_WAITOK); 15384246Sdes if (++pfs_vncache_entries > pfs_vncache_maxentries) 15484246Sdes pfs_vncache_maxentries = pfs_vncache_entries; 155138290Sphk error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp); 156105165Sphk if (error) { 157105165Sphk FREE(pvd, M_PFSVNCACHE); 15875295Sdes return (error); 159105165Sphk } 16077998Sdes pvd->pvd_pn = pn; 16177998Sdes pvd->pvd_pid = pid; 16277998Sdes (*vpp)->v_data = pvd; 16375295Sdes switch (pn->pn_type) { 16475295Sdes case pfstype_root: 165101308Sjeff (*vpp)->v_vflag = VV_ROOT; 16675295Sdes#if 0 16775295Sdes printf("root vnode allocated\n"); 16875295Sdes#endif 16984246Sdes /* fall through */ 17075295Sdes case pfstype_dir: 17175295Sdes case pfstype_this: 17275295Sdes case pfstype_parent: 17377998Sdes case pfstype_procdir: 17475295Sdes 
(*vpp)->v_type = VDIR; 17575295Sdes break; 17675295Sdes case pfstype_file: 17775295Sdes (*vpp)->v_type = VREG; 17875295Sdes break; 17975295Sdes case pfstype_symlink: 18075295Sdes (*vpp)->v_type = VLNK; 18175295Sdes break; 18277998Sdes case pfstype_none: 18377998Sdes KASSERT(0, ("pfs_vncache_alloc called for null node\n")); 18475295Sdes default: 18575295Sdes panic("%s has unexpected type: %d", pn->pn_name, pn->pn_type); 18675295Sdes } 187103314Snjl /* 188103314Snjl * Propagate flag through to vnode so users know it can change 189103314Snjl * if the process changes (i.e. execve) 190103314Snjl */ 191103314Snjl if ((pn->pn_flags & PFS_PROCDEP) != 0) 192103314Snjl (*vpp)->v_vflag |= VV_PROCDEP; 19384246Sdes pvd->pvd_vnode = *vpp; 194167497Stegge (*vpp)->v_vnlock->lk_flags |= LK_CANRECURSE; 195167497Stegge vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread); 196167497Stegge error = insmntque(*vpp, mp); 197167497Stegge if (error != 0) { 198167497Stegge FREE(pvd, M_PFSVNCACHE); 199167497Stegge *vpp = NULLVP; 200167497Stegge return (error); 201167497Stegge } 20277998Sdes mtx_lock(&pfs_vncache_mutex); 20384246Sdes pvd->pvd_prev = NULL; 20484246Sdes pvd->pvd_next = pfs_vncache; 20584246Sdes if (pvd->pvd_next) 20684246Sdes pvd->pvd_next->pvd_prev = pvd; 20784246Sdes pfs_vncache = pvd; 20875295Sdes mtx_unlock(&pfs_vncache_mutex); 20975295Sdes return (0); 21075295Sdes} 21175295Sdes 21275295Sdes/* 21375295Sdes * Free a vnode 21475295Sdes */ 21575295Sdesint 21675295Sdespfs_vncache_free(struct vnode *vp) 21775295Sdes{ 21877998Sdes struct pfs_vdata *pvd; 21988234Sdillon 22075295Sdes mtx_lock(&pfs_vncache_mutex); 22184246Sdes pvd = (struct pfs_vdata *)vp->v_data; 22284246Sdes KASSERT(pvd != NULL, ("pfs_vncache_free(): no vnode data\n")); 22384246Sdes if (pvd->pvd_next) 22484246Sdes pvd->pvd_next->pvd_prev = pvd->pvd_prev; 22584246Sdes if (pvd->pvd_prev) 22684246Sdes pvd->pvd_prev->pvd_next = pvd->pvd_next; 22777998Sdes else 22884246Sdes pfs_vncache = pvd->pvd_next; 22977998Sdes 
mtx_unlock(&pfs_vncache_mutex); 23084246Sdes 23184246Sdes --pfs_vncache_entries; 23277998Sdes FREE(pvd, M_PFSVNCACHE); 23375295Sdes vp->v_data = NULL; 23475295Sdes return (0); 23575295Sdes} 23684246Sdes 23784246Sdes/* 238168764Sdes * Purge the cache of dead entries 239139896Srwatson * 240168637Sdes * This is extremely inefficient due to the fact that vgone() not only 241168637Sdes * indirectly modifies the vnode cache, but may also sleep. We can 242168637Sdes * neither hold pfs_vncache_mutex across a vgone() call, nor make any 243168637Sdes * assumptions about the state of the cache after vgone() returns. In 244168637Sdes * consequence, we must start over after every vgone() call, and keep 245168637Sdes * trying until we manage to traverse the entire cache. 246168637Sdes * 247168637Sdes * The only way to improve this situation is to change the data structure 248168637Sdes * used to implement the cache. 24984246Sdes */ 250168637Sdesvoid 251168637Sdespfs_purge(struct pfs_node *pn) 25284246Sdes{ 253133776Sdes struct pfs_vdata *pvd; 254133776Sdes struct vnode *vnp; 25584246Sdes 256133776Sdes mtx_lock(&pfs_vncache_mutex); 257133776Sdes pvd = pfs_vncache; 258133776Sdes while (pvd != NULL) { 259168637Sdes if (pvd->pvd_dead || (pn != NULL && pvd->pvd_pn == pn)) { 260133776Sdes vnp = pvd->pvd_vnode; 261147809Sjeff vhold(vnp); 262133776Sdes mtx_unlock(&pfs_vncache_mutex); 263143513Sjeff VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); 264133776Sdes vgone(vnp); 265143513Sjeff VOP_UNLOCK(vnp, 0, curthread); 266147809Sjeff vdrop(vnp); 267133776Sdes mtx_lock(&pfs_vncache_mutex); 268133776Sdes pvd = pfs_vncache; 269133776Sdes } else { 270133776Sdes pvd = pvd->pvd_next; 27184246Sdes } 27284246Sdes } 27384246Sdes mtx_unlock(&pfs_vncache_mutex); 274168637Sdes} 275168637Sdes 276168637Sdes/* 277168637Sdes * Free all vnodes associated with a defunct process 278168637Sdes * 279168637Sdes * XXXRW: It is unfortunate that pfs_exit() always acquires and releases two 280168637Sdes * mutexes (one of 
which is Giant) for every process exit, even if procfs 281168637Sdes * isn't mounted. 282168637Sdes */ 283168637Sdesstatic void 284168637Sdespfs_exit(void *arg, struct proc *p) 285168637Sdes{ 286168637Sdes struct pfs_vdata *pvd; 287168637Sdes int dead; 288168637Sdes 289168637Sdes if (pfs_vncache == NULL) 290168637Sdes return; 291168637Sdes mtx_lock(&Giant); 292168637Sdes mtx_lock(&pfs_vncache_mutex); 293168637Sdes for (pvd = pfs_vncache, dead = 0; pvd != NULL; pvd = pvd->pvd_next) 294168637Sdes if (pvd->pvd_pid == p->p_pid) 295168637Sdes dead = pvd->pvd_dead = 1; 296168637Sdes mtx_unlock(&pfs_vncache_mutex); 297168637Sdes if (dead) 298168637Sdes pfs_purge(NULL); 299126975Sgreen mtx_unlock(&Giant); 30084246Sdes} 301