/* pseudofs_vncache.c revision 167497 */
175295Sdes/*- 275295Sdes * Copyright (c) 2001 Dag-Erling Co�dan Sm�rgrav 375295Sdes * All rights reserved. 475295Sdes * 575295Sdes * Redistribution and use in source and binary forms, with or without 675295Sdes * modification, are permitted provided that the following conditions 775295Sdes * are met: 875295Sdes * 1. Redistributions of source code must retain the above copyright 975295Sdes * notice, this list of conditions and the following disclaimer 1075295Sdes * in this position and unchanged. 1175295Sdes * 2. Redistributions in binary form must reproduce the above copyright 1275295Sdes * notice, this list of conditions and the following disclaimer in the 1375295Sdes * documentation and/or other materials provided with the distribution. 1475295Sdes * 3. The name of the author may not be used to endorse or promote products 1575295Sdes * derived from this software without specific prior written permission. 1675295Sdes * 1775295Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1875295Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1975295Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2075295Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2175295Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2275295Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2375295Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2475295Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2575295Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2675295Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2775295Sdes */ 2875295Sdes 29143592Sdes#include <sys/cdefs.h> 30143592Sdes__FBSDID("$FreeBSD: head/sys/fs/pseudofs/pseudofs_vncache.c 167497 2007-03-13 01:50:27Z tegge $"); 31143592Sdes 32143592Sdes#include "opt_pseudofs.h" 33143592Sdes 3475295Sdes#include <sys/param.h> 3575295Sdes#include <sys/kernel.h> 3675295Sdes#include <sys/systm.h> 37112564Sjhb#include <sys/eventhandler.h> 3878073Sdes#include <sys/lock.h> 3975295Sdes#include <sys/malloc.h> 4077965Sdes#include <sys/mutex.h> 4184246Sdes#include <sys/proc.h> 4275295Sdes#include <sys/sysctl.h> 4375295Sdes#include <sys/vnode.h> 4475295Sdes 4575295Sdes#include <fs/pseudofs/pseudofs.h> 4675295Sdes#include <fs/pseudofs/pseudofs_internal.h> 4775295Sdes 4877998Sdesstatic MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache"); 4975295Sdes 5075295Sdesstatic struct mtx pfs_vncache_mutex; 5189071Smsmithstatic struct pfs_vdata *pfs_vncache; 52112564Sjhbstatic eventhandler_tag pfs_exit_tag; 53112564Sjhbstatic void pfs_exit(void *arg, struct proc *p); 5475295Sdes 5575295SdesSYSCTL_NODE(_vfs_pfs, OID_AUTO, vncache, CTLFLAG_RW, 0, 5675295Sdes "pseudofs vnode cache"); 5775295Sdes 5884246Sdesstatic int pfs_vncache_entries; 5984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, entries, CTLFLAG_RD, 6084246Sdes &pfs_vncache_entries, 0, 6184246Sdes "number of entries in the vnode cache"); 6284246Sdes 6384246Sdesstatic int pfs_vncache_maxentries; 6484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, maxentries, CTLFLAG_RD, 6584246Sdes &pfs_vncache_maxentries, 0, 6684246Sdes "highest number of entries in the vnode cache"); 6784246Sdes 6875295Sdesstatic int pfs_vncache_hits; 6984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, hits, CTLFLAG_RD, 7084246Sdes &pfs_vncache_hits, 0, 7175295Sdes "number of cache hits since initialization"); 7275295Sdes 7375295Sdesstatic int pfs_vncache_misses; 7484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, misses, CTLFLAG_RD, 7584246Sdes &pfs_vncache_misses, 0, 7675295Sdes "number of cache misses since 
initialization"); 7775295Sdes 78138290Sphkextern struct vop_vector pfs_vnodeops; /* XXX -> .h file */ 7975295Sdes 8075295Sdes/* 8175295Sdes * Initialize vnode cache 8275295Sdes */ 8375295Sdesvoid 8475295Sdespfs_vncache_load(void) 8575295Sdes{ 86133776Sdes mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL, MTX_DEF); 87112564Sjhb pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL, 88112564Sjhb EVENTHANDLER_PRI_ANY); 8975295Sdes} 9075295Sdes 9175295Sdes/* 9275295Sdes * Tear down vnode cache 9375295Sdes */ 9475295Sdesvoid 9575295Sdespfs_vncache_unload(void) 9675295Sdes{ 97112564Sjhb EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag); 9884386Sdes if (pfs_vncache_entries != 0) 9984386Sdes printf("pfs_vncache_unload(): %d entries remaining\n", 10084386Sdes pfs_vncache_entries); 10175295Sdes mtx_destroy(&pfs_vncache_mutex); 10275295Sdes} 10375295Sdes 10475295Sdes/* 10575295Sdes * Allocate a vnode 10675295Sdes */ 10775295Sdesint 10877998Sdespfs_vncache_alloc(struct mount *mp, struct vnode **vpp, 10977998Sdes struct pfs_node *pn, pid_t pid) 11075295Sdes{ 11177998Sdes struct pfs_vdata *pvd; 112165737Sjhb struct vnode *vp; 11375295Sdes int error; 11488234Sdillon 11588234Sdillon /* 11697940Sdes * See if the vnode is in the cache. 11788234Sdillon * XXX linear search is not very efficient. 
11888234Sdillon */ 119165737Sjhbretry: 12075295Sdes mtx_lock(&pfs_vncache_mutex); 12184246Sdes for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) { 122109969Stjr if (pvd->pvd_pn == pn && pvd->pvd_pid == pid && 123109969Stjr pvd->pvd_vnode->v_mount == mp) { 124165737Sjhb vp = pvd->pvd_vnode; 125165737Sjhb VI_LOCK(vp); 126165737Sjhb mtx_unlock(&pfs_vncache_mutex); 127165737Sjhb if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) { 12875295Sdes ++pfs_vncache_hits; 129165737Sjhb *vpp = vp; 13088234Sdillon /* XXX see comment at top of pfs_lookup() */ 131165737Sjhb cache_purge(vp); 13275295Sdes return (0); 13375295Sdes } 134165737Sjhb goto retry; 13577998Sdes } 13677998Sdes } 13777998Sdes mtx_unlock(&pfs_vncache_mutex); 13875295Sdes ++pfs_vncache_misses; 13975295Sdes 14075295Sdes /* nope, get a new one */ 141111119Simp MALLOC(pvd, struct pfs_vdata *, sizeof *pvd, M_PFSVNCACHE, M_WAITOK); 14284246Sdes if (++pfs_vncache_entries > pfs_vncache_maxentries) 14384246Sdes pfs_vncache_maxentries = pfs_vncache_entries; 144138290Sphk error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp); 145105165Sphk if (error) { 146105165Sphk FREE(pvd, M_PFSVNCACHE); 14775295Sdes return (error); 148105165Sphk } 14977998Sdes pvd->pvd_pn = pn; 15077998Sdes pvd->pvd_pid = pid; 15177998Sdes (*vpp)->v_data = pvd; 15275295Sdes switch (pn->pn_type) { 15375295Sdes case pfstype_root: 154101308Sjeff (*vpp)->v_vflag = VV_ROOT; 15575295Sdes#if 0 15675295Sdes printf("root vnode allocated\n"); 15775295Sdes#endif 15884246Sdes /* fall through */ 15975295Sdes case pfstype_dir: 16075295Sdes case pfstype_this: 16175295Sdes case pfstype_parent: 16277998Sdes case pfstype_procdir: 16375295Sdes (*vpp)->v_type = VDIR; 16475295Sdes break; 16575295Sdes case pfstype_file: 16675295Sdes (*vpp)->v_type = VREG; 16775295Sdes break; 16875295Sdes case pfstype_symlink: 16975295Sdes (*vpp)->v_type = VLNK; 17075295Sdes break; 17177998Sdes case pfstype_none: 17277998Sdes KASSERT(0, ("pfs_vncache_alloc called for null 
node\n")); 17375295Sdes default: 17475295Sdes panic("%s has unexpected type: %d", pn->pn_name, pn->pn_type); 17575295Sdes } 176103314Snjl /* 177103314Snjl * Propagate flag through to vnode so users know it can change 178103314Snjl * if the process changes (i.e. execve) 179103314Snjl */ 180103314Snjl if ((pn->pn_flags & PFS_PROCDEP) != 0) 181103314Snjl (*vpp)->v_vflag |= VV_PROCDEP; 18284246Sdes pvd->pvd_vnode = *vpp; 183167497Stegge (*vpp)->v_vnlock->lk_flags |= LK_CANRECURSE; 184167497Stegge vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread); 185167497Stegge error = insmntque(*vpp, mp); 186167497Stegge if (error != 0) { 187167497Stegge FREE(pvd, M_PFSVNCACHE); 188167497Stegge *vpp = NULLVP; 189167497Stegge return (error); 190167497Stegge } 19177998Sdes mtx_lock(&pfs_vncache_mutex); 19284246Sdes pvd->pvd_prev = NULL; 19384246Sdes pvd->pvd_next = pfs_vncache; 19484246Sdes if (pvd->pvd_next) 19584246Sdes pvd->pvd_next->pvd_prev = pvd; 19684246Sdes pfs_vncache = pvd; 19775295Sdes mtx_unlock(&pfs_vncache_mutex); 19875295Sdes return (0); 19975295Sdes} 20075295Sdes 20175295Sdes/* 20275295Sdes * Free a vnode 20375295Sdes */ 20475295Sdesint 20575295Sdespfs_vncache_free(struct vnode *vp) 20675295Sdes{ 20777998Sdes struct pfs_vdata *pvd; 20888234Sdillon 20975295Sdes mtx_lock(&pfs_vncache_mutex); 21084246Sdes pvd = (struct pfs_vdata *)vp->v_data; 21184246Sdes KASSERT(pvd != NULL, ("pfs_vncache_free(): no vnode data\n")); 21284246Sdes if (pvd->pvd_next) 21384246Sdes pvd->pvd_next->pvd_prev = pvd->pvd_prev; 21484246Sdes if (pvd->pvd_prev) 21584246Sdes pvd->pvd_prev->pvd_next = pvd->pvd_next; 21677998Sdes else 21784246Sdes pfs_vncache = pvd->pvd_next; 21877998Sdes mtx_unlock(&pfs_vncache_mutex); 21984246Sdes 22084246Sdes --pfs_vncache_entries; 22177998Sdes FREE(pvd, M_PFSVNCACHE); 22275295Sdes vp->v_data = NULL; 22375295Sdes return (0); 22475295Sdes} 22584246Sdes 22684246Sdes/* 22784246Sdes * Free all vnodes associated with a defunct process 228139896Srwatson * 229139896Srwatson 
* XXXRW: It is unfortunate that pfs_exit() always acquires and releases two 230139896Srwatson * mutexes (one of which is Giant) for every process exit, even if procfs 231139896Srwatson * isn't mounted. 23284246Sdes */ 23384246Sdesstatic void 234112564Sjhbpfs_exit(void *arg, struct proc *p) 23584246Sdes{ 236133776Sdes struct pfs_vdata *pvd; 237133776Sdes struct vnode *vnp; 23884246Sdes 239142907Sphk if (pfs_vncache == NULL) 240142907Sphk return; 241126975Sgreen mtx_lock(&Giant); 24284246Sdes /* 243133776Sdes * This is extremely inefficient due to the fact that vgone() not 244133776Sdes * only indirectly modifies the vnode cache, but may also sleep. 245133776Sdes * We can neither hold pfs_vncache_mutex across a vgone() call, 246133776Sdes * nor make any assumptions about the state of the cache after 247133776Sdes * vgone() returns. In consequence, we must start over after 248133776Sdes * every vgone() call, and keep trying until we manage to traverse 249133776Sdes * the entire cache. 250133776Sdes * 251133776Sdes * The only way to improve this situation is to change the data 252133776Sdes * structure used to implement the cache. An obvious choice in 253133776Sdes * this particular case would be a BST sorted by PID. 
25484246Sdes */ 255133776Sdes mtx_lock(&pfs_vncache_mutex); 256133776Sdes pvd = pfs_vncache; 257133776Sdes while (pvd != NULL) { 258133776Sdes if (pvd->pvd_pid == p->p_pid) { 259133776Sdes vnp = pvd->pvd_vnode; 260147809Sjeff vhold(vnp); 261133776Sdes mtx_unlock(&pfs_vncache_mutex); 262143513Sjeff VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); 263133776Sdes vgone(vnp); 264143513Sjeff VOP_UNLOCK(vnp, 0, curthread); 265147809Sjeff vdrop(vnp); 266133776Sdes mtx_lock(&pfs_vncache_mutex); 267133776Sdes pvd = pfs_vncache; 268133776Sdes } else { 269133776Sdes pvd = pvd->pvd_next; 27084246Sdes } 27184246Sdes } 27284246Sdes mtx_unlock(&pfs_vncache_mutex); 273126975Sgreen mtx_unlock(&Giant); 27484246Sdes} 27584386Sdes 27684386Sdes/* 27784386Sdes * Disable a pseudofs node, and free all vnodes associated with it 27884386Sdes */ 27984386Sdesint 28084386Sdespfs_disable(struct pfs_node *pn) 28184386Sdes{ 282133776Sdes struct pfs_vdata *pvd; 283133776Sdes struct vnode *vnp; 28497940Sdes 28584386Sdes if (pn->pn_flags & PFS_DISABLED) 28684386Sdes return (0); 287133776Sdes pn->pn_flags |= PFS_DISABLED; 288133776Sdes /* XXX see comment above nearly identical code in pfs_exit() */ 28984386Sdes mtx_lock(&pfs_vncache_mutex); 290133776Sdes pvd = pfs_vncache; 291133776Sdes while (pvd != NULL) { 292133776Sdes if (pvd->pvd_pn == pn) { 293133776Sdes vnp = pvd->pvd_vnode; 294147809Sjeff vhold(vnp); 295133776Sdes mtx_unlock(&pfs_vncache_mutex); 296143513Sjeff VOP_LOCK(vnp, LK_EXCLUSIVE, curthread); 297133776Sdes vgone(vnp); 298143513Sjeff VOP_UNLOCK(vnp, 0, curthread); 299147809Sjeff vdrop(vnp); 300133776Sdes mtx_lock(&pfs_vncache_mutex); 301133776Sdes pvd = pfs_vncache; 302133776Sdes } else { 303133776Sdes pvd = pvd->pvd_next; 30484386Sdes } 30584386Sdes } 30684386Sdes mtx_unlock(&pfs_vncache_mutex); 30784386Sdes return (0); 30884386Sdes} 30984386Sdes 31084386Sdes/* 31184386Sdes * Re-enable a disabled pseudofs node 31284386Sdes */ 31384386Sdesint 31484386Sdespfs_enable(struct pfs_node *pn) 
31584386Sdes{ 31684386Sdes pn->pn_flags &= ~PFS_DISABLED; 31784386Sdes return (0); 31884386Sdes} 319