pseudofs_vncache.c revision 176519
/*-
 * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2775295Sdes */ 2875295Sdes 29143592Sdes#include <sys/cdefs.h> 30143592Sdes__FBSDID("$FreeBSD: head/sys/fs/pseudofs/pseudofs_vncache.c 176519 2008-02-24 16:38:58Z attilio $"); 31143592Sdes 32143592Sdes#include "opt_pseudofs.h" 33143592Sdes 3475295Sdes#include <sys/param.h> 3575295Sdes#include <sys/kernel.h> 3675295Sdes#include <sys/systm.h> 37112564Sjhb#include <sys/eventhandler.h> 3878073Sdes#include <sys/lock.h> 3975295Sdes#include <sys/malloc.h> 4077965Sdes#include <sys/mutex.h> 4184246Sdes#include <sys/proc.h> 4275295Sdes#include <sys/sysctl.h> 4375295Sdes#include <sys/vnode.h> 4475295Sdes 4575295Sdes#include <fs/pseudofs/pseudofs.h> 4675295Sdes#include <fs/pseudofs/pseudofs_internal.h> 4775295Sdes 4877998Sdesstatic MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache"); 4975295Sdes 5075295Sdesstatic struct mtx pfs_vncache_mutex; 5189071Smsmithstatic struct pfs_vdata *pfs_vncache; 52112564Sjhbstatic eventhandler_tag pfs_exit_tag; 53112564Sjhbstatic void pfs_exit(void *arg, struct proc *p); 5475295Sdes 5575295SdesSYSCTL_NODE(_vfs_pfs, OID_AUTO, vncache, CTLFLAG_RW, 0, 5675295Sdes "pseudofs vnode cache"); 5775295Sdes 5884246Sdesstatic int pfs_vncache_entries; 5984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, entries, CTLFLAG_RD, 6084246Sdes &pfs_vncache_entries, 0, 6184246Sdes "number of entries in the vnode cache"); 6284246Sdes 6384246Sdesstatic int pfs_vncache_maxentries; 6484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, maxentries, CTLFLAG_RD, 6584246Sdes &pfs_vncache_maxentries, 0, 6684246Sdes "highest number of entries in the vnode cache"); 6784246Sdes 6875295Sdesstatic int pfs_vncache_hits; 6984246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, hits, CTLFLAG_RD, 7084246Sdes &pfs_vncache_hits, 0, 7175295Sdes "number of cache hits since initialization"); 7275295Sdes 7375295Sdesstatic int pfs_vncache_misses; 7484246SdesSYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, misses, CTLFLAG_RD, 7584246Sdes &pfs_vncache_misses, 0, 7675295Sdes "number of cache misses since 
initialization"); 7775295Sdes 78138290Sphkextern struct vop_vector pfs_vnodeops; /* XXX -> .h file */ 7975295Sdes 8075295Sdes/* 8175295Sdes * Initialize vnode cache 8275295Sdes */ 8375295Sdesvoid 8475295Sdespfs_vncache_load(void) 8575295Sdes{ 86168720Sdes 87168720Sdes mtx_assert(&Giant, MA_OWNED); 88168720Sdes mtx_init(&pfs_vncache_mutex, "pfs_vncache", NULL, MTX_DEF); 89112564Sjhb pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL, 90112564Sjhb EVENTHANDLER_PRI_ANY); 9175295Sdes} 9275295Sdes 9375295Sdes/* 9475295Sdes * Tear down vnode cache 9575295Sdes */ 9675295Sdesvoid 9775295Sdespfs_vncache_unload(void) 9875295Sdes{ 99168720Sdes 100168720Sdes mtx_assert(&Giant, MA_OWNED); 101112564Sjhb EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag); 102168720Sdes KASSERT(pfs_vncache_entries == 0, 103168720Sdes ("%d vncache entries remaining", pfs_vncache_entries)); 10475295Sdes mtx_destroy(&pfs_vncache_mutex); 10575295Sdes} 10675295Sdes 10775295Sdes/* 10875295Sdes * Allocate a vnode 10975295Sdes */ 11075295Sdesint 11177998Sdespfs_vncache_alloc(struct mount *mp, struct vnode **vpp, 11277998Sdes struct pfs_node *pn, pid_t pid) 11375295Sdes{ 11477998Sdes struct pfs_vdata *pvd; 115165737Sjhb struct vnode *vp; 11675295Sdes int error; 11788234Sdillon 11888234Sdillon /* 11997940Sdes * See if the vnode is in the cache. 12088234Sdillon * XXX linear search is not very efficient. 
12188234Sdillon */ 122165737Sjhbretry: 12375295Sdes mtx_lock(&pfs_vncache_mutex); 12484246Sdes for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) { 125109969Stjr if (pvd->pvd_pn == pn && pvd->pvd_pid == pid && 126109969Stjr pvd->pvd_vnode->v_mount == mp) { 127165737Sjhb vp = pvd->pvd_vnode; 128165737Sjhb VI_LOCK(vp); 129165737Sjhb mtx_unlock(&pfs_vncache_mutex); 130165737Sjhb if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) { 13175295Sdes ++pfs_vncache_hits; 132165737Sjhb *vpp = vp; 133168637Sdes /* 134168637Sdes * Some callers cache_enter(vp) later, so 135168637Sdes * we have to make sure it's not in the 136168637Sdes * VFS cache so it doesn't get entered 137168637Sdes * twice. A better solution would be to 138168637Sdes * make pfs_vncache_alloc() responsible 139168637Sdes * for entering the vnode in the VFS 140168637Sdes * cache. 141168637Sdes */ 142168637Sdes cache_purge(vp); 14375295Sdes return (0); 14475295Sdes } 145165737Sjhb goto retry; 14677998Sdes } 14777998Sdes } 14877998Sdes mtx_unlock(&pfs_vncache_mutex); 14975295Sdes ++pfs_vncache_misses; 15075295Sdes 15175295Sdes /* nope, get a new one */ 152111119Simp MALLOC(pvd, struct pfs_vdata *, sizeof *pvd, M_PFSVNCACHE, M_WAITOK); 153168985Sdes mtx_lock(&pfs_vncache_mutex); 15484246Sdes if (++pfs_vncache_entries > pfs_vncache_maxentries) 15584246Sdes pfs_vncache_maxentries = pfs_vncache_entries; 156168985Sdes mtx_unlock(&pfs_vncache_mutex); 157138290Sphk error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp); 158105165Sphk if (error) { 159168985Sdes mtx_lock(&pfs_vncache_mutex); 160168985Sdes --pfs_vncache_entries; 161168985Sdes mtx_unlock(&pfs_vncache_mutex); 162105165Sphk FREE(pvd, M_PFSVNCACHE); 16375295Sdes return (error); 164105165Sphk } 16577998Sdes pvd->pvd_pn = pn; 16677998Sdes pvd->pvd_pid = pid; 16777998Sdes (*vpp)->v_data = pvd; 16875295Sdes switch (pn->pn_type) { 16975295Sdes case pfstype_root: 170101308Sjeff (*vpp)->v_vflag = VV_ROOT; 17175295Sdes#if 0 17275295Sdes printf("root vnode 
allocated\n"); 17375295Sdes#endif 17484246Sdes /* fall through */ 17575295Sdes case pfstype_dir: 17675295Sdes case pfstype_this: 17775295Sdes case pfstype_parent: 17877998Sdes case pfstype_procdir: 17975295Sdes (*vpp)->v_type = VDIR; 18075295Sdes break; 18175295Sdes case pfstype_file: 18275295Sdes (*vpp)->v_type = VREG; 18375295Sdes break; 18475295Sdes case pfstype_symlink: 18575295Sdes (*vpp)->v_type = VLNK; 18675295Sdes break; 18777998Sdes case pfstype_none: 18877998Sdes KASSERT(0, ("pfs_vncache_alloc called for null node\n")); 18975295Sdes default: 19075295Sdes panic("%s has unexpected type: %d", pn->pn_name, pn->pn_type); 19175295Sdes } 192103314Snjl /* 193103314Snjl * Propagate flag through to vnode so users know it can change 194103314Snjl * if the process changes (i.e. execve) 195103314Snjl */ 196103314Snjl if ((pn->pn_flags & PFS_PROCDEP) != 0) 197103314Snjl (*vpp)->v_vflag |= VV_PROCDEP; 19884246Sdes pvd->pvd_vnode = *vpp; 199176519Sattilio VN_LOCK_AREC(*vpp); 200175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 201167497Stegge error = insmntque(*vpp, mp); 202167497Stegge if (error != 0) { 203168985Sdes mtx_lock(&pfs_vncache_mutex); 204168985Sdes --pfs_vncache_entries; 205168985Sdes mtx_unlock(&pfs_vncache_mutex); 206167497Stegge FREE(pvd, M_PFSVNCACHE); 207167497Stegge *vpp = NULLVP; 208167497Stegge return (error); 209167497Stegge } 21077998Sdes mtx_lock(&pfs_vncache_mutex); 21184246Sdes pvd->pvd_prev = NULL; 21284246Sdes pvd->pvd_next = pfs_vncache; 21384246Sdes if (pvd->pvd_next) 21484246Sdes pvd->pvd_next->pvd_prev = pvd; 21584246Sdes pfs_vncache = pvd; 21675295Sdes mtx_unlock(&pfs_vncache_mutex); 21775295Sdes return (0); 21875295Sdes} 21975295Sdes 22075295Sdes/* 22175295Sdes * Free a vnode 22275295Sdes */ 22375295Sdesint 22475295Sdespfs_vncache_free(struct vnode *vp) 22575295Sdes{ 22677998Sdes struct pfs_vdata *pvd; 22788234Sdillon 22875295Sdes mtx_lock(&pfs_vncache_mutex); 22984246Sdes pvd = (struct pfs_vdata *)vp->v_data; 23084246Sdes 
KASSERT(pvd != NULL, ("pfs_vncache_free(): no vnode data\n")); 23184246Sdes if (pvd->pvd_next) 23284246Sdes pvd->pvd_next->pvd_prev = pvd->pvd_prev; 23384246Sdes if (pvd->pvd_prev) 23484246Sdes pvd->pvd_prev->pvd_next = pvd->pvd_next; 23577998Sdes else 23684246Sdes pfs_vncache = pvd->pvd_next; 237168985Sdes --pfs_vncache_entries; 23877998Sdes mtx_unlock(&pfs_vncache_mutex); 23984246Sdes 24077998Sdes FREE(pvd, M_PFSVNCACHE); 24175295Sdes vp->v_data = NULL; 24275295Sdes return (0); 24375295Sdes} 24484246Sdes 24584246Sdes/* 246168764Sdes * Purge the cache of dead entries 247139896Srwatson * 248168637Sdes * This is extremely inefficient due to the fact that vgone() not only 249168637Sdes * indirectly modifies the vnode cache, but may also sleep. We can 250168637Sdes * neither hold pfs_vncache_mutex across a vgone() call, nor make any 251168637Sdes * assumptions about the state of the cache after vgone() returns. In 252168637Sdes * consequence, we must start over after every vgone() call, and keep 253168637Sdes * trying until we manage to traverse the entire cache. 254168637Sdes * 255168637Sdes * The only way to improve this situation is to change the data structure 256168637Sdes * used to implement the cache. 
25784246Sdes */ 258168637Sdesvoid 259168637Sdespfs_purge(struct pfs_node *pn) 26084246Sdes{ 261133776Sdes struct pfs_vdata *pvd; 262133776Sdes struct vnode *vnp; 26384246Sdes 264133776Sdes mtx_lock(&pfs_vncache_mutex); 265133776Sdes pvd = pfs_vncache; 266133776Sdes while (pvd != NULL) { 267168637Sdes if (pvd->pvd_dead || (pn != NULL && pvd->pvd_pn == pn)) { 268133776Sdes vnp = pvd->pvd_vnode; 269147809Sjeff vhold(vnp); 270133776Sdes mtx_unlock(&pfs_vncache_mutex); 271175294Sattilio VOP_LOCK(vnp, LK_EXCLUSIVE); 272133776Sdes vgone(vnp); 273175294Sattilio VOP_UNLOCK(vnp, 0); 274147809Sjeff vdrop(vnp); 275133776Sdes mtx_lock(&pfs_vncache_mutex); 276133776Sdes pvd = pfs_vncache; 277133776Sdes } else { 278133776Sdes pvd = pvd->pvd_next; 27984246Sdes } 28084246Sdes } 28184246Sdes mtx_unlock(&pfs_vncache_mutex); 282168637Sdes} 283168637Sdes 284168637Sdes/* 285168637Sdes * Free all vnodes associated with a defunct process 286168637Sdes * 287168637Sdes * XXXRW: It is unfortunate that pfs_exit() always acquires and releases two 288168637Sdes * mutexes (one of which is Giant) for every process exit, even if procfs 289168637Sdes * isn't mounted. 290168637Sdes */ 291168637Sdesstatic void 292168637Sdespfs_exit(void *arg, struct proc *p) 293168637Sdes{ 294168637Sdes struct pfs_vdata *pvd; 295168637Sdes int dead; 296168637Sdes 297168637Sdes if (pfs_vncache == NULL) 298168637Sdes return; 299168637Sdes mtx_lock(&Giant); 300168637Sdes mtx_lock(&pfs_vncache_mutex); 301168637Sdes for (pvd = pfs_vncache, dead = 0; pvd != NULL; pvd = pvd->pvd_next) 302168637Sdes if (pvd->pvd_pid == p->p_pid) 303168637Sdes dead = pvd->pvd_dead = 1; 304168637Sdes mtx_unlock(&pfs_vncache_mutex); 305168637Sdes if (dead) 306168637Sdes pfs_purge(NULL); 307126975Sgreen mtx_unlock(&Giant); 30884246Sdes} 309