vfs_subr.c revision 41994
154359Sroberto/* 254359Sroberto * Copyright (c) 1989, 1993 354359Sroberto * The Regents of the University of California. All rights reserved. 4285612Sdelphij * (c) UNIX System Laboratories, Inc. 554359Sroberto * All or some portions of this file are derived from material licensed 6285612Sdelphij * to the University of California by American Telephone and Telegraph 7182007Sroberto * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8182007Sroberto * the permission of UNIX System Laboratories, Inc. 9182007Sroberto * 10285612Sdelphij * Redistribution and use in source and binary forms, with or without 11285612Sdelphij * modification, are permitted provided that the following conditions 12285612Sdelphij * are met: 13285612Sdelphij * 1. Redistributions of source code must retain the above copyright 14285612Sdelphij * notice, this list of conditions and the following disclaimer. 15285612Sdelphij * 2. Redistributions in binary form must reproduce the above copyright 16285612Sdelphij * notice, this list of conditions and the following disclaimer in the 17285612Sdelphij * documentation and/or other materials provided with the distribution. 18285612Sdelphij * 3. All advertising materials mentioning features or use of this software 19285612Sdelphij * must display the following acknowledgement: 20285612Sdelphij * This product includes software developed by the University of 21182007Sroberto * California, Berkeley and its contributors. 2282498Sroberto * 4. Neither the name of the University nor the names of its contributors 2382498Sroberto * may be used to endorse or promote products derived from this software 2482498Sroberto * without specific prior written permission. 
25285612Sdelphij * 26285612Sdelphij * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27285612Sdelphij * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28285612Sdelphij * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29285612Sdelphij * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30285612Sdelphij * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31285612Sdelphij * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3282498Sroberto * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33285612Sdelphij * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34182007Sroberto * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35293650Sglebius * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3654359Sroberto * SUCH DAMAGE. 3754359Sroberto * 38182007Sroberto * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39182007Sroberto * $Id: vfs_subr.c,v 1.174 1998/12/04 22:54:51 archie Exp $ 40182007Sroberto */ 4154359Sroberto 4254359Sroberto/* 43182007Sroberto * External virtual filesystem routines 44182007Sroberto */ 45182007Sroberto#include "opt_ddb.h" 46182007Sroberto 47182007Sroberto#include <sys/param.h> 4854359Sroberto#include <sys/systm.h> 4954359Sroberto#include <sys/conf.h> 5054359Sroberto#include <sys/kernel.h> 5154359Sroberto#include <sys/proc.h> 5254359Sroberto#include <sys/malloc.h> 5354359Sroberto#include <sys/mount.h> 5454359Sroberto#include <sys/socket.h> 5554359Sroberto#include <sys/vnode.h> 5654359Sroberto#include <sys/stat.h> 5754359Sroberto#include <sys/buf.h> 5854359Sroberto#include <sys/domain.h> 5954359Sroberto#include <sys/dirent.h> 60182007Sroberto#include <sys/vmmeter.h> 6154359Sroberto 62285612Sdelphij#include <machine/limits.h> 63285612Sdelphij 6454359Sroberto#include <vm/vm.h> 6554359Sroberto#include <vm/vm_object.h> 
66182007Sroberto#include <vm/vm_extern.h> 67182007Sroberto#include <vm/pmap.h> 68182007Sroberto#include <vm/vm_map.h> 69182007Sroberto#include <vm/vm_pager.h> 70182007Sroberto#include <vm/vnode_pager.h> 71285612Sdelphij#include <vm/vm_zone.h> 7254359Sroberto#include <sys/sysctl.h> 7354359Sroberto 7454359Sroberto#include <miscfs/specfs/specdev.h> 75285612Sdelphij 76285612Sdelphijstatic MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 77285612Sdelphij 78293650Sglebiusstatic void insmntque __P((struct vnode *vp, struct mount *mp)); 79293650Sglebiusstatic void vclean __P((struct vnode *vp, int flags, struct proc *p)); 80285612Sdelphijstatic void vfree __P((struct vnode *)); 81285612Sdelphijstatic void vgonel __P((struct vnode *vp, struct proc *p)); 82285612Sdelphijstatic unsigned long numvnodes; 83285612SdelphijSYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 84285612Sdelphij 85285612Sdelphijenum vtype iftovt_tab[16] = { 86285612Sdelphij VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 87285612Sdelphij VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 88285612Sdelphij}; 89285612Sdelphijint vttoif_tab[9] = { 90285612Sdelphij 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 91285612Sdelphij S_IFSOCK, S_IFIFO, S_IFMT, 92285612Sdelphij}; 93285612Sdelphij 94285612Sdelphijstatic TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 95285612Sdelphijstruct tobefreelist vnode_tobefree_list; /* vnode free list */ 96285612Sdelphij 97285612Sdelphijstatic u_long wantfreevnodes = 25; 98285612SdelphijSYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 99285612Sdelphijstatic u_long freevnodes = 0; 100285612SdelphijSYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 101285612Sdelphij 102285612Sdelphijint vfs_ioopt = 0; 103285612Sdelphij#ifdef ENABLE_VFS_IOOPT 104285612SdelphijSYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 10554359Sroberto#endif 
10654359Sroberto 10754359Srobertostruct mntlist mountlist; /* mounted filesystem list */ 10854359Srobertostruct simplelock mountlist_slock; 10954359Srobertostatic struct simplelock mntid_slock; 11054359Srobertostruct simplelock mntvnode_slock; 11154359Srobertoint nfs_mount_type = -1; 11254359Srobertostatic struct simplelock vnode_free_list_slock; 11354359Srobertostatic struct simplelock spechash_slock; 11454359Srobertostruct nfs_public nfs_pub; /* publicly exported FS */ 11554359Srobertostatic vm_zone_t vnode_zone; 11654359Sroberto 11754359Sroberto/* 118182007Sroberto * The workitem queue. 11954359Sroberto */ 12054359Sroberto#define SYNCER_MAXDELAY 32 121182007Srobertostatic int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 12254359Srobertotime_t syncdelay = 30; 12354359Srobertoint rushjob; /* number of slots to run ASAP */ 124132451Sroberto 125132451Srobertostatic int syncer_delayno = 0; 12654359Srobertostatic long syncer_mask; 12754359SrobertoLIST_HEAD(synclist, vnode); 12854359Srobertostatic struct synclist *syncer_workitem_pending; 12954359Sroberto 13054359Srobertoint desiredvnodes; 13154359SrobertoSYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 13254359Sroberto 13354359Srobertostatic void vfs_free_addrlist __P((struct netexport *nep)); 13454359Srobertostatic int vfs_free_netcred __P((struct radix_node *rn, void *w)); 13554359Srobertostatic int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 13654359Sroberto struct export_args *argp)); 13754359Sroberto 13854359Sroberto/* 13954359Sroberto * Initialize the vnode management data structures. 
 */
void
vntblinit()
{

        /* Scale the vnode target to the process limit and physical memory. */
        desiredvnodes = maxproc + cnt.v_page_count / 4;
        simple_lock_init(&mntvnode_slock);
        simple_lock_init(&mntid_slock);
        simple_lock_init(&spechash_slock);
        TAILQ_INIT(&vnode_free_list);
        TAILQ_INIT(&vnode_tobefree_list);
        simple_lock_init(&vnode_free_list_slock);
        CIRCLEQ_INIT(&mountlist);
        /* Vnodes are allocated from a private zone allocator. */
        vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
        /*
         * Initialize the filesystem syncer.
         */
        syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
                &syncer_mask);
        /* hashinit() rounds to a power of two; recompute the real depth. */
        syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * mp       - mount point to busy
 * flags    - LK_NOWAIT: fail instead of sleeping if an unmount is in progress
 * interlkp - optional simple lock; dropped across the sleep, and passed to
 *            lockmgr() as an interlock on the success path
 * p        - current process (for lockmgr ownership)
 *
 * Returns 0 on success, ENOENT if the filesystem is being unmounted.
 */
int
vfs_busy(mp, flags, interlkp, p)
        struct mount *mp;
        int flags;
        struct simplelock *interlkp;
        struct proc *p;
{
        int lkflags;

        if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
                if (flags & LK_NOWAIT)
                        return (ENOENT);
                /* Ask dounmount() to wake us when the unmount completes. */
                mp->mnt_kern_flag |= MNTK_MWAIT;
                if (interlkp) {
                        simple_unlock(interlkp);
                }
                /*
                 * Since all busy locks are shared except the exclusive
                 * lock granted when unmounting, the only place that a
                 * wakeup needs to be done is at the release of the
                 * exclusive lock at the end of dounmount.
                 */
                tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
                if (interlkp) {
                        simple_lock(interlkp);
                }
                return (ENOENT);
        }
        /* Busy locks are shared; LK_INTERLOCK hands interlkp to lockmgr. */
        lkflags = LK_SHARED | LK_NOPAUSE;
        if (interlkp)
                lkflags |= LK_INTERLOCK;
        if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
                panic("vfs_busy: unexpected lock failure");
        return (0);
}

/*
 * Free a busy filesystem.
 * Releases the shared busy lock taken by vfs_busy().
 */
void
vfs_unbusy(mp, p)
        struct mount *mp;
        struct proc *p;
{

        lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
        char *fstypename;
        char *devname;
        struct mount **mpp;
{
        struct proc *p = curproc;       /* XXX */
        struct vfsconf *vfsp;
        struct mount *mp;

        if (fstypename == NULL)
                return (ENODEV);
        /* Find the named filesystem type on the global vfsconf list. */
        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                if (!strcmp(vfsp->vfc_name, fstypename))
                        break;
        if (vfsp == NULL)
                return (ENODEV);
        mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
        bzero((char *)mp, (u_long)sizeof(struct mount));
        lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
        /* Busy the new mount so nothing can use it before root is mounted. */
        (void)vfs_busy(mp, LK_NOWAIT, 0, p);
        LIST_INIT(&mp->mnt_vnodelist);
        mp->mnt_vfc = vfsp;
        mp->mnt_op = vfsp->vfc_vfsops;
        /* Root is always mounted read-only first; remounted rw later. */
        mp->mnt_flag = MNT_RDONLY;
        mp->mnt_vnodecovered = NULLVP;
        vfsp->vfc_refcount++;
        mp->mnt_stat.f_type = vfsp->vfc_typenum;
        mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
        strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
        mp->mnt_stat.f_mntonname[0] = '/';
        mp->mnt_stat.f_mntonname[1] = 0;
        (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
        *mpp = mp;
        return (0);
}

/*
 * Find an appropriate filesystem to use for the root.
If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef   /* XXX JH */
int
lite2_vfs_mountroot()
{
        struct vfsconf *vfsp;
        extern int (*lite2_mountroot) __P((void));
        int error;

        /* Honor a preselected root mount routine if one is configured. */
        if (lite2_mountroot != NULL)
                return ((*lite2_mountroot)());
        /* Otherwise try each filesystem type that has a mountroot hook. */
        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
                if (vfsp->vfc_mountroot == NULL)
                        continue;
                if ((error = (*vfsp->vfc_mountroot)()) == 0)
                        return (0);
                printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
        }
        return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        /* Walk the global mount list under mountlist_slock. */
        simple_lock(&mountlist_slock);
        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
            mp = mp->mnt_list.cqe_next) {
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
                        simple_unlock(&mountlist_slock);
                        return (mp);
                }
        }
        simple_unlock(&mountlist_slock);
        /* No mount with this fsid. */
        return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 *
 * val[0] encodes a pseudo device number, val[1] the filesystem type.
 */
void
vfs_getnewfsid(mp)
        struct mount *mp;
{
        static u_short xxxfs_mntid;

        fsid_t tfsid;
        int mtype;

        simple_lock(&mntid_slock);
        mtype = mp->mnt_vfc->vfc_typenum;
        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.cqh_first != (void *)&mountlist) {
                /* Bump the candidate minor number until the fsid is unique. */
                while (vfs_getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
        simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
340182007Srobertovattr_null(vap) 341182007Sroberto register struct vattr *vap; 342182007Sroberto{ 343182007Sroberto 344182007Sroberto vap->va_type = VNON; 345182007Sroberto vap->va_size = VNOVAL; 346182007Sroberto vap->va_bytes = VNOVAL; 347182007Sroberto vap->va_mode = VNOVAL; 348182007Sroberto vap->va_nlink = VNOVAL; 349182007Sroberto vap->va_uid = VNOVAL; 350182007Sroberto vap->va_gid = VNOVAL; 351182007Sroberto vap->va_fsid = VNOVAL; 352182007Sroberto vap->va_fileid = VNOVAL; 35354359Sroberto vap->va_blocksize = VNOVAL; 35454359Sroberto vap->va_rdev = VNOVAL; 35554359Sroberto vap->va_atime.tv_sec = VNOVAL; 35654359Sroberto vap->va_atime.tv_nsec = VNOVAL; 35754359Sroberto vap->va_mtime.tv_sec = VNOVAL; 35854359Sroberto vap->va_mtime.tv_nsec = VNOVAL; 35954359Sroberto vap->va_ctime.tv_sec = VNOVAL; 36054359Sroberto vap->va_ctime.tv_nsec = VNOVAL; 36154359Sroberto vap->va_flags = VNOVAL; 362285612Sdelphij vap->va_gen = VNOVAL; 36354359Sroberto vap->va_vaflags = 0; 36454359Sroberto} 36554359Sroberto 36654359Sroberto/* 36754359Sroberto * Routines having to do with the management of the vnode table. 36854359Sroberto */ 36954359Srobertoextern vop_t **dead_vnodeop_p; 37054359Sroberto 37154359Sroberto/* 37254359Sroberto * Return the next vnode from the free list. 37354359Sroberto */ 37454359Srobertoint 37554359Srobertogetnewvnode(tag, mp, vops, vpp) 37654359Sroberto enum vtagtype tag; 37754359Sroberto struct mount *mp; 37854359Sroberto vop_t **vops; 37954359Sroberto struct vnode **vpp; 38054359Sroberto{ 38154359Sroberto int s; 38254359Sroberto struct proc *p = curproc; /* XXX */ 38354359Sroberto struct vnode *vp, *tvp, *nvp; 38454359Sroberto vm_object_t object; 38554359Sroberto TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 38654359Sroberto 38754359Sroberto /* 38854359Sroberto * We take the least recently used vnode from the freelist 38954359Sroberto * if we can get it and it has no cached pages, and no 39054359Sroberto * namecache entries are relative to it. 
39154359Sroberto * Otherwise we allocate a new vnode 39254359Sroberto */ 39354359Sroberto 39454359Sroberto s = splbio(); 39554359Sroberto simple_lock(&vnode_free_list_slock); 396132451Sroberto TAILQ_INIT(&vnode_tmp_list); 397132451Sroberto 398285612Sdelphij for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 399285612Sdelphij nvp = TAILQ_NEXT(vp, v_freelist); 400132451Sroberto TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 401132451Sroberto if (vp->v_flag & VAGE) { 402132451Sroberto TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 40354359Sroberto } else { 404132451Sroberto TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 405132451Sroberto } 406132451Sroberto vp->v_flag &= ~(VTBFREE|VAGE); 407132451Sroberto vp->v_flag |= VFREE; 408132451Sroberto if (vp->v_usecount) 409132451Sroberto panic("tobe free vnode isn't"); 410132451Sroberto freevnodes++; 411132451Sroberto } 412285612Sdelphij 413285612Sdelphij if (wantfreevnodes && freevnodes < wantfreevnodes) { 414285612Sdelphij vp = NULL; 415285612Sdelphij } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 416285612Sdelphij /* 417285612Sdelphij * XXX: this is only here to be backwards compatible 418285612Sdelphij */ 419285612Sdelphij vp = NULL; 420132451Sroberto } else { 421132451Sroberto for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 422132451Sroberto nvp = TAILQ_NEXT(vp, v_freelist); 423132451Sroberto if (!simple_lock_try(&vp->v_interlock)) 424132451Sroberto continue; 425132451Sroberto if (vp->v_usecount) 426132451Sroberto panic("free vnode isn't"); 427132451Sroberto 428132451Sroberto object = vp->v_object; 429285612Sdelphij if (object && (object->resident_page_count || object->ref_count)) { 430285612Sdelphij printf("object inconsistant state: RPC: %d, RC: %d\n", 431132451Sroberto object->resident_page_count, object->ref_count); 432132451Sroberto /* Don't recycle if it's caching some pages */ 433132451Sroberto TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 434285612Sdelphij 
TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 435132451Sroberto continue; 436132451Sroberto } else if (LIST_FIRST(&vp->v_cache_src)) { 437182007Sroberto /* Don't recycle if active in the namecache */ 438182007Sroberto simple_unlock(&vp->v_interlock); 439182007Sroberto continue; 440182007Sroberto } else { 441182007Sroberto break; 442132451Sroberto } 443132451Sroberto } 444132451Sroberto } 445132451Sroberto 446132451Sroberto for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 44754359Sroberto nvp = TAILQ_NEXT(tvp, v_freelist); 448132451Sroberto TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 449132451Sroberto TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 450132451Sroberto simple_unlock(&tvp->v_interlock); 451132451Sroberto } 452132451Sroberto 453132451Sroberto if (vp) { 454285612Sdelphij vp->v_flag |= VDOOMED; 455182007Sroberto TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 456182007Sroberto freevnodes--; 457132451Sroberto simple_unlock(&vnode_free_list_slock); 458132451Sroberto cache_purge(vp); 45954359Sroberto vp->v_lease = NULL; 460285612Sdelphij if (vp->v_type != VBAD) { 461285612Sdelphij vgonel(vp, p); 462285612Sdelphij } else { 463285612Sdelphij simple_unlock(&vp->v_interlock); 464285612Sdelphij } 465285612Sdelphij 466285612Sdelphij#ifdef DIAGNOSTIC 467285612Sdelphij { 468132451Sroberto int s; 469285612Sdelphij 470285612Sdelphij if (vp->v_data) 471285612Sdelphij panic("cleaned vnode isn't"); 472285612Sdelphij s = splbio(); 473285612Sdelphij if (vp->v_numoutput) 47454359Sroberto panic("Clean vnode has pending I/O's"); 475285612Sdelphij splx(s); 47654359Sroberto } 47754359Sroberto#endif 47854359Sroberto vp->v_flag = 0; 479285612Sdelphij vp->v_lastr = 0; 480285612Sdelphij vp->v_lastw = 0; 48154359Sroberto vp->v_lasta = 0; 48254359Sroberto vp->v_cstart = 0; 483285612Sdelphij vp->v_clen = 0; 484132451Sroberto vp->v_socket = 0; 485132451Sroberto vp->v_writecount = 0; /* XXX */ 486285612Sdelphij vp->v_maxio = 0; 487132451Sroberto } else { 
488132451Sroberto simple_unlock(&vnode_free_list_slock); 489132451Sroberto vp = (struct vnode *) zalloc(vnode_zone); 490132451Sroberto bzero((char *) vp, sizeof *vp); 491132451Sroberto simple_lock_init(&vp->v_interlock); 492132451Sroberto vp->v_dd = vp; 493132451Sroberto cache_purge(vp); 494132451Sroberto LIST_INIT(&vp->v_cache_src); 495132451Sroberto TAILQ_INIT(&vp->v_cache_dst); 49654359Sroberto numvnodes++; 49754359Sroberto } 49854359Sroberto 49954359Sroberto TAILQ_INIT(&vp->v_cleanblkhd); 50054359Sroberto TAILQ_INIT(&vp->v_dirtyblkhd); 501182007Sroberto vp->v_type = VNON; 502330567Sgordon vp->v_tag = tag; 50354359Sroberto vp->v_op = vops; 50454359Sroberto insmntque(vp, mp); 50554359Sroberto *vpp = vp; 50654359Sroberto vp->v_usecount = 1; 50754359Sroberto vp->v_data = 0; 508285612Sdelphij splx(s); 509132451Sroberto 510132451Sroberto vfs_object_create(vp, p, p->p_ucred, TRUE); 51154359Sroberto return (0); 512285612Sdelphij} 51354359Sroberto 51454359Sroberto/* 51554359Sroberto * Move a vnode from one mount queue to another. 51654359Sroberto */ 51754359Srobertostatic void 51854359Srobertoinsmntque(vp, mp) 51954359Sroberto register struct vnode *vp; 52054359Sroberto register struct mount *mp; 52154359Sroberto{ 522330567Sgordon 523285612Sdelphij simple_lock(&mntvnode_slock); 52454359Sroberto /* 52554359Sroberto * Delete from old mount point vnode list, if on one. 52654359Sroberto */ 52754359Sroberto if (vp->v_mount != NULL) 528132451Sroberto LIST_REMOVE(vp, v_mntvnodes); 529132451Sroberto /* 530293650Sglebius * Insert into list of vnodes for the new mount point, if available. 
         */
        if ((vp->v_mount = mp) == NULL) {
                simple_unlock(&mntvnode_slock);
                return;
        }
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
        simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called when a write on bp's vnode completes.
 */
void
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if ((vp = bp->b_vp)) {
                vp->v_numoutput--;
                if (vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                /* Wake sleepers (e.g. vinvalbuf/vtruncbuf) waiting on VBWAIT. */
                if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t) &vp->v_numoutput);
                }
        }
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;
        vm_object_t object;

        if (flags & V_SAVE) {
                /* Drain pending writes, then sync dirty buffers to disk. */
                s = splbio();
                while (vp->v_numoutput) {
                        vp->v_flag |= VBWAIT;
                        tsleep((caddr_t)&vp->v_numoutput,
                                slpflag | (PRIBIO + 1),
                                "vinvlbuf", slptimeo);
                }
                if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
                        splx(s);
                        if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
                                return (error);
                        s = splbio();
                        /* After a synchronous fsync nothing may remain dirty. */
                        if (vp->v_numoutput > 0 ||
                            !TAILQ_EMPTY(&vp->v_dirtyblkhd))
                                panic("vinvalbuf: dirty bufs");
                }
                splx(s);
        }
        s = splbio();
        /* Repeatedly sweep both buffer lists until both are empty. */
        for (;;) {
                blist = TAILQ_FIRST(&vp->v_cleanblkhd);
                if (!blist)
                        blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = TAILQ_NEXT(bp, b_vnbufs);
                        if (bp->b_flags & B_BUSY) {
                                /* Sleep until the buffer is released, then rescan. */
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t) bp,
                                        slpflag | (PRIBIO + 4), "vinvalbuf",
                                        slptimeo);
                                if (error) {
                                        splx(s);
                                        return (error);
                                }
                                break;
                        }
                        /*
                         * XXX Since there are no node locks for NFS, I
                         * believe there is a slight chance that a delayed
                         * write will occur while sleeping just above, so
                         * check for it.  Note that vfs_bio_awrite expects
                         * buffers to reside on a queue, while VOP_BWRITE and
                         * brelse do not.
                         */
                        if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
                                (flags & V_SAVE)) {

                                if (bp->b_vp == vp) {
                                        if (bp->b_flags & B_CLUSTEROK) {
                                                vfs_bio_awrite(bp);
                                        } else {
                                                bremfree(bp);
                                                bp->b_flags |= (B_BUSY | B_ASYNC);
                                                VOP_BWRITE(bp);
                                        }
                                } else {
                                        bremfree(bp);
                                        bp->b_flags |= B_BUSY;
                                        (void) VOP_BWRITE(bp);
                                }
                                break;
                        }
                        /* Invalidate and release the buffer. */
                        bremfree(bp);
                        bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
                        bp->b_flags &= ~B_ASYNC;
                        brelse(bp);
                }
        }

        /* Wait for any writes started above to finish. */
        while (vp->v_numoutput > 0) {
                vp->v_flag |= VBWAIT;
                tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
        }

        splx(s);

        /*
         * Destroy the copy in the VM cache, too.
         */
        simple_lock(&vp->v_interlock);
        object = vp->v_object;
        if (object != NULL) {
                vm_object_page_remove(object, 0, 0,
                        (flags & V_SAVE) ? TRUE : FALSE);
        }
        simple_unlock(&vp->v_interlock);

        if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
                panic("vinvalbuf: flush failed");
        return (0);
}

/*
 * Truncate a file's buffer and pages to a specified length. This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
        register struct vnode *vp;
        struct ucred *cred;
        struct proc *p;
        off_t length;
        int blksize;
{
        register struct buf *bp;
        struct buf *nbp;
        int s, anyfreed;
        int trunclbn;

        /*
         * Round up to the *next* lbn.
         */
        trunclbn = (length + blksize - 1) / blksize;

        s = splbio();
restart:
        anyfreed = 1;
        /* Loop until a full pass over both lists frees nothing. */
        for (;anyfreed;) {
                anyfreed = 0;
                for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
                        nbp = TAILQ_NEXT(bp, b_vnbufs);
                        if (bp->b_lblkno >= trunclbn) {
                                if (bp->b_flags & B_BUSY) {
                                        bp->b_flags |= B_WANTED;
                                        tsleep(bp, PRIBIO + 4, "vtrb1", 0);
                                        goto restart;
                                } else {
                                        bremfree(bp);
                                        bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
                                        bp->b_flags &= ~B_ASYNC;
                                        brelse(bp);
                                        anyfreed = 1;
                                }
                                /* If the next buffer moved lists while we slept, rescan. */
                                if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
                                    (nbp->b_vp != vp) ||
                                    (nbp->b_flags & B_DELWRI))) {
                                        goto restart;
                                }
                        }
                }

                for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                        nbp = TAILQ_NEXT(bp, b_vnbufs);
                        if (bp->b_lblkno >= trunclbn) {
                                if (bp->b_flags & B_BUSY) {
                                        bp->b_flags |= B_WANTED;
                                        tsleep(bp, PRIBIO + 4, "vtrb2", 0);
                                        goto restart;
                                } else {
                                        bremfree(bp);
                                        bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
                                        bp->b_flags &= ~B_ASYNC;
                                        brelse(bp);
                                        anyfreed = 1;
                                }
                                if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
                                    (nbp->b_vp != vp) ||
                                    (nbp->b_flags & B_DELWRI) == 0)) {
                                        goto restart;
                                }
                        }
                }
        }

        if (length > 0) {
                /* Flush any delayed writes of indirect blocks (negative lbns). */
restartsync:
                for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
                        nbp = TAILQ_NEXT(bp, b_vnbufs);
                        if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
                                if (bp->b_flags & B_BUSY) {
                                        bp->b_flags |= B_WANTED;
                                        tsleep(bp, PRIBIO, "vtrb3", 0);
                                } else {
                                        bremfree(bp);
                                        bp->b_flags |= B_BUSY;
                                        if (bp->b_vp == vp) {
                                                bp->b_flags |= B_ASYNC;
                                        } else {
                                                bp->b_flags &= ~B_ASYNC;
                                        }
                                        VOP_BWRITE(bp);
                                }
                                goto restartsync;
                        }

                }
        }

        /* Wait for the writes started above to drain. */
        while (vp->v_numoutput > 0) {
                vp->v_flag |= VBWAIT;
                tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
        }

        splx(s);

        /* Shrink the VM object backing the vnode to match. */
        vnode_pager_setsize(vp, length);

        return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{
        int s;

#if defined(DIAGNOSTIC)
        if (bp->b_vp)
                panic("bgetvp: not free");
#endif
        /* Hold the vnode while the buffer references it. */
        vhold(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= B_VNCLEAN;
	bp->b_xflags &= ~B_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode:  remove it from whichever of the
 * vnode's buffer lists it is on, drop the hold taken by bgetvp(), and,
 * if the vnode's dirty list became empty, take it off the syncer
 * worklist.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
#endif

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	/*
	 * With no dirty buffers left there is nothing for the syncer
	 * to push; remove the vnode from the syncer worklist.
	 */
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, mounted on block devices
 * are delayed only about a half the time that file data is delayed.
 * Similarly, directory updates are more critical, so are only delayed
 * about a third the time that file data is delayed. Thus, there are
 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 * one each second (driven off the filesystem syncer process). The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
void
vn_syncer_add_to_worklist(vp, delay)
	struct vnode *vp;
	int delay;
{
	int s, slot;

	s = splbio();

	/* Already queued: remove first so it lands in the new slot. */
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	/*
	 * Clamp the delay; the -2 presumably keeps the slot clear of the
	 * bucket currently being processed — confirm against sched_sync().
	 */
	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}

static void sched_sync __P((void));
static struct proc *updateproc;		/* the syncer kernel process */
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 *
 * Once a second, pick the next workitem bucket and VOP_FSYNC every
 * vnode on it; vnodes that remain dirty are requeued a full period
 * later.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		/* Advance the round-robin bucket index, wrapping at max. */
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
			VOP_UNLOCK(vp, 0, p);
			/*
			 * If the fsync did not itself move the vnode off
			 * this bucket, requeue it for a full period.
			 */
			if (LIST_FIRST(slp) == vp) {
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed");
				/*
				 * Move ourselves to the back of the sync list.
				 */
				LIST_REMOVE(vp, v_synclist);
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;	/* skip the pacing sleep below */
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Unlike bgetvp(), no hold reference is taken on the vnode and the
 * buffer is not linked onto the vnode's buffer lists.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	struct vnode *oldvp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		oldvp = bp->b_vp;
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &oldvp->v_dirtyblkhd;
		else
			listheadp = &oldvp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
		/* Drop the hold this buffer had on the old vnode. */
		vdrop(oldvp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/*
			 * First dirty buffer on this vnode: put it on the
			 * syncer worklist.  Directories and mounted block
			 * devices get shorter delays (see the workitem
			 * queue comment above).
			 */
			switch (newvp->v_type) {
			case VDIR:
				delay = syncdelay / 3;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = syncdelay / 2;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= B_VNDIRTY;
		/*
		 * Keep non-negative block numbers sorted in ascending
		 * b_lblkno order; negative (control information) blocks
		 * are appended at the tail.
		 */
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
		} else {
			if (bp->b_lblkno >= 0) {
				struct buf *ttbp;
				while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
				    (ttbp->b_lblkno < bp->b_lblkno)) {
					tbp = ttbp;
				}
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
			} else {
				TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			}
		}
	} else {
		bp->b_xflags |= B_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		/*
		 * If the dirty list is empty the new vnode no longer
		 * needs the syncer.
		 */
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 *
 * On success, stores a referenced VBLK vnode in *vpp and returns 0;
 * otherwise sets *vpp to NULLVP and returns an errno value.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	/* XXX 255 is for mfs. */
	if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
	    bdevsw[major(dev)] == NULL))) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* An alias already existed: use it and discard our new vnode. */
	if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * Only alias active device nodes.
		 * Not sure why we don't re-use this like we do below.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			/*
			 * It disappeared, and we may have slept.
			 * Restart from the beginning.
			 */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/*
	 * It would be a lot clearer what is going on here if
	 * this had been expressed as:
	 * if ( vp && (vp->v_tag == VT_NULL))
	 * and the clauses had been swapped.
	 */
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * Put the new vnode into the hash chain,
		 * and if there was an alias, connect them.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * if ( vp && (vp->v_tag == VT_NULL))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it's newly allocated (by getnewvnode()).
	 * The caller should use this instead.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone.
The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	/* A newly referenced vnode must come off the free list. */
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Increment the use count of a vnode that is already referenced.
 */
void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {
		/* Other references remain; just drop ours. */
		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {
		/* This is the last reference. */
		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

/*
 * Release a vnode the caller holds locked:  like vrele(), but the
 * vnode lock is given up as well.
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {
		/* Other references remain; drop ours and unlock. */
		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {
		/* This is the last reference. */
		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * Unlike vrele, the vnode is already locked here, so only
		 * the interlock need be released before calling
		 * VOP_INACTIVE with the node locked.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 * Takes a hold reference; released by vdrop().
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;		/* count of vnodes we could not flush */

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			/* vgonel() is entered with the interlock still held. */
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * Tear down the VM object association, if there is one.
	 * NB: the assignment inside the condition is intentional.
	 */
	if (obj = vp->v_object) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	/* Drop the extra reference taken at the top of this routine. */
	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
161854359Sroberto */ 161954359Sroberto vp->v_op = dead_vnodeop_p; 162054359Sroberto vn_pollgone(vp); 162154359Sroberto vp->v_tag = VT_NON; 162254359Sroberto vp->v_flag &= ~VXLOCK; 162354359Sroberto if (vp->v_flag & VXWANT) { 162454359Sroberto vp->v_flag &= ~VXWANT; 162554359Sroberto wakeup((caddr_t) vp); 162654359Sroberto } 162754359Sroberto} 162854359Sroberto 162954359Sroberto/* 163054359Sroberto * Eliminate all activity associated with the requested vnode 163154359Sroberto * and with all vnodes aliased to the requested vnode. 163254359Sroberto */ 163354359Srobertoint 163454359Srobertovop_revoke(ap) 163554359Sroberto struct vop_revoke_args /* { 163654359Sroberto struct vnode *a_vp; 163754359Sroberto int a_flags; 163854359Sroberto } */ *ap; 163954359Sroberto{ 164054359Sroberto struct vnode *vp, *vq; 164154359Sroberto struct proc *p = curproc; /* XXX */ 164254359Sroberto 164354359Sroberto#ifdef DIAGNOSTIC 164454359Sroberto if ((ap->a_flags & REVOKEALL) == 0) 164554359Sroberto panic("vop_revoke"); 164654359Sroberto#endif 164754359Sroberto 164854359Sroberto vp = ap->a_vp; 164954359Sroberto simple_lock(&vp->v_interlock); 165054359Sroberto 165154359Sroberto if (vp->v_flag & VALIASED) { 165254359Sroberto /* 165354359Sroberto * If a vgone (or vclean) is already in progress, 165454359Sroberto * wait until it is done and return. 165554359Sroberto */ 165654359Sroberto if (vp->v_flag & VXLOCK) { 165754359Sroberto vp->v_flag |= VXWANT; 165854359Sroberto simple_unlock(&vp->v_interlock); 165954359Sroberto tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 166054359Sroberto return (0); 166154359Sroberto } 166254359Sroberto /* 166354359Sroberto * Ensure that vp will not be vgone'd while we 166454359Sroberto * are eliminating its aliases. 
166554359Sroberto */ 166654359Sroberto vp->v_flag |= VXLOCK; 166754359Sroberto simple_unlock(&vp->v_interlock); 166854359Sroberto while (vp->v_flag & VALIASED) { 166954359Sroberto simple_lock(&spechash_slock); 167054359Sroberto for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 167154359Sroberto if (vq->v_rdev != vp->v_rdev || 167254359Sroberto vq->v_type != vp->v_type || vp == vq) 167354359Sroberto continue; 167454359Sroberto simple_unlock(&spechash_slock); 167554359Sroberto vgone(vq); 167654359Sroberto break; 167754359Sroberto } 1678132451Sroberto if (vq == NULLVP) { 1679132451Sroberto simple_unlock(&spechash_slock); 168054359Sroberto } 168154359Sroberto } 168254359Sroberto /* 168354359Sroberto * Remove the lock so that vgone below will 168454359Sroberto * really eliminate the vnode after which time 1685132451Sroberto * vgone will awaken any sleepers. 1686132451Sroberto */ 1687132451Sroberto simple_lock(&vp->v_interlock); 1688132451Sroberto vp->v_flag &= ~VXLOCK; 1689132451Sroberto if (vp->v_flag & VXWANT) { 1690132451Sroberto vp->v_flag &= ~VXWANT; 1691132451Sroberto wakeup(vp); 1692132451Sroberto } 1693132451Sroberto } 1694132451Sroberto vgonel(vp, p); 1695132451Sroberto return (0); 1696132451Sroberto} 1697132451Sroberto 1698132451Sroberto/* 1699132451Sroberto * Recycle an unused vnode to the front of the free list. 1700132451Sroberto * Release the passed interlock if the vnode will be recycled. 
1701132451Sroberto */ 1702132451Srobertoint 1703132451Srobertovrecycle(vp, inter_lkp, p) 1704132451Sroberto struct vnode *vp; 170554359Sroberto struct simplelock *inter_lkp; 170654359Sroberto struct proc *p; 170754359Sroberto{ 170854359Sroberto 170954359Sroberto simple_lock(&vp->v_interlock); 171054359Sroberto if (vp->v_usecount == 0) { 171154359Sroberto if (inter_lkp) { 171254359Sroberto simple_unlock(inter_lkp); 171354359Sroberto } 171454359Sroberto vgonel(vp, p); 171554359Sroberto return (1); 171654359Sroberto } 171754359Sroberto simple_unlock(&vp->v_interlock); 171854359Sroberto return (0); 171954359Sroberto} 172054359Sroberto 172154359Sroberto/* 172254359Sroberto * Eliminate all activity associated with a vnode 172354359Sroberto * in preparation for reuse. 172454359Sroberto */ 172554359Srobertovoid 1726182007Srobertovgone(vp) 172754359Sroberto register struct vnode *vp; 1728182007Sroberto{ 1729182007Sroberto struct proc *p = curproc; /* XXX */ 173054359Sroberto 173154359Sroberto simple_lock(&vp->v_interlock); 173254359Sroberto vgonel(vp, p); 173354359Sroberto} 1734182007Sroberto 173554359Sroberto/* 173654359Sroberto * vgone, with the vp interlock held. 173754359Sroberto */ 173854359Srobertostatic void 173954359Srobertovgonel(vp, p) 174054359Sroberto struct vnode *vp; 174154359Sroberto struct proc *p; 174254359Sroberto{ 174354359Sroberto int s; 174454359Sroberto struct vnode *vq; 174554359Sroberto struct vnode *vx; 174654359Sroberto 174754359Sroberto /* 1748285612Sdelphij * If a vgone (or vclean) is already in progress, 1749285612Sdelphij * wait until it is done and return. 1750285612Sdelphij */ 175154359Sroberto if (vp->v_flag & VXLOCK) { 1752285612Sdelphij vp->v_flag |= VXWANT; 1753285612Sdelphij simple_unlock(&vp->v_interlock); 1754285612Sdelphij tsleep((caddr_t)vp, PINOD, "vgone", 0); 1755285612Sdelphij return; 1756285612Sdelphij } 1757285612Sdelphij 1758285612Sdelphij /* 1759285612Sdelphij * Clean out the filesystem specific data. 
1760285612Sdelphij */ 1761285612Sdelphij vclean(vp, DOCLOSE, p); 1762285612Sdelphij simple_lock(&vp->v_interlock); 1763285612Sdelphij 1764285612Sdelphij /* 1765285612Sdelphij * Delete from old mount point vnode list, if on one. 1766285612Sdelphij */ 1767285612Sdelphij if (vp->v_mount != NULL) 1768285612Sdelphij insmntque(vp, (struct mount *)0); 1769285612Sdelphij /* 1770285612Sdelphij * If special device, remove it from special device alias list 1771285612Sdelphij * if it is on one. 1772285612Sdelphij */ 1773285612Sdelphij if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1774285612Sdelphij simple_lock(&spechash_slock); 177554359Sroberto if (*vp->v_hashchain == vp) { 177654359Sroberto *vp->v_hashchain = vp->v_specnext; 177754359Sroberto } else { 177854359Sroberto for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 177954359Sroberto if (vq->v_specnext != vp) 178054359Sroberto continue; 178154359Sroberto vq->v_specnext = vp->v_specnext; 178254359Sroberto break; 178354359Sroberto } 178454359Sroberto if (vq == NULL) 178554359Sroberto panic("missing bdev"); 178654359Sroberto } 178754359Sroberto if (vp->v_flag & VALIASED) { 178854359Sroberto vx = NULL; 178954359Sroberto for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 179054359Sroberto if (vq->v_rdev != vp->v_rdev || 179154359Sroberto vq->v_type != vp->v_type) 179254359Sroberto continue; 179354359Sroberto if (vx) 179454359Sroberto break; 179554359Sroberto vx = vq; 179654359Sroberto } 1797285612Sdelphij if (vx == NULL) 1798285612Sdelphij panic("missing alias"); 1799285612Sdelphij if (vq == NULL) 1800285612Sdelphij vx->v_flag &= ~VALIASED; 1801285612Sdelphij vp->v_flag &= ~VALIASED; 1802285612Sdelphij } 1803285612Sdelphij simple_unlock(&spechash_slock); 180482498Sroberto FREE(vp->v_specinfo, M_VNODE); 180554359Sroberto vp->v_specinfo = NULL; 1806285612Sdelphij } 1807285612Sdelphij 180854359Sroberto /* 180954359Sroberto * If it is on the freelist and not already at the head, 181054359Sroberto 
* move it to the head of the list. The test of the back 181154359Sroberto * pointer and the reference count of zero is because 181254359Sroberto * it will be removed from the free list by getnewvnode, 181354359Sroberto * but will not have its reference count incremented until 181454359Sroberto * after calling vgone. If the reference count were 181554359Sroberto * incremented first, vgone would (incorrectly) try to 181654359Sroberto * close the previous instance of the underlying object. 181754359Sroberto */ 181854359Sroberto if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 181954359Sroberto s = splbio(); 182054359Sroberto simple_lock(&vnode_free_list_slock); 182154359Sroberto if (vp->v_flag & VFREE) { 182254359Sroberto TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 182354359Sroberto } else if (vp->v_flag & VTBFREE) { 182454359Sroberto TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 182554359Sroberto vp->v_flag &= ~VTBFREE; 182654359Sroberto freevnodes++; 182754359Sroberto } else 182854359Sroberto freevnodes++; 182954359Sroberto vp->v_flag |= VFREE; 183054359Sroberto TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 183154359Sroberto simple_unlock(&vnode_free_list_slock); 183254359Sroberto splx(s); 183354359Sroberto } 183454359Sroberto 183554359Sroberto vp->v_type = VBAD; 183654359Sroberto simple_unlock(&vp->v_interlock); 183754359Sroberto} 183854359Sroberto 183954359Sroberto/* 184054359Sroberto * Lookup a vnode by device number. 
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		/* Found a match: hand it back and report success. */
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			/* vgone may sleep; restart the scan from scratch. */
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/* Large enough for every flag name below plus separators. */
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* skip leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		/* Re-take the list lock before advancing and unbusying. */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	/*
	 * Step back one level so name[]/namelen describe the full
	 * vfs.* OID, including the VFS_GENERIC component itself.
	 */
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	/* Translate each vfsconf entry into the pre-Lite2 layout. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Also check every alias of this device for a mount. */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/* Zero-length address means "set the default export". */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* One allocation holds the netcred, the address, and the mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export configuration of a mount point based on argp.
 * Handles MNT_DELEXPORT, MNT_EXPORTED and MNT_EXPUBLIC transitions.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).
 * Currently, only one public filesystem is possible in the
 * spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported. May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

/*
 * Look up the export credentials that apply to an incoming request
 * address, falling back to the default export if none match.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				/* The trie root node is not a real export. */
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * perform msync on all vnodes under a mount point
 * the mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags) {
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	tries = 5;
loop:
	anyio = 0;
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		nvp = vp->v_mntvnodes.le_next;

		/* Vnode was recycled away from this mount: restart. */
		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		if (flags != MNT_WAIT) {
			obj = vp->v_object;
			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp))
				continue;
		}

		simple_lock(&vp->v_interlock);
		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			if (!vget(vp,
				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
				if (vp->v_object) {
					vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}
	/* If anything was pushed, go around again (bounded by tries). */
	if (anyio && (--tries > 0))
		goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 *
 * If !waslocked, must be called with interlock.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
		if (!waslocked)
			simple_unlock(&vp->v_interlock);
		return 0;
	}

	if (!waslocked)
		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (major(vp->v_rdev) < nblkdev &&
		    bdevsw[major(vp->v_rdev)] != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
		}
		/*
		 * NOTE(review): if vp is VBLK with an out-of-range major,
		 * neither branch above runs and object is still NULL here,
		 * so this dereference would panic -- presumably callers
		 * never pass such a vnode; confirm before relying on it.
		 */
		object->ref_count--;
		vp->v_usecount--;
	} else {
		if (object->flags & OBJ_DEAD) {
			/* Wait for the dying object to go away, then retry. */
			VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
	}

	if (vp->v_object) {
		vp->v_flag |= VOBJBUF;
	}

retn:
	if (!waslocked) {
		simple_lock(&vp->v_interlock);
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
	}

	return error;
}

/*
 * Move a vnode onto the free list (head if VAGE, tail otherwise).
 */
static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

/*
 * Take a vnode off the free (or to-be-free) list because it is
 * coming back into use.
 */
void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
2608 */ 2609int 2610vn_pollrecord(vp, p, events) 2611 struct vnode *vp; 2612 struct proc *p; 2613 short events; 2614{ 2615 simple_lock(&vp->v_pollinfo.vpi_lock); 2616 if (vp->v_pollinfo.vpi_revents & events) { 2617 /* 2618 * This leaves events we are not interested 2619 * in available for the other process which 2620 * which presumably had requested them 2621 * (otherwise they would never have been 2622 * recorded). 2623 */ 2624 events &= vp->v_pollinfo.vpi_revents; 2625 vp->v_pollinfo.vpi_revents &= ~events; 2626 2627 simple_unlock(&vp->v_pollinfo.vpi_lock); 2628 return events; 2629 } 2630 vp->v_pollinfo.vpi_events |= events; 2631 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2632 simple_unlock(&vp->v_pollinfo.vpi_lock); 2633 return 0; 2634} 2635 2636/* 2637 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2638 * it is possible for us to miss an event due to race conditions, but 2639 * that condition is expected to be rare, so for the moment it is the 2640 * preferred interface. 2641 */ 2642void 2643vn_pollevent(vp, events) 2644 struct vnode *vp; 2645 short events; 2646{ 2647 simple_lock(&vp->v_pollinfo.vpi_lock); 2648 if (vp->v_pollinfo.vpi_events & events) { 2649 /* 2650 * We clear vpi_events so that we don't 2651 * call selwakeup() twice if two events are 2652 * posted before the polling process(es) is 2653 * awakened. This also ensures that we take at 2654 * most one selwakeup() if the polling process 2655 * is no longer interested. However, it does 2656 * mean that only one event can be noticed at 2657 * a time. (Perhaps we should only clear those 2658 * event bits which we note?) XXX 2659 */ 2660 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2661 vp->v_pollinfo.vpi_revents |= events; 2662 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2663 } 2664 simple_unlock(&vp->v_pollinfo.vpi_lock); 2665} 2666 2667/* 2668 * Wake up anyone polling on vp because it is being revoked. 
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		/* Discard recorded interest and wake all pollers once. */
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 *
 * NOTE(review): sync_fsync/sync_inactive/sync_reclaim/sync_print are
 * declared here without `static' but defined `static' below -- confirm
 * this is intentional (some compilers warn about the mismatch).
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
int	sync_fsync __P((struct vop_fsync_args *));
int	sync_inactive __P((struct vop_inactive_args *));
int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

/* Vnode operations vector for the syncer vnode. */
static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;	/* scatter state, shared by all mounts */
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist. We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		/*
		 * First call (everything still zero), or the offset has
		 * wrapped past the maximum delay: re-derive the scatter
		 * parameters from syncer_maxdelay and start over.
		 */
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		/* Mount is busy (e.g. being unmounted); skip this pass. */
		simple_unlock(&mountlist_slock);
		return (0);
	}
	/* Temporarily force synchronous behavior for the duration of sync. */
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/* Detach from the mount and pull the vnode off the syncer worklist. */
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	/* Only vnodes with a private lock have lock state worth printing. */
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}