vfs_subr.c revision 28558
110154Sache/* 27767Sache * Copyright (c) 1989, 1993 37767Sache * The Regents of the University of California. All rights reserved. 4941Snate * (c) UNIX System Laboratories, Inc. 57767Sache * All or some portions of this file are derived from material licensed 67767Sache * to the University of California by American Telephone and Telegraph 7941Snate * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8941Snate * the permission of UNIX System Laboratories, Inc. 9941Snate * 10941Snate * Redistribution and use in source and binary forms, with or without 11941Snate * modification, are permitted provided that the following conditions 12941Snate * are met: 13941Snate * 1. Redistributions of source code must retain the above copyright 14941Snate * notice, this list of conditions and the following disclaimer. 15941Snate * 2. Redistributions in binary form must reproduce the above copyright 16941Snate * notice, this list of conditions and the following disclaimer in the 17941Snate * documentation and/or other materials provided with the distribution. 18941Snate * 3. All advertising materials mentioning features or use of this software 19941Snate * must display the following acknowledgement: 2010154Sache * This product includes software developed by the University of 21941Snate * California, Berkeley and its contributors. 22941Snate * 4. Neither the name of the University nor the names of its contributors 23941Snate * may be used to endorse or promote products derived from this software 24941Snate * without specific prior written permission. 25941Snate * 26941Snate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27941Snate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28941Snate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2987230Smarkm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 3054158Scharnier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3187230Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3287230Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33941Snate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34941Snate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35941Snate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 367767Sache * SUCH DAMAGE. 37941Snate * 38941Snate * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39941Snate * $Id: vfs_subr.c,v 1.92 1997/08/21 20:33:39 bde Exp $ 4022873Sdavidn */ 41941Snate 42941Snate/* 4354158Scharnier * External virtual filesystem routines 44941Snate */ 45941Snate#include "opt_ddb.h" 46941Snate#include "opt_devfs.h" 47941Snate 48941Snate#include <sys/param.h> 49941Snate#include <sys/systm.h> 50941Snate#include <sys/kernel.h> 51941Snate#include <sys/file.h> 52941Snate#include <sys/proc.h> 53941Snate#include <sys/mount.h> 5422873Sdavidn#include <sys/time.h> 557767Sache#include <sys/vnode.h> 567767Sache#include <sys/stat.h> 5711760Sache#include <sys/namei.h> 5811760Sache#include <sys/ucred.h> 597767Sache#include <sys/buf.h> 60941Snate#include <sys/errno.h> 6123318Sache#include <sys/malloc.h> 6222873Sdavidn#include <sys/domain.h> 6322873Sdavidn#include <sys/mbuf.h> 6423318Sache#include <sys/dirent.h> 6522873Sdavidn 6622873Sdavidn#include <machine/limits.h> 67941Snate 687767Sache#include <vm/vm.h> 69941Snate#include <vm/vm_param.h> 70941Snate#include <vm/vm_object.h> 71941Snate#include <vm/vm_extern.h> 727767Sache#include <vm/vm_pager.h> 737767Sache#include <vm/vnode_pager.h> 74941Snate#include <sys/sysctl.h> 75941Snate 76941Snate#include <miscfs/specfs/specdev.h> 77941Snate 78941Snate#ifdef DDB 7910154Sacheextern void printlockedvnodes __P((void)); 807767Sache#endif 817767Sachestatic void vclean __P((struct vnode *vp, int flags, struct proc *p)); 827767Sachestatic void vgonel __P((struct vnode *vp, struct proc *p)); 837767Sacheunsigned long numvnodes; 847767SacheSYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 857767Sachestatic void vputrele __P((struct vnode *vp, int put)); 867767Sache 877767Sacheenum vtype iftovt_tab[16] = { 887767Sache VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 897767Sache VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 907767Sache}; 917767Sacheint vttoif_tab[9] = { 927767Sache 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 93941Snate S_IFSOCK, S_IFIFO, S_IFMT, 94941Snate}; 95941Snate 9610154Sache/* 9710154Sache * Insq/Remq for the vnode usage lists. 98941Snate */ 997767Sache#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 10087208Smarkm#define bufremvn(bp) { \ 101941Snate LIST_REMOVE(bp, b_vnbufs); \ 1027767Sache (bp)->b_vnbufs.le_next = NOLIST; \ 1037767Sache} 10446081SimpTAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 105941Snatestatic u_long freevnodes = 0; 106941Snate 1077767Sachestruct mntlist mountlist; /* mounted filesystem list */ 108941Snatestruct simplelock mountlist_slock; 109941Snatestatic struct simplelock mntid_slock; 1107767Sachestruct simplelock mntvnode_slock; 111941Snatestruct simplelock vnode_free_list_slock; 1127767Sachestatic struct simplelock spechash_slock; 113941Snatestruct nfs_public nfs_pub; /* publicly exported FS */ 114941Snate 11582722Skrisint desiredvnodes; 116941SnateSYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 117941Snate 118941Snatestatic void vfs_free_addrlist __P((struct netexport *nep)); 1197767Sachestatic int vfs_free_netcred __P((struct radix_node *rn, void *w)); 1207767Sachestatic int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 1217767Sache struct export_args *argp)); 1227767Sache 1237767Sache/* 12487208Smarkm * Initialize the vnode management data structures. 1257767Sache */ 126941Snatevoid 127941Snatevntblinit() 12887208Smarkm{ 129941Snate 13010154Sache desiredvnodes = maxproc + vm_object_cache_max; 131941Snate simple_lock_init(&mntvnode_slock); 1327767Sache simple_lock_init(&mntid_slock); 1337767Sache simple_lock_init(&spechash_slock); 1347767Sache TAILQ_INIT(&vnode_free_list); 1357767Sache simple_lock_init(&vnode_free_list_slock); 1367767Sache CIRCLEQ_INIT(&mountlist); 1377767Sache} 138941Snate 13982722Skris/* 140941Snate * Mark a mount point as busy. Used to synchronize access and to delay 141941Snate * unmounting. Interlock is not released on failure. 14287208Smarkm */ 143941Snateint 14482722Skrisvfs_busy(mp, flags, interlkp, p) 14582722Skris struct mount *mp; 14682722Skris int flags; 14782722Skris struct simplelock *interlkp; 14882722Skris struct proc *p; 14982722Skris{ 15082722Skris int lkflags; 151941Snate 152941Snate if (mp->mnt_flag & MNT_UNMOUNT) { 153941Snate if (flags & LK_NOWAIT) 154941Snate return (ENOENT); 1557767Sache mp->mnt_flag |= MNT_MWAIT; 156941Snate if (interlkp) { 157941Snate simple_unlock(interlkp); 158941Snate } 159941Snate /* 1607767Sache * Since all busy locks are shared except the exclusive 1617767Sache * lock granted when unmounting, the only place that a 162941Snate * wakeup needs to be done is at the release of the 1637767Sache * exclusive lock at the end of dounmount. 16480294Sobrien */ 16580294Sobrien tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 1667767Sache if (interlkp) { 1677767Sache simple_lock(interlkp); 1687767Sache } 169941Snate return (ENOENT); 17054158Scharnier } 171941Snate lkflags = LK_SHARED; 1727767Sache if (interlkp) 1737767Sache lkflags |= LK_INTERLOCK; 17410154Sache if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 1757767Sache panic("vfs_busy: unexpected lock failure"); 17654158Scharnier return (0); 17710154Sache} 1787767Sache 1797767Sache/* 18080294Sobrien * Free a busy filesystem. 18180294Sobrien */ 1827767Sachevoid 183941Snatevfs_unbusy(mp, p) 184941Snate struct mount *mp; 18510154Sache struct proc *p; 18610154Sache{ 18710154Sache 18810154Sache lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 18910154Sache} 19010154Sache 19110154Sache/* 19210154Sache * Lookup a filesystem type, and if found allocate and initialize 19310154Sache * a mount structure for it. 19410154Sache * 19510154Sache * Devname is usually updated by mount(8) after booting. 19610154Sache */ 19710154Sacheint 19810154Sachevfs_rootmountalloc(fstypename, devname, mpp) 19910154Sache char *fstypename; 20010154Sache char *devname; 20110154Sache struct mount **mpp; 20210154Sache{ 20310154Sache struct proc *p = curproc; /* XXX */ 20410154Sache struct vfsconf *vfsp; 20510154Sache struct mount *mp; 20610154Sache 20710154Sache for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 20810154Sache if (!strcmp(vfsp->vfc_name, fstypename)) 20910154Sache break; 210941Snate if (vfsp == NULL) 2117767Sache return (ENODEV); 212941Snate mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 2137767Sache bzero((char *)mp, (u_long)sizeof(struct mount)); 2147767Sache lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 21510154Sache (void)vfs_busy(mp, LK_NOWAIT, 0, p); 2167767Sache LIST_INIT(&mp->mnt_vnodelist); 2177767Sache mp->mnt_vfc = vfsp; 2187767Sache mp->mnt_op = vfsp->vfc_vfsops; 2197767Sache mp->mnt_flag = MNT_RDONLY; 2207767Sache mp->mnt_vnodecovered = NULLVP; 2217767Sache vfsp->vfc_refcount++; 2227767Sache mp->mnt_stat.f_type = vfsp->vfc_typenum; 2237767Sache mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 2247767Sache strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 2257767Sache mp->mnt_stat.f_mntonname[0] = '/'; 22610154Sache mp->mnt_stat.f_mntonname[1] = 0; 22711760Sache (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 22811760Sache *mpp = mp; 22911760Sache return (0); 23011760Sache} 2317767Sache 2327767Sache/* 2337767Sache * Find an appropriate filesystem to use for the root. If a filesystem 2347767Sache * has not been preselected, walk through the list of known filesystems 2357767Sache * trying those that have mountroot routines, and try them until one 2367767Sache * works or we have tried them all. 237941Snate */ 2387767Sache#ifdef notdef /* XXX JH */ 239941Snateint 2407767Sachelite2_vfs_mountroot(void) 241941Snate{ 2427767Sache struct vfsconf *vfsp; 2437767Sache extern int (*lite2_mountroot)(void); 2447767Sache int error; 2457767Sache 2467767Sache if (lite2_mountroot != NULL) 247941Snate return ((*lite2_mountroot)()); 2487767Sache for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 249941Snate if (vfsp->vfc_mountroot == NULL) 2507767Sache continue; 25154158Scharnier if ((error = (*vfsp->vfc_mountroot)()) == 0) 252941Snate return (0); 2537767Sache printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 2547767Sache } 255941Snate return (ENODEV); 2567767Sache} 2577767Sache#endif 2587767Sache 259941Snate/* 2607767Sache * Lookup a mount point by filesystem identifier. 2617767Sache */ 2627767Sachestruct mount * 2637767Sachevfs_getvfs(fsid) 2647767Sache fsid_t *fsid; 2657767Sache{ 2667767Sache register struct mount *mp; 267941Snate 26810154Sache simple_lock(&mountlist_slock); 26954158Scharnier for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 270941Snate mp = mp->mnt_list.cqe_next) { 27110154Sache if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 27210154Sache mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 273941Snate simple_unlock(&mountlist_slock); 27410154Sache return (mp); 27510154Sache } 27610154Sache } 277941Snate simple_unlock(&mountlist_slock); 27810154Sache return ((struct mount *) 0); 27910154Sache} 28054158Scharnier 28110154Sache/* 2827767Sache * Get a new unique fsid 2837767Sache */ 2847767Sachevoid 2857767Sachevfs_getnewfsid(mp) 2867767Sache struct mount *mp; 2877767Sache{ 2887767Sache static u_short xxxfs_mntid; 28954158Scharnier 290941Snate fsid_t tfsid; 2917767Sache int mtype; 29254158Scharnier 293941Snate simple_lock(&mntid_slock); 2947767Sache mtype = mp->mnt_vfc->vfc_typenum; 29554158Scharnier mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 296941Snate mp->mnt_stat.f_fsid.val[1] = mtype; 2977767Sache if (xxxfs_mntid == 0) 298941Snate ++xxxfs_mntid; 2998112Sache tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 3008112Sache tfsid.val[1] = mtype; 3018112Sache if (mountlist.cqh_first != (void *)&mountlist) { 3028112Sache while (vfs_getvfs(&tfsid)) { 3038112Sache tfsid.val[0]++; 3048112Sache xxxfs_mntid++; 30510154Sache } 3067767Sache } 3077767Sache mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 3087767Sache simple_unlock(&mntid_slock); 309941Snate} 3107767Sache 3117767Sache/* 3127767Sache * Set vnode attributes to VNOVAL 3137767Sache */ 3147767Sachevoid 3157767Sachevattr_null(vap) 316941Snate register struct vattr *vap; 3177767Sache{ 31854158Scharnier 319941Snate vap->va_type = VNON; 3207767Sache vap->va_size = VNOVAL; 3217767Sache vap->va_bytes = VNOVAL; 3227767Sache vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 3237767Sache vap->va_fsid = vap->va_fileid = 3247767Sache vap->va_blocksize = vap->va_rdev = 3257767Sache vap->va_atime.tv_sec = vap->va_atime.tv_nsec = 326941Snate vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = 32710154Sache vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = 32822873Sdavidn vap->va_flags = vap->va_gen = VNOVAL; 3297767Sache vap->va_vaflags = 0; 33010154Sache} 3317767Sache 3327767Sache/* 3337767Sache * Routines having to do with the management of the vnode table. 334941Snate */ 3357767Sacheextern vop_t **dead_vnodeop_p; 3367767Sache 3377767Sache/* 3387767Sache * Return the next vnode from the free list. 33954158Scharnier */ 3407767Sacheint 34122873Sdavidngetnewvnode(tag, mp, vops, vpp) 34222873Sdavidn enum vtagtype tag; 343941Snate struct mount *mp; 3447767Sache vop_t **vops; 3457767Sache struct vnode **vpp; 3467767Sache{ 347941Snate struct proc *p = curproc; /* XXX */ 3487767Sache struct vnode *vp; 3497767Sache 35054158Scharnier /* 3517767Sache * We take the least recently used vnode from the freelist 3527767Sache * if we can get it and it has no cached pages, and no 3537767Sache * namecache entries are relative to it. 3547767Sache * Otherwise we allocate a new vnode 3557767Sache */ 3567767Sache 357941Snate simple_lock(&vnode_free_list_slock); 3587767Sache 3597767Sache if (freevnodes >= desiredvnodes) { 3607767Sache TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) { 3617767Sache if (!simple_lock_try(&vp->v_interlock)) 3627767Sache continue; 36387208Smarkm if (vp->v_usecount) 3647767Sache panic("free vnode isn't"); 3657767Sache 3667767Sache if (vp->v_object && vp->v_object->resident_page_count) { 36710154Sache /* Don't recycle if it's caching some pages */ 3687767Sache simple_unlock(&vp->v_interlock); 3697767Sache continue; 3707767Sache } else if (LIST_FIRST(&vp->v_cache_src)) { 3717767Sache /* Don't recycle if active in the namecache */ 372941Snate simple_unlock(&vp->v_interlock); 3737767Sache continue; 3747767Sache } else { 3757767Sache break; 3767767Sache } 3777767Sache } 3787767Sache } else { 3797767Sache vp = NULL; 3807767Sache } 3817767Sache 38210154Sache if (vp) { 38310154Sache TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 38410154Sache freevnodes--; 38510154Sache /* see comment on why 0xdeadb is set at end of vgone (below) */ 38610154Sache vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; 38710154Sache simple_unlock(&vnode_free_list_slock); 38810154Sache vp->v_lease = NULL; 38910154Sache if (vp->v_type != VBAD) 39010154Sache vgonel(vp, p); 39110154Sache else { 39210154Sache simple_unlock(&vp->v_interlock); 39310154Sache } 3947767Sache 395941Snate#ifdef DIAGNOSTIC 3967767Sache { 3977767Sache int s; 3987767Sache 3997767Sache if (vp->v_data) 40010154Sache panic("cleaned vnode isn't"); 401941Snate s = splbio(); 40210154Sache if (vp->v_numoutput) 4037767Sache panic("Clean vnode has pending I/O's"); 4047767Sache splx(s); 4057767Sache } 4067767Sache#endif 4077767Sache vp->v_flag = 0; 4087767Sache vp->v_lastr = 0; 4097767Sache vp->v_lastw = 0; 4107767Sache vp->v_lasta = 0; 4117767Sache vp->v_cstart = 0; 4127767Sache vp->v_clen = 0; 4137767Sache vp->v_socket = 0; 4147767Sache vp->v_writecount = 0; /* XXX */ 41510154Sache } else { 4167767Sache simple_unlock(&vnode_free_list_slock); 4177767Sache vp = (struct vnode *) malloc((u_long) sizeof *vp, 4187767Sache M_VNODE, M_WAITOK); 4197767Sache bzero((char *) vp, sizeof *vp); 4207767Sache vp->v_dd = vp; 4217767Sache LIST_INIT(&vp->v_cache_src); 4227767Sache TAILQ_INIT(&vp->v_cache_dst); 4237767Sache numvnodes++; 424941Snate } 4257767Sache 4267767Sache vp->v_type = VNON; 427941Snate cache_purge(vp); 4287767Sache vp->v_tag = tag; 4297767Sache vp->v_op = vops; 43054158Scharnier insmntque(vp, mp); 43110154Sache *vpp = vp; 4327767Sache vp->v_usecount = 1; 43354158Scharnier vp->v_data = 0; 434941Snate return (0); 4357767Sache} 436941Snate 4377767Sache/* 4387767Sache * Move a vnode from one mount queue to another. 439941Snate */ 4407767Sachevoid 44154158Scharnierinsmntque(vp, mp) 442941Snate register struct vnode *vp; 4437767Sache register struct mount *mp; 44410154Sache{ 445941Snate 446941Snate simple_lock(&mntvnode_slock); 447941Snate /* 448941Snate * Delete from old mount point vnode list, if on one. 449941Snate */ 45010154Sache if (vp->v_mount != NULL) 4517767Sache LIST_REMOVE(vp, v_mntvnodes); 4527767Sache /* 4537767Sache * Insert into list of vnodes for the new mount point, if available. 4547767Sache */ 4557767Sache if ((vp->v_mount = mp) == NULL) { 4567767Sache simple_unlock(&mntvnode_slock); 4577767Sache return; 4587767Sache } 4597767Sache LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 46010154Sache simple_unlock(&mntvnode_slock); 4617767Sache} 4627767Sache 4637767Sache/* 46440389Smckay * Update outstanding I/O count and do wakeup if requested. 46540389Smckay */ 46640389Smckayvoid 46740389Smckayvwakeup(bp) 468941Snate register struct buf *bp; 4697767Sache{ 470941Snate register struct vnode *vp; 4717767Sache 47254158Scharnier bp->b_flags &= ~B_WRITEINPROG; 473941Snate if ((vp = bp->b_vp)) { 4747767Sache vp->v_numoutput--; 47554158Scharnier if (vp->v_numoutput < 0) 476941Snate panic("vwakeup: neg numoutput"); 47710154Sache if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 4787767Sache vp->v_flag &= ~VBWAIT; 4797767Sache wakeup((caddr_t) &vp->v_numoutput); 4807767Sache } 48154158Scharnier } 48210154Sache} 4837767Sache 4847767Sache/* 4857767Sache * Flush out and invalidate all buffers associated with a vnode. 4867767Sache * Called with the underlying object locked. 4877767Sache */ 4887767Sacheint 4897767Sachevinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 490941Snate register struct vnode *vp; 49110154Sache int flags; 4927767Sache struct ucred *cred; 493941Snate struct proc *p; 4947767Sache int slpflag, slptimeo; 4957767Sache{ 496941Snate register struct buf *bp; 4977767Sache struct buf *nbp, *blist; 4987767Sache int s, error; 49987208Smarkm vm_object_t object; 5007767Sache 5017767Sache if (flags & V_SAVE) { 5027767Sache if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) 5037767Sache return (error); 5047767Sache if (vp->v_dirtyblkhd.lh_first != NULL) 505941Snate panic("vinvalbuf: dirty bufs"); 50610154Sache } 50710154Sache 50810154Sache s = splbio(); 50910154Sache for (;;) { 51010154Sache if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) 51110154Sache while (blist && blist->b_lblkno < 0) 5127767Sache blist = blist->b_vnbufs.le_next; 5137767Sache if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 514941Snate (flags & V_SAVEMETA)) 515941Snate while (blist && blist->b_lblkno < 0) 516941Snate blist = blist->b_vnbufs.le_next; 51710154Sache if (!blist) 518941Snate break; 5197767Sache 5207767Sache for (bp = blist; bp; bp = nbp) { 5217767Sache nbp = bp->b_vnbufs.le_next; 5227767Sache if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) 52310154Sache continue; 52410154Sache if (bp->b_flags & B_BUSY) { 52510154Sache bp->b_flags |= B_WANTED; 52610154Sache error = tsleep((caddr_t) bp, 52710154Sache slpflag | (PRIBIO + 1), "vinvalbuf", 528941Snate slptimeo); 5297767Sache if (error) { 530941Snate splx(s); 5317767Sache return (error); 53254158Scharnier } 5338874Srgrimes break; 53410154Sache } 53554158Scharnier bremfree(bp); 53610154Sache bp->b_flags |= B_BUSY; 53710154Sache /* 53810154Sache * XXX Since there are no node locks for NFS, I 53910154Sache * believe there is a slight chance that a delayed 54010154Sache * write will occur while sleeping just above, so 54110154Sache * check for it. 54210154Sache */ 54310154Sache if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 54410154Sache (void) VOP_BWRITE(bp); 54554158Scharnier break; 54610154Sache } 54710154Sache bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); 54810154Sache brelse(bp); 5497860Sache } 55010154Sache } 55110154Sache 55210154Sache while (vp->v_numoutput > 0) { 55354158Scharnier vp->v_flag |= VBWAIT; 55454158Scharnier tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 55510154Sache } 55610154Sache 55710154Sache splx(s); 55810154Sache 55910154Sache /* 56010154Sache * Destroy the copy in the VM cache, too. 56110154Sache */ 56210154Sache object = vp->v_object; 56310154Sache if (object != NULL) { 56410154Sache vm_object_page_remove(object, 0, object->size, 56510154Sache (flags & V_SAVE) ? TRUE : FALSE); 56610154Sache } 56710154Sache if (!(flags & V_SAVEMETA) && 56810154Sache (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 56910154Sache panic("vinvalbuf: flush failed"); 57010154Sache return (0); 57110154Sache} 57210154Sache 57310154Sache/* 57410154Sache * Associate a buffer with a vnode. 57510154Sache */ 57610154Sachevoid 57710154Sachebgetvp(vp, bp) 57810154Sache register struct vnode *vp; 57954158Scharnier register struct buf *bp; 58010154Sache{ 58110154Sache int s; 58210154Sache 58310154Sache if (bp->b_vp) 58410154Sache panic("bgetvp: not free"); 58510154Sache VHOLD(vp); 58610154Sache bp->b_vp = vp; 58710154Sache if (vp->v_type == VBLK || vp->v_type == VCHR) 58854158Scharnier bp->b_dev = vp->v_rdev; 58954158Scharnier else 59010154Sache bp->b_dev = NODEV; 59110154Sache /* 5927860Sache * Insert onto list for new vnode. 5937767Sache */ 5947767Sache s = splbio(); 595941Snate bufinsvn(bp, &vp->v_cleanblkhd); 596941Snate splx(s); 5977767Sache} 598941Snate 5997767Sache/* 6007767Sache * Disassociate a buffer from a vnode. 6017767Sache */ 6027767Sachevoid 603941Snatebrelvp(bp) 6047767Sache register struct buf *bp; 60587208Smarkm{ 6067767Sache struct vnode *vp; 6077767Sache int s; 608941Snate 6097767Sache if (bp->b_vp == (struct vnode *) 0) 610941Snate panic("brelvp: NULL"); 6117767Sache /* 6127767Sache * Delete from old vnode list, if on one. 6137767Sache */ 6147767Sache s = splbio(); 6157767Sache if (bp->b_vnbufs.le_next != NOLIST) 6167767Sache bufremvn(bp); 617941Snate splx(s); 61882722Skris 61982722Skris vp = bp->b_vp; 6207767Sache bp->b_vp = (struct vnode *) 0; 6217767Sache HOLDRELE(vp); 6227767Sache} 6237767Sache 6247767Sache/* 6257767Sache * Associate a p-buffer with a vnode. 6267767Sache */ 6277767Sachevoid 6287767Sachepbgetvp(vp, bp) 6297767Sache register struct vnode *vp; 6307767Sache register struct buf *bp; 6317767Sache{ 6327767Sache#if defined(DIAGNOSTIC) 6337767Sache if (bp->b_vp) 634941Snate panic("pbgetvp: not free"); 6357767Sache#endif 6367767Sache bp->b_vp = vp; 6377767Sache if (vp->v_type == VBLK || vp->v_type == VCHR) 63824360Simp bp->b_dev = vp->v_rdev; 6397767Sache else 6407767Sache bp->b_dev = NODEV; 6417767Sache} 6427767Sache 643941Snate/* 6447767Sache * Disassociate a p-buffer from a vnode. 6457767Sache */ 6467767Sachevoid 647941Snatepbrelvp(bp) 6487767Sache register struct buf *bp; 6497767Sache{ 6507767Sache struct vnode *vp; 65110154Sache 6527767Sache#if defined(DIAGNOSTIC) 6537767Sache if (bp->b_vp == (struct vnode *) 0) 6547767Sache panic("pbrelvp: NULL"); 655941Snate#endif 6567767Sache 6577767Sache bp->b_vp = (struct vnode *) 0; 6587767Sache} 659941Snate 6607767Sache/* 6617767Sache * Reassign a buffer from one vnode to another. 662941Snate * Used to assign file specific control information 6637767Sache * (indirect blocks) to the vnode to which they belong. 6647767Sache */ 6657767Sachevoid 666941Snatereassignbuf(bp, newvp) 6677767Sache register struct buf *bp; 6687767Sache register struct vnode *newvp; 6697767Sache{ 670941Snate int s; 6717767Sache 6727767Sache if (newvp == NULL) { 6737767Sache printf("reassignbuf: NULL"); 674941Snate return; 6757767Sache } 6767767Sache 6777767Sache s = splbio(); 678941Snate /* 6797767Sache * Delete from old vnode list, if on one. 6807767Sache */ 6817767Sache if (bp->b_vnbufs.le_next != NOLIST) 682941Snate bufremvn(bp); 6837767Sache /* 6847767Sache * If dirty, put on list of dirty buffers; otherwise insert onto list 6857767Sache * of clean buffers. 686941Snate */ 6877767Sache if (bp->b_flags & B_DELWRI) { 6887767Sache struct buf *tbp; 6897767Sache 690941Snate tbp = newvp->v_dirtyblkhd.lh_first; 69110154Sache if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { 69210154Sache bufinsvn(bp, &newvp->v_dirtyblkhd); 69310154Sache } else { 69410154Sache while (tbp->b_vnbufs.le_next && 69510154Sache (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { 6967767Sache tbp = tbp->b_vnbufs.le_next; 6977767Sache } 6987767Sache LIST_INSERT_AFTER(tbp, bp, b_vnbufs); 6997767Sache } 7007767Sache } else { 7017767Sache bufinsvn(bp, &newvp->v_cleanblkhd); 702941Snate } 7037767Sache splx(s); 70482722Skris} 70582722Skris 70682722Skris#ifndef DEVFS_ROOT 707941Snate/* 7087767Sache * Create a vnode for a block device. 7097767Sache * Used for root filesystem, argdev, and swap areas. 7107767Sache * Also used for memory file system special devices. 71154158Scharnier */ 7127767Sacheint 7137767Sachebdevvp(dev, vpp) 714941Snate dev_t dev; 7157767Sache struct vnode **vpp; 7167767Sache{ 7177767Sache register struct vnode *vp; 7187767Sache struct vnode *nvp; 7197767Sache int error; 7207767Sache 7217767Sache if (dev == NODEV) 7227767Sache return (0); 7237767Sache error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); 72410154Sache if (error) { 7257767Sache *vpp = 0; 7267767Sache return (error); 72710154Sache } 72810154Sache vp = nvp; 72910154Sache vp->v_type = VBLK; 73010154Sache if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { 73110154Sache vput(vp); 7327767Sache vp = nvp; 7337767Sache } 7347767Sache *vpp = vp; 7357767Sache return (0); 7367767Sache} 7377767Sache#endif /* !DEVFS_ROOT */ 738941Snate 7397767Sache/* 7407767Sache * Check to see if the new vnode represents a special device 7417767Sache * for which we already have a vnode (either because of 7427767Sache * bdevvp() or because of a different vnode representing 7437767Sache * the same block device). If such an alias exists, deallocate 7447767Sache * the existing contents and return the aliased vnode. The 7457767Sache * caller is responsible for filling it with its new contents. 7467767Sache */ 7477767Sachestruct vnode * 7487767Sachecheckalias(nvp, nvp_rdev, mp) 7497767Sache register struct vnode *nvp; 7507767Sache dev_t nvp_rdev; 7517767Sache struct mount *mp; 75210154Sache{ 7537767Sache struct proc *p = curproc; /* XXX */ 7547767Sache struct vnode *vp; 7557767Sache struct vnode **vpp; 7567767Sache 7577767Sache if (nvp->v_type != VBLK && nvp->v_type != VCHR) 7587767Sache return (NULLVP); 7597767Sache 7607767Sache vpp = &speclisth[SPECHASH(nvp_rdev)]; 7617767Sacheloop: 7627767Sache simple_lock(&spechash_slock); 76354158Scharnier for (vp = *vpp; vp; vp = vp->v_specnext) { 7647767Sache if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 7657767Sache continue; 7667767Sache /* 767941Snate * Alias, but not in use, so flush it out. 768 */ 769 simple_lock(&vp->v_interlock); 770 if (vp->v_usecount == 0) { 771 simple_unlock(&spechash_slock); 772 vgonel(vp, p); 773 goto loop; 774 } 775 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 776 simple_unlock(&spechash_slock); 777 goto loop; 778 } 779 break; 780 } 781 if (vp == NULL || vp->v_tag != VT_NON) { 782 MALLOC(nvp->v_specinfo, struct specinfo *, 783 sizeof(struct specinfo), M_VNODE, M_WAITOK); 784 nvp->v_rdev = nvp_rdev; 785 nvp->v_hashchain = vpp; 786 nvp->v_specnext = *vpp; 787 nvp->v_specflags = 0; 788 simple_unlock(&spechash_slock); 789 *vpp = nvp; 790 if (vp != NULLVP) { 791 nvp->v_flag |= VALIASED; 792 vp->v_flag |= VALIASED; 793 vput(vp); 794 } 795 return (NULLVP); 796 } 797 simple_unlock(&spechash_slock); 798 VOP_UNLOCK(vp, 0, p); 799 simple_lock(&vp->v_interlock); 800 vclean(vp, 0, p); 801 vp->v_op = nvp->v_op; 802 vp->v_tag = nvp->v_tag; 803 nvp->v_type = VNON; 804 insmntque(vp, mp); 805 return (vp); 806} 807 808/* 809 * Grab a particular vnode from the free list, increment its 810 * reference count and lock it. The vnode lock bit is set the 811 * vnode is being eliminated in vgone. The process is awakened 812 * when the transition is completed, and an error returned to 813 * indicate that the vnode is no longer usable (possibly having 814 * been changed to a new file system type). 815 */ 816int 817vget(vp, flags, p) 818 register struct vnode *vp; 819 int flags; 820 struct proc *p; 821{ 822 int error; 823 824 /* 825 * If the vnode is in the process of being cleaned out for 826 * another use, we wait for the cleaning to finish and then 827 * return failure. Cleaning is determined by checking that 828 * the VXLOCK flag is set. 829 */ 830 if ((flags & LK_INTERLOCK) == 0) { 831 simple_lock(&vp->v_interlock); 832 } 833 if (vp->v_flag & VXLOCK) { 834 vp->v_flag |= VXWANT; 835 simple_unlock(&vp->v_interlock); 836 tsleep((caddr_t)vp, PINOD, "vget", 0); 837 return (ENOENT); 838 } 839 if (vp->v_usecount == 0) { 840 simple_lock(&vnode_free_list_slock); 841 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 842 simple_unlock(&vnode_free_list_slock); 843 freevnodes--; 844 } 845 vp->v_usecount++; 846 /* 847 * Create the VM object, if needed 848 */ 849 if ((vp->v_type == VREG) && 850 ((vp->v_object == NULL) || 851 (vp->v_object->flags & OBJ_VFS_REF) == 0 || 852 (vp->v_object->flags & OBJ_DEAD))) { 853 /* 854 * XXX vfs_object_create probably needs the interlock. 855 */ 856 simple_unlock(&vp->v_interlock); 857 vfs_object_create(vp, curproc, curproc->p_ucred, 0); 858 simple_lock(&vp->v_interlock); 859 } 860 if (flags & LK_TYPE_MASK) { 861 if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) 862 vrele(vp); 863 return (error); 864 } 865 simple_unlock(&vp->v_interlock); 866 return (0); 867} 868 869/* 870 * Stubs to use when there is no locking to be done on the underlying object. 871 * A minimal shared lock is necessary to ensure that the underlying object 872 * is not revoked while an operation is in progress. So, an active shared 873 * count is maintained in an auxillary vnode lock structure. 874 */ 875int 876vop_sharedlock(ap) 877 struct vop_lock_args /* { 878 struct vnode *a_vp; 879 int a_flags; 880 struct proc *a_p; 881 } */ *ap; 882{ 883 /* 884 * This code cannot be used until all the non-locking filesystems 885 * (notably NFS) are converted to properly lock and release nodes. 886 * Also, certain vnode operations change the locking state within 887 * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 888 * and symlink). Ideally these operations should not change the 889 * lock state, but should be changed to let the caller of the 890 * function unlock them. Otherwise all intermediate vnode layers 891 * (such as union, umapfs, etc) must catch these functions to do 892 * the necessary locking at their layer. Note that the inactive 893 * and lookup operations also change their lock state, but this 894 * cannot be avoided, so these two operations will always need 895 * to be handled in intermediate layers. 896 */ 897 struct vnode *vp = ap->a_vp; 898 int vnflags, flags = ap->a_flags; 899 900 if (vp->v_vnlock == NULL) { 901 if ((flags & LK_TYPE_MASK) == LK_DRAIN) 902 return (0); 903 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), 904 M_VNODE, M_WAITOK); 905 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 906 } 907 switch (flags & LK_TYPE_MASK) { 908 case LK_DRAIN: 909 vnflags = LK_DRAIN; 910 break; 911 case LK_EXCLUSIVE: 912#ifdef DEBUG_VFS_LOCKS 913 /* 914 * Normally, we use shared locks here, but that confuses 915 * the locking assertions. 916 */ 917 vnflags = LK_EXCLUSIVE; 918 break; 919#endif 920 case LK_SHARED: 921 vnflags = LK_SHARED; 922 break; 923 case LK_UPGRADE: 924 case LK_EXCLUPGRADE: 925 case LK_DOWNGRADE: 926 return (0); 927 case LK_RELEASE: 928 default: 929 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); 930 } 931 if (flags & LK_INTERLOCK) 932 vnflags |= LK_INTERLOCK; 933 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); 934} 935 936/* 937 * Stubs to use when there is no locking to be done on the underlying object. 938 * A minimal shared lock is necessary to ensure that the underlying object 939 * is not revoked while an operation is in progress. So, an active shared 940 * count is maintained in an auxillary vnode lock structure. 941 */ 942int 943vop_nolock(ap) 944 struct vop_lock_args /* { 945 struct vnode *a_vp; 946 int a_flags; 947 struct proc *a_p; 948 } */ *ap; 949{ 950#ifdef notyet 951 /* 952 * This code cannot be used until all the non-locking filesystems 953 * (notably NFS) are converted to properly lock and release nodes. 954 * Also, certain vnode operations change the locking state within 955 * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 956 * and symlink). Ideally these operations should not change the 957 * lock state, but should be changed to let the caller of the 958 * function unlock them. Otherwise all intermediate vnode layers 959 * (such as union, umapfs, etc) must catch these functions to do 960 * the necessary locking at their layer. Note that the inactive 961 * and lookup operations also change their lock state, but this 962 * cannot be avoided, so these two operations will always need 963 * to be handled in intermediate layers. 964 */ 965 struct vnode *vp = ap->a_vp; 966 int vnflags, flags = ap->a_flags; 967 968 if (vp->v_vnlock == NULL) { 969 if ((flags & LK_TYPE_MASK) == LK_DRAIN) 970 return (0); 971 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), 972 M_VNODE, M_WAITOK); 973 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 974 } 975 switch (flags & LK_TYPE_MASK) { 976 case LK_DRAIN: 977 vnflags = LK_DRAIN; 978 break; 979 case LK_EXCLUSIVE: 980 case LK_SHARED: 981 vnflags = LK_SHARED; 982 break; 983 case LK_UPGRADE: 984 case LK_EXCLUPGRADE: 985 case LK_DOWNGRADE: 986 return (0); 987 case LK_RELEASE: 988 default: 989 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); 990 } 991 if (flags & LK_INTERLOCK) 992 vnflags |= LK_INTERLOCK; 993 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); 994#else /* for now */ 995 /* 996 * Since we are not using the lock manager, we must clear 997 * the interlock here. 998 */ 999 if (ap->a_flags & LK_INTERLOCK) { 1000 simple_unlock(&ap->a_vp->v_interlock); 1001 } 1002 return (0); 1003#endif 1004} 1005 1006/* 1007 * Do the inverse of vop_nolock, handling the interlock in a compatible way. 1008 */ 1009int 1010vop_nounlock(ap) 1011 struct vop_unlock_args /* { 1012 struct vnode *a_vp; 1013 int a_flags; 1014 struct proc *a_p; 1015 } */ *ap; 1016{ 1017 struct vnode *vp = ap->a_vp; 1018 1019 if (vp->v_vnlock == NULL) { 1020 if (ap->a_flags & LK_INTERLOCK) 1021 simple_unlock(&ap->a_vp->v_interlock); 1022 return (0); 1023 } 1024 return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags, 1025 &ap->a_vp->v_interlock, ap->a_p)); 1026} 1027 1028/* 1029 * Return whether or not the node is in use. 1030 */ 1031int 1032vop_noislocked(ap) 1033 struct vop_islocked_args /* { 1034 struct vnode *a_vp; 1035 } */ *ap; 1036{ 1037 struct vnode *vp = ap->a_vp; 1038 1039 if (vp->v_vnlock == NULL) 1040 return (0); 1041 return (lockstatus(vp->v_vnlock)); 1042} 1043 1044/* #ifdef DIAGNOSTIC */ 1045/* 1046 * Vnode reference, just increment the count 1047 */ 1048void 1049vref(vp) 1050 struct vnode *vp; 1051{ 1052 simple_lock(&vp->v_interlock); 1053 if (vp->v_usecount <= 0) 1054 panic("vref used where vget required"); 1055 1056 vp->v_usecount++; 1057 1058 if ((vp->v_type == VREG) && 1059 ((vp->v_object == NULL) || 1060 ((vp->v_object->flags & OBJ_VFS_REF) == 0) || 1061 (vp->v_object->flags & OBJ_DEAD))) { 1062 /* 1063 * We need to lock to VP during the time that 1064 * the object is created. This is necessary to 1065 * keep the system from re-entrantly doing it 1066 * multiple times. 1067 * XXX vfs_object_create probably needs the interlock? 1068 */ 1069 simple_unlock(&vp->v_interlock); 1070 vfs_object_create(vp, curproc, curproc->p_ucred, 0); 1071 return; 1072 } 1073 simple_unlock(&vp->v_interlock); 1074} 1075 1076/* 1077 * Vnode put/release. 1078 * If count drops to zero, call inactive routine and return to freelist. 1079 */ 1080static void 1081vputrele(vp, put) 1082 struct vnode *vp; 1083 int put; 1084{ 1085 struct proc *p = curproc; /* XXX */ 1086 1087#ifdef DIAGNOSTIC 1088 if (vp == NULL) 1089 panic("vputrele: null vp"); 1090#endif 1091 simple_lock(&vp->v_interlock); 1092 1093 if ((vp->v_usecount == 2) && 1094 vp->v_object && 1095 (vp->v_object->flags & OBJ_VFS_REF)) { 1096 vp->v_usecount--; 1097 vp->v_object->flags &= ~OBJ_VFS_REF; 1098 if (put) { 1099 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1100 } else { 1101 simple_unlock(&vp->v_interlock); 1102 } 1103 vm_object_deallocate(vp->v_object); 1104 return; 1105 } 1106 1107 if (vp->v_usecount > 1) { 1108 vp->v_usecount--; 1109 if (put) { 1110 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1111 } else { 1112 simple_unlock(&vp->v_interlock); 1113 } 1114 return; 1115 } 1116 1117 if (vp->v_usecount < 1) { 1118#ifdef DIAGNOSTIC 1119 vprint("vputrele: negative ref count", vp); 1120#endif 1121 panic("vputrele: negative ref cnt"); 1122 } 1123 1124 /* 1125 * If we are doing a vput, the node is already locked, and we must 1126 * call VOP_INACTIVE with the node locked. So, in the case of 1127 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1128 */ 1129 if (put) { 1130 simple_unlock(&vp->v_interlock); 1131 VOP_INACTIVE(vp, p); 1132 } else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1133 VOP_INACTIVE(vp, p); 1134 } 1135 1136 vp->v_usecount--; 1137 simple_lock(&vnode_free_list_slock); 1138 if (vp->v_flag & VAGE) { 1139 vp->v_flag &= ~VAGE; 1140 if(vp->v_tag != VT_TFS) 1141 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1142 } else { 1143 if(vp->v_tag != VT_TFS) 1144 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1145 } 1146 freevnodes++; 1147 simple_unlock(&vnode_free_list_slock); 1148} 1149 1150/* 1151 * vput(), just unlock and vrele() 1152 */ 1153void 1154vput(vp) 1155 struct vnode *vp; 1156{ 1157 vputrele(vp, 1); 1158} 1159 1160void 1161vrele(vp) 1162 struct vnode *vp; 1163{ 1164 vputrele(vp, 0); 1165} 1166 1167#ifdef DIAGNOSTIC 1168/* 1169 * Page or buffer structure gets a reference. 1170 */ 1171void 1172vhold(vp) 1173 register struct vnode *vp; 1174{ 1175 1176 simple_lock(&vp->v_interlock); 1177 vp->v_holdcnt++; 1178 simple_unlock(&vp->v_interlock); 1179} 1180 1181/* 1182 * Page or buffer structure frees a reference. 1183 */ 1184void 1185holdrele(vp) 1186 register struct vnode *vp; 1187{ 1188 1189 simple_lock(&vp->v_interlock); 1190 if (vp->v_holdcnt <= 0) 1191 panic("holdrele: holdcnt"); 1192 vp->v_holdcnt--; 1193 simple_unlock(&vp->v_interlock); 1194} 1195#endif /* DIAGNOSTIC */ 1196 1197/* 1198 * Remove any vnodes in the vnode table belonging to mount point mp. 1199 * 1200 * If MNT_NOFORCE is specified, there should not be any active ones, 1201 * return error if any are found (nb: this is a user error, not a 1202 * system error). If MNT_FORCE is specified, detach any active vnodes 1203 * that are found. 1204 */ 1205#ifdef DIAGNOSTIC 1206static int busyprt = 0; /* print out busy vnodes */ 1207SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1208#endif 1209 1210int 1211vflush(mp, skipvp, flags) 1212 struct mount *mp; 1213 struct vnode *skipvp; 1214 int flags; 1215{ 1216 struct proc *p = curproc; /* XXX */ 1217 struct vnode *vp, *nvp; 1218 int busy = 0; 1219 1220 simple_lock(&mntvnode_slock); 1221loop: 1222 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1223 /* 1224 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1225 * Start over if it has (it won't be on the list anymore). 1226 */ 1227 if (vp->v_mount != mp) 1228 goto loop; 1229 nvp = vp->v_mntvnodes.le_next; 1230 /* 1231 * Skip over a selected vnode. 1232 */ 1233 if (vp == skipvp) 1234 continue; 1235 1236 simple_lock(&vp->v_interlock); 1237 /* 1238 * Skip over a vnodes marked VSYSTEM. 1239 */ 1240 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1241 simple_unlock(&vp->v_interlock); 1242 continue; 1243 } 1244 /* 1245 * If WRITECLOSE is set, only flush out regular file vnodes 1246 * open for writing. 1247 */ 1248 if ((flags & WRITECLOSE) && 1249 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1250 simple_unlock(&vp->v_interlock); 1251 continue; 1252 } 1253 1254 /* 1255 * With v_usecount == 0, all we need to do is clear out the 1256 * vnode data structures and we are done. 1257 */ 1258 if (vp->v_usecount == 0) { 1259 simple_unlock(&mntvnode_slock); 1260 vgonel(vp, p); 1261 simple_lock(&mntvnode_slock); 1262 continue; 1263 } 1264 1265 /* 1266 * If FORCECLOSE is set, forcibly close the vnode. For block 1267 * or character devices, revert to an anonymous device. For 1268 * all other files, just kill them. 1269 */ 1270 if (flags & FORCECLOSE) { 1271 simple_unlock(&mntvnode_slock); 1272 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1273 vgonel(vp, p); 1274 } else { 1275 vclean(vp, 0, p); 1276 vp->v_op = spec_vnodeop_p; 1277 insmntque(vp, (struct mount *) 0); 1278 } 1279 simple_lock(&mntvnode_slock); 1280 continue; 1281 } 1282#ifdef DIAGNOSTIC 1283 if (busyprt) 1284 vprint("vflush: busy vnode", vp); 1285#endif 1286 simple_unlock(&vp->v_interlock); 1287 busy++; 1288 } 1289 simple_unlock(&mntvnode_slock); 1290 if (busy) 1291 return (EBUSY); 1292 return (0); 1293} 1294 1295/* 1296 * Disassociate the underlying file system from a vnode. 1297 */ 1298static void 1299vclean(struct vnode *vp, int flags, struct proc *p) 1300{ 1301 int active, irefed; 1302 vm_object_t object; 1303 1304 /* 1305 * Check to see if the vnode is in use. If so we have to reference it 1306 * before we clean it out so that its count cannot fall to zero and 1307 * generate a race against ourselves to recycle it. 1308 */ 1309 if ((active = vp->v_usecount)) 1310 vp->v_usecount++; 1311 /* 1312 * Prevent the vnode from being recycled or brought into use while we 1313 * clean it out. 1314 */ 1315 if (vp->v_flag & VXLOCK) 1316 panic("vclean: deadlock"); 1317 vp->v_flag |= VXLOCK; 1318 /* 1319 * Even if the count is zero, the VOP_INACTIVE routine may still 1320 * have the object locked while it cleans it out. The VOP_LOCK 1321 * ensures that the VOP_INACTIVE routine is done with its work. 1322 * For active vnodes, it ensures that no other activity can 1323 * occur while the underlying object is being cleaned out. 1324 */ 1325 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1326 1327 object = vp->v_object; 1328 irefed = 0; 1329 if (object && ((object->flags & OBJ_DEAD) == 0)) { 1330 if (object->ref_count == 0) { 1331 vm_object_reference(object); 1332 irefed = 1; 1333 } 1334 ++object->ref_count; 1335 pager_cache(object, FALSE); 1336 } 1337 1338 /* 1339 * Clean out any buffers associated with the vnode. 1340 */ 1341 if (flags & DOCLOSE) 1342 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1343 1344 if (irefed) { 1345 vm_object_deallocate(object); 1346 } 1347 1348 /* 1349 * If purging an active vnode, it must be closed and 1350 * deactivated before being reclaimed. Note that the 1351 * VOP_INACTIVE will unlock the vnode. 1352 */ 1353 if (active) { 1354 if (flags & DOCLOSE) 1355 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); 1356 VOP_INACTIVE(vp, p); 1357 } else { 1358 /* 1359 * Any other processes trying to obtain this lock must first 1360 * wait for VXLOCK to clear, then call the new lock operation. 1361 */ 1362 VOP_UNLOCK(vp, 0, p); 1363 } 1364 /* 1365 * Reclaim the vnode. 1366 */ 1367 if (VOP_RECLAIM(vp, p)) 1368 panic("vclean: cannot reclaim"); 1369 if (active) 1370 vrele(vp); 1371 cache_purge(vp); 1372 if (vp->v_vnlock) { 1373#ifdef DIAGNOSTIC 1374 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1375 vprint("vclean: lock not drained", vp); 1376#endif 1377 FREE(vp->v_vnlock, M_VNODE); 1378 vp->v_vnlock = NULL; 1379 } 1380 1381 /* 1382 * Done with purge, notify sleepers of the grim news. 1383 */ 1384 vp->v_op = dead_vnodeop_p; 1385 vp->v_tag = VT_NON; 1386 vp->v_flag &= ~VXLOCK; 1387 if (vp->v_flag & VXWANT) { 1388 vp->v_flag &= ~VXWANT; 1389 wakeup((caddr_t) vp); 1390 } 1391} 1392 1393/* 1394 * Eliminate all activity associated with the requested vnode 1395 * and with all vnodes aliased to the requested vnode. 1396 */ 1397int 1398vop_revoke(ap) 1399 struct vop_revoke_args /* { 1400 struct vnode *a_vp; 1401 int a_flags; 1402 } */ *ap; 1403{ 1404 struct vnode *vp, *vq; 1405 struct proc *p = curproc; /* XXX */ 1406 1407#ifdef DIAGNOSTIC 1408 if ((ap->a_flags & REVOKEALL) == 0) 1409 panic("vop_revoke"); 1410#endif 1411 1412 vp = ap->a_vp; 1413 simple_lock(&vp->v_interlock); 1414 1415 if (vp->v_flag & VALIASED) { 1416 /* 1417 * If a vgone (or vclean) is already in progress, 1418 * wait until it is done and return. 1419 */ 1420 if (vp->v_flag & VXLOCK) { 1421 vp->v_flag |= VXWANT; 1422 simple_unlock(&vp->v_interlock); 1423 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1424 return (0); 1425 } 1426 /* 1427 * Ensure that vp will not be vgone'd while we 1428 * are eliminating its aliases. 1429 */ 1430 vp->v_flag |= VXLOCK; 1431 simple_unlock(&vp->v_interlock); 1432 while (vp->v_flag & VALIASED) { 1433 simple_lock(&spechash_slock); 1434 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1435 if (vq->v_rdev != vp->v_rdev || 1436 vq->v_type != vp->v_type || vp == vq) 1437 continue; 1438 simple_unlock(&spechash_slock); 1439 vgone(vq); 1440 break; 1441 } 1442 if (vq == NULLVP) { 1443 simple_unlock(&spechash_slock); 1444 } 1445 } 1446 /* 1447 * Remove the lock so that vgone below will 1448 * really eliminate the vnode after which time 1449 * vgone will awaken any sleepers. 1450 */ 1451 simple_lock(&vp->v_interlock); 1452 vp->v_flag &= ~VXLOCK; 1453 } 1454 vgonel(vp, p); 1455 return (0); 1456} 1457 1458/* 1459 * Recycle an unused vnode to the front of the free list. 1460 * Release the passed interlock if the vnode will be recycled. 1461 */ 1462int 1463vrecycle(vp, inter_lkp, p) 1464 struct vnode *vp; 1465 struct simplelock *inter_lkp; 1466 struct proc *p; 1467{ 1468 1469 simple_lock(&vp->v_interlock); 1470 if (vp->v_usecount == 0) { 1471 if (inter_lkp) { 1472 simple_unlock(inter_lkp); 1473 } 1474 vgonel(vp, p); 1475 return (1); 1476 } 1477 simple_unlock(&vp->v_interlock); 1478 return (0); 1479} 1480 1481/* 1482 * Eliminate all activity associated with a vnode 1483 * in preparation for reuse. 1484 */ 1485void 1486vgone(vp) 1487 register struct vnode *vp; 1488{ 1489 struct proc *p = curproc; /* XXX */ 1490 1491 simple_lock(&vp->v_interlock); 1492 vgonel(vp, p); 1493} 1494 1495/* 1496 * vgone, with the vp interlock held. 1497 */ 1498static void 1499vgonel(vp, p) 1500 struct vnode *vp; 1501 struct proc *p; 1502{ 1503 struct vnode *vq; 1504 struct vnode *vx; 1505 1506 /* 1507 * If a vgone (or vclean) is already in progress, 1508 * wait until it is done and return. 1509 */ 1510 if (vp->v_flag & VXLOCK) { 1511 vp->v_flag |= VXWANT; 1512 simple_unlock(&vp->v_interlock); 1513 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1514 return; 1515 } 1516 1517 if (vp->v_object) { 1518 vp->v_object->flags |= OBJ_VNODE_GONE; 1519 } 1520 1521 /* 1522 * Clean out the filesystem specific data. 1523 */ 1524 vclean(vp, DOCLOSE, p); 1525 /* 1526 * Delete from old mount point vnode list, if on one. 1527 */ 1528 if (vp->v_mount != NULL) 1529 insmntque(vp, (struct mount *)0); 1530 /* 1531 * If special device, remove it from special device alias list 1532 * if it is on one. 1533 */ 1534 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1535 simple_lock(&spechash_slock); 1536 if (*vp->v_hashchain == vp) { 1537 *vp->v_hashchain = vp->v_specnext; 1538 } else { 1539 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1540 if (vq->v_specnext != vp) 1541 continue; 1542 vq->v_specnext = vp->v_specnext; 1543 break; 1544 } 1545 if (vq == NULL) 1546 panic("missing bdev"); 1547 } 1548 if (vp->v_flag & VALIASED) { 1549 vx = NULL; 1550 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1551 if (vq->v_rdev != vp->v_rdev || 1552 vq->v_type != vp->v_type) 1553 continue; 1554 if (vx) 1555 break; 1556 vx = vq; 1557 } 1558 if (vx == NULL) 1559 panic("missing alias"); 1560 if (vq == NULL) 1561 vx->v_flag &= ~VALIASED; 1562 vp->v_flag &= ~VALIASED; 1563 } 1564 simple_unlock(&spechash_slock); 1565 FREE(vp->v_specinfo, M_VNODE); 1566 vp->v_specinfo = NULL; 1567 } 1568 1569 /* 1570 * If it is on the freelist and not already at the head, 1571 * move it to the head of the list. The test of the back 1572 * pointer and the reference count of zero is because 1573 * it will be removed from the free list by getnewvnode, 1574 * but will not have its reference count incremented until 1575 * after calling vgone. If the reference count were 1576 * incremented first, vgone would (incorrectly) try to 1577 * close the previous instance of the underlying object. 1578 * So, the back pointer is explicitly set to `0xdeadb' in 1579 * getnewvnode after removing it from the freelist to ensure 1580 * that we do not try to move it here. 1581 */ 1582 if (vp->v_usecount == 0) { 1583 simple_lock(&vnode_free_list_slock); 1584 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1585 vnode_free_list.tqh_first != vp) { 1586 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1587 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1588 } 1589 simple_unlock(&vnode_free_list_slock); 1590 } 1591 1592 vp->v_type = VBAD; 1593} 1594 1595/* 1596 * Lookup a vnode by device number. 1597 */ 1598int 1599vfinddev(dev, type, vpp) 1600 dev_t dev; 1601 enum vtype type; 1602 struct vnode **vpp; 1603{ 1604 register struct vnode *vp; 1605 int rc = 0; 1606 1607 simple_lock(&spechash_slock); 1608 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1609 if (dev != vp->v_rdev || type != vp->v_type) 1610 continue; 1611 *vpp = vp; 1612 rc = 1; 1613 break; 1614 } 1615 simple_unlock(&spechash_slock); 1616 return (rc); 1617} 1618 1619/* 1620 * Calculate the total number of references to a special device. 1621 */ 1622int 1623vcount(vp) 1624 register struct vnode *vp; 1625{ 1626 struct vnode *vq, *vnext; 1627 int count; 1628 1629loop: 1630 if ((vp->v_flag & VALIASED) == 0) 1631 return (vp->v_usecount); 1632 simple_lock(&spechash_slock); 1633 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1634 vnext = vq->v_specnext; 1635 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1636 continue; 1637 /* 1638 * Alias, but not in use, so flush it out. 1639 */ 1640 if (vq->v_usecount == 0 && vq != vp) { 1641 simple_unlock(&spechash_slock); 1642 vgone(vq); 1643 goto loop; 1644 } 1645 count += vq->v_usecount; 1646 } 1647 simple_unlock(&spechash_slock); 1648 return (count); 1649} 1650 1651/* 1652 * Print out a description of a vnode. 1653 */ 1654static char *typename[] = 1655{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1656 1657void 1658vprint(label, vp) 1659 char *label; 1660 register struct vnode *vp; 1661{ 1662 char buf[64]; 1663 1664 if (label != NULL) 1665 printf("%s: %x: ", label, vp); 1666 else 1667 printf("%x: ", vp); 1668 printf("type %s, usecount %d, writecount %d, refcount %ld,", 1669 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1670 vp->v_holdcnt); 1671 buf[0] = '\0'; 1672 if (vp->v_flag & VROOT) 1673 strcat(buf, "|VROOT"); 1674 if (vp->v_flag & VTEXT) 1675 strcat(buf, "|VTEXT"); 1676 if (vp->v_flag & VSYSTEM) 1677 strcat(buf, "|VSYSTEM"); 1678 if (vp->v_flag & VXLOCK) 1679 strcat(buf, "|VXLOCK"); 1680 if (vp->v_flag & VXWANT) 1681 strcat(buf, "|VXWANT"); 1682 if (vp->v_flag & VBWAIT) 1683 strcat(buf, "|VBWAIT"); 1684 if (vp->v_flag & VALIASED) 1685 strcat(buf, "|VALIASED"); 1686 if (buf[0] != '\0') 1687 printf(" flags (%s)", &buf[1]); 1688 if (vp->v_data == NULL) { 1689 printf("\n"); 1690 } else { 1691 printf("\n\t"); 1692 VOP_PRINT(vp); 1693 } 1694} 1695 1696#ifdef DDB 1697/* 1698 * List all of the locked vnodes in the system. 1699 * Called when debugging the kernel. 1700 */ 1701void 1702printlockedvnodes() 1703{ 1704 struct proc *p = curproc; /* XXX */ 1705 struct mount *mp, *nmp; 1706 struct vnode *vp; 1707 1708 printf("Locked vnodes\n"); 1709 simple_lock(&mountlist_slock); 1710 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1711 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1712 nmp = mp->mnt_list.cqe_next; 1713 continue; 1714 } 1715 for (vp = mp->mnt_vnodelist.lh_first; 1716 vp != NULL; 1717 vp = vp->v_mntvnodes.le_next) { 1718 if (VOP_ISLOCKED(vp)) 1719 vprint((char *)0, vp); 1720 } 1721 simple_lock(&mountlist_slock); 1722 nmp = mp->mnt_list.cqe_next; 1723 vfs_unbusy(mp, p); 1724 } 1725 simple_unlock(&mountlist_slock); 1726} 1727#endif 1728 1729/* 1730 * Top level filesystem related information gathering. 1731 */ 1732static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1733 1734static int 1735vfs_sysctl SYSCTL_HANDLER_ARGS 1736{ 1737 int *name = (int *)arg1 - 1; /* XXX */ 1738 u_int namelen = arg2 + 1; /* XXX */ 1739 struct vfsconf *vfsp; 1740 1741#ifndef NO_COMPAT_PRELITE2 1742 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ 1743 if (namelen == 1) 1744 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1745#endif 1746 1747#ifdef notyet 1748 /* all sysctl names at this level are at least name and field */ 1749 if (namelen < 2) 1750 return (ENOTDIR); /* overloaded */ 1751 if (name[0] != VFS_GENERIC) { 1752 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1753 if (vfsp->vfc_typenum == name[0]) 1754 break; 1755 if (vfsp == NULL) 1756 return (EOPNOTSUPP); 1757 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1758 oldp, oldlenp, newp, newlen, p)); 1759 } 1760#endif 1761 switch (name[1]) { 1762 case VFS_MAXTYPENUM: 1763 if (namelen != 2) 1764 return (ENOTDIR); 1765 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 1766 case VFS_CONF: 1767 if (namelen != 3) 1768 return (ENOTDIR); /* overloaded */ 1769 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1770 if (vfsp->vfc_typenum == name[2]) 1771 break; 1772 if (vfsp == NULL) 1773 return (EOPNOTSUPP); 1774 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 1775 } 1776 return (EOPNOTSUPP); 1777} 1778 1779SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 1780 "Generic filesystem"); 1781 1782#ifndef NO_COMPAT_PRELITE2 1783 1784static int 1785sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 1786{ 1787 int error; 1788 struct vfsconf *vfsp; 1789 struct ovfsconf ovfs; 1790 1791 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 1792 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 1793 strcpy(ovfs.vfc_name, vfsp->vfc_name); 1794 ovfs.vfc_index = vfsp->vfc_typenum; 1795 ovfs.vfc_refcount = vfsp->vfc_refcount; 1796 ovfs.vfc_flags = vfsp->vfc_flags; 1797 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 1798 if (error) 1799 return error; 1800 } 1801 return 0; 1802} 1803 1804#endif /* !NO_COMPAT_PRELITE2 */ 1805 1806int kinfo_vdebug = 1; 1807int kinfo_vgetfailed; 1808 1809#define KINFO_VNODESLOP 10 1810/* 1811 * Dump vnode list (via sysctl). 1812 * Copyout address of vnode followed by vnode. 1813 */ 1814/* ARGSUSED */ 1815static int 1816sysctl_vnode SYSCTL_HANDLER_ARGS 1817{ 1818 struct proc *p = curproc; /* XXX */ 1819 struct mount *mp, *nmp; 1820 struct vnode *nvp, *vp; 1821 int error; 1822 1823#define VPTRSZ sizeof (struct vnode *) 1824#define VNODESZ sizeof (struct vnode) 1825 1826 req->lock = 0; 1827 if (!req->oldptr) /* Make an estimate */ 1828 return (SYSCTL_OUT(req, 0, 1829 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 1830 1831 simple_lock(&mountlist_slock); 1832 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1833 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1834 nmp = mp->mnt_list.cqe_next; 1835 continue; 1836 } 1837again: 1838 simple_lock(&mntvnode_slock); 1839 for (vp = mp->mnt_vnodelist.lh_first; 1840 vp != NULL; 1841 vp = nvp) { 1842 /* 1843 * Check that the vp is still associated with 1844 * this filesystem. RACE: could have been 1845 * recycled onto the same filesystem. 1846 */ 1847 if (vp->v_mount != mp) { 1848 simple_unlock(&mntvnode_slock); 1849 if (kinfo_vdebug) 1850 printf("kinfo: vp changed\n"); 1851 goto again; 1852 } 1853 nvp = vp->v_mntvnodes.le_next; 1854 simple_unlock(&mntvnode_slock); 1855 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 1856 (error = SYSCTL_OUT(req, vp, VNODESZ))) 1857 return (error); 1858 simple_lock(&mntvnode_slock); 1859 } 1860 simple_unlock(&mntvnode_slock); 1861 simple_lock(&mountlist_slock); 1862 nmp = mp->mnt_list.cqe_next; 1863 vfs_unbusy(mp, p); 1864 } 1865 simple_unlock(&mountlist_slock); 1866 1867 return (0); 1868} 1869 1870/* 1871 * XXX 1872 * Exporting the vnode list on large systems causes them to crash. 1873 * Exporting the vnode list on medium systems causes sysctl to coredump. 1874 */ 1875#if 0 1876SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 1877 0, 0, sysctl_vnode, "S,vnode", ""); 1878#endif 1879 1880/* 1881 * Check to see if a filesystem is mounted on a block device. 1882 */ 1883int 1884vfs_mountedon(vp) 1885 struct vnode *vp; 1886{ 1887 struct vnode *vq; 1888 int error = 0; 1889 1890 if (vp->v_specflags & SI_MOUNTEDON) 1891 return (EBUSY); 1892 if (vp->v_flag & VALIASED) { 1893 simple_lock(&spechash_slock); 1894 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1895 if (vq->v_rdev != vp->v_rdev || 1896 vq->v_type != vp->v_type) 1897 continue; 1898 if (vq->v_specflags & SI_MOUNTEDON) { 1899 error = EBUSY; 1900 break; 1901 } 1902 } 1903 simple_unlock(&spechash_slock); 1904 } 1905 return (error); 1906} 1907 1908/* 1909 * Unmount all filesystems. The list is traversed in reverse order 1910 * of mounting to avoid dependencies. 1911 */ 1912void 1913vfs_unmountall() 1914{ 1915 struct mount *mp, *nmp; 1916 struct proc *p = initproc; /* XXX XXX should this be proc0? */ 1917 int error; 1918 1919 /* 1920 * Since this only runs when rebooting, it is not interlocked. 1921 */ 1922 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1923 nmp = mp->mnt_list.cqe_prev; 1924 error = dounmount(mp, MNT_FORCE, p); 1925 if (error) { 1926 printf("unmount of %s failed (", 1927 mp->mnt_stat.f_mntonname); 1928 if (error == EBUSY) 1929 printf("BUSY)\n"); 1930 else 1931 printf("%d)\n", error); 1932 } 1933 } 1934} 1935 1936/* 1937 * Build hash lists of net addresses and hang them off the mount point. 1938 * Called by ufs_mount() to set up the lists of export addresses. 1939 */ 1940static int 1941vfs_hang_addrlist(struct mount *mp, struct netexport *nep, 1942 struct export_args *argp) 1943{ 1944 register struct netcred *np; 1945 register struct radix_node_head *rnh; 1946 register int i; 1947 struct radix_node *rn; 1948 struct sockaddr *saddr, *smask = 0; 1949 struct domain *dom; 1950 int error; 1951 1952 if (argp->ex_addrlen == 0) { 1953 if (mp->mnt_flag & MNT_DEFEXPORTED) 1954 return (EPERM); 1955 np = &nep->ne_defexported; 1956 np->netc_exflags = argp->ex_flags; 1957 np->netc_anon = argp->ex_anon; 1958 np->netc_anon.cr_ref = 1; 1959 mp->mnt_flag |= MNT_DEFEXPORTED; 1960 return (0); 1961 } 1962 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1963 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 1964 bzero((caddr_t) np, i); 1965 saddr = (struct sockaddr *) (np + 1); 1966 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 1967 goto out; 1968 if (saddr->sa_len > argp->ex_addrlen) 1969 saddr->sa_len = argp->ex_addrlen; 1970 if (argp->ex_masklen) { 1971 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 1972 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 1973 if (error) 1974 goto out; 1975 if (smask->sa_len > argp->ex_masklen) 1976 smask->sa_len = argp->ex_masklen; 1977 } 1978 i = saddr->sa_family; 1979 if ((rnh = nep->ne_rtable[i]) == 0) { 1980 /* 1981 * Seems silly to initialize every AF when most are not used, 1982 * do so on demand here 1983 */ 1984 for (dom = domains; dom; dom = dom->dom_next) 1985 if (dom->dom_family == i && dom->dom_rtattach) { 1986 dom->dom_rtattach((void **) &nep->ne_rtable[i], 1987 dom->dom_rtoffset); 1988 break; 1989 } 1990 if ((rnh = nep->ne_rtable[i]) == 0) { 1991 error = ENOBUFS; 1992 goto out; 1993 } 1994 } 1995 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 1996 np->netc_rnodes); 1997 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 1998 error = EPERM; 1999 goto out; 2000 } 2001 np->netc_exflags = argp->ex_flags; 2002 np->netc_anon = argp->ex_anon; 2003 np->netc_anon.cr_ref = 1; 2004 return (0); 2005out: 2006 free(np, M_NETADDR); 2007 return (error); 2008} 2009 2010/* ARGSUSED */ 2011static int 2012vfs_free_netcred(struct radix_node *rn, void *w) 2013{ 2014 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2015 2016 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2017 free((caddr_t) rn, M_NETADDR); 2018 return (0); 2019} 2020 2021/* 2022 * Free the net address hash lists that are hanging off the mount points. 2023 */ 2024static void 2025vfs_free_addrlist(struct netexport *nep) 2026{ 2027 register int i; 2028 register struct radix_node_head *rnh; 2029 2030 for (i = 0; i <= AF_MAX; i++) 2031 if ((rnh = nep->ne_rtable[i])) { 2032 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2033 (caddr_t) rnh); 2034 free((caddr_t) rnh, M_RTABLE); 2035 nep->ne_rtable[i] = 0; 2036 } 2037} 2038 2039int 2040vfs_export(mp, nep, argp) 2041 struct mount *mp; 2042 struct netexport *nep; 2043 struct export_args *argp; 2044{ 2045 int error; 2046 2047 if (argp->ex_flags & MNT_DELEXPORT) { 2048 if (mp->mnt_flag & MNT_EXPUBLIC) { 2049 vfs_setpublicfs(NULL, NULL, NULL); 2050 mp->mnt_flag &= ~MNT_EXPUBLIC; 2051 } 2052 vfs_free_addrlist(nep); 2053 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2054 } 2055 if (argp->ex_flags & MNT_EXPORTED) { 2056 if (argp->ex_flags & MNT_EXPUBLIC) { 2057 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2058 return (error); 2059 mp->mnt_flag |= MNT_EXPUBLIC; 2060 } 2061 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2062 return (error); 2063 mp->mnt_flag |= MNT_EXPORTED; 2064 } 2065 return (0); 2066} 2067 2068 2069/* 2070 * Set the publicly exported filesystem (WebNFS). Currently, only 2071 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2072 */ 2073int 2074vfs_setpublicfs(mp, nep, argp) 2075 struct mount *mp; 2076 struct netexport *nep; 2077 struct export_args *argp; 2078{ 2079 int error; 2080 struct vnode *rvp; 2081 char *cp; 2082 2083 /* 2084 * mp == NULL -> invalidate the current info, the FS is 2085 * no longer exported. May be called from either vfs_export 2086 * or unmount, so check if it hasn't already been done. 2087 */ 2088 if (mp == NULL) { 2089 if (nfs_pub.np_valid) { 2090 nfs_pub.np_valid = 0; 2091 if (nfs_pub.np_index != NULL) { 2092 FREE(nfs_pub.np_index, M_TEMP); 2093 nfs_pub.np_index = NULL; 2094 } 2095 } 2096 return (0); 2097 } 2098 2099 /* 2100 * Only one allowed at a time. 2101 */ 2102 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2103 return (EBUSY); 2104 2105 /* 2106 * Get real filehandle for root of exported FS. 2107 */ 2108 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2109 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2110 2111 if ((error = VFS_ROOT(mp, &rvp))) 2112 return (error); 2113 2114 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2115 return (error); 2116 2117 vput(rvp); 2118 2119 /* 2120 * If an indexfile was specified, pull it in. 2121 */ 2122 if (argp->ex_indexfile != NULL) { 2123 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2124 M_WAITOK); 2125 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2126 MAXNAMLEN, (size_t *)0); 2127 if (!error) { 2128 /* 2129 * Check for illegal filenames. 2130 */ 2131 for (cp = nfs_pub.np_index; *cp; cp++) { 2132 if (*cp == '/') { 2133 error = EINVAL; 2134 break; 2135 } 2136 } 2137 } 2138 if (error) { 2139 FREE(nfs_pub.np_index, M_TEMP); 2140 return (error); 2141 } 2142 } 2143 2144 nfs_pub.np_mount = mp; 2145 nfs_pub.np_valid = 1; 2146 return (0); 2147} 2148 2149struct netcred * 2150vfs_export_lookup(mp, nep, nam) 2151 register struct mount *mp; 2152 struct netexport *nep; 2153 struct sockaddr *nam; 2154{ 2155 register struct netcred *np; 2156 register struct radix_node_head *rnh; 2157 struct sockaddr *saddr; 2158 2159 np = NULL; 2160 if (mp->mnt_flag & MNT_EXPORTED) { 2161 /* 2162 * Lookup in the export list first. 2163 */ 2164 if (nam != NULL) { 2165 saddr = nam; 2166 rnh = nep->ne_rtable[saddr->sa_family]; 2167 if (rnh != NULL) { 2168 np = (struct netcred *) 2169 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2170 rnh); 2171 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2172 np = NULL; 2173 } 2174 } 2175 /* 2176 * If no address match, use the default if it exists. 2177 */ 2178 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2179 np = &nep->ne_defexported; 2180 } 2181 return (np); 2182} 2183 2184/* 2185 * perform msync on all vnodes under a mount point 2186 * the mount point must be locked. 2187 */ 2188void 2189vfs_msync(struct mount *mp, int flags) { 2190 struct vnode *vp, *nvp; 2191loop: 2192 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2193 2194 if (vp->v_mount != mp) 2195 goto loop; 2196 nvp = vp->v_mntvnodes.le_next; 2197 if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) 2198 continue; 2199 if (vp->v_object && 2200 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2201 vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE); 2202 } 2203 } 2204} 2205 2206/* 2207 * Create the VM object needed for VMIO and mmap support. This 2208 * is done for all VREG files in the system. Some filesystems might 2209 * afford the additional metadata buffering capability of the 2210 * VMIO code by making the device node be VMIO mode also. 2211 */ 2212int 2213vfs_object_create(vp, p, cred, waslocked) 2214 struct vnode *vp; 2215 struct proc *p; 2216 struct ucred *cred; 2217 int waslocked; 2218{ 2219 struct vattr vat; 2220 vm_object_t object; 2221 int error = 0; 2222 2223retry: 2224 if ((object = vp->v_object) == NULL) { 2225 if (vp->v_type == VREG) { 2226 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2227 goto retn; 2228 (void) vnode_pager_alloc(vp, 2229 OFF_TO_IDX(round_page(vat.va_size)), 0, 0); 2230 } else { 2231 /* 2232 * This simply allocates the biggest object possible 2233 * for a VBLK vnode. This should be fixed, but doesn't 2234 * cause any problems (yet). 2235 */ 2236 (void) vnode_pager_alloc(vp, INT_MAX, 0, 0); 2237 } 2238 vp->v_object->flags |= OBJ_VFS_REF; 2239 } else { 2240 if (object->flags & OBJ_DEAD) { 2241 if (waslocked) 2242 VOP_UNLOCK(vp, 0, p); 2243 tsleep(object, PVM, "vodead", 0); 2244 if (waslocked) 2245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2246 goto retry; 2247 } 2248 if ((object->flags & OBJ_VFS_REF) == 0) { 2249 object->flags |= OBJ_VFS_REF; 2250 vm_object_reference(object); 2251 } 2252 } 2253 if (vp->v_object) 2254 vp->v_flag |= VVMIO; 2255 2256retn: 2257 return error; 2258} 2259 2260void 2261vtouch(vp) 2262 struct vnode *vp; 2263{ 2264 simple_lock(&vp->v_interlock); 2265 if (vp->v_usecount) { 2266 simple_unlock(&vp->v_interlock); 2267 return; 2268 } 2269 if (simple_lock_try(&vnode_free_list_slock)) { 2270 if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) { 2271 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2272 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2273 } 2274 simple_unlock(&vnode_free_list_slock); 2275 } 2276 simple_unlock(&vp->v_interlock); 2277} 2278