vfs_subr.c revision 32585
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.123 1998/01/12 03:15:01 dyson Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/proc.h>
52#include <sys/malloc.h>
53#include <sys/mount.h>
54#include <sys/vnode.h>
55#include <sys/stat.h>
56#include <sys/buf.h>
57#include <sys/poll.h>
58#include <sys/domain.h>
59#include <sys/dirent.h>
60#include <sys/vmmeter.h>
61
62#include <machine/limits.h>
63
64#include <vm/vm.h>
65#include <vm/vm_object.h>
66#include <vm/vm_extern.h>
67#include <vm/pmap.h>
68#include <vm/vm_map.h>
69#include <vm/vm_pager.h>
70#include <vm/vnode_pager.h>
71#include <sys/sysctl.h>
72
73#include <miscfs/specfs/specdev.h>
74
75static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
76
77static void	insmntque __P((struct vnode *vp, struct mount *mp));
78#ifdef DDB
79static void	printlockedvnodes __P((void));
80#endif
81static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
82static void	vfree __P((struct vnode *));
83static void	vgonel __P((struct vnode *vp, struct proc *p));
84static unsigned long	numvnodes;
85SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
86
87enum vtype iftovt_tab[16] = {
88	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
89	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
90};
91int vttoif_tab[9] = {
92	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
93	S_IFSOCK, S_IFIFO, S_IFMT,
94};
95
96/*
97 * Insq/Remq for the vnode usage lists.
98 */
99#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
100#define	bufremvn(bp) {							\
101	LIST_REMOVE(bp, b_vnbufs);					\
102	(bp)->b_vnbufs.le_next = NOLIST;				\
103}
104
105TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
106struct tobefreelist vnode_tobefree_list;	/* vnodes scheduled to be freed */
107
108static u_long wantfreevnodes = 25;
109SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
110static u_long freevnodes = 0;
111SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
112
113int vfs_ioopt = 0;
114SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
115
116struct mntlist mountlist;	/* mounted filesystem list */
117struct simplelock mountlist_slock;
118static struct simplelock mntid_slock;
119struct simplelock mntvnode_slock;
120struct simplelock vnode_free_list_slock;
121static struct simplelock spechash_slock;
122struct nfs_public nfs_pub;	/* publicly exported FS */
123
124int desiredvnodes;
125SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
126
127static void	vfs_free_addrlist __P((struct netexport *nep));
128static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
129static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
130				       struct export_args *argp));
131
132/*
133 * Initialize the vnode management data structures.
134 */
135void
136vntblinit()
137{
138
139	desiredvnodes = maxproc + cnt.v_page_count / 4;
140	simple_lock_init(&mntvnode_slock);
141	simple_lock_init(&mntid_slock);
142	simple_lock_init(&spechash_slock);
143	TAILQ_INIT(&vnode_free_list);
144	TAILQ_INIT(&vnode_tobefree_list);
145	simple_lock_init(&vnode_free_list_slock);
146	CIRCLEQ_INIT(&mountlist);
147}
148
149/*
150 * Mark a mount point as busy. Used to synchronize access and to delay
151 * unmounting. Interlock is not released on failure.
152 */
153int
154vfs_busy(mp, flags, interlkp, p)
155	struct mount *mp;
156	int flags;
157	struct simplelock *interlkp;
158	struct proc *p;
159{
160	int lkflags;
161
162	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
163		if (flags & LK_NOWAIT)
164			return (ENOENT);
165		mp->mnt_kern_flag |= MNTK_MWAIT;
166		if (interlkp) {
167			simple_unlock(interlkp);
168		}
169		/*
170		 * Since all busy locks are shared except the exclusive
171		 * lock granted when unmounting, the only place that a
172		 * wakeup needs to be done is at the release of the
173		 * exclusive lock at the end of dounmount.
174		 */
175		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
176		if (interlkp) {
177			simple_lock(interlkp);
178		}
179		return (ENOENT);
180	}
181	lkflags = LK_SHARED;
182	if (interlkp)
183		lkflags |= LK_INTERLOCK;
184	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
185		panic("vfs_busy: unexpected lock failure");
186	return (0);
187}
188
189/*
190 * Free a busy filesystem.
191 */
192void
193vfs_unbusy(mp, p)
194	struct mount *mp;
195	struct proc *p;
196{
197
198	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
199}
200
201/*
202 * Lookup a filesystem type, and if found allocate and initialize
203 * a mount structure for it.
204 *
205 * Devname is usually updated by mount(8) after booting.
206 */
207int
208vfs_rootmountalloc(fstypename, devname, mpp)
209	char *fstypename;
210	char *devname;
211	struct mount **mpp;
212{
213	struct proc *p = curproc;	/* XXX */
214	struct vfsconf *vfsp;
215	struct mount *mp;
216
217	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
218		if (!strcmp(vfsp->vfc_name, fstypename))
219			break;
220	if (vfsp == NULL)
221		return (ENODEV);
222	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
223	bzero((char *)mp, (u_long)sizeof(struct mount));
224	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
225	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
226	LIST_INIT(&mp->mnt_vnodelist);
227	mp->mnt_vfc = vfsp;
228	mp->mnt_op = vfsp->vfc_vfsops;
229	mp->mnt_flag = MNT_RDONLY;
230	mp->mnt_vnodecovered = NULLVP;
231	vfsp->vfc_refcount++;
232	mp->mnt_stat.f_type = vfsp->vfc_typenum;
233	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
234	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
235	mp->mnt_stat.f_mntonname[0] = '/';
236	mp->mnt_stat.f_mntonname[1] = 0;
237	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
238	*mpp = mp;
239	return (0);
240}
241
242/*
243 * Find an appropriate filesystem to use for the root. If a filesystem
244 * has not been preselected, walk through the list of known filesystems
245 * trying those that have mountroot routines until one
246 * works or we have tried them all.
247 */
248#ifdef notdef	/* XXX JH */
249int
250lite2_vfs_mountroot()
251{
252	struct vfsconf *vfsp;
253	extern int (*lite2_mountroot) __P((void));
254	int error;
255
256	if (lite2_mountroot != NULL)
257		return ((*lite2_mountroot)());
258	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
259		if (vfsp->vfc_mountroot == NULL)
260			continue;
261		if ((error = (*vfsp->vfc_mountroot)()) == 0)
262			return (0);
263		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
264	}
265	return (ENODEV);
266}
267#endif
268
269/*
270 * Lookup a mount point by filesystem identifier.
271 */
272struct mount *
273vfs_getvfs(fsid)
274	fsid_t *fsid;
275{
276	register struct mount *mp;
277
278	simple_lock(&mountlist_slock);
279	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
280	    mp = mp->mnt_list.cqe_next) {
281		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
282		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
283			simple_unlock(&mountlist_slock);
284			return (mp);
285	    }
286	}
287	simple_unlock(&mountlist_slock);
288	return ((struct mount *) 0);
289}
290
291/*
292 * Get a new unique fsid
293 */
294void
295vfs_getnewfsid(mp)
296	struct mount *mp;
297{
298	static u_short xxxfs_mntid;
299
300	fsid_t tfsid;
301	int mtype;
302
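	/*
	 * The fsid is built from a pseudo device number (the filesystem
	 * type offset past the real block devices) plus a small counter;
	 * the counter is bumped below until the id is not already in use.
	 */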
303	simple_lock(&mntid_slock);
304	mtype = mp->mnt_vfc->vfc_typenum;
305	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
306	mp->mnt_stat.f_fsid.val[1] = mtype;
307	if (xxxfs_mntid == 0)
308		++xxxfs_mntid;
309	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
310	tfsid.val[1] = mtype;
311	if (mountlist.cqh_first != (void *)&mountlist) {
312		while (vfs_getvfs(&tfsid)) {
313			tfsid.val[0]++;
314			xxxfs_mntid++;
315		}
316	}
317	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
318	simple_unlock(&mntid_slock);
319}
320
321/*
322 * Set vnode attributes to VNOVAL
323 */
324void
325vattr_null(vap)
326	register struct vattr *vap;
327{
328
329	vap->va_type = VNON;
330	vap->va_size = VNOVAL;
331	vap->va_bytes = VNOVAL;
332	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
333	    vap->va_fsid = vap->va_fileid =
334	    vap->va_blocksize = vap->va_rdev =
335	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
336	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
337	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
338	    vap->va_flags = vap->va_gen = VNOVAL;
339	vap->va_vaflags = 0;
340}
341
342/*
343 * Routines having to do with the management of the vnode table.
344 */
345extern vop_t **dead_vnodeop_p;
346
347/*
348 * Return the next vnode from the free list.
349 */
350int
351getnewvnode(tag, mp, vops, vpp)
352	enum vtagtype tag;
353	struct mount *mp;
354	vop_t **vops;
355	struct vnode **vpp;
356{
357	int s;
358	struct proc *p = curproc;	/* XXX */
359	struct vnode *vp, *tvp, *nvp;
360	vm_object_t object;
361	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
362
363	/*
364	 * We take the least recently used vnode from the freelist
365	 * if we can get it, it has no cached pages, and no
366	 * namecache entries refer to it.
367	 * Otherwise we allocate a new vnode.
368	 */
369
370	s = splbio();
371	simple_lock(&vnode_free_list_slock);
372	TAILQ_INIT(&vnode_tmp_list);
373
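	/*
	 * First migrate any vnodes queued for deferred freeing from the
	 * tobefree list onto the real free list.
	 */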
374	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
375		nvp = TAILQ_NEXT(vp, v_freelist);
376		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
377		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
378		vp->v_flag &= ~VTBFREE;
379		vp->v_flag |= VFREE;
380		if (vp->v_usecount)
381			panic("tobe free vnode isn't");
382		freevnodes++;
383	}
384
385	if (wantfreevnodes && freevnodes < wantfreevnodes) {
386		vp = NULL;
387	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
388		/*
389		 * XXX: this is only here to be backwards compatible
390		 */
391		vp = NULL;
392	} else {
393		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
394
395			nvp = TAILQ_NEXT(vp, v_freelist);
396
397			if (!simple_lock_try(&vp->v_interlock))
398				continue;
399			if (vp->v_usecount)
400				panic("free vnode isn't");
401
402			object = vp->v_object;
403			if (object && (object->resident_page_count || object->ref_count)) {
404				/* Don't recycle if it's caching some pages */
405				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
406				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
407				continue;
408			} else if (LIST_FIRST(&vp->v_cache_src)) {
409				/* Don't recycle if active in the namecache */
410				simple_unlock(&vp->v_interlock);
411				continue;
412			} else {
413				break;
414			}
415		}
416	}
417
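	/*
	 * Put the vnodes that were skipped because they still cache
	 * pages back onto the free list.
	 */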
418	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
419		nvp = TAILQ_NEXT(tvp, v_freelist);
420		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
421		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
422		simple_unlock(&tvp->v_interlock);
423	}
424
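	/*
	 * If a reusable vnode was found, take it off the free list,
	 * purge its name cache entries and old file system state, and
	 * reset its fields; otherwise allocate and zero a fresh vnode.
	 */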
425	if (vp) {
426		vp->v_flag |= VDOOMED;
427		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
428		freevnodes--;
429		simple_unlock(&vnode_free_list_slock);
430		cache_purge(vp);
431		vp->v_lease = NULL;
432		if (vp->v_type != VBAD) {
433			vgonel(vp, p);
434		} else {
435			simple_unlock(&vp->v_interlock);
436		}
437
438#ifdef DIAGNOSTIC
439		{
440			int s;
441
442			if (vp->v_data)
443				panic("cleaned vnode isn't");
444			s = splbio();
445			if (vp->v_numoutput)
446				panic("Clean vnode has pending I/O's");
447			splx(s);
448		}
449#endif
450		vp->v_flag = 0;
451		vp->v_lastr = 0;
452		vp->v_lastw = 0;
453		vp->v_lasta = 0;
454		vp->v_cstart = 0;
455		vp->v_clen = 0;
456		vp->v_socket = 0;
457		vp->v_writecount = 0;	/* XXX */
458	} else {
459		simple_unlock(&vnode_free_list_slock);
460		vp = (struct vnode *) malloc((u_long) sizeof *vp,
461		    M_VNODE, M_WAITOK);
462		bzero((char *) vp, sizeof *vp);
463		simple_lock_init(&vp->v_interlock);
464		vp->v_dd = vp;
465		cache_purge(vp);
466		LIST_INIT(&vp->v_cache_src);
467		TAILQ_INIT(&vp->v_cache_dst);
468		numvnodes++;
469	}
470
471	vp->v_type = VNON;
472	vp->v_tag = tag;
473	vp->v_op = vops;
474	insmntque(vp, mp);
475	*vpp = vp;
476	vp->v_usecount = 1;
477	vp->v_data = 0;
478	splx(s);
479	return (0);
480}
481
482/*
483 * Move a vnode from one mount queue to another.
484 */
485static void
486insmntque(vp, mp)
487	register struct vnode *vp;
488	register struct mount *mp;
489{
490
491	simple_lock(&mntvnode_slock);
492	/*
493	 * Delete from old mount point vnode list, if on one.
494	 */
495	if (vp->v_mount != NULL)
496		LIST_REMOVE(vp, v_mntvnodes);
497	/*
498	 * Insert into list of vnodes for the new mount point, if available.
499	 */
500	if ((vp->v_mount = mp) == NULL) {
501		simple_unlock(&mntvnode_slock);
502		return;
503	}
504	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
505	simple_unlock(&mntvnode_slock);
506}
507
508/*
509 * Update outstanding I/O count and do wakeup if requested.
510 */
511void
512vwakeup(bp)
513	register struct buf *bp;
514{
515	register struct vnode *vp;
516
517	bp->b_flags &= ~B_WRITEINPROG;
518	if ((vp = bp->b_vp)) {
519		vp->v_numoutput--;
520		if (vp->v_numoutput < 0)
521			panic("vwakeup: neg numoutput");
522		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
523			vp->v_flag &= ~VBWAIT;
524			wakeup((caddr_t) &vp->v_numoutput);
525		}
526	}
527}
528
529/*
530 * Flush out and invalidate all buffers associated with a vnode.
531 * Called with the underlying object locked.
532 */
533int
534vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
535	register struct vnode *vp;
536	int flags;
537	struct ucred *cred;
538	struct proc *p;
539	int slpflag, slptimeo;
540{
541	register struct buf *bp;
542	struct buf *nbp, *blist;
543	int s, error;
544	vm_object_t object;
545
546	if (flags & V_SAVE) {
547		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
548			return (error);
549		if (vp->v_dirtyblkhd.lh_first != NULL)
550			panic("vinvalbuf: dirty bufs");
551	}
552
553	s = splbio();
554	for (;;) {
555		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
556			while (blist && blist->b_lblkno < 0)
557				blist = blist->b_vnbufs.le_next;
558		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
559		    (flags & V_SAVEMETA))
560			while (blist && blist->b_lblkno < 0)
561				blist = blist->b_vnbufs.le_next;
562		if (!blist)
563			break;
564
565		for (bp = blist; bp; bp = nbp) {
566			nbp = bp->b_vnbufs.le_next;
567			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
568				continue;
569			if (bp->b_flags & B_BUSY) {
570				bp->b_flags |= B_WANTED;
571				error = tsleep((caddr_t) bp,
572				    slpflag | (PRIBIO + 1), "vinvalbuf",
573				    slptimeo);
574				if (error) {
575					splx(s);
576					return (error);
577				}
578				break;
579			}
580			bremfree(bp);
581			bp->b_flags |= B_BUSY;
582			/*
583			 * XXX Since there are no node locks for NFS, I
584			 * believe there is a slight chance that a delayed
585			 * write will occur while sleeping just above, so
586			 * check for it.
587			 */
588			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
589				if (bp->b_vp == vp) {
590					if (bp->b_flags & B_CLUSTEROK) {
591						vfs_bio_awrite(bp);
592					} else {
593						bp->b_flags |= B_ASYNC;
594						VOP_BWRITE(bp);
595					}
596				} else {
597					(void) VOP_BWRITE(bp);
598				}
599				break;
600			}
601			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
602			brelse(bp);
603		}
604	}
605
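	/*
	 * Wait for any writes still in progress on the vnode to drain.
	 */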
606	while (vp->v_numoutput > 0) {
607		vp->v_flag |= VBWAIT;
608		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
609	}
610
611	splx(s);
612
613	/*
614	 * Destroy the copy in the VM cache, too.
615	 */
616	simple_lock(&vp->v_interlock);
617	object = vp->v_object;
618	if (object != NULL) {
619		if (flags & V_SAVEMETA)
620			vm_object_page_remove(object, 0, object->size,
621				(flags & V_SAVE) ? TRUE : FALSE);
622		else
623			vm_object_page_remove(object, 0, 0,
624				(flags & V_SAVE) ? TRUE : FALSE);
625	}
626	simple_unlock(&vp->v_interlock);
627
628	if (!(flags & V_SAVEMETA) &&
629	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
630		panic("vinvalbuf: flush failed");
631	return (0);
632}
633
634/*
635 * Associate a buffer with a vnode.
636 */
637void
638bgetvp(vp, bp)
639	register struct vnode *vp;
640	register struct buf *bp;
641{
642	int s;
643
644#if defined(DIAGNOSTIC)
645	if (bp->b_vp)
646		panic("bgetvp: not free");
647#endif
648	vhold(vp);
649	bp->b_vp = vp;
650	if (vp->v_type == VBLK || vp->v_type == VCHR)
651		bp->b_dev = vp->v_rdev;
652	else
653		bp->b_dev = NODEV;
654	/*
655	 * Insert onto list for new vnode.
656	 */
657	s = splbio();
658	bufinsvn(bp, &vp->v_cleanblkhd);
659	splx(s);
660}
661
662/*
663 * Disassociate a buffer from a vnode.
664 */
665void
666brelvp(bp)
667	register struct buf *bp;
668{
669	struct vnode *vp;
670	int s;
671
672#if defined(DIAGNOSTIC)
673	if (bp->b_vp == (struct vnode *) 0)
674		panic("brelvp: NULL");
675#endif
676
677	/*
678	 * Delete from old vnode list, if on one.
679	 */
680	s = splbio();
681	if (bp->b_vnbufs.le_next != NOLIST)
682		bufremvn(bp);
683	splx(s);
684
685	vp = bp->b_vp;
686	bp->b_vp = (struct vnode *) 0;
687	vdrop(vp);
688}
689
690/*
691 * Associate a p-buffer with a vnode.
692 */
693void
694pbgetvp(vp, bp)
695	register struct vnode *vp;
696	register struct buf *bp;
697{
698#if defined(DIAGNOSTIC)
699	if (bp->b_vp)
700		panic("pbgetvp: not free");
701#endif
702	bp->b_vp = vp;
703	if (vp->v_type == VBLK || vp->v_type == VCHR)
704		bp->b_dev = vp->v_rdev;
705	else
706		bp->b_dev = NODEV;
707}
708
709/*
710 * Disassociate a p-buffer from a vnode.
711 */
712void
713pbrelvp(bp)
714	register struct buf *bp;
715{
716
717#if defined(DIAGNOSTIC)
718	if (bp->b_vp == (struct vnode *) 0)
719		panic("pbrelvp: NULL");
720#endif
721
722	bp->b_vp = (struct vnode *) 0;
723}
724
725/*
726 * Reassign a buffer from one vnode to another.
727 * Used to assign file specific control information
728 * (indirect blocks) to the vnode to which they belong.
729 */
730void
731reassignbuf(bp, newvp)
732	register struct buf *bp;
733	register struct vnode *newvp;
734{
735	int s;
736
737	if (newvp == NULL) {
738		printf("reassignbuf: NULL");
739		return;
740	}
741
742	s = splbio();
743	/*
744	 * Delete from old vnode list, if on one.
745	 */
746	if (bp->b_vnbufs.le_next != NOLIST) {
747		bufremvn(bp);
748		vdrop(bp->b_vp);
749	}
750	/*
751	 * If dirty, put on list of dirty buffers; otherwise insert onto list
752	 * of clean buffers.
753	 */
754	if (bp->b_flags & B_DELWRI) {
755		struct buf *tbp;
756
757		tbp = newvp->v_dirtyblkhd.lh_first;
758		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
759			bufinsvn(bp, &newvp->v_dirtyblkhd);
760		} else {
761			while (tbp->b_vnbufs.le_next &&
762				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
763				tbp = tbp->b_vnbufs.le_next;
764			}
765			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
766		}
767	} else {
768		bufinsvn(bp, &newvp->v_cleanblkhd);
769	}
770	bp->b_vp = newvp;
771	vhold(bp->b_vp);
772	splx(s);
773}
774
775#ifndef DEVFS_ROOT
776/*
777 * Create a vnode for a block device.
778 * Used for mounting the root file system.
779 */
780int
781bdevvp(dev, vpp)
782	dev_t dev;
783	struct vnode **vpp;
784{
785	register struct vnode *vp;
786	struct vnode *nvp;
787	int error;
788
789	if (dev == NODEV)
790		return (0);
791	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
792	if (error) {
793		*vpp = 0;
794		return (error);
795	}
796	vp = nvp;
797	vp->v_type = VBLK;
798	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
799		vput(vp);
800		vp = nvp;
801	}
802	*vpp = vp;
803	return (0);
804}
805#endif /* !DEVFS_ROOT */
806
807/*
808 * Check to see if the new vnode represents a special device
809 * for which we already have a vnode (either because of
810 * bdevvp() or because of a different vnode representing
811 * the same block device). If such an alias exists, deallocate
812 * the existing contents and return the aliased vnode. The
813 * caller is responsible for filling it with its new contents.
814 */
815struct vnode *
816checkalias(nvp, nvp_rdev, mp)
817	register struct vnode *nvp;
818	dev_t nvp_rdev;
819	struct mount *mp;
820{
821	struct proc *p = curproc;	/* XXX */
822	struct vnode *vp;
823	struct vnode **vpp;
824
825	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
826		return (NULLVP);
827
828	vpp = &speclisth[SPECHASH(nvp_rdev)];
829loop:
830	simple_lock(&spechash_slock);
831	for (vp = *vpp; vp; vp = vp->v_specnext) {
832		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
833			continue;
834		/*
835		 * Alias, but not in use, so flush it out.
836		 */
837		simple_lock(&vp->v_interlock);
838		if (vp->v_usecount == 0) {
839			simple_unlock(&spechash_slock);
840			vgonel(vp, p);
841			goto loop;
842		}
843		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
844			simple_unlock(&spechash_slock);
845			goto loop;
846		}
847		break;
848	}
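	/*
	 * Either no alias was found or the existing vnode is in use by
	 * a filesystem, so attach special device info to the new vnode
	 * and link it onto the hash chain.
	 */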
849	if (vp == NULL || vp->v_tag != VT_NON) {
850		MALLOC(nvp->v_specinfo, struct specinfo *,
851		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
852		nvp->v_rdev = nvp_rdev;
853		nvp->v_hashchain = vpp;
854		nvp->v_specnext = *vpp;
855		nvp->v_specflags = 0;
856		simple_unlock(&spechash_slock);
857		*vpp = nvp;
858		if (vp != NULLVP) {
859			nvp->v_flag |= VALIASED;
860			vp->v_flag |= VALIASED;
861			vput(vp);
862		}
863		return (NULLVP);
864	}
865	simple_unlock(&spechash_slock);
866	VOP_UNLOCK(vp, 0, p);
867	simple_lock(&vp->v_interlock);
868	vclean(vp, 0, p);
869	vp->v_op = nvp->v_op;
870	vp->v_tag = nvp->v_tag;
871	nvp->v_type = VNON;
872	insmntque(vp, mp);
873	return (vp);
874}
875
876/*
877 * Grab a particular vnode from the free list, increment its
878 * reference count and lock it. The vnode lock bit is set if the
879 * vnode is being eliminated in vgone. The process is awakened
880 * when the transition is completed, and an error returned to
881 * indicate that the vnode is no longer usable (possibly having
882 * been changed to a new file system type).
883 */
884int
885vget(vp, flags, p)
886	register struct vnode *vp;
887	int flags;
888	struct proc *p;
889{
890	int error;
891
892	/*
893	 * If the vnode is in the process of being cleaned out for
894	 * another use, we wait for the cleaning to finish and then
895	 * return failure. Cleaning is determined by checking that
896	 * the VXLOCK flag is set.
897	 */
898	if ((flags & LK_INTERLOCK) == 0) {
899		simple_lock(&vp->v_interlock);
900	}
901	if (vp->v_flag & VXLOCK) {
902		vp->v_flag |= VXWANT;
903		simple_unlock(&vp->v_interlock);
904		tsleep((caddr_t)vp, PINOD, "vget", 0);
905		return (ENOENT);
906	}
907
908	vp->v_usecount++;
909
910	if (VSHOULDBUSY(vp))
911		vbusy(vp);
912	/*
913	 * Create the VM object, if needed
914	 */
915	if ((flags & LK_NOOBJ) == 0 &&
916		   (vp->v_type == VREG) &&
917		   ((vp->v_object == NULL) ||
918			(vp->v_object->flags & OBJ_DEAD))) {
919		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
920	}
921	if (flags & LK_TYPE_MASK) {
922		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
923			vrele(vp);
924		return (error);
925	}
926	simple_unlock(&vp->v_interlock);
927	return (0);
928}
929
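/*
 * Increment the use count on a vnode.
 */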
930void
931vref(struct vnode *vp)
932{
933	simple_lock(&vp->v_interlock);
934	vp->v_usecount++;
935	simple_unlock(&vp->v_interlock);
936}
937
938/*
939 * Vnode put/release.
940 * If count drops to zero, call inactive routine and return to freelist.
941 */
942void
943vrele(vp)
944	struct vnode *vp;
945{
946	struct proc *p = curproc;	/* XXX */
947
948#ifdef DIAGNOSTIC
949	if (vp == NULL)
950		panic("vrele: null vp");
951#endif
952	simple_lock(&vp->v_interlock);
953
954	if (vp->v_usecount > 1) {
955
956		vp->v_usecount--;
957		simple_unlock(&vp->v_interlock);
958
959		return;
960	}
961
962	if (vp->v_usecount == 1) {
963
964		vp->v_usecount--;
965
966		if (VSHOULDFREE(vp))
967			vfree(vp);
968	/*
969	 * If we are doing a vput, the node is already locked, and we must
970	 * call VOP_INACTIVE with the node locked.  So, in the case of
971	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
972	 */
973		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
974			VOP_INACTIVE(vp, p);
975		}
976
977	} else {
978#ifdef DIAGNOSTIC
979		vprint("vrele: negative ref count", vp);
980		simple_unlock(&vp->v_interlock);
981#endif
982		panic("vrele: negative ref cnt");
983	}
984}
985
986void
987vput(vp)
988	struct vnode *vp;
989{
990	struct proc *p = curproc;	/* XXX */
991
992#ifdef DIAGNOSTIC
993	if (vp == NULL)
994		panic("vput: null vp");
995#endif
996
997	simple_lock(&vp->v_interlock);
998
999	if (vp->v_usecount > 1) {
1000
1001		vp->v_usecount--;
1002		VOP_UNLOCK(vp, LK_INTERLOCK, p);
1003		return;
1004
1005	}
1006
1007	if (vp->v_usecount == 1) {
1008
1009		vp->v_usecount--;
1010		if (VSHOULDFREE(vp))
1011			vfree(vp);
1012	/*
1013	 * If we are doing a vput, the node is already locked, and we must
1014	 * call VOP_INACTIVE with the node locked.  So, in the case of
1015	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1016	 */
1017		simple_unlock(&vp->v_interlock);
1018		VOP_INACTIVE(vp, p);
1019
1020	} else {
1021#ifdef DIAGNOSTIC
1022		vprint("vput: negative ref count", vp);
1023#endif
1024		panic("vput: negative ref cnt");
1025	}
1026}
1027
1028/*
1029 * Somebody doesn't want the vnode recycled.
1030 */
1031void
1032vhold(vp)
1033	register struct vnode *vp;
1034{
1035
1036	simple_lock(&vp->v_interlock);
1037	vp->v_holdcnt++;
1038	if (VSHOULDBUSY(vp))
1039		vbusy(vp);
1040	simple_unlock(&vp->v_interlock);
1041}
1042
1043/*
1044 * One less who cares about this vnode.
1045 */
1046void
1047vdrop(vp)
1048	register struct vnode *vp;
1049{
1050
1051	simple_lock(&vp->v_interlock);
1052	if (vp->v_holdcnt <= 0)
1053		panic("holdrele: holdcnt");
1054	vp->v_holdcnt--;
1055	if (VSHOULDFREE(vp))
1056		vfree(vp);
1057	simple_unlock(&vp->v_interlock);
1058}
1059
1060/*
1061 * Remove any vnodes in the vnode table belonging to mount point mp.
1062 *
1063 * If MNT_NOFORCE is specified, there should not be any active ones,
1064 * return error if any are found (nb: this is a user error, not a
1065 * system error). If MNT_FORCE is specified, detach any active vnodes
1066 * that are found.
1067 */
1068#ifdef DIAGNOSTIC
1069static int busyprt = 0;		/* print out busy vnodes */
1070SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1071#endif
1072
1073int
1074vflush(mp, skipvp, flags)
1075	struct mount *mp;
1076	struct vnode *skipvp;
1077	int flags;
1078{
1079	struct proc *p = curproc;	/* XXX */
1080	struct vnode *vp, *nvp;
1081	int busy = 0;
1082
1083	simple_lock(&mntvnode_slock);
1084loop:
1085	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1086		/*
1087		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1088		 * Start over if it has (it won't be on the list anymore).
1089		 */
1090		if (vp->v_mount != mp)
1091			goto loop;
1092		nvp = vp->v_mntvnodes.le_next;
1093		/*
1094		 * Skip over a selected vnode.
1095		 */
1096		if (vp == skipvp)
1097			continue;
1098
1099		simple_lock(&vp->v_interlock);
1100		/*
1101		 * Skip over vnodes marked VSYSTEM.
1102		 */
1103		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1104			simple_unlock(&vp->v_interlock);
1105			continue;
1106		}
1107		/*
1108		 * If WRITECLOSE is set, only flush out regular file vnodes
1109		 * open for writing.
1110		 */
1111		if ((flags & WRITECLOSE) &&
1112		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1113			simple_unlock(&vp->v_interlock);
1114			continue;
1115		}
1116
1117		/*
1118		 * With v_usecount == 0, all we need to do is clear out the
1119		 * vnode data structures and we are done.
1120		 */
1121		if (vp->v_usecount == 0) {
1122			simple_unlock(&mntvnode_slock);
1123			vgonel(vp, p);
1124			simple_lock(&mntvnode_slock);
1125			continue;
1126		}
1127
1128		/*
1129		 * If FORCECLOSE is set, forcibly close the vnode. For block
1130		 * or character devices, revert to an anonymous device. For
1131		 * all other files, just kill them.
1132		 */
1133		if (flags & FORCECLOSE) {
1134			simple_unlock(&mntvnode_slock);
1135			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1136				vgonel(vp, p);
1137			} else {
1138				vclean(vp, 0, p);
1139				vp->v_op = spec_vnodeop_p;
1140				insmntque(vp, (struct mount *) 0);
1141			}
1142			simple_lock(&mntvnode_slock);
1143			continue;
1144		}
1145#ifdef DIAGNOSTIC
1146		if (busyprt)
1147			vprint("vflush: busy vnode", vp);
1148#endif
1149		simple_unlock(&vp->v_interlock);
1150		busy++;
1151	}
1152	simple_unlock(&mntvnode_slock);
1153	if (busy)
1154		return (EBUSY);
1155	return (0);
1156}
1157
1158/*
1159 * Disassociate the underlying file system from a vnode.
1160 */
1161static void
1162vclean(vp, flags, p)
1163	struct vnode *vp;
1164	int flags;
1165	struct proc *p;
1166{
1167	int active;
1168
1169	/*
1170	 * Check to see if the vnode is in use. If so we have to reference it
1171	 * before we clean it out so that its count cannot fall to zero and
1172	 * generate a race against ourselves to recycle it.
1173	 */
1174	if ((active = vp->v_usecount))
1175		vp->v_usecount++;
1176
1177	/*
1178	 * Prevent the vnode from being recycled or brought into use while we
1179	 * clean it out.
1180	 */
1181	if (vp->v_flag & VXLOCK)
1182		panic("vclean: deadlock");
1183	vp->v_flag |= VXLOCK;
1184	/*
1185	 * Even if the count is zero, the VOP_INACTIVE routine may still
1186	 * have the object locked while it cleans it out. The VOP_LOCK
1187	 * ensures that the VOP_INACTIVE routine is done with its work.
1188	 * For active vnodes, it ensures that no other activity can
1189	 * occur while the underlying object is being cleaned out.
1190	 */
1191	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1192
1193	/*
1194	 * Clean out any buffers associated with the vnode.
1195	 */
1196	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1197	if (vp->v_object) {
1198		if (vp->v_object->ref_count == 0) {
1199			/*
1200			 * This is a normal way of shutting down the object/vnode
1201			 * association.
1202			 */
1203			vm_object_terminate(vp->v_object);
1204		} else {
1205			/*
1206			 * Woe to the process that tries to page now :-).
1207			 */
1208			vm_pager_deallocate(vp->v_object);
1209		}
1210	}
1211
1212	/*
1213	 * If purging an active vnode, it must be closed and
1214	 * deactivated before being reclaimed. Note that the
1215	 * VOP_INACTIVE will unlock the vnode.
1216	 */
1217	if (active) {
1218		if (flags & DOCLOSE)
1219			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1220		VOP_INACTIVE(vp, p);
1221	} else {
1222		/*
1223		 * Any other processes trying to obtain this lock must first
1224		 * wait for VXLOCK to clear, then call the new lock operation.
1225		 */
1226		VOP_UNLOCK(vp, 0, p);
1227	}
1228	/*
1229	 * Reclaim the vnode.
1230	 */
1231	if (VOP_RECLAIM(vp, p))
1232		panic("vclean: cannot reclaim");
1233	if (active)
1234		vrele(vp);
1235	cache_purge(vp);
1236	if (vp->v_vnlock) {
1237#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
1238#ifdef DIAGNOSTIC
1239		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1240			vprint("vclean: lock not drained", vp);
1241#endif
1242#endif
1243		FREE(vp->v_vnlock, M_VNODE);
1244		vp->v_vnlock = NULL;
1245	}
1246
1247	/*
1248	 * Done with purge, notify sleepers of the grim news.
1249	 */
1250	vp->v_op = dead_vnodeop_p;
1251	vn_pollgone(vp);
1252	vp->v_tag = VT_NON;
1253	vp->v_flag &= ~VXLOCK;
1254	if (vp->v_flag & VXWANT) {
1255		vp->v_flag &= ~VXWANT;
1256		wakeup((caddr_t) vp);
1257	}
1258}
1259
1260/*
1261 * Eliminate all activity associated with the requested vnode
1262 * and with all vnodes aliased to the requested vnode.
1263 */
1264int
1265vop_revoke(ap)
1266	struct vop_revoke_args /* {
1267		struct vnode *a_vp;
1268		int a_flags;
1269	} */ *ap;
1270{
1271	struct vnode *vp, *vq;
1272	struct proc *p = curproc;	/* XXX */
1273
1274#ifdef DIAGNOSTIC
1275	if ((ap->a_flags & REVOKEALL) == 0)
1276		panic("vop_revoke");
1277#endif
1278
1279	vp = ap->a_vp;
1280	simple_lock(&vp->v_interlock);
1281
1282	if (vp->v_flag & VALIASED) {
1283		/*
1284		 * If a vgone (or vclean) is already in progress,
1285		 * wait until it is done and return.
1286		 */
1287		if (vp->v_flag & VXLOCK) {
1288			vp->v_flag |= VXWANT;
1289			simple_unlock(&vp->v_interlock);
1290			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1291			return (0);
1292		}
1293		/*
1294		 * Ensure that vp will not be vgone'd while we
1295		 * are eliminating its aliases.
1296		 */
1297		vp->v_flag |= VXLOCK;
1298		simple_unlock(&vp->v_interlock);
1299		while (vp->v_flag & VALIASED) {
1300			simple_lock(&spechash_slock);
1301			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1302				if (vq->v_rdev != vp->v_rdev ||
1303				    vq->v_type != vp->v_type || vp == vq)
1304					continue;
1305				simple_unlock(&spechash_slock);
1306				vgone(vq);
1307				break;
1308			}
1309			if (vq == NULLVP) {
1310				simple_unlock(&spechash_slock);
1311			}
1312		}
1313		/*
1314		 * Remove the lock so that vgone below will
1315		 * really eliminate the vnode after which time
1316		 * vgone will awaken any sleepers.
1317		 */
1318		simple_lock(&vp->v_interlock);
1319		vp->v_flag &= ~VXLOCK;
1320		if (vp->v_flag & VXWANT) {
1321			vp->v_flag &= ~VXWANT;
1322			wakeup(vp);
1323		}
1324	}
1325	vgonel(vp, p);
1326	return (0);
1327}
1328
1329/*
1330 * Recycle an unused vnode to the front of the free list.
1331 * Release the passed interlock if the vnode will be recycled.
1332 */
1333int
1334vrecycle(vp, inter_lkp, p)
1335	struct vnode *vp;
1336	struct simplelock *inter_lkp;
1337	struct proc *p;
1338{
1339
1340	simple_lock(&vp->v_interlock);
1341	if (vp->v_usecount == 0) {
1342		if (inter_lkp) {
1343			simple_unlock(inter_lkp);
1344		}
1345		vgonel(vp, p);
1346		return (1);
1347	}
1348	simple_unlock(&vp->v_interlock);
1349	return (0);
1350}
1351
1352/*
1353 * Eliminate all activity associated with a vnode
1354 * in preparation for reuse.
1355 */
1356void
1357vgone(vp)
1358	register struct vnode *vp;
1359{
1360	struct proc *p = curproc;	/* XXX */
1361
1362	simple_lock(&vp->v_interlock);
1363	vgonel(vp, p);
1364}
1365
1366/*
1367 * vgone, with the vp interlock held.
1368 */
1369static void
1370vgonel(vp, p)
1371	struct vnode *vp;
1372	struct proc *p;
1373{
1374	int s;
1375	struct vnode *vq;
1376	struct vnode *vx;
1377
1378	/*
1379	 * If a vgone (or vclean) is already in progress,
1380	 * wait until it is done and return.
1381	 */
1382	if (vp->v_flag & VXLOCK) {
1383		vp->v_flag |= VXWANT;
1384		simple_unlock(&vp->v_interlock);
1385		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1386		return;
1387	}
1388
1389	/*
1390	 * Clean out the filesystem specific data.
1391	 */
1392	vclean(vp, DOCLOSE, p);
1393
1394	/*
1395	 * Delete from old mount point vnode list, if on one.
1396	 */
1397	if (vp->v_mount != NULL)
1398		insmntque(vp, (struct mount *)0);
1399	/*
1400	 * If special device, remove it from special device alias list
1401	 * if it is on one.
1402	 */
1403	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1404		simple_lock(&spechash_slock);
1405		if (*vp->v_hashchain == vp) {
1406			*vp->v_hashchain = vp->v_specnext;
1407		} else {
1408			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1409				if (vq->v_specnext != vp)
1410					continue;
1411				vq->v_specnext = vp->v_specnext;
1412				break;
1413			}
1414			if (vq == NULL)
1415				panic("missing bdev");
1416		}
1417		if (vp->v_flag & VALIASED) {
1418			vx = NULL;
1419			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1420				if (vq->v_rdev != vp->v_rdev ||
1421				    vq->v_type != vp->v_type)
1422					continue;
1423				if (vx)
1424					break;
1425				vx = vq;
1426			}
1427			if (vx == NULL)
1428				panic("missing alias");
1429			if (vq == NULL)
1430				vx->v_flag &= ~VALIASED;
1431			vp->v_flag &= ~VALIASED;
1432		}
1433		simple_unlock(&spechash_slock);
1434		FREE(vp->v_specinfo, M_VNODE);
1435		vp->v_specinfo = NULL;
1436	}
1437
1438	/*
1439	 * If it is on the freelist and not already at the head,
1440	 * move it to the head of the list. The test of the back
1441	 * pointer and the reference count of zero is because
1442	 * it will be removed from the free list by getnewvnode,
1443	 * but will not have its reference count incremented until
1444	 * after calling vgone. If the reference count were
1445	 * incremented first, vgone would (incorrectly) try to
1446	 * close the previous instance of the underlying object.
1447	 */
1448	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1449		s = splbio();
1450		simple_lock(&vnode_free_list_slock);
1451		if (vp->v_flag & VFREE) {
1452			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1453		} else if (vp->v_flag & VTBFREE) {
1454			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
1455			vp->v_flag &= ~VTBFREE;
1456		}
1457		vp->v_flag |= VFREE;
1458		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1459		simple_unlock(&vnode_free_list_slock);
1460		splx(s);
1461	}
1462
1463	vp->v_type = VBAD;
1464	simple_unlock(&vp->v_interlock);
1465}
1466
1467/*
1468 * Lookup a vnode by device number.
1469 */
1470int
1471vfinddev(dev, type, vpp)
1472	dev_t dev;
1473	enum vtype type;
1474	struct vnode **vpp;
1475{
1476	register struct vnode *vp;
1477	int rc = 0;
1478
1479	simple_lock(&spechash_slock);
1480	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1481		if (dev != vp->v_rdev || type != vp->v_type)
1482			continue;
1483		*vpp = vp;
1484		rc = 1;
1485		break;
1486	}
1487	simple_unlock(&spechash_slock);
1488	return (rc);
1489}
1490
1491/*
1492 * Calculate the total number of references to a special device.
1493 */
1494int
1495vcount(vp)
1496	register struct vnode *vp;
1497{
1498	struct vnode *vq, *vnext;
1499	int count;
1500
1501loop:
1502	if ((vp->v_flag & VALIASED) == 0)
1503		return (vp->v_usecount);
1504	simple_lock(&spechash_slock);
1505	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1506		vnext = vq->v_specnext;
1507		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1508			continue;
1509		/*
1510		 * Alias, but not in use, so flush it out.
1511		 */
1512		if (vq->v_usecount == 0 && vq != vp) {
1513			simple_unlock(&spechash_slock);
1514			vgone(vq);
1515			goto loop;
1516		}
1517		count += vq->v_usecount;
1518	}
1519	simple_unlock(&spechash_slock);
1520	return (count);
1521}
1522/*
1523 * Print out a description of a vnode.
1524 */
1525static char *typename[] =
1526{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1527
1528void
1529vprint(label, vp)
1530	char *label;
1531	register struct vnode *vp;
1532{
1533	char buf[64];
1534
1535	if (label != NULL)
1536		printf("%s: %x: ", label, vp);
1537	else
1538		printf("%x: ", vp);
1539	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1540	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1541	    vp->v_holdcnt);
1542	buf[0] = '\0';
1543	if (vp->v_flag & VROOT)
1544		strcat(buf, "|VROOT");
1545	if (vp->v_flag & VTEXT)
1546		strcat(buf, "|VTEXT");
1547	if (vp->v_flag & VSYSTEM)
1548		strcat(buf, "|VSYSTEM");
1549	if (vp->v_flag & VXLOCK)
1550		strcat(buf, "|VXLOCK");
1551	if (vp->v_flag & VXWANT)
1552		strcat(buf, "|VXWANT");
1553	if (vp->v_flag & VBWAIT)
1554		strcat(buf, "|VBWAIT");
1555	if (vp->v_flag & VALIASED)
1556		strcat(buf, "|VALIASED");
1557	if (vp->v_flag & VDOOMED)
1558		strcat(buf, "|VDOOMED");
1559	if (vp->v_flag & VFREE)
1560		strcat(buf, "|VFREE");
1561	if (vp->v_flag & VOBJBUF)
1562		strcat(buf, "|VOBJBUF");
1563	if (buf[0] != '\0')
1564		printf(" flags (%s)", &buf[1]);
1565	if (vp->v_data == NULL) {
1566		printf("\n");
1567	} else {
1568		printf("\n\t");
1569		VOP_PRINT(vp);
1570	}
1571}
1572
1573#ifdef DDB
1574/*
1575 * List all of the locked vnodes in the system.
1576 * Called when debugging the kernel.
1577 */
1578static void
1579printlockedvnodes()
1580{
1581	struct proc *p = curproc;	/* XXX */
1582	struct mount *mp, *nmp;
1583	struct vnode *vp;
1584
1585	printf("Locked vnodes\n");
1586	simple_lock(&mountlist_slock);
1587	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1588		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1589			nmp = mp->mnt_list.cqe_next;
1590			continue;
1591		}
1592		for (vp = mp->mnt_vnodelist.lh_first;
1593		     vp != NULL;
1594		     vp = vp->v_mntvnodes.le_next) {
1595			if (VOP_ISLOCKED(vp))
1596				vprint((char *)0, vp);
1597		}
1598		simple_lock(&mountlist_slock);
1599		nmp = mp->mnt_list.cqe_next;
1600		vfs_unbusy(mp, p);
1601	}
1602	simple_unlock(&mountlist_slock);
1603}
1604#endif
1605
1606/*
1607 * Top level filesystem related information gathering.
1608 */
1609static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1610
1611static int
1612vfs_sysctl SYSCTL_HANDLER_ARGS
1613{
1614	int *name = (int *)arg1 - 1;	/* XXX */
1615	u_int namelen = arg2 + 1;	/* XXX */
1616	struct vfsconf *vfsp;
1617
1618#ifndef NO_COMPAT_PRELITE2
1619	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1620	if (namelen == 1)
1621		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1622#endif
1623
1624#ifdef notyet
1625	/* all sysctl names at this level are at least name and field */
1626	if (namelen < 2)
1627		return (ENOTDIR);		/* overloaded */
1628	if (name[0] != VFS_GENERIC) {
1629		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1630			if (vfsp->vfc_typenum == name[0])
1631				break;
1632		if (vfsp == NULL)
1633			return (EOPNOTSUPP);
1634		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1635		    oldp, oldlenp, newp, newlen, p));
1636	}
1637#endif
1638	switch (name[1]) {
1639	case VFS_MAXTYPENUM:
1640		if (namelen != 2)
1641			return (ENOTDIR);
1642		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1643	case VFS_CONF:
1644		if (namelen != 3)
1645			return (ENOTDIR);	/* overloaded */
1646		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1647			if (vfsp->vfc_typenum == name[2])
1648				break;
1649		if (vfsp == NULL)
1650			return (EOPNOTSUPP);
1651		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1652	}
1653	return (EOPNOTSUPP);
1654}
1655
1656SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1657	"Generic filesystem");
1658
1659#ifndef NO_COMPAT_PRELITE2
1660
1661static int
1662sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1663{
1664	int error;
1665	struct vfsconf *vfsp;
1666	struct ovfsconf ovfs;
1667
1668	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1669		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1670		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1671		ovfs.vfc_index = vfsp->vfc_typenum;
1672		ovfs.vfc_refcount = vfsp->vfc_refcount;
1673		ovfs.vfc_flags = vfsp->vfc_flags;
1674		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1675		if (error)
1676			return error;
1677	}
1678	return 0;
1679}
1680
1681#endif /* !NO_COMPAT_PRELITE2 */
1682
1683static volatile int kinfo_vdebug = 1;
1684
1685#if 0
1686#define KINFO_VNODESLOP	10
1687/*
1688 * Dump vnode list (via sysctl).
1689 * Copyout address of vnode followed by vnode.
1690 */
1691/* ARGSUSED */
1692static int
1693sysctl_vnode SYSCTL_HANDLER_ARGS
1694{
1695	struct proc *p = curproc;	/* XXX */
1696	struct mount *mp, *nmp;
1697	struct vnode *nvp, *vp;
1698	int error;
1699
1700#define VPTRSZ	sizeof (struct vnode *)
1701#define VNODESZ	sizeof (struct vnode)
1702
1703	req->lock = 0;
1704	if (!req->oldptr) /* Make an estimate */
1705		return (SYSCTL_OUT(req, 0,
1706			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1707
1708	simple_lock(&mountlist_slock);
1709	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1710		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1711			nmp = mp->mnt_list.cqe_next;
1712			continue;
1713		}
1714again:
1715		simple_lock(&mntvnode_slock);
1716		for (vp = mp->mnt_vnodelist.lh_first;
1717		     vp != NULL;
1718		     vp = nvp) {
1719			/*
1720			 * Check that the vp is still associated with
1721			 * this filesystem.  RACE: could have been
1722			 * recycled onto the same filesystem.
1723			 */
1724			if (vp->v_mount != mp) {
1725				simple_unlock(&mntvnode_slock);
1726				if (kinfo_vdebug)
1727					printf("kinfo: vp changed\n");
1728				goto again;
1729			}
1730			nvp = vp->v_mntvnodes.le_next;
1731			simple_unlock(&mntvnode_slock);
1732			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1733			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1734				return (error);
1735			simple_lock(&mntvnode_slock);
1736		}
1737		simple_unlock(&mntvnode_slock);
1738		simple_lock(&mountlist_slock);
1739		nmp = mp->mnt_list.cqe_next;
1740		vfs_unbusy(mp, p);
1741	}
1742	simple_unlock(&mountlist_slock);
1743
1744	return (0);
1745}
1746#endif
1747
1748/*
1749 * XXX
1750 * Exporting the vnode list on large systems causes them to crash.
1751 * Exporting the vnode list on medium systems causes sysctl to coredump.
1752 */
1753#if 0
1754SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1755	0, 0, sysctl_vnode, "S,vnode", "");
1756#endif
1757
1758/*
1759 * Check to see if a filesystem is mounted on a block device.
1760 */
1761int
1762vfs_mountedon(vp)
1763	struct vnode *vp;
1764{
1765	struct vnode *vq;
1766	int error = 0;
1767
1768	if (vp->v_specflags & SI_MOUNTEDON)
1769		return (EBUSY);
1770	if (vp->v_flag & VALIASED) {
1771		simple_lock(&spechash_slock);
1772		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1773			if (vq->v_rdev != vp->v_rdev ||
1774			    vq->v_type != vp->v_type)
1775				continue;
1776			if (vq->v_specflags & SI_MOUNTEDON) {
1777				error = EBUSY;
1778				break;
1779			}
1780		}
1781		simple_unlock(&spechash_slock);
1782	}
1783	return (error);
1784}
1785
1786/*
1787 * Unmount all filesystems. The list is traversed in reverse order
1788 * of mounting to avoid dependencies.
1789 */
1790void
1791vfs_unmountall()
1792{
1793	struct mount *mp, *nmp;
1794	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1795	int error;
1796
1797	/*
1798	 * Since this only runs when rebooting, it is not interlocked.
1799	 */
1800	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1801		nmp = mp->mnt_list.cqe_prev;
1802		error = dounmount(mp, MNT_FORCE, p);
1803		if (error) {
1804			printf("unmount of %s failed (",
1805			    mp->mnt_stat.f_mntonname);
1806			if (error == EBUSY)
1807				printf("BUSY)\n");
1808			else
1809				printf("%d)\n", error);
1810		}
1811	}
1812}
1813
1814/*
1815 * Build hash lists of net addresses and hang them off the mount point.
1816 * Called by ufs_mount() to set up the lists of export addresses.
1817 */
1818static int
1819vfs_hang_addrlist(mp, nep, argp)
1820	struct mount *mp;
1821	struct netexport *nep;
1822	struct export_args *argp;
1823{
1824	register struct netcred *np;
1825	register struct radix_node_head *rnh;
1826	register int i;
1827	struct radix_node *rn;
1828	struct sockaddr *saddr, *smask = 0;
1829	struct domain *dom;
1830	int error;
1831
1832	if (argp->ex_addrlen == 0) {
1833		if (mp->mnt_flag & MNT_DEFEXPORTED)
1834			return (EPERM);
1835		np = &nep->ne_defexported;
1836		np->netc_exflags = argp->ex_flags;
1837		np->netc_anon = argp->ex_anon;
1838		np->netc_anon.cr_ref = 1;
1839		mp->mnt_flag |= MNT_DEFEXPORTED;
1840		return (0);
1841	}
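	/*
	 * The netcred, the exported address and its mask are allocated
	 * as one contiguous chunk; saddr and smask point into it.
	 */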
1842	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1843	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1844	bzero((caddr_t) np, i);
1845	saddr = (struct sockaddr *) (np + 1);
1846	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1847		goto out;
1848	if (saddr->sa_len > argp->ex_addrlen)
1849		saddr->sa_len = argp->ex_addrlen;
1850	if (argp->ex_masklen) {
1851		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1852		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1853		if (error)
1854			goto out;
1855		if (smask->sa_len > argp->ex_masklen)
1856			smask->sa_len = argp->ex_masklen;
1857	}
1858	i = saddr->sa_family;
1859	if ((rnh = nep->ne_rtable[i]) == 0) {
1860		/*
1861		 * Seems silly to initialize every AF when most are not used,
1862		 * do so on demand here
1863		 */
1864		for (dom = domains; dom; dom = dom->dom_next)
1865			if (dom->dom_family == i && dom->dom_rtattach) {
1866				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1867				    dom->dom_rtoffset);
1868				break;
1869			}
1870		if ((rnh = nep->ne_rtable[i]) == 0) {
1871			error = ENOBUFS;
1872			goto out;
1873		}
1874	}
1875	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1876	    np->netc_rnodes);
1877	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
1878		error = EPERM;
1879		goto out;
1880	}
1881	np->netc_exflags = argp->ex_flags;
1882	np->netc_anon = argp->ex_anon;
1883	np->netc_anon.cr_ref = 1;
1884	return (0);
1885out:
1886	free(np, M_NETADDR);
1887	return (error);
1888}
1889
1890/* ARGSUSED */
1891static int
1892vfs_free_netcred(rn, w)
1893	struct radix_node *rn;
1894	void *w;
1895{
1896	register struct radix_node_head *rnh = (struct radix_node_head *) w;
1897
1898	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
1899	free((caddr_t) rn, M_NETADDR);
1900	return (0);
1901}
1902
1903/*
1904 * Free the net address hash lists that are hanging off the mount points.
1905 */
1906static void
1907vfs_free_addrlist(nep)
1908	struct netexport *nep;
1909{
1910	register int i;
1911	register struct radix_node_head *rnh;
1912
1913	for (i = 0; i <= AF_MAX; i++)
1914		if ((rnh = nep->ne_rtable[i])) {
1915			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
1916			    (caddr_t) rnh);
1917			free((caddr_t) rnh, M_RTABLE);
1918			nep->ne_rtable[i] = 0;
1919		}
1920}
1921
1922int
1923vfs_export(mp, nep, argp)
1924	struct mount *mp;
1925	struct netexport *nep;
1926	struct export_args *argp;
1927{
1928	int error;
1929
1930	if (argp->ex_flags & MNT_DELEXPORT) {
1931		if (mp->mnt_flag & MNT_EXPUBLIC) {
1932			vfs_setpublicfs(NULL, NULL, NULL);
1933			mp->mnt_flag &= ~MNT_EXPUBLIC;
1934		}
1935		vfs_free_addrlist(nep);
1936		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1937	}
1938	if (argp->ex_flags & MNT_EXPORTED) {
1939		if (argp->ex_flags & MNT_EXPUBLIC) {
1940			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
1941				return (error);
1942			mp->mnt_flag |= MNT_EXPUBLIC;
1943		}
1944		if ((error = vfs_hang_addrlist(mp, nep, argp)))
1945			return (error);
1946		mp->mnt_flag |= MNT_EXPORTED;
1947	}
1948	return (0);
1949}
1950
1951
1952/*
1953 * Set the publicly exported filesystem (WebNFS). Currently, only
1954 * one public filesystem is possible in the spec (RFC 2054 and 2055)
1955 */
1956int
1957vfs_setpublicfs(mp, nep, argp)
1958	struct mount *mp;
1959	struct netexport *nep;
1960	struct export_args *argp;
1961{
1962	int error;
1963	struct vnode *rvp;
1964	char *cp;
1965
1966	/*
1967	 * mp == NULL -> invalidate the current info, the FS is
1968	 * no longer exported. May be called from either vfs_export
1969	 * or unmount, so check if it hasn't already been done.
1970	 */
1971	if (mp == NULL) {
1972		if (nfs_pub.np_valid) {
1973			nfs_pub.np_valid = 0;
1974			if (nfs_pub.np_index != NULL) {
1975				FREE(nfs_pub.np_index, M_TEMP);
1976				nfs_pub.np_index = NULL;
1977			}
1978		}
1979		return (0);
1980	}
1981
1982	/*
1983	 * Only one allowed at a time.
1984	 */
1985	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
1986		return (EBUSY);
1987
1988	/*
1989	 * Get real filehandle for root of exported FS.
1990	 */
1991	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
1992	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
1993
1994	if ((error = VFS_ROOT(mp, &rvp)))
1995		return (error);
1996
1997	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
1998		return (error);
1999
2000	vput(rvp);
2001
2002	/*
2003	 * If an indexfile was specified, pull it in.
2004	 */
2005	if (argp->ex_indexfile != NULL) {
2006		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2007		    M_WAITOK);
2008		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2009		    MAXNAMLEN, (size_t *)0);
2010		if (!error) {
2011			/*
2012			 * Check for illegal filenames.
2013			 */
2014			for (cp = nfs_pub.np_index; *cp; cp++) {
2015				if (*cp == '/') {
2016					error = EINVAL;
2017					break;
2018				}
2019			}
2020		}
2021		if (error) {
2022			FREE(nfs_pub.np_index, M_TEMP);
2023			return (error);
2024		}
2025	}
2026
2027	nfs_pub.np_mount = mp;
2028	nfs_pub.np_valid = 1;
2029	return (0);
2030}
2031
2032struct netcred *
2033vfs_export_lookup(mp, nep, nam)
2034	register struct mount *mp;
2035	struct netexport *nep;
2036	struct sockaddr *nam;
2037{
2038	register struct netcred *np;
2039	register struct radix_node_head *rnh;
2040	struct sockaddr *saddr;
2041
2042	np = NULL;
2043	if (mp->mnt_flag & MNT_EXPORTED) {
2044		/*
2045		 * Lookup in the export list first.
2046		 */
2047		if (nam != NULL) {
2048			saddr = nam;
2049			rnh = nep->ne_rtable[saddr->sa_family];
2050			if (rnh != NULL) {
2051				np = (struct netcred *)
2052					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2053							      rnh);
2054				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2055					np = NULL;
2056			}
2057		}
2058		/*
2059		 * If no address match, use the default if it exists.
2060		 */
2061		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2062			np = &nep->ne_defexported;
2063	}
2064	return (np);
2065}
2066
2067/*
2068 * Perform msync on all vnodes under a mount point.
2069 * The mount point must be locked.
2070 */
2071void
2072vfs_msync(struct mount *mp, int flags) {
2073	struct vnode *vp, *nvp;
2074	int anyio, tries;
2075
2076	tries = 5;
2077loop:
2078	anyio = 0;
2079	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2080
2081		nvp = vp->v_mntvnodes.le_next;
2082
2083		if (vp->v_mount != mp) {
2084			goto loop;
2085		}
2086
2087		if ((vp->v_flag & VXLOCK) ||
2088			(VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) {
2089			continue;
2090		}
2091
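		/*
		 * Flush the pages of any vnode whose VM object might be
		 * dirty, dropping the reference taken by vget() when done.
		 */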
2092		simple_lock(&vp->v_interlock);
2093		if (vp->v_object &&
2094		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2095			if (!vget(vp,
2096				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2097				if (vp->v_object) {
2098					vm_object_page_clean(vp->v_object, 0, 0, TRUE);
2099					anyio = 1;
2100				}
2101				vput(vp);
2102			}
2103		} else {
2104			simple_unlock(&vp->v_interlock);
2105		}
2106	}
2107	if (anyio && (--tries > 0))
2108		goto loop;
2109}
2110
2111/*
2112 * Create the VM object needed for VMIO and mmap support.  This
2113 * is done for all VREG files in the system.  Some filesystems might
2114 * gain the additional metadata buffering capability of the
2115 * VMIO code by making the device node VMIO mode as well.
2116 *
2117 * If !waslocked, must be called with interlock.
2118 */
2119int
2120vfs_object_create(vp, p, cred, waslocked)
2121	struct vnode *vp;
2122	struct proc *p;
2123	struct ucred *cred;
2124	int waslocked;
2125{
2126	struct vattr vat;
2127	vm_object_t object;
2128	int error = 0;
2129
2130	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2131		return 0;
2132	}
2133
2134	if (!waslocked)
2135		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);
2136
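	/*
	 * Allocate a VM object for the vnode if it does not already have
	 * one, retrying if an existing object is being torn down.
	 */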
2137retry:
2138	if ((object = vp->v_object) == NULL) {
2139		if (vp->v_type == VREG) {
2140			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2141				goto retn;
2142			object = vnode_pager_alloc(vp,
2143				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2144		} else if (major(vp->v_rdev) < nblkdev) {
2145			/*
2146			 * This simply allocates the biggest object possible
2147			 * for a VBLK vnode.  This should be fixed, but doesn't
2148			 * cause any problems (yet).
2149			 */
2150			object = vnode_pager_alloc(vp, INT_MAX, 0, 0);
2151		}
2152		object->ref_count--;
2153		vp->v_usecount--;
2154	} else {
2155		if (object->flags & OBJ_DEAD) {
2156			VOP_UNLOCK(vp, 0, p);
2157			tsleep(object, PVM, "vodead", 0);
2158			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2159			goto retry;
2160		}
2161	}
2162
2163	if (vp->v_object) {
2164		vp->v_flag |= VOBJBUF;
2165	}
2166
2167retn:
2168	if (!waslocked) {
2169		simple_lock(&vp->v_interlock);
2170		VOP_UNLOCK(vp, LK_INTERLOCK, p);
2171	}
2172
2173	return error;
2174}
2175
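/*
 * Move a vnode onto the free list, honoring the VAGE hint.
 */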
2176static void
2177vfree(vp)
2178	struct vnode *vp;
2179{
2180	int s;
2181
2182	s = splbio();
2183	simple_lock(&vnode_free_list_slock);
2184	if (vp->v_flag & VTBFREE) {
2185		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2186		vp->v_flag &= ~VTBFREE;
2187	}
2188	if (vp->v_flag & VAGE) {
2189		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2190	} else {
2191		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2192	}
2193	freevnodes++;
2194	simple_unlock(&vnode_free_list_slock);
2195	vp->v_flag &= ~VAGE;
2196	vp->v_flag |= VFREE;
2197	splx(s);
2198}
2199
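/*
 * Take a vnode off the free (or to-be-freed) list because it is
 * going back into use.
 */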
2200void
2201vbusy(vp)
2202	struct vnode *vp;
2203{
2204	int s;
2205
2206	s = splbio();
2207	simple_lock(&vnode_free_list_slock);
2208	if (vp->v_flag & VTBFREE) {
2209		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2210		vp->v_flag &= ~VTBFREE;
2211	} else {
2212		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2213		freevnodes--;
2214	}
2215	simple_unlock(&vnode_free_list_slock);
2216	vp->v_flag &= ~VFREE;
2217	splx(s);
2218}
2219
2220/*
2221 * Record a process's interest in events which might happen to
2222 * a vnode.  Because poll uses the historic select-style interface
2223 * internally, this routine serves as both the ``check for any
2224 * pending events'' and the ``record my interest in future events''
2225 * functions.  (These are done together, while the lock is held,
2226 * to avoid race conditions.)
2227 */
2228int
2229vn_pollrecord(vp, p, events)
2230	struct vnode *vp;
2231	struct proc *p;
2232	short events;
2233{
2234	simple_lock(&vp->v_pollinfo.vpi_lock);
2235	if (vp->v_pollinfo.vpi_revents & events) {
2236		/*
2237		 * This leaves events we are not interested
2238		 * in available for the other process which
2239		 * presumably had requested them
2240		 * (otherwise they would never have been
2241		 * recorded).
2242		 */
2243		events &= vp->v_pollinfo.vpi_revents;
2244		vp->v_pollinfo.vpi_revents &= ~events;
2245
2246		simple_unlock(&vp->v_pollinfo.vpi_lock);
2247		return events;
2248	}
2249	vp->v_pollinfo.vpi_events |= events;
2250	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2251	simple_unlock(&vp->v_pollinfo.vpi_lock);
2252	return 0;
2253}
2254
2255/*
2256 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
2257 * it is possible for us to miss an event due to race conditions, but
2258 * that condition is expected to be rare, so for the moment it is the
2259 * preferred interface.
2260 */
2261void
2262vn_pollevent(vp, events)
2263	struct vnode *vp;
2264	short events;
2265{
2266	simple_lock(&vp->v_pollinfo.vpi_lock);
2267	if (vp->v_pollinfo.vpi_events & events) {
2268		/*
2269		 * We clear vpi_events so that we don't
2270		 * call selwakeup() twice if two events are
2271		 * posted before the polling process(es) is
2272		 * awakened.  This also ensures that we take at
2273		 * most one selwakeup() if the polling process
2274		 * is no longer interested.  However, it does
2275		 * mean that only one event can be noticed at
2276		 * a time.  (Perhaps we should only clear those
2277		 * event bits which we note?) XXX
2278		 */
2279		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
2280		vp->v_pollinfo.vpi_revents |= events;
2281		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2282	}
2283	simple_unlock(&vp->v_pollinfo.vpi_lock);
2284}
2285
2286/*
2287 * Wake up anyone polling on vp because it is being revoked.
2288 * This depends on dead_poll() returning POLLHUP for correct
2289 * behavior.
2290 */
2291void
2292vn_pollgone(vp)
2293	struct vnode *vp;
2294{
2295	simple_lock(&vp->v_pollinfo.vpi_lock);
2296	if (vp->v_pollinfo.vpi_events) {
2297		vp->v_pollinfo.vpi_events = 0;
2298		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2299	}
2300	simple_unlock(&vp->v_pollinfo.vpi_lock);
2301}
2302