vfs_export.c revision 32454
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.121 1998/01/07 09:26:29 dyson Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/proc.h>
52#include <sys/malloc.h>
53#include <sys/mount.h>
54#include <sys/vnode.h>
55#include <sys/stat.h>
56#include <sys/buf.h>
57#include <sys/poll.h>
58#include <sys/domain.h>
59#include <sys/dirent.h>
60#include <sys/vmmeter.h>
61
62#include <machine/limits.h>
63
64#include <vm/vm.h>
65#include <vm/vm_object.h>
66#include <vm/vm_extern.h>
67#include <vm/pmap.h>
68#include <vm/vm_map.h>
69#include <vm/vnode_pager.h>
70#include <sys/sysctl.h>
71
72#include <miscfs/specfs/specdev.h>
73
74static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
75
76static void	insmntque __P((struct vnode *vp, struct mount *mp));
77#ifdef DDB
78static void	printlockedvnodes __P((void));
79#endif
80static void	vbusy __P((struct vnode *));
81static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
82static void	vfree __P((struct vnode *));
83static void	vgonel __P((struct vnode *vp, struct proc *p));
84static unsigned long	numvnodes;
85SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
86
87enum vtype iftovt_tab[16] = {
88	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
89	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
90};
91int vttoif_tab[9] = {
92	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
93	S_IFSOCK, S_IFIFO, S_IFMT,
94};
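
/*
 * Usage sketch (illustrative addition, not part of this revision): these
 * tables back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>, which
 * translate between the S_IFMT bits of an inode mode and the vnode type.
 * A filesystem typically converts when it brings an inode in core ("ip"
 * below is a placeholder for the filesystem's private inode structure):
 *
 *	vp->v_type = IFTOVT(ip->i_mode);
 *	...
 *	imode = MAKEIMODE(vap->va_type, vap->va_mode);
 */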
95
96/*
97 * Insq/Remq for the vnode usage lists.
98 */
99#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
100#define	bufremvn(bp) {							\
101	LIST_REMOVE(bp, b_vnbufs);					\
102	(bp)->b_vnbufs.le_next = NOLIST;				\
103}
104
105TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
106struct tobefreelist vnode_tobefree_list;	/* vnodes scheduled to be freed */
107
108static u_long wantfreevnodes = 25;
109SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
110static u_long freevnodes = 0;
111SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
112
113int vfs_ioopt = 2;
114SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
115
116struct mntlist mountlist;	/* mounted filesystem list */
117struct simplelock mountlist_slock;
118static struct simplelock mntid_slock;
119struct simplelock mntvnode_slock;
120struct simplelock vnode_free_list_slock;
121static struct simplelock spechash_slock;
122struct nfs_public nfs_pub;	/* publicly exported FS */
123
124int desiredvnodes;
125SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
126
127static void	vfs_free_addrlist __P((struct netexport *nep));
128static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
129static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
130				       struct export_args *argp));
131
132/*
133 * Initialize the vnode management data structures.
134 */
135void
136vntblinit()
137{
138
139	desiredvnodes = maxproc + cnt.v_page_count / 4;
140	simple_lock_init(&mntvnode_slock);
141	simple_lock_init(&mntid_slock);
142	simple_lock_init(&spechash_slock);
143	TAILQ_INIT(&vnode_free_list);
144	TAILQ_INIT(&vnode_tobefree_list);
145	simple_lock_init(&vnode_free_list_slock);
146	CIRCLEQ_INIT(&mountlist);
147}
148
149/*
150 * Mark a mount point as busy. Used to synchronize access and to delay
151 * unmounting. Interlock is not released on failure.
152 */
153int
154vfs_busy(mp, flags, interlkp, p)
155	struct mount *mp;
156	int flags;
157	struct simplelock *interlkp;
158	struct proc *p;
159{
160	int lkflags;
161
162	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
163		if (flags & LK_NOWAIT)
164			return (ENOENT);
165		mp->mnt_kern_flag |= MNTK_MWAIT;
166		if (interlkp) {
167			simple_unlock(interlkp);
168		}
169		/*
170		 * Since all busy locks are shared except the exclusive
171		 * lock granted when unmounting, the only place that a
172		 * wakeup needs to be done is at the release of the
173		 * exclusive lock at the end of dounmount.
174		 */
175		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
176		if (interlkp) {
177			simple_lock(interlkp);
178		}
179		return (ENOENT);
180	}
181	lkflags = LK_SHARED;
182	if (interlkp)
183		lkflags |= LK_INTERLOCK;
184	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
185		panic("vfs_busy: unexpected lock failure");
186	return (0);
187}
188
189/*
190 * Free a busy filesystem.
191 */
192void
193vfs_unbusy(mp, p)
194	struct mount *mp;
195	struct proc *p;
196{
197
198	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
199}
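
/*
 * Usage sketch (illustrative addition): the usual idiom for walking the
 * mount list is to take mountlist_slock, vfs_busy() each mount (skipping
 * ones that are being unmounted), work on it with the list lock dropped,
 * and vfs_unbusy() it afterwards.  printlockedvnodes() below is a live
 * instance of this pattern:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */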
200
201/*
202 * Lookup a filesystem type, and if found allocate and initialize
203 * a mount structure for it.
204 *
205 * Devname is usually updated by mount(8) after booting.
206 */
207int
208vfs_rootmountalloc(fstypename, devname, mpp)
209	char *fstypename;
210	char *devname;
211	struct mount **mpp;
212{
213	struct proc *p = curproc;	/* XXX */
214	struct vfsconf *vfsp;
215	struct mount *mp;
216
217	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
218		if (!strcmp(vfsp->vfc_name, fstypename))
219			break;
220	if (vfsp == NULL)
221		return (ENODEV);
222	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
223	bzero((char *)mp, (u_long)sizeof(struct mount));
224	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
225	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
226	LIST_INIT(&mp->mnt_vnodelist);
227	mp->mnt_vfc = vfsp;
228	mp->mnt_op = vfsp->vfc_vfsops;
229	mp->mnt_flag = MNT_RDONLY;
230	mp->mnt_vnodecovered = NULLVP;
231	vfsp->vfc_refcount++;
232	mp->mnt_stat.f_type = vfsp->vfc_typenum;
233	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
234	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
235	mp->mnt_stat.f_mntonname[0] = '/';
236	mp->mnt_stat.f_mntonname[1] = 0;
237	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
238	*mpp = mp;
239	return (0);
240}
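
/*
 * Usage sketch (illustrative addition): a filesystem's mountroot routine
 * typically obtains its struct mount this way, mounts the root device
 * (rootvp as produced by bdevvp() below), and then links the result onto
 * the mount list.  "example_mountfs" is a placeholder for the
 * filesystem-specific worker:
 *
 *	if ((error = vfs_rootmountalloc("ufs", "root_device", &mp)) != 0)
 *		return (error);
 *	if ((error = example_mountfs(rootvp, mp, p)) != 0) {
 *		mp->mnt_vfc->vfc_refcount--;
 *		vfs_unbusy(mp, p);
 *		free(mp, M_MOUNT);
 *		return (error);
 *	}
 *	simple_lock(&mountlist_slock);
 *	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 *	simple_unlock(&mountlist_slock);
 *	vfs_unbusy(mp, p);
 */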
241
242/*
243 * Find an appropriate filesystem to use for the root. If a filesystem
244 * has not been preselected, walk through the list of known filesystems
245 * trying those that have mountroot routines, and try them until one
246 * works or we have tried them all.
247 */
248#ifdef notdef	/* XXX JH */
249int
250lite2_vfs_mountroot()
251{
252	struct vfsconf *vfsp;
253	extern int (*lite2_mountroot) __P((void));
254	int error;
255
256	if (lite2_mountroot != NULL)
257		return ((*lite2_mountroot)());
258	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
259		if (vfsp->vfc_mountroot == NULL)
260			continue;
261		if ((error = (*vfsp->vfc_mountroot)()) == 0)
262			return (0);
263		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
264	}
265	return (ENODEV);
266}
267#endif
268
269/*
270 * Lookup a mount point by filesystem identifier.
271 */
272struct mount *
273vfs_getvfs(fsid)
274	fsid_t *fsid;
275{
276	register struct mount *mp;
277
278	simple_lock(&mountlist_slock);
279	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
280	    mp = mp->mnt_list.cqe_next) {
281		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
282		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
283			simple_unlock(&mountlist_slock);
284			return (mp);
285		}
286	}
287	simple_unlock(&mountlist_slock);
288	return ((struct mount *) 0);
289}
290
291/*
292 * Get a new unique fsid
293 */
294void
295vfs_getnewfsid(mp)
296	struct mount *mp;
297{
298	static u_short xxxfs_mntid;
299
300	fsid_t tfsid;
301	int mtype;
302
303	simple_lock(&mntid_slock);
304	mtype = mp->mnt_vfc->vfc_typenum;
305	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
306	mp->mnt_stat.f_fsid.val[1] = mtype;
307	if (xxxfs_mntid == 0)
308		++xxxfs_mntid;
309	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
310	tfsid.val[1] = mtype;
311	if (mountlist.cqh_first != (void *)&mountlist) {
312		while (vfs_getvfs(&tfsid)) {
313			tfsid.val[0]++;
314			xxxfs_mntid++;
315		}
316	}
317	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
318	simple_unlock(&mntid_slock);
319}
320
321/*
322 * Set vnode attributes to VNOVAL
323 */
324void
325vattr_null(vap)
326	register struct vattr *vap;
327{
328
329	vap->va_type = VNON;
330	vap->va_size = VNOVAL;
331	vap->va_bytes = VNOVAL;
332	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
333	    vap->va_fsid = vap->va_fileid =
334	    vap->va_blocksize = vap->va_rdev =
335	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
336	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
337	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
338	    vap->va_flags = vap->va_gen = VNOVAL;
339	vap->va_vaflags = 0;
340}
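
/*
 * Usage sketch (illustrative addition): callers normally go through the
 * VATTR_NULL() macro, set only the fields they intend to change, and pass
 * the result to VOP_SETATTR(); anything left at VNOVAL is ignored by the
 * filesystem.  For example, truncating a file on open:
 *
 *	struct vattr va;
 *
 *	VATTR_NULL(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */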
341
342/*
343 * Routines having to do with the management of the vnode table.
344 */
345extern vop_t **dead_vnodeop_p;
346
347/*
348 * Return the next vnode from the free list.
349 */
350int
351getnewvnode(tag, mp, vops, vpp)
352	enum vtagtype tag;
353	struct mount *mp;
354	vop_t **vops;
355	struct vnode **vpp;
356{
357	int s;
358	struct proc *p = curproc;	/* XXX */
359	struct vnode *vp, *tvp, *nvp;
360	vm_object_t object;
361	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
362
363	/*
364	 * We take the least recently used vnode from the freelist
365	 * if we can get it and it has no cached pages, and no
366	 * namecache entries are relative to it.
367	 * Otherwise we allocate a new vnode.
368	 */
369
370	s = splbio();
371	simple_lock(&vnode_free_list_slock);
372	TAILQ_INIT(&vnode_tmp_list);
373
374	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
375		nvp = TAILQ_NEXT(vp, v_freelist);
376		vp->v_flag &= ~VTBFREE;
377		TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
378	}
379
380	if (wantfreevnodes && freevnodes < wantfreevnodes) {
381		vp = NULL;
382	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
383		/*
384		 * XXX: this is only here to be backwards compatible
385		 */
386		vp = NULL;
387	} else {
388		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
389
390			nvp = TAILQ_NEXT(vp, v_freelist);
391
392			if (!simple_lock_try(&vp->v_interlock))
393				continue;
394			if (vp->v_usecount)
395				panic("free vnode isn't");
396
397			object = vp->v_object;
398			if (object && (object->resident_page_count || object->ref_count)) {
399				/* Don't recycle if it's caching some pages */
400				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
401				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
402				continue;
403			} else if (LIST_FIRST(&vp->v_cache_src)) {
404				/* Don't recycle if active in the namecache */
405				simple_unlock(&vp->v_interlock);
406				continue;
407			} else {
408				break;
409			}
410		}
411	}
412
413	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
414		nvp = TAILQ_NEXT(tvp, v_freelist);
415		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
416		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
417		simple_unlock(&tvp->v_interlock);
418	}
419
420	if (vp) {
421		vp->v_flag |= VDOOMED;
422		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
423		freevnodes--;
424		simple_unlock(&vnode_free_list_slock);
425		cache_purge(vp);
426		vp->v_lease = NULL;
427		if (vp->v_type != VBAD) {
428			vgonel(vp, p);
429		} else {
430			simple_unlock(&vp->v_interlock);
431		}
432
433#ifdef DIAGNOSTIC
434		{
435			int s;
436
437			if (vp->v_data)
438				panic("cleaned vnode isn't");
439			s = splbio();
440			if (vp->v_numoutput)
441				panic("Clean vnode has pending I/O's");
442			splx(s);
443		}
444#endif
445		vp->v_flag = 0;
446		vp->v_lastr = 0;
447		vp->v_lastw = 0;
448		vp->v_lasta = 0;
449		vp->v_cstart = 0;
450		vp->v_clen = 0;
451		vp->v_socket = 0;
452		vp->v_writecount = 0;	/* XXX */
453	} else {
454		simple_unlock(&vnode_free_list_slock);
455		vp = (struct vnode *) malloc((u_long) sizeof *vp,
456		    M_VNODE, M_WAITOK);
457		bzero((char *) vp, sizeof *vp);
458		simple_lock_init(&vp->v_interlock);
459		vp->v_dd = vp;
460		cache_purge(vp);
461		LIST_INIT(&vp->v_cache_src);
462		TAILQ_INIT(&vp->v_cache_dst);
463		numvnodes++;
464	}
465
466	vp->v_type = VNON;
467	vp->v_tag = tag;
468	vp->v_op = vops;
469	insmntque(vp, mp);
470	*vpp = vp;
471	vp->v_usecount = 1;
472	vp->v_data = 0;
473	splx(s);
474	return (0);
475}
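
/*
 * Usage sketch (illustrative addition): a filesystem's VFS_VGET()-style
 * routine allocates its vnodes here and then attaches its private
 * per-file data.  The tag and vnodeop vector are filesystem-specific;
 * "ip" below is a placeholder for the in-core inode:
 *
 *	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
 *		*vpp = NULL;
 *		return (error);
 *	}
 *	vp->v_data = ip;
 *	ip->i_vnode = vp;
 *	...
 *	*vpp = vp;
 */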
476
477/*
478 * Move a vnode from one mount queue to another.
479 */
480static void
481insmntque(vp, mp)
482	register struct vnode *vp;
483	register struct mount *mp;
484{
485
486	simple_lock(&mntvnode_slock);
487	/*
488	 * Delete from old mount point vnode list, if on one.
489	 */
490	if (vp->v_mount != NULL)
491		LIST_REMOVE(vp, v_mntvnodes);
492	/*
493	 * Insert into list of vnodes for the new mount point, if available.
494	 */
495	if ((vp->v_mount = mp) == NULL) {
496		simple_unlock(&mntvnode_slock);
497		return;
498	}
499	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
500	simple_unlock(&mntvnode_slock);
501}
502
503/*
504 * Update outstanding I/O count and do wakeup if requested.
505 */
506void
507vwakeup(bp)
508	register struct buf *bp;
509{
510	register struct vnode *vp;
511
512	bp->b_flags &= ~B_WRITEINPROG;
513	if ((vp = bp->b_vp)) {
514		vp->v_numoutput--;
515		if (vp->v_numoutput < 0)
516			panic("vwakeup: neg numoutput");
517		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
518			vp->v_flag &= ~VBWAIT;
519			wakeup((caddr_t) &vp->v_numoutput);
520		}
521	}
522}
523
524/*
525 * Flush out and invalidate all buffers associated with a vnode.
526 * Called with the underlying object locked.
527 */
528int
529vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
530	register struct vnode *vp;
531	int flags;
532	struct ucred *cred;
533	struct proc *p;
534	int slpflag, slptimeo;
535{
536	register struct buf *bp;
537	struct buf *nbp, *blist;
538	int s, error;
539	vm_object_t object;
540
541	if (flags & V_SAVE) {
542		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
543			return (error);
544		if (vp->v_dirtyblkhd.lh_first != NULL)
545			panic("vinvalbuf: dirty bufs");
546	}
547
548	s = splbio();
549	for (;;) {
550		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
551			while (blist && blist->b_lblkno < 0)
552				blist = blist->b_vnbufs.le_next;
553		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
554		    (flags & V_SAVEMETA))
555			while (blist && blist->b_lblkno < 0)
556				blist = blist->b_vnbufs.le_next;
557		if (!blist)
558			break;
559
560		for (bp = blist; bp; bp = nbp) {
561			nbp = bp->b_vnbufs.le_next;
562			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
563				continue;
564			if (bp->b_flags & B_BUSY) {
565				bp->b_flags |= B_WANTED;
566				error = tsleep((caddr_t) bp,
567				    slpflag | (PRIBIO + 1), "vinvalbuf",
568				    slptimeo);
569				if (error) {
570					splx(s);
571					return (error);
572				}
573				break;
574			}
575			bremfree(bp);
576			bp->b_flags |= B_BUSY;
577			/*
578			 * XXX Since there are no node locks for NFS, I
579			 * believe there is a slight chance that a delayed
580			 * write will occur while sleeping just above, so
581			 * check for it.
582			 */
583			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
584				if (bp->b_vp == vp) {
585					if (bp->b_flags & B_CLUSTEROK) {
586						vfs_bio_awrite(bp);
587					} else {
588						bp->b_flags |= B_ASYNC;
589						VOP_BWRITE(bp);
590					}
591				} else {
592					(void) VOP_BWRITE(bp);
593				}
594				break;
595			}
596			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
597			brelse(bp);
598		}
599	}
600
601	while (vp->v_numoutput > 0) {
602		vp->v_flag |= VBWAIT;
603		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
604	}
605
606	splx(s);
607
608	/*
609	 * Destroy the copy in the VM cache, too.
610	 */
611	simple_lock(&vp->v_interlock);
612	object = vp->v_object;
613	if (object != NULL) {
614		if (flags & V_SAVEMETA)
615			vm_object_page_remove(object, 0, object->size,
616				(flags & V_SAVE) ? TRUE : FALSE);
617		else
618			vm_object_page_remove(object, 0, 0,
619				(flags & V_SAVE) ? TRUE : FALSE);
620	}
621	simple_unlock(&vp->v_interlock);
622
623	if (!(flags & V_SAVEMETA) &&
624	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
625		panic("vinvalbuf: flush failed");
626	return (0);
627}
628
629/*
630 * Associate a buffer with a vnode.
631 */
632void
633bgetvp(vp, bp)
634	register struct vnode *vp;
635	register struct buf *bp;
636{
637	int s;
638
639#if defined(DIAGNOSTIC)
640	if (bp->b_vp)
641		panic("bgetvp: not free");
642#endif
643	vhold(vp);
644	bp->b_vp = vp;
645	if (vp->v_type == VBLK || vp->v_type == VCHR)
646		bp->b_dev = vp->v_rdev;
647	else
648		bp->b_dev = NODEV;
649	/*
650	 * Insert onto list for new vnode.
651	 */
652	s = splbio();
653	bufinsvn(bp, &vp->v_cleanblkhd);
654	splx(s);
655}
656
657/*
658 * Disassociate a buffer from a vnode.
659 */
660void
661brelvp(bp)
662	register struct buf *bp;
663{
664	struct vnode *vp;
665	int s;
666
667#if defined(DIAGNOSTIC)
668	if (bp->b_vp == (struct vnode *) 0)
669		panic("brelvp: NULL");
670#endif
671
672	/*
673	 * Delete from old vnode list, if on one.
674	 */
675	s = splbio();
676	if (bp->b_vnbufs.le_next != NOLIST)
677		bufremvn(bp);
678	splx(s);
679
680	vp = bp->b_vp;
681	bp->b_vp = (struct vnode *) 0;
682	vdrop(vp);
683}
684
685/*
686 * Associate a p-buffer with a vnode.
687 */
688void
689pbgetvp(vp, bp)
690	register struct vnode *vp;
691	register struct buf *bp;
692{
693#if defined(DIAGNOSTIC)
694	if (bp->b_vp)
695		panic("pbgetvp: not free");
696#endif
697	bp->b_vp = vp;
698	if (vp->v_type == VBLK || vp->v_type == VCHR)
699		bp->b_dev = vp->v_rdev;
700	else
701		bp->b_dev = NODEV;
702}
703
704/*
705 * Disassociate a p-buffer from a vnode.
706 */
707void
708pbrelvp(bp)
709	register struct buf *bp;
710{
711
712#if defined(DIAGNOSTIC)
713	if (bp->b_vp == (struct vnode *) 0)
714		panic("pbrelvp: NULL");
715#endif
716
717	bp->b_vp = (struct vnode *) 0;
718}
719
720/*
721 * Reassign a buffer from one vnode to another.
722 * Used to assign file specific control information
723 * (indirect blocks) to the vnode to which they belong.
724 */
725void
726reassignbuf(bp, newvp)
727	register struct buf *bp;
728	register struct vnode *newvp;
729{
730	int s;
731
732	if (newvp == NULL) {
733		printf("reassignbuf: NULL");
734		return;
735	}
736
737	s = splbio();
738	/*
739	 * Delete from old vnode list, if on one.
740	 */
741	if (bp->b_vnbufs.le_next != NOLIST) {
742		bufremvn(bp);
743		vdrop(bp->b_vp);
744	}
745	/*
746	 * If dirty, put on list of dirty buffers; otherwise insert onto list
747	 * of clean buffers.
748	 */
749	if (bp->b_flags & B_DELWRI) {
750		struct buf *tbp;
751
752		tbp = newvp->v_dirtyblkhd.lh_first;
753		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
754			bufinsvn(bp, &newvp->v_dirtyblkhd);
755		} else {
756			while (tbp->b_vnbufs.le_next &&
757				(tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
758				tbp = tbp->b_vnbufs.le_next;
759			}
760			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
761		}
762	} else {
763		bufinsvn(bp, &newvp->v_cleanblkhd);
764	}
765	bp->b_vp = newvp;
766	vhold(bp->b_vp);
767	splx(s);
768}
769
770#ifndef DEVFS_ROOT
771/*
772 * Create a vnode for a block device.
773 * Used for mounting the root file system.
774 */
775int
776bdevvp(dev, vpp)
777	dev_t dev;
778	struct vnode **vpp;
779{
780	register struct vnode *vp;
781	struct vnode *nvp;
782	int error;
783
784	if (dev == NODEV)
785		return (0);
786	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
787	if (error) {
788		*vpp = 0;
789		return (error);
790	}
791	vp = nvp;
792	vp->v_type = VBLK;
793	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
794		vput(vp);
795		vp = nvp;
796	}
797	*vpp = vp;
798	return (0);
799}
800#endif /* !DEVFS_ROOT */
801
802/*
803 * Check to see if the new vnode represents a special device
804 * for which we already have a vnode (either because of
805 * bdevvp() or because of a different vnode representing
806 * the same block device). If such an alias exists, deallocate
807 * the existing contents and return the aliased vnode. The
808 * caller is responsible for filling it with its new contents.
809 */
810struct vnode *
811checkalias(nvp, nvp_rdev, mp)
812	register struct vnode *nvp;
813	dev_t nvp_rdev;
814	struct mount *mp;
815{
816	struct proc *p = curproc;	/* XXX */
817	struct vnode *vp;
818	struct vnode **vpp;
819
820	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
821		return (NULLVP);
822
823	vpp = &speclisth[SPECHASH(nvp_rdev)];
824loop:
825	simple_lock(&spechash_slock);
826	for (vp = *vpp; vp; vp = vp->v_specnext) {
827		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
828			continue;
829		/*
830		 * Alias, but not in use, so flush it out.
831		 */
832		simple_lock(&vp->v_interlock);
833		if (vp->v_usecount == 0) {
834			simple_unlock(&spechash_slock);
835			vgonel(vp, p);
836			goto loop;
837		}
838		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
839			simple_unlock(&spechash_slock);
840			goto loop;
841		}
842		break;
843	}
844	if (vp == NULL || vp->v_tag != VT_NON) {
845		MALLOC(nvp->v_specinfo, struct specinfo *,
846		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
847		nvp->v_rdev = nvp_rdev;
848		nvp->v_hashchain = vpp;
849		nvp->v_specnext = *vpp;
850		nvp->v_specflags = 0;
851		simple_unlock(&spechash_slock);
852		*vpp = nvp;
853		if (vp != NULLVP) {
854			nvp->v_flag |= VALIASED;
855			vp->v_flag |= VALIASED;
856			vput(vp);
857		}
858		return (NULLVP);
859	}
860	simple_unlock(&spechash_slock);
861	VOP_UNLOCK(vp, 0, p);
862	simple_lock(&vp->v_interlock);
863	vclean(vp, 0, p);
864	vp->v_op = nvp->v_op;
865	vp->v_tag = nvp->v_tag;
866	nvp->v_type = VNON;
867	insmntque(vp, mp);
868	return (vp);
869}
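
/*
 * Usage sketch (illustrative addition): filesystems call this after
 * reading in an inode that turns out to be a device node, so that all
 * vnodes for one device funnel through a single specinfo.  Roughly, in a
 * ufs_vinit()-style routine ("ip" is a placeholder for the inode):
 *
 *	case VCHR:
 *	case VBLK:
 *		vp->v_op = spec_vnodeop_p;
 *		nvp = checkalias(vp, ip->i_rdev, mntp);
 *		if (nvp != NULL) {
 *			... move the inode to nvp, discard vp, vp = nvp ...
 *		}
 *		break;
 */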
870
871/*
872 * Grab a particular vnode from the free list, increment its
873 * reference count and lock it. The vnode lock bit is set if the
874 * vnode is being eliminated in vgone. The process is awakened
875 * when the transition is completed, and an error returned to
876 * indicate that the vnode is no longer usable (possibly having
877 * been changed to a new file system type).
878 */
879int
880vget(vp, flags, p)
881	register struct vnode *vp;
882	int flags;
883	struct proc *p;
884{
885	int error;
886
887	/*
888	 * If the vnode is in the process of being cleaned out for
889	 * another use, we wait for the cleaning to finish and then
890	 * return failure. Cleaning is determined by checking that
891	 * the VXLOCK flag is set.
892	 */
893	if ((flags & LK_INTERLOCK) == 0) {
894		simple_lock(&vp->v_interlock);
895	}
896	if (vp->v_flag & VXLOCK) {
897		vp->v_flag |= VXWANT;
898		simple_unlock(&vp->v_interlock);
899		tsleep((caddr_t)vp, PINOD, "vget", 0);
900		return (ENOENT);
901	}
902
903	vp->v_usecount++;
904
905	if (VSHOULDBUSY(vp))
906		vbusy(vp);
907	/*
908	 * Create the VM object, if needed
909	 */
910	if ((flags & LK_NOOBJ) == 0 &&
911		   (vp->v_type == VREG) &&
912		   ((vp->v_object == NULL) ||
913			(vp->v_object->flags & OBJ_DEAD))) {
914		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
915	}
916	if (flags & LK_TYPE_MASK) {
917		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
918			vrele(vp);
919		return (error);
920	}
921	simple_unlock(&vp->v_interlock);
922	return (0);
923}
924
925void
926vref(struct vnode *vp)
927{
928	simple_lock(&vp->v_interlock);
929	vp->v_usecount++;
930	simple_unlock(&vp->v_interlock);
931}
932
933/*
934 * Vnode put/release.
935 * If count drops to zero, call inactive routine and return to freelist.
936 */
937void
938vrele(vp)
939	struct vnode *vp;
940{
941	struct proc *p = curproc;	/* XXX */
942
943#ifdef DIAGNOSTIC
944	if (vp == NULL)
945		panic("vrele: null vp");
946#endif
947	simple_lock(&vp->v_interlock);
948
949	if (vp->v_usecount > 1) {
950
951		vp->v_usecount--;
952		simple_unlock(&vp->v_interlock);
953
954		return;
955	}
956
957	if (vp->v_usecount == 1) {
958
959		vp->v_usecount--;
960
961		if (VSHOULDFREE(vp))
962			vfree(vp);
963	/*
964	 * If we are doing a vput, the node is already locked, and we must
965	 * call VOP_INACTIVE with the node locked.  So, in the case of
966	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
967	 */
968		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
969			VOP_INACTIVE(vp, p);
970		}
971
972	} else {
973#ifdef DIAGNOSTIC
974		vprint("vrele: negative ref count", vp);
975		simple_unlock(&vp->v_interlock);
976#endif
977		panic("vrele: negative ref cnt");
978	}
979}
980
981void
982vput(vp)
983	struct vnode *vp;
984{
985	struct proc *p = curproc;	/* XXX */
986
987#ifdef DIAGNOSTIC
988	if (vp == NULL)
989		panic("vput: null vp");
990#endif
991
992	simple_lock(&vp->v_interlock);
993
994	if (vp->v_usecount > 1) {
995
996		vp->v_usecount--;
997		VOP_UNLOCK(vp, LK_INTERLOCK, p);
998		return;
999
1000	}
1001
1002	if (vp->v_usecount == 1) {
1003
1004		vp->v_usecount--;
1005		if (VSHOULDFREE(vp))
1006			vfree(vp);
1007	/*
1008	 * If we are doing a vput, the node is already locked, and we must
1009	 * call VOP_INACTIVE with the node locked.  So, in the case of
1010	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1011	 */
1012		simple_unlock(&vp->v_interlock);
1013		VOP_INACTIVE(vp, p);
1014
1015	} else {
1016#ifdef DIAGNOSTIC
1017		vprint("vput: negative ref count", vp);
1018#endif
1019		panic("vput: negative ref cnt");
1020	}
1021}
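
/*
 * Usage sketch (illustrative addition): the usual reference/lock pairing.
 * vget() with a lock type returns the vnode referenced and locked, and
 * vput() both unlocks and releases it; a reference taken without the
 * lock (vref(), or vget() without LK_TYPE_MASK bits) is dropped with
 * vrele():
 *
 *	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
 *		return (error);
 *	... operate on the locked vnode ...
 *	vput(vp);
 */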
1022
1023/*
1024 * Somebody doesn't want the vnode recycled.
1025 */
1026void
1027vhold(vp)
1028	register struct vnode *vp;
1029{
1030
1031	simple_lock(&vp->v_interlock);
1032	vp->v_holdcnt++;
1033	if (VSHOULDBUSY(vp))
1034		vbusy(vp);
1035	simple_unlock(&vp->v_interlock);
1036}
1037
1038/*
1039 * One less who cares about this vnode.
1040 */
1041void
1042vdrop(vp)
1043	register struct vnode *vp;
1044{
1045
1046	simple_lock(&vp->v_interlock);
1047	if (vp->v_holdcnt <= 0)
1048		panic("holdrele: holdcnt");
1049	vp->v_holdcnt--;
1050	if (VSHOULDFREE(vp))
1051		vfree(vp);
1052	simple_unlock(&vp->v_interlock);
1053}
1054
1055/*
1056 * Remove any vnodes in the vnode table belonging to mount point mp.
1057 *
1058 * If MNT_NOFORCE is specified, there should not be any active ones;
1059 * an error is returned if any are found (nb: this is a user error, not a
1060 * system error). If MNT_FORCE is specified, detach any active vnodes
1061 * that are found.
1062 */
1063#ifdef DIAGNOSTIC
1064static int busyprt = 0;		/* print out busy vnodes */
1065SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1066#endif
1067
1068int
1069vflush(mp, skipvp, flags)
1070	struct mount *mp;
1071	struct vnode *skipvp;
1072	int flags;
1073{
1074	struct proc *p = curproc;	/* XXX */
1075	struct vnode *vp, *nvp;
1076	int busy = 0;
1077
1078	simple_lock(&mntvnode_slock);
1079loop:
1080	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1081		/*
1082		 * Make sure this vnode wasn't reclaimed in getnewvnode().
1083		 * Start over if it has (it won't be on the list anymore).
1084		 */
1085		if (vp->v_mount != mp)
1086			goto loop;
1087		nvp = vp->v_mntvnodes.le_next;
1088		/*
1089		 * Skip over a selected vnode.
1090		 */
1091		if (vp == skipvp)
1092			continue;
1093
1094		simple_lock(&vp->v_interlock);
1095		/*
1096		 * Skip over vnodes marked VSYSTEM.
1097		 */
1098		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1099			simple_unlock(&vp->v_interlock);
1100			continue;
1101		}
1102		/*
1103		 * If WRITECLOSE is set, only flush out regular file vnodes
1104		 * open for writing.
1105		 */
1106		if ((flags & WRITECLOSE) &&
1107		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1108			simple_unlock(&vp->v_interlock);
1109			continue;
1110		}
1111
1112		/*
1113		 * With v_usecount == 0, all we need to do is clear out the
1114		 * vnode data structures and we are done.
1115		 */
1116		if (vp->v_usecount == 0) {
1117			simple_unlock(&mntvnode_slock);
1118			vgonel(vp, p);
1119			simple_lock(&mntvnode_slock);
1120			continue;
1121		}
1122
1123		/*
1124		 * If FORCECLOSE is set, forcibly close the vnode. For block
1125		 * or character devices, revert to an anonymous device. For
1126		 * all other files, just kill them.
1127		 */
1128		if (flags & FORCECLOSE) {
1129			simple_unlock(&mntvnode_slock);
1130			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1131				vgonel(vp, p);
1132			} else {
1133				vclean(vp, 0, p);
1134				vp->v_op = spec_vnodeop_p;
1135				insmntque(vp, (struct mount *) 0);
1136			}
1137			simple_lock(&mntvnode_slock);
1138			continue;
1139		}
1140#ifdef DIAGNOSTIC
1141		if (busyprt)
1142			vprint("vflush: busy vnode", vp);
1143#endif
1144		simple_unlock(&vp->v_interlock);
1145		busy++;
1146	}
1147	simple_unlock(&mntvnode_slock);
1148	if (busy)
1149		return (EBUSY);
1150	return (0);
1151}
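
/*
 * Usage sketch (illustrative addition): unmount paths are the main
 * callers.  A filesystem's VFS_UNMOUNT() typically does something like
 * the following before tearing down its private mount data:
 *
 *	flags = 0;
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 */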
1152
1153/*
1154 * Disassociate the underlying file system from a vnode.
1155 */
1156static void
1157vclean(vp, flags, p)
1158	struct vnode *vp;
1159	int flags;
1160	struct proc *p;
1161{
1162	int active;
1163
1164	/*
1165	 * Check to see if the vnode is in use. If so we have to reference it
1166	 * before we clean it out so that its count cannot fall to zero and
1167	 * generate a race against ourselves to recycle it.
1168	 */
1169	if ((active = vp->v_usecount))
1170		vp->v_usecount++;
1171
1172	if (vp->v_object) {
1173		vp->v_object->flags |= OBJ_DEAD;
1174	}
1175	/*
1176	 * Prevent the vnode from being recycled or brought into use while we
1177	 * clean it out.
1178	 */
1179	if (vp->v_flag & VXLOCK)
1180		panic("vclean: deadlock");
1181	vp->v_flag |= VXLOCK;
1182	/*
1183	 * Even if the count is zero, the VOP_INACTIVE routine may still
1184	 * have the object locked while it cleans it out. The VOP_LOCK
1185	 * ensures that the VOP_INACTIVE routine is done with its work.
1186	 * For active vnodes, it ensures that no other activity can
1187	 * occur while the underlying object is being cleaned out.
1188	 */
1189	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1190
1191	/*
1192	 * Clean out any buffers associated with the vnode.
1193	 */
1194	if (vp->v_object)
1195		vm_object_terminate(vp->v_object);
1196	else
1197		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1198
1199	/*
1200	 * If purging an active vnode, it must be closed and
1201	 * deactivated before being reclaimed. Note that the
1202	 * VOP_INACTIVE will unlock the vnode.
1203	 */
1204	if (active) {
1205		if (flags & DOCLOSE)
1206			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1207		VOP_INACTIVE(vp, p);
1208	} else {
1209		/*
1210		 * Any other processes trying to obtain this lock must first
1211		 * wait for VXLOCK to clear, then call the new lock operation.
1212		 */
1213		VOP_UNLOCK(vp, 0, p);
1214	}
1215	/*
1216	 * Reclaim the vnode.
1217	 */
1218	if (VOP_RECLAIM(vp, p))
1219		panic("vclean: cannot reclaim");
1220	if (active)
1221		vrele(vp);
1222	cache_purge(vp);
1223	if (vp->v_vnlock) {
1224#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
1225#ifdef DIAGNOSTIC
1226		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1227			vprint("vclean: lock not drained", vp);
1228#endif
1229#endif
1230		FREE(vp->v_vnlock, M_VNODE);
1231		vp->v_vnlock = NULL;
1232	}
1233
1234	/*
1235	 * Done with purge, notify sleepers of the grim news.
1236	 */
1237	vp->v_op = dead_vnodeop_p;
1238	vn_pollgone(vp);
1239	vp->v_tag = VT_NON;
1240	vp->v_flag &= ~VXLOCK;
1241	if (vp->v_flag & VXWANT) {
1242		vp->v_flag &= ~VXWANT;
1243		wakeup((caddr_t) vp);
1244	}
1245}
1246
1247/*
1248 * Eliminate all activity associated with the requested vnode
1249 * and with all vnodes aliased to the requested vnode.
1250 */
1251int
1252vop_revoke(ap)
1253	struct vop_revoke_args /* {
1254		struct vnode *a_vp;
1255		int a_flags;
1256	} */ *ap;
1257{
1258	struct vnode *vp, *vq;
1259	struct proc *p = curproc;	/* XXX */
1260
1261#ifdef DIAGNOSTIC
1262	if ((ap->a_flags & REVOKEALL) == 0)
1263		panic("vop_revoke");
1264#endif
1265
1266	vp = ap->a_vp;
1267	simple_lock(&vp->v_interlock);
1268
1269	if (vp->v_flag & VALIASED) {
1270		/*
1271		 * If a vgone (or vclean) is already in progress,
1272		 * wait until it is done and return.
1273		 */
1274		if (vp->v_flag & VXLOCK) {
1275			vp->v_flag |= VXWANT;
1276			simple_unlock(&vp->v_interlock);
1277			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1278			return (0);
1279		}
1280		/*
1281		 * Ensure that vp will not be vgone'd while we
1282		 * are eliminating its aliases.
1283		 */
1284		vp->v_flag |= VXLOCK;
1285		simple_unlock(&vp->v_interlock);
1286		while (vp->v_flag & VALIASED) {
1287			simple_lock(&spechash_slock);
1288			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1289				if (vq->v_rdev != vp->v_rdev ||
1290				    vq->v_type != vp->v_type || vp == vq)
1291					continue;
1292				simple_unlock(&spechash_slock);
1293				vgone(vq);
1294				break;
1295			}
1296			if (vq == NULLVP) {
1297				simple_unlock(&spechash_slock);
1298			}
1299		}
1300		/*
1301		 * Remove the lock so that vgone below will
1302		 * really eliminate the vnode after which time
1303		 * vgone will awaken any sleepers.
1304		 */
1305		simple_lock(&vp->v_interlock);
1306		vp->v_flag &= ~VXLOCK;
1307		if (vp->v_flag & VXWANT) {
1308			vp->v_flag &= ~VXWANT;
1309			wakeup(vp);
1310		}
1311	}
1312	vgonel(vp, p);
1313	return (0);
1314}
1315
1316/*
1317 * Recycle an unused vnode to the front of the free list.
1318 * Release the passed interlock if the vnode will be recycled.
1319 */
1320int
1321vrecycle(vp, inter_lkp, p)
1322	struct vnode *vp;
1323	struct simplelock *inter_lkp;
1324	struct proc *p;
1325{
1326
1327	simple_lock(&vp->v_interlock);
1328	if (vp->v_usecount == 0) {
1329		if (inter_lkp) {
1330			simple_unlock(inter_lkp);
1331		}
1332		vgonel(vp, p);
1333		return (1);
1334	}
1335	simple_unlock(&vp->v_interlock);
1336	return (0);
1337}
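
/*
 * Usage sketch (illustrative addition): a filesystem's VOP_INACTIVE()
 * routine calls this when it knows the underlying object is gone (for
 * instance, an inode whose link count has dropped to zero), so the vnode
 * can be reused immediately instead of lingering on the free list ("ip"
 * is a placeholder for the filesystem's inode):
 *
 *	VOP_UNLOCK(vp, 0, p);
 *	if (ip->i_mode == 0)
 *		vrecycle(vp, (struct simplelock *)0, p);
 */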
1338
1339/*
1340 * Eliminate all activity associated with a vnode
1341 * in preparation for reuse.
1342 */
1343void
1344vgone(vp)
1345	register struct vnode *vp;
1346{
1347	struct proc *p = curproc;	/* XXX */
1348
1349	simple_lock(&vp->v_interlock);
1350	vgonel(vp, p);
1351}
1352
1353/*
1354 * vgone, with the vp interlock held.
1355 */
1356static void
1357vgonel(vp, p)
1358	struct vnode *vp;
1359	struct proc *p;
1360{
1361	int s;
1362	struct vnode *vq;
1363	struct vnode *vx;
1364
1365	/*
1366	 * If a vgone (or vclean) is already in progress,
1367	 * wait until it is done and return.
1368	 */
1369	if (vp->v_flag & VXLOCK) {
1370		vp->v_flag |= VXWANT;
1371		simple_unlock(&vp->v_interlock);
1372		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1373		return;
1374	}
1375
1376	/*
1377	 * Clean out the filesystem specific data.
1378	 */
1379	vclean(vp, DOCLOSE, p);
1380
1381	/*
1382	 * Delete from old mount point vnode list, if on one.
1383	 */
1384	if (vp->v_mount != NULL)
1385		insmntque(vp, (struct mount *)0);
1386	/*
1387	 * If special device, remove it from special device alias list
1388	 * if it is on one.
1389	 */
1390	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1391		simple_lock(&spechash_slock);
1392		if (*vp->v_hashchain == vp) {
1393			*vp->v_hashchain = vp->v_specnext;
1394		} else {
1395			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1396				if (vq->v_specnext != vp)
1397					continue;
1398				vq->v_specnext = vp->v_specnext;
1399				break;
1400			}
1401			if (vq == NULL)
1402				panic("missing bdev");
1403		}
1404		if (vp->v_flag & VALIASED) {
1405			vx = NULL;
1406			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1407				if (vq->v_rdev != vp->v_rdev ||
1408				    vq->v_type != vp->v_type)
1409					continue;
1410				if (vx)
1411					break;
1412				vx = vq;
1413			}
1414			if (vx == NULL)
1415				panic("missing alias");
1416			if (vq == NULL)
1417				vx->v_flag &= ~VALIASED;
1418			vp->v_flag &= ~VALIASED;
1419		}
1420		simple_unlock(&spechash_slock);
1421		FREE(vp->v_specinfo, M_VNODE);
1422		vp->v_specinfo = NULL;
1423	}
1424
1425	/*
1426	 * If it is on the freelist and not already at the head,
1427	 * move it to the head of the list. The test of the back
1428	 * pointer and the reference count of zero is because
1429	 * it will be removed from the free list by getnewvnode,
1430	 * but will not have its reference count incremented until
1431	 * after calling vgone. If the reference count were
1432	 * incremented first, vgone would (incorrectly) try to
1433	 * close the previous instance of the underlying object.
1434	 */
1435	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1436		s = splbio();
1437		simple_lock(&vnode_free_list_slock);
1438		if (vp->v_flag & VFREE) {
1439			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1440		} else if (vp->v_flag & VTBFREE) {
1441			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
1442			vp->v_flag &= ~VTBFREE;
1443		}
1444		vp->v_flag |= VFREE;
1445		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1446		simple_unlock(&vnode_free_list_slock);
1447		splx(s);
1448	}
1449
1450	vp->v_type = VBAD;
1451	simple_unlock(&vp->v_interlock);
1452}
1453
1454/*
1455 * Lookup a vnode by device number.
1456 */
1457int
1458vfinddev(dev, type, vpp)
1459	dev_t dev;
1460	enum vtype type;
1461	struct vnode **vpp;
1462{
1463	register struct vnode *vp;
1464	int rc = 0;
1465
1466	simple_lock(&spechash_slock);
1467	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1468		if (dev != vp->v_rdev || type != vp->v_type)
1469			continue;
1470		*vpp = vp;
1471		rc = 1;
1472		break;
1473	}
1474	simple_unlock(&spechash_slock);
1475	return (rc);
1476}
1477
1478/*
1479 * Calculate the total number of references to a special device.
1480 */
1481int
1482vcount(vp)
1483	register struct vnode *vp;
1484{
1485	struct vnode *vq, *vnext;
1486	int count;
1487
1488loop:
1489	if ((vp->v_flag & VALIASED) == 0)
1490		return (vp->v_usecount);
1491	simple_lock(&spechash_slock);
1492	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1493		vnext = vq->v_specnext;
1494		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1495			continue;
1496		/*
1497		 * Alias, but not in use, so flush it out.
1498		 */
1499		if (vq->v_usecount == 0 && vq != vp) {
1500			simple_unlock(&spechash_slock);
1501			vgone(vq);
1502			goto loop;
1503		}
1504		count += vq->v_usecount;
1505	}
1506	simple_unlock(&spechash_slock);
1507	return (count);
1508}
1509/*
1510 * Print out a description of a vnode.
1511 */
1512static char *typename[] =
1513{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1514
1515void
1516vprint(label, vp)
1517	char *label;
1518	register struct vnode *vp;
1519{
1520	char buf[64];
1521
1522	if (label != NULL)
1523		printf("%s: %x: ", label, vp);
1524	else
1525		printf("%x: ", vp);
1526	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1527	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1528	    vp->v_holdcnt);
1529	buf[0] = '\0';
1530	if (vp->v_flag & VROOT)
1531		strcat(buf, "|VROOT");
1532	if (vp->v_flag & VTEXT)
1533		strcat(buf, "|VTEXT");
1534	if (vp->v_flag & VSYSTEM)
1535		strcat(buf, "|VSYSTEM");
1536	if (vp->v_flag & VXLOCK)
1537		strcat(buf, "|VXLOCK");
1538	if (vp->v_flag & VXWANT)
1539		strcat(buf, "|VXWANT");
1540	if (vp->v_flag & VBWAIT)
1541		strcat(buf, "|VBWAIT");
1542	if (vp->v_flag & VALIASED)
1543		strcat(buf, "|VALIASED");
1544	if (vp->v_flag & VDOOMED)
1545		strcat(buf, "|VDOOMED");
1546	if (vp->v_flag & VFREE)
1547		strcat(buf, "|VFREE");
1548	if (vp->v_flag & VOBJBUF)
1549		strcat(buf, "|VOBJBUF");
1550	if (buf[0] != '\0')
1551		printf(" flags (%s)", &buf[1]);
1552	if (vp->v_data == NULL) {
1553		printf("\n");
1554	} else {
1555		printf("\n\t");
1556		VOP_PRINT(vp);
1557	}
1558}
1559
1560#ifdef DDB
1561/*
1562 * List all of the locked vnodes in the system.
1563 * Called when debugging the kernel.
1564 */
1565static void
1566printlockedvnodes()
1567{
1568	struct proc *p = curproc;	/* XXX */
1569	struct mount *mp, *nmp;
1570	struct vnode *vp;
1571
1572	printf("Locked vnodes\n");
1573	simple_lock(&mountlist_slock);
1574	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1575		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1576			nmp = mp->mnt_list.cqe_next;
1577			continue;
1578		}
1579		for (vp = mp->mnt_vnodelist.lh_first;
1580		     vp != NULL;
1581		     vp = vp->v_mntvnodes.le_next) {
1582			if (VOP_ISLOCKED(vp))
1583				vprint((char *)0, vp);
1584		}
1585		simple_lock(&mountlist_slock);
1586		nmp = mp->mnt_list.cqe_next;
1587		vfs_unbusy(mp, p);
1588	}
1589	simple_unlock(&mountlist_slock);
1590}
1591#endif
1592
1593/*
1594 * Top level filesystem related information gathering.
1595 */
1596static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1597
1598static int
1599vfs_sysctl SYSCTL_HANDLER_ARGS
1600{
1601	int *name = (int *)arg1 - 1;	/* XXX */
1602	u_int namelen = arg2 + 1;	/* XXX */
1603	struct vfsconf *vfsp;
1604
1605#ifndef NO_COMPAT_PRELITE2
1606	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1607	if (namelen == 1)
1608		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1609#endif
1610
1611#ifdef notyet
1612	/* all sysctl names at this level are at least name and field */
1613	if (namelen < 2)
1614		return (ENOTDIR);		/* overloaded */
1615	if (name[0] != VFS_GENERIC) {
1616		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1617			if (vfsp->vfc_typenum == name[0])
1618				break;
1619		if (vfsp == NULL)
1620			return (EOPNOTSUPP);
1621		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1622		    oldp, oldlenp, newp, newlen, p));
1623	}
1624#endif
1625	switch (name[1]) {
1626	case VFS_MAXTYPENUM:
1627		if (namelen != 2)
1628			return (ENOTDIR);
1629		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1630	case VFS_CONF:
1631		if (namelen != 3)
1632			return (ENOTDIR);	/* overloaded */
1633		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1634			if (vfsp->vfc_typenum == name[2])
1635				break;
1636		if (vfsp == NULL)
1637			return (EOPNOTSUPP);
1638		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1639	}
1640	return (EOPNOTSUPP);
1641}
1642
1643SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1644	"Generic filesystem");
1645
1646#ifndef NO_COMPAT_PRELITE2
1647
1648static int
1649sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1650{
1651	int error;
1652	struct vfsconf *vfsp;
1653	struct ovfsconf ovfs;
1654
1655	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1656		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
1657		strcpy(ovfs.vfc_name, vfsp->vfc_name);
1658		ovfs.vfc_index = vfsp->vfc_typenum;
1659		ovfs.vfc_refcount = vfsp->vfc_refcount;
1660		ovfs.vfc_flags = vfsp->vfc_flags;
1661		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1662		if (error)
1663			return error;
1664	}
1665	return 0;
1666}
1667
1668#endif /* !NO_COMPAT_PRELITE2 */
1669
1670static volatile int kinfo_vdebug = 1;
1671
1672#if 0
1673#define KINFO_VNODESLOP	10
1674/*
1675 * Dump vnode list (via sysctl).
1676 * Copyout address of vnode followed by vnode.
1677 */
1678/* ARGSUSED */
1679static int
1680sysctl_vnode SYSCTL_HANDLER_ARGS
1681{
1682	struct proc *p = curproc;	/* XXX */
1683	struct mount *mp, *nmp;
1684	struct vnode *nvp, *vp;
1685	int error;
1686
1687#define VPTRSZ	sizeof (struct vnode *)
1688#define VNODESZ	sizeof (struct vnode)
1689
1690	req->lock = 0;
1691	if (!req->oldptr) /* Make an estimate */
1692		return (SYSCTL_OUT(req, 0,
1693			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1694
1695	simple_lock(&mountlist_slock);
1696	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1697		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1698			nmp = mp->mnt_list.cqe_next;
1699			continue;
1700		}
1701again:
1702		simple_lock(&mntvnode_slock);
1703		for (vp = mp->mnt_vnodelist.lh_first;
1704		     vp != NULL;
1705		     vp = nvp) {
1706			/*
1707			 * Check that the vp is still associated with
1708			 * this filesystem.  RACE: could have been
1709			 * recycled onto the same filesystem.
1710			 */
1711			if (vp->v_mount != mp) {
1712				simple_unlock(&mntvnode_slock);
1713				if (kinfo_vdebug)
1714					printf("kinfo: vp changed\n");
1715				goto again;
1716			}
1717			nvp = vp->v_mntvnodes.le_next;
1718			simple_unlock(&mntvnode_slock);
1719			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1720			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
1721				return (error);
1722			simple_lock(&mntvnode_slock);
1723		}
1724		simple_unlock(&mntvnode_slock);
1725		simple_lock(&mountlist_slock);
1726		nmp = mp->mnt_list.cqe_next;
1727		vfs_unbusy(mp, p);
1728	}
1729	simple_unlock(&mountlist_slock);
1730
1731	return (0);
1732}
1733#endif
1734
1735/*
1736 * XXX
1737 * Exporting the vnode list on large systems causes them to crash.
1738 * Exporting the vnode list on medium systems causes sysctl to coredump.
1739 */
1740#if 0
1741SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1742	0, 0, sysctl_vnode, "S,vnode", "");
1743#endif
1744
1745/*
1746 * Check to see if a filesystem is mounted on a block device.
1747 */
1748int
1749vfs_mountedon(vp)
1750	struct vnode *vp;
1751{
1752	struct vnode *vq;
1753	int error = 0;
1754
1755	if (vp->v_specflags & SI_MOUNTEDON)
1756		return (EBUSY);
1757	if (vp->v_flag & VALIASED) {
1758		simple_lock(&spechash_slock);
1759		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1760			if (vq->v_rdev != vp->v_rdev ||
1761			    vq->v_type != vp->v_type)
1762				continue;
1763			if (vq->v_specflags & SI_MOUNTEDON) {
1764				error = EBUSY;
1765				break;
1766			}
1767		}
1768		simple_unlock(&spechash_slock);
1769	}
1770	return (error);
1771}
1772
1773/*
1774 * Unmount all filesystems. The list is traversed in reverse order
1775 * of mounting to avoid dependencies.
1776 */
1777void
1778vfs_unmountall()
1779{
1780	struct mount *mp, *nmp;
1781	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
1782	int error;
1783
1784	/*
1785	 * Since this only runs when rebooting, it is not interlocked.
1786	 */
1787	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1788		nmp = mp->mnt_list.cqe_prev;
1789		error = dounmount(mp, MNT_FORCE, p);
1790		if (error) {
1791			printf("unmount of %s failed (",
1792			    mp->mnt_stat.f_mntonname);
1793			if (error == EBUSY)
1794				printf("BUSY)\n");
1795			else
1796				printf("%d)\n", error);
1797		}
1798	}
1799}
1800
1801/*
1802 * Build hash lists of net addresses and hang them off the mount point.
1803 * Called by ufs_mount() to set up the lists of export addresses.
1804 */
1805static int
1806vfs_hang_addrlist(mp, nep, argp)
1807	struct mount *mp;
1808	struct netexport *nep;
1809	struct export_args *argp;
1810{
1811	register struct netcred *np;
1812	register struct radix_node_head *rnh;
1813	register int i;
1814	struct radix_node *rn;
1815	struct sockaddr *saddr, *smask = 0;
1816	struct domain *dom;
1817	int error;
1818
1819	if (argp->ex_addrlen == 0) {
1820		if (mp->mnt_flag & MNT_DEFEXPORTED)
1821			return (EPERM);
1822		np = &nep->ne_defexported;
1823		np->netc_exflags = argp->ex_flags;
1824		np->netc_anon = argp->ex_anon;
1825		np->netc_anon.cr_ref = 1;
1826		mp->mnt_flag |= MNT_DEFEXPORTED;
1827		return (0);
1828	}
1829	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1830	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1831	bzero((caddr_t) np, i);
1832	saddr = (struct sockaddr *) (np + 1);
1833	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1834		goto out;
1835	if (saddr->sa_len > argp->ex_addrlen)
1836		saddr->sa_len = argp->ex_addrlen;
1837	if (argp->ex_masklen) {
1838		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1839		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1840		if (error)
1841			goto out;
1842		if (smask->sa_len > argp->ex_masklen)
1843			smask->sa_len = argp->ex_masklen;
1844	}
1845	i = saddr->sa_family;
1846	if ((rnh = nep->ne_rtable[i]) == 0) {
1847		/*
1848		 * Seems silly to initialize every AF when most are not used,
1849		 * so do it on demand here.
1850		 */
1851		for (dom = domains; dom; dom = dom->dom_next)
1852			if (dom->dom_family == i && dom->dom_rtattach) {
1853				dom->dom_rtattach((void **) &nep->ne_rtable[i],
1854				    dom->dom_rtoffset);
1855				break;
1856			}
1857		if ((rnh = nep->ne_rtable[i]) == 0) {
1858			error = ENOBUFS;
1859			goto out;
1860		}
1861	}
1862	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1863	    np->netc_rnodes);
1864	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
1865		error = EPERM;
1866		goto out;
1867	}
1868	np->netc_exflags = argp->ex_flags;
1869	np->netc_anon = argp->ex_anon;
1870	np->netc_anon.cr_ref = 1;
1871	return (0);
1872out:
1873	free(np, M_NETADDR);
1874	return (error);
1875}
1876
1877/* ARGSUSED */
1878static int
1879vfs_free_netcred(rn, w)
1880	struct radix_node *rn;
1881	void *w;
1882{
1883	register struct radix_node_head *rnh = (struct radix_node_head *) w;
1884
1885	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
1886	free((caddr_t) rn, M_NETADDR);
1887	return (0);
1888}
1889
1890/*
1891 * Free the net address hash lists that are hanging off the mount points.
1892 */
1893static void
1894vfs_free_addrlist(nep)
1895	struct netexport *nep;
1896{
1897	register int i;
1898	register struct radix_node_head *rnh;
1899
1900	for (i = 0; i <= AF_MAX; i++)
1901		if ((rnh = nep->ne_rtable[i])) {
1902			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
1903			    (caddr_t) rnh);
1904			free((caddr_t) rnh, M_RTABLE);
1905			nep->ne_rtable[i] = 0;
1906		}
1907}
1908
1909int
1910vfs_export(mp, nep, argp)
1911	struct mount *mp;
1912	struct netexport *nep;
1913	struct export_args *argp;
1914{
1915	int error;
1916
1917	if (argp->ex_flags & MNT_DELEXPORT) {
1918		if (mp->mnt_flag & MNT_EXPUBLIC) {
1919			vfs_setpublicfs(NULL, NULL, NULL);
1920			mp->mnt_flag &= ~MNT_EXPUBLIC;
1921		}
1922		vfs_free_addrlist(nep);
1923		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1924	}
1925	if (argp->ex_flags & MNT_EXPORTED) {
1926		if (argp->ex_flags & MNT_EXPUBLIC) {
1927			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
1928				return (error);
1929			mp->mnt_flag |= MNT_EXPUBLIC;
1930		}
1931		if ((error = vfs_hang_addrlist(mp, nep, argp)))
1932			return (error);
1933		mp->mnt_flag |= MNT_EXPORTED;
1934	}
1935	return (0);
1936}
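
/*
 * Usage sketch (illustrative addition): a filesystem's VFS_MOUNT()
 * routine handles an export update by handing its private netexport
 * straight to this function ("ump->um_export" and "args" are
 * placeholders for the filesystem's own structures):
 *
 *	if (mp->mnt_flag & MNT_UPDATE) {
 *		...
 *		if (args.fspec == 0)
 *			return (vfs_export(mp, &ump->um_export, &args.export));
 *		...
 *	}
 */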
1937
1938
1939/*
1940 * Set the publicly exported filesystem (WebNFS). Currently, only
1941 * one public filesystem is possible in the spec (RFCs 2054 and 2055).
1942 */
1943int
1944vfs_setpublicfs(mp, nep, argp)
1945	struct mount *mp;
1946	struct netexport *nep;
1947	struct export_args *argp;
1948{
1949	int error;
1950	struct vnode *rvp;
1951	char *cp;
1952
1953	/*
1954	 * mp == NULL -> invalidate the current info, the FS is
1955	 * no longer exported. May be called from either vfs_export
1956	 * or unmount, so check if it hasn't already been done.
1957	 */
1958	if (mp == NULL) {
1959		if (nfs_pub.np_valid) {
1960			nfs_pub.np_valid = 0;
1961			if (nfs_pub.np_index != NULL) {
1962				FREE(nfs_pub.np_index, M_TEMP);
1963				nfs_pub.np_index = NULL;
1964			}
1965		}
1966		return (0);
1967	}
1968
1969	/*
1970	 * Only one allowed at a time.
1971	 */
1972	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
1973		return (EBUSY);
1974
1975	/*
1976	 * Get real filehandle for root of exported FS.
1977	 */
1978	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
1979	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
1980
1981	if ((error = VFS_ROOT(mp, &rvp)))
1982		return (error);
1983
1984	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
1985		return (error);
1986
1987	vput(rvp);
1988
1989	/*
1990	 * If an indexfile was specified, pull it in.
1991	 */
1992	if (argp->ex_indexfile != NULL) {
1993		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
1994		    M_WAITOK);
1995		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
1996		    MAXNAMLEN, (size_t *)0);
1997		if (!error) {
1998			/*
1999			 * Check for illegal filenames.
2000			 */
2001			for (cp = nfs_pub.np_index; *cp; cp++) {
2002				if (*cp == '/') {
2003					error = EINVAL;
2004					break;
2005				}
2006			}
2007		}
2008		if (error) {
2009			FREE(nfs_pub.np_index, M_TEMP);
2010			return (error);
2011		}
2012	}
2013
2014	nfs_pub.np_mount = mp;
2015	nfs_pub.np_valid = 1;
2016	return (0);
2017}
2018
2019struct netcred *
2020vfs_export_lookup(mp, nep, nam)
2021	register struct mount *mp;
2022	struct netexport *nep;
2023	struct sockaddr *nam;
2024{
2025	register struct netcred *np;
2026	register struct radix_node_head *rnh;
2027	struct sockaddr *saddr;
2028
2029	np = NULL;
2030	if (mp->mnt_flag & MNT_EXPORTED) {
2031		/*
2032		 * Lookup in the export list first.
2033		 */
2034		if (nam != NULL) {
2035			saddr = nam;
2036			rnh = nep->ne_rtable[saddr->sa_family];
2037			if (rnh != NULL) {
2038				np = (struct netcred *)
2039					(*rnh->rnh_matchaddr)((caddr_t)saddr,
2040							      rnh);
2041				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2042					np = NULL;
2043			}
2044		}
2045		/*
2046		 * If no address match, use the default if it exists.
2047		 */
2048		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2049			np = &nep->ne_defexported;
2050	}
2051	return (np);
2052}
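
/*
 * Usage sketch (illustrative addition): the NFS server calls into the
 * filesystem's fhtovp/check-export path, which in turn uses this to map
 * the client address to export flags and an anonymous credential.
 * "ump->um_export" is a placeholder for wherever the filesystem keeps
 * its struct netexport:
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 *	return (0);
 */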
2053
2054/*
2055 * Perform msync on all vnodes under a mount point.
2056 * The mount point must be locked.
2057 */
2058void
2059vfs_msync(struct mount *mp, int flags) {
2060	struct vnode *vp, *nvp;
2061	int anyio, tries;
2062
2063	tries = 5;
2064loop:
2065	anyio = 0;
2066	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2067
2068		nvp = vp->v_mntvnodes.le_next;
2069
2070		if (vp->v_mount != mp) {
2071			goto loop;
2072		}
2073
2074		if ((vp->v_flag & VXLOCK) ||
2075			(VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) {
2076			continue;
2077		}
2078
2079		simple_lock(&vp->v_interlock);
2080		if (vp->v_object &&
2081		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2082			if (!vget(vp,
2083				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2084				if (vp->v_object) {
2085					vm_object_page_clean(vp->v_object, 0, 0, TRUE);
2086					anyio = 1;
2087				}
2088				vput(vp);
2089			}
2090		} else {
2091			simple_unlock(&vp->v_interlock);
2092		}
2093	}
2094	if (anyio && (--tries > 0))
2095		goto loop;
2096}
2097
2098/*
2099 * Create the VM object needed for VMIO and mmap support.  This
2100 * is done for all VREG files in the system.  Some filesystems might
2101 * take advantage of the additional metadata buffering capability of the
2102 * VMIO code by putting the device node in VMIO mode as well.
2103 *
2104 * If !waslocked, must be called with interlock.
2105 */
2106int
2107vfs_object_create(vp, p, cred, waslocked)
2108	struct vnode *vp;
2109	struct proc *p;
2110	struct ucred *cred;
2111	int waslocked;
2112{
2113	struct vattr vat;
2114	vm_object_t object;
2115	int error = 0;
2116
2117	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2118		return 0;
2119	}
2120
2121	if (!waslocked)
2122		vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p);
2123
2124retry:
2125	if ((object = vp->v_object) == NULL) {
2126		if (vp->v_type == VREG) {
2127			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2128				goto retn;
2129			object = vnode_pager_alloc(vp,
2130				OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2131		} else if (major(vp->v_rdev) < nblkdev) {
2132			/*
2133			 * This simply allocates the biggest object possible
2134			 * for a VBLK vnode.  This should be fixed, but doesn't
2135			 * cause any problems (yet).
2136			 */
2137			object = vnode_pager_alloc(vp, INT_MAX, 0, 0);
2138		}
2139		object->ref_count--;
2140		vp->v_usecount--;
2141	} else {
2142		if (object->flags & OBJ_DEAD) {
2143			VOP_UNLOCK(vp, 0, p);
2144			tsleep(object, PVM, "vodead", 0);
2145			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2146			goto retry;
2147		}
2148	}
2149
2150	if (vp->v_object) {
2151		vp->v_flag |= VOBJBUF;
2152	}
2153
2154retn:
2155	if (!waslocked) {
2156		simple_lock(&vp->v_interlock);
2157		VOP_UNLOCK(vp, LK_INTERLOCK, p);
2158	}
2159
2160	return error;
2161}
2162
2163static void
2164vfree(vp)
2165	struct vnode *vp;
2166{
2167	int s;
2168
2169	s = splbio();
2170	simple_lock(&vnode_free_list_slock);
2171	if (vp->v_flag & VTBFREE) {
2172		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2173		vp->v_flag &= ~VTBFREE;
2174	}
2175	if (vp->v_flag & VAGE) {
2176		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2177	} else {
2178		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2179	}
2180	freevnodes++;
2181	simple_unlock(&vnode_free_list_slock);
2182	vp->v_flag &= ~VAGE;
2183	vp->v_flag |= VFREE;
2184	splx(s);
2185}
2186
2187static void
2188vbusy(vp)
2189	struct vnode *vp;
2190{
2191	int s;
2192
2193	s = splbio();
2194	simple_lock(&vnode_free_list_slock);
2195	if (vp->v_flag & VTBFREE) {
2196		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2197		vp->v_flag &= ~VTBFREE;
2198	} else {
2199		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2200		freevnodes--;
2201	}
2202	simple_unlock(&vnode_free_list_slock);
2203	vp->v_flag &= ~VFREE;
2204	splx(s);
2205}
2206
2207/*
2208 * Record a process's interest in events which might happen to
2209 * a vnode.  Because poll uses the historic select-style interface
2210 * internally, this routine serves as both the ``check for any
2211 * pending events'' and the ``record my interest in future events''
2212 * functions.  (These are done together, while the lock is held,
2213 * to avoid race conditions.)
2214 */
2215int
2216vn_pollrecord(vp, p, events)
2217	struct vnode *vp;
2218	struct proc *p;
2219	short events;
2220{
2221	simple_lock(&vp->v_pollinfo.vpi_lock);
2222	if (vp->v_pollinfo.vpi_revents & events) {
2223		/*
2224		 * This leaves events we are not interested
2225		 * in available for the other process which
2226		 * presumably had requested them
2227		 * (otherwise they would never have been
2228		 * recorded).
2229		 */
2230		events &= vp->v_pollinfo.vpi_revents;
2231		vp->v_pollinfo.vpi_revents &= ~events;
2232
2233		simple_unlock(&vp->v_pollinfo.vpi_lock);
2234		return events;
2235	}
2236	vp->v_pollinfo.vpi_events |= events;
2237	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2238	simple_unlock(&vp->v_pollinfo.vpi_lock);
2239	return 0;
2240}
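
/*
 * Usage sketch (illustrative addition): a filesystem that supports
 * polling can simply forward its VOP_POLL() to this routine, which both
 * reports already-pending events and records interest in future ones:
 *
 *	static int
 *	example_poll(ap)
 *		struct vop_poll_args *ap;
 *	{
 *		return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
 *	}
 */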
2241
2242/*
2243 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
2244 * it is possible for us to miss an event due to race conditions, but
2245 * that condition is expected to be rare, so for the moment it is the
2246 * preferred interface.
2247 */
2248void
2249vn_pollevent(vp, events)
2250	struct vnode *vp;
2251	short events;
2252{
2253	simple_lock(&vp->v_pollinfo.vpi_lock);
2254	if (vp->v_pollinfo.vpi_events & events) {
2255		/*
2256		 * We clear vpi_events so that we don't
2257		 * call selwakeup() twice if two events are
2258		 * posted before the polling process(es) is
2259		 * awakened.  This also ensures that we take at
2260		 * most one selwakeup() if the polling process
2261		 * is no longer interested.  However, it does
2262		 * mean that only one event can be noticed at
2263		 * a time.  (Perhaps we should only clear those
2264		 * event bits which we note?) XXX
2265		 */
2266		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
2267		vp->v_pollinfo.vpi_revents |= events;
2268		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2269	}
2270	simple_unlock(&vp->v_pollinfo.vpi_lock);
2271}
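
/*
 * Usage sketch (illustrative addition): producers normally go through
 * the VN_POLLEVENT() macro, which only drops into this routine when
 * someone has actually registered interest.  For instance, when data
 * becomes readable on a vnode:
 *
 *	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
 */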
2272
2273/*
2274 * Wake up anyone polling on vp because it is being revoked.
2275 * This depends on dead_poll() returning POLLHUP for correct
2276 * behavior.
2277 */
2278void
2279vn_pollgone(vp)
2280	struct vnode *vp;
2281{
2282	simple_lock(&vp->v_pollinfo.vpi_lock);
2283	if (vp->v_pollinfo.vpi_events) {
2284		vp->v_pollinfo.vpi_events = 0;
2285		selwakeup(&vp->v_pollinfo.vpi_selinfo);
2286	}
2287	simple_unlock(&vp->v_pollinfo.vpi_lock);
2288}
2289