/*	$OpenBSD: vfs_subr.c,v 1.79 2001/12/10 18:47:16 art Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
struct freelst vnode_free_list;   /* vnode free list */

struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

void	vclean __P((struct vnode *, int, struct proc *));

void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));

int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
int vfs_free_netcred __P((struct radix_node *, void *));
void vfs_free_addrlist __P((struct netexport *));
static __inline__ void vputonfreelist __P((struct vnode *));

int vflush_vnode(struct vnode *, void *);

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

#define VN_KNOTE(vp, b) \
	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))

struct pool vnode_pool;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
		0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	simple_lock_init(&mountlist_slock);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}


/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */

int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}


/*
 * Free a busy file system
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

int
vfs_isbusy(struct mount *mp)
{
	return (lockstatus(&mp->mnt_lock));
}
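
/*
 * Usage sketch (illustrative, not part of the original source): walkers
 * of the mount list pair vfs_busy() with vfs_unbusy() so that a
 * concurrent unmount cannot tear the mount point down under them, as
 * printlockedvnodes() and sysctl_vnode() below do:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
 *	    mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */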

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */

int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}


/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
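
/*
 * Sketch of the resulting id (illustrative): for a filesystem type
 * number T and mount counter N, the code above yields
 *
 *	f_fsid.val[0] = makedev(nblkdev + T, N)
 *	f_fsid.val[1] = T
 *
 * so val[1] names the filesystem type while val[0] is made unique per
 * mount by probing candidate values through vfs_getvfs() until one is
 * not in use.
 */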

/*
 * Make a 'unique' number from a mount type name.
 * Note that this is no longer used for ffs which
 * now has an on-disk filesystem id.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
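
/*
 * Worked example (illustrative): for the type name "ffs" the loop
 * computes
 *
 *	rv = (0x000 << 2) ^ 'f' (0x66) = 0x066
 *	rv = (0x066 << 2) ^ 'f' (0x66) = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' (0x73) = 0x78b
 *
 * The result is 'unique' only in the hash sense; distinct names can
 * collide.
 */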

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;			/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	s = splbio();
	if ((numvnodes < desiredvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		bzero(vp, sizeof *vp);
		/*
		 * initialize uvm_object within vnode.
		 */
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		TAILQ_FOREACH(vp, listhd, v_freelist) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		simple_unlock(&vnode_free_list_slock);
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq)) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	vp->v_size = VSIZENOTSET;

	return (0);
}
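
/*
 * Usage sketch (illustrative; modeled on a typical filesystem vget
 * routine, names hypothetical): a caller supplies its vnode operations
 * vector and then finishes the initialization that getnewvnode()
 * leaves to it:
 *
 *	if ((error = getnewvnode(VT_UFS, mp, xxx_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	vp->v_data = ip;	attach fs-private data
 *	vp->v_type = ...;	set the real type; the vnode comes back
 *				as VNON with v_usecount == 1 and is not
 *				on any free list
 */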

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}


/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;
	int s;
	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
		flags |= LK_INTERLOCK;
	}
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return (EBUSY);
		}
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0)
				vputonfreelist(vp);

			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
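
/*
 * Usage sketch (illustrative): a lookup that finds a cached vnode must
 * revalidate it through vget(), since the vnode may be mid-reclaim:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		goto retry;	cleaned out or being cleaned; look again
 *	... use vp ...
 *	vput(vp);		unlock and drop the reference
 */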


#ifdef DIAGNOSTIC
/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

static __inline__ void
vputonfreelist(vp)
	struct vnode *vp;
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	simple_unlock(&vp->v_interlock);

	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif
	vputonfreelist(vp);

	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

/*
 * Page or buffer structure gets a reference.
 * Must be called at splbio().
 */
void
vhold(struct vnode *vp)
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Release a vhold reference.
 * Must be called at splbio().
 */
void
vholdrele(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("vholdrele: holdcnt");
#endif
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}
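
/*
 * Pairing sketch (illustrative): hold references track buffers cached
 * on a vnode.  bgetvp() below takes one when a buffer is attached and
 * brelvp() drops it, which is what migrates an unreferenced vnode
 * between vnode_free_list and vnode_hold_list:
 *
 *	s = splbio();
 *	bgetvp(vp, bp);		calls vhold(vp)
 *	...
 *	brelvp(bp);		calls vholdrele(vp)
 *	splx(s);
 */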

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg)
{
	struct vnode *vp, *nvp;
	int error = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		simple_lock(&vp->v_interlock);
		simple_unlock(&mntvnode_slock);

		error = func(vp, arg);

		simple_lock(&mntvnode_slock);

		if (error != 0)
			break;
	}
	simple_unlock(&mntvnode_slock);

	return (error);
}


struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg)
{
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		simple_unlock(&vp->v_interlock);
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	simple_unlock(&vp->v_interlock);
	va->busy++;
	return (0);
}

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}
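
/*
 * Callback convention (illustrative sketch): func is invoked with the
 * vnode interlock held and mntvnode_slock dropped, and must see that
 * the interlock is released before it returns, as every path through
 * vflush_vnode() above does.  A hypothetical counting callback:
 *
 *	int
 *	count_vnode(struct vnode *vp, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		simple_unlock(&vp->v_interlock);
 *		return (0);
 *	}
 *
 * invoked as vfs_mount_foreach_vnode(mp, count_vnode, &count).
 */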

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		simple_lock(&vp->v_interlock);

		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}

		simple_unlock(&vp->v_interlock);
	}
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	simple_lock(&vp->v_selectinfo.vsi_lock);
	VN_KNOTE(vp, NOTE_REVOKE);
	simple_unlock(&vp->v_selectinfo.vsi_lock);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef DIAGNOSTIC
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}


/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}


/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		simple_lock(&vnode_free_list_slock);
		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
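
/*
 * Usage sketch (illustrative; major numbers hypothetical): a driver's
 * detach routine revokes all vnodes naming its device so stale opens
 * cannot reach the departed hardware:
 *
 *	vdevgone(XXX_BMAJ, 0, 15, VBLK);
 *	vdevgone(XXX_CMAJ, 0, 15, VCHR);
 */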

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %u, writecount %u, holdcount %u,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_bioflag & VBIOWAIT)
		strcat(buf, "|VBIOWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;
	register struct mount *mp, *nmp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first; vp;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}


int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	register struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}

		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}

	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN)
		return (EINVAL);
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(file_mode, uid, gid, acc_mode, cred)
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/* User id 0 always gets access. */
	if (cred->cr_uid == 0)
		return 0;

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
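
/*
 * Worked example (illustrative): file_mode 0640, owner uid 100, group
 * gid 10.  A caller with cr_uid 200 who is a member of group 10 asking
 * for VREAD builds mask = S_IRGRP (0040); 0640 & 0040 == 0040, so 0 is
 * returned.  Asking for VWRITE builds mask = S_IWGRP (0020);
 * 0640 & 0020 == 0, so EACCES is returned.
 */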

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Perform a sync() operation and wait for buffers to flush.
 * Assumptions: called w/ scheduler disabled and physical io enabled
 * for now called at spl0() XXX
 */
int
vfs_syncwait(verbose)
	int verbose;
{
	register struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		DELAY(40000 * iter);
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}


/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(vp, slpflag, wmesg, timeo)
	struct vnode *vp;
	int slpflag, timeo;
	char *wmesg;
{
	int error = 0;

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep((caddr_t)&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}


/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(vp)
	struct vnode *vp;
{
	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
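
/*
 * Pairing sketch (illustrative): code that starts asynchronous output
 * on a vnode bumps v_numoutput at splbio(), and the completion path
 * (e.g. biodone()) calls vwakeup() to drop the count and rouse any
 * vwaitforio() sleeper:
 *
 *	s = splbio();
 *	vp->v_numoutput++;
 *	splx(s);
 *	... start the write; on completion ...
 *	vwakeup(vp);
 */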

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error, rv;
	int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	if (vp->v_type == VREG) {
		simple_lock(&uobj->vmobjlock);
		rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
		simple_unlock(&uobj->vmobjlock);
		if (!rv) {
			return EIO;
		}
	}

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (vp->v_dirtyblkhd.lh_first != NULL) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    vp->v_dirtyblkhd.lh_first != NULL)
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s;

	if (vp->v_type == VREG) {
		int flags = PGO_CLEANIT | PGO_ALLPAGES |
		    (sync ? PGO_SYNCIO : 0);

		simple_lock(&uobj->vmobjlock);
		(uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
		simple_unlock(&uobj->vmobjlock);
	}

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		splx(s);
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) == NULL)
		panic("brelvp: NULL");

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
	    (vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = NULL;

	vholdrele(vp);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode's
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct vnode *oldvp = bp->b_vp;

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(bp)
	struct buf *bp;
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
		    (vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* fall through */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}
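
/*
 * Example of the delay policy above (illustrative; assumes the usual
 * syncdelay of 30 seconds): a newly dirtied directory is queued to the
 * syncer 15 seconds out, a mounted block device 10 seconds out, and
 * anything else the full 30, spreading writeback across syncer passes.
 */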

int
vfs_register(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}
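
/*
 * Usage sketch (illustrative; names hypothetical): a filesystem module
 * describes itself with a vfsconf carrying its vfsops, name, and type
 * number, then registers it once at attach time:
 *
 *	error = vfs_register(&xxxfs_vfsconf);
 *
 * and undoes this with vfs_unregister(&xxxfs_vfsconf) at detach, which
 * fails with EBUSY while any such filesystem remains mounted.
 */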

int
vfs_unregister(vfs)
	struct vfsconf *vfs;
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(vp, errp)
	struct vnode *vp;
	int *errp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}