/*	$OpenBSD: vfs_subr.c,v 1.238 2015/12/05 10:11:53 tedu Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/stat.h>
#include <sys/acct.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/tree.h>
#include <sys/specdev.h>

#include <netinet/in.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_vnode.h>

#include "softraid.h"

void sr_shutdown(void);

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};

int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */

void	vclean(struct vnode *, int, struct proc *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
    struct export_args *);
int vfs_free_netcred(struct radix_node *, void *, u_int);
void vfs_free_addrlist(struct netexport *);
void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);
int maxvnodes;

#ifdef DEBUG
void printlockedvnodes(void);
#endif

struct pool vnode_pool;
struct pool uvm_vnode_pool;

static int rb_buf_compare(struct buf *b1, struct buf *b2);
RB_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

static int
rb_buf_compare(struct buf *b1, struct buf *b2)
{
	if (b1->b_lblkno < b2->b_lblkno)
		return (-1);
	if (b1->b_lblkno > b2->b_lblkno)
		return (1);
	return (0);
}

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{
	/* buffer cache may need a vnode for each buffer */
	maxvnodes = 2 * initialvnodes;
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, PR_WAITOK,
	    "vnodes", NULL);
	pool_init(&uvm_vnode_pool, sizeof(struct uvm_vnode), 0, 0, PR_WAITOK,
	    "uvmvnodes", NULL);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();

	rn_init(sizeof(struct sockaddr_in));
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting.
 *
 * The default behaviour is to attempt to get a READ lock and, in case of
 * an ongoing unmount, to wait for it to finish and then return failure.
 */
int
vfs_busy(struct mount *mp, int flags)
{
	int rwflags = 0;

	/* new mountpoints need their lock initialised */
	if (mp->mnt_lock.rwl_name == NULL)
		rw_init(&mp->mnt_lock, "vfslock");

	if (flags & VB_WRITE)
		rwflags |= RW_WRITE;
	else
		rwflags |= RW_READ;

	if (flags & VB_WAIT)
		rwflags |= RW_SLEEPFAIL;
	else
		rwflags |= RW_NOSLEEP;

	if (rw_enter(&mp->mnt_lock, rwflags))
		return (EBUSY);

	return (0);
}

/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp)
{
	rw_exit(&mp->mnt_lock);
}
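
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a typical caller keeps a mount point busy for the duration
 * of its work so an unmount cannot tear the mount down underneath it;
 * VB_NOWAIT callers simply skip contended mount points, as
 * printlockedvnodes() below does:
 *
 *	struct mount *mp, *nmp;
 *
 *	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, nmp) {
 *		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
 *			continue;
 *		...operate on mp...
 *		vfs_unbusy(mp);
 *	}
 */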

int
vfs_isbusy(struct mount *mp)
{
	if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
		return (1);
	else
		return (0);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
	(void)vfs_busy(mp, VB_READ|VB_NOWAIT);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN, 0);
	copystr(devname, mp->mnt_stat.f_mntfromspec, MNAMELEN, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
	}

	return (NULL);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!TAILQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{
	vap->va_type = VNON;
	/*
	 * Don't get fancy: u_quad_t = u_int = VNOVAL leaves the u_quad_t
	 * with 2^31-1 instead of 2^64-1.  Just write'm out and let
	 * the compiler do its job.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_filerev = VNOVAL;
	vap->va_vaflags = 0;
}
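
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a VOP_SETATTR() caller first marks every attribute "not
 * being changed" with vattr_null() and then fills in only the fields
 * it wants to set, e.g. to truncate a file to zero length:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */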

/*
 * Routines having to do with the management of the vnode table.
 */
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, struct vops *vops,
    struct vnode **vpp)
{
	struct proc *p = curproc;
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * allow maxvnodes to increase if the buffer cache itself
	 * is big enough to justify it. (we don't shrink it ever)
	 */
	maxvnodes = maxvnodes < bcstats.numbufs ? bcstats.numbufs
	    : maxvnodes;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all their
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes / 2 > maxvnodes)
		toggle = 0;

	s = splbio();
	if ((numvnodes < maxvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
		vp->v_uvm = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
		vp->v_uvm->u_vnode = vp;
		RB_INIT(&vp->v_bufs_tree);
		RB_INIT(&vp->v_nc_tree);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (VOP_ISLOCKED(vp) == 0)
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}

#ifdef DIAGNOSTIC
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
#endif

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		if (vp->v_type != VBAD)
			vgonel(vp, p);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	cache_purge(vp);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
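
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a file system allocates a fresh vnode for a new in-core
 * inode roughly like this (FFS names shown for concreteness; "ip" is
 * hypothetical):
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_UFS, mp, &ffs_vops, &vp)) != 0)
 *		return (error);
 *	vp->v_data = ip;	...attach the private inode...
 */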

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console.
 */
int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, &spec_vops, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	if (vp->v_type == VCHR && cdevsw[major(vp->v_rdev)].d_type == D_TTY)
		vp->v_flag |= VISTTY;
	*vpp = vp;
	return (0);
}
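
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): early mount code obtains the vnode for the root device in
 * this style, much as ffs_mountroot() does:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("mountroot: can't setup bdevvp for root");
 */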

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE, p)) {
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		nvp->v_specinfo = malloc(sizeof(struct specinfo), M_VNODE,
			M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		memset(nvp->v_specbitmap, 0, sizeof(nvp->v_specbitmap));
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. We get here when the alias
	 * we found has tag VT_NON and type VBLK, which means we found
	 * a block device vnode that was created with bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	VOP_UNLOCK(vp, 0, p);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(struct vnode *vp, int flags, struct proc *p)
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			return (EBUSY);
		}

		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);
		}
		return (error);
	}

	return (0);
}
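
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): hash-lookup code such as an inode cache grabs a vnode it
 * found this way, retrying the lookup if the vnode was torn down
 * while we slept in vget():
 *
 *	loop:
 *		...find vp in the hash...
 *		if (vget(vp, LK_EXCLUSIVE, p))
 *			goto loop;
 */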

/* Vnode reference. */
void
vref(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	if (vp->v_type == VNON)
		panic("vref on a VNON vnode");
#endif
	vp->v_usecount++;
}

void
vputonfreelist(struct vnode *vp)
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif

	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 * Returns 0 if it did not sleep.
 */
int
vrele(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		return (0);
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif

	if (vn_lock(vp, LK_EXCLUSIVE, p)) {
#ifdef DIAGNOSTIC
		vprint("vrele: cannot lock", vp);
#endif
		return (1);
	}

	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);
	return (1);
}
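
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): the choice between the two release functions depends only on
 * whether the caller still holds the vnode lock:
 *
 *	vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p);
 *	...use the locked vnode...
 *	vput(vp);		...drops the lock and the reference...
 *
 *	vref(vp);
 *	...use the vnode without holding its lock...
 *	vrele(vp);		...drops the reference only...
 */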

/* Page or buffer structure gets a reference. */
void
vhold(struct vnode *vp)
{
	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	}
	vp->v_holdcnt++;
}

/* Lose interest in a vnode. */
void
vdrop(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("vdrop: zero holdcnt");
#endif

	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg)
{
	struct vnode *vp, *nvp;
	int error = 0;

loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);

		error = func(vp, arg);

		if (error != 0)
			break;
	}

	return (error);
}

struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg)
{
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = &spec_vops;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	va->busy++;
	return (0);
}

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}
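
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a file system's unmount entry point flushes its vnodes
 * roughly like this, using the flags described above vflush_vnode():
 *
 *	int flags = 0;
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 */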

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = &dead_vops;
	VN_KNOTE(vp, NOTE_REVOKE);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef VFSLCKDEBUG
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 */
int
vrecycle(struct vnode *vp, struct proc *p)
{
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (1);
	}
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct proc *p = curproc;
	vgonel(vp, p);
}

/*
 * vgone, with struct proc.
 */
void
vgonel(struct vnode *vp, struct proc *p)
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		free(vp->v_specinfo, M_VNODE, sizeof(struct specinfo));
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
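
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a disk driver's detach routine revokes any vnodes still
 * referring to its device, for both block and character majors.
 * The "sdopen"/DISKMINOR names are from the sd(4) driver and are
 * shown only for illustration:
 *
 *	int bmaj, cmaj, mn;
 *
 *	mn = DISKMINOR(self->dv_unit, 0);
 *	for (bmaj = 0; bmaj < nblkdev; bmaj++)
 *		if (bdevsw[bmaj].d_open == sdopen)
 *			vdevgone(bmaj, mn, mn + MAXPARTITIONS - 1, VBLK);
 *	for (cmaj = 0; cmaj < nchrdev; cmaj++)
 *		if (cdevsw[cmaj].d_open == sdopen)
 *			vdevgone(cmaj, mn, mn + MAXPARTITIONS - 1, VCHR);
 */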

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

#if defined(DEBUG) || defined(DIAGNOSTIC)
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(char *label, struct vnode *vp)
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("%p, type %s, use %u, write %u, hold %u,",
		vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
#endif /* DEBUG || DIAGNOSTIC */

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");

	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
			continue;
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		vfs_unbusy(mp);
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct vfsconf *vfsp, *tmpvfsp;
	int ret;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));

	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */

		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		/* Make a copy, clear out kernel pointers */
		tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK);
		memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp));
		tmpvfsp->vfc_vfsops = NULL;
		tmpvfsp->vfc_next = NULL;

		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
		    sizeof(struct vfsconf));

		free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp));
		return (ret);
	case VFS_BCACHESTAT:	/* buffer cache statistics */
		ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
		    sizeof(struct bcachestats));
		return (ret);
	}
	return (EOPNOTSUPP);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int nplen, i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		/* fill in the kernel's ucred from userspace's xucred */
		if ((error = crfromxucred(&np->netc_anon, &argp->ex_anon)))
			return (error);
		mp->mnt_flag |= MNT_DEFEXPORTED;
		goto finish;
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	nplen = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(nplen, M_NETADDR, M_WAITOK|M_ZERO);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	/* fill in the kernel's ucred from userspace's xucred */
	if ((error = crfromxucred(&np->netc_anon, &argp->ex_anon)))
		goto out;
	i = saddr->sa_family;
	switch (i) {
	case AF_INET:
		if ((rnh = nep->ne_rtable_inet) == NULL) {
			if (!rn_inithead((void **)&nep->ne_rtable_inet,
			    offsetof(struct sockaddr_in, sin_addr))) {
				error = ENOBUFS;
				goto out;
			}
			rnh = nep->ne_rtable_inet;
		}
		break;
	default:
		error = EINVAL;
		goto out;
	}
	rn = rn_addroute(saddr, smask, rnh, np->netc_rnodes, 0);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
finish:
	np->netc_exflags = argp->ex_flags;
	return (0);
out:
	free(np, M_NETADDR, nplen);
	return (error);
}

int
vfs_free_netcred(struct radix_node *rn, void *w, u_int id)
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	rn_delete(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR, 0);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(struct netexport *nep)
{
	struct radix_node_head *rnh;

	if ((rnh = nep->ne_rtable_inet) != NULL) {
		rn_walktree(rnh, vfs_free_netcred, rnh);
		free(rnh, M_RTABLE, 0);
		nep->ne_rtable_inet = NULL;
	}
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			switch (saddr->sa_family) {
			case AF_INET:
				rnh = nep->ne_rtable_inet;
				break;
			default:
				rnh = NULL;
				break;
			}
			if (rnh != NULL)
				np = (struct netcred *)rn_match(saddr, rnh);
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
    mode_t acc_mode, struct ucred *cred)
{
	mode_t mask;

	/* User id 0 always gets read/write access. */
	if (cred->cr_uid == 0) {
		/* For VEXEC, at least one of the execute bits must be set. */
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return EACCES;
		return 0;
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
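
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): a file system's access VOP normally reduces to a vaccess()
 * call on the ownership and mode it keeps in its private inode ("ip"
 * here is hypothetical):
 *
 *	return (vaccess(vp->v_type, ip->i_mode & ALLPERMS,
 *	    ip->i_uid, ip->i_gid, acc_mode, cred));
 */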

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
		if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
#ifdef ACCOUNTING
	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");

#if NSOFTRAID > 0
	sr_shutdown();
#endif
}

/*
 * Perform the sync() operation and wait for buffers to flush.
 * Assumptions: called with the scheduler disabled and physical I/O
 * enabled; for now called at spl0(). XXX
 */
int
vfs_syncwait(int verbose)
{
	struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	p = curproc ? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		LIST_FOREACH(bp, &bufhead, b_list) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				buf_acquire(bp);
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock))
			hold_count = __mp_release_all(&kernel_lock);
		else
			hold_count = 0;
#endif
		DELAY(40000 * iter);
#ifdef MULTIPROCESSOR
		if (hold_count)
			__mp_acquire_count(&kernel_lock, hold_count);
#endif
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return (sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}

/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}
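
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): fsync-style code drains a vnode's pending writes under
 * splbio(), exactly as vinvalbuf() below does:
 *
 *	s = splbio();
 *	vwaitforio(vp, 0, "fsync", 0);
 *	splx(s);
 */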

/*
 * Update outstanding I/O count and do wakeup if requested.
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
void
vwakeup(struct vnode *vp)
{
	splassert(IPL_BIO);

	if (vp != NULL) {
		if (vp->v_numoutput-- == 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
			vp->v_bioflag &= ~VBIOWAIT;
			wakeup(&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
    int slpflag, int slptimeo)
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

#ifdef VFSLCKDEBUG
	if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
		panic("vinvalbuf(): vp isn't locked");
#endif

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				buf_acquire(bp);
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			buf_acquire_nomap(bp);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}
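
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): when a file is truncated or a device is closed for the last
 * time, its buffers are written back and discarded with a call such
 * as:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 */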

void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		buf_acquire(bp);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		splx(s);
#ifdef DIAGNOSTIC
		vprint("vflushbuf: dirty", vp);
#endif
		goto loop;
	}
	splx(s);
}

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = NULL;

	vdrop(vp);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(struct buf *bp, struct vnode *newvp)
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(struct buf *bp)
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* FALLTHROUGH */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

int
vfs_register(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}
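
/*
 * Illustrative example (editor's sketch, not part of the original
 * file): at boot, the VFS initialization code walks the statically
 * configured table and registers each entry, which is the usual way
 * vfs_register() runs.  The "vfsconflist"/"nvfsconf" names are only
 * illustrative:
 *
 *	for (i = 0; i < nvfsconf; i++)
 *		if (vfsconflist[i].vfc_vfsops != NULL)
 *			vfs_register(&vfsconflist[i]);
 */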

int
vfs_unregister(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;

	maxvfsconf = maxtypenum;
	return 0;
}

/*
 * Check if vnode represents a disk device
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>

void
vfs_buf_print(void *b, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct buf *bp = b;

	(*pr)("  vp %p lblkno 0x%llx blkno 0x%llx dev 0x%x\n"
	      "  proc %p error %d flags %lb\n",
	    bp->b_vp, (int64_t)bp->b_lblkno, (int64_t)bp->b_blkno, bp->b_dev,
	    bp->b_proc, bp->b_error, bp->b_flags, B_BITS);

	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n"
	      "  data %p saveaddr %p dep %p iodone %p\n",
	    bp->b_bufsize, bp->b_bcount, (long)bp->b_resid,
	    bp->b_data, bp->b_saveaddr,
	    LIST_FIRST(&bp->b_dep), bp->b_iodone);

	(*pr)("  dirty {off 0x%x end 0x%x} valid {off 0x%x end 0x%x}\n",
	    bp->b_dirtyoff, bp->b_dirtyend, bp->b_validoff, bp->b_validend);

#ifdef FFS_SOFTUPDATES
	if (full)
		softdep_print(bp, full, pr);
#endif
}

const char *vtypes[] = { VTYPE_NAMES };
const char *vtags[] = { VTAG_NAMES };

void
vfs_vnode_print(void *v, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct vnode *vp = v;

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	      vp->v_tag > nitems(vtags)? "<unk>":vtags[vp->v_tag], vp->v_tag,
	      vp->v_type > nitems(vtypes)? "<unk>":vtypes[vp->v_type],
	      vp->v_type, vp->v_mount, vp->v_mountedhere);

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	      vp->v_data, vp->v_usecount, vp->v_writecount,
	      vp->v_holdcnt, vp->v_numoutput);

	/* uvm_object_printit(&vp->v_uobj, full, pr); */

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}

void
vfs_mount_print(struct mount *mp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct vfsconf *vfc = mp->mnt_vfc;
	struct vnode *vp;
	int cnt = 0;

	(*pr)("flags %b\nvnodecovered %p syncer %p data %p\n",
	    mp->mnt_flag, MNT_BITS,
	    mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);

	(*pr)("vfsconf: ops %p name \"%s\" num %d ref %d flags 0x%x\n",
	    vfc->vfc_vfsops, vfc->vfc_name, vfc->vfc_typenum,
	    vfc->vfc_refcount, vfc->vfc_flags);

	(*pr)("statvfs cache: bsize %x iosize %x\nblocks %llu free %llu avail %lld\n",
	    mp->mnt_stat.f_bsize, mp->mnt_stat.f_iosize, mp->mnt_stat.f_blocks,
	    mp->mnt_stat.f_bfree, mp->mnt_stat.f_bavail);

	(*pr)("  files %llu ffiles %llu favail %lld\n", mp->mnt_stat.f_files,
	    mp->mnt_stat.f_ffree, mp->mnt_stat.f_favail);

	(*pr)("  f_fsidx {0x%x, 0x%x} owner %u ctime 0x%llx\n",
	    mp->mnt_stat.f_fsid.val[0], mp->mnt_stat.f_fsid.val[1],
	    mp->mnt_stat.f_owner, mp->mnt_stat.f_ctime);

	(*pr)("  syncwrites %llu asyncwrites = %llu\n",
	    mp->mnt_stat.f_syncwrites, mp->mnt_stat.f_asyncwrites);

	(*pr)("  syncreads %llu asyncreads = %llu\n",
	    mp->mnt_stat.f_syncreads, mp->mnt_stat.f_asyncreads);

	(*pr)("  fstype \"%s\" mnton \"%s\" mntfrom \"%s\" mntspec \"%s\"\n",
	    mp->mnt_stat.f_fstypename, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntfromspec);

	(*pr)("locked vnodes:");
	/* XXX would take mountlist lock, except ddb has no context */
	LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
		if (VOP_ISLOCKED(vp)) {
			if (!LIST_NEXT(vp, v_mntvnodes))
				(*pr)(" %p", vp);
			else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
				(*pr)("\n\t%p", vp);
			else
				(*pr)(", %p", vp);
		}
	(*pr)("\n");

	if (full) {
		(*pr)("all vnodes:\n\t");
		/* XXX would take mountlist lock, except ddb has no context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
			if (!LIST_NEXT(vp, v_mntvnodes))
				(*pr)(" %p", vp);
			else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
				(*pr)(" %p,\n\t", vp);
			else
				(*pr)(" %p,", vp);
		(*pr)("\n");
	}
}
#endif /* DDB */

void
copy_statfs_info(struct statfs *sbp, const struct mount *mp)
{
	const struct statfs *mbp;

	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);

	if (sbp == (mbp = &mp->mnt_stat))
		return;

	sbp->f_fsid = mbp->f_fsid;
	sbp->f_owner = mbp->f_owner;
	sbp->f_flags = mbp->f_flags;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_syncreads = mbp->f_syncreads;
	sbp->f_asyncreads = mbp->f_asyncreads;
	sbp->f_namemax = mbp->f_namemax;
	memcpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, MNAMELEN);
	memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, MNAMELEN);
	memcpy(sbp->f_mntfromspec, mp->mnt_stat.f_mntfromspec, MNAMELEN);
	memcpy(&sbp->mount_info.ufs_args, &mp->mnt_stat.mount_info.ufs_args,
	    sizeof(struct ufs_args));
}