vfs_export.c (23333) vs. vfs_export.c (23382)
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.77 1997/03/02 17:53:37 bde Exp $
39 * $Id: vfs_subr.c,v 1.78 1997/03/03 12:58:20 bde Exp $
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/time.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_object.h>
68#include <vm/vm_extern.h>
69#include <vm/vm_pager.h>
70#include <vm/vnode_pager.h>
71#include <sys/sysctl.h>
72
73#include <miscfs/specfs/specdev.h>
74
75#ifdef DDB
76extern void printlockedvnodes __P((void));
77#endif
78static void vclean __P((struct vnode *vp, int flags, struct proc *p));
79extern void vgonel __P((struct vnode *vp, struct proc *p));
80unsigned long numvnodes;
81extern void vfs_unmountroot __P((struct mount *rootfs));
82extern void vputrele __P((struct vnode *vp, int put));
83
84enum vtype iftovt_tab[16] = {
85 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
86 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
87};
88int vttoif_tab[9] = {
89 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
90 S_IFSOCK, S_IFIFO, S_IFMT,
91};
92
93/*
94 * Insq/Remq for the vnode usage lists.
95 */
96#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
97#define bufremvn(bp) { \
98 LIST_REMOVE(bp, b_vnbufs); \
99 (bp)->b_vnbufs.le_next = NOLIST; \
100}
101TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
102static u_long freevnodes = 0;
103
104struct mntlist mountlist; /* mounted filesystem list */
105struct simplelock mountlist_slock;
106static struct simplelock mntid_slock;
107struct simplelock mntvnode_slock;
108struct simplelock vnode_free_list_slock;
109static struct simplelock spechash_slock;
110
111int desiredvnodes;
112SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
113
114static void vfs_free_addrlist __P((struct netexport *nep));
115static int vfs_free_netcred __P((struct radix_node *rn, void *w));
116static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
117 struct export_args *argp));
118
119/*
120 * Initialize the vnode management data structures.
121 */
122void
123vntblinit()
124{
125
126 desiredvnodes = maxproc + vm_object_cache_max;
127 simple_lock_init(&mntvnode_slock);
128 simple_lock_init(&mntid_slock);
129 simple_lock_init(&spechash_slock);
130 TAILQ_INIT(&vnode_free_list);
131 simple_lock_init(&vnode_free_list_slock);
132 CIRCLEQ_INIT(&mountlist);
133}
134
135/*
136 * Mark a mount point as busy. Used to synchronize access and to delay
137 * unmounting. Interlock is not released on failure.
138 */
139int
140vfs_busy(mp, flags, interlkp, p)
141 struct mount *mp;
142 int flags;
143 struct simplelock *interlkp;
144 struct proc *p;
145{
146 int lkflags;
147
148 if (mp->mnt_flag & MNT_UNMOUNT) {
149 if (flags & LK_NOWAIT)
150 return (ENOENT);
151 mp->mnt_flag |= MNT_MWAIT;
152 if (interlkp) {
153 simple_unlock(interlkp);
154 }
155 /*
156 * Since all busy locks are shared except the exclusive
157 * lock granted when unmounting, the only place that a
158 * wakeup needs to be done is at the release of the
159 * exclusive lock at the end of dounmount.
160 */
161 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
162 if (interlkp) {
163 simple_lock(interlkp);
164 }
165 return (ENOENT);
166 }
167 lkflags = LK_SHARED;
168 if (interlkp)
169 lkflags |= LK_INTERLOCK;
170 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
171 panic("vfs_busy: unexpected lock failure");
172 return (0);
173}
174
175/*
176 * Free a busy filesystem.
177 */
178void
179vfs_unbusy(mp, p)
180 struct mount *mp;
181 struct proc *p;
182{
183
184 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
185}
186
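/*
 * Usage sketch (not compiled in): the usual way a caller pairs vfs_busy()
 * and vfs_unbusy() while walking the mount list, mirroring the loop in
 * printlockedvnodes() below.  The function name and the placeholder body
 * are illustrative only.
 */
#if 0
static void
example_walk_mounts(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			/* Being unmounted; the interlock is still held. */
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on the busied mount point mp here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
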
187/*
188 * Lookup a filesystem type, and if found allocate and initialize
189 * a mount structure for it.
190 *
191 * Devname is usually updated by mount(8) after booting.
192 */
193int
194vfs_rootmountalloc(fstypename, devname, mpp)
195 char *fstypename;
196 char *devname;
197 struct mount **mpp;
198{
199 struct proc *p = curproc; /* XXX */
200 struct vfsconf *vfsp;
201 struct mount *mp;
202
203 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
204 if (!strcmp(vfsp->vfc_name, fstypename))
205 break;
206 if (vfsp == NULL)
207 return (ENODEV);
208 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
209 bzero((char *)mp, (u_long)sizeof(struct mount));
210 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
211 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
212 LIST_INIT(&mp->mnt_vnodelist);
213 mp->mnt_vfc = vfsp;
214 mp->mnt_op = vfsp->vfc_vfsops;
215 mp->mnt_flag = MNT_RDONLY;
216 mp->mnt_vnodecovered = NULLVP;
217 vfsp->vfc_refcount++;
218 mp->mnt_stat.f_type = vfsp->vfc_typenum;
219 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
220 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
221 mp->mnt_stat.f_mntonname[0] = '/';
222 mp->mnt_stat.f_mntonname[1] = 0;
223 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
224 *mpp = mp;
225 return (0);
226}
227
228/*
229 * Find an appropriate filesystem to use for the root. If a filesystem
230 * has not been preselected, walk through the list of known filesystems
231 * trying those that have mountroot routines, and try them until one
232 * works or we have tried them all.
233 */
234#ifdef notdef /* XXX JH */
235int
236lite2_vfs_mountroot(void)
237{
238 struct vfsconf *vfsp;
239 extern int (*lite2_mountroot)(void);
240 int error;
241
242 if (lite2_mountroot != NULL)
243 return ((*lite2_mountroot)());
244 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
245 if (vfsp->vfc_mountroot == NULL)
246 continue;
247 if ((error = (*vfsp->vfc_mountroot)()) == 0)
248 return (0);
249 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
250 }
251 return (ENODEV);
252}
253#endif
254
255/*
256 * Lookup a mount point by filesystem identifier.
257 */
258struct mount *
259vfs_getvfs(fsid)
260 fsid_t *fsid;
261{
262 register struct mount *mp;
263
264 simple_lock(&mountlist_slock);
265 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
266 mp = mp->mnt_list.cqe_next) {
267 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
268 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
269 simple_unlock(&mountlist_slock);
270 return (mp);
271 }
272 }
273 simple_unlock(&mountlist_slock);
274 return ((struct mount *) 0);
275}
276
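/*
 * Usage sketch (not compiled in): a file-handle style lookup typically
 * locates the mount point by fsid with vfs_getvfs() before asking the
 * filesystem for the vnode.  The fhandle_t layout (fh_fsid) comes from
 * <sys/mount.h>; the function name is illustrative only.
 */
#if 0
static int
example_fsid_to_mount(fhandle_t *fhp, struct mount **mpp)
{
	struct mount *mp;

	mp = vfs_getvfs(&fhp->fh_fsid);
	if (mp == NULL)
		return (ESTALE);	/* stale or unmounted filesystem */
	*mpp = mp;
	return (0);
}
#endif
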
277/*
278 * Get a new unique fsid
279 */
280void
281vfs_getnewfsid(mp)
282 struct mount *mp;
283{
284 static u_short xxxfs_mntid;
285
286 fsid_t tfsid;
287 int mtype;
288
289 simple_lock(&mntid_slock);
290 mtype = mp->mnt_vfc->vfc_typenum;
291 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
292 mp->mnt_stat.f_fsid.val[1] = mtype;
293 if (xxxfs_mntid == 0)
294 ++xxxfs_mntid;
295 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
296 tfsid.val[1] = mtype;
297 if (mountlist.cqh_first != (void *)&mountlist) {
298 while (vfs_getvfs(&tfsid)) {
299 tfsid.val[0]++;
300 xxxfs_mntid++;
301 }
302 }
303 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
304 simple_unlock(&mntid_slock);
305}
306
307/*
308 * Set vnode attributes to VNOVAL
309 */
310void
311vattr_null(vap)
312 register struct vattr *vap;
313{
314
315 vap->va_type = VNON;
316 vap->va_size = VNOVAL;
317 vap->va_bytes = VNOVAL;
318 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
319 vap->va_fsid = vap->va_fileid =
320 vap->va_blocksize = vap->va_rdev =
321 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
322 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
323 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
324 vap->va_flags = vap->va_gen = VNOVAL;
325 vap->va_vaflags = 0;
326}
327
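/*
 * Usage sketch (not compiled in): callers preparing attributes for
 * VOP_CREATE() or VOP_SETATTR() clear them first (usually via the
 * VATTR_NULL() macro, which resolves to vattr_null()) so that every field
 * they do not set stays VNOVAL and is ignored by the filesystem.  The
 * values below are illustrative only.
 */
#if 0
static void
example_vattr_setup(struct vattr *vap)
{
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = 0644;		/* mode chosen only for illustration */
	/* hand vap to VOP_CREATE() or VOP_SETATTR() as appropriate */
}
#endif
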
328/*
329 * Routines having to do with the management of the vnode table.
330 */
331extern vop_t **dead_vnodeop_p;
332
333/*
334 * Return the next vnode from the free list.
335 */
336int
337getnewvnode(tag, mp, vops, vpp)
338 enum vtagtype tag;
339 struct mount *mp;
340 vop_t **vops;
341 struct vnode **vpp;
342{
343 struct proc *p = curproc; /* XXX */
344 struct vnode *vp;
345
346 simple_lock(&vnode_free_list_slock);
347retry:
348 /*
349 * we allocate a new vnode if
350 * 1. we don't have any free
351 * Pretty obvious, we actually used to panic, but that
352 * is a silly thing to do.
 353	 * 2. we haven't filled our pool yet
354 * We don't want to trash the incore (VM-)vnodecache.
 355	 * 3. if less than 1/4th of our vnodes are free.
356 * We don't want to trash the namei cache either.
357 */
358 if (freevnodes < (numvnodes >> 2) ||
359 numvnodes < desiredvnodes ||
360 vnode_free_list.tqh_first == NULL) {
361 simple_unlock(&vnode_free_list_slock);
362 vp = (struct vnode *) malloc((u_long) sizeof *vp,
363 M_VNODE, M_WAITOK);
364 bzero((char *) vp, sizeof *vp);
365 numvnodes++;
366 } else {
367 for (vp = vnode_free_list.tqh_first;
368 vp != NULLVP; vp = vp->v_freelist.tqe_next) {
369 if (simple_lock_try(&vp->v_interlock))
370 break;
371 }
372 /*
373 * Unless this is a bad time of the month, at most
374 * the first NCPUS items on the free list are
375 * locked, so this is close enough to being empty.
376 */
377 if (vp == NULLVP) {
378 simple_unlock(&vnode_free_list_slock);
379 tablefull("vnode");
380 *vpp = 0;
381 return (ENFILE);
382 }
383 if (vp->v_usecount)
384 panic("free vnode isn't");
385 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
386 if (vp->v_usage > 0) {
387 simple_unlock(&vp->v_interlock);
388 --vp->v_usage;
389 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
390 goto retry;
391 }
392 freevnodes--;
393
394 /* see comment on why 0xdeadb is set at end of vgone (below) */
395 vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
396 simple_unlock(&vnode_free_list_slock);
397 vp->v_lease = NULL;
398 if (vp->v_type != VBAD)
399 vgonel(vp, p);
400 else {
401 simple_unlock(&vp->v_interlock);
402 }
403
404#ifdef DIAGNOSTIC
405 {
406 int s;
407
408 if (vp->v_data)
409 panic("cleaned vnode isn't");
410 s = splbio();
411 if (vp->v_numoutput)
412 panic("Clean vnode has pending I/O's");
413 splx(s);
414 }
415#endif
416 vp->v_flag = 0;
417 vp->v_lastr = 0;
418 vp->v_lastw = 0;
419 vp->v_lasta = 0;
420 vp->v_cstart = 0;
421 vp->v_clen = 0;
422 vp->v_socket = 0;
423 vp->v_writecount = 0; /* XXX */
424 vp->v_usage = 0;
425 }
426 vp->v_type = VNON;
427 cache_purge(vp);
428 vp->v_tag = tag;
429 vp->v_op = vops;
430 insmntque(vp, mp);
431 *vpp = vp;
432 vp->v_usecount = 1;
433 vp->v_data = 0;
434 return (0);
435}
436
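/*
 * Usage sketch (not compiled in): a filesystem allocates its vnodes
 * through getnewvnode() and then attaches its per-file data to v_data,
 * much as bdevvp() below does for special devices.  The tag and the
 * function name are illustrative only.
 */
#if 0
static int
example_alloc_vnode(struct mount *mp, vop_t **vops, void *fsnode,
	struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = getnewvnode(VT_NON, mp, vops, &vp);
	if (error)
		return (error);
	vp->v_data = fsnode;		/* filesystem private data */
	*vpp = vp;
	return (0);
}
#endif
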
437/*
438 * Move a vnode from one mount queue to another.
439 */
440void
441insmntque(vp, mp)
442 register struct vnode *vp;
443 register struct mount *mp;
444{
445
446 simple_lock(&mntvnode_slock);
447 /*
448 * Delete from old mount point vnode list, if on one.
449 */
450 if (vp->v_mount != NULL)
451 LIST_REMOVE(vp, v_mntvnodes);
452 /*
453 * Insert into list of vnodes for the new mount point, if available.
454 */
455 if ((vp->v_mount = mp) == NULL) {
456 simple_unlock(&mntvnode_slock);
457 return;
458 }
459 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
460 simple_unlock(&mntvnode_slock);
461}
462
463/*
464 * Update outstanding I/O count and do wakeup if requested.
465 */
466void
467vwakeup(bp)
468 register struct buf *bp;
469{
470 register struct vnode *vp;
471
472 bp->b_flags &= ~B_WRITEINPROG;
473 if ((vp = bp->b_vp)) {
474 vp->v_numoutput--;
475 if (vp->v_numoutput < 0)
476 panic("vwakeup: neg numoutput");
477 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
478 vp->v_flag &= ~VBWAIT;
479 wakeup((caddr_t) &vp->v_numoutput);
480 }
481 }
482}
483
484/*
485 * Flush out and invalidate all buffers associated with a vnode.
486 * Called with the underlying object locked.
487 */
488int
489vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
490 register struct vnode *vp;
491 int flags;
492 struct ucred *cred;
493 struct proc *p;
494 int slpflag, slptimeo;
495{
496 register struct buf *bp;
497 struct buf *nbp, *blist;
498 int s, error;
499 vm_object_t object;
500
501 if (flags & V_SAVE) {
502 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
503 return (error);
504 if (vp->v_dirtyblkhd.lh_first != NULL)
505 panic("vinvalbuf: dirty bufs");
506 }
507
508 s = splbio();
509 for (;;) {
510 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
511 while (blist && blist->b_lblkno < 0)
512 blist = blist->b_vnbufs.le_next;
513 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
514 (flags & V_SAVEMETA))
515 while (blist && blist->b_lblkno < 0)
516 blist = blist->b_vnbufs.le_next;
517 if (!blist)
518 break;
519
520 for (bp = blist; bp; bp = nbp) {
521 nbp = bp->b_vnbufs.le_next;
522 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
523 continue;
524 if (bp->b_flags & B_BUSY) {
525 bp->b_flags |= B_WANTED;
526 error = tsleep((caddr_t) bp,
527 slpflag | (PRIBIO + 1), "vinvalbuf",
528 slptimeo);
529 splx(s);
530 if (error)
531 return (error);
532 break;
533 }
534 bremfree(bp);
535 bp->b_flags |= B_BUSY;
536 /*
537 * XXX Since there are no node locks for NFS, I
538 * believe there is a slight chance that a delayed
539 * write will occur while sleeping just above, so
540 * check for it.
541 */
542 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
543 (void) VOP_BWRITE(bp);
544 break;
545 }
546 bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
547 brelse(bp);
548 }
549 }
550 splx(s);
551
552 s = splbio();
553 while (vp->v_numoutput > 0) {
554 vp->v_flag |= VBWAIT;
555 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
556 }
557 splx(s);
558
559 /*
560 * Destroy the copy in the VM cache, too.
561 */
562 object = vp->v_object;
563 if (object != NULL) {
564 vm_object_page_remove(object, 0, object->size,
565 (flags & V_SAVE) ? TRUE : FALSE);
566 }
567 if (!(flags & V_SAVEMETA) &&
568 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
569 panic("vinvalbuf: flush failed");
570 return (0);
571}
572
573/*
574 * Associate a buffer with a vnode.
575 */
576void
577bgetvp(vp, bp)
578 register struct vnode *vp;
579 register struct buf *bp;
580{
581 int s;
582
583 if (bp->b_vp)
584 panic("bgetvp: not free");
585 VHOLD(vp);
586 bp->b_vp = vp;
587 if (vp->v_type == VBLK || vp->v_type == VCHR)
588 bp->b_dev = vp->v_rdev;
589 else
590 bp->b_dev = NODEV;
591 /*
592 * Insert onto list for new vnode.
593 */
594 s = splbio();
595 bufinsvn(bp, &vp->v_cleanblkhd);
596 splx(s);
597}
598
599/*
600 * Disassociate a buffer from a vnode.
601 */
602void
603brelvp(bp)
604 register struct buf *bp;
605{
606 struct vnode *vp;
607 int s;
608
609 if (bp->b_vp == (struct vnode *) 0)
610 panic("brelvp: NULL");
611 /*
612 * Delete from old vnode list, if on one.
613 */
614 s = splbio();
615 if (bp->b_vnbufs.le_next != NOLIST)
616 bufremvn(bp);
617 splx(s);
618
619 vp = bp->b_vp;
620 bp->b_vp = (struct vnode *) 0;
621 HOLDRELE(vp);
622}
623
624/*
625 * Associate a p-buffer with a vnode.
626 */
627void
628pbgetvp(vp, bp)
629 register struct vnode *vp;
630 register struct buf *bp;
631{
632#if defined(DIAGNOSTIC)
633 if (bp->b_vp)
634 panic("pbgetvp: not free");
635#endif
636 bp->b_vp = vp;
637 if (vp->v_type == VBLK || vp->v_type == VCHR)
638 bp->b_dev = vp->v_rdev;
639 else
640 bp->b_dev = NODEV;
641}
642
643/*
644 * Disassociate a p-buffer from a vnode.
645 */
646void
647pbrelvp(bp)
648 register struct buf *bp;
649{
650 struct vnode *vp;
651
652#if defined(DIAGNOSTIC)
653 if (bp->b_vp == (struct vnode *) 0)
654 panic("pbrelvp: NULL");
655#endif
656
657 bp->b_vp = (struct vnode *) 0;
658}
659
660/*
661 * Reassign a buffer from one vnode to another.
662 * Used to assign file specific control information
663 * (indirect blocks) to the vnode to which they belong.
664 */
665void
666reassignbuf(bp, newvp)
667 register struct buf *bp;
668 register struct vnode *newvp;
669{
670 int s;
671
672 if (newvp == NULL) {
673 printf("reassignbuf: NULL");
674 return;
675 }
676
677 s = splbio();
678 /*
679 * Delete from old vnode list, if on one.
680 */
681 if (bp->b_vnbufs.le_next != NOLIST)
682 bufremvn(bp);
683 /*
684 * If dirty, put on list of dirty buffers; otherwise insert onto list
685 * of clean buffers.
686 */
687 if (bp->b_flags & B_DELWRI) {
688 struct buf *tbp;
689
690 tbp = newvp->v_dirtyblkhd.lh_first;
691 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
692 bufinsvn(bp, &newvp->v_dirtyblkhd);
693 } else {
694 while (tbp->b_vnbufs.le_next &&
695 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
696 tbp = tbp->b_vnbufs.le_next;
697 }
698 LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
699 }
700 } else {
701 bufinsvn(bp, &newvp->v_cleanblkhd);
702 }
703 splx(s);
704}
705
706#ifndef DEVFS_ROOT
707/*
708 * Create a vnode for a block device.
709 * Used for root filesystem, argdev, and swap areas.
710 * Also used for memory file system special devices.
711 */
712int
713bdevvp(dev, vpp)
714 dev_t dev;
715 struct vnode **vpp;
716{
717 register struct vnode *vp;
718 struct vnode *nvp;
719 int error;
720
721 if (dev == NODEV)
722 return (0);
723 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
724 if (error) {
725 *vpp = 0;
726 return (error);
727 }
728 vp = nvp;
729 vp->v_type = VBLK;
730 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
731 vput(vp);
732 vp = nvp;
733 }
734 *vpp = vp;
735 return (0);
736}
737#endif /* !DEVFS_ROOT */
738
739/*
740 * Check to see if the new vnode represents a special device
741 * for which we already have a vnode (either because of
742 * bdevvp() or because of a different vnode representing
743 * the same block device). If such an alias exists, deallocate
744 * the existing contents and return the aliased vnode. The
745 * caller is responsible for filling it with its new contents.
746 */
747struct vnode *
748checkalias(nvp, nvp_rdev, mp)
749 register struct vnode *nvp;
750 dev_t nvp_rdev;
751 struct mount *mp;
752{
753 struct proc *p = curproc; /* XXX */
754 struct vnode *vp;
755 struct vnode **vpp;
756
757 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
758 return (NULLVP);
759
760 vpp = &speclisth[SPECHASH(nvp_rdev)];
761loop:
762 simple_lock(&spechash_slock);
763 for (vp = *vpp; vp; vp = vp->v_specnext) {
764 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
765 continue;
766 /*
767 * Alias, but not in use, so flush it out.
768 */
769 simple_lock(&vp->v_interlock);
770 if (vp->v_usecount == 0) {
771 simple_unlock(&spechash_slock);
772 vgonel(vp, p);
773 goto loop;
774 }
775 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
776 simple_unlock(&spechash_slock);
777 goto loop;
778 }
779 break;
780 }
781 if (vp == NULL || vp->v_tag != VT_NON) {
782 MALLOC(nvp->v_specinfo, struct specinfo *,
783 sizeof(struct specinfo), M_VNODE, M_WAITOK);
784 nvp->v_rdev = nvp_rdev;
785 nvp->v_hashchain = vpp;
786 nvp->v_specnext = *vpp;
787 nvp->v_specflags = 0;
788 simple_unlock(&spechash_slock);
789 *vpp = nvp;
790 if (vp != NULLVP) {
791 nvp->v_flag |= VALIASED;
792 vp->v_flag |= VALIASED;
793 vput(vp);
794 }
795 return (NULLVP);
796 }
797 simple_unlock(&spechash_slock);
798 VOP_UNLOCK(vp, 0, p);
799 simple_lock(&vp->v_interlock);
800 vclean(vp, 0, p);
801 vp->v_op = nvp->v_op;
802 vp->v_tag = nvp->v_tag;
803 nvp->v_type = VNON;
804 insmntque(vp, mp);
805 return (vp);
806}
807
808/*
809 * Grab a particular vnode from the free list, increment its
810 * reference count and lock it. The vnode lock bit is set the
811 * vnode is being eliminated in vgone. The process is awakened
812 * when the transition is completed, and an error returned to
813 * indicate that the vnode is no longer usable (possibly having
814 * been changed to a new file system type).
815 */
816int
817vget(vp, flags, p)
818 register struct vnode *vp;
819 int flags;
820 struct proc *p;
821{
822 int error;
823
824 /*
825 * If the vnode is in the process of being cleaned out for
826 * another use, we wait for the cleaning to finish and then
827 * return failure. Cleaning is determined by checking that
828 * the VXLOCK flag is set.
829 */
830 if ((flags & LK_INTERLOCK) == 0) {
831 simple_lock(&vp->v_interlock);
832 }
833 if (vp->v_flag & VXLOCK) {
834 vp->v_flag |= VXWANT;
835 simple_unlock(&vp->v_interlock);
836 tsleep((caddr_t)vp, PINOD, "vget", 0);
837 return (ENOENT);
838 }
839 if (vp->v_usecount == 0) {
840 simple_lock(&vnode_free_list_slock);
841 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
842 simple_unlock(&vnode_free_list_slock);
843 freevnodes--;
844 }
845 vp->v_usecount++;
846 /*
847 * Create the VM object, if needed
848 */
849 if ((vp->v_type == VREG) &&
850 ((vp->v_object == NULL) ||
851 (vp->v_object->flags & OBJ_VFS_REF) == 0)) {
852 /*
853 * XXX vfs_object_create probably needs the interlock.
854 */
855 simple_unlock(&vp->v_interlock);
856 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
857 simple_lock(&vp->v_interlock);
858 }
859 if (flags & LK_TYPE_MASK) {
860 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
861 vrele(vp);
862 return (error);
863 }
864 simple_unlock(&vp->v_interlock);
865 return (0);
866}
867
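/*
 * Usage sketch (not compiled in): a caller that finds a vnode in a private
 * cache takes the interlock and hands it to vget() with LK_INTERLOCK, then
 * simply redoes its lookup if vget() fails, since failure means the vnode
 * was being cleaned out for another use (the same pattern checkalias()
 * uses above).  The function name is illustrative only.
 */
#if 0
static struct vnode *
example_cache_get(struct vnode *vp, struct proc *p)
{
	simple_lock(&vp->v_interlock);
	if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
		return (NULL);		/* caller should retry its lookup */
	return (vp);			/* now referenced and locked */
}
#endif
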
868/*
869 * Stubs to use when there is no locking to be done on the underlying object.
870 * A minimal shared lock is necessary to ensure that the underlying object
871 * is not revoked while an operation is in progress. So, an active shared
 872 * count is maintained in an auxiliary vnode lock structure.
873 */
874int
875vop_nolock(ap)
876 struct vop_lock_args /* {
877 struct vnode *a_vp;
878 int a_flags;
879 struct proc *a_p;
880 } */ *ap;
881{
882#ifdef notyet
883 /*
884 * This code cannot be used until all the non-locking filesystems
885 * (notably NFS) are converted to properly lock and release nodes.
886 * Also, certain vnode operations change the locking state within
887 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
888 * and symlink). Ideally these operations should not change the
889 * lock state, but should be changed to let the caller of the
890 * function unlock them. Otherwise all intermediate vnode layers
891 * (such as union, umapfs, etc) must catch these functions to do
892 * the necessary locking at their layer. Note that the inactive
893 * and lookup operations also change their lock state, but this
894 * cannot be avoided, so these two operations will always need
895 * to be handled in intermediate layers.
896 */
897 struct vnode *vp = ap->a_vp;
898 int vnflags, flags = ap->a_flags;
899
900 if (vp->v_vnlock == NULL) {
901 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
902 return (0);
903 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
904 M_VNODE, M_WAITOK);
905 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
906 }
907 switch (flags & LK_TYPE_MASK) {
908 case LK_DRAIN:
909 vnflags = LK_DRAIN;
910 break;
911 case LK_EXCLUSIVE:
912 case LK_SHARED:
913 vnflags = LK_SHARED;
914 break;
915 case LK_UPGRADE:
916 case LK_EXCLUPGRADE:
917 case LK_DOWNGRADE:
918 return (0);
919 case LK_RELEASE:
920 default:
921 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
922 }
923 if (flags & LK_INTERLOCK)
924 vnflags |= LK_INTERLOCK;
925 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
926#else /* for now */
927 /*
928 * Since we are not using the lock manager, we must clear
929 * the interlock here.
930 */
931 if (ap->a_flags & LK_INTERLOCK) {
932 simple_unlock(&ap->a_vp->v_interlock);
933 }
934 return (0);
935#endif
936}
937
938/*
939 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
940 */
941int
942vop_nounlock(ap)
943 struct vop_unlock_args /* {
944 struct vnode *a_vp;
945 int a_flags;
946 struct proc *a_p;
947 } */ *ap;
948{
949 struct vnode *vp = ap->a_vp;
950
951 if (vp->v_vnlock == NULL) {
952 if (ap->a_flags & LK_INTERLOCK)
953 simple_unlock(&ap->a_vp->v_interlock);
954 return (0);
955 }
956 return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
957 &ap->a_vp->v_interlock, ap->a_p));
958}
959
960/*
 961 * Return whether or not the node is locked.
962 */
963int
964vop_noislocked(ap)
965 struct vop_islocked_args /* {
966 struct vnode *a_vp;
967 } */ *ap;
968{
969 struct vnode *vp = ap->a_vp;
970
971 if (vp->v_vnlock == NULL)
972 return (0);
973 return (lockstatus(vp->v_vnlock));
974}
975
976/* #ifdef DIAGNOSTIC */
977/*
978 * Vnode reference, just increment the count
979 */
980void
981vref(vp)
982 struct vnode *vp;
983{
984 simple_lock(&vp->v_interlock);
985 if (vp->v_usecount <= 0)
986 panic("vref used where vget required");
987
988 vp->v_usecount++;
989
990 if ((vp->v_type == VREG) &&
991 ((vp->v_object == NULL) ||
992 ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) {
993 /*
 994 * We need to lock the VP during the time that
995 * the object is created. This is necessary to
996 * keep the system from re-entrantly doing it
997 * multiple times.
998 * XXX vfs_object_create probably needs the interlock?
999 */
1000 simple_unlock(&vp->v_interlock);
1001 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1002 return;
1003 }
1004 simple_unlock(&vp->v_interlock);
1005}
1006
1007/*
1008 * Vnode put/release.
1009 * If count drops to zero, call inactive routine and return to freelist.
1010 */
1011void
1012vputrele(vp, put)
1013 struct vnode *vp;
1014 int put;
1015{
1016 struct proc *p = curproc; /* XXX */
1017
1018#ifdef DIAGNOSTIC
1019 if (vp == NULL)
1020 panic("vputrele: null vp");
1021#endif
1022 simple_lock(&vp->v_interlock);
1023 vp->v_usecount--;
1024
1025 if ((vp->v_usecount == 1) &&
1026 vp->v_object &&
1027 (vp->v_object->flags & OBJ_VFS_REF)) {
1028 vp->v_object->flags &= ~OBJ_VFS_REF;
1029 if (put) {
1030 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1031 } else {
1032 simple_unlock(&vp->v_interlock);
1033 }
1034 vm_object_deallocate(vp->v_object);
1035 return;
1036 }
1037
1038 if (vp->v_usecount > 0) {
1039 if (put) {
1040 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1041 } else {
1042 simple_unlock(&vp->v_interlock);
1043 }
1044 return;
1045 }
1046
1047 if (vp->v_usecount < 0) {
1048#ifdef DIAGNOSTIC
1049 vprint("vputrele: negative ref count", vp);
1050#endif
1051 panic("vputrele: negative ref cnt");
1052 }
1053 simple_lock(&vnode_free_list_slock);
1054 if (vp->v_flag & VAGE) {
1055 vp->v_flag &= ~VAGE;
1056 vp->v_usage = 0;
1057 if(vp->v_tag != VT_TFS)
1058 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1059 } else {
1060 if(vp->v_tag != VT_TFS)
1061 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1062 }
1063 freevnodes++;
1064 simple_unlock(&vnode_free_list_slock);
1065
1066 /*
1067 * If we are doing a vput, the node is already locked, and we must
1068 * call VOP_INACTIVE with the node locked. So, in the case of
1069 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1070 */
1071 if (put) {
1072 simple_unlock(&vp->v_interlock);
1073 VOP_INACTIVE(vp, p);
1074 } else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1075 VOP_INACTIVE(vp, p);
1076 }
1077}
1078
1079/*
1080 * vput(), just unlock and vrele()
1081 */
1082void
1083vput(vp)
1084 struct vnode *vp;
1085{
1086 vputrele(vp, 1);
1087}
1088
1089void
1090vrele(vp)
1091 struct vnode *vp;
1092{
1093 vputrele(vp, 0);
1094}
1095
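/*
 * Usage sketch (not compiled in): how the reference primitives pair up.
 * vref() may only be used on a vnode that already has a reference
 * (otherwise vget() is required, as the panic in vref() notes) and is
 * paired with vrele(); a successful locking vget() is paired with vput(),
 * which unlocks and releases.  The function name is illustrative only.
 */
#if 0
static void
example_reference_use(struct vnode *vp, struct proc *p)
{
	/* Unlocked reference. */
	vref(vp);
	/* ... use vp without holding its lock ... */
	vrele(vp);

	/* Referenced and locked. */
	simple_lock(&vp->v_interlock);
	if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
		/* ... use vp while locked ... */
		vput(vp);
	}
}
#endif
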
1096#ifdef DIAGNOSTIC
1097/*
1098 * Page or buffer structure gets a reference.
1099 */
1100void
1101vhold(vp)
1102 register struct vnode *vp;
1103{
1104
1105 simple_lock(&vp->v_interlock);
1106 vp->v_holdcnt++;
1107 simple_unlock(&vp->v_interlock);
1108}
1109
1110/*
1111 * Page or buffer structure frees a reference.
1112 */
1113void
1114holdrele(vp)
1115 register struct vnode *vp;
1116{
1117
1118 simple_lock(&vp->v_interlock);
1119 if (vp->v_holdcnt <= 0)
1120 panic("holdrele: holdcnt");
1121 vp->v_holdcnt--;
1122 simple_unlock(&vp->v_interlock);
1123}
1124#endif /* DIAGNOSTIC */
1125
1126/*
1127 * Remove any vnodes in the vnode table belonging to mount point mp.
1128 *
1129 * If MNT_NOFORCE is specified, there should not be any active ones,
1130 * return error if any are found (nb: this is a user error, not a
1131 * system error). If MNT_FORCE is specified, detach any active vnodes
1132 * that are found.
1133 */
1134#ifdef DIAGNOSTIC
1135static int busyprt = 0; /* print out busy vnodes */
1136SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1137#endif
1138
1139int
1140vflush(mp, skipvp, flags)
1141 struct mount *mp;
1142 struct vnode *skipvp;
1143 int flags;
1144{
1145 struct proc *p = curproc; /* XXX */
1146 struct vnode *vp, *nvp;
1147 int busy = 0;
1148
1149 simple_lock(&mntvnode_slock);
1150loop:
1151 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1152 /*
1153 * Make sure this vnode wasn't reclaimed in getnewvnode().
1154 * Start over if it has (it won't be on the list anymore).
1155 */
1156 if (vp->v_mount != mp)
1157 goto loop;
1158 nvp = vp->v_mntvnodes.le_next;
1159 /*
1160 * Skip over a selected vnode.
1161 */
1162 if (vp == skipvp)
1163 continue;
1164
1165 simple_lock(&vp->v_interlock);
1166 /*
 1167 * Skip over vnodes marked VSYSTEM.
1168 */
1169 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1170 simple_unlock(&vp->v_interlock);
1171 continue;
1172 }
1173 /*
1174 * If WRITECLOSE is set, only flush out regular file vnodes
1175 * open for writing.
1176 */
1177 if ((flags & WRITECLOSE) &&
1178 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1179 simple_unlock(&vp->v_interlock);
1180 continue;
1181 }
1182
1183 if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) {
1184 simple_unlock(&vp->v_interlock);
1185 simple_unlock(&mntvnode_slock);
1186 vm_object_reference(vp->v_object);
1187 pager_cache(vp->v_object, FALSE);
1188 vp->v_object->flags &= ~OBJ_VFS_REF;
1189 vm_object_deallocate(vp->v_object);
1190 simple_lock(&mntvnode_slock);
1191 simple_lock(&vp->v_interlock);
1192 }
1193
1194 /*
1195 * With v_usecount == 0, all we need to do is clear out the
1196 * vnode data structures and we are done.
1197 */
1198 if (vp->v_usecount == 0) {
1199 simple_unlock(&mntvnode_slock);
1200 vgonel(vp, p);
1201 simple_lock(&mntvnode_slock);
1202 continue;
1203 }
1204
1205 /*
1206 * If FORCECLOSE is set, forcibly close the vnode. For block
1207 * or character devices, revert to an anonymous device. For
1208 * all other files, just kill them.
1209 */
1210 if (flags & FORCECLOSE) {
1211 simple_unlock(&mntvnode_slock);
1212 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1213 vgonel(vp, p);
1214 } else {
1215 vclean(vp, 0, p);
1216 vp->v_op = spec_vnodeop_p;
1217 insmntque(vp, (struct mount *) 0);
1218 }
1219 simple_lock(&mntvnode_slock);
1220 continue;
1221 }
1222#ifdef DIAGNOSTIC
1223 if (busyprt)
1224 vprint("vflush: busy vnode", vp);
1225#endif
1226 simple_unlock(&vp->v_interlock);
1227 busy++;
1228 }
1229 simple_unlock(&mntvnode_slock);
1230 if (busy)
1231 return (EBUSY);
1232 return (0);
1233}
1234
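/*
 * Usage sketch (not compiled in): how a filesystem's unmount path
 * typically drives vflush().  MNT_FORCE is translated into FORCECLOSE,
 * and a vnode the filesystem still needs (such as its device vnode) can
 * be passed as skipvp instead of NULLVP.  The function name is
 * illustrative only.
 */
#if 0
static int
example_unmount_flush(struct mount *mp, int mntflags)
{
	int flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	return (vflush(mp, NULLVP, flags));
}
#endif
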
1235/*
1236 * Disassociate the underlying file system from a vnode.
1237 */
1238static void
1239vclean(struct vnode *vp, int flags, struct proc *p)
1240{
1241 int active;
1242
1243 /*
1244 * Check to see if the vnode is in use. If so we have to reference it
1245 * before we clean it out so that its count cannot fall to zero and
1246 * generate a race against ourselves to recycle it.
1247 */
1248 if ((active = vp->v_usecount))
1249 vp->v_usecount++;
1250 /*
1251 * Prevent the vnode from being recycled or brought into use while we
1252 * clean it out.
1253 */
1254 if (vp->v_flag & VXLOCK)
1255 panic("vclean: deadlock");
1256 vp->v_flag |= VXLOCK;
1257 /*
1258 * Even if the count is zero, the VOP_INACTIVE routine may still
1259 * have the object locked while it cleans it out. The VOP_LOCK
1260 * ensures that the VOP_INACTIVE routine is done with its work.
1261 * For active vnodes, it ensures that no other activity can
1262 * occur while the underlying object is being cleaned out.
1263 */
1264 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1265 /*
1266 * Clean out any buffers associated with the vnode.
1267 */
1268 if (flags & DOCLOSE)
1269 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1270 /*
1271 * If purging an active vnode, it must be closed and
1272 * deactivated before being reclaimed. Note that the
1273 * VOP_INACTIVE will unlock the vnode.
1274 */
1275 if (active) {
1276 if (flags & DOCLOSE)
1277 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1278 VOP_INACTIVE(vp, p);
1279 } else {
1280 /*
1281 * Any other processes trying to obtain this lock must first
1282 * wait for VXLOCK to clear, then call the new lock operation.
1283 */
1284 VOP_UNLOCK(vp, 0, p);
1285 }
1286 /*
1287 * Reclaim the vnode.
1288 */
1289 if (VOP_RECLAIM(vp, p))
1290 panic("vclean: cannot reclaim");
1291 if (active)
1292 vrele(vp);
1293 cache_purge(vp);
1294 if (vp->v_vnlock) {
1295 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1296 vprint("vclean: lock not drained", vp);
1297 FREE(vp->v_vnlock, M_VNODE);
1298 vp->v_vnlock = NULL;
1299 }
1300
1301 /*
1302 * Done with purge, notify sleepers of the grim news.
1303 */
1304 vp->v_op = dead_vnodeop_p;
1305 vp->v_tag = VT_NON;
1306 vp->v_flag &= ~VXLOCK;
1307 if (vp->v_flag & VXWANT) {
1308 vp->v_flag &= ~VXWANT;
1309 wakeup((caddr_t) vp);
1310 }
1311}
1312
1313/*
1314 * Eliminate all activity associated with the requested vnode
1315 * and with all vnodes aliased to the requested vnode.
1316 */
1317int
1318vop_revoke(ap)
1319 struct vop_revoke_args /* {
1320 struct vnode *a_vp;
1321 int a_flags;
1322 } */ *ap;
1323{
1324 struct vnode *vp, *vq;
1325 struct proc *p = curproc; /* XXX */
1326
1327#ifdef DIAGNOSTIC
1328 if ((ap->a_flags & REVOKEALL) == 0)
1329 panic("vop_revoke");
1330#endif
1331
1332 vp = ap->a_vp;
1333 simple_lock(&vp->v_interlock);
1334
1335 if (vp->v_flag & VALIASED) {
1336 /*
1337 * If a vgone (or vclean) is already in progress,
1338 * wait until it is done and return.
1339 */
1340 if (vp->v_flag & VXLOCK) {
1341 vp->v_flag |= VXWANT;
1342 simple_unlock(&vp->v_interlock);
1343 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1344 return (0);
1345 }
1346 /*
1347 * Ensure that vp will not be vgone'd while we
1348 * are eliminating its aliases.
1349 */
1350 vp->v_flag |= VXLOCK;
1351 simple_unlock(&vp->v_interlock);
1352 while (vp->v_flag & VALIASED) {
1353 simple_lock(&spechash_slock);
1354 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1355 if (vq->v_rdev != vp->v_rdev ||
1356 vq->v_type != vp->v_type || vp == vq)
1357 continue;
1358 simple_unlock(&spechash_slock);
1359 vgone(vq);
1360 break;
1361 }
1362 if (vq == NULLVP) {
1363 simple_unlock(&spechash_slock);
1364 }
1365 }
1366 /*
1367 * Remove the lock so that vgone below will
1368 * really eliminate the vnode after which time
1369 * vgone will awaken any sleepers.
1370 */
1371 simple_lock(&vp->v_interlock);
1372 vp->v_flag &= ~VXLOCK;
1373 }
1374 vgonel(vp, p);
1375 return (0);
1376}
1377
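/*
 * Usage sketch (not compiled in): vop_revoke() is reached through the
 * VOP_REVOKE() interface, always with REVOKEALL set (the DIAGNOSTIC check
 * above insists on it), for example when forcibly invalidating every alias
 * of a terminal device.  The function name is illustrative only.
 */
#if 0
static void
example_revoke_device(struct vnode *vp)
{
	VOP_REVOKE(vp, REVOKEALL);	/* vp referenced by the caller */
}
#endif
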
1378/*
1379 * Recycle an unused vnode to the front of the free list.
1380 * Release the passed interlock if the vnode will be recycled.
1381 */
1382int
1383vrecycle(vp, inter_lkp, p)
1384 struct vnode *vp;
1385 struct simplelock *inter_lkp;
1386 struct proc *p;
1387{
1388
1389 simple_lock(&vp->v_interlock);
1390 if (vp->v_usecount == 0) {
1391 if (inter_lkp) {
1392 simple_unlock(inter_lkp);
1393 }
1394 vgonel(vp, p);
1395 return (1);
1396 }
1397 simple_unlock(&vp->v_interlock);
1398 return (0);
1399}
1400
1401/*
1402 * Eliminate all activity associated with a vnode
1403 * in preparation for reuse.
1404 */
1405void
1406vgone(vp)
1407 register struct vnode *vp;
1408{
1409 struct proc *p = curproc; /* XXX */
1410
1411 simple_lock(&vp->v_interlock);
1412 vgonel(vp, p);
1413}
1414
1415/*
1416 * vgone, with the vp interlock held.
1417 */
1418void
1419vgonel(vp, p)
1420 struct vnode *vp;
1421 struct proc *p;
1422{
1423 struct vnode *vq;
1424 struct vnode *vx;
1425
1426 /*
1427 * If a vgone (or vclean) is already in progress,
1428 * wait until it is done and return.
1429 */
1430 if (vp->v_flag & VXLOCK) {
1431 vp->v_flag |= VXWANT;
1432 simple_unlock(&vp->v_interlock);
1433 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1434 return;
1435 }
1436
1437 if (vp->v_object) {
1438 vp->v_object->flags |= OBJ_VNODE_GONE;
1439 }
1440
1441 /*
1442 * Clean out the filesystem specific data.
1443 */
1444 vclean(vp, DOCLOSE, p);
1445 /*
1446 * Delete from old mount point vnode list, if on one.
1447 */
1448 if (vp->v_mount != NULL)
1449 insmntque(vp, (struct mount *)0);
1450 /*
1451 * If special device, remove it from special device alias list
1452 * if it is on one.
1453 */
1454 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1455 simple_lock(&spechash_slock);
1456 if (*vp->v_hashchain == vp) {
1457 *vp->v_hashchain = vp->v_specnext;
1458 } else {
1459 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1460 if (vq->v_specnext != vp)
1461 continue;
1462 vq->v_specnext = vp->v_specnext;
1463 break;
1464 }
1465 if (vq == NULL)
1466 panic("missing bdev");
1467 }
1468 if (vp->v_flag & VALIASED) {
1469 vx = NULL;
1470 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1471 if (vq->v_rdev != vp->v_rdev ||
1472 vq->v_type != vp->v_type)
1473 continue;
1474 if (vx)
1475 break;
1476 vx = vq;
1477 }
1478 if (vx == NULL)
1479 panic("missing alias");
1480 if (vq == NULL)
1481 vx->v_flag &= ~VALIASED;
1482 vp->v_flag &= ~VALIASED;
1483 }
1484 simple_unlock(&spechash_slock);
1485 FREE(vp->v_specinfo, M_VNODE);
1486 vp->v_specinfo = NULL;
1487 }
1488
1489 /*
1490 * If it is on the freelist and not already at the head,
1491 * move it to the head of the list. The test of the back
1492 * pointer and the reference count of zero is because
1493 * it will be removed from the free list by getnewvnode,
1494 * but will not have its reference count incremented until
1495 * after calling vgone. If the reference count were
1496 * incremented first, vgone would (incorrectly) try to
1497 * close the previous instance of the underlying object.
1498 * So, the back pointer is explicitly set to `0xdeadb' in
1499 * getnewvnode after removing it from the freelist to ensure
1500 * that we do not try to move it here.
1501 */
1502 if (vp->v_usecount == 0) {
1503 simple_lock(&vnode_free_list_slock);
1504 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1505 vnode_free_list.tqh_first != vp) {
1506 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1507 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1508 }
1509 simple_unlock(&vnode_free_list_slock);
1510 }
1511
1512 vp->v_type = VBAD;
1513}
1514
1515/*
1516 * Lookup a vnode by device number.
1517 */
1518int
1519vfinddev(dev, type, vpp)
1520 dev_t dev;
1521 enum vtype type;
1522 struct vnode **vpp;
1523{
1524 register struct vnode *vp;
1525 int rc = 0;
1526
1527 simple_lock(&spechash_slock);
1528 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1529 if (dev != vp->v_rdev || type != vp->v_type)
1530 continue;
1531 *vpp = vp;
1532 rc = 1;
1533 break;
1534 }
1535 simple_unlock(&spechash_slock);
1536 return (rc);
1537}
1538
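/*
 * Usage sketch (not compiled in): vfinddev() maps a device number back to
 * an existing special vnode, e.g. to check whether a block device already
 * has a vnode before creating one with bdevvp() or checkalias().  The
 * function name is illustrative only.
 */
#if 0
static struct vnode *
example_lookup_bdev(dev_t dev)
{
	struct vnode *vp;

	if (vfinddev(dev, VBLK, &vp))
		return (vp);		/* existing vnode for this device */
	return (NULLVP);		/* no vnode currently aliases dev */
}
#endif
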
1539/*
1540 * Calculate the total number of references to a special device.
1541 */
1542int
1543vcount(vp)
1544 register struct vnode *vp;
1545{
1546 struct vnode *vq, *vnext;
1547 int count;
1548
1549loop:
1550 if ((vp->v_flag & VALIASED) == 0)
1551 return (vp->v_usecount);
1552 simple_lock(&spechash_slock);
1553 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1554 vnext = vq->v_specnext;
1555 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1556 continue;
1557 /*
1558 * Alias, but not in use, so flush it out.
1559 */
1560 if (vq->v_usecount == 0 && vq != vp) {
1561 simple_unlock(&spechash_slock);
1562 vgone(vq);
1563 goto loop;
1564 }
1565 count += vq->v_usecount;
1566 }
1567 simple_unlock(&spechash_slock);
1568 return (count);
1569}
1570
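/*
 * Usage sketch (not compiled in): device close routines use vcount() to
 * decide whether a close is the last one across all aliases of a special
 * device, in the style of the specfs close code.  The function name is
 * illustrative only.
 */
#if 0
static int
example_is_last_close(struct vnode *vp)
{
	return (vcount(vp) <= 1);
}
#endif
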
1571/*
1572 * Print out a description of a vnode.
1573 */
1574static char *typename[] =
1575{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1576
1577void
1578vprint(label, vp)
1579 char *label;
1580 register struct vnode *vp;
1581{
1582 char buf[64];
1583
1584 if (label != NULL)
1585 printf("%s: ", label);
1586 printf("type %s, usecount %d, writecount %d, refcount %ld,",
1587 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1588 vp->v_holdcnt);
1589 buf[0] = '\0';
1590 if (vp->v_flag & VROOT)
1591 strcat(buf, "|VROOT");
1592 if (vp->v_flag & VTEXT)
1593 strcat(buf, "|VTEXT");
1594 if (vp->v_flag & VSYSTEM)
1595 strcat(buf, "|VSYSTEM");
1596 if (vp->v_flag & VXLOCK)
1597 strcat(buf, "|VXLOCK");
1598 if (vp->v_flag & VXWANT)
1599 strcat(buf, "|VXWANT");
1600 if (vp->v_flag & VBWAIT)
1601 strcat(buf, "|VBWAIT");
1602 if (vp->v_flag & VALIASED)
1603 strcat(buf, "|VALIASED");
1604 if (buf[0] != '\0')
1605 printf(" flags (%s)", &buf[1]);
1606 if (vp->v_data == NULL) {
1607 printf("\n");
1608 } else {
1609 printf("\n\t");
1610 VOP_PRINT(vp);
1611 }
1612}
1613
1614#ifdef DDB
1615/*
1616 * List all of the locked vnodes in the system.
1617 * Called when debugging the kernel.
1618 */
1619void
1620printlockedvnodes()
1621{
1622 struct proc *p = curproc; /* XXX */
1623 struct mount *mp, *nmp;
1624 struct vnode *vp;
1625
1626 printf("Locked vnodes\n");
1627 simple_lock(&mountlist_slock);
1628 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1629 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1630 nmp = mp->mnt_list.cqe_next;
1631 continue;
1632 }
1633 for (vp = mp->mnt_vnodelist.lh_first;
1634 vp != NULL;
1635 vp = vp->v_mntvnodes.le_next) {
1636 if (VOP_ISLOCKED(vp))
1637 vprint((char *)0, vp);
1638 }
1639 simple_lock(&mountlist_slock);
1640 nmp = mp->mnt_list.cqe_next;
1641 vfs_unbusy(mp, p);
1642 }
1643 simple_unlock(&mountlist_slock);
1644}
1645#endif
1646
1647/*
1648 * Top level filesystem related information gathering.
1649 */
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46#include "opt_devfs.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/mount.h>
54#include <sys/time.h>
55#include <sys/vnode.h>
56#include <sys/stat.h>
57#include <sys/namei.h>
58#include <sys/ucred.h>
59#include <sys/buf.h>
60#include <sys/errno.h>
61#include <sys/malloc.h>
62#include <sys/domain.h>
63#include <sys/mbuf.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_object.h>
68#include <vm/vm_extern.h>
69#include <vm/vm_pager.h>
70#include <vm/vnode_pager.h>
71#include <sys/sysctl.h>
72
73#include <miscfs/specfs/specdev.h>
74
75#ifdef DDB
76extern void printlockedvnodes __P((void));
77#endif
78static void vclean __P((struct vnode *vp, int flags, struct proc *p));
79extern void vgonel __P((struct vnode *vp, struct proc *p));
80unsigned long numvnodes;
81extern void vfs_unmountroot __P((struct mount *rootfs));
82extern void vputrele __P((struct vnode *vp, int put));
83
84enum vtype iftovt_tab[16] = {
85 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
86 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
87};
88int vttoif_tab[9] = {
89 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
90 S_IFSOCK, S_IFIFO, S_IFMT,
91};
92
93/*
94 * Insq/Remq for the vnode usage lists.
95 */
96#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
97#define bufremvn(bp) { \
98 LIST_REMOVE(bp, b_vnbufs); \
99 (bp)->b_vnbufs.le_next = NOLIST; \
100}
101TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
102static u_long freevnodes = 0;
103
104struct mntlist mountlist; /* mounted filesystem list */
105struct simplelock mountlist_slock;
106static struct simplelock mntid_slock;
107struct simplelock mntvnode_slock;
108struct simplelock vnode_free_list_slock;
109static struct simplelock spechash_slock;
110
111int desiredvnodes;
112SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
113
114static void vfs_free_addrlist __P((struct netexport *nep));
115static int vfs_free_netcred __P((struct radix_node *rn, void *w));
116static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
117 struct export_args *argp));
118
119/*
120 * Initialize the vnode management data structures.
121 */
122void
123vntblinit()
124{
125
126 desiredvnodes = maxproc + vm_object_cache_max;
127 simple_lock_init(&mntvnode_slock);
128 simple_lock_init(&mntid_slock);
129 simple_lock_init(&spechash_slock);
130 TAILQ_INIT(&vnode_free_list);
131 simple_lock_init(&vnode_free_list_slock);
132 CIRCLEQ_INIT(&mountlist);
133}
134
135/*
136 * Mark a mount point as busy. Used to synchronize access and to delay
137 * unmounting. Interlock is not released on failure.
138 */
139int
140vfs_busy(mp, flags, interlkp, p)
141 struct mount *mp;
142 int flags;
143 struct simplelock *interlkp;
144 struct proc *p;
145{
146 int lkflags;
147
148 if (mp->mnt_flag & MNT_UNMOUNT) {
149 if (flags & LK_NOWAIT)
150 return (ENOENT);
151 mp->mnt_flag |= MNT_MWAIT;
152 if (interlkp) {
153 simple_unlock(interlkp);
154 }
155 /*
156 * Since all busy locks are shared except the exclusive
157 * lock granted when unmounting, the only place that a
158 * wakeup needs to be done is at the release of the
159 * exclusive lock at the end of dounmount.
160 */
161 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
162 if (interlkp) {
163 simple_lock(interlkp);
164 }
165 return (ENOENT);
166 }
167 lkflags = LK_SHARED;
168 if (interlkp)
169 lkflags |= LK_INTERLOCK;
170 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
171 panic("vfs_busy: unexpected lock failure");
172 return (0);
173}
174
175/*
176 * Free a busy filesystem.
177 */
178void
179vfs_unbusy(mp, p)
180 struct mount *mp;
181 struct proc *p;
182{
183
184 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
185}
186
187/*
188 * Lookup a filesystem type, and if found allocate and initialize
189 * a mount structure for it.
190 *
191 * Devname is usually updated by mount(8) after booting.
192 */
193int
194vfs_rootmountalloc(fstypename, devname, mpp)
195 char *fstypename;
196 char *devname;
197 struct mount **mpp;
198{
199 struct proc *p = curproc; /* XXX */
200 struct vfsconf *vfsp;
201 struct mount *mp;
202
203 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
204 if (!strcmp(vfsp->vfc_name, fstypename))
205 break;
206 if (vfsp == NULL)
207 return (ENODEV);
208 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
209 bzero((char *)mp, (u_long)sizeof(struct mount));
210 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
211 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
212 LIST_INIT(&mp->mnt_vnodelist);
213 mp->mnt_vfc = vfsp;
214 mp->mnt_op = vfsp->vfc_vfsops;
215 mp->mnt_flag = MNT_RDONLY;
216 mp->mnt_vnodecovered = NULLVP;
217 vfsp->vfc_refcount++;
218 mp->mnt_stat.f_type = vfsp->vfc_typenum;
219 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
220 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
221 mp->mnt_stat.f_mntonname[0] = '/';
222 mp->mnt_stat.f_mntonname[1] = 0;
223 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
224 *mpp = mp;
225 return (0);
226}
227
228/*
229 * Find an appropriate filesystem to use for the root. If a filesystem
230 * has not been preselected, walk through the list of known filesystems
231 * trying those that have mountroot routines, and try them until one
232 * works or we have tried them all.
233 */
234#ifdef notdef /* XXX JH */
235int
236lite2_vfs_mountroot(void)
237{
238 struct vfsconf *vfsp;
239 extern int (*lite2_mountroot)(void);
240 int error;
241
242 if (lite2_mountroot != NULL)
243 return ((*lite2_mountroot)());
244 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
245 if (vfsp->vfc_mountroot == NULL)
246 continue;
247 if ((error = (*vfsp->vfc_mountroot)()) == 0)
248 return (0);
249 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
250 }
251 return (ENODEV);
252}
253#endif
254
255/*
256 * Lookup a mount point by filesystem identifier.
257 */
258struct mount *
259vfs_getvfs(fsid)
260 fsid_t *fsid;
261{
262 register struct mount *mp;
263
264 simple_lock(&mountlist_slock);
265 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
266 mp = mp->mnt_list.cqe_next) {
267 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
268 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
269 simple_unlock(&mountlist_slock);
270 return (mp);
271 }
272 }
273 simple_unlock(&mountlist_slock);
274 return ((struct mount *) 0);
275}
276
277/*
278 * Get a new unique fsid
279 */
280void
281vfs_getnewfsid(mp)
282 struct mount *mp;
283{
284 static u_short xxxfs_mntid;
285
286 fsid_t tfsid;
287 int mtype;
288
289 simple_lock(&mntid_slock);
290 mtype = mp->mnt_vfc->vfc_typenum;
291 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
292 mp->mnt_stat.f_fsid.val[1] = mtype;
293 if (xxxfs_mntid == 0)
294 ++xxxfs_mntid;
295 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
296 tfsid.val[1] = mtype;
297 if (mountlist.cqh_first != (void *)&mountlist) {
298 while (vfs_getvfs(&tfsid)) {
299 tfsid.val[0]++;
300 xxxfs_mntid++;
301 }
302 }
303 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
304 simple_unlock(&mntid_slock);
305}
306
307/*
308 * Set vnode attributes to VNOVAL
309 */
310void
311vattr_null(vap)
312 register struct vattr *vap;
313{
314
315 vap->va_type = VNON;
316 vap->va_size = VNOVAL;
317 vap->va_bytes = VNOVAL;
318 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
319 vap->va_fsid = vap->va_fileid =
320 vap->va_blocksize = vap->va_rdev =
321 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
322 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
323 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
324 vap->va_flags = vap->va_gen = VNOVAL;
325 vap->va_vaflags = 0;
326}
327
328/*
329 * Routines having to do with the management of the vnode table.
330 */
331extern vop_t **dead_vnodeop_p;
332
333/*
334 * Return the next vnode from the free list.
335 */
336int
337getnewvnode(tag, mp, vops, vpp)
338 enum vtagtype tag;
339 struct mount *mp;
340 vop_t **vops;
341 struct vnode **vpp;
342{
343 struct proc *p = curproc; /* XXX */
344 struct vnode *vp;
345
346 simple_lock(&vnode_free_list_slock);
347retry:
348 /*
349 * we allocate a new vnode if
350 * 1. we don't have any free
351 * Pretty obvious, we actually used to panic, but that
352 * is a silly thing to do.
353 * 2. we havn't filled our pool yet
354 * We don't want to trash the incore (VM-)vnodecache.
355 * 3. if less that 1/4th of our vnodes are free.
356 * We don't want to trash the namei cache either.
357 */
358 if (freevnodes < (numvnodes >> 2) ||
359 numvnodes < desiredvnodes ||
360 vnode_free_list.tqh_first == NULL) {
361 simple_unlock(&vnode_free_list_slock);
362 vp = (struct vnode *) malloc((u_long) sizeof *vp,
363 M_VNODE, M_WAITOK);
364 bzero((char *) vp, sizeof *vp);
365 numvnodes++;
366 } else {
367 for (vp = vnode_free_list.tqh_first;
368 vp != NULLVP; vp = vp->v_freelist.tqe_next) {
369 if (simple_lock_try(&vp->v_interlock))
370 break;
371 }
372 /*
373 * Unless this is a bad time of the month, at most
374 * the first NCPUS items on the free list are
375 * locked, so this is close enough to being empty.
376 */
377 if (vp == NULLVP) {
378 simple_unlock(&vnode_free_list_slock);
379 tablefull("vnode");
380 *vpp = 0;
381 return (ENFILE);
382 }
383 if (vp->v_usecount)
384 panic("free vnode isn't");
385 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
386 if (vp->v_usage > 0) {
387 simple_unlock(&vp->v_interlock);
388 --vp->v_usage;
389 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
390 goto retry;
391 }
392 freevnodes--;
393
394 /* see comment on why 0xdeadb is set at end of vgone (below) */
395 vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
396 simple_unlock(&vnode_free_list_slock);
397 vp->v_lease = NULL;
398 if (vp->v_type != VBAD)
399 vgonel(vp, p);
400 else {
401 simple_unlock(&vp->v_interlock);
402 }
403
404#ifdef DIAGNOSTIC
405 {
406 int s;
407
408 if (vp->v_data)
409 panic("cleaned vnode isn't");
410 s = splbio();
411 if (vp->v_numoutput)
412 panic("Clean vnode has pending I/O's");
413 splx(s);
414 }
415#endif
416 vp->v_flag = 0;
417 vp->v_lastr = 0;
418 vp->v_lastw = 0;
419 vp->v_lasta = 0;
420 vp->v_cstart = 0;
421 vp->v_clen = 0;
422 vp->v_socket = 0;
423 vp->v_writecount = 0; /* XXX */
424 vp->v_usage = 0;
425 }
426 vp->v_type = VNON;
427 cache_purge(vp);
428 vp->v_tag = tag;
429 vp->v_op = vops;
430 insmntque(vp, mp);
431 *vpp = vp;
432 vp->v_usecount = 1;
433 vp->v_data = 0;
434 return (0);
435}
436
437/*
438 * Move a vnode from one mount queue to another.
439 */
440void
441insmntque(vp, mp)
442 register struct vnode *vp;
443 register struct mount *mp;
444{
445
446 simple_lock(&mntvnode_slock);
447 /*
448 * Delete from old mount point vnode list, if on one.
449 */
450 if (vp->v_mount != NULL)
451 LIST_REMOVE(vp, v_mntvnodes);
452 /*
453 * Insert into list of vnodes for the new mount point, if available.
454 */
455 if ((vp->v_mount = mp) == NULL) {
456 simple_unlock(&mntvnode_slock);
457 return;
458 }
459 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
460 simple_unlock(&mntvnode_slock);
461}
462
463/*
464 * Update outstanding I/O count and do wakeup if requested.
465 */
466void
467vwakeup(bp)
468 register struct buf *bp;
469{
470 register struct vnode *vp;
471
472 bp->b_flags &= ~B_WRITEINPROG;
473 if ((vp = bp->b_vp)) {
474 vp->v_numoutput--;
475 if (vp->v_numoutput < 0)
476 panic("vwakeup: neg numoutput");
477 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
478 vp->v_flag &= ~VBWAIT;
479 wakeup((caddr_t) &vp->v_numoutput);
480 }
481 }
482}
483
484/*
485 * Flush out and invalidate all buffers associated with a vnode.
486 * Called with the underlying object locked.
487 */
488int
489vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
490 register struct vnode *vp;
491 int flags;
492 struct ucred *cred;
493 struct proc *p;
494 int slpflag, slptimeo;
495{
496 register struct buf *bp;
497 struct buf *nbp, *blist;
498 int s, error;
499 vm_object_t object;
500
501 if (flags & V_SAVE) {
502 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
503 return (error);
504 if (vp->v_dirtyblkhd.lh_first != NULL)
505 panic("vinvalbuf: dirty bufs");
506 }
507
508 s = splbio();
509 for (;;) {
510 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
511 while (blist && blist->b_lblkno < 0)
512 blist = blist->b_vnbufs.le_next;
513 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
514 (flags & V_SAVEMETA))
515 while (blist && blist->b_lblkno < 0)
516 blist = blist->b_vnbufs.le_next;
517 if (!blist)
518 break;
519
520 for (bp = blist; bp; bp = nbp) {
521 nbp = bp->b_vnbufs.le_next;
522 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
523 continue;
524 if (bp->b_flags & B_BUSY) {
525 bp->b_flags |= B_WANTED;
526 error = tsleep((caddr_t) bp,
527 slpflag | (PRIBIO + 1), "vinvalbuf",
528 slptimeo);
529 splx(s);
530 if (error)
531 return (error);
532 break;
533 }
534 bremfree(bp);
535 bp->b_flags |= B_BUSY;
536 /*
537 * XXX Since there are no node locks for NFS, I
538 * believe there is a slight chance that a delayed
539 * write will occur while sleeping just above, so
540 * check for it.
541 */
542 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
543 (void) VOP_BWRITE(bp);
544 break;
545 }
546 bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
547 brelse(bp);
548 }
549 }
550 splx(s);
551
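	/*
	 * Wait for any writes still in progress on the vnode to drain
	 * before purging the pages of the associated VM object below.
	 */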
552 s = splbio();
553 while (vp->v_numoutput > 0) {
554 vp->v_flag |= VBWAIT;
555 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
556 }
557 splx(s);
558
559 /*
560 * Destroy the copy in the VM cache, too.
561 */
562 object = vp->v_object;
563 if (object != NULL) {
564 vm_object_page_remove(object, 0, object->size,
565 (flags & V_SAVE) ? TRUE : FALSE);
566 }
567 if (!(flags & V_SAVEMETA) &&
568 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
569 panic("vinvalbuf: flush failed");
570 return (0);
571}
572
573/*
574 * Associate a buffer with a vnode.
575 */
576void
577bgetvp(vp, bp)
578 register struct vnode *vp;
579 register struct buf *bp;
580{
581 int s;
582
583 if (bp->b_vp)
584 panic("bgetvp: not free");
585 VHOLD(vp);
586 bp->b_vp = vp;
587 if (vp->v_type == VBLK || vp->v_type == VCHR)
588 bp->b_dev = vp->v_rdev;
589 else
590 bp->b_dev = NODEV;
591 /*
592 * Insert onto list for new vnode.
593 */
594 s = splbio();
595 bufinsvn(bp, &vp->v_cleanblkhd);
596 splx(s);
597}
598
599/*
600 * Disassociate a buffer from a vnode.
601 */
602void
603brelvp(bp)
604 register struct buf *bp;
605{
606 struct vnode *vp;
607 int s;
608
609 if (bp->b_vp == (struct vnode *) 0)
610 panic("brelvp: NULL");
611 /*
612 * Delete from old vnode list, if on one.
613 */
614 s = splbio();
615 if (bp->b_vnbufs.le_next != NOLIST)
616 bufremvn(bp);
617 splx(s);
618
619 vp = bp->b_vp;
620 bp->b_vp = (struct vnode *) 0;
621 HOLDRELE(vp);
622}
623
624/*
625 * Associate a p-buffer with a vnode.
626 */
627void
628pbgetvp(vp, bp)
629 register struct vnode *vp;
630 register struct buf *bp;
631{
632#if defined(DIAGNOSTIC)
633 if (bp->b_vp)
634 panic("pbgetvp: not free");
635#endif
636 bp->b_vp = vp;
637 if (vp->v_type == VBLK || vp->v_type == VCHR)
638 bp->b_dev = vp->v_rdev;
639 else
640 bp->b_dev = NODEV;
641}
642
643/*
644 * Disassociate a p-buffer from a vnode.
645 */
646void
647pbrelvp(bp)
648 register struct buf *bp;
649{
650 struct vnode *vp;
651
652#if defined(DIAGNOSTIC)
653 if (bp->b_vp == (struct vnode *) 0)
654 panic("pbrelvp: NULL");
655#endif
656
657 bp->b_vp = (struct vnode *) 0;
658}
659
660/*
661 * Reassign a buffer from one vnode to another.
662 * Used to assign file specific control information
663 * (indirect blocks) to the vnode to which they belong.
664 */
665void
666reassignbuf(bp, newvp)
667 register struct buf *bp;
668 register struct vnode *newvp;
669{
670 int s;
671
672 if (newvp == NULL) {
673 printf("reassignbuf: NULL");
674 return;
675 }
676
677 s = splbio();
678 /*
679 * Delete from old vnode list, if on one.
680 */
681 if (bp->b_vnbufs.le_next != NOLIST)
682 bufremvn(bp);
683 /*
684 * If dirty, put on list of dirty buffers; otherwise insert onto list
685 * of clean buffers.
686 */
687 if (bp->b_flags & B_DELWRI) {
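		/*
		 * The per-vnode dirty list is kept sorted by logical
		 * block number, so find the proper slot for this buffer.
		 */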
688 struct buf *tbp;
689
690 tbp = newvp->v_dirtyblkhd.lh_first;
691 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
692 bufinsvn(bp, &newvp->v_dirtyblkhd);
693 } else {
694 while (tbp->b_vnbufs.le_next &&
695 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
696 tbp = tbp->b_vnbufs.le_next;
697 }
698 LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
699 }
700 } else {
701 bufinsvn(bp, &newvp->v_cleanblkhd);
702 }
703 splx(s);
704}
705
706#ifndef DEVFS_ROOT
707/*
708 * Create a vnode for a block device.
709 * Used for root filesystem, argdev, and swap areas.
710 * Also used for memory file system special devices.
711 */
712int
713bdevvp(dev, vpp)
714 dev_t dev;
715 struct vnode **vpp;
716{
717 register struct vnode *vp;
718 struct vnode *nvp;
719 int error;
720
721 if (dev == NODEV)
722 return (0);
723 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
724 if (error) {
725 *vpp = 0;
726 return (error);
727 }
728 vp = nvp;
729 vp->v_type = VBLK;
730 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
731 vput(vp);
732 vp = nvp;
733 }
734 *vpp = vp;
735 return (0);
736}
737#endif /* !DEVFS_ROOT */
738
739/*
740 * Check to see if the new vnode represents a special device
741 * for which we already have a vnode (either because of
742 * bdevvp() or because of a different vnode representing
743 * the same block device). If such an alias exists, deallocate
744 * the existing contents and return the aliased vnode. The
745 * caller is responsible for filling it with its new contents.
746 */
747struct vnode *
748checkalias(nvp, nvp_rdev, mp)
749 register struct vnode *nvp;
750 dev_t nvp_rdev;
751 struct mount *mp;
752{
753 struct proc *p = curproc; /* XXX */
754 struct vnode *vp;
755 struct vnode **vpp;
756
757 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
758 return (NULLVP);
759
760 vpp = &speclisth[SPECHASH(nvp_rdev)];
761loop:
762 simple_lock(&spechash_slock);
763 for (vp = *vpp; vp; vp = vp->v_specnext) {
764 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
765 continue;
766 /*
767 * Alias, but not in use, so flush it out.
768 */
769 simple_lock(&vp->v_interlock);
770 if (vp->v_usecount == 0) {
771 simple_unlock(&spechash_slock);
772 vgonel(vp, p);
773 goto loop;
774 }
775 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
776 simple_unlock(&spechash_slock);
777 goto loop;
778 }
779 break;
780 }
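	/*
	 * If no alias was found, or the one found is still owned by a
	 * filesystem (v_tag != VT_NON), set up the spec info on the new
	 * vnode and link it into the device hash chain.
	 */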
781 if (vp == NULL || vp->v_tag != VT_NON) {
782 MALLOC(nvp->v_specinfo, struct specinfo *,
783 sizeof(struct specinfo), M_VNODE, M_WAITOK);
784 nvp->v_rdev = nvp_rdev;
785 nvp->v_hashchain = vpp;
786 nvp->v_specnext = *vpp;
787 nvp->v_specflags = 0;
788 simple_unlock(&spechash_slock);
789 *vpp = nvp;
790 if (vp != NULLVP) {
791 nvp->v_flag |= VALIASED;
792 vp->v_flag |= VALIASED;
793 vput(vp);
794 }
795 return (NULLVP);
796 }
797 simple_unlock(&spechash_slock);
798 VOP_UNLOCK(vp, 0, p);
799 simple_lock(&vp->v_interlock);
800 vclean(vp, 0, p);
801 vp->v_op = nvp->v_op;
802 vp->v_tag = nvp->v_tag;
803 nvp->v_type = VNON;
804 insmntque(vp, mp);
805 return (vp);
806}
807
808/*
809 * Grab a particular vnode from the free list, increment its
810 	 * reference count and lock it. The vnode lock bit is set while the
811 	 * vnode is being eliminated in vgone. The process is awakened
812 * when the transition is completed, and an error returned to
813 * indicate that the vnode is no longer usable (possibly having
814 * been changed to a new file system type).
815 */
816int
817vget(vp, flags, p)
818 register struct vnode *vp;
819 int flags;
820 struct proc *p;
821{
822 int error;
823
824 /*
825 * If the vnode is in the process of being cleaned out for
826 * another use, we wait for the cleaning to finish and then
827 * return failure. Cleaning is determined by checking that
828 * the VXLOCK flag is set.
829 */
830 if ((flags & LK_INTERLOCK) == 0) {
831 simple_lock(&vp->v_interlock);
832 }
833 if (vp->v_flag & VXLOCK) {
834 vp->v_flag |= VXWANT;
835 simple_unlock(&vp->v_interlock);
836 tsleep((caddr_t)vp, PINOD, "vget", 0);
837 return (ENOENT);
838 }
839 if (vp->v_usecount == 0) {
840 simple_lock(&vnode_free_list_slock);
841 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
842 simple_unlock(&vnode_free_list_slock);
843 freevnodes--;
844 }
845 vp->v_usecount++;
846 /*
847 * Create the VM object, if needed
848 */
849 if ((vp->v_type == VREG) &&
850 ((vp->v_object == NULL) ||
851 (vp->v_object->flags & OBJ_VFS_REF) == 0)) {
852 /*
853 * XXX vfs_object_create probably needs the interlock.
854 */
855 simple_unlock(&vp->v_interlock);
856 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
857 simple_lock(&vp->v_interlock);
858 }
859 if (flags & LK_TYPE_MASK) {
860 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)))
861 vrele(vp);
862 return (error);
863 }
864 simple_unlock(&vp->v_interlock);
865 return (0);
866}
867
868/*
869 * Stubs to use when there is no locking to be done on the underlying object.
870 * A minimal shared lock is necessary to ensure that the underlying object
871 * is not revoked while an operation is in progress. So, an active shared
872  * count is maintained in an auxiliary vnode lock structure.
873 */
874int
875vop_nolock(ap)
876 struct vop_lock_args /* {
877 struct vnode *a_vp;
878 int a_flags;
879 struct proc *a_p;
880 } */ *ap;
881{
882#ifdef notyet
883 /*
884 * This code cannot be used until all the non-locking filesystems
885 * (notably NFS) are converted to properly lock and release nodes.
886 * Also, certain vnode operations change the locking state within
887 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
888 * and symlink). Ideally these operations should not change the
889 * lock state, but should be changed to let the caller of the
890 * function unlock them. Otherwise all intermediate vnode layers
891 * (such as union, umapfs, etc) must catch these functions to do
892 * the necessary locking at their layer. Note that the inactive
893 * and lookup operations also change their lock state, but this
894 * cannot be avoided, so these two operations will always need
895 * to be handled in intermediate layers.
896 */
897 struct vnode *vp = ap->a_vp;
898 int vnflags, flags = ap->a_flags;
899
900 if (vp->v_vnlock == NULL) {
901 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
902 return (0);
903 MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
904 M_VNODE, M_WAITOK);
905 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
906 }
907 switch (flags & LK_TYPE_MASK) {
908 case LK_DRAIN:
909 vnflags = LK_DRAIN;
910 break;
911 case LK_EXCLUSIVE:
912 case LK_SHARED:
913 vnflags = LK_SHARED;
914 break;
915 case LK_UPGRADE:
916 case LK_EXCLUPGRADE:
917 case LK_DOWNGRADE:
918 return (0);
919 case LK_RELEASE:
920 default:
921 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
922 }
923 if (flags & LK_INTERLOCK)
924 vnflags |= LK_INTERLOCK;
925 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
926#else /* for now */
927 /*
928 * Since we are not using the lock manager, we must clear
929 * the interlock here.
930 */
931 if (ap->a_flags & LK_INTERLOCK) {
932 simple_unlock(&ap->a_vp->v_interlock);
933 }
934 return (0);
935#endif
936}
937
938/*
939 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
940 */
941int
942vop_nounlock(ap)
943 struct vop_unlock_args /* {
944 struct vnode *a_vp;
945 int a_flags;
946 struct proc *a_p;
947 } */ *ap;
948{
949 struct vnode *vp = ap->a_vp;
950
951 if (vp->v_vnlock == NULL) {
952 if (ap->a_flags & LK_INTERLOCK)
953 simple_unlock(&ap->a_vp->v_interlock);
954 return (0);
955 }
956 return (lockmgr(vp->v_vnlock, LK_RELEASE | ap->a_flags,
957 &ap->a_vp->v_interlock, ap->a_p));
958}
959
960/*
961  * Return whether or not the node is locked.
962 */
963int
964vop_noislocked(ap)
965 struct vop_islocked_args /* {
966 struct vnode *a_vp;
967 } */ *ap;
968{
969 struct vnode *vp = ap->a_vp;
970
971 if (vp->v_vnlock == NULL)
972 return (0);
973 return (lockstatus(vp->v_vnlock));
974}
975
976/* #ifdef DIAGNOSTIC */
977/*
978 * Vnode reference, just increment the count
979 */
980void
981vref(vp)
982 struct vnode *vp;
983{
984 simple_lock(&vp->v_interlock);
985 if (vp->v_usecount <= 0)
986 panic("vref used where vget required");
987
988 vp->v_usecount++;
989
990 if ((vp->v_type == VREG) &&
991 ((vp->v_object == NULL) ||
992 ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) {
993 /*
994 			 * We need to lock the VP during the time that
995 * the object is created. This is necessary to
996 * keep the system from re-entrantly doing it
997 * multiple times.
998 * XXX vfs_object_create probably needs the interlock?
999 */
1000 simple_unlock(&vp->v_interlock);
1001 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
1002 return;
1003 }
1004 simple_unlock(&vp->v_interlock);
1005}
1006
1007/*
1008 * Vnode put/release.
1009 * If count drops to zero, call inactive routine and return to freelist.
1010 */
1011void
1012vputrele(vp, put)
1013 struct vnode *vp;
1014 int put;
1015{
1016 struct proc *p = curproc; /* XXX */
1017
1018#ifdef DIAGNOSTIC
1019 if (vp == NULL)
1020 panic("vputrele: null vp");
1021#endif
1022 simple_lock(&vp->v_interlock);
1023 vp->v_usecount--;
1024
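	/*
	 * If the only remaining use reference is the one accounted to
	 * the VM object (OBJ_VFS_REF), drop that object reference here
	 * instead of putting the vnode on the free list.
	 */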
1025 if ((vp->v_usecount == 1) &&
1026 vp->v_object &&
1027 (vp->v_object->flags & OBJ_VFS_REF)) {
1028 vp->v_object->flags &= ~OBJ_VFS_REF;
1029 if (put) {
1030 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1031 } else {
1032 simple_unlock(&vp->v_interlock);
1033 }
1034 vm_object_deallocate(vp->v_object);
1035 return;
1036 }
1037
1038 if (vp->v_usecount > 0) {
1039 if (put) {
1040 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1041 } else {
1042 simple_unlock(&vp->v_interlock);
1043 }
1044 return;
1045 }
1046
1047 if (vp->v_usecount < 0) {
1048#ifdef DIAGNOSTIC
1049 vprint("vputrele: negative ref count", vp);
1050#endif
1051 panic("vputrele: negative ref cnt");
1052 }
1053 simple_lock(&vnode_free_list_slock);
1054 if (vp->v_flag & VAGE) {
1055 vp->v_flag &= ~VAGE;
1056 vp->v_usage = 0;
1057 if(vp->v_tag != VT_TFS)
1058 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1059 } else {
1060 if(vp->v_tag != VT_TFS)
1061 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1062 }
1063 freevnodes++;
1064 simple_unlock(&vnode_free_list_slock);
1065
1066 /*
1067 * If we are doing a vput, the node is already locked, and we must
1068 * call VOP_INACTIVE with the node locked. So, in the case of
1069 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1070 */
1071 if (put) {
1072 simple_unlock(&vp->v_interlock);
1073 VOP_INACTIVE(vp, p);
1074 } else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1075 VOP_INACTIVE(vp, p);
1076 }
1077}
1078
1079/*
1080 * vput(), just unlock and vrele()
1081 */
1082void
1083vput(vp)
1084 struct vnode *vp;
1085{
1086 vputrele(vp, 1);
1087}
1088
1089void
1090vrele(vp)
1091 struct vnode *vp;
1092{
1093 vputrele(vp, 0);
1094}
1095
1096#ifdef DIAGNOSTIC
1097/*
1098 * Page or buffer structure gets a reference.
1099 */
1100void
1101vhold(vp)
1102 register struct vnode *vp;
1103{
1104
1105 simple_lock(&vp->v_interlock);
1106 vp->v_holdcnt++;
1107 simple_unlock(&vp->v_interlock);
1108}
1109
1110/*
1111 * Page or buffer structure frees a reference.
1112 */
1113void
1114holdrele(vp)
1115 register struct vnode *vp;
1116{
1117
1118 simple_lock(&vp->v_interlock);
1119 if (vp->v_holdcnt <= 0)
1120 panic("holdrele: holdcnt");
1121 vp->v_holdcnt--;
1122 simple_unlock(&vp->v_interlock);
1123}
1124#endif /* DIAGNOSTIC */
1125
1126/*
1127 * Remove any vnodes in the vnode table belonging to mount point mp.
1128 *
1129 * If MNT_NOFORCE is specified, there should not be any active ones,
1130 * return error if any are found (nb: this is a user error, not a
1131 * system error). If MNT_FORCE is specified, detach any active vnodes
1132 * that are found.
1133 */
1134#ifdef DIAGNOSTIC
1135static int busyprt = 0; /* print out busy vnodes */
1136SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1137#endif
1138
1139int
1140vflush(mp, skipvp, flags)
1141 struct mount *mp;
1142 struct vnode *skipvp;
1143 int flags;
1144{
1145 struct proc *p = curproc; /* XXX */
1146 struct vnode *vp, *nvp;
1147 int busy = 0;
1148
1149 simple_lock(&mntvnode_slock);
1150loop:
1151 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1152 /*
1153 * Make sure this vnode wasn't reclaimed in getnewvnode().
1154 * Start over if it has (it won't be on the list anymore).
1155 */
1156 if (vp->v_mount != mp)
1157 goto loop;
1158 nvp = vp->v_mntvnodes.le_next;
1159 /*
1160 * Skip over a selected vnode.
1161 */
1162 if (vp == skipvp)
1163 continue;
1164
1165 simple_lock(&vp->v_interlock);
1166 /*
1167 		 * Skip over vnodes marked VSYSTEM.
1168 */
1169 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1170 simple_unlock(&vp->v_interlock);
1171 continue;
1172 }
1173 /*
1174 * If WRITECLOSE is set, only flush out regular file vnodes
1175 * open for writing.
1176 */
1177 if ((flags & WRITECLOSE) &&
1178 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1179 simple_unlock(&vp->v_interlock);
1180 continue;
1181 }
1182
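		/*
		 * Release the VFS reference on the vnode's VM object
		 * before dealing with the vnode itself; a temporary
		 * object reference keeps it alive across pager_cache().
		 */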
1183 if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) {
1184 simple_unlock(&vp->v_interlock);
1185 simple_unlock(&mntvnode_slock);
1186 vm_object_reference(vp->v_object);
1187 pager_cache(vp->v_object, FALSE);
1188 vp->v_object->flags &= ~OBJ_VFS_REF;
1189 vm_object_deallocate(vp->v_object);
1190 simple_lock(&mntvnode_slock);
1191 simple_lock(&vp->v_interlock);
1192 }
1193
1194 /*
1195 * With v_usecount == 0, all we need to do is clear out the
1196 * vnode data structures and we are done.
1197 */
1198 if (vp->v_usecount == 0) {
1199 simple_unlock(&mntvnode_slock);
1200 vgonel(vp, p);
1201 simple_lock(&mntvnode_slock);
1202 continue;
1203 }
1204
1205 /*
1206 * If FORCECLOSE is set, forcibly close the vnode. For block
1207 * or character devices, revert to an anonymous device. For
1208 * all other files, just kill them.
1209 */
1210 if (flags & FORCECLOSE) {
1211 simple_unlock(&mntvnode_slock);
1212 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1213 vgonel(vp, p);
1214 } else {
1215 vclean(vp, 0, p);
1216 vp->v_op = spec_vnodeop_p;
1217 insmntque(vp, (struct mount *) 0);
1218 }
1219 simple_lock(&mntvnode_slock);
1220 continue;
1221 }
1222#ifdef DIAGNOSTIC
1223 if (busyprt)
1224 vprint("vflush: busy vnode", vp);
1225#endif
1226 simple_unlock(&vp->v_interlock);
1227 busy++;
1228 }
1229 simple_unlock(&mntvnode_slock);
1230 if (busy)
1231 return (EBUSY);
1232 return (0);
1233}
1234
1235/*
1236 * Disassociate the underlying file system from a vnode.
1237 */
1238static void
1239vclean(struct vnode *vp, int flags, struct proc *p)
1240{
1241 int active;
1242
1243 /*
1244 * Check to see if the vnode is in use. If so we have to reference it
1245 * before we clean it out so that its count cannot fall to zero and
1246 * generate a race against ourselves to recycle it.
1247 */
1248 if ((active = vp->v_usecount))
1249 vp->v_usecount++;
1250 /*
1251 * Prevent the vnode from being recycled or brought into use while we
1252 * clean it out.
1253 */
1254 if (vp->v_flag & VXLOCK)
1255 panic("vclean: deadlock");
1256 vp->v_flag |= VXLOCK;
1257 /*
1258 * Even if the count is zero, the VOP_INACTIVE routine may still
1259 * have the object locked while it cleans it out. The VOP_LOCK
1260 * ensures that the VOP_INACTIVE routine is done with its work.
1261 * For active vnodes, it ensures that no other activity can
1262 * occur while the underlying object is being cleaned out.
1263 */
1264 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1265 /*
1266 * Clean out any buffers associated with the vnode.
1267 */
1268 if (flags & DOCLOSE)
1269 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1270 /*
1271 * If purging an active vnode, it must be closed and
1272 * deactivated before being reclaimed. Note that the
1273 * VOP_INACTIVE will unlock the vnode.
1274 */
1275 if (active) {
1276 if (flags & DOCLOSE)
1277 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1278 VOP_INACTIVE(vp, p);
1279 } else {
1280 /*
1281 * Any other processes trying to obtain this lock must first
1282 * wait for VXLOCK to clear, then call the new lock operation.
1283 */
1284 VOP_UNLOCK(vp, 0, p);
1285 }
1286 /*
1287 * Reclaim the vnode.
1288 */
1289 if (VOP_RECLAIM(vp, p))
1290 panic("vclean: cannot reclaim");
1291 if (active)
1292 vrele(vp);
1293 cache_purge(vp);
1294 if (vp->v_vnlock) {
1295 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1296 vprint("vclean: lock not drained", vp);
1297 FREE(vp->v_vnlock, M_VNODE);
1298 vp->v_vnlock = NULL;
1299 }
1300
1301 /*
1302 * Done with purge, notify sleepers of the grim news.
1303 */
1304 vp->v_op = dead_vnodeop_p;
1305 vp->v_tag = VT_NON;
1306 vp->v_flag &= ~VXLOCK;
1307 if (vp->v_flag & VXWANT) {
1308 vp->v_flag &= ~VXWANT;
1309 wakeup((caddr_t) vp);
1310 }
1311}
1312
1313/*
1314 * Eliminate all activity associated with the requested vnode
1315 * and with all vnodes aliased to the requested vnode.
1316 */
1317int
1318vop_revoke(ap)
1319 struct vop_revoke_args /* {
1320 struct vnode *a_vp;
1321 int a_flags;
1322 } */ *ap;
1323{
1324 struct vnode *vp, *vq;
1325 struct proc *p = curproc; /* XXX */
1326
1327#ifdef DIAGNOSTIC
1328 if ((ap->a_flags & REVOKEALL) == 0)
1329 panic("vop_revoke");
1330#endif
1331
1332 vp = ap->a_vp;
1333 simple_lock(&vp->v_interlock);
1334
1335 if (vp->v_flag & VALIASED) {
1336 /*
1337 * If a vgone (or vclean) is already in progress,
1338 * wait until it is done and return.
1339 */
1340 if (vp->v_flag & VXLOCK) {
1341 vp->v_flag |= VXWANT;
1342 simple_unlock(&vp->v_interlock);
1343 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1344 return (0);
1345 }
1346 /*
1347 * Ensure that vp will not be vgone'd while we
1348 * are eliminating its aliases.
1349 */
1350 vp->v_flag |= VXLOCK;
1351 simple_unlock(&vp->v_interlock);
1352 while (vp->v_flag & VALIASED) {
1353 simple_lock(&spechash_slock);
1354 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1355 if (vq->v_rdev != vp->v_rdev ||
1356 vq->v_type != vp->v_type || vp == vq)
1357 continue;
1358 simple_unlock(&spechash_slock);
1359 vgone(vq);
1360 break;
1361 }
1362 if (vq == NULLVP) {
1363 simple_unlock(&spechash_slock);
1364 }
1365 }
1366 /*
1367 * Remove the lock so that vgone below will
1368 * really eliminate the vnode after which time
1369 * vgone will awaken any sleepers.
1370 */
1371 simple_lock(&vp->v_interlock);
1372 vp->v_flag &= ~VXLOCK;
1373 }
1374 vgonel(vp, p);
1375 return (0);
1376}
1377
1378/*
1379 * Recycle an unused vnode to the front of the free list.
1380 * Release the passed interlock if the vnode will be recycled.
1381 */
1382int
1383vrecycle(vp, inter_lkp, p)
1384 struct vnode *vp;
1385 struct simplelock *inter_lkp;
1386 struct proc *p;
1387{
1388
1389 simple_lock(&vp->v_interlock);
1390 if (vp->v_usecount == 0) {
1391 if (inter_lkp) {
1392 simple_unlock(inter_lkp);
1393 }
1394 vgonel(vp, p);
1395 return (1);
1396 }
1397 simple_unlock(&vp->v_interlock);
1398 return (0);
1399}
1400
1401/*
1402 * Eliminate all activity associated with a vnode
1403 * in preparation for reuse.
1404 */
1405void
1406vgone(vp)
1407 register struct vnode *vp;
1408{
1409 struct proc *p = curproc; /* XXX */
1410
1411 simple_lock(&vp->v_interlock);
1412 vgonel(vp, p);
1413}
1414
1415/*
1416 * vgone, with the vp interlock held.
1417 */
1418void
1419vgonel(vp, p)
1420 struct vnode *vp;
1421 struct proc *p;
1422{
1423 struct vnode *vq;
1424 struct vnode *vx;
1425
1426 /*
1427 * If a vgone (or vclean) is already in progress,
1428 * wait until it is done and return.
1429 */
1430 if (vp->v_flag & VXLOCK) {
1431 vp->v_flag |= VXWANT;
1432 simple_unlock(&vp->v_interlock);
1433 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1434 return;
1435 }
1436
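	/*
	 * Let the VM system know that the vnode behind this object is
	 * going away.
	 */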
1437 if (vp->v_object) {
1438 vp->v_object->flags |= OBJ_VNODE_GONE;
1439 }
1440
1441 /*
1442 * Clean out the filesystem specific data.
1443 */
1444 vclean(vp, DOCLOSE, p);
1445 /*
1446 * Delete from old mount point vnode list, if on one.
1447 */
1448 if (vp->v_mount != NULL)
1449 insmntque(vp, (struct mount *)0);
1450 /*
1451 * If special device, remove it from special device alias list
1452 * if it is on one.
1453 */
1454 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1455 simple_lock(&spechash_slock);
1456 if (*vp->v_hashchain == vp) {
1457 *vp->v_hashchain = vp->v_specnext;
1458 } else {
1459 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1460 if (vq->v_specnext != vp)
1461 continue;
1462 vq->v_specnext = vp->v_specnext;
1463 break;
1464 }
1465 if (vq == NULL)
1466 panic("missing bdev");
1467 }
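		/*
		 * If this vnode was aliased, find the surviving aliases.
		 * When only one remains it is no longer an alias, so its
		 * VALIASED flag is cleared as well.
		 */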
1468 if (vp->v_flag & VALIASED) {
1469 vx = NULL;
1470 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1471 if (vq->v_rdev != vp->v_rdev ||
1472 vq->v_type != vp->v_type)
1473 continue;
1474 if (vx)
1475 break;
1476 vx = vq;
1477 }
1478 if (vx == NULL)
1479 panic("missing alias");
1480 if (vq == NULL)
1481 vx->v_flag &= ~VALIASED;
1482 vp->v_flag &= ~VALIASED;
1483 }
1484 simple_unlock(&spechash_slock);
1485 FREE(vp->v_specinfo, M_VNODE);
1486 vp->v_specinfo = NULL;
1487 }
1488
1489 /*
1490 * If it is on the freelist and not already at the head,
1491 * move it to the head of the list. The test of the back
1492 * pointer and the reference count of zero is because
1493 * it will be removed from the free list by getnewvnode,
1494 * but will not have its reference count incremented until
1495 * after calling vgone. If the reference count were
1496 * incremented first, vgone would (incorrectly) try to
1497 * close the previous instance of the underlying object.
1498 * So, the back pointer is explicitly set to `0xdeadb' in
1499 * getnewvnode after removing it from the freelist to ensure
1500 * that we do not try to move it here.
1501 */
1502 if (vp->v_usecount == 0) {
1503 simple_lock(&vnode_free_list_slock);
1504 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1505 vnode_free_list.tqh_first != vp) {
1506 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1507 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1508 }
1509 simple_unlock(&vnode_free_list_slock);
1510 }
1511
1512 vp->v_type = VBAD;
1513}
1514
1515/*
1516 * Lookup a vnode by device number.
1517 */
1518int
1519vfinddev(dev, type, vpp)
1520 dev_t dev;
1521 enum vtype type;
1522 struct vnode **vpp;
1523{
1524 register struct vnode *vp;
1525 int rc = 0;
1526
1527 simple_lock(&spechash_slock);
1528 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1529 if (dev != vp->v_rdev || type != vp->v_type)
1530 continue;
1531 *vpp = vp;
1532 rc = 1;
1533 break;
1534 }
1535 simple_unlock(&spechash_slock);
1536 return (rc);
1537}
1538
1539/*
1540 * Calculate the total number of references to a special device.
1541 */
1542int
1543vcount(vp)
1544 register struct vnode *vp;
1545{
1546 struct vnode *vq, *vnext;
1547 int count;
1548
1549loop:
1550 if ((vp->v_flag & VALIASED) == 0)
1551 return (vp->v_usecount);
1552 simple_lock(&spechash_slock);
1553 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1554 vnext = vq->v_specnext;
1555 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1556 continue;
1557 /*
1558 * Alias, but not in use, so flush it out.
1559 */
1560 if (vq->v_usecount == 0 && vq != vp) {
1561 simple_unlock(&spechash_slock);
1562 vgone(vq);
1563 goto loop;
1564 }
1565 count += vq->v_usecount;
1566 }
1567 simple_unlock(&spechash_slock);
1568 return (count);
1569}
1570
1571/*
1572 * Print out a description of a vnode.
1573 */
1574static char *typename[] =
1575{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1576
1577void
1578vprint(label, vp)
1579 char *label;
1580 register struct vnode *vp;
1581{
1582 char buf[64];
1583
1584 if (label != NULL)
1585 printf("%s: ", label);
1586 printf("type %s, usecount %d, writecount %d, refcount %ld,",
1587 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1588 vp->v_holdcnt);
1589 buf[0] = '\0';
1590 if (vp->v_flag & VROOT)
1591 strcat(buf, "|VROOT");
1592 if (vp->v_flag & VTEXT)
1593 strcat(buf, "|VTEXT");
1594 if (vp->v_flag & VSYSTEM)
1595 strcat(buf, "|VSYSTEM");
1596 if (vp->v_flag & VXLOCK)
1597 strcat(buf, "|VXLOCK");
1598 if (vp->v_flag & VXWANT)
1599 strcat(buf, "|VXWANT");
1600 if (vp->v_flag & VBWAIT)
1601 strcat(buf, "|VBWAIT");
1602 if (vp->v_flag & VALIASED)
1603 strcat(buf, "|VALIASED");
1604 if (buf[0] != '\0')
1605 printf(" flags (%s)", &buf[1]);
1606 if (vp->v_data == NULL) {
1607 printf("\n");
1608 } else {
1609 printf("\n\t");
1610 VOP_PRINT(vp);
1611 }
1612}
1613
1614#ifdef DDB
1615/*
1616 * List all of the locked vnodes in the system.
1617 * Called when debugging the kernel.
1618 */
1619void
1620printlockedvnodes()
1621{
1622 struct proc *p = curproc; /* XXX */
1623 struct mount *mp, *nmp;
1624 struct vnode *vp;
1625
1626 printf("Locked vnodes\n");
1627 simple_lock(&mountlist_slock);
1628 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1629 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1630 nmp = mp->mnt_list.cqe_next;
1631 continue;
1632 }
1633 for (vp = mp->mnt_vnodelist.lh_first;
1634 vp != NULL;
1635 vp = vp->v_mntvnodes.le_next) {
1636 if (VOP_ISLOCKED(vp))
1637 vprint((char *)0, vp);
1638 }
1639 simple_lock(&mountlist_slock);
1640 nmp = mp->mnt_list.cqe_next;
1641 vfs_unbusy(mp, p);
1642 }
1643 simple_unlock(&mountlist_slock);
1644}
1645#endif
1646
1647/*
1648 * Top level filesystem related information gathering.
1649 */
[deleted:]
1650 extern int vfs_sysctl __P(SYSCTL_HANDLER_ARGS);
1651 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1652 
1653 int
1654 vfs_sysctl SYSCTL_HANDLER_ARGS
1655 {
1656 	int *name = (int *)arg1;
1657 	u_int namelen = arg2;
1658 	struct vfsconf *vfsp;
1659 
1660 #ifndef NO_COMPAT_PRELITE2
1661 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1662 	if (namelen == 1 && name[0] == VFS_VFSCONF)
1663 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1664 #endif
1665 
1666 	/* all sysctl names at this level are at least name and field */
1667 	if (namelen < 2)
1668 		return (ENOTDIR);		/* overloaded */
1669 	if (name[0] != VFS_GENERIC) {
1670 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1671 			if (vfsp->vfc_typenum == name[0])
1672 				break;
1673 		if (vfsp == NULL)
1674 			return (EOPNOTSUPP);
1675 #ifdef notyet
1676 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1677 		    oldp, oldlenp, newp, newlen, p));
1678 #else
1679 		return (EOPNOTSUPP);
1680 #endif
1681 	}
1682 	switch (name[1]) {
1683 	case VFS_MAXTYPENUM:
1684 		if (namelen != 2)
1685 			return (ENOTDIR);
1686 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1687 	case VFS_CONF:
1688 		if (namelen != 3)
1689 			return (ENOTDIR);	/* overloaded */
1690 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1691 			if (vfsp->vfc_typenum == name[2])
1692 				break;
1693 		if (vfsp == NULL)
1694 			return (EOPNOTSUPP);
1695 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1696 	}
1697 	return (EOPNOTSUPP);
1698 }
1699 

[added:]
1650 static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
1651 
1652 static int
1653 vfs_sysctl SYSCTL_HANDLER_ARGS
1654 {
1655 	int *name = (int *)arg1 - 1;	/* XXX */
1656 	u_int namelen = arg2 + 1;	/* XXX */
1657 	struct vfsconf *vfsp;
1658 
1659 #ifndef NO_COMPAT_PRELITE2
1660 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
1661 	if (namelen == 1)
1662 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
1663 #endif
1664 
1665 #ifdef notyet
1666 	/* all sysctl names at this level are at least name and field */
1667 	if (namelen < 2)
1668 		return (ENOTDIR);		/* overloaded */
1669 	if (name[0] != VFS_GENERIC) {
1670 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1671 			if (vfsp->vfc_typenum == name[0])
1672 				break;
1673 		if (vfsp == NULL)
1674 			return (EOPNOTSUPP);
1675 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1676 		    oldp, oldlenp, newp, newlen, p));
1677 	}
1678 #endif
1679 	switch (name[1]) {
1680 	case VFS_MAXTYPENUM:
1681 		if (namelen != 2)
1682 			return (ENOTDIR);
1683 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
1684 	case VFS_CONF:
1685 		if (namelen != 3)
1686 			return (ENOTDIR);	/* overloaded */
1687 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1688 			if (vfsp->vfc_typenum == name[2])
1689 				break;
1690 		if (vfsp == NULL)
1691 			return (EOPNOTSUPP);
1692 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
1693 	}
1694 	return (EOPNOTSUPP);
1695 }
1696 
1697 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
1698 	"Generic filesystem");
1699 
1700#ifndef NO_COMPAT_PRELITE2
1701
1702static int
1703sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
1704{
1705 int error;
1706 struct vfsconf *vfsp;
1707 struct ovfsconf ovfs;
1708
1709 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
1710 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
1711 strcpy(ovfs.vfc_name, vfsp->vfc_name);
1712 ovfs.vfc_index = vfsp->vfc_typenum;
1713 ovfs.vfc_refcount = vfsp->vfc_refcount;
1714 ovfs.vfc_flags = vfsp->vfc_flags;
1715 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
1716 if (error)
1717 return error;
1718 }
1719 return 0;
1720}
1721
1722#endif /* !NO_COMPAT_PRELITE2 */
1723
1724int kinfo_vdebug = 1;
1725int kinfo_vgetfailed;
1726
1727#define KINFO_VNODESLOP 10
1728/*
1729 * Dump vnode list (via sysctl).
1730 * Copyout address of vnode followed by vnode.
1731 */
1732/* ARGSUSED */
1733static int
1734sysctl_vnode SYSCTL_HANDLER_ARGS
1735{
1736 struct proc *p = curproc; /* XXX */
1737 struct mount *mp, *nmp;
1738 struct vnode *nvp, *vp;
1739 int error;
1740
1741#define VPTRSZ sizeof (struct vnode *)
1742#define VNODESZ sizeof (struct vnode)
1743
1744 req->lock = 0;
1745 if (!req->oldptr) /* Make an estimate */
1746 return (SYSCTL_OUT(req, 0,
1747 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1748
1749 simple_lock(&mountlist_slock);
1750 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1751 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1752 nmp = mp->mnt_list.cqe_next;
1753 continue;
1754 }
1755again:
1756 simple_lock(&mntvnode_slock);
1757 for (vp = mp->mnt_vnodelist.lh_first;
1758 vp != NULL;
1759 vp = nvp) {
1760 /*
1761 * Check that the vp is still associated with
1762 * this filesystem. RACE: could have been
1763 * recycled onto the same filesystem.
1764 */
1765 if (vp->v_mount != mp) {
1766 simple_unlock(&mntvnode_slock);
1767 if (kinfo_vdebug)
1768 printf("kinfo: vp changed\n");
1769 goto again;
1770 }
1771 nvp = vp->v_mntvnodes.le_next;
1772 simple_unlock(&mntvnode_slock);
1773 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1774 (error = SYSCTL_OUT(req, vp, VNODESZ)))
1775 return (error);
1776 simple_lock(&mntvnode_slock);
1777 }
1778 simple_unlock(&mntvnode_slock);
1779 simple_lock(&mountlist_slock);
1780 nmp = mp->mnt_list.cqe_next;
1781 vfs_unbusy(mp, p);
1782 }
1783 simple_unlock(&mountlist_slock);
1784
1785 return (0);
1786}
1787
1788SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1789 0, 0, sysctl_vnode, "S,vnode", "");
1790
1791/*
1792 * Check to see if a filesystem is mounted on a block device.
1793 */
1794int
1795vfs_mountedon(vp)
1796 struct vnode *vp;
1797{
1798 struct vnode *vq;
1799 int error = 0;
1800
1801 if (vp->v_specflags & SI_MOUNTEDON)
1802 return (EBUSY);
1803 if (vp->v_flag & VALIASED) {
1804 simple_lock(&spechash_slock);
1805 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1806 if (vq->v_rdev != vp->v_rdev ||
1807 vq->v_type != vp->v_type)
1808 continue;
1809 if (vq->v_specflags & SI_MOUNTEDON) {
1810 error = EBUSY;
1811 break;
1812 }
1813 }
1814 simple_unlock(&spechash_slock);
1815 }
1816 return (error);
1817}
1818
1819/*
1820 * Unmount all filesystems. The list is traversed in reverse order
1821 * of mounting to avoid dependencies.
1822 */
1823void
1824vfs_unmountall()
1825{
1826 struct mount *mp, *nmp;
1827 struct proc *p = initproc; /* XXX XXX should this be proc0? */
1828 int error;
1829
1830 /*
1831 * Since this only runs when rebooting, it is not interlocked.
1832 */
1833 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1834 nmp = mp->mnt_list.cqe_prev;
1835 error = dounmount(mp, MNT_FORCE, p);
1836 if (error) {
1837 printf("unmount of %s failed (",
1838 mp->mnt_stat.f_mntonname);
1839 if (error == EBUSY)
1840 printf("BUSY)\n");
1841 else
1842 printf("%d)\n", error);
1843 }
1844 }
1845}
1846
1847/*
1848 * Build hash lists of net addresses and hang them off the mount point.
1849 * Called by ufs_mount() to set up the lists of export addresses.
1850 */
1851static int
1852vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1853 struct export_args *argp)
1854{
1855 register struct netcred *np;
1856 register struct radix_node_head *rnh;
1857 register int i;
1858 struct radix_node *rn;
1859 struct sockaddr *saddr, *smask = 0;
1860 struct domain *dom;
1861 int error;
1862
1863 if (argp->ex_addrlen == 0) {
1864 if (mp->mnt_flag & MNT_DEFEXPORTED)
1865 return (EPERM);
1866 np = &nep->ne_defexported;
1867 np->netc_exflags = argp->ex_flags;
1868 np->netc_anon = argp->ex_anon;
1869 np->netc_anon.cr_ref = 1;
1870 mp->mnt_flag |= MNT_DEFEXPORTED;
1871 return (0);
1872 }
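	/*
	 * Allocate the netcred, the export address, and the mask as one
	 * chunk; the sockaddrs are copied in directly after the struct.
	 */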
1873 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1874 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1875 bzero((caddr_t) np, i);
1876 saddr = (struct sockaddr *) (np + 1);
1877 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1878 goto out;
1879 if (saddr->sa_len > argp->ex_addrlen)
1880 saddr->sa_len = argp->ex_addrlen;
1881 if (argp->ex_masklen) {
1882 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1883 error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen);
1884 if (error)
1885 goto out;
1886 if (smask->sa_len > argp->ex_masklen)
1887 smask->sa_len = argp->ex_masklen;
1888 }
1889 i = saddr->sa_family;
1890 if ((rnh = nep->ne_rtable[i]) == 0) {
1891 /*
1892 		 * Seems silly to initialize every AF when most are not used;
1893 		 * do so on demand here.
1894 */
1895 for (dom = domains; dom; dom = dom->dom_next)
1896 if (dom->dom_family == i && dom->dom_rtattach) {
1897 dom->dom_rtattach((void **) &nep->ne_rtable[i],
1898 dom->dom_rtoffset);
1899 break;
1900 }
1901 if ((rnh = nep->ne_rtable[i]) == 0) {
1902 error = ENOBUFS;
1903 goto out;
1904 }
1905 }
1906 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1907 np->netc_rnodes);
1908 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */
1909 error = EPERM;
1910 goto out;
1911 }
1912 np->netc_exflags = argp->ex_flags;
1913 np->netc_anon = argp->ex_anon;
1914 np->netc_anon.cr_ref = 1;
1915 return (0);
1916out:
1917 free(np, M_NETADDR);
1918 return (error);
1919}
1920
1921/* ARGSUSED */
1922static int
1923vfs_free_netcred(struct radix_node *rn, void *w)
1924{
1925 register struct radix_node_head *rnh = (struct radix_node_head *) w;
1926
1927 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
1928 free((caddr_t) rn, M_NETADDR);
1929 return (0);
1930}
1931
1932/*
1933 * Free the net address hash lists that are hanging off the mount points.
1934 */
1935static void
1936vfs_free_addrlist(struct netexport *nep)
1937{
1938 register int i;
1939 register struct radix_node_head *rnh;
1940
1941 for (i = 0; i <= AF_MAX; i++)
1942 if ((rnh = nep->ne_rtable[i])) {
1943 (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
1944 (caddr_t) rnh);
1945 free((caddr_t) rnh, M_RTABLE);
1946 nep->ne_rtable[i] = 0;
1947 }
1948}
1949
1950int
1951vfs_export(mp, nep, argp)
1952 struct mount *mp;
1953 struct netexport *nep;
1954 struct export_args *argp;
1955{
1956 int error;
1957
1958 if (argp->ex_flags & MNT_DELEXPORT) {
1959 vfs_free_addrlist(nep);
1960 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1961 }
1962 if (argp->ex_flags & MNT_EXPORTED) {
1963 if ((error = vfs_hang_addrlist(mp, nep, argp)))
1964 return (error);
1965 mp->mnt_flag |= MNT_EXPORTED;
1966 }
1967 return (0);
1968}
1969
1970struct netcred *
1971vfs_export_lookup(mp, nep, nam)
1972 register struct mount *mp;
1973 struct netexport *nep;
1974 struct mbuf *nam;
1975{
1976 register struct netcred *np;
1977 register struct radix_node_head *rnh;
1978 struct sockaddr *saddr;
1979
1980 np = NULL;
1981 if (mp->mnt_flag & MNT_EXPORTED) {
1982 /*
1983 * Lookup in the export list first.
1984 */
1985 if (nam != NULL) {
1986 saddr = mtod(nam, struct sockaddr *);
1987 rnh = nep->ne_rtable[saddr->sa_family];
1988 if (rnh != NULL) {
1989 np = (struct netcred *)
1990 (*rnh->rnh_matchaddr)((caddr_t)saddr,
1991 rnh);
1992 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1993 np = NULL;
1994 }
1995 }
1996 /*
1997 * If no address match, use the default if it exists.
1998 */
1999 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2000 np = &nep->ne_defexported;
2001 }
2002 return (np);
2003}
2004
2005/*
2006  * Perform msync on all vnodes under a mount point.
2007  * The mount point must be locked.
2008 */
2009void
2010vfs_msync(struct mount *mp, int flags) {
2011 struct vnode *vp, *nvp;
2012loop:
2013 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2014
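		/*
		 * If the vnode was recycled onto a different mount while
		 * we were not holding any locks, start over.
		 */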
2015 if (vp->v_mount != mp)
2016 goto loop;
2017 nvp = vp->v_mntvnodes.le_next;
2018 if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
2019 continue;
2020 if (vp->v_object &&
2021 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2022 vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
2023 }
2024 }
2025}
2026
2027/*
2028 * Create the VM object needed for VMIO and mmap support. This
2029  * is done for all VREG files in the system. Some filesystems might
2030  * also gain the additional metadata buffering capability of the
2031  * VMIO code by making their device nodes VMIO mode as well.
2032 */
2033int
2034vfs_object_create(vp, p, cred, waslocked)
2035 struct vnode *vp;
2036 struct proc *p;
2037 struct ucred *cred;
2038 int waslocked;
2039{
2040 struct vattr vat;
2041 vm_object_t object;
2042 int error = 0;
2043
2044retry:
2045 if ((object = vp->v_object) == NULL) {
2046 if (vp->v_type == VREG) {
2047 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2048 goto retn;
2049 (void) vnode_pager_alloc(vp,
2050 OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
2051 } else {
2052 /*
2053 * This simply allocates the biggest object possible
2054 * for a VBLK vnode. This should be fixed, but doesn't
2055 * cause any problems (yet).
2056 */
2057 (void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
2058 }
2059 vp->v_object->flags |= OBJ_VFS_REF;
2060 } else {
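		/*
		 * If an existing object is being torn down, wait for it
		 * to go away (dropping the vnode lock if we held it) and
		 * then retry.
		 */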
2061 if (object->flags & OBJ_DEAD) {
2062 if (waslocked)
2063 VOP_UNLOCK(vp, 0, p);
2064 tsleep(object, PVM, "vodead", 0);
2065 if (waslocked)
2066 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2067 goto retry;
2068 }
2069 if ((object->flags & OBJ_VFS_REF) == 0) {
2070 object->flags |= OBJ_VFS_REF;
2071 vm_object_reference(object);
2072 }
2073 }
2074 if (vp->v_object)
2075 vp->v_flag |= VVMIO;
2076
2077retn:
2078 return error;
2079}