/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.112 1997/11/07 08:53:11 phk Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/poll.h>
#include <sys/domain.h>
#include <sys/dirent.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

#ifdef DDB
extern void printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vgonel __P((struct vnode *vp, struct proc *p));
unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
static void	vputrele __P((struct vnode *vp, int put));

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
struct nfs_public nfs_pub;	/* publicly exported FS */

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + vm_object_cache_max;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
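
/*
 * Illustrative sketch (not part of the original source): the caller-side
 * pattern for vfs_busy()/vfs_unbusy() when walking the mount list, the
 * same shape used by printlockedvnodes() and sysctl_vnode() below.
 * Kept under #if 0 so it is never compiled.
 */
#if 0
static void
example_mount_walk(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Skip mounts that are being unmounted right now. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on mp while it cannot be unmounted ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif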

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
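
/*
 * Illustrative sketch (not part of the original source): callers use
 * vattr_null() to mark every attribute "unset" and then fill in only
 * the fields they want changed before a VOP_SETATTR() call, e.g. to
 * truncate a file. Kept under #if 0 so it is never compiled.
 */
#if 0
static int
example_truncate(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	struct vattr va;

	vattr_null(&va);	/* everything starts out as VNOVAL */
	va.va_size = 0;		/* only the size is to be changed */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif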

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages and no
	 * namecache entries pointing at it.
	 * Otherwise we allocate a new vnode.
	 */

	simple_lock(&vnode_free_list_slock);

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			if (vp->v_object && vp->v_object->resident_page_count) {
				/* Don't recycle if it's caching some pages */
				simple_unlock(&vp->v_interlock);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
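
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * vget-style routine typically obtains a fresh vnode this way, then
 * attaches its per-fs data to v_data. The vnodeop vector and helper
 * named here are hypothetical. Kept under #if 0 so it is never compiled.
 */
#if 0
static int
example_fs_newvnode(struct mount *mp, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = getnewvnode(VT_NON, mp, example_vnodeop_p, &vp);
	if (error)
		return (error);
	vp->v_data = example_alloc_node();	/* hypothetical helper */
	vp->v_type = VREG;
	*vpp = vp;
	return (0);
}
#endif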

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, object->size,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * Associate a p-buffer with a vnode.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL\n");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST) {
		bufremvn(bp);
		vdrop(bp->b_vp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	vp->v_usecount++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	/*
	 * Create the VM object, if needed
	 */
	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	    (vp->v_object->flags & OBJ_VFS_REF) == 0 ||
	    (vp->v_object->flags & OBJ_DEAD))) {
		/*
		 * XXX vfs_object_create probably needs the interlock.
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		simple_lock(&vp->v_interlock);
	}
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0)
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
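
/*
 * Illustrative sketch (not part of the original source): the usual
 * vget() idiom; the caller must be prepared for failure when the
 * vnode is being cleaned out from under it. Kept under #if 0 so it
 * is never compiled.
 */
#if 0
static int
example_grab_vnode(struct vnode *vp, struct proc *p)
{
	int error;

	error = vget(vp, LK_EXCLUSIVE, p);
	if (error)
		return (error);	/* e.g. ENOENT: vnode was being cleaned */
	/* ... use vp, which is now referenced and locked ... */
	vput(vp);		/* unlock and release in one call */
	return (0);
}
#endif
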
/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	vp->v_usecount++;

	if ((vp->v_type == VREG) &&
	    ((vp->v_object == NULL) ||
	    ((vp->v_object->flags & OBJ_VFS_REF) == 0) ||
	    (vp->v_object->flags & OBJ_DEAD))) {
		/*
		 * We need to lock the vnode during the time that
		 * the object is created. This is necessary to
		 * keep the system from re-entrantly doing it
		 * multiple times.
		 * XXX vfs_object_create probably needs the interlock?
		 */
		simple_unlock(&vp->v_interlock);
		vfs_object_create(vp, curproc, curproc->p_ucred, 0);
		return;
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
static void
vputrele(vp, put)
	struct vnode *vp;
	int put;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vputrele: null vp");
#endif
	simple_lock(&vp->v_interlock);

	if ((vp->v_usecount == 2) &&
	    vp->v_object &&
	    (vp->v_object->flags & OBJ_VFS_REF)) {
		vp->v_usecount--;
		vp->v_object->flags &= ~OBJ_VFS_REF;
		if (put) {
			VOP_UNLOCK(vp, LK_INTERLOCK, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}
		vm_object_deallocate(vp->v_object);
		return;
	}

	if (vp->v_usecount > 1) {
		vp->v_usecount--;
		if (put) {
			VOP_UNLOCK(vp, LK_INTERLOCK, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}
		return;
	}

	if (vp->v_usecount < 1) {
#ifdef DIAGNOSTIC
		vprint("vputrele: negative ref count", vp);
#endif
		panic("vputrele: negative ref cnt");
	}

	vp->v_usecount--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	/*
	 * If we are doing a vput, the node is already locked, and we must
	 * call VOP_INACTIVE with the node locked. So, in the case of
	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
	 */
	if (put) {
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);
	} else if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
		VOP_INACTIVE(vp, p);
	}
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	vputrele(vp, 1);
}

void
vrele(vp)
	struct vnode *vp;
{
	vputrele(vp, 0);
}
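
/*
 * Illustrative note (not part of the original source): by convention
 * vput() takes a vnode that is both referenced and locked, while
 * vrele() takes one that is referenced but unlocked. Kept under #if 0
 * so it is never compiled.
 */
#if 0
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	/* ... locked use ... */
	vput(vp);	/* drops lock and reference together */

	/* ... unlocked use of a held reference ... */
	vrele(vp);	/* drops the reference only */
#endif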

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	simple_unlock(&vp->v_interlock);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active, irefed;
	vm_object_t object;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	object = vp->v_object;
	irefed = 0;
	if (object && ((object->flags & OBJ_DEAD) == 0)) {
		if (object->ref_count == 0) {
			vm_object_reference(object);
			irefed = 1;
		}
		++object->ref_count;
		pager_cache(object, FALSE);
	}

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	if (irefed) {
		vm_object_deallocate(object);
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	if (vp->v_object) {
		vp->v_object->flags |= OBJ_VNODE_GONE;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}

	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
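
/*
 * Illustrative sketch (not part of the original source): a driver can
 * use vfinddev() to recover the vnode for one of its device nodes,
 * e.g. to tear it down when the device disappears. Kept under #if 0
 * so it is never compiled.
 */
#if 0
static void
example_device_gone(dev_t dev)
{
	struct vnode *vp;

	if (vfinddev(dev, VCHR, &vp))
		vgone(vp);	/* eliminate the stale device vnode */
}
#endif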

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#ifndef NO_COMPAT_PRELITE2
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);	/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#ifndef NO_COMPAT_PRELITE2

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* !NO_COMPAT_PRELITE2 */

int kinfo_vdebug = 1;
int kinfo_vgetfailed;

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem. RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = initproc;	/* XXX XXX should this be proc0? */
	int error;

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used;
		 * do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS). Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported. May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
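
/*
 * Illustrative sketch (not part of the original source): an NFS-style
 * server checks each request's source address against the export list
 * and rejects clients that do not match. The surrounding names are
 * hypothetical. Kept under #if 0 so it is never compiled.
 */
#if 0
static int
example_check_export(struct mount *mp, struct netexport *nep,
    struct sockaddr *client, struct ucred **credp)
{
	struct netcred *nc;

	nc = vfs_export_lookup(mp, nep, client);
	if (nc == NULL)
		return (EACCES);	/* host not exported to */
	*credp = &nc->netc_anon;	/* e.g. map to the anon cred */
	return (0);
}
#endif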

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp, *nvp;
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
			continue;
		if (vp->v_object &&
		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
		}
	}
}

/*
 * Create the VM object needed for VMIO and mmap support. This
 * is done for all VREG files in the system. Some filesystems may
 * also gain the additional metadata buffering capability of the
 * VMIO code by making their device nodes VMIO mode as well.
 */
int
vfs_object_create(vp, p, cred, waslocked)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	int waslocked;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			(void) vnode_pager_alloc(vp,
			    OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
		} else {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode. This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			(void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
		}
		vp->v_object->flags |= OBJ_VFS_REF;
	} else {
		if (object->flags & OBJ_DEAD) {
			if (waslocked)
				VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			if (waslocked)
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
		if ((object->flags & OBJ_VFS_REF) == 0) {
			object->flags |= OBJ_VFS_REF;
			vm_object_reference(object);
		}
	}
	if (vp->v_object)
		vp->v_flag |= VVMIO;

retn:
	return (error);
}

void
vfree(vp)
	struct vnode *vp;
{
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
}

void
vbusy(vp)
	struct vnode *vp;
{
	simple_lock(&vnode_free_list_slock);
	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	freevnodes--;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VFREE;
}