vfs_subr.c revision 44679
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 39 * $Id: vfs_subr.c,v 1.188 1999/02/25 05:22:29 dillon Exp $ 40 */ 41 42/* 43 * External virtual filesystem routines 44 */ 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/malloc.h> 54#include <sys/mount.h> 55#include <sys/socket.h> 56#include <sys/vnode.h> 57#include <sys/stat.h> 58#include <sys/buf.h> 59#include <sys/domain.h> 60#include <sys/dirent.h> 61#include <sys/vmmeter.h> 62 63#include <machine/limits.h> 64 65#include <vm/vm.h> 66#include <vm/vm_param.h> 67#include <vm/vm_prot.h> 68#include <vm/vm_object.h> 69#include <vm/vm_extern.h> 70#include <vm/pmap.h> 71#include <vm/vm_map.h> 72#include <vm/vm_page.h> 73#include <vm/vm_pager.h> 74#include <vm/vnode_pager.h> 75#include <vm/vm_zone.h> 76#include <sys/sysctl.h> 77 78#include <miscfs/specfs/specdev.h> 79 80static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 81 82static void insmntque __P((struct vnode *vp, struct mount *mp)); 83static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 84static void vfree __P((struct vnode *)); 85static void vgonel __P((struct vnode *vp, struct proc *p)); 86static unsigned long numvnodes; 87SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 88 89enum vtype iftovt_tab[16] = { 90 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 91 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 92}; 93int vttoif_tab[9] = { 94 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 95 S_IFSOCK, S_IFIFO, S_IFMT, 96}; 97 98static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 99struct tobefreelist vnode_tobefree_list; /* vnode free list */ 100 101static u_long wantfreevnodes = 25; 102SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 103static u_long freevnodes = 0; 104SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 105 106int vfs_ioopt = 0; 107#ifdef ENABLE_VFS_IOOPT 108SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 109#endif 110 111struct mntlist mountlist; /* mounted filesystem list */ 112struct simplelock mountlist_slock; 113struct simplelock mntvnode_slock; 114int nfs_mount_type = -1; 115#ifndef NULL_SIMPLELOCKS 116static struct simplelock mntid_slock; 117static struct simplelock vnode_free_list_slock; 118static struct simplelock spechash_slock; 119#endif 120struct nfs_public nfs_pub; /* publicly exported FS */ 121static vm_zone_t vnode_zone; 122 123/* 124 * The workitem queue. 125 */ 126#define SYNCER_MAXDELAY 32 127static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 128time_t syncdelay = 30; 129int rushjob; /* number of slots to run ASAP */ 130 131static int syncer_delayno = 0; 132static long syncer_mask; 133LIST_HEAD(synclist, vnode); 134static struct synclist *syncer_workitem_pending; 135 136int desiredvnodes; 137SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 138 139static void vfs_free_addrlist __P((struct netexport *nep)); 140static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 141static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 142 struct export_args *argp)); 143 144/* 145 * Initialize the vnode management data structures. 146 */ 147void 148vntblinit() 149{ 150 151 desiredvnodes = maxproc + cnt.v_page_count / 4; 152 simple_lock_init(&mntvnode_slock); 153 simple_lock_init(&mntid_slock); 154 simple_lock_init(&spechash_slock); 155 TAILQ_INIT(&vnode_free_list); 156 TAILQ_INIT(&vnode_tobefree_list); 157 simple_lock_init(&vnode_free_list_slock); 158 CIRCLEQ_INIT(&mountlist); 159 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 160 /* 161 * Initialize the filesystem syncer. 162 */ 163 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 164 &syncer_mask); 165 syncer_maxdelay = syncer_mask + 1; 166} 167 168/* 169 * Mark a mount point as busy. Used to synchronize access and to delay 170 * unmounting. Interlock is not released on failure. 171 */ 172int 173vfs_busy(mp, flags, interlkp, p) 174 struct mount *mp; 175 int flags; 176 struct simplelock *interlkp; 177 struct proc *p; 178{ 179 int lkflags; 180 181 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 182 if (flags & LK_NOWAIT) 183 return (ENOENT); 184 mp->mnt_kern_flag |= MNTK_MWAIT; 185 if (interlkp) { 186 simple_unlock(interlkp); 187 } 188 /* 189 * Since all busy locks are shared except the exclusive 190 * lock granted when unmounting, the only place that a 191 * wakeup needs to be done is at the release of the 192 * exclusive lock at the end of dounmount. 193 */ 194 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 195 if (interlkp) { 196 simple_lock(interlkp); 197 } 198 return (ENOENT); 199 } 200 lkflags = LK_SHARED | LK_NOPAUSE; 201 if (interlkp) 202 lkflags |= LK_INTERLOCK; 203 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 204 panic("vfs_busy: unexpected lock failure"); 205 return (0); 206} 207 208/* 209 * Free a busy filesystem. 210 */ 211void 212vfs_unbusy(mp, p) 213 struct mount *mp; 214 struct proc *p; 215{ 216 217 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 218} 219 220/* 221 * Lookup a filesystem type, and if found allocate and initialize 222 * a mount structure for it. 223 * 224 * Devname is usually updated by mount(8) after booting. 225 */ 226int 227vfs_rootmountalloc(fstypename, devname, mpp) 228 char *fstypename; 229 char *devname; 230 struct mount **mpp; 231{ 232 struct proc *p = curproc; /* XXX */ 233 struct vfsconf *vfsp; 234 struct mount *mp; 235 236 if (fstypename == NULL) 237 return (ENODEV); 238 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 239 if (!strcmp(vfsp->vfc_name, fstypename)) 240 break; 241 if (vfsp == NULL) 242 return (ENODEV); 243 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 244 bzero((char *)mp, (u_long)sizeof(struct mount)); 245 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 246 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 247 LIST_INIT(&mp->mnt_vnodelist); 248 mp->mnt_vfc = vfsp; 249 mp->mnt_op = vfsp->vfc_vfsops; 250 mp->mnt_flag = MNT_RDONLY; 251 mp->mnt_vnodecovered = NULLVP; 252 vfsp->vfc_refcount++; 253 mp->mnt_stat.f_type = vfsp->vfc_typenum; 254 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 255 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 256 mp->mnt_stat.f_mntonname[0] = '/'; 257 mp->mnt_stat.f_mntonname[1] = 0; 258 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 259 *mpp = mp; 260 return (0); 261} 262 263/* 264 * Find an appropriate filesystem to use for the root. If a filesystem 265 * has not been preselected, walk through the list of known filesystems 266 * trying those that have mountroot routines, and try them until one 267 * works or we have tried them all. 268 */ 269#ifdef notdef /* XXX JH */ 270int 271lite2_vfs_mountroot() 272{ 273 struct vfsconf *vfsp; 274 extern int (*lite2_mountroot) __P((void)); 275 int error; 276 277 if (lite2_mountroot != NULL) 278 return ((*lite2_mountroot)()); 279 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 280 if (vfsp->vfc_mountroot == NULL) 281 continue; 282 if ((error = (*vfsp->vfc_mountroot)()) == 0) 283 return (0); 284 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 285 } 286 return (ENODEV); 287} 288#endif 289 290/* 291 * Lookup a mount point by filesystem identifier. 292 */ 293struct mount * 294vfs_getvfs(fsid) 295 fsid_t *fsid; 296{ 297 register struct mount *mp; 298 299 simple_lock(&mountlist_slock); 300 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 301 mp = mp->mnt_list.cqe_next) { 302 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 303 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 304 simple_unlock(&mountlist_slock); 305 return (mp); 306 } 307 } 308 simple_unlock(&mountlist_slock); 309 return ((struct mount *) 0); 310} 311 312/* 313 * Get a new unique fsid 314 */ 315void 316vfs_getnewfsid(mp) 317 struct mount *mp; 318{ 319 static u_short xxxfs_mntid; 320 321 fsid_t tfsid; 322 int mtype; 323 324 simple_lock(&mntid_slock); 325 mtype = mp->mnt_vfc->vfc_typenum; 326 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 327 mp->mnt_stat.f_fsid.val[1] = mtype; 328 if (xxxfs_mntid == 0) 329 ++xxxfs_mntid; 330 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 331 tfsid.val[1] = mtype; 332 if (mountlist.cqh_first != (void *)&mountlist) { 333 while (vfs_getvfs(&tfsid)) { 334 tfsid.val[0]++; 335 xxxfs_mntid++; 336 } 337 } 338 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 339 simple_unlock(&mntid_slock); 340} 341 342/* 343 * Set vnode attributes to VNOVAL 344 */ 345void 346vattr_null(vap) 347 register struct vattr *vap; 348{ 349 350 vap->va_type = VNON; 351 vap->va_size = VNOVAL; 352 vap->va_bytes = VNOVAL; 353 vap->va_mode = VNOVAL; 354 vap->va_nlink = VNOVAL; 355 vap->va_uid = VNOVAL; 356 vap->va_gid = VNOVAL; 357 vap->va_fsid = VNOVAL; 358 vap->va_fileid = VNOVAL; 359 vap->va_blocksize = VNOVAL; 360 vap->va_rdev = VNOVAL; 361 vap->va_atime.tv_sec = VNOVAL; 362 vap->va_atime.tv_nsec = VNOVAL; 363 vap->va_mtime.tv_sec = VNOVAL; 364 vap->va_mtime.tv_nsec = VNOVAL; 365 vap->va_ctime.tv_sec = VNOVAL; 366 vap->va_ctime.tv_nsec = VNOVAL; 367 vap->va_flags = VNOVAL; 368 vap->va_gen = VNOVAL; 369 vap->va_vaflags = 0; 370} 371 372/* 373 * Routines having to do with the management of the vnode table. 374 */ 375extern vop_t **dead_vnodeop_p; 376 377/* 378 * Return the next vnode from the free list. 379 */ 380int 381getnewvnode(tag, mp, vops, vpp) 382 enum vtagtype tag; 383 struct mount *mp; 384 vop_t **vops; 385 struct vnode **vpp; 386{ 387 int s; 388 struct proc *p = curproc; /* XXX */ 389 struct vnode *vp, *tvp, *nvp; 390 vm_object_t object; 391 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 392 393 /* 394 * We take the least recently used vnode from the freelist 395 * if we can get it and it has no cached pages, and no 396 * namecache entries are relative to it. 397 * Otherwise we allocate a new vnode 398 */ 399 400 s = splbio(); 401 simple_lock(&vnode_free_list_slock); 402 TAILQ_INIT(&vnode_tmp_list); 403 404 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 405 nvp = TAILQ_NEXT(vp, v_freelist); 406 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 407 if (vp->v_flag & VAGE) { 408 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 409 } else { 410 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 411 } 412 vp->v_flag &= ~(VTBFREE|VAGE); 413 vp->v_flag |= VFREE; 414 if (vp->v_usecount) 415 panic("tobe free vnode isn't"); 416 freevnodes++; 417 } 418 419 if (wantfreevnodes && freevnodes < wantfreevnodes) { 420 vp = NULL; 421 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 422 /* 423 * XXX: this is only here to be backwards compatible 424 */ 425 vp = NULL; 426 } else { 427 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 428 nvp = TAILQ_NEXT(vp, v_freelist); 429 if (!simple_lock_try(&vp->v_interlock)) 430 continue; 431 if (vp->v_usecount) 432 panic("free vnode isn't"); 433 434 object = vp->v_object; 435 if (object && (object->resident_page_count || object->ref_count)) { 436 printf("object inconsistant state: RPC: %d, RC: %d\n", 437 object->resident_page_count, object->ref_count); 438 /* Don't recycle if it's caching some pages */ 439 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 440 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 441 continue; 442 } else if (LIST_FIRST(&vp->v_cache_src)) { 443 /* Don't recycle if active in the namecache */ 444 simple_unlock(&vp->v_interlock); 445 continue; 446 } else { 447 break; 448 } 449 } 450 } 451 452 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 453 nvp = TAILQ_NEXT(tvp, v_freelist); 454 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 455 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 456 simple_unlock(&tvp->v_interlock); 457 } 458 459 if (vp) { 460 vp->v_flag |= VDOOMED; 461 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 462 freevnodes--; 463 simple_unlock(&vnode_free_list_slock); 464 cache_purge(vp); 465 vp->v_lease = NULL; 466 if (vp->v_type != VBAD) { 467 vgonel(vp, p); 468 } else { 469 simple_unlock(&vp->v_interlock); 470 } 471 472#ifdef INVARIANTS 473 { 474 int s; 475 476 if (vp->v_data) 477 panic("cleaned vnode isn't"); 478 s = splbio(); 479 if (vp->v_numoutput) 480 panic("Clean vnode has pending I/O's"); 481 splx(s); 482 } 483#endif 484 vp->v_flag = 0; 485 vp->v_lastr = 0; 486 vp->v_lastw = 0; 487 vp->v_lasta = 0; 488 vp->v_cstart = 0; 489 vp->v_clen = 0; 490 vp->v_socket = 0; 491 vp->v_writecount = 0; /* XXX */ 492 vp->v_maxio = 0; 493 } else { 494 simple_unlock(&vnode_free_list_slock); 495 vp = (struct vnode *) zalloc(vnode_zone); 496 bzero((char *) vp, sizeof *vp); 497 simple_lock_init(&vp->v_interlock); 498 vp->v_dd = vp; 499 cache_purge(vp); 500 LIST_INIT(&vp->v_cache_src); 501 TAILQ_INIT(&vp->v_cache_dst); 502 numvnodes++; 503 } 504 505 TAILQ_INIT(&vp->v_cleanblkhd); 506 TAILQ_INIT(&vp->v_dirtyblkhd); 507 vp->v_type = VNON; 508 vp->v_tag = tag; 509 vp->v_op = vops; 510 insmntque(vp, mp); 511 *vpp = vp; 512 vp->v_usecount = 1; 513 vp->v_data = 0; 514 splx(s); 515 516 vfs_object_create(vp, p, p->p_ucred); 517 return (0); 518} 519 520/* 521 * Move a vnode from one mount queue to another. 522 */ 523static void 524insmntque(vp, mp) 525 register struct vnode *vp; 526 register struct mount *mp; 527{ 528 529 simple_lock(&mntvnode_slock); 530 /* 531 * Delete from old mount point vnode list, if on one. 532 */ 533 if (vp->v_mount != NULL) 534 LIST_REMOVE(vp, v_mntvnodes); 535 /* 536 * Insert into list of vnodes for the new mount point, if available. 537 */ 538 if ((vp->v_mount = mp) == NULL) { 539 simple_unlock(&mntvnode_slock); 540 return; 541 } 542 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 543 simple_unlock(&mntvnode_slock); 544} 545 546/* 547 * Update outstanding I/O count and do wakeup if requested. 548 */ 549void 550vwakeup(bp) 551 register struct buf *bp; 552{ 553 register struct vnode *vp; 554 555 bp->b_flags &= ~B_WRITEINPROG; 556 if ((vp = bp->b_vp)) { 557 vp->v_numoutput--; 558 if (vp->v_numoutput < 0) 559 panic("vwakeup: neg numoutput"); 560 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 561 vp->v_flag &= ~VBWAIT; 562 wakeup((caddr_t) &vp->v_numoutput); 563 } 564 } 565} 566 567/* 568 * Flush out and invalidate all buffers associated with a vnode. 569 * Called with the underlying object locked. 570 */ 571int 572vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 573 register struct vnode *vp; 574 int flags; 575 struct ucred *cred; 576 struct proc *p; 577 int slpflag, slptimeo; 578{ 579 register struct buf *bp; 580 struct buf *nbp, *blist; 581 int s, error; 582 vm_object_t object; 583 584 if (flags & V_SAVE) { 585 s = splbio(); 586 while (vp->v_numoutput) { 587 vp->v_flag |= VBWAIT; 588 error = tsleep((caddr_t)&vp->v_numoutput, 589 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); 590 if (error) { 591 splx(s); 592 return (error); 593 } 594 } 595 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 596 splx(s); 597 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 598 return (error); 599 s = splbio(); 600 if (vp->v_numoutput > 0 || 601 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 602 panic("vinvalbuf: dirty bufs"); 603 } 604 splx(s); 605 } 606 s = splbio(); 607 for (;;) { 608 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 609 if (!blist) 610 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 611 if (!blist) 612 break; 613 614 for (bp = blist; bp; bp = nbp) { 615 nbp = TAILQ_NEXT(bp, b_vnbufs); 616 if (bp->b_flags & B_BUSY) { 617 bp->b_flags |= B_WANTED; 618 error = tsleep((caddr_t) bp, 619 slpflag | (PRIBIO + 4), "vinvalbuf", 620 slptimeo); 621 if (error) { 622 splx(s); 623 return (error); 624 } 625 break; 626 } 627 /* 628 * XXX Since there are no node locks for NFS, I 629 * believe there is a slight chance that a delayed 630 * write will occur while sleeping just above, so 631 * check for it. Note that vfs_bio_awrite expects 632 * buffers to reside on a queue, while VOP_BWRITE and 633 * brelse do not. 634 */ 635 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 636 (flags & V_SAVE)) { 637 638 if (bp->b_vp == vp) { 639 if (bp->b_flags & B_CLUSTEROK) { 640 vfs_bio_awrite(bp); 641 } else { 642 bremfree(bp); 643 bp->b_flags |= (B_BUSY | B_ASYNC); 644 VOP_BWRITE(bp); 645 } 646 } else { 647 bremfree(bp); 648 bp->b_flags |= B_BUSY; 649 (void) VOP_BWRITE(bp); 650 } 651 break; 652 } 653 bremfree(bp); 654 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 655 bp->b_flags &= ~B_ASYNC; 656 brelse(bp); 657 } 658 } 659 660 while (vp->v_numoutput > 0) { 661 vp->v_flag |= VBWAIT; 662 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 663 } 664 665 splx(s); 666 667 /* 668 * Destroy the copy in the VM cache, too. 669 */ 670 simple_lock(&vp->v_interlock); 671 object = vp->v_object; 672 if (object != NULL) { 673 vm_object_page_remove(object, 0, 0, 674 (flags & V_SAVE) ? TRUE : FALSE); 675 } 676 simple_unlock(&vp->v_interlock); 677 678 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 679 panic("vinvalbuf: flush failed"); 680 return (0); 681} 682 683/* 684 * Truncate a file's buffer and pages to a specified length. This 685 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 686 * sync activity. 687 */ 688int 689vtruncbuf(vp, cred, p, length, blksize) 690 register struct vnode *vp; 691 struct ucred *cred; 692 struct proc *p; 693 off_t length; 694 int blksize; 695{ 696 register struct buf *bp; 697 struct buf *nbp; 698 int s, anyfreed; 699 int trunclbn; 700 701 /* 702 * Round up to the *next* lbn. 703 */ 704 trunclbn = (length + blksize - 1) / blksize; 705 706 s = splbio(); 707restart: 708 anyfreed = 1; 709 for (;anyfreed;) { 710 anyfreed = 0; 711 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 712 nbp = TAILQ_NEXT(bp, b_vnbufs); 713 if (bp->b_lblkno >= trunclbn) { 714 if (bp->b_flags & B_BUSY) { 715 bp->b_flags |= B_WANTED; 716 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 717 goto restart; 718 } else { 719 bremfree(bp); 720 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 721 bp->b_flags &= ~B_ASYNC; 722 brelse(bp); 723 anyfreed = 1; 724 } 725 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 726 (nbp->b_vp != vp) || 727 (nbp->b_flags & B_DELWRI))) { 728 goto restart; 729 } 730 } 731 } 732 733 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 734 nbp = TAILQ_NEXT(bp, b_vnbufs); 735 if (bp->b_lblkno >= trunclbn) { 736 if (bp->b_flags & B_BUSY) { 737 bp->b_flags |= B_WANTED; 738 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 739 goto restart; 740 } else { 741 bremfree(bp); 742 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 743 bp->b_flags &= ~B_ASYNC; 744 brelse(bp); 745 anyfreed = 1; 746 } 747 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 748 (nbp->b_vp != vp) || 749 (nbp->b_flags & B_DELWRI) == 0)) { 750 goto restart; 751 } 752 } 753 } 754 } 755 756 if (length > 0) { 757restartsync: 758 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 759 nbp = TAILQ_NEXT(bp, b_vnbufs); 760 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 761 if (bp->b_flags & B_BUSY) { 762 bp->b_flags |= B_WANTED; 763 tsleep(bp, PRIBIO, "vtrb3", 0); 764 } else { 765 bremfree(bp); 766 bp->b_flags |= B_BUSY; 767 if (bp->b_vp == vp) { 768 bp->b_flags |= B_ASYNC; 769 } else { 770 bp->b_flags &= ~B_ASYNC; 771 } 772 VOP_BWRITE(bp); 773 } 774 goto restartsync; 775 } 776 777 } 778 } 779 780 while (vp->v_numoutput > 0) { 781 vp->v_flag |= VBWAIT; 782 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 783 } 784 785 splx(s); 786 787 vnode_pager_setsize(vp, length); 788 789 return (0); 790} 791 792/* 793 * Associate a buffer with a vnode. 794 */ 795void 796bgetvp(vp, bp) 797 register struct vnode *vp; 798 register struct buf *bp; 799{ 800 int s; 801 802 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 803 804 vhold(vp); 805 bp->b_vp = vp; 806 if (vp->v_type == VBLK || vp->v_type == VCHR) 807 bp->b_dev = vp->v_rdev; 808 else 809 bp->b_dev = NODEV; 810 /* 811 * Insert onto list for new vnode. 812 */ 813 s = splbio(); 814 bp->b_xflags |= B_VNCLEAN; 815 bp->b_xflags &= ~B_VNDIRTY; 816 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 817 splx(s); 818} 819 820/* 821 * Disassociate a buffer from a vnode. 822 */ 823void 824brelvp(bp) 825 register struct buf *bp; 826{ 827 struct vnode *vp; 828 struct buflists *listheadp; 829 int s; 830 831 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 832 833 /* 834 * Delete from old vnode list, if on one. 835 */ 836 vp = bp->b_vp; 837 s = splbio(); 838 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 839 if (bp->b_xflags & B_VNDIRTY) 840 listheadp = &vp->v_dirtyblkhd; 841 else 842 listheadp = &vp->v_cleanblkhd; 843 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 844 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 845 } 846 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 847 vp->v_flag &= ~VONWORKLST; 848 LIST_REMOVE(vp, v_synclist); 849 } 850 splx(s); 851 bp->b_vp = (struct vnode *) 0; 852 vdrop(vp); 853} 854 855/* 856 * The workitem queue. 857 * 858 * It is useful to delay writes of file data and filesystem metadata 859 * for tens of seconds so that quickly created and deleted files need 860 * not waste disk bandwidth being created and removed. To realize this, 861 * we append vnodes to a "workitem" queue. When running with a soft 862 * updates implementation, most pending metadata dependencies should 863 * not wait for more than a few seconds. Thus, mounted on block devices 864 * are delayed only about a half the time that file data is delayed. 865 * Similarly, directory updates are more critical, so are only delayed 866 * about a third the time that file data is delayed. Thus, there are 867 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 868 * one each second (driven off the filesystem syner process). The 869 * syncer_delayno variable indicates the next queue that is to be processed. 870 * Items that need to be processed soon are placed in this queue: 871 * 872 * syncer_workitem_pending[syncer_delayno] 873 * 874 * A delay of fifteen seconds is done by placing the request fifteen 875 * entries later in the queue: 876 * 877 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 878 * 879 */ 880 881/* 882 * Add an item to the syncer work queue. 883 */ 884static void 885vn_syncer_add_to_worklist(struct vnode *vp, int delay) 886{ 887 int s, slot; 888 889 s = splbio(); 890 891 if (vp->v_flag & VONWORKLST) { 892 LIST_REMOVE(vp, v_synclist); 893 } 894 895 if (delay > syncer_maxdelay - 2) 896 delay = syncer_maxdelay - 2; 897 slot = (syncer_delayno + delay) & syncer_mask; 898 899 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 900 vp->v_flag |= VONWORKLST; 901 splx(s); 902} 903 904struct proc *updateproc; 905static void sched_sync __P((void)); 906static const struct kproc_desc up_kp = { 907 "syncer", 908 sched_sync, 909 &updateproc 910}; 911SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 912 913/* 914 * System filesystem synchronizer daemon. 915 */ 916void 917sched_sync(void) 918{ 919 struct synclist *slp; 920 struct vnode *vp; 921 long starttime; 922 int s; 923 struct proc *p = updateproc; 924 925 for (;;) { 926 starttime = time_second; 927 928 /* 929 * Push files whose dirty time has expired. Be careful 930 * of interrupt race on slp queue. 931 */ 932 s = splbio(); 933 slp = &syncer_workitem_pending[syncer_delayno]; 934 syncer_delayno += 1; 935 if (syncer_delayno == syncer_maxdelay) 936 syncer_delayno = 0; 937 splx(s); 938 939 while ((vp = LIST_FIRST(slp)) != NULL) { 940 if (VOP_ISLOCKED(vp) == 0) { 941 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 942 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 943 VOP_UNLOCK(vp, 0, p); 944 } 945 s = splbio(); 946 if (LIST_FIRST(slp) == vp) { 947 /* 948 * Note: v_tag VT_VFS vps can remain on the 949 * worklist too with no dirty blocks, but 950 * since sync_fsync() moves it to a different 951 * slot we are safe. 952 */ 953 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 954 vp->v_type != VBLK) 955 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); 956 /* 957 * Put us back on the worklist. The worklist 958 * routine will remove us from our current 959 * position and then add us back in at a later 960 * position. 961 */ 962 vn_syncer_add_to_worklist(vp, syncdelay); 963 } 964 splx(s); 965 } 966 967 /* 968 * Do soft update processing. 969 */ 970 if (bioops.io_sync) 971 (*bioops.io_sync)(NULL); 972 973 /* 974 * The variable rushjob allows the kernel to speed up the 975 * processing of the filesystem syncer process. A rushjob 976 * value of N tells the filesystem syncer to process the next 977 * N seconds worth of work on its queue ASAP. Currently rushjob 978 * is used by the soft update code to speed up the filesystem 979 * syncer process when the incore state is getting so far 980 * ahead of the disk that the kernel memory pool is being 981 * threatened with exhaustion. 982 */ 983 if (rushjob > 0) { 984 rushjob -= 1; 985 continue; 986 } 987 /* 988 * If it has taken us less than a second to process the 989 * current work, then wait. Otherwise start right over 990 * again. We can still lose time if any single round 991 * takes more than two seconds, but it does not really 992 * matter as we are just trying to generally pace the 993 * filesystem activity. 994 */ 995 if (time_second == starttime) 996 tsleep(&lbolt, PPAUSE, "syncer", 0); 997 } 998} 999 1000/* 1001 * Associate a p-buffer with a vnode. 1002 * 1003 * Also sets B_PAGING flag to indicate that vnode is not fully associated 1004 * with the buffer. i.e. the bp has not been linked into the vnode or 1005 * ref-counted. 1006 */ 1007void 1008pbgetvp(vp, bp) 1009 register struct vnode *vp; 1010 register struct buf *bp; 1011{ 1012 1013 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 1014 1015 bp->b_vp = vp; 1016 bp->b_flags |= B_PAGING; 1017 if (vp->v_type == VBLK || vp->v_type == VCHR) 1018 bp->b_dev = vp->v_rdev; 1019 else 1020 bp->b_dev = NODEV; 1021} 1022 1023/* 1024 * Disassociate a p-buffer from a vnode. 1025 */ 1026void 1027pbrelvp(bp) 1028 register struct buf *bp; 1029{ 1030 1031 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); 1032 1033#if !defined(MAX_PERF) 1034 /* XXX REMOVE ME */ 1035 if (bp->b_vnbufs.tqe_next != NULL) { 1036 panic( 1037 "relpbuf(): b_vp was probably reassignbuf()d %p %x", 1038 bp, 1039 (int)bp->b_flags 1040 ); 1041 } 1042#endif 1043 bp->b_vp = (struct vnode *) 0; 1044 bp->b_flags &= ~B_PAGING; 1045} 1046 1047void 1048pbreassignbuf(bp, newvp) 1049 struct buf *bp; 1050 struct vnode *newvp; 1051{ 1052#if !defined(MAX_PERF) 1053 if ((bp->b_flags & B_PAGING) == 0) { 1054 panic( 1055 "pbreassignbuf() on non phys bp %p", 1056 bp 1057 ); 1058 } 1059#endif 1060 bp->b_vp = newvp; 1061} 1062 1063/* 1064 * Reassign a buffer from one vnode to another. 1065 * Used to assign file specific control information 1066 * (indirect blocks) to the vnode to which they belong. 1067 */ 1068void 1069reassignbuf(bp, newvp) 1070 register struct buf *bp; 1071 register struct vnode *newvp; 1072{ 1073 struct buflists *listheadp; 1074 int delay; 1075 int s; 1076 1077 if (newvp == NULL) { 1078 printf("reassignbuf: NULL"); 1079 return; 1080 } 1081 1082#if !defined(MAX_PERF) 1083 /* 1084 * B_PAGING flagged buffers cannot be reassigned because their vp 1085 * is not fully linked in. 1086 */ 1087 if (bp->b_flags & B_PAGING) 1088 panic("cannot reassign paging buffer"); 1089#endif 1090 1091 s = splbio(); 1092 /* 1093 * Delete from old vnode list, if on one. 1094 */ 1095 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1096 if (bp->b_xflags & B_VNDIRTY) 1097 listheadp = &bp->b_vp->v_dirtyblkhd; 1098 else 1099 listheadp = &bp->b_vp->v_cleanblkhd; 1100 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1101 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1102 if (bp->b_vp != newvp) { 1103 vdrop(bp->b_vp); 1104 bp->b_vp = NULL; /* for clarification */ 1105 } 1106 } 1107 /* 1108 * If dirty, put on list of dirty buffers; otherwise insert onto list 1109 * of clean buffers. 1110 */ 1111 if (bp->b_flags & B_DELWRI) { 1112 struct buf *tbp; 1113 1114 listheadp = &newvp->v_dirtyblkhd; 1115 if ((newvp->v_flag & VONWORKLST) == 0) { 1116 switch (newvp->v_type) { 1117 case VDIR: 1118 delay = syncdelay / 3; 1119 break; 1120 case VBLK: 1121 if (newvp->v_specmountpoint != NULL) { 1122 delay = syncdelay / 2; 1123 break; 1124 } 1125 /* fall through */ 1126 default: 1127 delay = syncdelay; 1128 } 1129 vn_syncer_add_to_worklist(newvp, delay); 1130 } 1131 bp->b_xflags |= B_VNDIRTY; 1132 tbp = TAILQ_FIRST(listheadp); 1133 if (tbp == NULL || 1134 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) { 1135 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1136 } else { 1137 if (bp->b_lblkno >= 0) { 1138 struct buf *ttbp; 1139 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1140 (ttbp->b_lblkno < bp->b_lblkno)) { 1141 tbp = ttbp; 1142 } 1143 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1144 } else { 1145 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1146 } 1147 } 1148 } else { 1149 bp->b_xflags |= B_VNCLEAN; 1150 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1151 if ((newvp->v_flag & VONWORKLST) && 1152 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1153 newvp->v_flag &= ~VONWORKLST; 1154 LIST_REMOVE(newvp, v_synclist); 1155 } 1156 } 1157 if (bp->b_vp != newvp) { 1158 bp->b_vp = newvp; 1159 vhold(bp->b_vp); 1160 } 1161 splx(s); 1162} 1163 1164/* 1165 * Create a vnode for a block device. 1166 * Used for mounting the root file system. 1167 */ 1168int 1169bdevvp(dev, vpp) 1170 dev_t dev; 1171 struct vnode **vpp; 1172{ 1173 register struct vnode *vp; 1174 struct vnode *nvp; 1175 int error; 1176 1177 /* XXX 255 is for mfs. */ 1178 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev || 1179 bdevsw[major(dev)] == NULL))) { 1180 *vpp = NULLVP; 1181 return (ENXIO); 1182 } 1183 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1184 if (error) { 1185 *vpp = NULLVP; 1186 return (error); 1187 } 1188 vp = nvp; 1189 vp->v_type = VBLK; 1190 if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) { 1191 vput(vp); 1192 vp = nvp; 1193 } 1194 *vpp = vp; 1195 return (0); 1196} 1197 1198/* 1199 * Check to see if the new vnode represents a special device 1200 * for which we already have a vnode (either because of 1201 * bdevvp() or because of a different vnode representing 1202 * the same block device). If such an alias exists, deallocate 1203 * the existing contents and return the aliased vnode. The 1204 * caller is responsible for filling it with its new contents. 1205 */ 1206struct vnode * 1207checkalias(nvp, nvp_rdev, mp) 1208 register struct vnode *nvp; 1209 dev_t nvp_rdev; 1210 struct mount *mp; 1211{ 1212 struct proc *p = curproc; /* XXX */ 1213 struct vnode *vp; 1214 struct vnode **vpp; 1215 int rmaj = major(nvp_rdev); 1216 1217 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1218 return (NULLVP); 1219 1220 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1221loop: 1222 simple_lock(&spechash_slock); 1223 for (vp = *vpp; vp; vp = vp->v_specnext) { 1224 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1225 continue; 1226 /* 1227 * Alias, but not in use, so flush it out. 1228 * Only alias active device nodes. 1229 * Not sure why we don't re-use this like we do below. 1230 */ 1231 simple_lock(&vp->v_interlock); 1232 if (vp->v_usecount == 0) { 1233 simple_unlock(&spechash_slock); 1234 vgonel(vp, p); 1235 goto loop; 1236 } 1237 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1238 /* 1239 * It dissappeared, and we may have slept. 1240 * Restart from the beginning 1241 */ 1242 simple_unlock(&spechash_slock); 1243 goto loop; 1244 } 1245 break; 1246 } 1247 /* 1248 * It would be a lot clearer what is going on here if 1249 * this had been expressed as: 1250 * if ( vp && (vp->v_tag == VT_NULL)) 1251 * and the clauses had been swapped. 1252 */ 1253 if (vp == NULL || vp->v_tag != VT_NON) { 1254 struct specinfo *sinfo; 1255 1256 /* 1257 * Put the new vnode into the hash chain. 1258 * and if there was an alias, connect them. 1259 */ 1260 MALLOC(sinfo, struct specinfo *, 1261 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1262 bzero(sinfo, sizeof(struct specinfo)); 1263 nvp->v_specinfo = sinfo; 1264 sinfo->si_rdev = nvp_rdev; 1265 sinfo->si_hashchain = vpp; 1266 sinfo->si_specnext = *vpp; 1267 sinfo->si_bsize_phys = DEV_BSIZE; 1268 sinfo->si_bsize_best = BLKDEV_IOSIZE; 1269 sinfo->si_bsize_max = MAXBSIZE; 1270 1271 /* 1272 * Ask the device to fix up specinfo. Typically the 1273 * si_bsize_* parameters may need fixing up. 1274 */ 1275 1276 if (nvp->v_type == VBLK && rmaj < nblkdev) { 1277 if (bdevsw[rmaj] && bdevsw[rmaj]->d_parms) 1278 1279 (*bdevsw[rmaj]->d_parms)(nvp_rdev, sinfo, DPARM_GET); 1280 } else if (nvp->v_type == VCHR && rmaj < nchrdev) { 1281 if (cdevsw[rmaj] && cdevsw[rmaj]->d_parms) 1282 (*cdevsw[rmaj]->d_parms)(nvp_rdev, sinfo, DPARM_GET); 1283 } 1284 1285 simple_unlock(&spechash_slock); 1286 *vpp = nvp; 1287 if (vp != NULLVP) { 1288 nvp->v_flag |= VALIASED; 1289 vp->v_flag |= VALIASED; 1290 vput(vp); 1291 } 1292 return (NULLVP); 1293 } 1294 /* 1295 * if ( vp && (vp->v_tag == VT_NULL)) 1296 * We have a vnode alias, but it is a trashed. 1297 * Make it look like it's newley allocated. (by getnewvnode()) 1298 * The caller should use this instead. 1299 */ 1300 simple_unlock(&spechash_slock); 1301 VOP_UNLOCK(vp, 0, p); 1302 simple_lock(&vp->v_interlock); 1303 vclean(vp, 0, p); 1304 vp->v_op = nvp->v_op; 1305 vp->v_tag = nvp->v_tag; 1306 nvp->v_type = VNON; 1307 insmntque(vp, mp); 1308 return (vp); 1309} 1310 1311/* 1312 * Grab a particular vnode from the free list, increment its 1313 * reference count and lock it. The vnode lock bit is set the 1314 * vnode is being eliminated in vgone. The process is awakened 1315 * when the transition is completed, and an error returned to 1316 * indicate that the vnode is no longer usable (possibly having 1317 * been changed to a new file system type). 1318 */ 1319int 1320vget(vp, flags, p) 1321 register struct vnode *vp; 1322 int flags; 1323 struct proc *p; 1324{ 1325 int error; 1326 1327 /* 1328 * If the vnode is in the process of being cleaned out for 1329 * another use, we wait for the cleaning to finish and then 1330 * return failure. Cleaning is determined by checking that 1331 * the VXLOCK flag is set. 1332 */ 1333 if ((flags & LK_INTERLOCK) == 0) { 1334 simple_lock(&vp->v_interlock); 1335 } 1336 if (vp->v_flag & VXLOCK) { 1337 vp->v_flag |= VXWANT; 1338 simple_unlock(&vp->v_interlock); 1339 tsleep((caddr_t)vp, PINOD, "vget", 0); 1340 return (ENOENT); 1341 } 1342 1343 vp->v_usecount++; 1344 1345 if (VSHOULDBUSY(vp)) 1346 vbusy(vp); 1347 if (flags & LK_TYPE_MASK) { 1348 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1349 /* 1350 * must expand vrele here because we do not want 1351 * to call VOP_INACTIVE if the reference count 1352 * drops back to zero since it was never really 1353 * active. We must remove it from the free list 1354 * before sleeping so that multiple processes do 1355 * not try to recycle it. 1356 */ 1357 simple_lock(&vp->v_interlock); 1358 vp->v_usecount--; 1359 if (VSHOULDFREE(vp)) 1360 vfree(vp); 1361 simple_unlock(&vp->v_interlock); 1362 } 1363 return (error); 1364 } 1365 simple_unlock(&vp->v_interlock); 1366 return (0); 1367} 1368 1369void 1370vref(struct vnode *vp) 1371{ 1372 simple_lock(&vp->v_interlock); 1373 vp->v_usecount++; 1374 simple_unlock(&vp->v_interlock); 1375} 1376 1377/* 1378 * Vnode put/release. 1379 * If count drops to zero, call inactive routine and return to freelist. 1380 */ 1381void 1382vrele(vp) 1383 struct vnode *vp; 1384{ 1385 struct proc *p = curproc; /* XXX */ 1386 1387 KASSERT(vp != NULL, ("vrele: null vp")); 1388 1389 simple_lock(&vp->v_interlock); 1390 1391 if (vp->v_usecount > 1) { 1392 1393 vp->v_usecount--; 1394 simple_unlock(&vp->v_interlock); 1395 1396 return; 1397 } 1398 1399 if (vp->v_usecount == 1) { 1400 1401 vp->v_usecount--; 1402 if (VSHOULDFREE(vp)) 1403 vfree(vp); 1404 /* 1405 * If we are doing a vput, the node is already locked, and we must 1406 * call VOP_INACTIVE with the node locked. So, in the case of 1407 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1408 */ 1409 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1410 VOP_INACTIVE(vp, p); 1411 } 1412 1413 } else { 1414#ifdef DIAGNOSTIC 1415 vprint("vrele: negative ref count", vp); 1416 simple_unlock(&vp->v_interlock); 1417#endif 1418 panic("vrele: negative ref cnt"); 1419 } 1420} 1421 1422void 1423vput(vp) 1424 struct vnode *vp; 1425{ 1426 struct proc *p = curproc; /* XXX */ 1427 1428 KASSERT(vp != NULL, ("vput: null vp")); 1429 1430 simple_lock(&vp->v_interlock); 1431 1432 if (vp->v_usecount > 1) { 1433 1434 vp->v_usecount--; 1435 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1436 return; 1437 1438 } 1439 1440 if (vp->v_usecount == 1) { 1441 1442 vp->v_usecount--; 1443 if (VSHOULDFREE(vp)) 1444 vfree(vp); 1445 /* 1446 * If we are doing a vput, the node is already locked, and we must 1447 * call VOP_INACTIVE with the node locked. So, in the case of 1448 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1449 */ 1450 simple_unlock(&vp->v_interlock); 1451 VOP_INACTIVE(vp, p); 1452 1453 } else { 1454#ifdef DIAGNOSTIC 1455 vprint("vput: negative ref count", vp); 1456#endif 1457 panic("vput: negative ref cnt"); 1458 } 1459} 1460 1461/* 1462 * Somebody doesn't want the vnode recycled. 1463 */ 1464void 1465vhold(vp) 1466 register struct vnode *vp; 1467{ 1468 int s; 1469 1470 s = splbio(); 1471 vp->v_holdcnt++; 1472 if (VSHOULDBUSY(vp)) 1473 vbusy(vp); 1474 splx(s); 1475} 1476 1477/* 1478 * One less who cares about this vnode. 1479 */ 1480void 1481vdrop(vp) 1482 register struct vnode *vp; 1483{ 1484 int s; 1485 1486 s = splbio(); 1487 if (vp->v_holdcnt <= 0) 1488 panic("vdrop: holdcnt"); 1489 vp->v_holdcnt--; 1490 if (VSHOULDFREE(vp)) 1491 vfree(vp); 1492 splx(s); 1493} 1494 1495/* 1496 * Remove any vnodes in the vnode table belonging to mount point mp. 1497 * 1498 * If MNT_NOFORCE is specified, there should not be any active ones, 1499 * return error if any are found (nb: this is a user error, not a 1500 * system error). If MNT_FORCE is specified, detach any active vnodes 1501 * that are found. 1502 */ 1503#ifdef DIAGNOSTIC 1504static int busyprt = 0; /* print out busy vnodes */ 1505SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1506#endif 1507 1508int 1509vflush(mp, skipvp, flags) 1510 struct mount *mp; 1511 struct vnode *skipvp; 1512 int flags; 1513{ 1514 struct proc *p = curproc; /* XXX */ 1515 struct vnode *vp, *nvp; 1516 int busy = 0; 1517 1518 simple_lock(&mntvnode_slock); 1519loop: 1520 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1521 /* 1522 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1523 * Start over if it has (it won't be on the list anymore). 1524 */ 1525 if (vp->v_mount != mp) 1526 goto loop; 1527 nvp = vp->v_mntvnodes.le_next; 1528 /* 1529 * Skip over a selected vnode. 1530 */ 1531 if (vp == skipvp) 1532 continue; 1533 1534 simple_lock(&vp->v_interlock); 1535 /* 1536 * Skip over a vnodes marked VSYSTEM. 1537 */ 1538 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1539 simple_unlock(&vp->v_interlock); 1540 continue; 1541 } 1542 /* 1543 * If WRITECLOSE is set, only flush out regular file vnodes 1544 * open for writing. 1545 */ 1546 if ((flags & WRITECLOSE) && 1547 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1548 simple_unlock(&vp->v_interlock); 1549 continue; 1550 } 1551 1552 /* 1553 * With v_usecount == 0, all we need to do is clear out the 1554 * vnode data structures and we are done. 1555 */ 1556 if (vp->v_usecount == 0) { 1557 simple_unlock(&mntvnode_slock); 1558 vgonel(vp, p); 1559 simple_lock(&mntvnode_slock); 1560 continue; 1561 } 1562 1563 /* 1564 * If FORCECLOSE is set, forcibly close the vnode. For block 1565 * or character devices, revert to an anonymous device. For 1566 * all other files, just kill them. 1567 */ 1568 if (flags & FORCECLOSE) { 1569 simple_unlock(&mntvnode_slock); 1570 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1571 vgonel(vp, p); 1572 } else { 1573 vclean(vp, 0, p); 1574 vp->v_op = spec_vnodeop_p; 1575 insmntque(vp, (struct mount *) 0); 1576 } 1577 simple_lock(&mntvnode_slock); 1578 continue; 1579 } 1580#ifdef DIAGNOSTIC 1581 if (busyprt) 1582 vprint("vflush: busy vnode", vp); 1583#endif 1584 simple_unlock(&vp->v_interlock); 1585 busy++; 1586 } 1587 simple_unlock(&mntvnode_slock); 1588 if (busy) 1589 return (EBUSY); 1590 return (0); 1591} 1592 1593/* 1594 * Disassociate the underlying file system from a vnode. 1595 */ 1596static void 1597vclean(vp, flags, p) 1598 struct vnode *vp; 1599 int flags; 1600 struct proc *p; 1601{ 1602 int active; 1603 vm_object_t obj; 1604 1605 /* 1606 * Check to see if the vnode is in use. If so we have to reference it 1607 * before we clean it out so that its count cannot fall to zero and 1608 * generate a race against ourselves to recycle it. 1609 */ 1610 if ((active = vp->v_usecount)) 1611 vp->v_usecount++; 1612 1613 /* 1614 * Prevent the vnode from being recycled or brought into use while we 1615 * clean it out. 1616 */ 1617 if (vp->v_flag & VXLOCK) 1618 panic("vclean: deadlock"); 1619 vp->v_flag |= VXLOCK; 1620 /* 1621 * Even if the count is zero, the VOP_INACTIVE routine may still 1622 * have the object locked while it cleans it out. The VOP_LOCK 1623 * ensures that the VOP_INACTIVE routine is done with its work. 1624 * For active vnodes, it ensures that no other activity can 1625 * occur while the underlying object is being cleaned out. 1626 */ 1627 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1628 1629 /* 1630 * Clean out any buffers associated with the vnode. 1631 */ 1632 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1633 if ((obj = vp->v_object) != NULL) { 1634 if (obj->ref_count == 0) { 1635 /* 1636 * This is a normal way of shutting down the object/vnode 1637 * association. 1638 */ 1639 vm_object_terminate(obj); 1640 } else { 1641 /* 1642 * Woe to the process that tries to page now :-). 1643 */ 1644 vm_pager_deallocate(obj); 1645 } 1646 } 1647 1648 /* 1649 * If purging an active vnode, it must be closed and 1650 * deactivated before being reclaimed. Note that the 1651 * VOP_INACTIVE will unlock the vnode. 1652 */ 1653 if (active) { 1654 if (flags & DOCLOSE) 1655 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1656 VOP_INACTIVE(vp, p); 1657 } else { 1658 /* 1659 * Any other processes trying to obtain this lock must first 1660 * wait for VXLOCK to clear, then call the new lock operation. 1661 */ 1662 VOP_UNLOCK(vp, 0, p); 1663 } 1664 /* 1665 * Reclaim the vnode. 1666 */ 1667 if (VOP_RECLAIM(vp, p)) 1668 panic("vclean: cannot reclaim"); 1669 1670 if (active) 1671 vrele(vp); 1672 1673 cache_purge(vp); 1674 if (vp->v_vnlock) { 1675#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ 1676#ifdef DIAGNOSTIC 1677 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1678 vprint("vclean: lock not drained", vp); 1679#endif 1680#endif 1681 FREE(vp->v_vnlock, M_VNODE); 1682 vp->v_vnlock = NULL; 1683 } 1684 1685 if (VSHOULDFREE(vp)) 1686 vfree(vp); 1687 1688 /* 1689 * Done with purge, notify sleepers of the grim news. 1690 */ 1691 vp->v_op = dead_vnodeop_p; 1692 vn_pollgone(vp); 1693 vp->v_tag = VT_NON; 1694 vp->v_flag &= ~VXLOCK; 1695 if (vp->v_flag & VXWANT) { 1696 vp->v_flag &= ~VXWANT; 1697 wakeup((caddr_t) vp); 1698 } 1699} 1700 1701/* 1702 * Eliminate all activity associated with the requested vnode 1703 * and with all vnodes aliased to the requested vnode. 1704 */ 1705int 1706vop_revoke(ap) 1707 struct vop_revoke_args /* { 1708 struct vnode *a_vp; 1709 int a_flags; 1710 } */ *ap; 1711{ 1712 struct vnode *vp, *vq; 1713 struct proc *p = curproc; /* XXX */ 1714 1715 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1716 1717 vp = ap->a_vp; 1718 simple_lock(&vp->v_interlock); 1719 1720 if (vp->v_flag & VALIASED) { 1721 /* 1722 * If a vgone (or vclean) is already in progress, 1723 * wait until it is done and return. 1724 */ 1725 if (vp->v_flag & VXLOCK) { 1726 vp->v_flag |= VXWANT; 1727 simple_unlock(&vp->v_interlock); 1728 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1729 return (0); 1730 } 1731 /* 1732 * Ensure that vp will not be vgone'd while we 1733 * are eliminating its aliases. 1734 */ 1735 vp->v_flag |= VXLOCK; 1736 simple_unlock(&vp->v_interlock); 1737 while (vp->v_flag & VALIASED) { 1738 simple_lock(&spechash_slock); 1739 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1740 if (vq->v_rdev != vp->v_rdev || 1741 vq->v_type != vp->v_type || vp == vq) 1742 continue; 1743 simple_unlock(&spechash_slock); 1744 vgone(vq); 1745 break; 1746 } 1747 if (vq == NULLVP) { 1748 simple_unlock(&spechash_slock); 1749 } 1750 } 1751 /* 1752 * Remove the lock so that vgone below will 1753 * really eliminate the vnode after which time 1754 * vgone will awaken any sleepers. 1755 */ 1756 simple_lock(&vp->v_interlock); 1757 vp->v_flag &= ~VXLOCK; 1758 if (vp->v_flag & VXWANT) { 1759 vp->v_flag &= ~VXWANT; 1760 wakeup(vp); 1761 } 1762 } 1763 vgonel(vp, p); 1764 return (0); 1765} 1766 1767/* 1768 * Recycle an unused vnode to the front of the free list. 1769 * Release the passed interlock if the vnode will be recycled. 1770 */ 1771int 1772vrecycle(vp, inter_lkp, p) 1773 struct vnode *vp; 1774 struct simplelock *inter_lkp; 1775 struct proc *p; 1776{ 1777 1778 simple_lock(&vp->v_interlock); 1779 if (vp->v_usecount == 0) { 1780 if (inter_lkp) { 1781 simple_unlock(inter_lkp); 1782 } 1783 vgonel(vp, p); 1784 return (1); 1785 } 1786 simple_unlock(&vp->v_interlock); 1787 return (0); 1788} 1789 1790/* 1791 * Eliminate all activity associated with a vnode 1792 * in preparation for reuse. 1793 */ 1794void 1795vgone(vp) 1796 register struct vnode *vp; 1797{ 1798 struct proc *p = curproc; /* XXX */ 1799 1800 simple_lock(&vp->v_interlock); 1801 vgonel(vp, p); 1802} 1803 1804/* 1805 * vgone, with the vp interlock held. 1806 */ 1807static void 1808vgonel(vp, p) 1809 struct vnode *vp; 1810 struct proc *p; 1811{ 1812 int s; 1813 struct vnode *vq; 1814 struct vnode *vx; 1815 1816 /* 1817 * If a vgone (or vclean) is already in progress, 1818 * wait until it is done and return. 1819 */ 1820 if (vp->v_flag & VXLOCK) { 1821 vp->v_flag |= VXWANT; 1822 simple_unlock(&vp->v_interlock); 1823 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1824 return; 1825 } 1826 1827 /* 1828 * Clean out the filesystem specific data. 1829 */ 1830 vclean(vp, DOCLOSE, p); 1831 simple_lock(&vp->v_interlock); 1832 1833 /* 1834 * Delete from old mount point vnode list, if on one. 1835 */ 1836 if (vp->v_mount != NULL) 1837 insmntque(vp, (struct mount *)0); 1838 /* 1839 * If special device, remove it from special device alias list 1840 * if it is on one. 1841 */ 1842 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1843 simple_lock(&spechash_slock); 1844 if (*vp->v_hashchain == vp) { 1845 *vp->v_hashchain = vp->v_specnext; 1846 } else { 1847 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1848 if (vq->v_specnext != vp) 1849 continue; 1850 vq->v_specnext = vp->v_specnext; 1851 break; 1852 } 1853 if (vq == NULL) 1854 panic("missing bdev"); 1855 } 1856 if (vp->v_flag & VALIASED) { 1857 vx = NULL; 1858 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1859 if (vq->v_rdev != vp->v_rdev || 1860 vq->v_type != vp->v_type) 1861 continue; 1862 if (vx) 1863 break; 1864 vx = vq; 1865 } 1866 if (vx == NULL) 1867 panic("missing alias"); 1868 if (vq == NULL) 1869 vx->v_flag &= ~VALIASED; 1870 vp->v_flag &= ~VALIASED; 1871 } 1872 simple_unlock(&spechash_slock); 1873 FREE(vp->v_specinfo, M_VNODE); 1874 vp->v_specinfo = NULL; 1875 } 1876 1877 /* 1878 * If it is on the freelist and not already at the head, 1879 * move it to the head of the list. The test of the back 1880 * pointer and the reference count of zero is because 1881 * it will be removed from the free list by getnewvnode, 1882 * but will not have its reference count incremented until 1883 * after calling vgone. If the reference count were 1884 * incremented first, vgone would (incorrectly) try to 1885 * close the previous instance of the underlying object. 1886 */ 1887 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1888 s = splbio(); 1889 simple_lock(&vnode_free_list_slock); 1890 if (vp->v_flag & VFREE) { 1891 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1892 } else if (vp->v_flag & VTBFREE) { 1893 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1894 vp->v_flag &= ~VTBFREE; 1895 freevnodes++; 1896 } else 1897 freevnodes++; 1898 vp->v_flag |= VFREE; 1899 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1900 simple_unlock(&vnode_free_list_slock); 1901 splx(s); 1902 } 1903 1904 vp->v_type = VBAD; 1905 simple_unlock(&vp->v_interlock); 1906} 1907 1908/* 1909 * Lookup a vnode by device number. 1910 */ 1911int 1912vfinddev(dev, type, vpp) 1913 dev_t dev; 1914 enum vtype type; 1915 struct vnode **vpp; 1916{ 1917 register struct vnode *vp; 1918 int rc = 0; 1919 1920 simple_lock(&spechash_slock); 1921 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1922 if (dev != vp->v_rdev || type != vp->v_type) 1923 continue; 1924 *vpp = vp; 1925 rc = 1; 1926 break; 1927 } 1928 simple_unlock(&spechash_slock); 1929 return (rc); 1930} 1931 1932/* 1933 * Calculate the total number of references to a special device. 1934 */ 1935int 1936vcount(vp) 1937 register struct vnode *vp; 1938{ 1939 struct vnode *vq, *vnext; 1940 int count; 1941 1942loop: 1943 if ((vp->v_flag & VALIASED) == 0) 1944 return (vp->v_usecount); 1945 simple_lock(&spechash_slock); 1946 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1947 vnext = vq->v_specnext; 1948 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1949 continue; 1950 /* 1951 * Alias, but not in use, so flush it out. 1952 */ 1953 if (vq->v_usecount == 0 && vq != vp) { 1954 simple_unlock(&spechash_slock); 1955 vgone(vq); 1956 goto loop; 1957 } 1958 count += vq->v_usecount; 1959 } 1960 simple_unlock(&spechash_slock); 1961 return (count); 1962} 1963/* 1964 * Print out a description of a vnode. 1965 */ 1966static char *typename[] = 1967{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1968 1969void 1970vprint(label, vp) 1971 char *label; 1972 register struct vnode *vp; 1973{ 1974 char buf[96]; 1975 1976 if (label != NULL) 1977 printf("%s: %p: ", label, (void *)vp); 1978 else 1979 printf("%p: ", (void *)vp); 1980 printf("type %s, usecount %d, writecount %d, refcount %d,", 1981 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1982 vp->v_holdcnt); 1983 buf[0] = '\0'; 1984 if (vp->v_flag & VROOT) 1985 strcat(buf, "|VROOT"); 1986 if (vp->v_flag & VTEXT) 1987 strcat(buf, "|VTEXT"); 1988 if (vp->v_flag & VSYSTEM) 1989 strcat(buf, "|VSYSTEM"); 1990 if (vp->v_flag & VXLOCK) 1991 strcat(buf, "|VXLOCK"); 1992 if (vp->v_flag & VXWANT) 1993 strcat(buf, "|VXWANT"); 1994 if (vp->v_flag & VBWAIT) 1995 strcat(buf, "|VBWAIT"); 1996 if (vp->v_flag & VALIASED) 1997 strcat(buf, "|VALIASED"); 1998 if (vp->v_flag & VDOOMED) 1999 strcat(buf, "|VDOOMED"); 2000 if (vp->v_flag & VFREE) 2001 strcat(buf, "|VFREE"); 2002 if (vp->v_flag & VOBJBUF) 2003 strcat(buf, "|VOBJBUF"); 2004 if (buf[0] != '\0') 2005 printf(" flags (%s)", &buf[1]); 2006 if (vp->v_data == NULL) { 2007 printf("\n"); 2008 } else { 2009 printf("\n\t"); 2010 VOP_PRINT(vp); 2011 } 2012} 2013 2014#ifdef DDB 2015#include <ddb/ddb.h> 2016/* 2017 * List all of the locked vnodes in the system. 2018 * Called when debugging the kernel. 2019 */ 2020DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 2021{ 2022 struct proc *p = curproc; /* XXX */ 2023 struct mount *mp, *nmp; 2024 struct vnode *vp; 2025 2026 printf("Locked vnodes\n"); 2027 simple_lock(&mountlist_slock); 2028 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2029 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2030 nmp = mp->mnt_list.cqe_next; 2031 continue; 2032 } 2033 for (vp = mp->mnt_vnodelist.lh_first; 2034 vp != NULL; 2035 vp = vp->v_mntvnodes.le_next) { 2036 if (VOP_ISLOCKED(vp)) 2037 vprint((char *)0, vp); 2038 } 2039 simple_lock(&mountlist_slock); 2040 nmp = mp->mnt_list.cqe_next; 2041 vfs_unbusy(mp, p); 2042 } 2043 simple_unlock(&mountlist_slock); 2044} 2045#endif 2046 2047/* 2048 * Top level filesystem related information gathering. 2049 */ 2050static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 2051 2052static int 2053vfs_sysctl SYSCTL_HANDLER_ARGS 2054{ 2055 int *name = (int *)arg1 - 1; /* XXX */ 2056 u_int namelen = arg2 + 1; /* XXX */ 2057 struct vfsconf *vfsp; 2058 2059#if 1 || defined(COMPAT_PRELITE2) 2060 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ 2061 if (namelen == 1) 2062 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2063#endif 2064 2065#ifdef notyet 2066 /* all sysctl names at this level are at least name and field */ 2067 if (namelen < 2) 2068 return (ENOTDIR); /* overloaded */ 2069 if (name[0] != VFS_GENERIC) { 2070 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2071 if (vfsp->vfc_typenum == name[0]) 2072 break; 2073 if (vfsp == NULL) 2074 return (EOPNOTSUPP); 2075 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2076 oldp, oldlenp, newp, newlen, p)); 2077 } 2078#endif 2079 switch (name[1]) { 2080 case VFS_MAXTYPENUM: 2081 if (namelen != 2) 2082 return (ENOTDIR); 2083 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2084 case VFS_CONF: 2085 if (namelen != 3) 2086 return (ENOTDIR); /* overloaded */ 2087 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2088 if (vfsp->vfc_typenum == name[2]) 2089 break; 2090 if (vfsp == NULL) 2091 return (EOPNOTSUPP); 2092 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2093 } 2094 return (EOPNOTSUPP); 2095} 2096 2097SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2098 "Generic filesystem"); 2099 2100#if 1 || defined(COMPAT_PRELITE2) 2101 2102static int 2103sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2104{ 2105 int error; 2106 struct vfsconf *vfsp; 2107 struct ovfsconf ovfs; 2108 2109 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2110 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2111 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2112 ovfs.vfc_index = vfsp->vfc_typenum; 2113 ovfs.vfc_refcount = vfsp->vfc_refcount; 2114 ovfs.vfc_flags = vfsp->vfc_flags; 2115 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2116 if (error) 2117 return error; 2118 } 2119 return 0; 2120} 2121 2122#endif /* 1 || COMPAT_PRELITE2 */ 2123 2124#if 0 2125#define KINFO_VNODESLOP 10 2126/* 2127 * Dump vnode list (via sysctl). 2128 * Copyout address of vnode followed by vnode. 2129 */ 2130/* ARGSUSED */ 2131static int 2132sysctl_vnode SYSCTL_HANDLER_ARGS 2133{ 2134 struct proc *p = curproc; /* XXX */ 2135 struct mount *mp, *nmp; 2136 struct vnode *nvp, *vp; 2137 int error; 2138 2139#define VPTRSZ sizeof (struct vnode *) 2140#define VNODESZ sizeof (struct vnode) 2141 2142 req->lock = 0; 2143 if (!req->oldptr) /* Make an estimate */ 2144 return (SYSCTL_OUT(req, 0, 2145 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2146 2147 simple_lock(&mountlist_slock); 2148 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 2149 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2150 nmp = mp->mnt_list.cqe_next; 2151 continue; 2152 } 2153again: 2154 simple_lock(&mntvnode_slock); 2155 for (vp = mp->mnt_vnodelist.lh_first; 2156 vp != NULL; 2157 vp = nvp) { 2158 /* 2159 * Check that the vp is still associated with 2160 * this filesystem. RACE: could have been 2161 * recycled onto the same filesystem. 2162 */ 2163 if (vp->v_mount != mp) { 2164 simple_unlock(&mntvnode_slock); 2165 goto again; 2166 } 2167 nvp = vp->v_mntvnodes.le_next; 2168 simple_unlock(&mntvnode_slock); 2169 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || 2170 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2171 return (error); 2172 simple_lock(&mntvnode_slock); 2173 } 2174 simple_unlock(&mntvnode_slock); 2175 simple_lock(&mountlist_slock); 2176 nmp = mp->mnt_list.cqe_next; 2177 vfs_unbusy(mp, p); 2178 } 2179 simple_unlock(&mountlist_slock); 2180 2181 return (0); 2182} 2183#endif 2184 2185/* 2186 * XXX 2187 * Exporting the vnode list on large systems causes them to crash. 2188 * Exporting the vnode list on medium systems causes sysctl to coredump. 2189 */ 2190#if 0 2191SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2192 0, 0, sysctl_vnode, "S,vnode", ""); 2193#endif 2194 2195/* 2196 * Check to see if a filesystem is mounted on a block device. 2197 */ 2198int 2199vfs_mountedon(vp) 2200 struct vnode *vp; 2201{ 2202 struct vnode *vq; 2203 int error = 0; 2204 2205 if (vp->v_specmountpoint != NULL) 2206 return (EBUSY); 2207 if (vp->v_flag & VALIASED) { 2208 simple_lock(&spechash_slock); 2209 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2210 if (vq->v_rdev != vp->v_rdev || 2211 vq->v_type != vp->v_type) 2212 continue; 2213 if (vq->v_specmountpoint != NULL) { 2214 error = EBUSY; 2215 break; 2216 } 2217 } 2218 simple_unlock(&spechash_slock); 2219 } 2220 return (error); 2221} 2222 2223/* 2224 * Unmount all filesystems. The list is traversed in reverse order 2225 * of mounting to avoid dependencies. 2226 */ 2227void 2228vfs_unmountall() 2229{ 2230 struct mount *mp, *nmp; 2231 struct proc *p; 2232 int error; 2233 2234 if (curproc != NULL) 2235 p = curproc; 2236 else 2237 p = initproc; /* XXX XXX should this be proc0? */ 2238 /* 2239 * Since this only runs when rebooting, it is not interlocked. 2240 */ 2241 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2242 nmp = mp->mnt_list.cqe_prev; 2243 error = dounmount(mp, MNT_FORCE, p); 2244 if (error) { 2245 printf("unmount of %s failed (", 2246 mp->mnt_stat.f_mntonname); 2247 if (error == EBUSY) 2248 printf("BUSY)\n"); 2249 else 2250 printf("%d)\n", error); 2251 } 2252 } 2253} 2254 2255/* 2256 * Build hash lists of net addresses and hang them off the mount point. 2257 * Called by ufs_mount() to set up the lists of export addresses. 2258 */ 2259static int 2260vfs_hang_addrlist(mp, nep, argp) 2261 struct mount *mp; 2262 struct netexport *nep; 2263 struct export_args *argp; 2264{ 2265 register struct netcred *np; 2266 register struct radix_node_head *rnh; 2267 register int i; 2268 struct radix_node *rn; 2269 struct sockaddr *saddr, *smask = 0; 2270 struct domain *dom; 2271 int error; 2272 2273 if (argp->ex_addrlen == 0) { 2274 if (mp->mnt_flag & MNT_DEFEXPORTED) 2275 return (EPERM); 2276 np = &nep->ne_defexported; 2277 np->netc_exflags = argp->ex_flags; 2278 np->netc_anon = argp->ex_anon; 2279 np->netc_anon.cr_ref = 1; 2280 mp->mnt_flag |= MNT_DEFEXPORTED; 2281 return (0); 2282 } 2283 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2284 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2285 bzero((caddr_t) np, i); 2286 saddr = (struct sockaddr *) (np + 1); 2287 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2288 goto out; 2289 if (saddr->sa_len > argp->ex_addrlen) 2290 saddr->sa_len = argp->ex_addrlen; 2291 if (argp->ex_masklen) { 2292 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2293 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2294 if (error) 2295 goto out; 2296 if (smask->sa_len > argp->ex_masklen) 2297 smask->sa_len = argp->ex_masklen; 2298 } 2299 i = saddr->sa_family; 2300 if ((rnh = nep->ne_rtable[i]) == 0) { 2301 /* 2302 * Seems silly to initialize every AF when most are not used, 2303 * do so on demand here 2304 */ 2305 for (dom = domains; dom; dom = dom->dom_next) 2306 if (dom->dom_family == i && dom->dom_rtattach) { 2307 dom->dom_rtattach((void **) &nep->ne_rtable[i], 2308 dom->dom_rtoffset); 2309 break; 2310 } 2311 if ((rnh = nep->ne_rtable[i]) == 0) { 2312 error = ENOBUFS; 2313 goto out; 2314 } 2315 } 2316 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2317 np->netc_rnodes); 2318 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ 2319 error = EPERM; 2320 goto out; 2321 } 2322 np->netc_exflags = argp->ex_flags; 2323 np->netc_anon = argp->ex_anon; 2324 np->netc_anon.cr_ref = 1; 2325 return (0); 2326out: 2327 free(np, M_NETADDR); 2328 return (error); 2329} 2330 2331/* ARGSUSED */ 2332static int 2333vfs_free_netcred(rn, w) 2334 struct radix_node *rn; 2335 void *w; 2336{ 2337 register struct radix_node_head *rnh = (struct radix_node_head *) w; 2338 2339 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2340 free((caddr_t) rn, M_NETADDR); 2341 return (0); 2342} 2343 2344/* 2345 * Free the net address hash lists that are hanging off the mount points. 2346 */ 2347static void 2348vfs_free_addrlist(nep) 2349 struct netexport *nep; 2350{ 2351 register int i; 2352 register struct radix_node_head *rnh; 2353 2354 for (i = 0; i <= AF_MAX; i++) 2355 if ((rnh = nep->ne_rtable[i])) { 2356 (*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2357 (caddr_t) rnh); 2358 free((caddr_t) rnh, M_RTABLE); 2359 nep->ne_rtable[i] = 0; 2360 } 2361} 2362 2363int 2364vfs_export(mp, nep, argp) 2365 struct mount *mp; 2366 struct netexport *nep; 2367 struct export_args *argp; 2368{ 2369 int error; 2370 2371 if (argp->ex_flags & MNT_DELEXPORT) { 2372 if (mp->mnt_flag & MNT_EXPUBLIC) { 2373 vfs_setpublicfs(NULL, NULL, NULL); 2374 mp->mnt_flag &= ~MNT_EXPUBLIC; 2375 } 2376 vfs_free_addrlist(nep); 2377 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2378 } 2379 if (argp->ex_flags & MNT_EXPORTED) { 2380 if (argp->ex_flags & MNT_EXPUBLIC) { 2381 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2382 return (error); 2383 mp->mnt_flag |= MNT_EXPUBLIC; 2384 } 2385 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2386 return (error); 2387 mp->mnt_flag |= MNT_EXPORTED; 2388 } 2389 return (0); 2390} 2391 2392 2393/* 2394 * Set the publicly exported filesystem (WebNFS). Currently, only 2395 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2396 */ 2397int 2398vfs_setpublicfs(mp, nep, argp) 2399 struct mount *mp; 2400 struct netexport *nep; 2401 struct export_args *argp; 2402{ 2403 int error; 2404 struct vnode *rvp; 2405 char *cp; 2406 2407 /* 2408 * mp == NULL -> invalidate the current info, the FS is 2409 * no longer exported. May be called from either vfs_export 2410 * or unmount, so check if it hasn't already been done. 2411 */ 2412 if (mp == NULL) { 2413 if (nfs_pub.np_valid) { 2414 nfs_pub.np_valid = 0; 2415 if (nfs_pub.np_index != NULL) { 2416 FREE(nfs_pub.np_index, M_TEMP); 2417 nfs_pub.np_index = NULL; 2418 } 2419 } 2420 return (0); 2421 } 2422 2423 /* 2424 * Only one allowed at a time. 2425 */ 2426 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2427 return (EBUSY); 2428 2429 /* 2430 * Get real filehandle for root of exported FS. 2431 */ 2432 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2433 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2434 2435 if ((error = VFS_ROOT(mp, &rvp))) 2436 return (error); 2437 2438 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2439 return (error); 2440 2441 vput(rvp); 2442 2443 /* 2444 * If an indexfile was specified, pull it in. 2445 */ 2446 if (argp->ex_indexfile != NULL) { 2447 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2448 M_WAITOK); 2449 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2450 MAXNAMLEN, (size_t *)0); 2451 if (!error) { 2452 /* 2453 * Check for illegal filenames. 2454 */ 2455 for (cp = nfs_pub.np_index; *cp; cp++) { 2456 if (*cp == '/') { 2457 error = EINVAL; 2458 break; 2459 } 2460 } 2461 } 2462 if (error) { 2463 FREE(nfs_pub.np_index, M_TEMP); 2464 return (error); 2465 } 2466 } 2467 2468 nfs_pub.np_mount = mp; 2469 nfs_pub.np_valid = 1; 2470 return (0); 2471} 2472 2473struct netcred * 2474vfs_export_lookup(mp, nep, nam) 2475 register struct mount *mp; 2476 struct netexport *nep; 2477 struct sockaddr *nam; 2478{ 2479 register struct netcred *np; 2480 register struct radix_node_head *rnh; 2481 struct sockaddr *saddr; 2482 2483 np = NULL; 2484 if (mp->mnt_flag & MNT_EXPORTED) { 2485 /* 2486 * Lookup in the export list first. 2487 */ 2488 if (nam != NULL) { 2489 saddr = nam; 2490 rnh = nep->ne_rtable[saddr->sa_family]; 2491 if (rnh != NULL) { 2492 np = (struct netcred *) 2493 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2494 rnh); 2495 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2496 np = NULL; 2497 } 2498 } 2499 /* 2500 * If no address match, use the default if it exists. 2501 */ 2502 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2503 np = &nep->ne_defexported; 2504 } 2505 return (np); 2506} 2507 2508/* 2509 * perform msync on all vnodes under a mount point 2510 * the mount point must be locked. 2511 */ 2512void 2513vfs_msync(struct mount *mp, int flags) { 2514 struct vnode *vp, *nvp; 2515 struct vm_object *obj; 2516 int anyio, tries; 2517 2518 tries = 5; 2519loop: 2520 anyio = 0; 2521 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2522 2523 nvp = vp->v_mntvnodes.le_next; 2524 2525 if (vp->v_mount != mp) { 2526 goto loop; 2527 } 2528 2529 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2530 continue; 2531 2532 if (flags != MNT_WAIT) { 2533 obj = vp->v_object; 2534 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2535 continue; 2536 if (VOP_ISLOCKED(vp)) 2537 continue; 2538 } 2539 2540 simple_lock(&vp->v_interlock); 2541 if (vp->v_object && 2542 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2543 if (!vget(vp, 2544 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2545 if (vp->v_object) { 2546 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2547 anyio = 1; 2548 } 2549 vput(vp); 2550 } 2551 } else { 2552 simple_unlock(&vp->v_interlock); 2553 } 2554 } 2555 if (anyio && (--tries > 0)) 2556 goto loop; 2557} 2558 2559/* 2560 * Create the VM object needed for VMIO and mmap support. This 2561 * is done for all VREG files in the system. Some filesystems might 2562 * afford the additional metadata buffering capability of the 2563 * VMIO code by making the device node be VMIO mode also. 2564 * 2565 * vp must be locked when vfs_object_create is called. 2566 */ 2567int 2568vfs_object_create(vp, p, cred) 2569 struct vnode *vp; 2570 struct proc *p; 2571 struct ucred *cred; 2572{ 2573 struct vattr vat; 2574 vm_object_t object; 2575 int error = 0; 2576 2577 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2578 return 0; 2579 2580retry: 2581 if ((object = vp->v_object) == NULL) { 2582 if (vp->v_type == VREG) { 2583 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2584 goto retn; 2585 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2586 } else if (major(vp->v_rdev) < nblkdev && 2587 bdevsw[major(vp->v_rdev)] != NULL) { 2588 /* 2589 * This simply allocates the biggest object possible 2590 * for a VBLK vnode. This should be fixed, but doesn't 2591 * cause any problems (yet). 2592 */ 2593 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2594 } else { 2595 goto retn; 2596 } 2597 /* 2598 * Dereference the reference we just created. This assumes 2599 * that the object is associated with the vp. 2600 */ 2601 object->ref_count--; 2602 vp->v_usecount--; 2603 } else { 2604 if (object->flags & OBJ_DEAD) { 2605 VOP_UNLOCK(vp, 0, p); 2606 tsleep(object, PVM, "vodead", 0); 2607 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2608 goto retry; 2609 } 2610 } 2611 2612 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2613 vp->v_flag |= VOBJBUF; 2614 2615retn: 2616 return error; 2617} 2618 2619static void 2620vfree(vp) 2621 struct vnode *vp; 2622{ 2623 int s; 2624 2625 s = splbio(); 2626 simple_lock(&vnode_free_list_slock); 2627 if (vp->v_flag & VTBFREE) { 2628 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2629 vp->v_flag &= ~VTBFREE; 2630 } 2631 if (vp->v_flag & VAGE) { 2632 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2633 } else { 2634 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2635 } 2636 freevnodes++; 2637 simple_unlock(&vnode_free_list_slock); 2638 vp->v_flag &= ~VAGE; 2639 vp->v_flag |= VFREE; 2640 splx(s); 2641} 2642 2643void 2644vbusy(vp) 2645 struct vnode *vp; 2646{ 2647 int s; 2648 2649 s = splbio(); 2650 simple_lock(&vnode_free_list_slock); 2651 if (vp->v_flag & VTBFREE) { 2652 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2653 vp->v_flag &= ~VTBFREE; 2654 } else { 2655 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2656 freevnodes--; 2657 } 2658 simple_unlock(&vnode_free_list_slock); 2659 vp->v_flag &= ~(VFREE|VAGE); 2660 splx(s); 2661} 2662 2663/* 2664 * Record a process's interest in events which might happen to 2665 * a vnode. Because poll uses the historic select-style interface 2666 * internally, this routine serves as both the ``check for any 2667 * pending events'' and the ``record my interest in future events'' 2668 * functions. (These are done together, while the lock is held, 2669 * to avoid race conditions.) 2670 */ 2671int 2672vn_pollrecord(vp, p, events) 2673 struct vnode *vp; 2674 struct proc *p; 2675 short events; 2676{ 2677 simple_lock(&vp->v_pollinfo.vpi_lock); 2678 if (vp->v_pollinfo.vpi_revents & events) { 2679 /* 2680 * This leaves events we are not interested 2681 * in available for the other process which 2682 * which presumably had requested them 2683 * (otherwise they would never have been 2684 * recorded). 2685 */ 2686 events &= vp->v_pollinfo.vpi_revents; 2687 vp->v_pollinfo.vpi_revents &= ~events; 2688 2689 simple_unlock(&vp->v_pollinfo.vpi_lock); 2690 return events; 2691 } 2692 vp->v_pollinfo.vpi_events |= events; 2693 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2694 simple_unlock(&vp->v_pollinfo.vpi_lock); 2695 return 0; 2696} 2697 2698/* 2699 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2700 * it is possible for us to miss an event due to race conditions, but 2701 * that condition is expected to be rare, so for the moment it is the 2702 * preferred interface. 2703 */ 2704void 2705vn_pollevent(vp, events) 2706 struct vnode *vp; 2707 short events; 2708{ 2709 simple_lock(&vp->v_pollinfo.vpi_lock); 2710 if (vp->v_pollinfo.vpi_events & events) { 2711 /* 2712 * We clear vpi_events so that we don't 2713 * call selwakeup() twice if two events are 2714 * posted before the polling process(es) is 2715 * awakened. This also ensures that we take at 2716 * most one selwakeup() if the polling process 2717 * is no longer interested. However, it does 2718 * mean that only one event can be noticed at 2719 * a time. (Perhaps we should only clear those 2720 * event bits which we note?) XXX 2721 */ 2722 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2723 vp->v_pollinfo.vpi_revents |= events; 2724 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2725 } 2726 simple_unlock(&vp->v_pollinfo.vpi_lock); 2727} 2728 2729/* 2730 * Wake up anyone polling on vp because it is being revoked. 2731 * This depends on dead_poll() returning POLLHUP for correct 2732 * behavior. 2733 */ 2734void 2735vn_pollgone(vp) 2736 struct vnode *vp; 2737{ 2738 simple_lock(&vp->v_pollinfo.vpi_lock); 2739 if (vp->v_pollinfo.vpi_events) { 2740 vp->v_pollinfo.vpi_events = 0; 2741 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2742 } 2743 simple_unlock(&vp->v_pollinfo.vpi_lock); 2744} 2745 2746 2747 2748/* 2749 * Routine to create and manage a filesystem syncer vnode. 2750 */ 2751#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2752static int sync_fsync __P((struct vop_fsync_args *)); 2753static int sync_inactive __P((struct vop_inactive_args *)); 2754static int sync_reclaim __P((struct vop_reclaim_args *)); 2755#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2756#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2757static int sync_print __P((struct vop_print_args *)); 2758#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2759 2760static vop_t **sync_vnodeop_p; 2761static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2762 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2763 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2764 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2765 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2766 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2767 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2768 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2769 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2770 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2771 { NULL, NULL } 2772}; 2773static struct vnodeopv_desc sync_vnodeop_opv_desc = 2774 { &sync_vnodeop_p, sync_vnodeop_entries }; 2775 2776VNODEOP_SET(sync_vnodeop_opv_desc); 2777 2778/* 2779 * Create a new filesystem syncer vnode for the specified mount point. 2780 */ 2781int 2782vfs_allocate_syncvnode(mp) 2783 struct mount *mp; 2784{ 2785 struct vnode *vp; 2786 static long start, incr, next; 2787 int error; 2788 2789 /* Allocate a new vnode */ 2790 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2791 mp->mnt_syncer = NULL; 2792 return (error); 2793 } 2794 vp->v_type = VNON; 2795 /* 2796 * Place the vnode onto the syncer worklist. We attempt to 2797 * scatter them about on the list so that they will go off 2798 * at evenly distributed times even if all the filesystems 2799 * are mounted at once. 2800 */ 2801 next += incr; 2802 if (next == 0 || next > syncer_maxdelay) { 2803 start /= 2; 2804 incr /= 2; 2805 if (start == 0) { 2806 start = syncer_maxdelay / 2; 2807 incr = syncer_maxdelay; 2808 } 2809 next = start; 2810 } 2811 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2812 mp->mnt_syncer = vp; 2813 return (0); 2814} 2815 2816/* 2817 * Do a lazy sync of the filesystem. 2818 */ 2819static int 2820sync_fsync(ap) 2821 struct vop_fsync_args /* { 2822 struct vnode *a_vp; 2823 struct ucred *a_cred; 2824 int a_waitfor; 2825 struct proc *a_p; 2826 } */ *ap; 2827{ 2828 struct vnode *syncvp = ap->a_vp; 2829 struct mount *mp = syncvp->v_mount; 2830 struct proc *p = ap->a_p; 2831 int asyncflag; 2832 2833 /* 2834 * We only need to do something if this is a lazy evaluation. 2835 */ 2836 if (ap->a_waitfor != MNT_LAZY) 2837 return (0); 2838 2839 /* 2840 * Move ourselves to the back of the sync list. 2841 */ 2842 vn_syncer_add_to_worklist(syncvp, syncdelay); 2843 2844 /* 2845 * Walk the list of vnodes pushing all that are dirty and 2846 * not already on the sync list. 2847 */ 2848 simple_lock(&mountlist_slock); 2849 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2850 simple_unlock(&mountlist_slock); 2851 return (0); 2852 } 2853 asyncflag = mp->mnt_flag & MNT_ASYNC; 2854 mp->mnt_flag &= ~MNT_ASYNC; 2855 vfs_msync(mp, MNT_NOWAIT); 2856 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2857 if (asyncflag) 2858 mp->mnt_flag |= MNT_ASYNC; 2859 vfs_unbusy(mp, p); 2860 return (0); 2861} 2862 2863/* 2864 * The syncer vnode is no referenced. 2865 */ 2866static int 2867sync_inactive(ap) 2868 struct vop_inactive_args /* { 2869 struct vnode *a_vp; 2870 struct proc *a_p; 2871 } */ *ap; 2872{ 2873 2874 vgone(ap->a_vp); 2875 return (0); 2876} 2877 2878/* 2879 * The syncer vnode is no longer needed and is being decommissioned. 2880 * 2881 * Modifications to the worklist must be protected at splbio(). 2882 */ 2883static int 2884sync_reclaim(ap) 2885 struct vop_reclaim_args /* { 2886 struct vnode *a_vp; 2887 } */ *ap; 2888{ 2889 struct vnode *vp = ap->a_vp; 2890 int s; 2891 2892 s = splbio(); 2893 vp->v_mount->mnt_syncer = NULL; 2894 if (vp->v_flag & VONWORKLST) { 2895 LIST_REMOVE(vp, v_synclist); 2896 vp->v_flag &= ~VONWORKLST; 2897 } 2898 splx(s); 2899 2900 return (0); 2901} 2902 2903/* 2904 * Print out a syncer vnode. 2905 */ 2906static int 2907sync_print(ap) 2908 struct vop_print_args /* { 2909 struct vnode *a_vp; 2910 } */ *ap; 2911{ 2912 struct vnode *vp = ap->a_vp; 2913 2914 printf("syncer vnode"); 2915 if (vp->v_vnlock != NULL) 2916 lockmgr_printinfo(vp->v_vnlock); 2917 printf("\n"); 2918 return (0); 2919} 2920