vfs_export.c revision 48391
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.203 1999/06/26 02:46:10 mckusick Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
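/*
 * Illustrative note (editor's example, not in the original source): these
 * tables back the IFTOVT() and VTTOIF() macros from <sys/vnode.h>, which
 * index them by (mode & S_IFMT) >> 12 and by vnode type respectively.
 * For instance, IFTOVT(S_IFDIR) is iftovt_tab[0x4000 >> 12] == VDIR, and
 * VTTOIF(VDIR) is vttoif_tab[2] == S_IFDIR.
 */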
static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

int vfs_ioopt = 0;
#ifdef ENABLE_VFS_IOOPT
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
struct simplelock mntvnode_slock;
int nfs_mount_type = -1;
#ifndef NULL_SIMPLELOCKS
static struct simplelock mntid_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
#endif
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;		/* max time to delay syncing data */
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
    &desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
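/*
 * Usage sketch (editor's example, not in the original source; mp, nmp and
 * p are the caller's locals): a typical mountlist walker busies each mount
 * point so it cannot be unmounted out from under it, handing vfs_busy()
 * the list interlock so the lock order is preserved:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 *
 * The DDB and sysctl loops later in this file follow exactly this shape.
 */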
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root.  If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = (256 + mtype) * 256;
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = (256 + mtype) * 256 | xxxfs_mntid;
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
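/*
 * Worked example (editor's illustration, not in the original source): for
 * a filesystem type with vfc_typenum 1, val[0] starts out as
 * (256 + 1) * 256 = 0x10100; with the first mount id the candidate fsid
 * becomes { 0x10100 | 1, 1 } = { 0x10101, 1 }, and val[0] is simply
 * incremented until vfs_getvfs() no longer finds a mount using it.
 */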
/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode.
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						VOP_BWRITE(bp->b_vp, bp);
					}
				} else {
					bremfree(bp);
					(void) VOP_BWRITE(bp->b_vp, bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn, e.g. a length of 1 with a blksize of
	 * 512 gives trunclbn 1, so block 0 is kept and blocks 1 and up are
	 * discarded.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp->b_vp, bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= B_VNCLEAN;
	bp->b_xflags &= ~B_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, metadata written to
 * block devices is delayed only about half the time that file data
 * is delayed.  Similarly, directory updates are more critical, so
 * are only delayed about a third the time that file data is delayed.
 * Thus, there are SYNCER_MAXDELAY queues that are processed
 * round-robin at a rate of one each second (driven off the filesystem
 * syncer process).  The syncer_delayno variable indicates the next
 * queue that is to be processed.  Items that need to be processed
 * soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}
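/*
 * Worked example (editor's illustration, not in the original source):
 * with SYNCER_MAXDELAY == 32, hashinit() above sizes the table to 32
 * slots and sets syncer_mask to 31.  Adding a vnode with a delay of 15
 * while syncer_delayno is 20 files it in slot (20 + 15) & 31 == 3; since
 * the syncer sweeps one slot per second and wraps from 31 back to 0,
 * that slot is reached 15 seconds later.
 */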
struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	bp->b_flags |= B_PAGING;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

#if !defined(MAX_PERF)
	/* XXX REMOVE ME */
	if (bp->b_vnbufs.tqe_next != NULL) {
		panic(
		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
		    bp,
		    (int)bp->b_flags
		);
	}
#endif
	bp->b_vp = (struct vnode *) 0;
	bp->b_flags &= ~B_PAGING;
}

void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
#if !defined(MAX_PERF)
	if ((bp->b_flags & B_PAGING) == 0) {
		panic(
		    "pbreassignbuf() on non phys bp %p",
		    bp
		);
	}
#endif
	bp->b_vp = newvp;
}
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

#if !defined(MAX_PERF)
	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");
#endif

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= B_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
		} else {
			if (bp->b_lblkno >= 0) {
				struct buf *ttbp;
				while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
				    (ttbp->b_lblkno < bp->b_lblkno)) {
					tbp = ttbp;
				}
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
			} else {
				TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			}
		}
	} else {
		bp->b_xflags |= B_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if ((nvp = checkalias(vp, dev2udev(dev), (struct mount *)0)) != NULL) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	udev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;
	dev_t dev;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	dev = udev2dev(nvp_rdev, 2);

	vpp = &speclisth[SPECHASH(dev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * Only alias active device nodes.
		 * Not sure why we don't re-use this like we do below.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			/*
			 * It disappeared, and we may have slept.
			 * Restart from the beginning.
			 */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/*
	 * It would be a lot clearer what is going on here if
	 * this had been expressed as:
	 *	if ( vp && (vp->v_tag == VT_NULL))
	 * and the clauses had been swapped.
	 */
	if (vp == NULL || vp->v_tag != VT_NON) {
		struct specinfo *sinfo;

		/*
		 * Put the new vnode into the hash chain,
		 * and if there was an alias, connect them.
		 */
		MALLOC(sinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		bzero(sinfo, sizeof(struct specinfo));
		nvp->v_specinfo = sinfo;
		sinfo->si_rdev = dev;
		sinfo->si_hashchain = vpp;
		sinfo->si_specnext = *vpp;
		sinfo->si_bsize_phys = DEV_BSIZE;
		sinfo->si_bsize_best = BLKDEV_IOSIZE;
		sinfo->si_bsize_max = MAXBSIZE;

		/*
		 * Ask the device to fix up specinfo.  Typically the
		 * si_bsize_* parameters may need fixing up.
		 */

		if (nvp->v_type == VBLK) {
			if (bdevsw(dev) && bdevsw(dev)->d_parms)
				(*bdevsw(dev)->d_parms)(dev, sinfo, DPARM_GET);
		} else if (nvp->v_type == VCHR) {
			if (devsw(dev) && devsw(dev)->d_parms)
				(*devsw(dev)->d_parms)(dev, sinfo, DPARM_GET);
		}

		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 *	if ( vp && (vp->v_tag == VT_NULL))
	 * We have a vnode alias, but it is trashed.
	 * Make it look like it was newly allocated (by getnewvnode()).
	 * The caller should use this instead.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
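/*
 * Illustrative sketch (editor's example, not in the original source):
 * after bdevvp() has created a vnode for a device and checkalias() later
 * sees a second vnode for the same dev_t, the hash chain rooted at
 * speclisth[SPECHASH(dev)] links both vnodes through v_specnext, with
 * VALIASED set on each.  vcount() below walks that chain to total
 * v_usecount over all aliases.
 */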
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set if the
 * vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}
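/*
 * Usage note (editor's example, not in the original source): the releases
 * pair with the two ways of obtaining a reference.  A caller that took a
 * locked reference drops both with vput(); one that only bumped the count
 * drops it with vrele():
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the locked, referenced vnode ...
 *		vput(vp);		-- unlock and release
 *	}
 *
 *	vref(vp);
 *	... use the referenced vnode ...
 *	vrele(vp);			-- release only
 */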
/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode.  For block
		 * or character devices, revert to an anonymous device.  For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
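/*
 * Typical call (editor's example, not in the original source; mntflags is
 * the caller's unmount flag word): an unmount path flushes every vnode on
 * the mount, forcibly reclaiming active ones only when MNT_FORCE was
 * given:
 *
 *	error = vflush(mp, NULLVP, (mntflags & MNT_FORCE) ? FORCECLOSE : 0);
 *
 * An EBUSY return means some vnode was still in use, so the unmount
 * should fail unless it was forced.
 */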
/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use.  If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif
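/*
 * Illustrative userland probe of the handler above (editor's sketch, not
 * in the original source): the configured filesystem types can be walked
 * through the VFS_GENERIC node roughly the way getvfsbyname(3) does it:
 *
 *	int mib[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
 *	struct vfsconf vfc;
 *	size_t len = sizeof(vfc);
 *
 *	if (sysctl(mib, 4, &vfc, &len, NULL, 0) == 0)
 *		... vfc.vfc_name names the filesystem type ...
 */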
#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
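/*
 * Illustrative only (not compiled): a minimal sketch of how a mount
 * path might use vfs_mountedon() to refuse a device that already
 * carries a filesystem.  "devvp" is a hypothetical device vnode
 * supplied by the caller.
 */
#if 0
	int error;

	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);		/* EBUSY: device already mounted */
#endif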
/*
 * Unmount all filesystems.  The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family
		 * when most are never used, so do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
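/*
 * Illustrative only (not compiled): a minimal sketch of how a
 * filesystem mount routine might register a default (zero-length
 * address) read-only export through vfs_export().  "ump" and its
 * um_export member are hypothetical stand-ins for a filesystem's
 * private per-mount data.
 */
#if 0
	struct export_args ea;
	int error;

	bzero(&ea, sizeof(ea));
	ea.ex_flags = MNT_EXPORTED | MNT_EXRDONLY;
	ea.ex_addrlen = 0;		/* zero length selects default export */
	error = vfs_export(mp, &ump->um_export, &ea);
	if (error)
		return (error);
#endif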
2471 */ 2472 if (argp->ex_indexfile != NULL) { 2473 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2474 M_WAITOK); 2475 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2476 MAXNAMLEN, (size_t *)0); 2477 if (!error) { 2478 /* 2479 * Check for illegal filenames. 2480 */ 2481 for (cp = nfs_pub.np_index; *cp; cp++) { 2482 if (*cp == '/') { 2483 error = EINVAL; 2484 break; 2485 } 2486 } 2487 } 2488 if (error) { 2489 FREE(nfs_pub.np_index, M_TEMP); 2490 return (error); 2491 } 2492 } 2493 2494 nfs_pub.np_mount = mp; 2495 nfs_pub.np_valid = 1; 2496 return (0); 2497} 2498 2499struct netcred * 2500vfs_export_lookup(mp, nep, nam) 2501 register struct mount *mp; 2502 struct netexport *nep; 2503 struct sockaddr *nam; 2504{ 2505 register struct netcred *np; 2506 register struct radix_node_head *rnh; 2507 struct sockaddr *saddr; 2508 2509 np = NULL; 2510 if (mp->mnt_flag & MNT_EXPORTED) { 2511 /* 2512 * Lookup in the export list first. 2513 */ 2514 if (nam != NULL) { 2515 saddr = nam; 2516 rnh = nep->ne_rtable[saddr->sa_family]; 2517 if (rnh != NULL) { 2518 np = (struct netcred *) 2519 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2520 rnh); 2521 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2522 np = NULL; 2523 } 2524 } 2525 /* 2526 * If no address match, use the default if it exists. 2527 */ 2528 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2529 np = &nep->ne_defexported; 2530 } 2531 return (np); 2532} 2533 2534/* 2535 * perform msync on all vnodes under a mount point 2536 * the mount point must be locked. 2537 */ 2538void 2539vfs_msync(struct mount *mp, int flags) { 2540 struct vnode *vp, *nvp; 2541 struct vm_object *obj; 2542 int anyio, tries; 2543 2544 tries = 5; 2545loop: 2546 anyio = 0; 2547 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2548 2549 nvp = vp->v_mntvnodes.le_next; 2550 2551 if (vp->v_mount != mp) { 2552 goto loop; 2553 } 2554 2555 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */ 2556 continue; 2557 2558 if (flags != MNT_WAIT) { 2559 obj = vp->v_object; 2560 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2561 continue; 2562 if (VOP_ISLOCKED(vp)) 2563 continue; 2564 } 2565 2566 simple_lock(&vp->v_interlock); 2567 if (vp->v_object && 2568 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2569 if (!vget(vp, 2570 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2571 if (vp->v_object) { 2572 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2573 anyio = 1; 2574 } 2575 vput(vp); 2576 } 2577 } else { 2578 simple_unlock(&vp->v_interlock); 2579 } 2580 } 2581 if (anyio && (--tries > 0)) 2582 goto loop; 2583} 2584 2585/* 2586 * Create the VM object needed for VMIO and mmap support. This 2587 * is done for all VREG files in the system. Some filesystems might 2588 * afford the additional metadata buffering capability of the 2589 * VMIO code by making the device node be VMIO mode also. 2590 * 2591 * vp must be locked when vfs_object_create is called. 
2592 */ 2593int 2594vfs_object_create(vp, p, cred) 2595 struct vnode *vp; 2596 struct proc *p; 2597 struct ucred *cred; 2598{ 2599 struct vattr vat; 2600 vm_object_t object; 2601 int error = 0; 2602 2603 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2604 return 0; 2605 2606retry: 2607 if ((object = vp->v_object) == NULL) { 2608 if (vp->v_type == VREG) { 2609 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2610 goto retn; 2611 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2612 } else if (bdevsw(vp->v_rdev) != NULL) { 2613 /* 2614 * This simply allocates the biggest object possible 2615 * for a VBLK vnode. This should be fixed, but doesn't 2616 * cause any problems (yet). 2617 */ 2618 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2619 } else { 2620 goto retn; 2621 } 2622 /* 2623 * Dereference the reference we just created. This assumes 2624 * that the object is associated with the vp. 2625 */ 2626 object->ref_count--; 2627 vp->v_usecount--; 2628 } else { 2629 if (object->flags & OBJ_DEAD) { 2630 VOP_UNLOCK(vp, 0, p); 2631 tsleep(object, PVM, "vodead", 0); 2632 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2633 goto retry; 2634 } 2635 } 2636 2637 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2638 vp->v_flag |= VOBJBUF; 2639 2640retn: 2641 return error; 2642} 2643 2644static void 2645vfree(vp) 2646 struct vnode *vp; 2647{ 2648 int s; 2649 2650 s = splbio(); 2651 simple_lock(&vnode_free_list_slock); 2652 if (vp->v_flag & VTBFREE) { 2653 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2654 vp->v_flag &= ~VTBFREE; 2655 } 2656 if (vp->v_flag & VAGE) { 2657 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2658 } else { 2659 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2660 } 2661 freevnodes++; 2662 simple_unlock(&vnode_free_list_slock); 2663 vp->v_flag &= ~VAGE; 2664 vp->v_flag |= VFREE; 2665 splx(s); 2666} 2667 2668void 2669vbusy(vp) 2670 struct vnode *vp; 2671{ 2672 int s; 2673 2674 s = splbio(); 2675 simple_lock(&vnode_free_list_slock); 2676 if (vp->v_flag & VTBFREE) { 2677 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2678 vp->v_flag &= ~VTBFREE; 2679 } else { 2680 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2681 freevnodes--; 2682 } 2683 simple_unlock(&vnode_free_list_slock); 2684 vp->v_flag &= ~(VFREE|VAGE); 2685 splx(s); 2686} 2687 2688/* 2689 * Record a process's interest in events which might happen to 2690 * a vnode. Because poll uses the historic select-style interface 2691 * internally, this routine serves as both the ``check for any 2692 * pending events'' and the ``record my interest in future events'' 2693 * functions. (These are done together, while the lock is held, 2694 * to avoid race conditions.) 2695 */ 2696int 2697vn_pollrecord(vp, p, events) 2698 struct vnode *vp; 2699 struct proc *p; 2700 short events; 2701{ 2702 simple_lock(&vp->v_pollinfo.vpi_lock); 2703 if (vp->v_pollinfo.vpi_revents & events) { 2704 /* 2705 * This leaves events we are not interested 2706 * in available for the other process which 2707 * which presumably had requested them 2708 * (otherwise they would never have been 2709 * recorded). 2710 */ 2711 events &= vp->v_pollinfo.vpi_revents; 2712 vp->v_pollinfo.vpi_revents &= ~events; 2713 2714 simple_unlock(&vp->v_pollinfo.vpi_lock); 2715 return events; 2716 } 2717 vp->v_pollinfo.vpi_events |= events; 2718 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2719 simple_unlock(&vp->v_pollinfo.vpi_lock); 2720 return 0; 2721} 2722 2723/* 2724 * Note the occurrence of an event. 
static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return events;
	}
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}
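/*
 * Illustrative only (not compiled): a minimal sketch of how a
 * filesystem might pair these routines.  A VOP_POLL implementation
 * records interest, and the path that later produces data posts the
 * event; the fifo_has_data() check is hypothetical.
 */
#if 0
	/* In a filesystem's VOP_POLL routine: */
	if (!fifo_has_data(vp))
		return (vn_pollrecord(vp, ap->a_p, ap->a_events));

	/* Later, in the path that makes new data available: */
	vn_pollevent(vp, POLLIN | POLLRDNORM);
#endif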
/*
 * Routine to create and manage a filesystem syncer vnode.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
static int	sync_fsync __P((struct vop_fsync_args *));
static int	sync_inactive __P((struct vop_inactive_args *));
static int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist.  We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 *
 * Modifications to the worklist must be protected at splbio().
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int s;

	s = splbio();
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}
	splx(s);

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}
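/*
 * Illustrative only (not compiled): a minimal sketch of how a mount
 * path might attach a syncer vnode once a read-write mount succeeds.
 * The guard conditions are plausible assumptions, not a fixed
 * requirement of vfs_allocate_syncvnode().
 */
#if 0
	if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) {
		error = vfs_allocate_syncvnode(mp);
		if (error)
			return (error);
	}
#endif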