/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: head/sys/kern/vfs_export.c 55611 2000-01-08 16:20:06Z eivind $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnodes queued to be freed */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");

#ifdef ENABLE_VFS_IOOPT
int vfs_ioopt = 0;
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */
struct simplelock mountlist_slock;
struct simplelock mntvnode_slock;
int	nfs_mount_type = -1;
#ifndef NULL_SIMPLELOCKS
static struct simplelock mntid_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
#endif
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;
/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY	32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;		/* max time to delay syncing data */
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
    &desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
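
/*
 * Illustrative sketch (not compiled in): the usual caller pattern for
 * vfs_busy()/vfs_unbusy() is the mount list walk used by the DDB
 * "lockedvnodes" command later in this file; mounts that are being
 * unmounted are simply skipped.
 */
#if 0
	struct mount *mp, *nmp;
	struct proc *p = curproc;

	simple_lock(&mountlist_slock);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;	/* busy or being unmounted */
		}
		/* ... work on mp->mnt_vnodelist here ... */
		simple_lock(&mountlist_slock);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
#endif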
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_iosize_max = DFLTPHYS;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root.  If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}
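
/*
 * Illustrative sketch (not compiled in): a file-handle based service
 * such as NFS stores an fsid in each handle and later maps it back to
 * a mount point with vfs_getvfs(); the fhp variable is hypothetical.
 */
#if 0
	struct mount *mp;

	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
		return (ESTALE);	/* no such mounted filesystem */
#endif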
/*
 * Get a new unique fsid
 *
 * Keep in mind that several mounts may be running in parallel,
 * so always increment mntid_base even if lower numbers are available.
 */

static u_short mntid_base;

void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);

	mtype = mp->mnt_vfc->vfc_typenum;
	for (;;) {
		tfsid.val[0] = makeudev(255, mtype + (mntid_base << 16));
		tfsid.val[1] = mtype;
		++mntid_base;
		if (vfs_getvfs(&tfsid) == NULL)
			break;
	}

	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];

	simple_unlock(&mntid_slock);
}

/*
 * Knob to control the precision of file timestamps:
 *
 * 0 = seconds only; nanoseconds zeroed.
 * 1 = seconds and nanoseconds, accurate within 1/HZ.
 * 2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
    &timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(tsp)
	struct timespec *tsp;
{
	struct timeval tv;

	switch (timestamp_precision) {
	case TSP_SEC:
		tsp->tv_sec = time_second;
		tsp->tv_nsec = 0;
		break;
	case TSP_HZ:
		getnanotime(tsp);
		break;
	case TSP_USEC:
		microtime(&tv);
		TIMEVAL_TO_TIMESPEC(&tv, tsp);
		break;
	case TSP_NSEC:
	default:
		nanotime(tsp);
		break;
	}
}
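
/*
 * Illustrative sketch (not compiled in): a filesystem that needs the
 * current time for an inode update calls vfs_timestamp() rather than
 * reading the clock directly, so that the vfs.timestamp_precision
 * knob above is honored ("sysctl -w vfs.timestamp_precision=2"
 * selects microsecond truncation).  The ip fields are hypothetical.
 */
#if 0
	struct timespec ts;

	vfs_timestamp(&ts);
	ip->i_mtime = ts.tv_sec;	/* hypothetical inode fields */
	ip->i_mtimensec = ts.tv_nsec;
#endif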
/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
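
/*
 * Illustrative sketch (not compiled in): callers of VOP_SETATTR()
 * first reset every attribute to VNOVAL and then fill in only the
 * fields they intend to change, e.g. to truncate a file to zero
 * length; vp, cred and p are hypothetical here.
 */
#if 0
	struct vattr vattr;
	int error;

	vattr_null(&vattr);
	vattr.va_size = 0;		/* the single field being changed */
	error = VOP_SETATTR(vp, &vattr, cred, p);
#endif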
/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode.
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						VOP_BWRITE(bp->b_vp, bp);
					}
				} else {
					bremfree(bp);
					(void) VOP_BWRITE(bp->b_vp, bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp->b_vp, bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}
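
/*
 * Illustrative sketch (not compiled in): a filesystem truncate routine
 * discards buffers beyond the new end of file before adjusting its
 * on-disk block pointers; fs->fs_bsize stands in for the (hypothetical)
 * filesystem block size here.
 */
#if 0
	error = vtruncbuf(vp, cred, p, length, fs->fs_bsize);
	if (error)
		return (error);
#endif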
/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	bp->b_dev = vn_todev(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= BX_VNCLEAN;
	bp->b_xflags &= ~BX_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}
/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, block devices with
 * filesystems mounted on them are delayed only about half the time
 * that file data is delayed.  Similarly, directory updates are more
 * critical, so they are delayed only about a third the time that file
 * data is delayed.  Thus, there are SYNCER_MAXDELAY queues that are
 * processed round-robin at a rate of one each second (driven off the
 * filesystem syncer process).  The syncer_delayno variable indicates
 * the next queue that is to be processed.  Items that need to be
 * processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}
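
/*
 * Illustrative sketch (not compiled in): with SYNCER_MAXDELAY == 32,
 * hashinit() above yields syncer_mask == 31, so a request to sync a
 * vnode in 15 seconds while syncer_delayno == 25 lands in slot
 * (25 + 15) & 31 == 8, i.e. the delay wraps around the ring of
 * workitem queues.
 */
#if 0
	vn_syncer_add_to_worklist(vp, 15);	/* fires roughly 15s out */
#endif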
struct  proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
	    SHUTDOWN_PRI_LAST);

	p->p_flag |= P_BUFEXHAUST;

	for (;;) {
		kproc_suspend_loop(p);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp, NULL) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    !vn_isdisk(vp))
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	bp->b_flags |= B_PAGING;
	bp->b_dev = vn_todev(vp);
}
/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

#if !defined(MAX_PERF)
	/* XXX REMOVE ME */
	if (bp->b_vnbufs.tqe_next != NULL) {
		panic(
		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
		    bp,
		    (int)bp->b_flags
		);
	}
#endif
	bp->b_vp = (struct vnode *) 0;
	bp->b_flags &= ~B_PAGING;
}

void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
#if !defined(MAX_PERF)
	if ((bp->b_flags & B_PAGING) == 0) {
		panic(
		    "pbreassignbuf() on non phys bp %p",
		    bp
		);
	}
#endif
	bp->b_vp = newvp;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	++reassignbufcalls;

#if !defined(MAX_PERF)
	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");
#endif

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VCHR:
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= BX_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    bp->b_lblkno == 0 ||
		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (bp->b_lblkno < 0) {
			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (reassignbufmethod == 1) {
			/*
			 * New sorting algorithm, only handle sequential case,
			 * otherwise append to end (but before metadata)
			 */
			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
			    (tbp->b_xflags & BX_VNDIRTY)) {
				/*
				 * Found the best place to insert the buffer
				 */
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortgood;
			} else {
				/*
				 * Missed, append to end, but before meta-data.
				 * We know that the head buffer in the list is
				 * not meta-data due to prior conditionals.
				 *
				 * Indirect effects: NFS second stage write
				 * tends to wind up here, giving maximum
				 * distance between the unstable write and the
				 * commit rpc.
				 */
				tbp = TAILQ_LAST(listheadp, buflists);
				while (tbp && tbp->b_lblkno < 0)
					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortbad;
			}
		} else {
			/*
			 * Old sorting algorithm, scan queue and insert
			 */
			struct buf *ttbp;
			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
			    (ttbp->b_lblkno < bp->b_lblkno)) {
				++reassignbufloops;
				tbp = ttbp;
			}
			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
		}
	} else {
		bp->b_xflags |= BX_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}
/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	addalias(vp, dev);
	*vpp = vp;
	return (0);
}

/*
 * Add vnode to the alias list hung off the dev_t.
 *
 * The reason for this gunk is that multiple vnodes can reference
 * the same physical device, so checking vp->v_usecount to see
 * how many users there are is inadequate; the v_usecount values
 * for all the vnodes need to be accumulated.  vcount() does that.
 */
void
addaliasu(nvp, nvp_rdev)
	struct vnode *nvp;
	udev_t nvp_rdev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");
	addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
}

void
addalias(nvp, dev)
	struct vnode *nvp;
	dev_t dev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addalias on non-special vnode");

	nvp->v_rdev = dev;
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set if the
 * vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * Unlike vput, where the node is already locked, vrele must
		 * call VOP_INACTIVE with the node locked, so we explicitly
		 * lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * For vput, the node is already locked, so we can call
		 * VOP_INACTIVE directly after releasing the interlock.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}
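
/*
 * Illustrative sketch (not compiled in): the canonical way to use a
 * vnode found on some list is to vget() it (which may fail if the
 * vnode is being reclaimed), operate on it locked, and then vput()
 * to unlock and drop the reference in one step; vp, cred and p are
 * hypothetical here.
 */
#if 0
	struct vattr vattr;
	int error;

	if (vget(vp, LK_EXCLUSIVE, p) != 0)
		return (ENOENT);	/* being cleaned out; give up */
	error = VOP_GETATTR(vp, &vattr, cred, p);
	vput(vp);			/* unlock and release together */
#endif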
/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode.  For block
		 * or character devices, revert to an anonymous device.  For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
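
/*
 * Illustrative sketch (not compiled in): filesystem unmount code
 * typically flushes every vnode except the ones it still needs,
 * passing those as skipvp (or NULL to flush everything); rootvp and
 * flags here are hypothetical.
 */
#if 0
	if ((error = vflush(mp, rootvp, flags)) != 0)
		return (error);
#endif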
/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use.  If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * vclean() may be called twice.  The first time removes the
			 * primary reference to the object, the second time goes
			 * one further and is a special-case to terminate the object.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	dev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
		return (0);
	}
	dev = vp->v_rdev;
	for (;;) {
		simple_lock(&spechash_slock);
		vq = SLIST_FIRST(&dev->si_hlist);
		simple_unlock(&spechash_slock);
		if (!vq)
			break;
		vgone(vq);
	}
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		simple_lock(&spechash_slock);
		SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext);
		freedev(vp->v_rdev);
		simple_unlock(&spechash_slock);
		vp->v_rdev = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}
/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;

	simple_lock(&spechash_slock);
	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
		if (type == vp->v_type) {
			*vpp = vp;
			simple_unlock(&spechash_slock);
			return (1);
		}
	}
	simple_unlock(&spechash_slock);
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int count;

	count = 0;
	simple_lock(&spechash_slock);
	SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
		count += vq->v_usecount;
	simple_unlock(&spechash_slock);
	return (count);
}
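
/*
 * Illustrative sketch (not compiled in): a device close routine must
 * distinguish the last close from earlier ones, and because several
 * aliased vnodes can reference the same device it asks vcount(), not
 * a single vnode's v_usecount:
 */
#if 0
	if (vcount(vp) > 1)
		return (0);		/* device still open via an alias */
	/* ... last close: actually shut the device down ... */
#endif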
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp, NULL))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */
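
/*
 * Illustrative sketch (not compiled in): userland reaches the
 * VFS_CONF case above with a four-component MIB, which is essentially
 * what getvfsbyname(3) does on the caller's behalf; typenum is
 * hypothetical here.
 */
#if 0
	int name[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
	struct vfsconf vfc;
	size_t len = sizeof(vfc);

	if (sysctl(name, 4, &vfc, &len, NULL, 0) < 0)
		err(1, "sysctl");
#endif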
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
        0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
        struct vnode *vp;
{

        if (vp->v_specmountpoint != NULL)
                return (EBUSY);
        return (0);
}

/*
 * Unmount all filesystems.  The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
        struct mount *mp;
        struct proc *p;
        int error;

        if (curproc != NULL)
                p = curproc;
        else
                p = initproc;   /* XXX XXX should this be proc0? */
        /*
         * Since this only runs when rebooting, it is not interlocked.
         */
        while (!TAILQ_EMPTY(&mountlist)) {
                mp = TAILQ_LAST(&mountlist, mntlist);
                error = dounmount(mp, MNT_FORCE, p);
                if (error) {
                        TAILQ_REMOVE(&mountlist, mp, mnt_list);
                        printf("unmount of %s failed (",
                            mp->mnt_stat.f_mntonname);
                        if (error == EBUSY)
                                printf("BUSY)\n");
                        else
                                printf("%d)\n", error);
                } else {
                        /* The unmount has removed mp from the mountlist */
                }
        }
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by vfs_export() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        register int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }
        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t) np, i);
        saddr = (struct sockaddr *) (np + 1);
        if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
                error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * Seems silly to initialize every AF when most are not
                 * used; do so on demand here.
                 */
                for (dom = domains; dom; dom = dom->dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **) &nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *) rn) {   /* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
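        /*
         * The anonymous cred was copied by value out of the user
         * supplied export_args; give the kernel's private copy a
         * reference count of one.
         */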
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
        struct radix_node *rn;
        void *w;
{
        register struct radix_node_head *rnh = (struct radix_node_head *) w;

        (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t) rn, M_NETADDR);
        return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
        struct netexport *nep;
{
        register int i;
        register struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if ((rnh = nep->ne_rtable[i])) {
                        (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
                            (caddr_t) rnh);
                        free((caddr_t) rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                if (mp->mnt_flag & MNT_EXPUBLIC) {
                        vfs_setpublicfs(NULL, NULL, NULL);
                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                }
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if (argp->ex_flags & MNT_EXPUBLIC) {
                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                                return (error);
                        mp->mnt_flag |= MNT_EXPUBLIC;
                }
                if ((error = vfs_hang_addrlist(mp, nep, argp)))
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;
        struct vnode *rvp;
        char *cp;

        /*
         * mp == NULL -> invalidate the current info; the FS is
         * no longer exported.  May be called from either vfs_export
         * or unmount, so check if it hasn't already been done.
         */
        if (mp == NULL) {
                if (nfs_pub.np_valid) {
                        nfs_pub.np_valid = 0;
                        if (nfs_pub.np_index != NULL) {
                                FREE(nfs_pub.np_index, M_TEMP);
                                nfs_pub.np_index = NULL;
                        }
                }
                return (0);
        }

        /*
         * Only one allowed at a time.
         */
        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                return (EBUSY);

        /*
         * Get real filehandle for root of exported FS.
         */
        bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

        if ((error = VFS_ROOT(mp, &rvp)))
                return (error);

        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) {
                vput(rvp);
                return (error);
        }

        vput(rvp);

        /*
         * If an indexfile was specified, pull it in.
         */
        if (argp->ex_indexfile != NULL) {
                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                    M_WAITOK);
                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                    MAXNAMLEN, (size_t *)0);
                if (!error) {
                        /*
                         * Check for illegal filenames.
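                         * The WebNFS index file must be a single
                         * pathname component; any '/' in the string
                         * just copied in is rejected below.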
                         */
                        for (cp = nfs_pub.np_index; *cp; cp++) {
                                if (*cp == '/') {
                                        error = EINVAL;
                                        break;
                                }
                        }
                }
                if (error) {
                        FREE(nfs_pub.np_index, M_TEMP);
                        return (error);
                }
        }

        nfs_pub.np_mount = mp;
        nfs_pub.np_valid = 1;
        return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct sockaddr *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = nam;
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                        rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
        struct vnode *vp, *nvp;
        struct vm_object *obj;
        int anyio, tries;

        tries = 5;
loop:
        anyio = 0;
        for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {

                nvp = LIST_NEXT(vp, v_mntvnodes);

                if (vp->v_mount != mp) {
                        goto loop;
                }

                if (vp->v_flag & VXLOCK)        /* XXX: what if MNT_WAIT? */
                        continue;

                if (flags != MNT_WAIT) {
                        obj = vp->v_object;
                        if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
                                continue;
                        if (VOP_ISLOCKED(vp, NULL))
                                continue;
                }

                simple_lock(&vp->v_interlock);
                if (vp->v_object &&
                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
                        if (!vget(vp,
                            LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ,
                            curproc)) {
                                if (vp->v_object) {
                                        vm_object_page_clean(vp->v_object,
                                            0, 0, flags == MNT_WAIT ?
                                            OBJPC_SYNC : OBJPC_NOSYNC);
                                        anyio = 1;
                                }
                                vput(vp);
                        }
                } else {
                        simple_unlock(&vp->v_interlock);
                }
        }
        if (anyio && (--tries > 0))
                goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * take advantage of the additional metadata buffering capability of
 * the VMIO code by making the device node be VMIO mode also.
 *
 * vp must be locked when vfs_object_create is called.
 */
int
vfs_object_create(vp, p, cred)
        struct vnode *vp;
        struct proc *p;
        struct ucred *cred;
{
        struct vattr vat;
        vm_object_t object;
        int error = 0;

        if (!vn_isdisk(vp) && vn_canvmio(vp) == FALSE)
                return (0);

retry:
        if ((object = vp->v_object) == NULL) {
                if (vp->v_type == VREG || vp->v_type == VDIR) {
                        if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
                                goto retn;
                        object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
                } else if (devsw(vp->v_rdev) != NULL) {
                        /*
                         * This simply allocates the biggest object possible
                         * for a disk vnode.  This should be fixed, but doesn't
                         * cause any problems (yet).
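                         * ("Biggest possible" means a pager object of
                         * IDX_TO_OFF(INT_MAX) bytes; a device has no
                         * usable size attribute here, so the largest
                         * page index that fits in an int is used.)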
                         */
                        object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
                } else {
                        goto retn;
                }
                /*
                 * Dereference the reference we just created.  This assumes
                 * that the object is associated with the vp.
                 */
                object->ref_count--;
                vp->v_usecount--;
        } else {
                if (object->flags & OBJ_DEAD) {
                        VOP_UNLOCK(vp, 0, p);
                        tsleep(object, PVM, "vodead", 0);
                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
                        goto retry;
                }
        }

        KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
        vp->v_flag |= VOBJBUF;

retn:
        return (error);
}

static void
vfree(vp)
        struct vnode *vp;
{
        int s;

        s = splbio();
        simple_lock(&vnode_free_list_slock);
        if (vp->v_flag & VTBFREE) {
                TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
                vp->v_flag &= ~VTBFREE;
        }
        if (vp->v_flag & VAGE) {
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        } else {
                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        }
        freevnodes++;
        simple_unlock(&vnode_free_list_slock);
        vp->v_flag &= ~VAGE;
        vp->v_flag |= VFREE;
        splx(s);
}

void
vbusy(vp)
        struct vnode *vp;
{
        int s;

        s = splbio();
        simple_lock(&vnode_free_list_slock);
        if (vp->v_flag & VTBFREE) {
                TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
                vp->v_flag &= ~VTBFREE;
        } else {
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                freevnodes--;
        }
        simple_unlock(&vnode_free_list_slock);
        vp->v_flag &= ~(VFREE|VAGE);
        splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
        struct vnode *vp;
        struct proc *p;
        short events;
{
        simple_lock(&vp->v_pollinfo.vpi_lock);
        if (vp->v_pollinfo.vpi_revents & events) {
                /*
                 * This leaves events we are not interested
                 * in available for the other process which
                 * presumably had requested them
                 * (otherwise they would never have been
                 * recorded).
                 */
                events &= vp->v_pollinfo.vpi_revents;
                vp->v_pollinfo.vpi_revents &= ~events;

                simple_unlock(&vp->v_pollinfo.vpi_lock);
                return (events);
        }
        vp->v_pollinfo.vpi_events |= events;
        selrecord(p, &vp->v_pollinfo.vpi_selinfo);
        simple_unlock(&vp->v_pollinfo.vpi_lock);
        return (0);
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
        struct vnode *vp;
        short events;
{
        simple_lock(&vp->v_pollinfo.vpi_lock);
        if (vp->v_pollinfo.vpi_events & events) {
                /*
                 * We clear vpi_events so that we don't
                 * call selwakeup() twice if two events are
                 * posted before the polling process(es) is
                 * awakened.  This also ensures that we take at
                 * most one selwakeup() if the polling process
                 * is no longer interested.
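                 * (A later vn_pollrecord() call will still see the
                 * event, since the bit stays latched in vpi_revents.)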
                 * However, it does mean that only one event can be
                 * noticed at a time.  (Perhaps we should only clear
                 * those event bits which we note?) XXX
                 */
                vp->v_pollinfo.vpi_events = 0;  /* &= ~events ??? */
                vp->v_pollinfo.vpi_revents |= events;
                selwakeup(&vp->v_pollinfo.vpi_selinfo);
        }
        simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
        struct vnode *vp;
{
        simple_lock(&vp->v_pollinfo.vpi_lock);
        if (vp->v_pollinfo.vpi_events) {
                vp->v_pollinfo.vpi_events = 0;
                selwakeup(&vp->v_pollinfo.vpi_selinfo);
        }
        simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
static int      sync_fsync __P((struct vop_fsync_args *));
static int      sync_inactive __P((struct vop_inactive_args *));
static int      sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
static int      sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
        { &vop_default_desc,    (vop_t *) vop_eopnotsupp },
        { &vop_close_desc,      (vop_t *) sync_close },         /* close */
        { &vop_fsync_desc,      (vop_t *) sync_fsync },         /* fsync */
        { &vop_inactive_desc,   (vop_t *) sync_inactive },      /* inactive */
        { &vop_reclaim_desc,    (vop_t *) sync_reclaim },       /* reclaim */
        { &vop_lock_desc,       (vop_t *) sync_lock },          /* lock */
        { &vop_unlock_desc,     (vop_t *) sync_unlock },        /* unlock */
        { &vop_print_desc,      (vop_t *) sync_print },         /* print */
        { &vop_islocked_desc,   (vop_t *) sync_islocked },      /* islocked */
        { NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
        { &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
        struct mount *mp;
{
        struct vnode *vp;
        static long start, incr, next;
        int error;

        /* Allocate a new vnode */
        if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
                mp->mnt_syncer = NULL;
                return (error);
        }
        vp->v_type = VNON;
        /*
         * Place the vnode onto the syncer worklist.  We attempt to
         * scatter them about on the list so that they will go off
         * at evenly distributed times even if all the filesystems
         * are mounted at once.
         */
        next += incr;
        if (next == 0 || next > syncer_maxdelay) {
                start /= 2;
                incr /= 2;
                if (start == 0) {
                        start = syncer_maxdelay / 2;
                        incr = syncer_maxdelay;
                }
                next = start;
        }
        vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
        mp->mnt_syncer = vp;
        return (0);
}

/*
 * Do a lazy sync of the filesystem.
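 *
 * This is reached through VOP_FSYNC() on the mnt_syncer vnode with
 * MNT_LAZY as the waitfor argument; any other value is a no-op, and
 * each pass requeues the vnode syncdelay seconds into the future.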
 */
static int
sync_fsync(ap)
        struct vop_fsync_args /* {
                struct vnode *a_vp;
                struct ucred *a_cred;
                int a_waitfor;
                struct proc *a_p;
        } */ *ap;
{
        struct vnode *syncvp = ap->a_vp;
        struct mount *mp = syncvp->v_mount;
        struct proc *p = ap->a_p;
        int asyncflag;

        /*
         * We only need to do something if this is a lazy evaluation.
         */
        if (ap->a_waitfor != MNT_LAZY)
                return (0);

        /*
         * Move ourselves to the back of the sync list.
         */
        vn_syncer_add_to_worklist(syncvp, syncdelay);

        /*
         * Walk the list of vnodes pushing all that are dirty and
         * not already on the sync list.
         */
        simple_lock(&mountlist_slock);
        if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
                simple_unlock(&mountlist_slock);
                return (0);
        }
        asyncflag = mp->mnt_flag & MNT_ASYNC;
        mp->mnt_flag &= ~MNT_ASYNC;
        vfs_msync(mp, MNT_NOWAIT);
        VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
        if (asyncflag)
                mp->mnt_flag |= MNT_ASYNC;
        vfs_unbusy(mp, p);
        return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
        struct vop_inactive_args /* {
                struct vnode *a_vp;
                struct proc *a_p;
        } */ *ap;
{

        vgone(ap->a_vp);
        return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 *
 * Modifications to the worklist must be protected at splbio().
 */
static int
sync_reclaim(ap)
        struct vop_reclaim_args /* {
                struct vnode *a_vp;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        int s;

        s = splbio();
        vp->v_mount->mnt_syncer = NULL;
        if (vp->v_flag & VONWORKLST) {
                LIST_REMOVE(vp, v_synclist);
                vp->v_flag &= ~VONWORKLST;
        }
        splx(s);

        return (0);
}

/*
 * Print out a syncer vnode.
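 * (Reached through VOP_PRINT(), e.g. from vprint() above.)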
 */
static int
sync_print(ap)
        struct vop_print_args /* {
                struct vnode *a_vp;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;

        printf("syncer vnode");
        if (vp->v_vnlock != NULL)
                lockmgr_printinfo(vp->v_vnlock);
        printf("\n");
        return (0);
}

/*
 * Extract the dev_t from a VBLK or VCHR vnode.
 */
dev_t
vn_todev(vp)
        struct vnode *vp;
{
        if (vp->v_type != VBLK && vp->v_type != VCHR)
                return (NODEV);
        return (vp->v_rdev);
}

/*
 * Check if a vnode represents a disk device.
 */
int
vn_isdisk(vp)
        struct vnode *vp;
{
        if (vp->v_type != VBLK && vp->v_type != VCHR)
                return (0);
        if (!devsw(vp->v_rdev))
                return (0);
        if (!(devsw(vp->v_rdev)->d_flags & D_DISK))
                return (0);
        return (1);
}

void
NDFREE(ndp, flags)
        struct nameidata *ndp;
        const uint flags;
{
        if (!(flags & NDF_NO_FREE_PNBUF) &&
            (ndp->ni_cnd.cn_flags & HASBUF)) {
                zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
                ndp->ni_cnd.cn_flags &= ~HASBUF;
        }
        if (!(flags & NDF_NO_DVP_UNLOCK) &&
            (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
            ndp->ni_dvp != ndp->ni_vp)
                VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
        if (!(flags & NDF_NO_DVP_RELE) &&
            (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
                vrele(ndp->ni_dvp);
                ndp->ni_dvp = NULL;
        }
        if (!(flags & NDF_NO_VP_UNLOCK) &&
            (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
                VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
        if (!(flags & NDF_NO_VP_RELE) &&
            ndp->ni_vp) {
                vrele(ndp->ni_vp);
                ndp->ni_vp = NULL;
        }
        if (!(flags & NDF_NO_STARTDIR_RELE) &&
            (ndp->ni_cnd.cn_flags & SAVESTART)) {
                vrele(ndp->ni_startdir);
                ndp->ni_startdir = NULL;
        }
}
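
/*
 * Example (an illustrative sketch only, not part of the original
 * file): NDFREE() releases whatever state a namei() lookup left
 * behind, as directed by the NDF_* flags.  A typical caller keeps
 * the looked-up vnode and frees just the pathname buffer with
 * NDF_ONLY_PNBUF.  The helper name example_lookup is hypothetical.
 */
#if 0
static int
example_lookup(char *path, struct proc *p, struct vnode **vpp)
{
        struct nameidata nd;
        int error;

        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
        if ((error = namei(&nd)) != 0)
                return (error);
        NDFREE(&nd, NDF_ONLY_PNBUF);    /* free only the path buffer */
        *vpp = nd.ni_vp;                /* still locked and referenced */
        return (0);
}
#endif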