vfs_export.c revision 51345
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: head/sys/kern/vfs_export.c 51345 1999-09-17 06:10:27Z dillon $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>
#include <sys/conf.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");

int enable_userblk_io = 1;
SYSCTL_INT(_vfs, OID_AUTO, enable_userblk_io, CTLFLAG_RW, &enable_userblk_io, 0, "");

#ifdef ENABLE_VFS_IOOPT
int vfs_ioopt = 0;
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
struct simplelock mntvnode_slock;
int nfs_mount_type = -1;
#ifndef NULL_SIMPLELOCKS
static struct simplelock mntid_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
#endif
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;
/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;		/* max time to delay syncing data */
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
    &desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
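/*
 * Illustrative sketch (not compiled): the usual pattern for walking the
 * mount list is to vfs_busy() each mount point while mountlist_slock is
 * held, do the work with the interlock dropped, and vfs_unbusy() before
 * moving on.  This mirrors the loop in DB_SHOW_COMMAND(lockedvnodes)
 * later in this file; the function name is hypothetical.
 */
#if 0
static void
example_foreach_mount(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			/* Mount is being unmounted; interlock still held. */
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on mp; it cannot be unmounted here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif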
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makeudev(255, mtype);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16));
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			xxxfs_mntid++;
			tfsid.val[0] = makeudev(255,
			    mtype + (xxxfs_mntid << 16));
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
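/*
 * Illustrative sketch (not compiled): vfs_getvfs() is how a file-handle
 * style lookup recovers the mount point from an fsid that came in off
 * the wire, e.g. in the NFS server path.  The function name below is
 * hypothetical.
 */
#if 0
static int
example_fsid_to_mount(fsid_t *fsid, struct mount **mpp)
{
	struct mount *mp;

	if ((mp = vfs_getvfs(fsid)) == NULL)
		return (ESTALE);	/* no such mount: stale handle */
	*mpp = mp;
	return (0);
}
#endif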
/*
 * Knob to control the precision of file timestamps:
 *
 *   0 = seconds only; nanoseconds zeroed.
 *   1 = seconds and nanoseconds, accurate within 1/HZ.
 *   2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
    &timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(tsp)
	struct timespec *tsp;
{
	struct timeval tv;

	switch (timestamp_precision) {
	case TSP_SEC:
		tsp->tv_sec = time_second;
		tsp->tv_nsec = 0;
		break;
	case TSP_HZ:
		getnanotime(tsp);
		break;
	case TSP_USEC:
		microtime(&tv);
		TIMEVAL_TO_TIMESPEC(&tv, tsp);
		break;
	case TSP_NSEC:
	default:
		nanotime(tsp);
		break;
	}
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
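/*
 * Illustrative sketch (not compiled): callers build a VOP_SETATTR()
 * request by first clearing every field to VNOVAL with vattr_null() and
 * then filling in only the attributes to be changed; fields left at
 * VNOVAL are ignored by the filesystem.  The function name below is
 * hypothetical.
 */
#if 0
static int
example_set_size(struct vnode *vp, off_t size, struct ucred *cred,
    struct proc *p)
{
	struct vattr va;

	vattr_null(&va);
	va.va_size = size;	/* the only field being changed */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif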
/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		if (vp->v_flag & VAGE) {
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		} else {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		}
		vp->v_flag &= ~(VTBFREE|VAGE);
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);
			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				printf("object inconsistent state: RPC: %d, RC: %d\n",
				    object->resident_page_count, object->ref_count);
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				break;
			}
		}
	}

	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		vp->v_maxio = 0;
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}
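/*
 * Illustrative sketch (not compiled): a filesystem's inode-to-vnode
 * routine typically obtains a fresh vnode with getnewvnode(), attaches
 * its per-fs data to v_data, and sets the type.  VT_UFS and
 * ufs_vnodeop_p stand in for a real filesystem's tag and vop vector;
 * the function name is hypothetical.
 */
#if 0
static int
example_alloc_vnode(struct mount *mp, void *fsdata, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
	if (error)
		return (error);
	vp->v_data = fsdata;	/* hang the in-core inode off the vnode */
	vp->v_type = VREG;
	*vpp = vp;		/* returned with v_usecount == 1 */
	return (0);
}
#endif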
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						VOP_BWRITE(bp->b_vp, bp);
					}
				} else {
					bremfree(bp);
					(void) VOP_BWRITE(bp->b_vp, bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
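/*
 * Illustrative sketch (not compiled): a typical close-time or
 * reclaim-time flush passes V_SAVE so dirty buffers are written back
 * before being invalidated, as vclean() does below; passing 0 instead
 * discards dirty data.  The function name is hypothetical.
 */
#if 0
static int
example_flush_vnode(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	/* Write dirty buffers, then invalidate everything cached. */
	return (vinvalbuf(vp, V_SAVE, cred, p, 0, 0));
}
#endif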
/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
				    (nbp->b_vp != vp) ||
				    (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					VOP_BWRITE(bp->b_vp, bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	bp->b_dev = vn_todev(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= B_VNCLEAN;
	bp->b_xflags &= ~B_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}
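/*
 * Illustrative sketch (not compiled): a filesystem's truncate routine
 * calls vtruncbuf() to throw away buffers wholly beyond the new end of
 * file before updating the on-disk inode.  blksize is the filesystem
 * block size used to convert the byte length to a logical block number;
 * the function name is hypothetical.
 */
#if 0
static int
example_truncate(struct vnode *vp, off_t length, struct ucred *cred,
    struct proc *p, int fs_bsize)
{
	int error;

	error = vtruncbuf(vp, cred, p, length, fs_bsize);
	if (error)
		return (error);
	/* ... update inode size and free the disk blocks ... */
	return (0);
}
#endif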
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed. To realize this,
 * we append vnodes to a "workitem" queue. When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds. Thus, filesystems mounted on
 * block devices are delayed only about half the time that file data is
 * delayed. Similarly, directory updates are more critical, so they are
 * only delayed about a third the time that file data is delayed. Thus,
 * there are SYNCER_MAXDELAY queues that are processed round-robin at a
 * rate of one each second (driven off the filesystem syncer process).
 * The syncer_delayno variable indicates the next queue that is to be
 * processed. Items that need to be processed soon are placed in this
 * queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}
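/*
 * Illustrative sketch (not compiled): the syncer wheel is a power-of-two
 * ring, so scheduling "delay" seconds into the future is a mask, not a
 * modulus.  With SYNCER_MAXDELAY = 32 (syncer_mask = 31), a vnode added
 * while syncer_delayno == 28 with delay == 15 lands in slot
 * (28 + 15) & 31 == 11, i.e. it wraps around the wheel.
 */
#if 0
static void
example_delay_vnode(struct vnode *vp)
{
	/* Ask the syncer to push this vnode roughly 15 seconds from now. */
	vn_syncer_add_to_worklist(vp, 15);
}
#endif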
struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	p->p_flag |= P_BUFEXHAUST;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    vp->v_type != VBLK)
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
		if (bioops.io_sync)
			(*bioops.io_sync)(NULL);

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process. A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP. Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait. Otherwise start right over
		 * again. We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	bp->b_flags |= B_PAGING;
	bp->b_dev = vn_todev(vp);
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

#if !defined(MAX_PERF)
	/* XXX REMOVE ME */
	if (bp->b_vnbufs.tqe_next != NULL) {
		panic(
		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
		    bp,
		    (int)bp->b_flags
		);
	}
#endif
	bp->b_vp = (struct vnode *) 0;
	bp->b_flags &= ~B_PAGING;
}

void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
#if !defined(MAX_PERF)
	if ((bp->b_flags & B_PAGING) == 0) {
		panic(
		    "pbreassignbuf() on non phys bp %p",
		    bp
		);
	}
#endif
	bp->b_vp = newvp;
}
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	++reassignbufcalls;

#if !defined(MAX_PERF)
	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");
#endif

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
		if (bp->b_xflags & B_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= B_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    bp->b_lblkno == 0 ||
		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (bp->b_lblkno < 0) {
			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (reassignbufmethod == 1) {
			/*
			 * New sorting algorithm, only handle sequential case,
			 * otherwise guess.
			 */
			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
			    (tbp->b_xflags & B_VNDIRTY)) {
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortgood;
			} else {
				TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
				++reassignbufsortbad;
			}
		} else {
			/*
			 * Old sorting algorithm, scan queue and insert
			 */
			struct buf *ttbp;
			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
			    (ttbp->b_lblkno < bp->b_lblkno)) {
				++reassignbufloops;
				tbp = ttbp;
			}
			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
		}
	} else {
		bp->b_xflags |= B_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}
/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	addalias(vp, dev);
	*vpp = vp;
	return (0);
}

/*
 * Add vnode to the alias list hung off the dev_t.
 *
 * The reason for this gunk is that multiple vnodes can reference
 * the same physical device, so checking vp->v_usecount to see
 * how many users there are is inadequate; the v_usecount for
 * the vnodes need to be accumulated.  vcount() does that.
 */
void
addaliasu(nvp, nvp_rdev)
	struct vnode *nvp;
	udev_t nvp_rdev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");

	nvp->v_rdev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0);
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&nvp->v_rdev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}

void
addalias(nvp, dev)
	struct vnode *nvp;
	dev_t dev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addalias on non-special vnode");

	nvp->v_rdev = dev;
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
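/*
 * Illustrative sketch (not compiled): the standard way to use a vnode
 * found in some cache is to vget() it with a lock type, which both bumps
 * v_usecount and locks it (failing with ENOENT if the vnode is being
 * cleaned), do the work, then vput() to unlock and release in one step.
 * vrele() is the counterpart for vnodes that were never locked.  The
 * function name is hypothetical.
 */
#if 0
static int
example_use_vnode(struct vnode *vp, struct proc *p)
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, p)) != 0)
		return (error);	/* vnode was reclaimed out from under us */
	/* ... vp is locked and referenced here ... */
	vput(vp);		/* unlock + vrele in one call */
	return (0);
}
#endif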
/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we
		 * must call VOP_INACTIVE with the node locked.  So, in the
		 * case of vrele, we explicitly lock the vnode before calling
		 * VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we
		 * must call VOP_INACTIVE with the node locked.  So, in the
		 * case of vrele, we explicitly lock the vnode before calling
		 * VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
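/*
 * Illustrative sketch (not compiled): a filesystem's VFS_UNMOUNT entry
 * point typically keeps its root vnode aside via skipvp and lets
 * vflush() reclaim everything else, passing FORCECLOSE only for forced
 * unmounts.  The function name is hypothetical.
 */
#if 0
static int
example_unmount(struct mount *mp, struct vnode *rootvp, int mntflags)
{
	int flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	return (vflush(mp, rootvp, flags));	/* EBUSY if vnodes active */
}
#endif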
/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the
			 * object/vnode association.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active)
		vrele(vp);

	cache_purge(vp);
	if (vp->v_vnlock) {
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	dev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
		return (0);
	}
	dev = vp->v_rdev;
	for (;;) {
		simple_lock(&spechash_slock);
		vq = SLIST_FIRST(&dev->si_hlist);
		simple_unlock(&spechash_slock);
		if (!vq)
			break;
		vgone(vq);
	}
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}
/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		simple_lock(&spechash_slock);
		SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext);
		freedev(vp->v_rdev);
		simple_unlock(&spechash_slock);
		vp->v_rdev = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;

	simple_lock(&spechash_slock);
	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
		if (type == vp->v_type) {
			*vpp = vp;
			simple_unlock(&spechash_slock);
			return (1);
		}
	}
	simple_unlock(&spechash_slock);
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int count;

	count = 0;
	simple_lock(&spechash_slock);
	SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
		count += vq->v_usecount;
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */
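/*
 * Illustrative sketch (userland, not part of this file): the handler
 * above answers three-level MIB queries such as
 * { CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM }, which a user program can
 * issue via sysctl(3).
 */
#if 0
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	int mib[3], maxtype;
	size_t len = sizeof(maxtype);

	mib[0] = CTL_VFS;
	mib[1] = VFS_GENERIC;
	mib[2] = VFS_MAXTYPENUM;
	if (sysctl(mib, 3, &maxtype, &len, NULL, 0) == -1)
		return (1);
	printf("max filesystem type number: %d\n", maxtype);
	return (0);
}
#endif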
#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	return (0);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}
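
/*
 * Layout note for the allocation in vfs_hang_addrlist() above: the
 * netcred, the client address and the optional mask are carved out of
 * a single malloc'd block, so the one free() in the "out:" path (and
 * in vfs_free_netcred()) releases all three at once:
 *
 *	+----------------+---------------------+---------------------+
 *	| struct netcred | sockaddr (ex_addr)  | sockaddr (ex_mask)  |
 *	+----------------+---------------------+---------------------+
 *	np               saddr = (np + 1)      smask (if ex_masklen)
 */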

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) {
		/* Don't leak the reference and lock from VFS_ROOT(). */
		vput(rvp);
		return (error);
	}

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}
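
/*
 * Illustrative sketch (not part of this file): how a filesystem's
 * mount-update path might route export information from mount(2)
 * through vfs_export() above.  The xxxmount structure, its xm_export
 * field and the xxx_update_export name are hypothetical stand-ins for
 * a real filesystem's per-mount data (cf. ufs's um_export).
 */
#if 0
struct xxxmount {
	struct netexport xm_export;	/* hypothetical per-mount export info */
	/* ... */
};

static int
xxx_update_export(mp, argp)
	struct mount *mp;
	struct export_args *argp;
{
	struct xxxmount *xmp = (struct xxxmount *)mp->mnt_data;

	/* vfs_export() hangs or tears down the address lists for us. */
	return (vfs_export(mp, &xmp->xm_export, argp));
}
#endif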

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
					rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
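
/*
 * Illustrative sketch (not part of this file): the consumer side of
 * vfs_export_lookup() above.  A filesystem's fhtovp-style export
 * check, called while serving an NFS request, looks up the client
 * address and hands back the export flags and anonymous credential.
 * The xxx_check_export name is hypothetical.
 */
#if 0
static int
xxx_check_export(mp, nep, nam, exflagsp, credanonp)
	struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
	int *exflagsp;
	struct ucred **credanonp;
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* client is not in the export list */
	*exflagsp = np->netc_exflags;
	*credanonp = &np->netc_anon;
	return (0);
}
#endif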

/*
 * Perform msync on all vnodes under a mount point.
 * The mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags)
{
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	tries = 5;
loop:
	anyio = 0;
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		if (flags != MNT_WAIT) {
			obj = vp->v_object;
			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp))
				continue;
		}

		simple_lock(&vp->v_interlock);
		if (vp->v_object &&
		    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			if (!vget(vp,
			    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ,
			    curproc)) {
				if (vp->v_object) {
					vm_object_page_clean(vp->v_object,
					    0, 0,
					    flags == MNT_WAIT ? OBJPC_SYNC : 0);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}
	if (anyio && (--tries > 0))
		goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems can
 * gain the additional metadata buffering capability of the VMIO
 * code by making the device node VMIO-backed as well.
 *
 * vp must be locked when vfs_object_create is called.
 */
int
vfs_object_create(vp, p, cred)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	if (vp->v_type != VBLK && vn_canvmio(vp) == FALSE)
		return 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG || vp->v_type == VDIR) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (devsw(vp->v_rdev) != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
		} else {
			goto retn;
		}
		/*
		 * Dereference the reference we just created.  This assumes
		 * that the object is associated with the vp.
		 */
		object->ref_count--;
		vp->v_usecount--;
	} else {
		if (object->flags & OBJ_DEAD) {
			VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
	}

	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
	vp->v_flag |= VOBJBUF;

retn:
	return error;
}

static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return events;
	}
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return 0;
}
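
/*
 * Illustrative sketch (not part of this file): a filesystem's VOP_POLL
 * implementation built on vn_pollrecord() above, with events later
 * posted by the writer side via vn_pollevent() (below).  The xxx_poll
 * name is hypothetical.
 */
#if 0
static int
xxx_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	/*
	 * Return any events already pending, otherwise record the
	 * caller's interest so a later vn_pollevent() wakes it up.
	 */
	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}
#endif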

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Routine to create and manage a filesystem syncer vnode.
 */
#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
static int	sync_fsync __P((struct vop_fsync_args *));
static int	sync_inactive __P((struct vop_inactive_args *));
static int	sync_reclaim __P((struct vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist.  We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}
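
/*
 * Illustrative sketch (not part of this file): a filesystem mount
 * routine would typically allocate its syncer vnode once the mount is
 * otherwise complete, so the syncer begins flushing the new filesystem
 * on its regular schedule.  The xxx_mountfs_finish name is
 * hypothetical.
 */
#if 0
static int
xxx_mountfs_finish(mp)
	struct mount *mp;
{
	int error;

	/* Hook this mount into the syncer's staggered worklist. */
	if ((error = vfs_allocate_syncvnode(mp)) != 0)
		return (error);
	return (0);
}
#endif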

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 *
 * Modifications to the worklist must be protected at splbio().
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int s;

	s = splbio();
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}
	splx(s);

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}

/*
 * Extract the dev_t from a VBLK or VCHR vnode.
 */
dev_t
vn_todev(vp)
	struct vnode *vp;
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (NODEV);
	return (vp->v_rdev);
}

/*
 * Check if a vnode represents a disk device.
 */
int
vn_isdisk(vp)
	struct vnode *vp;
{
	if (vp->v_type != VBLK)
		return (0);
	if (!devsw(vp->v_rdev))
		return (0);
	if (!(devsw(vp->v_rdev)->d_flags & D_DISK))
		return (0);
	return (1);
}
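
/*
 * Illustrative sketch (not part of this file): how a caller about to
 * issue disk-only operations might combine vn_isdisk() and vn_todev()
 * above.  The xxx_disk_open name is hypothetical.
 */
#if 0
static int
xxx_disk_open(vp)
	struct vnode *vp;
{
	dev_t dev;

	if (!vn_isdisk(vp))
		return (ENOTBLK);	/* not a disk device */
	dev = vn_todev(vp);
	if (dev == NODEV)
		return (ENXIO);		/* defensive; vn_isdisk implies VBLK */
	/* ... issue DIOC* ioctls or block I/O against dev ... */
	return (0);
}
#endif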