vfs_export.c revision 62573
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: head/sys/kern/vfs_export.c 62573 2000-07-04 11:25:35Z phk $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_ffs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

static void	insmntque __P((struct vnode *vp, struct mount *mp));
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static unsigned long	numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
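
/*
 * Illustrative note (added, not in the original source): these two
 * tables back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>,
 * which translate between i_mode file type bits and vnode types:
 *
 *	IFTOVT(mode)  -> iftovt_tab[((mode) & S_IFMT) >> 12]
 *	VTTOIF(VDIR)  -> vttoif_tab[(int)VDIR] == S_IFDIR
 */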

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */

static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");

#ifdef ENABLE_VFS_IOOPT
int vfs_ioopt = 0;
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
#endif

struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */
struct simplelock mountlist_slock;
struct simplelock mntvnode_slock;
int nfs_mount_type = -1;
#ifndef NULL_SIMPLELOCKS
static struct simplelock mntid_slock;
static struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
#endif
struct nfs_public nfs_pub;	/* publicly exported FS */
static vm_zone_t vnode_zone;
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

/*
 * The workitem queue.
 */
#define SYNCER_MAXDELAY		32
static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
time_t syncdelay = 30;		/* max time to delay syncing data */
time_t filedelay = 30;		/* time to delay syncing files */
SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
time_t dirdelay = 29;		/* time to delay syncing directories */
SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
time_t metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
static int rushjob;			/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");

static int syncer_delayno = 0;
static long syncer_mask;
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
    &desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	/*
	 * Initialize the filesystem syncer.
	 */
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED | LK_NOPAUSE;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
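
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * bracket work on a mount point with vfs_busy()/vfs_unbusy() so that
 * dounmount() cannot tear the filesystem down underneath them, e.g.:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
 *		... examine mp->mnt_vnodelist ...
 *		vfs_unbusy(mp, p);
 *	}
 *
 * The DB_SHOW_COMMAND(lockedvnodes) routine below uses this pattern.
 */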

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	if (fstypename == NULL)
		return (ENODEV);
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_iosize_max = DFLTPHYS;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = 0;
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
#ifdef notdef	/* XXX JH */
int
lite2_vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*lite2_mountroot) __P((void));
	int error;

	if (lite2_mountroot != NULL)
		return ((*lite2_mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
#endif

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid.  Try to make its val[0] unique, since this value
 * will be used to create fake device numbers for stat().  Also try (but
 * not so hard) to make its val[0] unique mod 2^16, since some emulators
 * only support 16-bit device numbers.  We end up with unique val[0]'s for
 * the first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
 *
 * Keep in mind that several mounts may be running in parallel.  Starting
 * the search one past where the previous search terminated is both a
 * micro-optimization and a defense against returning the same fsid to
 * different mounts.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_int16_t mntid_base;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	tfsid.val[1] = mtype;
	mtype = (mtype & 0xFF) << 16;
	for (;;) {
		tfsid.val[0] = makeudev(255, mtype | mntid_base++);
		if (vfs_getvfs(&tfsid) == NULL)
			break;
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
	simple_unlock(&mntid_slock);
}
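
/*
 * Layout sketch (added summary of the code above): for a filesystem
 * with vfc_typenum T and per-call counter mntid_base, the fsid is
 *
 *	val[0] = makeudev(255, ((T & 0xFF) << 16) | mntid_base)
 *	val[1] = T
 *
 * so the low 16 bits of the minor number cycle per mount while the
 * filesystem type appears both in val[1] and in the upper minor bits.
 */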

/*
 * Knob to control the precision of file timestamps:
 *
 *   0 = seconds only; nanoseconds zeroed.
 *   1 = seconds and nanoseconds, accurate within 1/HZ.
 *   2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
    &timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(tsp)
	struct timespec *tsp;
{
	struct timeval tv;

	switch (timestamp_precision) {
	case TSP_SEC:
		tsp->tv_sec = time_second;
		tsp->tv_nsec = 0;
		break;
	case TSP_HZ:
		getnanotime(tsp);
		break;
	case TSP_USEC:
		microtime(&tv);
		TIMEVAL_TO_TIMESPEC(&tv, tsp);
		break;
	case TSP_NSEC:
	default:
		nanotime(tsp);
		break;
	}
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s, count;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp = NULL;
	vm_object_t object;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else for (count = 0; count < freevnodes; count++) {
		vp = TAILQ_FIRST(&vnode_free_list);
		if (vp == NULL || vp->v_usecount)
			panic("getnewvnode: free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/*
		 * Don't recycle if active in the namecache or
		 * if it still has cached pages or we cannot get
		 * its interlock.
		 */
		object = vp->v_object;
		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
		    (object && (object->resident_page_count ||
		     object->ref_count)) ||
		    !simple_lock_try(&vp->v_interlock)) {
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			vp = NULL;
			continue;
		}
		break;
	}
	if (vp) {
		vp->v_flag |= VDOOMED;
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef INVARIANTS
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	} else {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) zalloc(vnode_zone);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);

	vfs_object_create(vp, p, p->p_ucred);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}
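
/*
 * Note (added explanation): writers increment vp->v_numoutput before
 * starting asynchronous writes; code that must wait for all writes to
 * drain sets VBWAIT and sleeps on &vp->v_numoutput, as vinvalbuf() and
 * vtruncbuf() below do, and vwakeup() above wakes it when the count
 * reaches zero.
 */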

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			error = tsleep((caddr_t)&vp->v_numoutput,
			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
	s = splbio();
	for (;;) {
		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
		if (!blist)
			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
				error = BUF_TIMELOCK(bp,
				    LK_EXCLUSIVE | LK_SLEEPFAIL,
				    "vinvalbuf", slpflag, slptimeo);
				if (error == ENOLCK)
					break;
				splx(s);
				return (error);
			}
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.  Note that vfs_bio_awrite expects
			 * buffers to reside on a queue, while VOP_BWRITE and
			 * brelse do not.
			 */
			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
			    (flags & V_SAVE)) {

				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						BUF_UNLOCK(bp);
						vfs_bio_awrite(bp);
					} else {
						bremfree(bp);
						bp->b_flags |= B_ASYNC;
						BUF_WRITE(bp);
					}
				} else {
					bremfree(bp);
					(void) BUF_WRITE(bp);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		vm_object_page_remove(object, 0, 0,
		    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
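
/*
 * Note (added summary): with V_SAVE the caller asks vinvalbuf() to
 * push pending dirty data to disk (via VOP_FSYNC) before the buffers
 * are invalidated; without it the buffer contents are discarded.
 */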

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 */
int
vtruncbuf(vp, cred, p, length, blksize)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	off_t length;
	int blksize;
{
	register struct buf *bp;
	struct buf *nbp;
	int s, anyfreed;
	int trunclbn;

	/*
	 * Round up to the *next* lbn.
	 */
	trunclbn = (length + blksize - 1) / blksize;

	s = splbio();
restart:
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
				     (nbp->b_vp != vp) ||
				     (nbp->b_flags & B_DELWRI))) {
					goto restart;
				}
			}
		}

		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if (bp->b_lblkno >= trunclbn) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					bp->b_flags |= (B_INVAL | B_RELBUF);
					bp->b_flags &= ~B_ASYNC;
					brelse(bp);
					anyfreed = 1;
				}
				if (nbp &&
				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
				     (nbp->b_vp != vp) ||
				     (nbp->b_flags & B_DELWRI) == 0)) {
					goto restart;
				}
			}
		}
	}

	if (length > 0) {
restartsync:
		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
			nbp = TAILQ_NEXT(bp, b_vnbufs);
			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
					goto restart;
				} else {
					bremfree(bp);
					if (bp->b_vp == vp) {
						bp->b_flags |= B_ASYNC;
					} else {
						bp->b_flags &= ~B_ASYNC;
					}
					BUF_WRITE(bp);
				}
				goto restartsync;
			}

		}
	}

	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
	}

	splx(s);

	vnode_pager_setsize(vp, length);

	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));

	vhold(vp);
	bp->b_vp = vp;
	bp->b_dev = vn_todev(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bp->b_xflags |= BX_VNCLEAN;
	bp->b_xflags &= ~BX_VNDIRTY;
	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	struct buflists *listheadp;
	int s;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	s = splbio();
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &vp->v_dirtyblkhd;
		else
			listheadp = &vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
	}
	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	splx(s);
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}
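
/*
 * Note (added explanation): bgetvp() and brelvp() pair vhold() with
 * vdrop(), so a vnode with buffers still attached is held and cannot
 * be recycled out from under them.
 */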

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, mounted block devices
 * are delayed only about half the time that file data is delayed.
 * Similarly, directory updates are more critical, so they are only
 * delayed about a third of the time that file data is delayed.  Thus,
 * there are SYNCER_MAXDELAY queues that are processed round-robin at
 * a rate of one each second (driven off the filesystem syncer process).
 * The syncer_delayno variable indicates the next queue that is to be
 * processed.  Items that need to be processed soon are placed in this
 * queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */

/*
 * Add an item to the syncer work queue.
 */
static void
vn_syncer_add_to_worklist(struct vnode *vp, int delay)
{
	int s, slot;

	s = splbio();

	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
	}

	if (delay > syncer_maxdelay - 2)
		delay = syncer_maxdelay - 2;
	slot = (syncer_delayno + delay) & syncer_mask;

	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	vp->v_flag |= VONWORKLST;
	splx(s);
}
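
/*
 * Note (added explanation, not in the original): delay is clamped to
 * syncer_maxdelay - 2 so that a large request cannot wrap around the
 * slot ring and land in, or just behind, the bucket the syncer is
 * currently draining, which would make the request fire almost
 * immediately rather than after the requested delay.
 */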

struct proc *updateproc;
static void sched_sync __P((void));
static struct kproc_desc up_kp = {
	"syncer",
	sched_sync,
	&updateproc
};
SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)

/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void)
{
	struct synclist *slp;
	struct vnode *vp;
	long starttime;
	int s;
	struct proc *p = updateproc;

	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
	    SHUTDOWN_PRI_LAST);

	for (;;) {
		kproc_suspend_loop(p);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.  Be careful
		 * of interrupt race on slp queue.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;
		splx(s);

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (VOP_ISLOCKED(vp, NULL) == 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
				VOP_UNLOCK(vp, 0, p);
			}
			s = splbio();
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: v_tag VT_VFS vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
				    !vn_isdisk(vp, NULL))
					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}
			splx(s);
		}

		/*
		 * Do soft update processing.
		 */
#ifdef SOFTUPDATES
		softdep_process_worklist(NULL);
#endif

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}
		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}

/*
 * Request the syncer daemon to speed up its work.
 * We never push it to speed up more than half of its
 * normal turn time, otherwise it could take over the cpu.
 */
int
speedup_syncer()
{
	int s;

	s = splhigh();
	if (updateproc->p_wchan == &lbolt)
		setrunnable(updateproc);
	splx(s);
	if (rushjob < syncdelay / 2) {
		rushjob += 1;
		stat_rush_requests += 1;
		return (1);
	}
	return(0);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));

	bp->b_vp = vp;
	bp->b_flags |= B_PAGING;
	bp->b_dev = vn_todev(vp);
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));

	/* XXX REMOVE ME */
	if (bp->b_vnbufs.tqe_next != NULL) {
		panic(
		    "relpbuf(): b_vp was probably reassignbuf()d %p %x",
		    bp,
		    (int)bp->b_flags
		);
	}
	bp->b_vp = (struct vnode *) 0;
	bp->b_flags &= ~B_PAGING;
}

void
pbreassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	if ((bp->b_flags & B_PAGING) == 0) {
		panic(
		    "pbreassignbuf() on non phys bp %p",
		    bp
		);
	}
	bp->b_vp = newvp;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	++reassignbufcalls;

	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
		if (bp->b_xflags & BX_VNDIRTY)
			listheadp = &bp->b_vp->v_dirtyblkhd;
		else
			listheadp = &bp->b_vp->v_cleanblkhd;
		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
		if (bp->b_vp != newvp) {
			vdrop(bp->b_vp);
			bp->b_vp = NULL;	/* for clarification */
		}
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VCHR:
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
		bp->b_xflags |= BX_VNDIRTY;
		tbp = TAILQ_FIRST(listheadp);
		if (tbp == NULL ||
		    bp->b_lblkno == 0 ||
		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (bp->b_lblkno < 0) {
			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
			++reassignbufsortgood;
		} else if (reassignbufmethod == 1) {
			/*
			 * New sorting algorithm, only handle sequential case,
			 * otherwise append to end (but before metadata)
			 */
			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
			    (tbp->b_xflags & BX_VNDIRTY)) {
				/*
				 * Found the best place to insert the buffer
				 */
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortgood;
			} else {
				/*
				 * Missed, append to end, but before meta-data.
				 * We know that the head buffer in the list is
				 * not meta-data due to prior conditionals.
				 *
				 * Indirect effects: NFS second stage write
				 * tends to wind up here, giving maximum
				 * distance between the unstable write and the
				 * commit rpc.
				 */
				tbp = TAILQ_LAST(listheadp, buflists);
				while (tbp && tbp->b_lblkno < 0)
					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
				++reassignbufsortbad;
			}
		} else {
			/*
			 * Old sorting algorithm, scan queue and insert
			 */
			struct buf *ttbp;
			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
			    (ttbp->b_lblkno < bp->b_lblkno)) {
				++reassignbufloops;
				tbp = ttbp;
			}
			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
		}
	} else {
		bp->b_xflags |= BX_VNCLEAN;
		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
		if ((newvp->v_flag & VONWORKLST) &&
		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	}
	if (bp->b_vp != newvp) {
		bp->b_vp = newvp;
		vhold(bp->b_vp);
	}
	splx(s);
}
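
/*
 * Note (added explanation): in the sort above, buffers with negative
 * b_lblkno values are indirect (meta-data) blocks, which is why the
 * code keeps them at the tail of the dirty list, behind all file data.
 */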

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 * XXX: This now changed to a VCHR due to the block/char merging.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VCHR;
	addalias(vp, dev);
	*vpp = vp;
	return (0);
}

/*
 * Add vnode to the alias list hung off the dev_t.
 *
 * The reason for this gunk is that multiple vnodes can reference
 * the same physical device, so checking vp->v_usecount to see
 * how many users there are is inadequate; the v_usecount for
 * the vnodes needs to be accumulated.  vcount() does that.
 */
void
addaliasu(nvp, nvp_rdev)
	struct vnode *nvp;
	udev_t nvp_rdev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");
	addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
}

void
addalias(nvp, dev)
	struct vnode *nvp;
	dev_t dev;
{

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addalias on non-special vnode");

	nvp->v_rdev = dev;
	simple_lock(&spechash_slock);
	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	simple_unlock(&spechash_slock);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  The vnode lock bit is set if the
 * vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0) {
		simple_lock(&vp->v_interlock);
	}
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	vp->v_usecount++;

	if (VSHOULDBUSY(vp))
		vbusy(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (VSHOULDFREE(vp))
				vfree(vp);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

void
vref(struct vnode *vp)
{
	simple_lock(&vp->v_interlock);
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
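
/*
 * Note (added summary): v_usecount counts active references (taken by
 * vget()/vref(), dropped by vrele()/vput() below), while v_holdcnt
 * (vhold()/vdrop()) merely pins the vnode off the free list without
 * keeping it active.
 */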

/*
 * Vnode put/release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vrele: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		simple_unlock(&vp->v_interlock);

		return;
	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
			VOP_INACTIVE(vp, p);
		}

	} else {
#ifdef DIAGNOSTIC
		vprint("vrele: negative ref count", vp);
		simple_unlock(&vp->v_interlock);
#endif
		panic("vrele: negative ref cnt");
	}
}

void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	KASSERT(vp != NULL, ("vput: null vp"));

	simple_lock(&vp->v_interlock);

	if (vp->v_usecount > 1) {

		vp->v_usecount--;
		VOP_UNLOCK(vp, LK_INTERLOCK, p);
		return;

	}

	if (vp->v_usecount == 1) {

		vp->v_usecount--;
		if (VSHOULDFREE(vp))
			vfree(vp);
		/*
		 * If we are doing a vput, the node is already locked, and we must
		 * call VOP_INACTIVE with the node locked.  So, in the case of
		 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
		 */
		simple_unlock(&vp->v_interlock);
		VOP_INACTIVE(vp, p);

	} else {
#ifdef DIAGNOSTIC
		vprint("vput: negative ref count", vp);
#endif
		panic("vput: negative ref cnt");
	}
}
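
/*
 * Note (added summary): vput() is the variant used when the caller
 * already holds the vnode lock; it is equivalent to unlocking and then
 * vrele(), whereas vrele() above must take the lock itself before
 * calling VOP_INACTIVE() on the last reference.
 */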

/*
 * Somebody doesn't want the vnode recycled.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	vp->v_holdcnt++;
	if (VSHOULDBUSY(vp))
		vbusy(vp);
	splx(s);
}

/*
 * One less who cares about this vnode.
 */
void
vdrop(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	if (vp->v_holdcnt <= 0)
		panic("vdrop: holdcnt");
	vp->v_holdcnt--;
	if (VSHOULDFREE(vp))
		vfree(vp);
	splx(s);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;	/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	vm_object_t obj;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 * If the flush fails, just toss the buffers.
	 */
	if (flags & DOCLOSE) {
		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
	}

	if ((obj = vp->v_object) != NULL) {
		if (obj->ref_count == 0) {
			/*
			 * vclean() may be called twice.  The first time
			 * removes the primary reference to the object,
			 * the second time goes one further and is a
			 * special-case to terminate the object.
			 */
			vm_object_terminate(obj);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(obj);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			vfree(vp);
		}
		simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);
	if (vp->v_vnlock) {
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	if (VSHOULDFREE(vp))
		vfree(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	dev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;
	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
		return (0);
	}
	dev = vp->v_rdev;
	for (;;) {
		simple_lock(&spechash_slock);
		vq = SLIST_FIRST(&dev->si_hlist);
		simple_unlock(&spechash_slock);
		if (!vq)
			break;
		vgone(vq);
	}
	return (0);
}
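
/*
 * Note (added explanation): VXLOCK marks a vnode that vclean() is
 * currently purging; vget(), vgonel() and vop_revoke() all test it,
 * set VXWANT and sleep on the vnode, and vclean() issues the wakeup
 * once the purge is complete.
 */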

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		simple_lock(&spechash_slock);
		SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext);
		freedev(vp->v_rdev);
		simple_unlock(&spechash_slock);
		vp->v_rdev = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the
	 * VDOOMED flag and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE)
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;

	simple_lock(&spechash_slock);
	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
		if (type == vp->v_type) {
			*vpp = vp;
			simple_unlock(&spechash_slock);
			return (1);
		}
	}
	simple_unlock(&spechash_slock);
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int count;

	count = 0;
	simple_lock(&spechash_slock);
	SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
		count += vq->v_usecount;
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Same as above, but using the dev_t as argument
 */

int
count_dev(dev)
	dev_t dev;
{
	struct vnode *vp;

	vp = SLIST_FIRST(&dev->si_hlist);
	if (vp == NULL)
		return (0);
	return(vcount(vp));
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp, NULL))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));

static int
vfs_sysctl(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
    "Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode(SYSCTL_HANDLER_ARGS)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
		    (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_vnode, "S,vnode", "");
#endif
2141 * Exporting the vnode list on medium systems causes sysctl to coredump. 2142 */ 2143#if 0 2144SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 2145	0, 0, sysctl_vnode, "S,vnode", ""); 2146#endif 2147 2148/* 2149 * Check to see if a filesystem is mounted on a block device. 2150 */ 2151int 2152vfs_mountedon(vp) 2153	struct vnode *vp; 2154{ 2155 2156	if (vp->v_specmountpoint != NULL) 2157		return (EBUSY); 2158	return (0); 2159} 2160 2161/* 2162 * Unmount all filesystems. The list is traversed in reverse order 2163 * of mounting to avoid dependencies. 2164 */ 2165void 2166vfs_unmountall() 2167{ 2168	struct mount *mp; 2169	struct proc *p; 2170	int error; 2171 2172	if (curproc != NULL) 2173		p = curproc; 2174	else 2175		p = initproc;	/* XXX XXX should this be proc0? */ 2176	/* 2177	 * Since this only runs when rebooting, it is not interlocked. 2178	 */ 2179	while (!TAILQ_EMPTY(&mountlist)) { 2180		mp = TAILQ_LAST(&mountlist, mntlist); 2181		error = dounmount(mp, MNT_FORCE, p); 2182		if (error) { 2183			TAILQ_REMOVE(&mountlist, mp, mnt_list); 2184			printf("unmount of %s failed (", 2185			    mp->mnt_stat.f_mntonname); 2186			if (error == EBUSY) 2187				printf("BUSY)\n"); 2188			else 2189				printf("%d)\n", error); 2190		} else { 2191			/* The unmount has removed mp from the mountlist */ 2192		} 2193	} 2194} 2195 2196/* 2197 * Build hash lists of net addresses and hang them off the mount point. 2198 * Called by vfs_export() to set up the lists of export addresses. 2199 */ 2200static int 2201vfs_hang_addrlist(mp, nep, argp) 2202	struct mount *mp; 2203	struct netexport *nep; 2204	struct export_args *argp; 2205{ 2206	register struct netcred *np; 2207	register struct radix_node_head *rnh; 2208	register int i; 2209	struct radix_node *rn; 2210	struct sockaddr *saddr, *smask = 0; 2211	struct domain *dom; 2212	int error; 2213 2214	if (argp->ex_addrlen == 0) { 2215		if (mp->mnt_flag & MNT_DEFEXPORTED) 2216			return (EPERM); 2217		np = &nep->ne_defexported; 2218		np->netc_exflags = argp->ex_flags; 2219		np->netc_anon = argp->ex_anon; 2220		np->netc_anon.cr_ref = 1; 2221		mp->mnt_flag |= MNT_DEFEXPORTED; 2222		return (0); 2223	} 2224	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2225	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); 2226	bzero((caddr_t) np, i); 2227	saddr = (struct sockaddr *) (np + 1); 2228	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2229		goto out; 2230	if (saddr->sa_len > argp->ex_addrlen) 2231		saddr->sa_len = argp->ex_addrlen; 2232	if (argp->ex_masklen) { 2233		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); 2234		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2235		if (error) 2236			goto out; 2237		if (smask->sa_len > argp->ex_masklen) 2238			smask->sa_len = argp->ex_masklen; 2239	} 2240	i = saddr->sa_family; 2241	if ((rnh = nep->ne_rtable[i]) == 0) { 2242		/* 2243		 * It seems silly to initialize every AF when most are not 2244		 * used; do so on demand here. 2245		 */ 2246		for (dom = domains; dom; dom = dom->dom_next) 2247			if (dom->dom_family == i && dom->dom_rtattach) { 2248				dom->dom_rtattach((void **) &nep->ne_rtable[i], 2249				    dom->dom_rtoffset); 2250				break; 2251			} 2252		if ((rnh = nep->ne_rtable[i]) == 0) { 2253			error = ENOBUFS; 2254			goto out; 2255		} 2256	} 2257	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2258	    np->netc_rnodes); 2259	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */ 2260		error = EPERM; 2261		goto out; 2262	} 2263	np->netc_exflags = argp->ex_flags; 2264	np->netc_anon = argp->ex_anon;
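	/*
	 * The anonymous credential was copied in by structure assignment
	 * above; give the embedded ucred its own reference count so it is
	 * accounted for independently of the caller's copy.
	 */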
2265	np->netc_anon.cr_ref = 1; 2266	return (0); 2267out: 2268	free(np, M_NETADDR); 2269	return (error); 2270} 2271 2272/* ARGSUSED */ 2273static int 2274vfs_free_netcred(rn, w) 2275	struct radix_node *rn; 2276	void *w; 2277{ 2278	register struct radix_node_head *rnh = (struct radix_node_head *) w; 2279 2280	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2281	free((caddr_t) rn, M_NETADDR); 2282	return (0); 2283} 2284 2285/* 2286 * Free the net address hash lists that are hanging off the mount points. 2287 */ 2288static void 2289vfs_free_addrlist(nep) 2290	struct netexport *nep; 2291{ 2292	register int i; 2293	register struct radix_node_head *rnh; 2294 2295	for (i = 0; i <= AF_MAX; i++) 2296		if ((rnh = nep->ne_rtable[i])) { 2297			(*rnh->rnh_walktree) (rnh, vfs_free_netcred, 2298			    (caddr_t) rnh); 2299			free((caddr_t) rnh, M_RTABLE); 2300			nep->ne_rtable[i] = 0; 2301		} 2302} 2303 2304int 2305vfs_export(mp, nep, argp) 2306	struct mount *mp; 2307	struct netexport *nep; 2308	struct export_args *argp; 2309{ 2310	int error; 2311 2312	if (argp->ex_flags & MNT_DELEXPORT) { 2313		if (mp->mnt_flag & MNT_EXPUBLIC) { 2314			vfs_setpublicfs(NULL, NULL, NULL); 2315			mp->mnt_flag &= ~MNT_EXPUBLIC; 2316		} 2317		vfs_free_addrlist(nep); 2318		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2319	} 2320	if (argp->ex_flags & MNT_EXPORTED) { 2321		if (argp->ex_flags & MNT_EXPUBLIC) { 2322			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2323				return (error); 2324			mp->mnt_flag |= MNT_EXPUBLIC; 2325		} 2326		if ((error = vfs_hang_addrlist(mp, nep, argp))) 2327			return (error); 2328		mp->mnt_flag |= MNT_EXPORTED; 2329	} 2330	return (0); 2331} 2332 2333 2334/* 2335 * Set the publicly exported filesystem (WebNFS). Currently, only 2336 * one public filesystem is possible in the spec (RFC 2054 and RFC 2055). 2337 */ 2338int 2339vfs_setpublicfs(mp, nep, argp) 2340	struct mount *mp; 2341	struct netexport *nep; 2342	struct export_args *argp; 2343{ 2344	int error; 2345	struct vnode *rvp; 2346	char *cp; 2347 2348	/* 2349	 * mp == NULL -> invalidate the current info; the FS is 2350	 * no longer exported. May be called from either vfs_export 2351	 * or unmount, so check if it hasn't already been done. 2352	 */ 2353	if (mp == NULL) { 2354		if (nfs_pub.np_valid) { 2355			nfs_pub.np_valid = 0; 2356			if (nfs_pub.np_index != NULL) { 2357				FREE(nfs_pub.np_index, M_TEMP); 2358				nfs_pub.np_index = NULL; 2359			} 2360		} 2361		return (0); 2362	} 2363 2364	/* 2365	 * Only one allowed at a time. 2366	 */ 2367	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2368		return (EBUSY); 2369 2370	/* 2371	 * Get real filehandle for root of exported FS. 2372	 */ 2373	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2374	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2375 2376	if ((error = VFS_ROOT(mp, &rvp))) 2377		return (error); 2378 2379	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid); 2380	vput(rvp);	/* always drop the root vnode, even on error */ 2381	if (error) 2382		return (error); 2383 2384	/* 2385	 * If an indexfile was specified, pull it in. 2386	 */ 2387	if (argp->ex_indexfile != NULL) { 2388		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2389		    M_WAITOK); 2390		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2391		    MAXNAMLEN, (size_t *)0); 2392		if (!error) { 2393			/* 2394			 * Check for illegal filenames.
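			 * A WebNFS index file is a single name looked up
			 * relative to the root of the public filesystem,
			 * so any name containing '/' is rejected below.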
2395			 */ 2396			for (cp = nfs_pub.np_index; *cp; cp++) { 2397				if (*cp == '/') { 2398					error = EINVAL; 2399					break; 2400				} 2401			} 2402		} 2403		if (error) { 2404			FREE(nfs_pub.np_index, M_TEMP); 2405			return (error); 2406		} 2407	} 2408 2409	nfs_pub.np_mount = mp; 2410	nfs_pub.np_valid = 1; 2411	return (0); 2412} 2413 2414struct netcred * 2415vfs_export_lookup(mp, nep, nam) 2416	register struct mount *mp; 2417	struct netexport *nep; 2418	struct sockaddr *nam; 2419{ 2420	register struct netcred *np; 2421	register struct radix_node_head *rnh; 2422	struct sockaddr *saddr; 2423 2424	np = NULL; 2425	if (mp->mnt_flag & MNT_EXPORTED) { 2426		/* 2427		 * Lookup in the export list first. 2428		 */ 2429		if (nam != NULL) { 2430			saddr = nam; 2431			rnh = nep->ne_rtable[saddr->sa_family]; 2432			if (rnh != NULL) { 2433				np = (struct netcred *) 2434					(*rnh->rnh_matchaddr)((caddr_t)saddr, 2435					    rnh); 2436				if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2437					np = NULL; 2438			} 2439		} 2440		/* 2441		 * If no address match, use the default if it exists. 2442		 */ 2443		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2444			np = &nep->ne_defexported; 2445	} 2446	return (np); 2447} 2448 2449/* 2450 * Perform msync on all vnodes under a mount point. 2451 * The mount point must be locked. 2452 */ 2453void 2454vfs_msync(struct mount *mp, int flags) { 2455	struct vnode *vp, *nvp; 2456	struct vm_object *obj; 2457	int anyio, tries; 2458 2459	tries = 5; 2460loop: 2461	anyio = 0; 2462	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) { 2463 2464		nvp = LIST_NEXT(vp, v_mntvnodes); 2465 2466		if (vp->v_mount != mp) { 2467			goto loop; 2468		} 2469 2470		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */ 2471			continue; 2472 2473		if (flags != MNT_WAIT) { 2474			obj = vp->v_object; 2475			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2476				continue; 2477			if (VOP_ISLOCKED(vp, NULL)) 2478				continue; 2479		} 2480 2481		simple_lock(&vp->v_interlock); 2482		if (vp->v_object && 2483		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2484			if (!vget(vp, 2485				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2486				if (vp->v_object) { 2487					vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); 2488					anyio = 1; 2489				} 2490				vput(vp); 2491			} 2492		} else { 2493			simple_unlock(&vp->v_interlock); 2494		} 2495	} 2496	if (anyio && (--tries > 0)) 2497		goto loop; 2498} 2499 2500/* 2501 * Create the VM object needed for VMIO and mmap support. This 2502 * is done for all VREG files in the system. Some filesystems might 2503 * take advantage of the additional metadata buffering capability of 2504 * the VMIO code by putting the device node in VMIO mode as well. 2505 * 2506 * vp must be locked when vfs_object_create is called. 2507 */ 2508int 2509vfs_object_create(vp, p, cred) 2510	struct vnode *vp; 2511	struct proc *p; 2512	struct ucred *cred; 2513{ 2514	struct vattr vat; 2515	vm_object_t object; 2516	int error = 0; 2517 2518	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) 2519		return (0); 2520 2521retry: 2522	if ((object = vp->v_object) == NULL) { 2523		if (vp->v_type == VREG || vp->v_type == VDIR) { 2524			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2525				goto retn; 2526			object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2527		} else if (devsw(vp->v_rdev) != NULL) { 2528			/* 2529			 * This simply allocates the biggest object possible 2530			 * for a disk vnode. This should be fixed, but doesn't 2531			 * cause any problems (yet).
2532			 */ 2533			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2534		} else { 2535			goto retn; 2536		} 2537		/* 2538		 * Dereference the reference we just created. This assumes 2539		 * that the object is associated with the vp. 2540		 */ 2541		object->ref_count--; 2542		vp->v_usecount--; 2543	} else { 2544		if (object->flags & OBJ_DEAD) { 2545			VOP_UNLOCK(vp, 0, p); 2546			tsleep(object, PVM, "vodead", 0); 2547			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2548			goto retry; 2549		} 2550	} 2551 2552	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2553	vp->v_flag |= VOBJBUF; 2554 2555retn: 2556	return (error); 2557} 2558 2559void 2560vfree(vp) 2561	struct vnode *vp; 2562{ 2563	int s; 2564 2565	s = splbio(); 2566	simple_lock(&vnode_free_list_slock); 2567	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free")); 2568	if (vp->v_flag & VAGE) { 2569		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2570	} else { 2571		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2572	} 2573	freevnodes++; 2574	simple_unlock(&vnode_free_list_slock); 2575	vp->v_flag &= ~VAGE; 2576	vp->v_flag |= VFREE; 2577	splx(s); 2578} 2579 2580void 2581vbusy(vp) 2582	struct vnode *vp; 2583{ 2584	int s; 2585 2586	s = splbio(); 2587	simple_lock(&vnode_free_list_slock); 2588	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free")); 2589	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2590	freevnodes--; 2591	simple_unlock(&vnode_free_list_slock); 2592	vp->v_flag &= ~(VFREE|VAGE); 2593	splx(s); 2594} 2595 2596/* 2597 * Record a process's interest in events which might happen to 2598 * a vnode. Because poll uses the historic select-style interface 2599 * internally, this routine serves as both the ``check for any 2600 * pending events'' and the ``record my interest in future events'' 2601 * functions. (These are done together, while the lock is held, 2602 * to avoid race conditions.) 2603 */ 2604int 2605vn_pollrecord(vp, p, events) 2606	struct vnode *vp; 2607	struct proc *p; 2608	short events; 2609{ 2610	simple_lock(&vp->v_pollinfo.vpi_lock); 2611	if (vp->v_pollinfo.vpi_revents & events) { 2612		/* 2613		 * This leaves events we are not interested 2614		 * in available for the other process which 2615		 * presumably had requested them 2616		 * (otherwise they would never have been 2617		 * recorded). 2618		 */ 2619		events &= vp->v_pollinfo.vpi_revents; 2620		vp->v_pollinfo.vpi_revents &= ~events; 2621 2622		simple_unlock(&vp->v_pollinfo.vpi_lock); 2623		return (events); 2624	} 2625	vp->v_pollinfo.vpi_events |= events; 2626	selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2627	simple_unlock(&vp->v_pollinfo.vpi_lock); 2628	return (0); 2629} 2630 2631/* 2632 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2633 * it is possible for us to miss an event due to race conditions, but 2634 * that condition is expected to be rare, so for the moment it is the 2635 * preferred interface. 2636 */ 2637void 2638vn_pollevent(vp, events) 2639	struct vnode *vp; 2640	short events; 2641{ 2642	simple_lock(&vp->v_pollinfo.vpi_lock); 2643	if (vp->v_pollinfo.vpi_events & events) { 2644		/* 2645		 * We clear vpi_events so that we don't 2646		 * call selwakeup() twice if two events are 2647		 * posted before the polling process(es) is 2648		 * awakened. This also ensures that we take at 2649		 * most one selwakeup() if the polling process 2650		 * is no longer interested. However, it does 2651		 * mean that only one event can be noticed at 2652		 * a time. (Perhaps we should only clear those 2653		 * event bits which we note?
XXX 2654 */ 2655 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2656 vp->v_pollinfo.vpi_revents |= events; 2657 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2658 } 2659 simple_unlock(&vp->v_pollinfo.vpi_lock); 2660} 2661 2662/* 2663 * Wake up anyone polling on vp because it is being revoked. 2664 * This depends on dead_poll() returning POLLHUP for correct 2665 * behavior. 2666 */ 2667void 2668vn_pollgone(vp) 2669 struct vnode *vp; 2670{ 2671 simple_lock(&vp->v_pollinfo.vpi_lock); 2672 if (vp->v_pollinfo.vpi_events) { 2673 vp->v_pollinfo.vpi_events = 0; 2674 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2675 } 2676 simple_unlock(&vp->v_pollinfo.vpi_lock); 2677} 2678 2679 2680 2681/* 2682 * Routine to create and manage a filesystem syncer vnode. 2683 */ 2684#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) 2685static int sync_fsync __P((struct vop_fsync_args *)); 2686static int sync_inactive __P((struct vop_inactive_args *)); 2687static int sync_reclaim __P((struct vop_reclaim_args *)); 2688#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) 2689#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) 2690static int sync_print __P((struct vop_print_args *)); 2691#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) 2692 2693static vop_t **sync_vnodeop_p; 2694static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2695 { &vop_default_desc, (vop_t *) vop_eopnotsupp }, 2696 { &vop_close_desc, (vop_t *) sync_close }, /* close */ 2697 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ 2698 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ 2699 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ 2700 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ 2701 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ 2702 { &vop_print_desc, (vop_t *) sync_print }, /* print */ 2703 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ 2704 { NULL, NULL } 2705}; 2706static struct vnodeopv_desc sync_vnodeop_opv_desc = 2707 { &sync_vnodeop_p, sync_vnodeop_entries }; 2708 2709VNODEOP_SET(sync_vnodeop_opv_desc); 2710 2711/* 2712 * Create a new filesystem syncer vnode for the specified mount point. 2713 */ 2714int 2715vfs_allocate_syncvnode(mp) 2716 struct mount *mp; 2717{ 2718 struct vnode *vp; 2719 static long start, incr, next; 2720 int error; 2721 2722 /* Allocate a new vnode */ 2723 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2724 mp->mnt_syncer = NULL; 2725 return (error); 2726 } 2727 vp->v_type = VNON; 2728 /* 2729 * Place the vnode onto the syncer worklist. We attempt to 2730 * scatter them about on the list so that they will go off 2731 * at evenly distributed times even if all the filesystems 2732 * are mounted at once. 2733 */ 2734 next += incr; 2735 if (next == 0 || next > syncer_maxdelay) { 2736 start /= 2; 2737 incr /= 2; 2738 if (start == 0) { 2739 start = syncer_maxdelay / 2; 2740 incr = syncer_maxdelay; 2741 } 2742 next = start; 2743 } 2744 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2745 mp->mnt_syncer = vp; 2746 return (0); 2747} 2748 2749/* 2750 * Do a lazy sync of the filesystem. 
2751 */ 2752static int 2753sync_fsync(ap) 2754	struct vop_fsync_args /* { 2755		struct vnode *a_vp; 2756		struct ucred *a_cred; 2757		int a_waitfor; 2758		struct proc *a_p; 2759	} */ *ap; 2760{ 2761	struct vnode *syncvp = ap->a_vp; 2762	struct mount *mp = syncvp->v_mount; 2763	struct proc *p = ap->a_p; 2764	int asyncflag; 2765 2766	/* 2767	 * We only need to do something if this is a lazy evaluation. 2768	 */ 2769	if (ap->a_waitfor != MNT_LAZY) 2770		return (0); 2771 2772	/* 2773	 * Move ourselves to the back of the sync list. 2774	 */ 2775	vn_syncer_add_to_worklist(syncvp, syncdelay); 2776 2777	/* 2778	 * Walk the list of vnodes pushing all that are dirty and 2779	 * not already on the sync list. 2780	 */ 2781	simple_lock(&mountlist_slock); 2782	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { 2783		simple_unlock(&mountlist_slock); 2784		return (0); 2785	} 2786	asyncflag = mp->mnt_flag & MNT_ASYNC; 2787	mp->mnt_flag &= ~MNT_ASYNC; 2788	vfs_msync(mp, MNT_NOWAIT); 2789	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2790	if (asyncflag) 2791		mp->mnt_flag |= MNT_ASYNC; 2792	vfs_unbusy(mp, p); 2793	return (0); 2794} 2795 2796/* 2797 * The syncer vnode is no longer referenced. 2798 */ 2799static int 2800sync_inactive(ap) 2801	struct vop_inactive_args /* { 2802		struct vnode *a_vp; 2803		struct proc *a_p; 2804	} */ *ap; 2805{ 2806 2807	vgone(ap->a_vp); 2808	return (0); 2809} 2810 2811/* 2812 * The syncer vnode is no longer needed and is being decommissioned. 2813 * 2814 * Modifications to the worklist must be protected at splbio(). 2815 */ 2816static int 2817sync_reclaim(ap) 2818	struct vop_reclaim_args /* { 2819		struct vnode *a_vp; 2820	} */ *ap; 2821{ 2822	struct vnode *vp = ap->a_vp; 2823	int s; 2824 2825	s = splbio(); 2826	vp->v_mount->mnt_syncer = NULL; 2827	if (vp->v_flag & VONWORKLST) { 2828		LIST_REMOVE(vp, v_synclist); 2829		vp->v_flag &= ~VONWORKLST; 2830	} 2831	splx(s); 2832 2833	return (0); 2834} 2835 2836/* 2837 * Print out a syncer vnode.
2838 */ 2839static int 2840sync_print(ap) 2841	struct vop_print_args /* { 2842		struct vnode *a_vp; 2843	} */ *ap; 2844{ 2845	struct vnode *vp = ap->a_vp; 2846 2847	printf("syncer vnode"); 2848	if (vp->v_vnlock != NULL) 2849		lockmgr_printinfo(vp->v_vnlock); 2850	printf("\n"); 2851	return (0); 2852} 2853 2854/* 2855 * Extract the dev_t from a VBLK or VCHR vnode. 2856 */ 2857dev_t 2858vn_todev(vp) 2859	struct vnode *vp; 2860{ 2861	if (vp->v_type != VBLK && vp->v_type != VCHR) 2862		return (NODEV); 2863	return (vp->v_rdev); 2864} 2865 2866/* 2867 * Check if the vnode represents a disk device. 2868 */ 2869int 2870vn_isdisk(vp, errp) 2871	struct vnode *vp; 2872	int *errp; 2873{ 2874	if (vp->v_type != VBLK && vp->v_type != VCHR) { 2875		if (errp != NULL) 2876			*errp = ENOTBLK; 2877		return (0); 2878	} 2879	if (vp->v_rdev == NULL) { 2880		if (errp != NULL) 2881			*errp = ENXIO; 2882		return (0); 2883	} 2884	if (!devsw(vp->v_rdev)) { 2885		if (errp != NULL) 2886			*errp = ENXIO; 2887		return (0); 2888	} 2889	if (!(devsw(vp->v_rdev)->d_flags & D_DISK)) { 2890		if (errp != NULL) 2891			*errp = ENOTBLK; 2892		return (0); 2893	} 2894	if (errp != NULL) 2895		*errp = 0; 2896	return (1); 2897} 2898/* Release the state retained in a nameidata by namei(), subject to the NDF_NO_* inhibition flags. */ 2899void 2900NDFREE(ndp, flags) 2901	struct nameidata *ndp; 2902	const uint flags; 2903{ 2904	if (!(flags & NDF_NO_FREE_PNBUF) && 2905	    (ndp->ni_cnd.cn_flags & HASBUF)) { 2906		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 2907		ndp->ni_cnd.cn_flags &= ~HASBUF; 2908	} 2909	if (!(flags & NDF_NO_DVP_UNLOCK) && 2910	    (ndp->ni_cnd.cn_flags & LOCKPARENT) && 2911	    ndp->ni_dvp != ndp->ni_vp) 2912		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc); 2913	if (!(flags & NDF_NO_DVP_RELE) && 2914	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 2915		vrele(ndp->ni_dvp); 2916		ndp->ni_dvp = NULL; 2917	} 2918	if (!(flags & NDF_NO_VP_UNLOCK) && 2919	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 2920		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc); 2921	if (!(flags & NDF_NO_VP_RELE) && 2922	    ndp->ni_vp) { 2923		vrele(ndp->ni_vp); 2924		ndp->ni_vp = NULL; 2925	} 2926	if (!(flags & NDF_NO_STARTDIR_RELE) && 2927	    (ndp->ni_cnd.cn_flags & SAVESTART)) { 2928		vrele(ndp->ni_startdir); 2929		ndp->ni_startdir = NULL; 2930	} 2931} 2932
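/*
 * Example: a server-side consumer of vfs_export_lookup().  The sketch
 * below is illustrative only and deliberately not compiled; the
 * function name and calling convention are hypothetical (the real NFS
 * server reaches this logic through VFS_CHECKEXP()).
 */
#if 0
static int
example_checkexp(mp, nep, nam, exflagsp, credanonp)
	struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
	int *exflagsp;
	struct ucred **credanonp;
{
	struct netcred *np;

	/* Match the client address against the export list (or default). */
	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* client may not access this export */
	*exflagsp = np->netc_exflags;	/* e.g. MNT_EXRDONLY */
	*credanonp = &np->netc_anon;	/* anonymous credential to map to */
	return (0);
}
#endif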