vfs_subr.c (46679) -> vfs_subr.c (47028)
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
 39 * $Id: vfs_subr.c,v 1.192 1999/05/08 06:39:48 phk Exp $ (deleted)
 39 * $Id: vfs_subr.c,v 1.193 1999/05/08 07:02:38 phk Exp $ (added)
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/kernel.h>
52#include <sys/proc.h>
53#include <sys/malloc.h>
54#include <sys/mount.h>
55#include <sys/socket.h>
56#include <sys/vnode.h>
57#include <sys/stat.h>
58#include <sys/buf.h>
59#include <sys/domain.h>
60#include <sys/dirent.h>
61#include <sys/vmmeter.h>
62
63#include <machine/limits.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_prot.h>
68#include <vm/vm_object.h>
69#include <vm/vm_extern.h>
70#include <vm/pmap.h>
71#include <vm/vm_map.h>
72#include <vm/vm_page.h>
73#include <vm/vm_pager.h>
74#include <vm/vnode_pager.h>
75#include <vm/vm_zone.h>
76#include <sys/sysctl.h>
77
78#include <miscfs/specfs/specdev.h>
79
80static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
81
82static void insmntque __P((struct vnode *vp, struct mount *mp));
83static void vclean __P((struct vnode *vp, int flags, struct proc *p));
84static void vfree __P((struct vnode *));
85static void vgonel __P((struct vnode *vp, struct proc *p));
86static unsigned long numvnodes;
87SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
88
89enum vtype iftovt_tab[16] = {
90 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
91 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
92};
93int vttoif_tab[9] = {
94 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
95 S_IFSOCK, S_IFIFO, S_IFMT,
96};
97
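/*
 * These tables are indexed through the IFTOVT() and VTTOIF() macros in
 * <sys/vnode.h>.  A worked example, with illustrative values: the file
 * type lives in the top bits of the mode, so S_IFREG (0x8000) >> 12
 * gives index 8 and iftovt_tab[8] == VREG; going the other way,
 * vttoif_tab[VREG] == S_IFREG:
 *
 *	enum vtype vt = iftovt_tab[(mode & S_IFMT) >> 12];
 *	mode_t ifmt = vttoif_tab[(int)vt];
 */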
98static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
 99struct tobefreelist vnode_tobefree_list; /* vnode to-be-freed list */
100
101static u_long wantfreevnodes = 25;
102SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
103static u_long freevnodes = 0;
104SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
105
106int vfs_ioopt = 0;
107#ifdef ENABLE_VFS_IOOPT
108SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
109#endif
110
111struct mntlist mountlist; /* mounted filesystem list */
112struct simplelock mountlist_slock;
113struct simplelock mntvnode_slock;
114int nfs_mount_type = -1;
115#ifndef NULL_SIMPLELOCKS
116static struct simplelock mntid_slock;
117static struct simplelock vnode_free_list_slock;
118static struct simplelock spechash_slock;
119#endif
120struct nfs_public nfs_pub; /* publicly exported FS */
121static vm_zone_t vnode_zone;
122
123/*
124 * The workitem queue.
125 */
126#define SYNCER_MAXDELAY 32
127static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
128time_t syncdelay = 30;
129int rushjob; /* number of slots to run ASAP */
130
131static int syncer_delayno = 0;
132static long syncer_mask;
133LIST_HEAD(synclist, vnode);
134static struct synclist *syncer_workitem_pending;
135
136int desiredvnodes;
137SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
138 &desiredvnodes, 0, "Maximum number of vnodes");
139
140static void vfs_free_addrlist __P((struct netexport *nep));
141static int vfs_free_netcred __P((struct radix_node *rn, void *w));
142static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
143 struct export_args *argp));
144
145/*
146 * Initialize the vnode management data structures.
147 */
148void
149vntblinit()
150{
151
152 desiredvnodes = maxproc + cnt.v_page_count / 4;
153 simple_lock_init(&mntvnode_slock);
154 simple_lock_init(&mntid_slock);
155 simple_lock_init(&spechash_slock);
156 TAILQ_INIT(&vnode_free_list);
157 TAILQ_INIT(&vnode_tobefree_list);
158 simple_lock_init(&vnode_free_list_slock);
159 CIRCLEQ_INIT(&mountlist);
160 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
161 /*
162 * Initialize the filesystem syncer.
163 */
164 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
165 &syncer_mask);
166 syncer_maxdelay = syncer_mask + 1;
167}
168
169/*
170 * Mark a mount point as busy. Used to synchronize access and to delay
171 * unmounting. Interlock is not released on failure.
172 */
173int
174vfs_busy(mp, flags, interlkp, p)
175 struct mount *mp;
176 int flags;
177 struct simplelock *interlkp;
178 struct proc *p;
179{
180 int lkflags;
181
182 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
183 if (flags & LK_NOWAIT)
184 return (ENOENT);
185 mp->mnt_kern_flag |= MNTK_MWAIT;
186 if (interlkp) {
187 simple_unlock(interlkp);
188 }
189 /*
190 * Since all busy locks are shared except the exclusive
191 * lock granted when unmounting, the only place that a
192 * wakeup needs to be done is at the release of the
193 * exclusive lock at the end of dounmount.
194 */
195 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
196 if (interlkp) {
197 simple_lock(interlkp);
198 }
199 return (ENOENT);
200 }
201 lkflags = LK_SHARED | LK_NOPAUSE;
202 if (interlkp)
203 lkflags |= LK_INTERLOCK;
204 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
205 panic("vfs_busy: unexpected lock failure");
206 return (0);
207}
208
209/*
210 * Free a busy filesystem.
211 */
212void
213vfs_unbusy(mp, p)
214 struct mount *mp;
215 struct proc *p;
216{
217
218 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
219}
220
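/*
 * A minimal sketch of the intended vfs_busy()/vfs_unbusy() pairing when
 * walking the mount list, modelled on the sync() loop; the function
 * below is hypothetical and not part of this file.  Note that on
 * success vfs_busy() drops the interlock (via LK_INTERLOCK), while on
 * failure the interlock is still held, as documented above.
 */
#ifdef notdef
static void
example_foreach_mount(struct proc *p)
{
	struct mount *mp, *nmp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			/* Being unmounted; we still hold the interlock. */
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... operate on the busied mount point ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif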
221/*
 222 * Look up a filesystem type and, if found, allocate and initialize
223 * a mount structure for it.
224 *
225 * Devname is usually updated by mount(8) after booting.
226 */
227int
228vfs_rootmountalloc(fstypename, devname, mpp)
229 char *fstypename;
230 char *devname;
231 struct mount **mpp;
232{
233 struct proc *p = curproc; /* XXX */
234 struct vfsconf *vfsp;
235 struct mount *mp;
236
237 if (fstypename == NULL)
238 return (ENODEV);
239 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
240 if (!strcmp(vfsp->vfc_name, fstypename))
241 break;
242 if (vfsp == NULL)
243 return (ENODEV);
244 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
245 bzero((char *)mp, (u_long)sizeof(struct mount));
246 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
247 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
248 LIST_INIT(&mp->mnt_vnodelist);
249 mp->mnt_vfc = vfsp;
250 mp->mnt_op = vfsp->vfc_vfsops;
251 mp->mnt_flag = MNT_RDONLY;
252 mp->mnt_vnodecovered = NULLVP;
253 vfsp->vfc_refcount++;
254 mp->mnt_stat.f_type = vfsp->vfc_typenum;
255 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
256 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
257 mp->mnt_stat.f_mntonname[0] = '/';
258 mp->mnt_stat.f_mntonname[1] = 0;
259 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
260 *mpp = mp;
261 return (0);
262}
263
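/*
 * Boot-time usage, sketched loosely after a filesystem's mountroot
 * routine (illustrative only, not part of this file):
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if ((error = vfs_rootmountalloc("ufs", "root_device", &mp)) != 0)
 *		return (error);
 *	... mount the filesystem proper, then vfs_unbusy(mp, p) ...
 */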
264/*
265 * Find an appropriate filesystem to use for the root. If a filesystem
266 * has not been preselected, walk through the list of known filesystems
267 * trying those that have mountroot routines, and try them until one
268 * works or we have tried them all.
269 */
270#ifdef notdef /* XXX JH */
271int
272lite2_vfs_mountroot()
273{
274 struct vfsconf *vfsp;
275 extern int (*lite2_mountroot) __P((void));
276 int error;
277
278 if (lite2_mountroot != NULL)
279 return ((*lite2_mountroot)());
280 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
281 if (vfsp->vfc_mountroot == NULL)
282 continue;
283 if ((error = (*vfsp->vfc_mountroot)()) == 0)
284 return (0);
285 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
286 }
287 return (ENODEV);
288}
289#endif
290
291/*
 292 * Look up a mount point by filesystem identifier.
293 */
294struct mount *
295vfs_getvfs(fsid)
296 fsid_t *fsid;
297{
298 register struct mount *mp;
299
300 simple_lock(&mountlist_slock);
301 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
302 mp = mp->mnt_list.cqe_next) {
303 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
304 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
305 simple_unlock(&mountlist_slock);
306 return (mp);
307 }
308 }
309 simple_unlock(&mountlist_slock);
310 return ((struct mount *) 0);
311}
312
313/*
314 * Get a new unique fsid
315 */
316void
317vfs_getnewfsid(mp)
318 struct mount *mp;
319{
320 static u_short xxxfs_mntid;
321
322 fsid_t tfsid;
323 int mtype;
324
325 simple_lock(&mntid_slock);
326 mtype = mp->mnt_vfc->vfc_typenum;
327 mp->mnt_stat.f_fsid.val[0] = (nblkdev + mtype) * 256;
328 mp->mnt_stat.f_fsid.val[1] = mtype;
329 if (xxxfs_mntid == 0)
330 ++xxxfs_mntid;
331 tfsid.val[0] = (nblkdev + mtype) * 256 | xxxfs_mntid;
332 tfsid.val[1] = mtype;
333 if (mountlist.cqh_first != (void *)&mountlist) {
334 while (vfs_getvfs(&tfsid)) {
335 tfsid.val[0]++;
336 xxxfs_mntid++;
337 }
338 }
339 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
340 simple_unlock(&mntid_slock);
341}
342
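/*
 * Concretely: with nblkdev of, say, 24, a vfc_typenum of 1 and an
 * xxxfs_mntid of 1, the candidate fsid above is
 * val[0] == (24 + 1) * 256 | 1 == 6401 with val[1] == 1; on a collision
 * the loop just bumps val[0] and xxxfs_mntid until vfs_getvfs() finds
 * no existing mount with that identifier.  The figures are illustrative.
 */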
343/*
344 * Set vnode attributes to VNOVAL
345 */
346void
347vattr_null(vap)
348 register struct vattr *vap;
349{
350
351 vap->va_type = VNON;
352 vap->va_size = VNOVAL;
353 vap->va_bytes = VNOVAL;
354 vap->va_mode = VNOVAL;
355 vap->va_nlink = VNOVAL;
356 vap->va_uid = VNOVAL;
357 vap->va_gid = VNOVAL;
358 vap->va_fsid = VNOVAL;
359 vap->va_fileid = VNOVAL;
360 vap->va_blocksize = VNOVAL;
361 vap->va_rdev = VNOVAL;
362 vap->va_atime.tv_sec = VNOVAL;
363 vap->va_atime.tv_nsec = VNOVAL;
364 vap->va_mtime.tv_sec = VNOVAL;
365 vap->va_mtime.tv_nsec = VNOVAL;
366 vap->va_ctime.tv_sec = VNOVAL;
367 vap->va_ctime.tv_nsec = VNOVAL;
368 vap->va_flags = VNOVAL;
369 vap->va_gen = VNOVAL;
370 vap->va_vaflags = 0;
371}
372
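/*
 * Callers typically null the whole set and then fill in only the fields
 * they mean to change before handing it to VOP_SETATTR().  A sketch,
 * truncating a file to zero length (via the VATTR_NULL() macro from
 * <sys/vnode.h>; vp, cred and p are whatever the caller has in hand):
 *
 *	struct vattr vattr;
 *
 *	VATTR_NULL(&vattr);
 *	vattr.va_size = 0;
 *	(void) VOP_SETATTR(vp, &vattr, cred, p);
 */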
373/*
374 * Routines having to do with the management of the vnode table.
375 */
376extern vop_t **dead_vnodeop_p;
377
378/*
379 * Return the next vnode from the free list.
380 */
381int
382getnewvnode(tag, mp, vops, vpp)
383 enum vtagtype tag;
384 struct mount *mp;
385 vop_t **vops;
386 struct vnode **vpp;
387{
388 int s;
389 struct proc *p = curproc; /* XXX */
390 struct vnode *vp, *tvp, *nvp;
391 vm_object_t object;
392 TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
393
394 /*
 395 * We take the least recently used vnode from the freelist
 396 * if we can get it, it has no cached pages, and no
 397 * namecache entries refer to it.
 398 * Otherwise we allocate a new vnode.
399 */
400
401 s = splbio();
402 simple_lock(&vnode_free_list_slock);
403 TAILQ_INIT(&vnode_tmp_list);
404
405 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
406 nvp = TAILQ_NEXT(vp, v_freelist);
407 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
408 if (vp->v_flag & VAGE) {
409 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
410 } else {
411 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
412 }
413 vp->v_flag &= ~(VTBFREE|VAGE);
414 vp->v_flag |= VFREE;
415 if (vp->v_usecount)
416 panic("tobe free vnode isn't");
417 freevnodes++;
418 }
419
420 if (wantfreevnodes && freevnodes < wantfreevnodes) {
421 vp = NULL;
422 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
423 /*
424 * XXX: this is only here to be backwards compatible
425 */
426 vp = NULL;
427 } else {
428 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
429 nvp = TAILQ_NEXT(vp, v_freelist);
430 if (!simple_lock_try(&vp->v_interlock))
431 continue;
432 if (vp->v_usecount)
433 panic("free vnode isn't");
434
435 object = vp->v_object;
436 if (object && (object->resident_page_count || object->ref_count)) {
437 printf("object inconsistant state: RPC: %d, RC: %d\n",
438 object->resident_page_count, object->ref_count);
439 /* Don't recycle if it's caching some pages */
440 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
441 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
442 continue;
443 } else if (LIST_FIRST(&vp->v_cache_src)) {
444 /* Don't recycle if active in the namecache */
445 simple_unlock(&vp->v_interlock);
446 continue;
447 } else {
448 break;
449 }
450 }
451 }
452
453 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
454 nvp = TAILQ_NEXT(tvp, v_freelist);
455 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
456 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
457 simple_unlock(&tvp->v_interlock);
458 }
459
460 if (vp) {
461 vp->v_flag |= VDOOMED;
462 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
463 freevnodes--;
464 simple_unlock(&vnode_free_list_slock);
465 cache_purge(vp);
466 vp->v_lease = NULL;
467 if (vp->v_type != VBAD) {
468 vgonel(vp, p);
469 } else {
470 simple_unlock(&vp->v_interlock);
471 }
472
473#ifdef INVARIANTS
474 {
475 int s;
476
477 if (vp->v_data)
478 panic("cleaned vnode isn't");
479 s = splbio();
480 if (vp->v_numoutput)
481 panic("Clean vnode has pending I/O's");
482 splx(s);
483 }
484#endif
485 vp->v_flag = 0;
486 vp->v_lastr = 0;
487 vp->v_lastw = 0;
488 vp->v_lasta = 0;
489 vp->v_cstart = 0;
490 vp->v_clen = 0;
491 vp->v_socket = 0;
492 vp->v_writecount = 0; /* XXX */
493 vp->v_maxio = 0;
494 } else {
495 simple_unlock(&vnode_free_list_slock);
496 vp = (struct vnode *) zalloc(vnode_zone);
497 bzero((char *) vp, sizeof *vp);
498 simple_lock_init(&vp->v_interlock);
499 vp->v_dd = vp;
500 cache_purge(vp);
501 LIST_INIT(&vp->v_cache_src);
502 TAILQ_INIT(&vp->v_cache_dst);
503 numvnodes++;
504 }
505
506 TAILQ_INIT(&vp->v_cleanblkhd);
507 TAILQ_INIT(&vp->v_dirtyblkhd);
508 vp->v_type = VNON;
509 vp->v_tag = tag;
510 vp->v_op = vops;
511 insmntque(vp, mp);
512 *vpp = vp;
513 vp->v_usecount = 1;
514 vp->v_data = 0;
515 splx(s);
516
517 vfs_object_create(vp, p, p->p_ucred);
518 return (0);
519}
520
521/*
522 * Move a vnode from one mount queue to another.
523 */
524static void
525insmntque(vp, mp)
526 register struct vnode *vp;
527 register struct mount *mp;
528{
529
530 simple_lock(&mntvnode_slock);
531 /*
532 * Delete from old mount point vnode list, if on one.
533 */
534 if (vp->v_mount != NULL)
535 LIST_REMOVE(vp, v_mntvnodes);
536 /*
537 * Insert into list of vnodes for the new mount point, if available.
538 */
539 if ((vp->v_mount = mp) == NULL) {
540 simple_unlock(&mntvnode_slock);
541 return;
542 }
543 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
544 simple_unlock(&mntvnode_slock);
545}
546
547/*
548 * Update outstanding I/O count and do wakeup if requested.
549 */
550void
551vwakeup(bp)
552 register struct buf *bp;
553{
554 register struct vnode *vp;
555
556 bp->b_flags &= ~B_WRITEINPROG;
557 if ((vp = bp->b_vp)) {
558 vp->v_numoutput--;
559 if (vp->v_numoutput < 0)
560 panic("vwakeup: neg numoutput");
561 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
562 vp->v_flag &= ~VBWAIT;
563 wakeup((caddr_t) &vp->v_numoutput);
564 }
565 }
566}
567
568/*
569 * Flush out and invalidate all buffers associated with a vnode.
570 * Called with the underlying object locked.
571 */
572int
573vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
574 register struct vnode *vp;
575 int flags;
576 struct ucred *cred;
577 struct proc *p;
578 int slpflag, slptimeo;
579{
580 register struct buf *bp;
581 struct buf *nbp, *blist;
582 int s, error;
583 vm_object_t object;
584
585 if (flags & V_SAVE) {
586 s = splbio();
587 while (vp->v_numoutput) {
588 vp->v_flag |= VBWAIT;
589 error = tsleep((caddr_t)&vp->v_numoutput,
590 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
591 if (error) {
592 splx(s);
593 return (error);
594 }
595 }
596 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
597 splx(s);
598 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
599 return (error);
600 s = splbio();
601 if (vp->v_numoutput > 0 ||
602 !TAILQ_EMPTY(&vp->v_dirtyblkhd))
603 panic("vinvalbuf: dirty bufs");
604 }
605 splx(s);
606 }
607 s = splbio();
608 for (;;) {
609 blist = TAILQ_FIRST(&vp->v_cleanblkhd);
610 if (!blist)
611 blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
612 if (!blist)
613 break;
614
615 for (bp = blist; bp; bp = nbp) {
616 nbp = TAILQ_NEXT(bp, b_vnbufs);
617 if (bp->b_flags & B_BUSY) {
618 bp->b_flags |= B_WANTED;
619 error = tsleep((caddr_t) bp,
620 slpflag | (PRIBIO + 4), "vinvalbuf",
621 slptimeo);
622 if (error) {
623 splx(s);
624 return (error);
625 }
626 break;
627 }
628 /*
629 * XXX Since there are no node locks for NFS, I
630 * believe there is a slight chance that a delayed
631 * write will occur while sleeping just above, so
632 * check for it. Note that vfs_bio_awrite expects
633 * buffers to reside on a queue, while VOP_BWRITE and
634 * brelse do not.
635 */
636 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
637 (flags & V_SAVE)) {
638
639 if (bp->b_vp == vp) {
640 if (bp->b_flags & B_CLUSTEROK) {
641 vfs_bio_awrite(bp);
642 } else {
643 bremfree(bp);
644 bp->b_flags |= (B_BUSY | B_ASYNC);
645 VOP_BWRITE(bp);
646 }
647 } else {
648 bremfree(bp);
649 bp->b_flags |= B_BUSY;
650 (void) VOP_BWRITE(bp);
651 }
652 break;
653 }
654 bremfree(bp);
655 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
656 bp->b_flags &= ~B_ASYNC;
657 brelse(bp);
658 }
659 }
660
661 while (vp->v_numoutput > 0) {
662 vp->v_flag |= VBWAIT;
663 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
664 }
665
666 splx(s);
667
668 /*
669 * Destroy the copy in the VM cache, too.
670 */
671 simple_lock(&vp->v_interlock);
672 object = vp->v_object;
673 if (object != NULL) {
674 vm_object_page_remove(object, 0, 0,
675 (flags & V_SAVE) ? TRUE : FALSE);
676 }
677 simple_unlock(&vp->v_interlock);
678
679 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
680 panic("vinvalbuf: flush failed");
681 return (0);
682}
683
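/*
 * A sketch of typical vinvalbuf() use at mount/unmount time (the calling
 * convention only; devvp, cred and p are whatever the caller has in hand):
 *
 *	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
 *		return (error);
 *
 * V_SAVE writes dirty buffers back before invalidating them; passing 0
 * for flags instead throws the dirty data away.
 */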
684/*
 685 * Truncate a file's buffers and pages to a specified length. This
686 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
687 * sync activity.
688 */
689int
690vtruncbuf(vp, cred, p, length, blksize)
691 register struct vnode *vp;
692 struct ucred *cred;
693 struct proc *p;
694 off_t length;
695 int blksize;
696{
697 register struct buf *bp;
698 struct buf *nbp;
699 int s, anyfreed;
700 int trunclbn;
701
702 /*
703 * Round up to the *next* lbn.
704 */
705 trunclbn = (length + blksize - 1) / blksize;
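	/*
	 * For example, length == 10000 with blksize == 8192 gives
	 * trunclbn == (10000 + 8191) / 8192 == 2, so block 1, which still
	 * holds byte 9999, survives while buffers with b_lblkno >= 2 are
	 * invalidated below.
	 */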
706
707 s = splbio();
708restart:
709 anyfreed = 1;
710 for (;anyfreed;) {
711 anyfreed = 0;
712 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
713 nbp = TAILQ_NEXT(bp, b_vnbufs);
714 if (bp->b_lblkno >= trunclbn) {
715 if (bp->b_flags & B_BUSY) {
716 bp->b_flags |= B_WANTED;
717 tsleep(bp, PRIBIO + 4, "vtrb1", 0);
718 goto restart;
719 } else {
720 bremfree(bp);
721 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
722 bp->b_flags &= ~B_ASYNC;
723 brelse(bp);
724 anyfreed = 1;
725 }
726 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
727 (nbp->b_vp != vp) ||
728 (nbp->b_flags & B_DELWRI))) {
729 goto restart;
730 }
731 }
732 }
733
734 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
735 nbp = TAILQ_NEXT(bp, b_vnbufs);
736 if (bp->b_lblkno >= trunclbn) {
737 if (bp->b_flags & B_BUSY) {
738 bp->b_flags |= B_WANTED;
739 tsleep(bp, PRIBIO + 4, "vtrb2", 0);
740 goto restart;
741 } else {
742 bremfree(bp);
743 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
744 bp->b_flags &= ~B_ASYNC;
745 brelse(bp);
746 anyfreed = 1;
747 }
748 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
749 (nbp->b_vp != vp) ||
750 (nbp->b_flags & B_DELWRI) == 0)) {
751 goto restart;
752 }
753 }
754 }
755 }
756
757 if (length > 0) {
758restartsync:
759 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
760 nbp = TAILQ_NEXT(bp, b_vnbufs);
761 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
762 if (bp->b_flags & B_BUSY) {
763 bp->b_flags |= B_WANTED;
764 tsleep(bp, PRIBIO, "vtrb3", 0);
765 } else {
766 bremfree(bp);
767 bp->b_flags |= B_BUSY;
768 if (bp->b_vp == vp) {
769 bp->b_flags |= B_ASYNC;
770 } else {
771 bp->b_flags &= ~B_ASYNC;
772 }
773 VOP_BWRITE(bp);
774 }
775 goto restartsync;
776 }
777
778 }
779 }
780
781 while (vp->v_numoutput > 0) {
782 vp->v_flag |= VBWAIT;
783 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
784 }
785
786 splx(s);
787
788 vnode_pager_setsize(vp, length);
789
790 return (0);
791}
792
793/*
794 * Associate a buffer with a vnode.
795 */
796void
797bgetvp(vp, bp)
798 register struct vnode *vp;
799 register struct buf *bp;
800{
801 int s;
802
803 KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
804
805 vhold(vp);
806 bp->b_vp = vp;
807 if (vp->v_type == VBLK || vp->v_type == VCHR)
808 bp->b_dev = vp->v_rdev;
809 else
810 bp->b_dev = NODEV;
811 /*
812 * Insert onto list for new vnode.
813 */
814 s = splbio();
815 bp->b_xflags |= B_VNCLEAN;
816 bp->b_xflags &= ~B_VNDIRTY;
817 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
818 splx(s);
819}
820
821/*
822 * Disassociate a buffer from a vnode.
823 */
824void
825brelvp(bp)
826 register struct buf *bp;
827{
828 struct vnode *vp;
829 struct buflists *listheadp;
830 int s;
831
832 KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
833
834 /*
835 * Delete from old vnode list, if on one.
836 */
837 vp = bp->b_vp;
838 s = splbio();
839 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
840 if (bp->b_xflags & B_VNDIRTY)
841 listheadp = &vp->v_dirtyblkhd;
842 else
843 listheadp = &vp->v_cleanblkhd;
844 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
845 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
846 }
847 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
848 vp->v_flag &= ~VONWORKLST;
849 LIST_REMOVE(vp, v_synclist);
850 }
851 splx(s);
852 bp->b_vp = (struct vnode *) 0;
853 vdrop(vp);
854}
855
856/*
857 * The workitem queue.
858 *
859 * It is useful to delay writes of file data and filesystem metadata
860 * for tens of seconds so that quickly created and deleted files need
861 * not waste disk bandwidth being created and removed. To realize this,
862 * we append vnodes to a "workitem" queue. When running with a soft
863 * updates implementation, most pending metadata dependencies should
 864 * not wait for more than a few seconds. Thus, writes to mounted block
 865 * devices are delayed only about half the time that file data is delayed.
 866 * Similarly, directory updates are more critical, so they are delayed
 867 * only about a third of the time that file data is delayed. Thus, there are
868 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 869 * one each second (driven off the filesystem syncer process). The
870 * syncer_delayno variable indicates the next queue that is to be processed.
871 * Items that need to be processed soon are placed in this queue:
872 *
873 * syncer_workitem_pending[syncer_delayno]
874 *
875 * A delay of fifteen seconds is done by placing the request fifteen
876 * entries later in the queue:
877 *
878 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
879 *
880 */
881
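/*
 * Concretely: hashinit() in vntblinit() sizes the table to a power of
 * two, so with SYNCER_MAXDELAY of 32 the returned syncer_mask is 31 and
 * syncer_maxdelay stays 32.  If syncer_delayno is 20, a fifteen second
 * delay selects slot (20 + 15) & 31 == 3, wrapping around the ring.
 * With the default syncdelay of 30, the ratios above come out to about
 * 15 seconds for block devices (syncdelay / 2) and 10 seconds for
 * directories (syncdelay / 3); see reassignbuf() below for where those
 * delays are chosen.  The numbers are illustrative only.
 */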
882/*
883 * Add an item to the syncer work queue.
884 */
885static void
886vn_syncer_add_to_worklist(struct vnode *vp, int delay)
887{
888 int s, slot;
889
890 s = splbio();
891
892 if (vp->v_flag & VONWORKLST) {
893 LIST_REMOVE(vp, v_synclist);
894 }
895
896 if (delay > syncer_maxdelay - 2)
897 delay = syncer_maxdelay - 2;
898 slot = (syncer_delayno + delay) & syncer_mask;
899
900 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
901 vp->v_flag |= VONWORKLST;
902 splx(s);
903}
904
905struct proc *updateproc;
906static void sched_sync __P((void));
907static const struct kproc_desc up_kp = {
908 "syncer",
909 sched_sync,
910 &updateproc
911};
912SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
913
914/*
915 * System filesystem synchronizer daemon.
916 */
917void
918sched_sync(void)
919{
920 struct synclist *slp;
921 struct vnode *vp;
922 long starttime;
923 int s;
924 struct proc *p = updateproc;
925
926 for (;;) {
927 starttime = time_second;
928
929 /*
930 * Push files whose dirty time has expired. Be careful
931 * of interrupt race on slp queue.
932 */
933 s = splbio();
934 slp = &syncer_workitem_pending[syncer_delayno];
935 syncer_delayno += 1;
936 if (syncer_delayno == syncer_maxdelay)
937 syncer_delayno = 0;
938 splx(s);
939
940 while ((vp = LIST_FIRST(slp)) != NULL) {
941 if (VOP_ISLOCKED(vp) == 0) {
942 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
943 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
944 VOP_UNLOCK(vp, 0, p);
945 }
946 s = splbio();
947 if (LIST_FIRST(slp) == vp) {
948 /*
949 * Note: v_tag VT_VFS vps can remain on the
950 * worklist too with no dirty blocks, but
 951 * since sync_fsync() moves them to a different
952 * slot we are safe.
953 */
954 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
955 vp->v_type != VBLK)
956 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
957 /*
958 * Put us back on the worklist. The worklist
959 * routine will remove us from our current
960 * position and then add us back in at a later
961 * position.
962 */
963 vn_syncer_add_to_worklist(vp, syncdelay);
964 }
965 splx(s);
966 }
967
968 /*
969 * Do soft update processing.
970 */
971 if (bioops.io_sync)
972 (*bioops.io_sync)(NULL);
973
974 /*
975 * The variable rushjob allows the kernel to speed up the
976 * processing of the filesystem syncer process. A rushjob
977 * value of N tells the filesystem syncer to process the next
978 * N seconds worth of work on its queue ASAP. Currently rushjob
979 * is used by the soft update code to speed up the filesystem
980 * syncer process when the incore state is getting so far
981 * ahead of the disk that the kernel memory pool is being
 982 * threatened with exhaustion; a caller is sketched below.
983 */
984 if (rushjob > 0) {
985 rushjob -= 1;
986 continue;
987 }
988 /*
989 * If it has taken us less than a second to process the
990 * current work, then wait. Otherwise start right over
991 * again. We can still lose time if any single round
992 * takes more than two seconds, but it does not really
993 * matter as we are just trying to generally pace the
994 * filesystem activity.
995 */
996 if (time_second == starttime)
997 tsleep(&lbolt, PPAUSE, "syncer", 0);
998 }
999}
1000
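/*
 * A hypothetical caller of the rushjob mechanism described in
 * sched_sync() above, similar in spirit to what the soft update code
 * does when the incore state runs ahead of the disk; not part of this
 * file.  Bumping rushjob by one asks the syncer to fold one extra
 * second's worth of the wheel into its current pass.
 */
#ifdef notdef
static void
example_speedup_syncer(void)
{

	if (rushjob < syncdelay / 2)
		rushjob += 1;
}
#endif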
1001/*
1002 * Associate a p-buffer with a vnode.
1003 *
 1004 * Also sets the B_PAGING flag to indicate that the vnode is not fully
 1005 * associated with the buffer, i.e. the bp has not been linked into the
 1006 * vnode or ref-counted.
1007 */
1008void
1009pbgetvp(vp, bp)
1010 register struct vnode *vp;
1011 register struct buf *bp;
1012{
1013
1014 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
1015
1016 bp->b_vp = vp;
1017 bp->b_flags |= B_PAGING;
1018 if (vp->v_type == VBLK || vp->v_type == VCHR)
1019 bp->b_dev = vp->v_rdev;
1020 else
1021 bp->b_dev = NODEV;
1022}
1023
1024/*
1025 * Disassociate a p-buffer from a vnode.
1026 */
1027void
1028pbrelvp(bp)
1029 register struct buf *bp;
1030{
1031
1032 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
1033
1034#if !defined(MAX_PERF)
1035 /* XXX REMOVE ME */
1036 if (bp->b_vnbufs.tqe_next != NULL) {
1037 panic(
1038 "relpbuf(): b_vp was probably reassignbuf()d %p %x",
1039 bp,
1040 (int)bp->b_flags
1041 );
1042 }
1043#endif
1044 bp->b_vp = (struct vnode *) 0;
1045 bp->b_flags &= ~B_PAGING;
1046}
1047
1048void
1049pbreassignbuf(bp, newvp)
1050 struct buf *bp;
1051 struct vnode *newvp;
1052{
1053#if !defined(MAX_PERF)
1054 if ((bp->b_flags & B_PAGING) == 0) {
1055 panic(
1056 "pbreassignbuf() on non phys bp %p",
1057 bp
1058 );
1059 }
1060#endif
1061 bp->b_vp = newvp;
1062}
1063
1064/*
1065 * Reassign a buffer from one vnode to another.
1066 * Used to assign file specific control information
1067 * (indirect blocks) to the vnode to which they belong.
1068 */
1069void
1070reassignbuf(bp, newvp)
1071 register struct buf *bp;
1072 register struct vnode *newvp;
1073{
1074 struct buflists *listheadp;
1075 int delay;
1076 int s;
1077
1078 if (newvp == NULL) {
1079 printf("reassignbuf: NULL");
1080 return;
1081 }
1082
1083#if !defined(MAX_PERF)
1084 /*
1085 * B_PAGING flagged buffers cannot be reassigned because their vp
1086 * is not fully linked in.
1087 */
1088 if (bp->b_flags & B_PAGING)
1089 panic("cannot reassign paging buffer");
1090#endif
1091
1092 s = splbio();
1093 /*
1094 * Delete from old vnode list, if on one.
1095 */
1096 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
1097 if (bp->b_xflags & B_VNDIRTY)
1098 listheadp = &bp->b_vp->v_dirtyblkhd;
1099 else
1100 listheadp = &bp->b_vp->v_cleanblkhd;
1101 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
1102 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
1103 if (bp->b_vp != newvp) {
1104 vdrop(bp->b_vp);
1105 bp->b_vp = NULL; /* for clarification */
1106 }
1107 }
1108 /*
1109 * If dirty, put on list of dirty buffers; otherwise insert onto list
1110 * of clean buffers.
1111 */
1112 if (bp->b_flags & B_DELWRI) {
1113 struct buf *tbp;
1114
1115 listheadp = &newvp->v_dirtyblkhd;
1116 if ((newvp->v_flag & VONWORKLST) == 0) {
1117 switch (newvp->v_type) {
1118 case VDIR:
1119 delay = syncdelay / 3;
1120 break;
1121 case VBLK:
1122 if (newvp->v_specmountpoint != NULL) {
1123 delay = syncdelay / 2;
1124 break;
1125 }
1126 /* fall through */
1127 default:
1128 delay = syncdelay;
1129 }
1130 vn_syncer_add_to_worklist(newvp, delay);
1131 }
1132 bp->b_xflags |= B_VNDIRTY;
1133 tbp = TAILQ_FIRST(listheadp);
1134 if (tbp == NULL ||
1135 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
1136 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
1137 } else {
1138 if (bp->b_lblkno >= 0) {
1139 struct buf *ttbp;
1140 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
1141 (ttbp->b_lblkno < bp->b_lblkno)) {
1142 tbp = ttbp;
1143 }
1144 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
1145 } else {
1146 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
1147 }
1148 }
1149 } else {
1150 bp->b_xflags |= B_VNCLEAN;
1151 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
1152 if ((newvp->v_flag & VONWORKLST) &&
1153 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
1154 newvp->v_flag &= ~VONWORKLST;
1155 LIST_REMOVE(newvp, v_synclist);
1156 }
1157 }
1158 if (bp->b_vp != newvp) {
1159 bp->b_vp = newvp;
1160 vhold(bp->b_vp);
1161 }
1162 splx(s);
1163}
1164
1165/*
1166 * Create a vnode for a block device.
1167 * Used for mounting the root file system.
1168 */
1169int
1170bdevvp(dev, vpp)
1171 dev_t dev;
1172 struct vnode **vpp;
1173{
1174 register struct vnode *vp;
1175 struct vnode *nvp;
1176 int error;
1177
1178 /* XXX 255 is for mfs. */
1179 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
1180 bdevsw(dev) == NULL))) {
1181 *vpp = NULLVP;
1182 return (ENXIO);
1183 }
1184 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
1185 if (error) {
1186 *vpp = NULLVP;
1187 return (error);
1188 }
1189 vp = nvp;
1190 vp->v_type = VBLK;
40 */
41
42/*
43 * External virtual filesystem routines
44 */
45#include "opt_ddb.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/kernel.h>
52#include <sys/proc.h>
53#include <sys/malloc.h>
54#include <sys/mount.h>
55#include <sys/socket.h>
56#include <sys/vnode.h>
57#include <sys/stat.h>
58#include <sys/buf.h>
59#include <sys/domain.h>
60#include <sys/dirent.h>
61#include <sys/vmmeter.h>
62
63#include <machine/limits.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_prot.h>
68#include <vm/vm_object.h>
69#include <vm/vm_extern.h>
70#include <vm/pmap.h>
71#include <vm/vm_map.h>
72#include <vm/vm_page.h>
73#include <vm/vm_pager.h>
74#include <vm/vnode_pager.h>
75#include <vm/vm_zone.h>
76#include <sys/sysctl.h>
77
78#include <miscfs/specfs/specdev.h>
79
80static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
81
82static void insmntque __P((struct vnode *vp, struct mount *mp));
83static void vclean __P((struct vnode *vp, int flags, struct proc *p));
84static void vfree __P((struct vnode *));
85static void vgonel __P((struct vnode *vp, struct proc *p));
86static unsigned long numvnodes;
87SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
88
89enum vtype iftovt_tab[16] = {
90 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
91 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
92};
93int vttoif_tab[9] = {
94 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
95 S_IFSOCK, S_IFIFO, S_IFMT,
96};
97
98static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
99struct tobefreelist vnode_tobefree_list; /* vnode free list */
100
101static u_long wantfreevnodes = 25;
102SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
103static u_long freevnodes = 0;
104SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
105
106int vfs_ioopt = 0;
107#ifdef ENABLE_VFS_IOOPT
108SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
109#endif
110
111struct mntlist mountlist; /* mounted filesystem list */
112struct simplelock mountlist_slock;
113struct simplelock mntvnode_slock;
114int nfs_mount_type = -1;
115#ifndef NULL_SIMPLELOCKS
116static struct simplelock mntid_slock;
117static struct simplelock vnode_free_list_slock;
118static struct simplelock spechash_slock;
119#endif
120struct nfs_public nfs_pub; /* publicly exported FS */
121static vm_zone_t vnode_zone;
122
123/*
124 * The workitem queue.
125 */
126#define SYNCER_MAXDELAY 32
127static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
128time_t syncdelay = 30;
129int rushjob; /* number of slots to run ASAP */
130
131static int syncer_delayno = 0;
132static long syncer_mask;
133LIST_HEAD(synclist, vnode);
134static struct synclist *syncer_workitem_pending;
135
136int desiredvnodes;
137SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
138 &desiredvnodes, 0, "Maximum number of vnodes");
139
140static void vfs_free_addrlist __P((struct netexport *nep));
141static int vfs_free_netcred __P((struct radix_node *rn, void *w));
142static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
143 struct export_args *argp));
144
145/*
146 * Initialize the vnode management data structures.
147 */
148void
149vntblinit()
150{
151
152 desiredvnodes = maxproc + cnt.v_page_count / 4;
153 simple_lock_init(&mntvnode_slock);
154 simple_lock_init(&mntid_slock);
155 simple_lock_init(&spechash_slock);
156 TAILQ_INIT(&vnode_free_list);
157 TAILQ_INIT(&vnode_tobefree_list);
158 simple_lock_init(&vnode_free_list_slock);
159 CIRCLEQ_INIT(&mountlist);
160 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
161 /*
162 * Initialize the filesystem syncer.
163 */
164 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
165 &syncer_mask);
166 syncer_maxdelay = syncer_mask + 1;
167}
168
169/*
170 * Mark a mount point as busy. Used to synchronize access and to delay
171 * unmounting. Interlock is not released on failure.
172 */
173int
174vfs_busy(mp, flags, interlkp, p)
175 struct mount *mp;
176 int flags;
177 struct simplelock *interlkp;
178 struct proc *p;
179{
180 int lkflags;
181
182 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
183 if (flags & LK_NOWAIT)
184 return (ENOENT);
185 mp->mnt_kern_flag |= MNTK_MWAIT;
186 if (interlkp) {
187 simple_unlock(interlkp);
188 }
189 /*
190 * Since all busy locks are shared except the exclusive
191 * lock granted when unmounting, the only place that a
192 * wakeup needs to be done is at the release of the
193 * exclusive lock at the end of dounmount.
194 */
195 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
196 if (interlkp) {
197 simple_lock(interlkp);
198 }
199 return (ENOENT);
200 }
201 lkflags = LK_SHARED | LK_NOPAUSE;
202 if (interlkp)
203 lkflags |= LK_INTERLOCK;
204 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
205 panic("vfs_busy: unexpected lock failure");
206 return (0);
207}
208
209/*
210 * Free a busy filesystem.
211 */
212void
213vfs_unbusy(mp, p)
214 struct mount *mp;
215 struct proc *p;
216{
217
218 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
219}
220
221/*
222 * Lookup a filesystem type, and if found allocate and initialize
223 * a mount structure for it.
224 *
225 * Devname is usually updated by mount(8) after booting.
226 */
227int
228vfs_rootmountalloc(fstypename, devname, mpp)
229 char *fstypename;
230 char *devname;
231 struct mount **mpp;
232{
233 struct proc *p = curproc; /* XXX */
234 struct vfsconf *vfsp;
235 struct mount *mp;
236
237 if (fstypename == NULL)
238 return (ENODEV);
239 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
240 if (!strcmp(vfsp->vfc_name, fstypename))
241 break;
242 if (vfsp == NULL)
243 return (ENODEV);
244 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
245 bzero((char *)mp, (u_long)sizeof(struct mount));
246 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
247 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
248 LIST_INIT(&mp->mnt_vnodelist);
249 mp->mnt_vfc = vfsp;
250 mp->mnt_op = vfsp->vfc_vfsops;
251 mp->mnt_flag = MNT_RDONLY;
252 mp->mnt_vnodecovered = NULLVP;
253 vfsp->vfc_refcount++;
254 mp->mnt_stat.f_type = vfsp->vfc_typenum;
255 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
256 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
257 mp->mnt_stat.f_mntonname[0] = '/';
258 mp->mnt_stat.f_mntonname[1] = 0;
259 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
260 *mpp = mp;
261 return (0);
262}
263
264/*
265 * Find an appropriate filesystem to use for the root. If a filesystem
266 * has not been preselected, walk through the list of known filesystems
267 * trying those that have mountroot routines, and try them until one
268 * works or we have tried them all.
269 */
270#ifdef notdef /* XXX JH */
271int
272lite2_vfs_mountroot()
273{
274 struct vfsconf *vfsp;
275 extern int (*lite2_mountroot) __P((void));
276 int error;
277
278 if (lite2_mountroot != NULL)
279 return ((*lite2_mountroot)());
280 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
281 if (vfsp->vfc_mountroot == NULL)
282 continue;
283 if ((error = (*vfsp->vfc_mountroot)()) == 0)
284 return (0);
285 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
286 }
287 return (ENODEV);
288}
289#endif
290
291/*
292 * Lookup a mount point by filesystem identifier.
293 */
294struct mount *
295vfs_getvfs(fsid)
296 fsid_t *fsid;
297{
298 register struct mount *mp;
299
300 simple_lock(&mountlist_slock);
301 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
302 mp = mp->mnt_list.cqe_next) {
303 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
304 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
305 simple_unlock(&mountlist_slock);
306 return (mp);
307 }
308 }
309 simple_unlock(&mountlist_slock);
310 return ((struct mount *) 0);
311}
312
313/*
314 * Get a new unique fsid
315 */
316void
317vfs_getnewfsid(mp)
318 struct mount *mp;
319{
320 static u_short xxxfs_mntid;
321
322 fsid_t tfsid;
323 int mtype;
324
325 simple_lock(&mntid_slock);
326 mtype = mp->mnt_vfc->vfc_typenum;
327 mp->mnt_stat.f_fsid.val[0] = (nblkdev + mtype) * 256;
328 mp->mnt_stat.f_fsid.val[1] = mtype;
329 if (xxxfs_mntid == 0)
330 ++xxxfs_mntid;
331 tfsid.val[0] = (nblkdev + mtype) * 256 | xxxfs_mntid;
332 tfsid.val[1] = mtype;
333 if (mountlist.cqh_first != (void *)&mountlist) {
334 while (vfs_getvfs(&tfsid)) {
335 tfsid.val[0]++;
336 xxxfs_mntid++;
337 }
338 }
339 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
340 simple_unlock(&mntid_slock);
341}
342
343/*
344 * Set vnode attributes to VNOVAL
345 */
346void
347vattr_null(vap)
348 register struct vattr *vap;
349{
350
351 vap->va_type = VNON;
352 vap->va_size = VNOVAL;
353 vap->va_bytes = VNOVAL;
354 vap->va_mode = VNOVAL;
355 vap->va_nlink = VNOVAL;
356 vap->va_uid = VNOVAL;
357 vap->va_gid = VNOVAL;
358 vap->va_fsid = VNOVAL;
359 vap->va_fileid = VNOVAL;
360 vap->va_blocksize = VNOVAL;
361 vap->va_rdev = VNOVAL;
362 vap->va_atime.tv_sec = VNOVAL;
363 vap->va_atime.tv_nsec = VNOVAL;
364 vap->va_mtime.tv_sec = VNOVAL;
365 vap->va_mtime.tv_nsec = VNOVAL;
366 vap->va_ctime.tv_sec = VNOVAL;
367 vap->va_ctime.tv_nsec = VNOVAL;
368 vap->va_flags = VNOVAL;
369 vap->va_gen = VNOVAL;
370 vap->va_vaflags = 0;
371}
372
373/*
374 * Routines having to do with the management of the vnode table.
375 */
376extern vop_t **dead_vnodeop_p;
377
378/*
379 * Return the next vnode from the free list.
380 */
381int
382getnewvnode(tag, mp, vops, vpp)
383 enum vtagtype tag;
384 struct mount *mp;
385 vop_t **vops;
386 struct vnode **vpp;
387{
388 int s;
389 struct proc *p = curproc; /* XXX */
390 struct vnode *vp, *tvp, *nvp;
391 vm_object_t object;
392 TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
393
394 /*
395 * We take the least recently used vnode from the freelist
396 * if we can get it and it has no cached pages, and no
397 * namecache entries are relative to it.
398 * Otherwise we allocate a new vnode
399 */
400
401 s = splbio();
402 simple_lock(&vnode_free_list_slock);
403 TAILQ_INIT(&vnode_tmp_list);
404
405 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
406 nvp = TAILQ_NEXT(vp, v_freelist);
407 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
408 if (vp->v_flag & VAGE) {
409 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
410 } else {
411 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
412 }
413 vp->v_flag &= ~(VTBFREE|VAGE);
414 vp->v_flag |= VFREE;
415 if (vp->v_usecount)
416 panic("tobe free vnode isn't");
417 freevnodes++;
418 }
419
420 if (wantfreevnodes && freevnodes < wantfreevnodes) {
421 vp = NULL;
422 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
423 /*
424 * XXX: this is only here to be backwards compatible
425 */
426 vp = NULL;
427 } else {
428 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
429 nvp = TAILQ_NEXT(vp, v_freelist);
430 if (!simple_lock_try(&vp->v_interlock))
431 continue;
432 if (vp->v_usecount)
433 panic("free vnode isn't");
434
435 object = vp->v_object;
436 if (object && (object->resident_page_count || object->ref_count)) {
437 printf("object inconsistant state: RPC: %d, RC: %d\n",
438 object->resident_page_count, object->ref_count);
439 /* Don't recycle if it's caching some pages */
440 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
441 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
442 continue;
443 } else if (LIST_FIRST(&vp->v_cache_src)) {
444 /* Don't recycle if active in the namecache */
445 simple_unlock(&vp->v_interlock);
446 continue;
447 } else {
448 break;
449 }
450 }
451 }
452
453 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
454 nvp = TAILQ_NEXT(tvp, v_freelist);
455 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
456 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
457 simple_unlock(&tvp->v_interlock);
458 }
459
460 if (vp) {
461 vp->v_flag |= VDOOMED;
462 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
463 freevnodes--;
464 simple_unlock(&vnode_free_list_slock);
465 cache_purge(vp);
466 vp->v_lease = NULL;
467 if (vp->v_type != VBAD) {
468 vgonel(vp, p);
469 } else {
470 simple_unlock(&vp->v_interlock);
471 }
472
473#ifdef INVARIANTS
474 {
475 int s;
476
477 if (vp->v_data)
478 panic("cleaned vnode isn't");
479 s = splbio();
480 if (vp->v_numoutput)
481 panic("Clean vnode has pending I/O's");
482 splx(s);
483 }
484#endif
485 vp->v_flag = 0;
486 vp->v_lastr = 0;
487 vp->v_lastw = 0;
488 vp->v_lasta = 0;
489 vp->v_cstart = 0;
490 vp->v_clen = 0;
491 vp->v_socket = 0;
492 vp->v_writecount = 0; /* XXX */
493 vp->v_maxio = 0;
494 } else {
495 simple_unlock(&vnode_free_list_slock);
496 vp = (struct vnode *) zalloc(vnode_zone);
497 bzero((char *) vp, sizeof *vp);
498 simple_lock_init(&vp->v_interlock);
499 vp->v_dd = vp;
500 cache_purge(vp);
501 LIST_INIT(&vp->v_cache_src);
502 TAILQ_INIT(&vp->v_cache_dst);
503 numvnodes++;
504 }
505
506 TAILQ_INIT(&vp->v_cleanblkhd);
507 TAILQ_INIT(&vp->v_dirtyblkhd);
508 vp->v_type = VNON;
509 vp->v_tag = tag;
510 vp->v_op = vops;
511 insmntque(vp, mp);
512 *vpp = vp;
513 vp->v_usecount = 1;
514 vp->v_data = 0;
515 splx(s);
516
517 vfs_object_create(vp, p, p->p_ucred);
518 return (0);
519}
520
521/*
522 * Move a vnode from one mount queue to another.
523 */
524static void
525insmntque(vp, mp)
526 register struct vnode *vp;
527 register struct mount *mp;
528{
529
530 simple_lock(&mntvnode_slock);
531 /*
532 * Delete from old mount point vnode list, if on one.
533 */
534 if (vp->v_mount != NULL)
535 LIST_REMOVE(vp, v_mntvnodes);
536 /*
537 * Insert into list of vnodes for the new mount point, if available.
538 */
539 if ((vp->v_mount = mp) == NULL) {
540 simple_unlock(&mntvnode_slock);
541 return;
542 }
543 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
544 simple_unlock(&mntvnode_slock);
545}
546
547/*
548 * Update outstanding I/O count and do wakeup if requested.
549 */
550void
551vwakeup(bp)
552 register struct buf *bp;
553{
554 register struct vnode *vp;
555
556 bp->b_flags &= ~B_WRITEINPROG;
557 if ((vp = bp->b_vp)) {
558 vp->v_numoutput--;
559 if (vp->v_numoutput < 0)
560 panic("vwakeup: neg numoutput");
561 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
562 vp->v_flag &= ~VBWAIT;
563 wakeup((caddr_t) &vp->v_numoutput);
564 }
565 }
566}
567
568/*
569 * Flush out and invalidate all buffers associated with a vnode.
570 * Called with the underlying object locked.
571 */
572int
573vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
574 register struct vnode *vp;
575 int flags;
576 struct ucred *cred;
577 struct proc *p;
578 int slpflag, slptimeo;
579{
580 register struct buf *bp;
581 struct buf *nbp, *blist;
582 int s, error;
583 vm_object_t object;
584
585 if (flags & V_SAVE) {
586 s = splbio();
587 while (vp->v_numoutput) {
588 vp->v_flag |= VBWAIT;
589 error = tsleep((caddr_t)&vp->v_numoutput,
590 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
591 if (error) {
592 splx(s);
593 return (error);
594 }
595 }
596 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
597 splx(s);
598 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
599 return (error);
600 s = splbio();
601 if (vp->v_numoutput > 0 ||
602 !TAILQ_EMPTY(&vp->v_dirtyblkhd))
603 panic("vinvalbuf: dirty bufs");
604 }
605 splx(s);
606 }
607 s = splbio();
608 for (;;) {
609 blist = TAILQ_FIRST(&vp->v_cleanblkhd);
610 if (!blist)
611 blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
612 if (!blist)
613 break;
614
615 for (bp = blist; bp; bp = nbp) {
616 nbp = TAILQ_NEXT(bp, b_vnbufs);
617 if (bp->b_flags & B_BUSY) {
618 bp->b_flags |= B_WANTED;
619 error = tsleep((caddr_t) bp,
620 slpflag | (PRIBIO + 4), "vinvalbuf",
621 slptimeo);
622 if (error) {
623 splx(s);
624 return (error);
625 }
626 break;
627 }
628 /*
629 * XXX Since there are no node locks for NFS, I
630 * believe there is a slight chance that a delayed
631 * write will occur while sleeping just above, so
632 * check for it. Note that vfs_bio_awrite expects
633 * buffers to reside on a queue, while VOP_BWRITE and
634 * brelse do not.
635 */
636 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
637 (flags & V_SAVE)) {
638
639 if (bp->b_vp == vp) {
640 if (bp->b_flags & B_CLUSTEROK) {
641 vfs_bio_awrite(bp);
642 } else {
643 bremfree(bp);
644 bp->b_flags |= (B_BUSY | B_ASYNC);
645 VOP_BWRITE(bp);
646 }
647 } else {
648 bremfree(bp);
649 bp->b_flags |= B_BUSY;
650 (void) VOP_BWRITE(bp);
651 }
652 break;
653 }
654 bremfree(bp);
655 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
656 bp->b_flags &= ~B_ASYNC;
657 brelse(bp);
658 }
659 }
660
661 while (vp->v_numoutput > 0) {
662 vp->v_flag |= VBWAIT;
663 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
664 }
665
666 splx(s);
667
668 /*
669 * Destroy the copy in the VM cache, too.
670 */
671 simple_lock(&vp->v_interlock);
672 object = vp->v_object;
673 if (object != NULL) {
674 vm_object_page_remove(object, 0, 0,
675 (flags & V_SAVE) ? TRUE : FALSE);
676 }
677 simple_unlock(&vp->v_interlock);
678
679 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
680 panic("vinvalbuf: flush failed");
681 return (0);
682}
683
684/*
685 * Truncate a file's buffer and pages to a specified length. This
686 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
687 * sync activity.
688 */
689int
690vtruncbuf(vp, cred, p, length, blksize)
691 register struct vnode *vp;
692 struct ucred *cred;
693 struct proc *p;
694 off_t length;
695 int blksize;
696{
697 register struct buf *bp;
698 struct buf *nbp;
699 int s, anyfreed;
700 int trunclbn;
701
702 /*
703 * Round up to the *next* lbn.
704 */
705 trunclbn = (length + blksize - 1) / blksize;
706
707 s = splbio();
708restart:
709 anyfreed = 1;
710 for (;anyfreed;) {
711 anyfreed = 0;
712 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
713 nbp = TAILQ_NEXT(bp, b_vnbufs);
714 if (bp->b_lblkno >= trunclbn) {
715 if (bp->b_flags & B_BUSY) {
716 bp->b_flags |= B_WANTED;
717 tsleep(bp, PRIBIO + 4, "vtrb1", 0);
718 goto restart;
719 } else {
720 bremfree(bp);
721 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
722 bp->b_flags &= ~B_ASYNC;
723 brelse(bp);
724 anyfreed = 1;
725 }
726 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
727 (nbp->b_vp != vp) ||
728 (nbp->b_flags & B_DELWRI))) {
729 goto restart;
730 }
731 }
732 }
733
734 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
735 nbp = TAILQ_NEXT(bp, b_vnbufs);
736 if (bp->b_lblkno >= trunclbn) {
737 if (bp->b_flags & B_BUSY) {
738 bp->b_flags |= B_WANTED;
739 tsleep(bp, PRIBIO + 4, "vtrb2", 0);
740 goto restart;
741 } else {
742 bremfree(bp);
743 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
744 bp->b_flags &= ~B_ASYNC;
745 brelse(bp);
746 anyfreed = 1;
747 }
748 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
749 (nbp->b_vp != vp) ||
750 (nbp->b_flags & B_DELWRI) == 0)) {
751 goto restart;
752 }
753 }
754 }
755 }
756
757 if (length > 0) {
758restartsync:
759 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
760 nbp = TAILQ_NEXT(bp, b_vnbufs);
761 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
762 if (bp->b_flags & B_BUSY) {
763 bp->b_flags |= B_WANTED;
764 tsleep(bp, PRIBIO, "vtrb3", 0);
765 } else {
766 bremfree(bp);
767 bp->b_flags |= B_BUSY;
768 if (bp->b_vp == vp) {
769 bp->b_flags |= B_ASYNC;
770 } else {
771 bp->b_flags &= ~B_ASYNC;
772 }
773 VOP_BWRITE(bp);
774 }
775 goto restartsync;
776 }
777
778 }
779 }
780
781 while (vp->v_numoutput > 0) {
782 vp->v_flag |= VBWAIT;
783 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
784 }
785
786 splx(s);
787
788 vnode_pager_setsize(vp, length);
789
790 return (0);
791}
792
793/*
794 * Associate a buffer with a vnode.
795 */
796void
797bgetvp(vp, bp)
798 register struct vnode *vp;
799 register struct buf *bp;
800{
801 int s;
802
803 KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
804
805 vhold(vp);
806 bp->b_vp = vp;
807 if (vp->v_type == VBLK || vp->v_type == VCHR)
808 bp->b_dev = vp->v_rdev;
809 else
810 bp->b_dev = NODEV;
811 /*
812 * Insert onto list for new vnode.
813 */
814 s = splbio();
815 bp->b_xflags |= B_VNCLEAN;
816 bp->b_xflags &= ~B_VNDIRTY;
817 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
818 splx(s);
819}
820
821/*
822 * Disassociate a buffer from a vnode.
823 */
824void
825brelvp(bp)
826 register struct buf *bp;
827{
828 struct vnode *vp;
829 struct buflists *listheadp;
830 int s;
831
832 KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
833
834 /*
835 * Delete from old vnode list, if on one.
836 */
837 vp = bp->b_vp;
838 s = splbio();
839 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
840 if (bp->b_xflags & B_VNDIRTY)
841 listheadp = &vp->v_dirtyblkhd;
842 else
843 listheadp = &vp->v_cleanblkhd;
844 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
845 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
846 }
847 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
848 vp->v_flag &= ~VONWORKLST;
849 LIST_REMOVE(vp, v_synclist);
850 }
851 splx(s);
852 bp->b_vp = (struct vnode *) 0;
853 vdrop(vp);
854}
855
856/*
857 * The workitem queue.
858 *
859 * It is useful to delay writes of file data and filesystem metadata
860 * for tens of seconds so that quickly created and deleted files need
861 * not waste disk bandwidth being created and removed. To realize this,
862 * we append vnodes to a "workitem" queue. When running with a soft
863 * updates implementation, most pending metadata dependencies should
864 * not wait for more than a few seconds. Thus, mounted on block devices
865 * are delayed only about a half the time that file data is delayed.
866 * Similarly, directory updates are more critical, so are only delayed
867 * about a third the time that file data is delayed. Thus, there are
868 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
869 * one each second (driven off the filesystem syner process). The
870 * syncer_delayno variable indicates the next queue that is to be processed.
871 * Items that need to be processed soon are placed in this queue:
872 *
873 * syncer_workitem_pending[syncer_delayno]
874 *
875 * A delay of fifteen seconds is done by placing the request fifteen
876 * entries later in the queue:
877 *
878 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
879 *
880 */
881
882/*
883 * Add an item to the syncer work queue.
884 */
885static void
886vn_syncer_add_to_worklist(struct vnode *vp, int delay)
887{
888 int s, slot;
889
890 s = splbio();
891
892 if (vp->v_flag & VONWORKLST) {
893 LIST_REMOVE(vp, v_synclist);
894 }
895
896 if (delay > syncer_maxdelay - 2)
897 delay = syncer_maxdelay - 2;
898 slot = (syncer_delayno + delay) & syncer_mask;
899
900 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
901 vp->v_flag |= VONWORKLST;
902 splx(s);
903}
904
905struct proc *updateproc;
906static void sched_sync __P((void));
907static const struct kproc_desc up_kp = {
908 "syncer",
909 sched_sync,
910 &updateproc
911};
912SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
913
914/*
915 * System filesystem synchronizer daemon.
916 */
917void
918sched_sync(void)
919{
920 struct synclist *slp;
921 struct vnode *vp;
922 long starttime;
923 int s;
924 struct proc *p = updateproc;
925
926 for (;;) {
927 starttime = time_second;
928
929 /*
930 * Push files whose dirty time has expired. Be careful
931 * of interrupt race on slp queue.
932 */
933 s = splbio();
934 slp = &syncer_workitem_pending[syncer_delayno];
935 syncer_delayno += 1;
936 if (syncer_delayno == syncer_maxdelay)
937 syncer_delayno = 0;
938 splx(s);
939
940 while ((vp = LIST_FIRST(slp)) != NULL) {
941 if (VOP_ISLOCKED(vp) == 0) {
942 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
943 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
944 VOP_UNLOCK(vp, 0, p);
945 }
946 s = splbio();
947 if (LIST_FIRST(slp) == vp) {
948 /*
949 * Note: v_tag VT_VFS vps can remain on the
950 * worklist too with no dirty blocks, but
951 * since sync_fsync() moves them to a different
952 * slot we are safe.
953 */
954 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
955 vp->v_type != VBLK)
956 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
957 /*
958 * Put us back on the worklist. The worklist
959 * routine will remove us from our current
960 * position and then add us back in at a later
961 * position.
962 */
963 vn_syncer_add_to_worklist(vp, syncdelay);
964 }
965 splx(s);
966 }
967
968 /*
969 * Do soft update processing.
970 */
971 if (bioops.io_sync)
972 (*bioops.io_sync)(NULL);
973
974 /*
975 * The variable rushjob allows the kernel to speed up the
976 * processing of the filesystem syncer process. A rushjob
977 * value of N tells the filesystem syncer to process the next
978 * N seconds worth of work on its queue ASAP. Currently rushjob
979 * is used by the soft update code to speed up the filesystem
980 * syncer process when the incore state is getting so far
981 * ahead of the disk that the kernel memory pool is being
982 * threatened with exhaustion.
983 */
984 if (rushjob > 0) {
985 rushjob -= 1;
986 continue;
987 }
988 /*
989 * If it has taken us less than a second to process the
990 * current work, then wait. Otherwise start right over
991 * again. We can still lose time if any single round
992 * takes more than two seconds, but it does not really
993 * matter as we are just trying to generally pace the
994 * filesystem activity.
995 */
996 if (time_second == starttime)
997 tsleep(&lbolt, PPAUSE, "syncer", 0);
998 }
999}
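/*
 * Editorial sketch (hypothetical helper, not in this file): the way
 * the rushjob knob described above is meant to be driven.  Bumping
 * rushjob makes the loop above skip its one-second sleep and work
 * through extra queues immediately.
 */
static void
example_speedup_syncer(void)
{
	if (rushjob < syncdelay / 2)
		rushjob += 1;
}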
1000
1001/*
1002 * Associate a p-buffer with a vnode.
1003 *
1004 * Also sets B_PAGING flag to indicate that vnode is not fully associated
1005 * with the buffer. i.e. the bp has not been linked into the vnode or
1006 * ref-counted.
1007 */
1008void
1009pbgetvp(vp, bp)
1010 register struct vnode *vp;
1011 register struct buf *bp;
1012{
1013
1014 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
1015
1016 bp->b_vp = vp;
1017 bp->b_flags |= B_PAGING;
1018 if (vp->v_type == VBLK || vp->v_type == VCHR)
1019 bp->b_dev = vp->v_rdev;
1020 else
1021 bp->b_dev = NODEV;
1022}
1023
1024/*
1025 * Disassociate a p-buffer from a vnode.
1026 */
1027void
1028pbrelvp(bp)
1029 register struct buf *bp;
1030{
1031
1032 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
1033
1034#if !defined(MAX_PERF)
1035 /* XXX REMOVE ME */
1036 if (bp->b_vnbufs.tqe_next != NULL) {
1037 panic(
1038 "relpbuf(): b_vp was probably reassignbuf()d %p %x",
1039 bp,
1040 (int)bp->b_flags
1041 );
1042 }
1043#endif
1044 bp->b_vp = (struct vnode *) 0;
1045 bp->b_flags &= ~B_PAGING;
1046}
1047
1048void
1049pbreassignbuf(bp, newvp)
1050 struct buf *bp;
1051 struct vnode *newvp;
1052{
1053#if !defined(MAX_PERF)
1054 if ((bp->b_flags & B_PAGING) == 0) {
1055 panic(
1056 "pbreassignbuf() on non phys bp %p",
1057 bp
1058 );
1059 }
1060#endif
1061 bp->b_vp = newvp;
1062}
1063
1064/*
1065 * Reassign a buffer from one vnode to another.
1066 * Used to assign file specific control information
1067 * (indirect blocks) to the vnode to which they belong.
1068 */
1069void
1070reassignbuf(bp, newvp)
1071 register struct buf *bp;
1072 register struct vnode *newvp;
1073{
1074 struct buflists *listheadp;
1075 int delay;
1076 int s;
1077
1078 if (newvp == NULL) {
1079 printf("reassignbuf: NULL");
1080 return;
1081 }
1082
1083#if !defined(MAX_PERF)
1084 /*
1085 * B_PAGING flagged buffers cannot be reassigned because their vp
1086 * is not fully linked in.
1087 */
1088 if (bp->b_flags & B_PAGING)
1089 panic("cannot reassign paging buffer");
1090#endif
1091
1092 s = splbio();
1093 /*
1094 * Delete from old vnode list, if on one.
1095 */
1096 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
1097 if (bp->b_xflags & B_VNDIRTY)
1098 listheadp = &bp->b_vp->v_dirtyblkhd;
1099 else
1100 listheadp = &bp->b_vp->v_cleanblkhd;
1101 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
1102 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
1103 if (bp->b_vp != newvp) {
1104 vdrop(bp->b_vp);
1105 bp->b_vp = NULL; /* for clarification */
1106 }
1107 }
1108 /*
1109 * If dirty, put on list of dirty buffers; otherwise insert onto list
1110 * of clean buffers.
1111 */
1112 if (bp->b_flags & B_DELWRI) {
1113 struct buf *tbp;
1114
1115 listheadp = &newvp->v_dirtyblkhd;
1116 if ((newvp->v_flag & VONWORKLST) == 0) {
1117 switch (newvp->v_type) {
1118 case VDIR:
1119 delay = syncdelay / 3;
1120 break;
1121 case VBLK:
1122 if (newvp->v_specmountpoint != NULL) {
1123 delay = syncdelay / 2;
1124 break;
1125 }
1126 /* fall through */
1127 default:
1128 delay = syncdelay;
1129 }
1130 vn_syncer_add_to_worklist(newvp, delay);
1131 }
1132 bp->b_xflags |= B_VNDIRTY;
1133 tbp = TAILQ_FIRST(listheadp);
1134 if (tbp == NULL ||
1135 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
1136 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
1137 } else {
1138 if (bp->b_lblkno >= 0) {
1139 struct buf *ttbp;
1140 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
1141 (ttbp->b_lblkno < bp->b_lblkno)) {
1142 tbp = ttbp;
1143 }
1144 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
1145 } else {
1146 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
1147 }
1148 }
1149 } else {
1150 bp->b_xflags |= B_VNCLEAN;
1151 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
1152 if ((newvp->v_flag & VONWORKLST) &&
1153 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
1154 newvp->v_flag &= ~VONWORKLST;
1155 LIST_REMOVE(newvp, v_synclist);
1156 }
1157 }
1158 if (bp->b_vp != newvp) {
1159 bp->b_vp = newvp;
1160 vhold(bp->b_vp);
1161 }
1162 splx(s);
1163}
1164
1165/*
1166 * Create a vnode for a block device.
1167 * Used for mounting the root file system.
1168 */
1169int
1170bdevvp(dev, vpp)
1171 dev_t dev;
1172 struct vnode **vpp;
1173{
1174 register struct vnode *vp;
1175 struct vnode *nvp;
1176 int error;
1177
1178 /* XXX 255 is for mfs. */
1179 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
1180 bdevsw(dev) == NULL))) {
1181 *vpp = NULLVP;
1182 return (ENXIO);
1183 }
1184 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
1185 if (error) {
1186 *vpp = NULLVP;
1187 return (error);
1188 }
1189 vp = nvp;
1190 vp->v_type = VBLK;
1191 if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
1191 if ((nvp = checkalias(vp, dev2udev(dev), (struct mount *)0)) != NULL) {
1192 vput(vp);
1193 vp = nvp;
1194 }
1195 *vpp = vp;
1196 return (0);
1197}
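/*
 * Editorial usage sketch: the root mount path typically obtains the
 * device vnode for the root device this way ("rootdev" being the
 * boot device's dev_t).
 */
static void
example_mountroot_devvp(void)
{
	struct vnode *rootvp;

	if (bdevvp(rootdev, &rootvp) != 0)
		panic("example: cannot get root device vnode");
}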
1198
1199/*
1200 * Check to see if the new vnode represents a special device
1201 * for which we already have a vnode (either because of
1202 * bdevvp() or because of a different vnode representing
1203 * the same block device). If such an alias exists, deallocate
1204 * the existing contents and return the aliased vnode. The
1205 * caller is responsible for filling it with its new contents.
1206 */
1207struct vnode *
1208checkalias(nvp, nvp_rdev, mp)
1209 register struct vnode *nvp;
1210 dev_t nvp_rdev;
1210 udev_t nvp_rdev;
1211 struct mount *mp;
1212{
1213 struct proc *p = curproc; /* XXX */
1214 struct vnode *vp;
1215 struct vnode **vpp;
1216 int rmaj = major(nvp_rdev);
1216 int rmaj = umajor(nvp_rdev);
1217 dev_t dev;
1217
1218 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
1219 return (NULLVP);
1220
1222 dev = udev2dev(nvp_rdev, 2);
1223
1221 vpp = &speclisth[SPECHASH(nvp_rdev)];
1222loop:
1223 simple_lock(&spechash_slock);
1224 for (vp = *vpp; vp; vp = vp->v_specnext) {
1225 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
1228 if (dev != vp->v_rdev || nvp->v_type != vp->v_type)
1226 continue;
1227 /*
1228 * Alias, but not in use, so flush it out.
1229 * Only alias active device nodes.
1230 * Not sure why we don't re-use this like we do below.
1231 */
1232 simple_lock(&vp->v_interlock);
1233 if (vp->v_usecount == 0) {
1234 simple_unlock(&spechash_slock);
1235 vgonel(vp, p);
1236 goto loop;
1237 }
1238 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
1239 /*
1240 * It disappeared, and we may have slept.
1241 * Restart from the beginning.
1242 */
1243 simple_unlock(&spechash_slock);
1244 goto loop;
1245 }
1246 break;
1247 }
1248 /*
1249 * It would be a lot clearer what is going on here if
1250 * this had been expressed as:
1251 * if (vp && (vp->v_tag == VT_NON))
1252 * and the clauses had been swapped.
1253 */
1254 if (vp == NULL || vp->v_tag != VT_NON) {
1255 struct specinfo *sinfo;
1256
1257 /*
1258 * Put the new vnode into the hash chain.
1259 * and if there was an alias, connect them.
1260 */
1261 MALLOC(sinfo, struct specinfo *,
1262 sizeof(struct specinfo), M_VNODE, M_WAITOK);
1263 bzero(sinfo, sizeof(struct specinfo));
1264 nvp->v_specinfo = sinfo;
1265 sinfo->si_rdev = nvp_rdev;
1268 sinfo->si_rdev = dev;
1266 sinfo->si_hashchain = vpp;
1267 sinfo->si_specnext = *vpp;
1268 sinfo->si_bsize_phys = DEV_BSIZE;
1269 sinfo->si_bsize_best = BLKDEV_IOSIZE;
1270 sinfo->si_bsize_max = MAXBSIZE;
1271
1272 /*
1273 * Ask the device to fix up specinfo. Typically the
1274 * si_bsize_* parameters may need fixing up.
1275 */
1276
1277 if (nvp->v_type == VBLK && rmaj < nblkdev) {
1278 if (bdevsw(nvp_rdev) && bdevsw(nvp_rdev)->d_parms)
1281 if (bdevsw(dev) && bdevsw(dev)->d_parms)
1279
1280 (*bdevsw(nvp_rdev)->d_parms)(nvp_rdev, sinfo, DPARM_GET);
1283 (*bdevsw(dev)->d_parms)(dev, sinfo, DPARM_GET);
1281 } else if (nvp->v_type == VCHR && rmaj < nchrdev) {
1282 if (devsw(nvp_rdev) && devsw(nvp_rdev)->d_parms)
1283 (*devsw(nvp_rdev)->d_parms)(nvp_rdev, sinfo, DPARM_GET);
1285 if (devsw(dev) && devsw(dev)->d_parms)
1286 (*devsw(dev)->d_parms)(dev, sinfo, DPARM_GET);
1284 }
1285
1286 simple_unlock(&spechash_slock);
1287 *vpp = nvp;
1288 if (vp != NULLVP) {
1289 nvp->v_flag |= VALIASED;
1290 vp->v_flag |= VALIASED;
1291 vput(vp);
1292 }
1293 return (NULLVP);
1294 }
1295 /*
1296 * if (vp && (vp->v_tag == VT_NON))
1297 * We have a vnode alias, but it is trashed.
1298 * Make it look like it's newly allocated (by getnewvnode()).
1299 * The caller should use this instead.
1300 */
1301 simple_unlock(&spechash_slock);
1302 VOP_UNLOCK(vp, 0, p);
1303 simple_lock(&vp->v_interlock);
1304 vclean(vp, 0, p);
1305 vp->v_op = nvp->v_op;
1306 vp->v_tag = nvp->v_tag;
1307 nvp->v_type = VNON;
1308 insmntque(vp, mp);
1309 return (vp);
1310}
1311
1312/*
1313 * Grab a particular vnode from the free list, increment its
1314 * reference count and lock it. The vnode lock bit is set if the
1315 * vnode is being eliminated in vgone. The process is awakened
1316 * when the transition is completed, and an error returned to
1317 * indicate that the vnode is no longer usable (possibly having
1318 * been changed to a new file system type).
1319 */
1320int
1321vget(vp, flags, p)
1322 register struct vnode *vp;
1323 int flags;
1324 struct proc *p;
1325{
1326 int error;
1327
1328 /*
1329 * If the vnode is in the process of being cleaned out for
1330 * another use, we wait for the cleaning to finish and then
1331 * return failure. Cleaning is determined by checking that
1332 * the VXLOCK flag is set.
1333 */
1334 if ((flags & LK_INTERLOCK) == 0) {
1335 simple_lock(&vp->v_interlock);
1336 }
1337 if (vp->v_flag & VXLOCK) {
1338 vp->v_flag |= VXWANT;
1339 simple_unlock(&vp->v_interlock);
1340 tsleep((caddr_t)vp, PINOD, "vget", 0);
1341 return (ENOENT);
1342 }
1343
1344 vp->v_usecount++;
1345
1346 if (VSHOULDBUSY(vp))
1347 vbusy(vp);
1348 if (flags & LK_TYPE_MASK) {
1349 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
1350 /*
1351 * must expand vrele here because we do not want
1352 * to call VOP_INACTIVE if the reference count
1353 * drops back to zero since it was never really
1354 * active. We must remove it from the free list
1355 * before sleeping so that multiple processes do
1356 * not try to recycle it.
1357 */
1358 simple_lock(&vp->v_interlock);
1359 vp->v_usecount--;
1360 if (VSHOULDFREE(vp))
1361 vfree(vp);
1362 simple_unlock(&vp->v_interlock);
1363 }
1364 return (error);
1365 }
1366 simple_unlock(&vp->v_interlock);
1367 return (0);
1368}
1369
1370void
1371vref(struct vnode *vp)
1372{
1373 simple_lock(&vp->v_interlock);
1374 vp->v_usecount++;
1375 simple_unlock(&vp->v_interlock);
1376}
1377
1378/*
1379 * Vnode put/release.
1380 * If count drops to zero, call inactive routine and return to freelist.
1381 */
1382void
1383vrele(vp)
1384 struct vnode *vp;
1385{
1386 struct proc *p = curproc; /* XXX */
1387
1388 KASSERT(vp != NULL, ("vrele: null vp"));
1389
1390 simple_lock(&vp->v_interlock);
1391
1392 if (vp->v_usecount > 1) {
1393
1394 vp->v_usecount--;
1395 simple_unlock(&vp->v_interlock);
1396
1397 return;
1398 }
1399
1400 if (vp->v_usecount == 1) {
1401
1402 vp->v_usecount--;
1403 if (VSHOULDFREE(vp))
1404 vfree(vp);
1405 /*
1406 * If we are doing a vput, the node is already locked, and we must
1407 * call VOP_INACTIVE with the node locked. So, in the case of
1408 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1409 */
1410 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1411 VOP_INACTIVE(vp, p);
1412 }
1413
1414 } else {
1415#ifdef DIAGNOSTIC
1416 vprint("vrele: negative ref count", vp);
1417 simple_unlock(&vp->v_interlock);
1418#endif
1419 panic("vrele: negative ref cnt");
1420 }
1421}
1422
1423void
1424vput(vp)
1425 struct vnode *vp;
1426{
1427 struct proc *p = curproc; /* XXX */
1428
1429 KASSERT(vp != NULL, ("vput: null vp"));
1430
1431 simple_lock(&vp->v_interlock);
1432
1433 if (vp->v_usecount > 1) {
1434
1435 vp->v_usecount--;
1436 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1437 return;
1438
1439 }
1440
1441 if (vp->v_usecount == 1) {
1442
1443 vp->v_usecount--;
1444 if (VSHOULDFREE(vp))
1445 vfree(vp);
1446 /*
1447 * If we are doing a vput, the node is already locked, and we must
1448 * call VOP_INACTIVE with the node locked. So, in the case of
1449 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
1450 */
1451 simple_unlock(&vp->v_interlock);
1452 VOP_INACTIVE(vp, p);
1453
1454 } else {
1455#ifdef DIAGNOSTIC
1456 vprint("vput: negative ref count", vp);
1457#endif
1458 panic("vput: negative ref cnt");
1459 }
1460}
1461
1462/*
1463 * Somebody doesn't want the vnode recycled.
1464 */
1465void
1466vhold(vp)
1467 register struct vnode *vp;
1468{
1469 int s;
1470
1471 s = splbio();
1472 vp->v_holdcnt++;
1473 if (VSHOULDBUSY(vp))
1474 vbusy(vp);
1475 splx(s);
1476}
1477
1478/*
1479 * One less who cares about this vnode.
1480 */
1481void
1482vdrop(vp)
1483 register struct vnode *vp;
1484{
1485 int s;
1486
1487 s = splbio();
1488 if (vp->v_holdcnt <= 0)
1489 panic("vdrop: holdcnt");
1490 vp->v_holdcnt--;
1491 if (VSHOULDFREE(vp))
1492 vfree(vp);
1493 splx(s);
1494}
1495
1496/*
1497 * Remove any vnodes in the vnode table belonging to mount point mp.
1498 *
1499 * If MNT_NOFORCE is specified, there should not be any active ones,
1500 * return error if any are found (nb: this is a user error, not a
1501 * system error). If MNT_FORCE is specified, detach any active vnodes
1502 * that are found.
1503 */
1504#ifdef DIAGNOSTIC
1505static int busyprt = 0; /* print out busy vnodes */
1506SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1507#endif
1508
1509int
1510vflush(mp, skipvp, flags)
1511 struct mount *mp;
1512 struct vnode *skipvp;
1513 int flags;
1514{
1515 struct proc *p = curproc; /* XXX */
1516 struct vnode *vp, *nvp;
1517 int busy = 0;
1518
1519 simple_lock(&mntvnode_slock);
1520loop:
1521 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1522 /*
1523 * Make sure this vnode wasn't reclaimed in getnewvnode().
1524 * Start over if it has (it won't be on the list anymore).
1525 */
1526 if (vp->v_mount != mp)
1527 goto loop;
1528 nvp = vp->v_mntvnodes.le_next;
1529 /*
1530 * Skip over a selected vnode.
1531 */
1532 if (vp == skipvp)
1533 continue;
1534
1535 simple_lock(&vp->v_interlock);
1536 /*
1537 * Skip over vnodes marked VSYSTEM.
1538 */
1539 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1540 simple_unlock(&vp->v_interlock);
1541 continue;
1542 }
1543 /*
1544 * If WRITECLOSE is set, only flush out regular file vnodes
1545 * open for writing.
1546 */
1547 if ((flags & WRITECLOSE) &&
1548 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1549 simple_unlock(&vp->v_interlock);
1550 continue;
1551 }
1552
1553 /*
1554 * With v_usecount == 0, all we need to do is clear out the
1555 * vnode data structures and we are done.
1556 */
1557 if (vp->v_usecount == 0) {
1558 simple_unlock(&mntvnode_slock);
1559 vgonel(vp, p);
1560 simple_lock(&mntvnode_slock);
1561 continue;
1562 }
1563
1564 /*
1565 * If FORCECLOSE is set, forcibly close the vnode. For block
1566 * or character devices, revert to an anonymous device. For
1567 * all other files, just kill them.
1568 */
1569 if (flags & FORCECLOSE) {
1570 simple_unlock(&mntvnode_slock);
1571 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1572 vgonel(vp, p);
1573 } else {
1574 vclean(vp, 0, p);
1575 vp->v_op = spec_vnodeop_p;
1576 insmntque(vp, (struct mount *) 0);
1577 }
1578 simple_lock(&mntvnode_slock);
1579 continue;
1580 }
1581#ifdef DIAGNOSTIC
1582 if (busyprt)
1583 vprint("vflush: busy vnode", vp);
1584#endif
1585 simple_unlock(&vp->v_interlock);
1586 busy++;
1587 }
1588 simple_unlock(&mntvnode_slock);
1589 if (busy)
1590 return (EBUSY);
1591 return (0);
1592}
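/*
 * Editorial sketch of a typical caller: a filesystem unmount routine
 * flushing every vnode on the mount, forcing the flush only when the
 * user passed MNT_FORCE ("mntflags" is the caller's flag word).
 */
static int
example_unmount_flush(struct mount *mp, int mntflags)
{
	int flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;

	return (vflush(mp, NULLVP, flags));
}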
1593
1594/*
1595 * Disassociate the underlying file system from a vnode.
1596 */
1597static void
1598vclean(vp, flags, p)
1599 struct vnode *vp;
1600 int flags;
1601 struct proc *p;
1602{
1603 int active;
1604 vm_object_t obj;
1605
1606 /*
1607 * Check to see if the vnode is in use. If so we have to reference it
1608 * before we clean it out so that its count cannot fall to zero and
1609 * generate a race against ourselves to recycle it.
1610 */
1611 if ((active = vp->v_usecount))
1612 vp->v_usecount++;
1613
1614 /*
1615 * Prevent the vnode from being recycled or brought into use while we
1616 * clean it out.
1617 */
1618 if (vp->v_flag & VXLOCK)
1619 panic("vclean: deadlock");
1620 vp->v_flag |= VXLOCK;
1621 /*
1622 * Even if the count is zero, the VOP_INACTIVE routine may still
1623 * have the object locked while it cleans it out. The VOP_LOCK
1624 * ensures that the VOP_INACTIVE routine is done with its work.
1625 * For active vnodes, it ensures that no other activity can
1626 * occur while the underlying object is being cleaned out.
1627 */
1628 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1629
1630 /*
1631 * Clean out any buffers associated with the vnode.
1632 */
1633 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1634 if ((obj = vp->v_object) != NULL) {
1635 if (obj->ref_count == 0) {
1636 /*
1637 * This is a normal way of shutting down the object/vnode
1638 * association.
1639 */
1640 vm_object_terminate(obj);
1641 } else {
1642 /*
1643 * Woe to the process that tries to page now :-).
1644 */
1645 vm_pager_deallocate(obj);
1646 }
1647 }
1648
1649 /*
1650 * If purging an active vnode, it must be closed and
1651 * deactivated before being reclaimed. Note that the
1652 * VOP_INACTIVE will unlock the vnode.
1653 */
1654 if (active) {
1655 if (flags & DOCLOSE)
1656 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1657 VOP_INACTIVE(vp, p);
1658 } else {
1659 /*
1660 * Any other processes trying to obtain this lock must first
1661 * wait for VXLOCK to clear, then call the new lock operation.
1662 */
1663 VOP_UNLOCK(vp, 0, p);
1664 }
1665 /*
1666 * Reclaim the vnode.
1667 */
1668 if (VOP_RECLAIM(vp, p))
1669 panic("vclean: cannot reclaim");
1670
1671 if (active)
1672 vrele(vp);
1673
1674 cache_purge(vp);
1675 if (vp->v_vnlock) {
1676#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
1677#ifdef DIAGNOSTIC
1678 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1679 vprint("vclean: lock not drained", vp);
1680#endif
1681#endif
1682 FREE(vp->v_vnlock, M_VNODE);
1683 vp->v_vnlock = NULL;
1684 }
1685
1686 if (VSHOULDFREE(vp))
1687 vfree(vp);
1688
1689 /*
1690 * Done with purge, notify sleepers of the grim news.
1691 */
1692 vp->v_op = dead_vnodeop_p;
1693 vn_pollgone(vp);
1694 vp->v_tag = VT_NON;
1695 vp->v_flag &= ~VXLOCK;
1696 if (vp->v_flag & VXWANT) {
1697 vp->v_flag &= ~VXWANT;
1698 wakeup((caddr_t) vp);
1699 }
1700}
1701
1702/*
1703 * Eliminate all activity associated with the requested vnode
1704 * and with all vnodes aliased to the requested vnode.
1705 */
1706int
1707vop_revoke(ap)
1708 struct vop_revoke_args /* {
1709 struct vnode *a_vp;
1710 int a_flags;
1711 } */ *ap;
1712{
1713 struct vnode *vp, *vq;
1714 struct proc *p = curproc; /* XXX */
1715
1716 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
1717
1718 vp = ap->a_vp;
1719 simple_lock(&vp->v_interlock);
1720
1721 if (vp->v_flag & VALIASED) {
1722 /*
1723 * If a vgone (or vclean) is already in progress,
1724 * wait until it is done and return.
1725 */
1726 if (vp->v_flag & VXLOCK) {
1727 vp->v_flag |= VXWANT;
1728 simple_unlock(&vp->v_interlock);
1729 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1730 return (0);
1731 }
1732 /*
1733 * Ensure that vp will not be vgone'd while we
1734 * are eliminating its aliases.
1735 */
1736 vp->v_flag |= VXLOCK;
1737 simple_unlock(&vp->v_interlock);
1738 while (vp->v_flag & VALIASED) {
1739 simple_lock(&spechash_slock);
1740 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1741 if (vq->v_rdev != vp->v_rdev ||
1742 vq->v_type != vp->v_type || vp == vq)
1743 continue;
1744 simple_unlock(&spechash_slock);
1745 vgone(vq);
1746 break;
1747 }
1748 if (vq == NULLVP) {
1749 simple_unlock(&spechash_slock);
1750 }
1751 }
1752 /*
1753 * Remove the lock so that vgone below will
1754 * really eliminate the vnode after which time
1755 * vgone will awaken any sleepers.
1756 */
1757 simple_lock(&vp->v_interlock);
1758 vp->v_flag &= ~VXLOCK;
1759 if (vp->v_flag & VXWANT) {
1760 vp->v_flag &= ~VXWANT;
1761 wakeup(vp);
1762 }
1763 }
1764 vgonel(vp, p);
1765 return (0);
1766}
1767
1768/*
1769 * Recycle an unused vnode to the front of the free list.
1770 * Release the passed interlock if the vnode will be recycled.
1771 */
1772int
1773vrecycle(vp, inter_lkp, p)
1774 struct vnode *vp;
1775 struct simplelock *inter_lkp;
1776 struct proc *p;
1777{
1778
1779 simple_lock(&vp->v_interlock);
1780 if (vp->v_usecount == 0) {
1781 if (inter_lkp) {
1782 simple_unlock(inter_lkp);
1783 }
1784 vgonel(vp, p);
1785 return (1);
1786 }
1787 simple_unlock(&vp->v_interlock);
1788 return (0);
1789}
1790
1791/*
1792 * Eliminate all activity associated with a vnode
1793 * in preparation for reuse.
1794 */
1795void
1796vgone(vp)
1797 register struct vnode *vp;
1798{
1799 struct proc *p = curproc; /* XXX */
1800
1801 simple_lock(&vp->v_interlock);
1802 vgonel(vp, p);
1803}
1804
1805/*
1806 * vgone, with the vp interlock held.
1807 */
1808static void
1809vgonel(vp, p)
1810 struct vnode *vp;
1811 struct proc *p;
1812{
1813 int s;
1814 struct vnode *vq;
1815 struct vnode *vx;
1816
1817 /*
1818 * If a vgone (or vclean) is already in progress,
1819 * wait until it is done and return.
1820 */
1821 if (vp->v_flag & VXLOCK) {
1822 vp->v_flag |= VXWANT;
1823 simple_unlock(&vp->v_interlock);
1824 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1825 return;
1826 }
1827
1828 /*
1829 * Clean out the filesystem specific data.
1830 */
1831 vclean(vp, DOCLOSE, p);
1832 simple_lock(&vp->v_interlock);
1833
1834 /*
1835 * Delete from old mount point vnode list, if on one.
1836 */
1837 if (vp->v_mount != NULL)
1838 insmntque(vp, (struct mount *)0);
1839 /*
1840 * If special device, remove it from special device alias list
1841 * if it is on one.
1842 */
1843 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1844 simple_lock(&spechash_slock);
1845 if (*vp->v_hashchain == vp) {
1846 *vp->v_hashchain = vp->v_specnext;
1847 } else {
1848 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1849 if (vq->v_specnext != vp)
1850 continue;
1851 vq->v_specnext = vp->v_specnext;
1852 break;
1853 }
1854 if (vq == NULL)
1855 panic("missing bdev");
1856 }
1857 if (vp->v_flag & VALIASED) {
1858 vx = NULL;
1859 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1860 if (vq->v_rdev != vp->v_rdev ||
1861 vq->v_type != vp->v_type)
1862 continue;
1863 if (vx)
1864 break;
1865 vx = vq;
1866 }
1867 if (vx == NULL)
1868 panic("missing alias");
1869 if (vq == NULL)
1870 vx->v_flag &= ~VALIASED;
1871 vp->v_flag &= ~VALIASED;
1872 }
1873 simple_unlock(&spechash_slock);
1874 FREE(vp->v_specinfo, M_VNODE);
1875 vp->v_specinfo = NULL;
1876 }
1877
1878 /*
1879 * If it is on the freelist and not already at the head,
1880 * move it to the head of the list. The test of the back
1881 * pointer and the reference count of zero is because
1882 * it will be removed from the free list by getnewvnode,
1883 * but will not have its reference count incremented until
1884 * after calling vgone. If the reference count were
1885 * incremented first, vgone would (incorrectly) try to
1886 * close the previous instance of the underlying object.
1887 */
1888 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1889 s = splbio();
1890 simple_lock(&vnode_free_list_slock);
1891 if (vp->v_flag & VFREE) {
1892 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1893 } else if (vp->v_flag & VTBFREE) {
1894 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
1895 vp->v_flag &= ~VTBFREE;
1896 freevnodes++;
1897 } else
1898 freevnodes++;
1899 vp->v_flag |= VFREE;
1900 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1901 simple_unlock(&vnode_free_list_slock);
1902 splx(s);
1903 }
1904
1905 vp->v_type = VBAD;
1906 simple_unlock(&vp->v_interlock);
1907}
1908
1909/*
1910 * Lookup a vnode by device number.
1911 */
1912int
1913vfinddev(dev, type, vpp)
1914 dev_t dev;
1915 enum vtype type;
1916 struct vnode **vpp;
1917{
1918 register struct vnode *vp;
1919 int rc = 0;
1920
1921 simple_lock(&spechash_slock);
1922 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1923 if (dev != vp->v_rdev || type != vp->v_type)
1924 continue;
1925 *vpp = vp;
1926 rc = 1;
1927 break;
1928 }
1929 simple_unlock(&spechash_slock);
1930 return (rc);
1931}
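/*
 * Editorial usage sketch: see whether some vnode already represents a
 * given block device before fabricating a new one.
 */
static struct vnode *
example_lookup_bdev(dev_t dev)
{
	struct vnode *devvp;

	if (vfinddev(dev, VBLK, &devvp))
		return (devvp);		/* existing vnode for this device */
	return (NULLVP);
}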
1932
1933/*
1934 * Calculate the total number of references to a special device.
1935 */
1936int
1937vcount(vp)
1938 register struct vnode *vp;
1939{
1940 struct vnode *vq, *vnext;
1941 int count;
1942
1943loop:
1944 if ((vp->v_flag & VALIASED) == 0)
1945 return (vp->v_usecount);
1946 simple_lock(&spechash_slock);
1947 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1948 vnext = vq->v_specnext;
1949 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1950 continue;
1951 /*
1952 * Alias, but not in use, so flush it out.
1953 */
1954 if (vq->v_usecount == 0 && vq != vp) {
1955 simple_unlock(&spechash_slock);
1956 vgone(vq);
1957 goto loop;
1958 }
1959 count += vq->v_usecount;
1960 }
1961 simple_unlock(&spechash_slock);
1962 return (count);
1963}
1964/*
1965 * Print out a description of a vnode.
1966 */
1967static char *typename[] =
1968{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1969
1970void
1971vprint(label, vp)
1972 char *label;
1973 register struct vnode *vp;
1974{
1975 char buf[96];
1976
1977 if (label != NULL)
1978 printf("%s: %p: ", label, (void *)vp);
1979 else
1980 printf("%p: ", (void *)vp);
1981 printf("type %s, usecount %d, writecount %d, refcount %d,",
1982 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1983 vp->v_holdcnt);
1984 buf[0] = '\0';
1985 if (vp->v_flag & VROOT)
1986 strcat(buf, "|VROOT");
1987 if (vp->v_flag & VTEXT)
1988 strcat(buf, "|VTEXT");
1989 if (vp->v_flag & VSYSTEM)
1990 strcat(buf, "|VSYSTEM");
1991 if (vp->v_flag & VXLOCK)
1992 strcat(buf, "|VXLOCK");
1993 if (vp->v_flag & VXWANT)
1994 strcat(buf, "|VXWANT");
1995 if (vp->v_flag & VBWAIT)
1996 strcat(buf, "|VBWAIT");
1997 if (vp->v_flag & VALIASED)
1998 strcat(buf, "|VALIASED");
1999 if (vp->v_flag & VDOOMED)
2000 strcat(buf, "|VDOOMED");
2001 if (vp->v_flag & VFREE)
2002 strcat(buf, "|VFREE");
2003 if (vp->v_flag & VOBJBUF)
2004 strcat(buf, "|VOBJBUF");
2005 if (buf[0] != '\0')
2006 printf(" flags (%s)", &buf[1]);
2007 if (vp->v_data == NULL) {
2008 printf("\n");
2009 } else {
2010 printf("\n\t");
2011 VOP_PRINT(vp);
2012 }
2013}
2014
2015#ifdef DDB
2016#include <ddb/ddb.h>
2017/*
2018 * List all of the locked vnodes in the system.
2019 * Called when debugging the kernel.
2020 */
2021DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
2022{
2023 struct proc *p = curproc; /* XXX */
2024 struct mount *mp, *nmp;
2025 struct vnode *vp;
2026
2027 printf("Locked vnodes\n");
2028 simple_lock(&mountlist_slock);
2029 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2030 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2031 nmp = mp->mnt_list.cqe_next;
2032 continue;
2033 }
2034 for (vp = mp->mnt_vnodelist.lh_first;
2035 vp != NULL;
2036 vp = vp->v_mntvnodes.le_next) {
2037 if (VOP_ISLOCKED(vp))
2038 vprint((char *)0, vp);
2039 }
2040 simple_lock(&mountlist_slock);
2041 nmp = mp->mnt_list.cqe_next;
2042 vfs_unbusy(mp, p);
2043 }
2044 simple_unlock(&mountlist_slock);
2045}
2046#endif
2047
2048/*
2049 * Top level filesystem related information gathering.
2050 */
2051static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
2052
2053static int
2054vfs_sysctl SYSCTL_HANDLER_ARGS
2055{
2056 int *name = (int *)arg1 - 1; /* XXX */
2057 u_int namelen = arg2 + 1; /* XXX */
2058 struct vfsconf *vfsp;
2059
2060#if 1 || defined(COMPAT_PRELITE2)
2061 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
2062 if (namelen == 1)
2063 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
2064#endif
2065
2066#ifdef notyet
2067 /* all sysctl names at this level are at least name and field */
2068 if (namelen < 2)
2069 return (ENOTDIR); /* overloaded */
2070 if (name[0] != VFS_GENERIC) {
2071 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2072 if (vfsp->vfc_typenum == name[0])
2073 break;
2074 if (vfsp == NULL)
2075 return (EOPNOTSUPP);
2076 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2077 oldp, oldlenp, newp, newlen, p));
2078 }
2079#endif
2080 switch (name[1]) {
2081 case VFS_MAXTYPENUM:
2082 if (namelen != 2)
2083 return (ENOTDIR);
2084 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
2085 case VFS_CONF:
2086 if (namelen != 3)
2087 return (ENOTDIR); /* overloaded */
2088 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2089 if (vfsp->vfc_typenum == name[2])
2090 break;
2091 if (vfsp == NULL)
2092 return (EOPNOTSUPP);
2093 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
2094 }
2095 return (EOPNOTSUPP);
2096}
2097
2098SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
2099 "Generic filesystem");
2100
2101#if 1 || defined(COMPAT_PRELITE2)
2102
2103static int
2104sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
2105{
2106 int error;
2107 struct vfsconf *vfsp;
2108 struct ovfsconf ovfs;
2109
2110 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
2111 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
2112 strcpy(ovfs.vfc_name, vfsp->vfc_name);
2113 ovfs.vfc_index = vfsp->vfc_typenum;
2114 ovfs.vfc_refcount = vfsp->vfc_refcount;
2115 ovfs.vfc_flags = vfsp->vfc_flags;
2116 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
2117 if (error)
2118 return error;
2119 }
2120 return 0;
2121}
2122
2123#endif /* 1 || COMPAT_PRELITE2 */
2124
2125#if 0
2126#define KINFO_VNODESLOP 10
2127/*
2128 * Dump vnode list (via sysctl).
2129 * Copyout address of vnode followed by vnode.
2130 */
2131/* ARGSUSED */
2132static int
2133sysctl_vnode SYSCTL_HANDLER_ARGS
2134{
2135 struct proc *p = curproc; /* XXX */
2136 struct mount *mp, *nmp;
2137 struct vnode *nvp, *vp;
2138 int error;
2139
2140#define VPTRSZ sizeof (struct vnode *)
2141#define VNODESZ sizeof (struct vnode)
2142
2143 req->lock = 0;
2144 if (!req->oldptr) /* Make an estimate */
2145 return (SYSCTL_OUT(req, 0,
2146 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
2147
2148 simple_lock(&mountlist_slock);
2149 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2150 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2151 nmp = mp->mnt_list.cqe_next;
2152 continue;
2153 }
2154again:
2155 simple_lock(&mntvnode_slock);
2156 for (vp = mp->mnt_vnodelist.lh_first;
2157 vp != NULL;
2158 vp = nvp) {
2159 /*
2160 * Check that the vp is still associated with
2161 * this filesystem. RACE: could have been
2162 * recycled onto the same filesystem.
2163 */
2164 if (vp->v_mount != mp) {
2165 simple_unlock(&mntvnode_slock);
2166 goto again;
2167 }
2168 nvp = vp->v_mntvnodes.le_next;
2169 simple_unlock(&mntvnode_slock);
2170 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
2171 (error = SYSCTL_OUT(req, vp, VNODESZ)))
2172 return (error);
2173 simple_lock(&mntvnode_slock);
2174 }
2175 simple_unlock(&mntvnode_slock);
2176 simple_lock(&mountlist_slock);
2177 nmp = mp->mnt_list.cqe_next;
2178 vfs_unbusy(mp, p);
2179 }
2180 simple_unlock(&mountlist_slock);
2181
2182 return (0);
2183}
2184#endif
2185
2186/*
2187 * XXX
2188 * Exporting the vnode list on large systems causes them to crash.
2189 * Exporting the vnode list on medium systems causes sysctl to coredump.
2190 */
2191#if 0
2192SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
2193 0, 0, sysctl_vnode, "S,vnode", "");
2194#endif
2195
2196/*
2197 * Check to see if a filesystem is mounted on a block device.
2198 */
2199int
2200vfs_mountedon(vp)
2201 struct vnode *vp;
2202{
2203 struct vnode *vq;
2204 int error = 0;
2205
2206 if (vp->v_specmountpoint != NULL)
2207 return (EBUSY);
2208 if (vp->v_flag & VALIASED) {
2209 simple_lock(&spechash_slock);
2210 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2211 if (vq->v_rdev != vp->v_rdev ||
2212 vq->v_type != vp->v_type)
2213 continue;
2214 if (vq->v_specmountpoint != NULL) {
2215 error = EBUSY;
2216 break;
2217 }
2218 }
2219 simple_unlock(&spechash_slock);
2220 }
2221 return (error);
2222}
2223
2224/*
2225 * Unmount all filesystems. The list is traversed in reverse order
2226 * of mounting to avoid dependencies.
2227 */
2228void
2229vfs_unmountall()
2230{
2231 struct mount *mp, *nmp;
2232 struct proc *p;
2233 int error;
2234
2235 if (curproc != NULL)
2236 p = curproc;
2237 else
2238 p = initproc; /* XXX XXX should this be proc0? */
2239 /*
2240 * Since this only runs when rebooting, it is not interlocked.
2241 */
2242 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2243 nmp = mp->mnt_list.cqe_prev;
2244 error = dounmount(mp, MNT_FORCE, p);
2245 if (error) {
2246 printf("unmount of %s failed (",
2247 mp->mnt_stat.f_mntonname);
2248 if (error == EBUSY)
2249 printf("BUSY)\n");
2250 else
2251 printf("%d)\n", error);
2252 }
2253 }
2254}
2255
2256/*
2257 * Build hash lists of net addresses and hang them off the mount point.
2258 * Called by ufs_mount() to set up the lists of export addresses.
2259 */
2260static int
2261vfs_hang_addrlist(mp, nep, argp)
2262 struct mount *mp;
2263 struct netexport *nep;
2264 struct export_args *argp;
2265{
2266 register struct netcred *np;
2267 register struct radix_node_head *rnh;
2268 register int i;
2269 struct radix_node *rn;
2270 struct sockaddr *saddr, *smask = 0;
2271 struct domain *dom;
2272 int error;
2273
2274 if (argp->ex_addrlen == 0) {
2275 if (mp->mnt_flag & MNT_DEFEXPORTED)
2276 return (EPERM);
2277 np = &nep->ne_defexported;
2278 np->netc_exflags = argp->ex_flags;
2279 np->netc_anon = argp->ex_anon;
2280 np->netc_anon.cr_ref = 1;
2281 mp->mnt_flag |= MNT_DEFEXPORTED;
2282 return (0);
2283 }
2284 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2285 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
2286 bzero((caddr_t) np, i);
2287 saddr = (struct sockaddr *) (np + 1);
2288 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
2289 goto out;
2290 if (saddr->sa_len > argp->ex_addrlen)
2291 saddr->sa_len = argp->ex_addrlen;
2292 if (argp->ex_masklen) {
2293 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
2294 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
2295 if (error)
2296 goto out;
2297 if (smask->sa_len > argp->ex_masklen)
2298 smask->sa_len = argp->ex_masklen;
2299 }
2300 i = saddr->sa_family;
2301 if ((rnh = nep->ne_rtable[i]) == 0) {
2302 /*
2303 * Seems silly to initialize every AF when most are not used,
2304 * do so on demand here
2305 */
2306 for (dom = domains; dom; dom = dom->dom_next)
2307 if (dom->dom_family == i && dom->dom_rtattach) {
2308 dom->dom_rtattach((void **) &nep->ne_rtable[i],
2309 dom->dom_rtoffset);
2310 break;
2311 }
2312 if ((rnh = nep->ne_rtable[i]) == 0) {
2313 error = ENOBUFS;
2314 goto out;
2315 }
2316 }
2317 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2318 np->netc_rnodes);
2319 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */
2320 error = EPERM;
2321 goto out;
2322 }
2323 np->netc_exflags = argp->ex_flags;
2324 np->netc_anon = argp->ex_anon;
2325 np->netc_anon.cr_ref = 1;
2326 return (0);
2327out:
2328 free(np, M_NETADDR);
2329 return (error);
2330}
2331
2332/* ARGSUSED */
2333static int
2334vfs_free_netcred(rn, w)
2335 struct radix_node *rn;
2336 void *w;
2337{
2338 register struct radix_node_head *rnh = (struct radix_node_head *) w;
2339
2340 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2341 free((caddr_t) rn, M_NETADDR);
2342 return (0);
2343}
2344
2345/*
2346 * Free the net address hash lists that are hanging off the mount points.
2347 */
2348static void
2349vfs_free_addrlist(nep)
2350 struct netexport *nep;
2351{
2352 register int i;
2353 register struct radix_node_head *rnh;
2354
2355 for (i = 0; i <= AF_MAX; i++)
2356 if ((rnh = nep->ne_rtable[i])) {
2357 (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2358 (caddr_t) rnh);
2359 free((caddr_t) rnh, M_RTABLE);
2360 nep->ne_rtable[i] = 0;
2361 }
2362}
2363
2364int
2365vfs_export(mp, nep, argp)
2366 struct mount *mp;
2367 struct netexport *nep;
2368 struct export_args *argp;
2369{
2370 int error;
2371
2372 if (argp->ex_flags & MNT_DELEXPORT) {
2373 if (mp->mnt_flag & MNT_EXPUBLIC) {
2374 vfs_setpublicfs(NULL, NULL, NULL);
2375 mp->mnt_flag &= ~MNT_EXPUBLIC;
2376 }
2377 vfs_free_addrlist(nep);
2378 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2379 }
2380 if (argp->ex_flags & MNT_EXPORTED) {
2381 if (argp->ex_flags & MNT_EXPUBLIC) {
2382 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2383 return (error);
2384 mp->mnt_flag |= MNT_EXPUBLIC;
2385 }
2386 if ((error = vfs_hang_addrlist(mp, nep, argp)))
2387 return (error);
2388 mp->mnt_flag |= MNT_EXPORTED;
2389 }
2390 return (0);
2391}
2392
2393
2394/*
2395 * Set the publicly exported filesystem (WebNFS). Currently, only
2396 * one public filesystem is possible in the spec (RFC 2054 and 2055)
2397 */
2398int
2399vfs_setpublicfs(mp, nep, argp)
2400 struct mount *mp;
2401 struct netexport *nep;
2402 struct export_args *argp;
2403{
2404 int error;
2405 struct vnode *rvp;
2406 char *cp;
2407
2408 /*
2409 * mp == NULL -> invalidate the current info, the FS is
2410 * no longer exported. May be called from either vfs_export
2411 * or unmount, so check if it hasn't already been done.
2412 */
2413 if (mp == NULL) {
2414 if (nfs_pub.np_valid) {
2415 nfs_pub.np_valid = 0;
2416 if (nfs_pub.np_index != NULL) {
2417 FREE(nfs_pub.np_index, M_TEMP);
2418 nfs_pub.np_index = NULL;
2419 }
2420 }
2421 return (0);
2422 }
2423
2424 /*
2425 * Only one allowed at a time.
2426 */
2427 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2428 return (EBUSY);
2429
2430 /*
2431 * Get real filehandle for root of exported FS.
2432 */
2433 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2434 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2435
2436 if ((error = VFS_ROOT(mp, &rvp)))
2437 return (error);
2438
2439 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2440 return (error);
2441
2442 vput(rvp);
2443
2444 /*
2445 * If an indexfile was specified, pull it in.
2446 */
2447 if (argp->ex_indexfile != NULL) {
2448 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2449 M_WAITOK);
2450 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2451 MAXNAMLEN, (size_t *)0);
2452 if (!error) {
2453 /*
2454 * Check for illegal filenames.
2455 */
2456 for (cp = nfs_pub.np_index; *cp; cp++) {
2457 if (*cp == '/') {
2458 error = EINVAL;
2459 break;
2460 }
2461 }
2462 }
2463 if (error) {
2464 FREE(nfs_pub.np_index, M_TEMP);
2465 return (error);
2466 }
2467 }
2468
2469 nfs_pub.np_mount = mp;
2470 nfs_pub.np_valid = 1;
2471 return (0);
2472}
2473
2474struct netcred *
2475vfs_export_lookup(mp, nep, nam)
2476 register struct mount *mp;
2477 struct netexport *nep;
2478 struct sockaddr *nam;
2479{
2480 register struct netcred *np;
2481 register struct radix_node_head *rnh;
2482 struct sockaddr *saddr;
2483
2484 np = NULL;
2485 if (mp->mnt_flag & MNT_EXPORTED) {
2486 /*
2487 * Lookup in the export list first.
2488 */
2489 if (nam != NULL) {
2490 saddr = nam;
2491 rnh = nep->ne_rtable[saddr->sa_family];
2492 if (rnh != NULL) {
2493 np = (struct netcred *)
2494 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2495 rnh);
2496 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2497 np = NULL;
2498 }
2499 }
2500 /*
2501 * If no address match, use the default if it exists.
2502 */
2503 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2504 np = &nep->ne_defexported;
2505 }
2506 return (np);
2507}
2508
2509/*
2510 * perform msync on all vnodes under a mount point
2511 * the mount point must be locked.
2512 */
2513void
2514vfs_msync(struct mount *mp, int flags) {
2515 struct vnode *vp, *nvp;
2516 struct vm_object *obj;
2517 int anyio, tries;
2518
2519 tries = 5;
2520loop:
2521 anyio = 0;
2522 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2523
2524 nvp = vp->v_mntvnodes.le_next;
2525
2526 if (vp->v_mount != mp) {
2527 goto loop;
2528 }
2529
2530 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */
2531 continue;
2532
2533 if (flags != MNT_WAIT) {
2534 obj = vp->v_object;
2535 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
2536 continue;
2537 if (VOP_ISLOCKED(vp))
2538 continue;
2539 }
2540
2541 simple_lock(&vp->v_interlock);
2542 if (vp->v_object &&
2543 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2544 if (!vget(vp,
2545 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2546 if (vp->v_object) {
2547 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
2548 anyio = 1;
2549 }
2550 vput(vp);
2551 }
2552 } else {
2553 simple_unlock(&vp->v_interlock);
2554 }
2555 }
2556 if (anyio && (--tries > 0))
2557 goto loop;
2558}
2559
2560/*
2561 * Create the VM object needed for VMIO and mmap support. This
2562 * is done for all VREG files in the system. Some filesystems can take
2563 * advantage of the additional metadata buffering capability of the
2564 * VMIO code by making the device node VMIO mode as well.
2565 *
2566 * vp must be locked when vfs_object_create is called.
2567 */
2568int
2569vfs_object_create(vp, p, cred)
2570 struct vnode *vp;
2571 struct proc *p;
2572 struct ucred *cred;
2573{
2574 struct vattr vat;
2575 vm_object_t object;
2576 int error = 0;
2577
2578 if ((vp->v_type != VREG) && (vp->v_type != VBLK))
2579 return 0;
2580
2581retry:
2582 if ((object = vp->v_object) == NULL) {
2583 if (vp->v_type == VREG) {
2584 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2585 goto retn;
2586 object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
2587 } else if (major(vp->v_rdev) < nblkdev &&
2588 bdevsw(vp->v_rdev) != NULL) {
2589 /*
2590 * This simply allocates the biggest object possible
2591 * for a VBLK vnode. This should be fixed, but doesn't
2592 * cause any problems (yet).
2593 */
2594 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
2595 } else {
2596 goto retn;
2597 }
2598 /*
2599 * Dereference the reference we just created. This assumes
2600 * that the object is associated with the vp.
2601 */
2602 object->ref_count--;
2603 vp->v_usecount--;
2604 } else {
2605 if (object->flags & OBJ_DEAD) {
2606 VOP_UNLOCK(vp, 0, p);
2607 tsleep(object, PVM, "vodead", 0);
2608 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2609 goto retry;
2610 }
2611 }
2612
2613 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
2614 vp->v_flag |= VOBJBUF;
2615
2616retn:
2617 return error;
2618}
2619
2620static void
2621vfree(vp)
2622 struct vnode *vp;
2623{
2624 int s;
2625
2626 s = splbio();
2627 simple_lock(&vnode_free_list_slock);
2628 if (vp->v_flag & VTBFREE) {
2629 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2630 vp->v_flag &= ~VTBFREE;
2631 }
2632 if (vp->v_flag & VAGE) {
2633 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2634 } else {
2635 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2636 }
2637 freevnodes++;
2638 simple_unlock(&vnode_free_list_slock);
2639 vp->v_flag &= ~VAGE;
2640 vp->v_flag |= VFREE;
2641 splx(s);
2642}
2643
2644void
2645vbusy(vp)
2646 struct vnode *vp;
2647{
2648 int s;
2649
2650 s = splbio();
2651 simple_lock(&vnode_free_list_slock);
2652 if (vp->v_flag & VTBFREE) {
2653 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2654 vp->v_flag &= ~VTBFREE;
2655 } else {
2656 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2657 freevnodes--;
2658 }
2659 simple_unlock(&vnode_free_list_slock);
2660 vp->v_flag &= ~(VFREE|VAGE);
2661 splx(s);
2662}
2663
2664/*
2665 * Record a process's interest in events which might happen to
2666 * a vnode. Because poll uses the historic select-style interface
2667 * internally, this routine serves as both the ``check for any
2668 * pending events'' and the ``record my interest in future events''
2669 * functions. (These are done together, while the lock is held,
2670 * to avoid race conditions.)
2671 */
2672int
2673vn_pollrecord(vp, p, events)
2674 struct vnode *vp;
2675 struct proc *p;
2676 short events;
2677{
2678 simple_lock(&vp->v_pollinfo.vpi_lock);
2679 if (vp->v_pollinfo.vpi_revents & events) {
2680 /*
2681 * This leaves events we are not interested
2682 * in available for the other process which
2683 * presumably had requested them
2684 * (otherwise they would never have been
2685 * recorded).
2686 */
2687 events &= vp->v_pollinfo.vpi_revents;
2688 vp->v_pollinfo.vpi_revents &= ~events;
2689
2690 simple_unlock(&vp->v_pollinfo.vpi_lock);
2691 return events;
2692 }
2693 vp->v_pollinfo.vpi_events |= events;
2694 selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2695 simple_unlock(&vp->v_pollinfo.vpi_lock);
2696 return 0;
2697}
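/*
 * Editorial sketch of a minimal VOP_POLL routine built on
 * vn_pollrecord(); "example_poll" is hypothetical, but real
 * implementations take this shape before adding any device- or
 * fs-specific readiness checks.
 */
static int
example_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
}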
2698
2699/*
2700 * Note the occurrence of an event. If the VN_POLLEVENT macro is used,
2701 * it is possible for us to miss an event due to race conditions, but
2702 * that condition is expected to be rare, so for the moment it is the
2703 * preferred interface.
2704 */
2705void
2706vn_pollevent(vp, events)
2707 struct vnode *vp;
2708 short events;
2709{
2710 simple_lock(&vp->v_pollinfo.vpi_lock);
2711 if (vp->v_pollinfo.vpi_events & events) {
2712 /*
2713 * We clear vpi_events so that we don't
2714 * call selwakeup() twice if two events are
2715 * posted before the polling process(es) is
2716 * awakened. This also ensures that we take at
2717 * most one selwakeup() if the polling process
2718 * is no longer interested. However, it does
2719 * mean that only one event can be noticed at
2720 * a time. (Perhaps we should only clear those
2721 * event bits which we note?) XXX
2722 */
2723 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */
2724 vp->v_pollinfo.vpi_revents |= events;
2725 selwakeup(&vp->v_pollinfo.vpi_selinfo);
2726 }
2727 simple_unlock(&vp->v_pollinfo.vpi_lock);
2728}
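/*
 * Editorial sketch: producers normally post events through the
 * VN_POLLEVENT macro mentioned above (assumed to test vpi_events
 * before paying for the function call), e.g. when fresh data
 * becomes readable:
 *
 *	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
 */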
2729
2730/*
2731 * Wake up anyone polling on vp because it is being revoked.
2732 * This depends on dead_poll() returning POLLHUP for correct
2733 * behavior.
2734 */
2735void
2736vn_pollgone(vp)
2737 struct vnode *vp;
2738{
2739 simple_lock(&vp->v_pollinfo.vpi_lock);
2740 if (vp->v_pollinfo.vpi_events) {
2741 vp->v_pollinfo.vpi_events = 0;
2742 selwakeup(&vp->v_pollinfo.vpi_selinfo);
2743 }
2744 simple_unlock(&vp->v_pollinfo.vpi_lock);
2745}
2746
2747
2748
2749/*
2750 * Routine to create and manage a filesystem syncer vnode.
2751 */
2752#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
2753static int sync_fsync __P((struct vop_fsync_args *));
2754static int sync_inactive __P((struct vop_inactive_args *));
2755static int sync_reclaim __P((struct vop_reclaim_args *));
2756#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
2757#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
2758static int sync_print __P((struct vop_print_args *));
2759#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
2760
2761static vop_t **sync_vnodeop_p;
2762static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
2763 { &vop_default_desc, (vop_t *) vop_eopnotsupp },
2764 { &vop_close_desc, (vop_t *) sync_close }, /* close */
2765 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */
2766 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */
2767 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */
2768 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */
2769 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */
2770 { &vop_print_desc, (vop_t *) sync_print }, /* print */
2771 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */
2772 { NULL, NULL }
2773};
2774static struct vnodeopv_desc sync_vnodeop_opv_desc =
2775 { &sync_vnodeop_p, sync_vnodeop_entries };
2776
2777VNODEOP_SET(sync_vnodeop_opv_desc);
2778
2779/*
2780 * Create a new filesystem syncer vnode for the specified mount point.
2781 */
2782int
2783vfs_allocate_syncvnode(mp)
2784 struct mount *mp;
2785{
2786 struct vnode *vp;
2787 static long start, incr, next;
2788 int error;
2789
2790 /* Allocate a new vnode */
2791 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2792 mp->mnt_syncer = NULL;
2793 return (error);
2794 }
2795 vp->v_type = VNON;
2796 /*
2797 * Place the vnode onto the syncer worklist. We attempt to
2798 * scatter them about on the list so that they will go off
2799 * at evenly distributed times even if all the filesystems
2800 * are mounted at once.
2801 */
2802 next += incr;
2803 if (next == 0 || next > syncer_maxdelay) {
2804 start /= 2;
2805 incr /= 2;
2806 if (start == 0) {
2807 start = syncer_maxdelay / 2;
2808 incr = syncer_maxdelay;
2809 }
2810 next = start;
2811 }
2812 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2813 mp->mnt_syncer = vp;
2814 return (0);
2815}
2816
2817/*
2818 * Do a lazy sync of the filesystem.
2819 */
2820static int
2821sync_fsync(ap)
2822 struct vop_fsync_args /* {
2823 struct vnode *a_vp;
2824 struct ucred *a_cred;
2825 int a_waitfor;
2826 struct proc *a_p;
2827 } */ *ap;
2828{
2829 struct vnode *syncvp = ap->a_vp;
2830 struct mount *mp = syncvp->v_mount;
2831 struct proc *p = ap->a_p;
2832 int asyncflag;
2833
2834 /*
2835 * We only need to do something if this is a lazy evaluation.
2836 */
2837 if (ap->a_waitfor != MNT_LAZY)
2838 return (0);
2839
2840 /*
2841 * Move ourselves to the back of the sync list.
2842 */
2843 vn_syncer_add_to_worklist(syncvp, syncdelay);
2844
2845 /*
2846 * Walk the list of vnodes pushing all that are dirty and
2847 * not already on the sync list.
2848 */
2849 simple_lock(&mountlist_slock);
2850 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
2851 simple_unlock(&mountlist_slock);
2852 return (0);
2853 }
2854 asyncflag = mp->mnt_flag & MNT_ASYNC;
2855 mp->mnt_flag &= ~MNT_ASYNC;
2856 vfs_msync(mp, MNT_NOWAIT);
2857 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2858 if (asyncflag)
2859 mp->mnt_flag |= MNT_ASYNC;
2860 vfs_unbusy(mp, p);
2861 return (0);
2862}
2863
2864/*
2865 * The syncer vnode is no longer referenced.
2866 */
2867static int
2868sync_inactive(ap)
2869 struct vop_inactive_args /* {
2870 struct vnode *a_vp;
2871 struct proc *a_p;
2872 } */ *ap;
2873{
2874
2875 vgone(ap->a_vp);
2876 return (0);
2877}
2878
2879/*
2880 * The syncer vnode is no longer needed and is being decommissioned.
2881 *
2882 * Modifications to the worklist must be protected at splbio().
2883 */
2884static int
2885sync_reclaim(ap)
2886 struct vop_reclaim_args /* {
2887 struct vnode *a_vp;
2888 } */ *ap;
2889{
2890 struct vnode *vp = ap->a_vp;
2891 int s;
2892
2893 s = splbio();
2894 vp->v_mount->mnt_syncer = NULL;
2895 if (vp->v_flag & VONWORKLST) {
2896 LIST_REMOVE(vp, v_synclist);
2897 vp->v_flag &= ~VONWORKLST;
2898 }
2899 splx(s);
2900
2901 return (0);
2902}
2903
2904/*
2905 * Print out a syncer vnode.
2906 */
2907static int
2908sync_print(ap)
2909 struct vop_print_args /* {
2910 struct vnode *a_vp;
2911 } */ *ap;
2912{
2913 struct vnode *vp = ap->a_vp;
2914
2915 printf("syncer vnode");
2916 if (vp->v_vnlock != NULL)
2917 lockmgr_printinfo(vp->v_vnlock);
2918 printf("\n");
2919 return (0);
2920}
1287 }
1288
1289 simple_unlock(&spechash_slock);
1290 *vpp = nvp;
1291 if (vp != NULLVP) {
1292 nvp->v_flag |= VALIASED;
1293 vp->v_flag |= VALIASED;
1294 vput(vp);
1295 }
1296 return (NULLVP);
1297 }
1298 /*
1299 * if ( vp && (vp->v_tag == VT_NULL))
1300 * We have a vnode alias, but it is a trashed.
1301 * Make it look like it's newley allocated. (by getnewvnode())
1302 * The caller should use this instead.
1303 */
1304 simple_unlock(&spechash_slock);
1305 VOP_UNLOCK(vp, 0, p);
1306 simple_lock(&vp->v_interlock);
1307 vclean(vp, 0, p);
1308 vp->v_op = nvp->v_op;
1309 vp->v_tag = nvp->v_tag;
1310 nvp->v_type = VNON;
1311 insmntque(vp, mp);
1312 return (vp);
1313}
1314
1315/*
1316 * Grab a particular vnode from the free list, increment its
1317 * reference count and lock it. The vnode lock bit is set if the
1318 * vnode is being eliminated in vgone. The process is awakened
1319 * when the transition is completed, and an error returned to
1320 * indicate that the vnode is no longer usable (possibly having
1321 * been changed to a new file system type).
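 *
 * A typical caller (sketch, not code from this file) pairs vget
 * with vput or vrele:
 *
 *	error = vget(vp, LK_EXCLUSIVE, p);
 *	if (error)
 *		return (error);	/* e.g. ENOENT if being reclaimed */
 *	...operate on the locked vnode...
 *	vput(vp);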
1322 */
1323int
1324vget(vp, flags, p)
1325 register struct vnode *vp;
1326 int flags;
1327 struct proc *p;
1328{
1329 int error;
1330
1331 /*
1332 * If the vnode is in the process of being cleaned out for
1333 * another use, we wait for the cleaning to finish and then
1334 * return failure. Cleaning is determined by checking that
1335 * the VXLOCK flag is set.
1336 */
1337 if ((flags & LK_INTERLOCK) == 0) {
1338 simple_lock(&vp->v_interlock);
1339 }
1340 if (vp->v_flag & VXLOCK) {
1341 vp->v_flag |= VXWANT;
1342 simple_unlock(&vp->v_interlock);
1343 tsleep((caddr_t)vp, PINOD, "vget", 0);
1344 return (ENOENT);
1345 }
1346
1347 vp->v_usecount++;
1348
1349 if (VSHOULDBUSY(vp))
1350 vbusy(vp);
1351 if (flags & LK_TYPE_MASK) {
1352 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
1353 /*
1354 * must expand vrele here because we do not want
1355 * to call VOP_INACTIVE if the reference count
1356 * drops back to zero since it was never really
1357 * active. We must remove it from the free list
1358 * before sleeping so that multiple processes do
1359 * not try to recycle it.
1360 */
1361 simple_lock(&vp->v_interlock);
1362 vp->v_usecount--;
1363 if (VSHOULDFREE(vp))
1364 vfree(vp);
1365 simple_unlock(&vp->v_interlock);
1366 }
1367 return (error);
1368 }
1369 simple_unlock(&vp->v_interlock);
1370 return (0);
1371}
1372
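/*
 * Increment the reference count of a vnode. Unlike vget(), this
 * neither locks the vnode nor takes it off the free list, so the
 * caller is expected to hold a reference already.
 */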
1373void
1374vref(struct vnode *vp)
1375{
1376 simple_lock(&vp->v_interlock);
1377 vp->v_usecount++;
1378 simple_unlock(&vp->v_interlock);
1379}
1380
1381/*
1382 * Vnode put/release.
1383 * If count drops to zero, call inactive routine and return to freelist.
1384 */
1385void
1386vrele(vp)
1387 struct vnode *vp;
1388{
1389 struct proc *p = curproc; /* XXX */
1390
1391 KASSERT(vp != NULL, ("vrele: null vp"));
1392
1393 simple_lock(&vp->v_interlock);
1394
1395 if (vp->v_usecount > 1) {
1396
1397 vp->v_usecount--;
1398 simple_unlock(&vp->v_interlock);
1399
1400 return;
1401 }
1402
1403 if (vp->v_usecount == 1) {
1404
1405 vp->v_usecount--;
1406 if (VSHOULDFREE(vp))
1407 vfree(vp);
1408 /*
1409		 * VOP_INACTIVE must be called with the node locked, but
1410		 * vrele is entered with the node unlocked, so we explicitly
1411		 * lock the vnode here before calling VOP_INACTIVE.
1412 */
1413 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1414 VOP_INACTIVE(vp, p);
1415 }
1416
1417 } else {
1418#ifdef DIAGNOSTIC
1419 vprint("vrele: negative ref count", vp);
1420 simple_unlock(&vp->v_interlock);
1421#endif
1422 panic("vrele: negative ref cnt");
1423 }
1424}
1425
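/*
 * Release a locked vnode: drop the reference and unlock it.
 * This is the locked-vnode counterpart of vrele().
 */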
1426void
1427vput(vp)
1428 struct vnode *vp;
1429{
1430 struct proc *p = curproc; /* XXX */
1431
1432 KASSERT(vp != NULL, ("vput: null vp"));
1433
1434 simple_lock(&vp->v_interlock);
1435
1436 if (vp->v_usecount > 1) {
1437
1438 vp->v_usecount--;
1439 VOP_UNLOCK(vp, LK_INTERLOCK, p);
1440 return;
1441
1442 }
1443
1444 if (vp->v_usecount == 1) {
1445
1446 vp->v_usecount--;
1447 if (VSHOULDFREE(vp))
1448 vfree(vp);
1449 /*
1450		 * VOP_INACTIVE must be called with the node locked; since
1451		 * vput is entered with the node already locked, we need only
1452		 * release the interlock before calling VOP_INACTIVE.
1453 */
1454 simple_unlock(&vp->v_interlock);
1455 VOP_INACTIVE(vp, p);
1456
1457 } else {
1458#ifdef DIAGNOSTIC
1459 vprint("vput: negative ref count", vp);
1460#endif
1461 panic("vput: negative ref cnt");
1462 }
1463}
1464
1465/*
1466 * Somebody doesn't want the vnode recycled.
1467 */
1468void
1469vhold(vp)
1470 register struct vnode *vp;
1471{
1472 int s;
1473
1474 s = splbio();
1475 vp->v_holdcnt++;
1476 if (VSHOULDBUSY(vp))
1477 vbusy(vp);
1478 splx(s);
1479}
1480
1481/*
1482 * One less who cares about this vnode.
1483 */
1484void
1485vdrop(vp)
1486 register struct vnode *vp;
1487{
1488 int s;
1489
1490 s = splbio();
1491 if (vp->v_holdcnt <= 0)
1492 panic("vdrop: holdcnt");
1493 vp->v_holdcnt--;
1494 if (VSHOULDFREE(vp))
1495 vfree(vp);
1496 splx(s);
1497}
1498
1499/*
1500 * Remove any vnodes in the vnode table belonging to mount point mp.
1501 *
1502 * If FORCECLOSE is not specified, there should not be any active
1503 * vnodes; return an error if any are found (nb: this is a user
1504 * error, not a system error). If FORCECLOSE is specified, detach
1505 * any active vnodes that are found.
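 *
 * (Sketch of a typical caller, not code from this file: an unmount
 * routine usually does something like
 *	error = vflush(mp, NULLVP, (mntflags & MNT_FORCE) ? FORCECLOSE : 0);
 * and passes SKIPSYSTEM as well if it keeps private system vnodes.)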
1506 */
1507#ifdef DIAGNOSTIC
1508static int busyprt = 0; /* print out busy vnodes */
1509SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
1510#endif
1511
1512int
1513vflush(mp, skipvp, flags)
1514 struct mount *mp;
1515 struct vnode *skipvp;
1516 int flags;
1517{
1518 struct proc *p = curproc; /* XXX */
1519 struct vnode *vp, *nvp;
1520 int busy = 0;
1521
1522 simple_lock(&mntvnode_slock);
1523loop:
1524 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1525 /*
1526 * Make sure this vnode wasn't reclaimed in getnewvnode().
1527 * Start over if it was (it won't be on the list anymore).
1528 */
1529 if (vp->v_mount != mp)
1530 goto loop;
1531 nvp = vp->v_mntvnodes.le_next;
1532 /*
1533 * Skip over a selected vnode.
1534 */
1535 if (vp == skipvp)
1536 continue;
1537
1538 simple_lock(&vp->v_interlock);
1539 /*
1540 * Skip over any vnodes marked VSYSTEM.
1541 */
1542 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1543 simple_unlock(&vp->v_interlock);
1544 continue;
1545 }
1546 /*
1547 * If WRITECLOSE is set, only flush out regular file vnodes
1548 * open for writing.
1549 */
1550 if ((flags & WRITECLOSE) &&
1551 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1552 simple_unlock(&vp->v_interlock);
1553 continue;
1554 }
1555
1556 /*
1557 * With v_usecount == 0, all we need to do is clear out the
1558 * vnode data structures and we are done.
1559 */
1560 if (vp->v_usecount == 0) {
1561 simple_unlock(&mntvnode_slock);
1562 vgonel(vp, p);
1563 simple_lock(&mntvnode_slock);
1564 continue;
1565 }
1566
1567 /*
1568 * If FORCECLOSE is set, forcibly close the vnode. For block
1569 * or character devices, revert to an anonymous device. For
1570 * all other files, just kill them.
1571 */
1572 if (flags & FORCECLOSE) {
1573 simple_unlock(&mntvnode_slock);
1574 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1575 vgonel(vp, p);
1576 } else {
1577 vclean(vp, 0, p);
1578 vp->v_op = spec_vnodeop_p;
1579 insmntque(vp, (struct mount *) 0);
1580 }
1581 simple_lock(&mntvnode_slock);
1582 continue;
1583 }
1584#ifdef DIAGNOSTIC
1585 if (busyprt)
1586 vprint("vflush: busy vnode", vp);
1587#endif
1588 simple_unlock(&vp->v_interlock);
1589 busy++;
1590 }
1591 simple_unlock(&mntvnode_slock);
1592 if (busy)
1593 return (EBUSY);
1594 return (0);
1595}
1596
1597/*
1598 * Disassociate the underlying file system from a vnode.
1599 */
1600static void
1601vclean(vp, flags, p)
1602 struct vnode *vp;
1603 int flags;
1604 struct proc *p;
1605{
1606 int active;
1607 vm_object_t obj;
1608
1609 /*
1610 * Check to see if the vnode is in use. If so we have to reference it
1611 * before we clean it out so that its count cannot fall to zero and
1612 * generate a race against ourselves to recycle it.
1613 */
1614 if ((active = vp->v_usecount))
1615 vp->v_usecount++;
1616
1617 /*
1618 * Prevent the vnode from being recycled or brought into use while we
1619 * clean it out.
1620 */
1621 if (vp->v_flag & VXLOCK)
1622 panic("vclean: deadlock");
1623 vp->v_flag |= VXLOCK;
1624 /*
1625 * Even if the count is zero, the VOP_INACTIVE routine may still
1626 * have the object locked while it cleans it out. The VOP_LOCK
1627 * ensures that the VOP_INACTIVE routine is done with its work.
1628 * For active vnodes, it ensures that no other activity can
1629 * occur while the underlying object is being cleaned out.
1630 */
1631 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1632
1633 /*
1634 * Clean out any buffers associated with the vnode.
1635 */
1636 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1637 if ((obj = vp->v_object) != NULL) {
1638 if (obj->ref_count == 0) {
1639 /*
1640 * This is a normal way of shutting down the object/vnode
1641 * association.
1642 */
1643 vm_object_terminate(obj);
1644 } else {
1645 /*
1646 * Woe to the process that tries to page now :-).
1647 */
1648 vm_pager_deallocate(obj);
1649 }
1650 }
1651
1652 /*
1653 * If purging an active vnode, it must be closed and
1654 * deactivated before being reclaimed. Note that the
1655 * VOP_INACTIVE will unlock the vnode.
1656 */
1657 if (active) {
1658 if (flags & DOCLOSE)
1659 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1660 VOP_INACTIVE(vp, p);
1661 } else {
1662 /*
1663 * Any other processes trying to obtain this lock must first
1664 * wait for VXLOCK to clear, then call the new lock operation.
1665 */
1666 VOP_UNLOCK(vp, 0, p);
1667 }
1668 /*
1669 * Reclaim the vnode.
1670 */
1671 if (VOP_RECLAIM(vp, p))
1672 panic("vclean: cannot reclaim");
1673
1674 if (active)
1675 vrele(vp);
1676
1677 cache_purge(vp);
1678 if (vp->v_vnlock) {
1679#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
1680#ifdef DIAGNOSTIC
1681 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1682 vprint("vclean: lock not drained", vp);
1683#endif
1684#endif
1685 FREE(vp->v_vnlock, M_VNODE);
1686 vp->v_vnlock = NULL;
1687 }
1688
1689 if (VSHOULDFREE(vp))
1690 vfree(vp);
1691
1692 /*
1693 * Done with purge, notify sleepers of the grim news.
1694 */
1695 vp->v_op = dead_vnodeop_p;
1696 vn_pollgone(vp);
1697 vp->v_tag = VT_NON;
1698 vp->v_flag &= ~VXLOCK;
1699 if (vp->v_flag & VXWANT) {
1700 vp->v_flag &= ~VXWANT;
1701 wakeup((caddr_t) vp);
1702 }
1703}
1704
1705/*
1706 * Eliminate all activity associated with the requested vnode
1707 * and with all vnodes aliased to the requested vnode.
1708 */
1709int
1710vop_revoke(ap)
1711 struct vop_revoke_args /* {
1712 struct vnode *a_vp;
1713 int a_flags;
1714 } */ *ap;
1715{
1716 struct vnode *vp, *vq;
1717 struct proc *p = curproc; /* XXX */
1718
1719 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
1720
1721 vp = ap->a_vp;
1722 simple_lock(&vp->v_interlock);
1723
1724 if (vp->v_flag & VALIASED) {
1725 /*
1726 * If a vgone (or vclean) is already in progress,
1727 * wait until it is done and return.
1728 */
1729 if (vp->v_flag & VXLOCK) {
1730 vp->v_flag |= VXWANT;
1731 simple_unlock(&vp->v_interlock);
1732 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1733 return (0);
1734 }
1735 /*
1736 * Ensure that vp will not be vgone'd while we
1737 * are eliminating its aliases.
1738 */
1739 vp->v_flag |= VXLOCK;
1740 simple_unlock(&vp->v_interlock);
1741 while (vp->v_flag & VALIASED) {
1742 simple_lock(&spechash_slock);
1743 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1744 if (vq->v_rdev != vp->v_rdev ||
1745 vq->v_type != vp->v_type || vp == vq)
1746 continue;
1747 simple_unlock(&spechash_slock);
1748 vgone(vq);
1749 break;
1750 }
1751 if (vq == NULLVP) {
1752 simple_unlock(&spechash_slock);
1753 }
1754 }
1755 /*
1756 * Remove the lock so that vgone below will
1757 * really eliminate the vnode after which time
1758 * vgone will awaken any sleepers.
1759 */
1760 simple_lock(&vp->v_interlock);
1761 vp->v_flag &= ~VXLOCK;
1762 if (vp->v_flag & VXWANT) {
1763 vp->v_flag &= ~VXWANT;
1764 wakeup(vp);
1765 }
1766 }
1767 vgonel(vp, p);
1768 return (0);
1769}
1770
1771/*
1772 * Recycle an unused vnode to the front of the free list.
1773 * Release the passed interlock if the vnode will be recycled.
1774 */
1775int
1776vrecycle(vp, inter_lkp, p)
1777 struct vnode *vp;
1778 struct simplelock *inter_lkp;
1779 struct proc *p;
1780{
1781
1782 simple_lock(&vp->v_interlock);
1783 if (vp->v_usecount == 0) {
1784 if (inter_lkp) {
1785 simple_unlock(inter_lkp);
1786 }
1787 vgonel(vp, p);
1788 return (1);
1789 }
1790 simple_unlock(&vp->v_interlock);
1791 return (0);
1792}
1793
1794/*
1795 * Eliminate all activity associated with a vnode
1796 * in preparation for reuse.
1797 */
1798void
1799vgone(vp)
1800 register struct vnode *vp;
1801{
1802 struct proc *p = curproc; /* XXX */
1803
1804 simple_lock(&vp->v_interlock);
1805 vgonel(vp, p);
1806}
1807
1808/*
1809 * vgone, with the vp interlock held.
1810 */
1811static void
1812vgonel(vp, p)
1813 struct vnode *vp;
1814 struct proc *p;
1815{
1816 int s;
1817 struct vnode *vq;
1818 struct vnode *vx;
1819
1820 /*
1821 * If a vgone (or vclean) is already in progress,
1822 * wait until it is done and return.
1823 */
1824 if (vp->v_flag & VXLOCK) {
1825 vp->v_flag |= VXWANT;
1826 simple_unlock(&vp->v_interlock);
1827 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1828 return;
1829 }
1830
1831 /*
1832 * Clean out the filesystem specific data.
1833 */
1834 vclean(vp, DOCLOSE, p);
1835 simple_lock(&vp->v_interlock);
1836
1837 /*
1838 * Delete from old mount point vnode list, if on one.
1839 */
1840 if (vp->v_mount != NULL)
1841 insmntque(vp, (struct mount *)0);
1842 /*
1843 * If special device, remove it from special device alias list
1844 * if it is on one.
1845 */
1846 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1847 simple_lock(&spechash_slock);
1848 if (*vp->v_hashchain == vp) {
1849 *vp->v_hashchain = vp->v_specnext;
1850 } else {
1851 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1852 if (vq->v_specnext != vp)
1853 continue;
1854 vq->v_specnext = vp->v_specnext;
1855 break;
1856 }
1857 if (vq == NULL)
1858 panic("missing bdev");
1859 }
1860 if (vp->v_flag & VALIASED) {
1861 vx = NULL;
1862 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1863 if (vq->v_rdev != vp->v_rdev ||
1864 vq->v_type != vp->v_type)
1865 continue;
1866 if (vx)
1867 break;
1868 vx = vq;
1869 }
1870 if (vx == NULL)
1871 panic("missing alias");
1872 if (vq == NULL)
1873 vx->v_flag &= ~VALIASED;
1874 vp->v_flag &= ~VALIASED;
1875 }
1876 simple_unlock(&spechash_slock);
1877 FREE(vp->v_specinfo, M_VNODE);
1878 vp->v_specinfo = NULL;
1879 }
1880
1881 /*
1882 * If it is on the freelist and not already at the head,
1883 * move it to the head of the list. The test of the back
1884 * pointer and the reference count of zero is because
1885 * it will be removed from the free list by getnewvnode,
1886 * but will not have its reference count incremented until
1887 * after calling vgone. If the reference count were
1888 * incremented first, vgone would (incorrectly) try to
1889 * close the previous instance of the underlying object.
1890 */
1891 if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
1892 s = splbio();
1893 simple_lock(&vnode_free_list_slock);
1894 if (vp->v_flag & VFREE) {
1895 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1896 } else if (vp->v_flag & VTBFREE) {
1897 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
1898 vp->v_flag &= ~VTBFREE;
1899 freevnodes++;
1900 } else
1901 freevnodes++;
1902 vp->v_flag |= VFREE;
1903 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1904 simple_unlock(&vnode_free_list_slock);
1905 splx(s);
1906 }
1907
1908 vp->v_type = VBAD;
1909 simple_unlock(&vp->v_interlock);
1910}
1911
1912/*
1913 * Lookup a vnode by device number.
1914 */
1915int
1916vfinddev(dev, type, vpp)
1917 dev_t dev;
1918 enum vtype type;
1919 struct vnode **vpp;
1920{
1921 register struct vnode *vp;
1922 int rc = 0;
1923
1924 simple_lock(&spechash_slock);
1925 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1926 if (dev != vp->v_rdev || type != vp->v_type)
1927 continue;
1928 *vpp = vp;
1929 rc = 1;
1930 break;
1931 }
1932 simple_unlock(&spechash_slock);
1933 return (rc);
1934}
1935
1936/*
1937 * Calculate the total number of references to a special device.
1938 */
1939int
1940vcount(vp)
1941 register struct vnode *vp;
1942{
1943 struct vnode *vq, *vnext;
1944 int count;
1945
1946loop:
1947 if ((vp->v_flag & VALIASED) == 0)
1948 return (vp->v_usecount);
1949 simple_lock(&spechash_slock);
1950 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1951 vnext = vq->v_specnext;
1952 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1953 continue;
1954 /*
1955 * Alias, but not in use, so flush it out.
1956 */
1957 if (vq->v_usecount == 0 && vq != vp) {
1958 simple_unlock(&spechash_slock);
1959 vgone(vq);
1960 goto loop;
1961 }
1962 count += vq->v_usecount;
1963 }
1964 simple_unlock(&spechash_slock);
1965 return (count);
1966}
1967/*
1968 * Print out a description of a vnode.
1969 */
1970static char *typename[] =
1971{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1972
1973void
1974vprint(label, vp)
1975 char *label;
1976 register struct vnode *vp;
1977{
1978 char buf[96];
1979
1980 if (label != NULL)
1981 printf("%s: %p: ", label, (void *)vp);
1982 else
1983 printf("%p: ", (void *)vp);
1984 printf("type %s, usecount %d, writecount %d, refcount %d,",
1985 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1986 vp->v_holdcnt);
1987 buf[0] = '\0';
1988 if (vp->v_flag & VROOT)
1989 strcat(buf, "|VROOT");
1990 if (vp->v_flag & VTEXT)
1991 strcat(buf, "|VTEXT");
1992 if (vp->v_flag & VSYSTEM)
1993 strcat(buf, "|VSYSTEM");
1994 if (vp->v_flag & VXLOCK)
1995 strcat(buf, "|VXLOCK");
1996 if (vp->v_flag & VXWANT)
1997 strcat(buf, "|VXWANT");
1998 if (vp->v_flag & VBWAIT)
1999 strcat(buf, "|VBWAIT");
2000 if (vp->v_flag & VALIASED)
2001 strcat(buf, "|VALIASED");
2002 if (vp->v_flag & VDOOMED)
2003 strcat(buf, "|VDOOMED");
2004 if (vp->v_flag & VFREE)
2005 strcat(buf, "|VFREE");
2006 if (vp->v_flag & VOBJBUF)
2007 strcat(buf, "|VOBJBUF");
2008 if (buf[0] != '\0')
2009 printf(" flags (%s)", &buf[1]);
2010 if (vp->v_data == NULL) {
2011 printf("\n");
2012 } else {
2013 printf("\n\t");
2014 VOP_PRINT(vp);
2015 }
2016}
2017
2018#ifdef DDB
2019#include <ddb/ddb.h>
2020/*
2021 * List all of the locked vnodes in the system.
2022 * Called when debugging the kernel.
2023 */
2024DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
2025{
2026 struct proc *p = curproc; /* XXX */
2027 struct mount *mp, *nmp;
2028 struct vnode *vp;
2029
2030 printf("Locked vnodes\n");
2031 simple_lock(&mountlist_slock);
2032 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2033 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2034 nmp = mp->mnt_list.cqe_next;
2035 continue;
2036 }
2037 for (vp = mp->mnt_vnodelist.lh_first;
2038 vp != NULL;
2039 vp = vp->v_mntvnodes.le_next) {
2040 if (VOP_ISLOCKED(vp))
2041 vprint((char *)0, vp);
2042 }
2043 simple_lock(&mountlist_slock);
2044 nmp = mp->mnt_list.cqe_next;
2045 vfs_unbusy(mp, p);
2046 }
2047 simple_unlock(&mountlist_slock);
2048}
2049#endif
2050
2051/*
2052 * Top level filesystem related information gathering.
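 *
 * (A userland consumer reaches the handler below via sysctl(3)
 * with a mib such as { CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM }.)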
2053 */
2054static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
2055
2056static int
2057vfs_sysctl SYSCTL_HANDLER_ARGS
2058{
2059 int *name = (int *)arg1 - 1; /* XXX */
2060 u_int namelen = arg2 + 1; /* XXX */
2061 struct vfsconf *vfsp;
2062
2063#if 1 || defined(COMPAT_PRELITE2)
2064 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
2065 if (namelen == 1)
2066 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
2067#endif
2068
2069#ifdef notyet
2070 /* all sysctl names at this level are at least name and field */
2071 if (namelen < 2)
2072 return (ENOTDIR); /* overloaded */
2073 if (name[0] != VFS_GENERIC) {
2074 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2075 if (vfsp->vfc_typenum == name[0])
2076 break;
2077 if (vfsp == NULL)
2078 return (EOPNOTSUPP);
2079 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2080 oldp, oldlenp, newp, newlen, p));
2081 }
2082#endif
2083 switch (name[1]) {
2084 case VFS_MAXTYPENUM:
2085 if (namelen != 2)
2086 return (ENOTDIR);
2087 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
2088 case VFS_CONF:
2089 if (namelen != 3)
2090 return (ENOTDIR); /* overloaded */
2091 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2092 if (vfsp->vfc_typenum == name[2])
2093 break;
2094 if (vfsp == NULL)
2095 return (EOPNOTSUPP);
2096 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
2097 }
2098 return (EOPNOTSUPP);
2099}
2100
2101SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
2102 "Generic filesystem");
2103
2104#if 1 || defined(COMPAT_PRELITE2)
2105
2106static int
2107sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
2108{
2109 int error;
2110 struct vfsconf *vfsp;
2111 struct ovfsconf ovfs;
2112
2113 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
2114 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
2115 strcpy(ovfs.vfc_name, vfsp->vfc_name);
2116 ovfs.vfc_index = vfsp->vfc_typenum;
2117 ovfs.vfc_refcount = vfsp->vfc_refcount;
2118 ovfs.vfc_flags = vfsp->vfc_flags;
2119 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
2120 if (error)
2121 return error;
2122 }
2123 return 0;
2124}
2125
2126#endif /* 1 || COMPAT_PRELITE2 */
2127
2128#if 0
2129#define KINFO_VNODESLOP 10
2130/*
2131 * Dump vnode list (via sysctl).
2132 * Copyout address of vnode followed by vnode.
2133 */
2134/* ARGSUSED */
2135static int
2136sysctl_vnode SYSCTL_HANDLER_ARGS
2137{
2138 struct proc *p = curproc; /* XXX */
2139 struct mount *mp, *nmp;
2140 struct vnode *nvp, *vp;
2141 int error;
2142
2143#define VPTRSZ sizeof (struct vnode *)
2144#define VNODESZ sizeof (struct vnode)
2145
2146 req->lock = 0;
2147 if (!req->oldptr) /* Make an estimate */
2148 return (SYSCTL_OUT(req, 0,
2149 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
2150
2151 simple_lock(&mountlist_slock);
2152 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2153 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2154 nmp = mp->mnt_list.cqe_next;
2155 continue;
2156 }
2157again:
2158 simple_lock(&mntvnode_slock);
2159 for (vp = mp->mnt_vnodelist.lh_first;
2160 vp != NULL;
2161 vp = nvp) {
2162 /*
2163 * Check that the vp is still associated with
2164 * this filesystem. RACE: could have been
2165 * recycled onto the same filesystem.
2166 */
2167 if (vp->v_mount != mp) {
2168 simple_unlock(&mntvnode_slock);
2169 goto again;
2170 }
2171 nvp = vp->v_mntvnodes.le_next;
2172 simple_unlock(&mntvnode_slock);
2173 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
2174 (error = SYSCTL_OUT(req, vp, VNODESZ)))
2175 return (error);
2176 simple_lock(&mntvnode_slock);
2177 }
2178 simple_unlock(&mntvnode_slock);
2179 simple_lock(&mountlist_slock);
2180 nmp = mp->mnt_list.cqe_next;
2181 vfs_unbusy(mp, p);
2182 }
2183 simple_unlock(&mountlist_slock);
2184
2185 return (0);
2186}
2187#endif
2188
2189/*
2190 * XXX
2191 * Exporting the vnode list on large systems causes them to crash.
2192 * Exporting the vnode list on medium systems causes sysctl to coredump.
2193 */
2194#if 0
2195SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
2196 0, 0, sysctl_vnode, "S,vnode", "");
2197#endif
2198
2199/*
2200 * Check to see if a filesystem is mounted on a block device.
2201 */
2202int
2203vfs_mountedon(vp)
2204 struct vnode *vp;
2205{
2206 struct vnode *vq;
2207 int error = 0;
2208
2209 if (vp->v_specmountpoint != NULL)
2210 return (EBUSY);
2211 if (vp->v_flag & VALIASED) {
2212 simple_lock(&spechash_slock);
2213 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2214 if (vq->v_rdev != vp->v_rdev ||
2215 vq->v_type != vp->v_type)
2216 continue;
2217 if (vq->v_specmountpoint != NULL) {
2218 error = EBUSY;
2219 break;
2220 }
2221 }
2222 simple_unlock(&spechash_slock);
2223 }
2224 return (error);
2225}
2226
2227/*
2228 * Unmount all filesystems. The list is traversed in reverse order
2229 * of mounting to avoid dependencies.
2230 */
2231void
2232vfs_unmountall()
2233{
2234 struct mount *mp, *nmp;
2235 struct proc *p;
2236 int error;
2237
2238 if (curproc != NULL)
2239 p = curproc;
2240 else
2241 p = initproc; /* XXX XXX should this be proc0? */
2242 /*
2243 * Since this only runs when rebooting, it is not interlocked.
2244 */
2245 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2246 nmp = mp->mnt_list.cqe_prev;
2247 error = dounmount(mp, MNT_FORCE, p);
2248 if (error) {
2249 printf("unmount of %s failed (",
2250 mp->mnt_stat.f_mntonname);
2251 if (error == EBUSY)
2252 printf("BUSY)\n");
2253 else
2254 printf("%d)\n", error);
2255 }
2256 }
2257}
2258
2259/*
2260 * Build hash lists of net addresses and hang them off the mount point.
2261 * Called by ufs_mount() to set up the lists of export addresses.
2262 */
2263static int
2264vfs_hang_addrlist(mp, nep, argp)
2265 struct mount *mp;
2266 struct netexport *nep;
2267 struct export_args *argp;
2268{
2269 register struct netcred *np;
2270 register struct radix_node_head *rnh;
2271 register int i;
2272 struct radix_node *rn;
2273 struct sockaddr *saddr, *smask = 0;
2274 struct domain *dom;
2275 int error;
2276
2277 if (argp->ex_addrlen == 0) {
2278 if (mp->mnt_flag & MNT_DEFEXPORTED)
2279 return (EPERM);
2280 np = &nep->ne_defexported;
2281 np->netc_exflags = argp->ex_flags;
2282 np->netc_anon = argp->ex_anon;
2283 np->netc_anon.cr_ref = 1;
2284 mp->mnt_flag |= MNT_DEFEXPORTED;
2285 return (0);
2286 }
2287 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2288 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
2289 bzero((caddr_t) np, i);
2290 saddr = (struct sockaddr *) (np + 1);
2291 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
2292 goto out;
2293 if (saddr->sa_len > argp->ex_addrlen)
2294 saddr->sa_len = argp->ex_addrlen;
2295 if (argp->ex_masklen) {
2296 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
2297 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
2298 if (error)
2299 goto out;
2300 if (smask->sa_len > argp->ex_masklen)
2301 smask->sa_len = argp->ex_masklen;
2302 }
2303 i = saddr->sa_family;
2304 if ((rnh = nep->ne_rtable[i]) == 0) {
2305 /*
2306 * It seems silly to initialize every AF when most are not
2307 * used; do so on demand here.
2308 */
2309 for (dom = domains; dom; dom = dom->dom_next)
2310 if (dom->dom_family == i && dom->dom_rtattach) {
2311 dom->dom_rtattach((void **) &nep->ne_rtable[i],
2312 dom->dom_rtoffset);
2313 break;
2314 }
2315 if ((rnh = nep->ne_rtable[i]) == 0) {
2316 error = ENOBUFS;
2317 goto out;
2318 }
2319 }
2320 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
2321 np->netc_rnodes);
2322 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */
2323 error = EPERM;
2324 goto out;
2325 }
2326 np->netc_exflags = argp->ex_flags;
2327 np->netc_anon = argp->ex_anon;
2328 np->netc_anon.cr_ref = 1;
2329 return (0);
2330out:
2331 free(np, M_NETADDR);
2332 return (error);
2333}
2334
2335/* ARGSUSED */
2336static int
2337vfs_free_netcred(rn, w)
2338 struct radix_node *rn;
2339 void *w;
2340{
2341 register struct radix_node_head *rnh = (struct radix_node_head *) w;
2342
2343 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
2344 free((caddr_t) rn, M_NETADDR);
2345 return (0);
2346}
2347
2348/*
2349 * Free the net address hash lists that are hanging off the mount points.
2350 */
2351static void
2352vfs_free_addrlist(nep)
2353 struct netexport *nep;
2354{
2355 register int i;
2356 register struct radix_node_head *rnh;
2357
2358 for (i = 0; i <= AF_MAX; i++)
2359 if ((rnh = nep->ne_rtable[i])) {
2360 (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
2361 (caddr_t) rnh);
2362 free((caddr_t) rnh, M_RTABLE);
2363 nep->ne_rtable[i] = 0;
2364 }
2365}
2366
2367int
2368vfs_export(mp, nep, argp)
2369 struct mount *mp;
2370 struct netexport *nep;
2371 struct export_args *argp;
2372{
2373 int error;
2374
2375 if (argp->ex_flags & MNT_DELEXPORT) {
2376 if (mp->mnt_flag & MNT_EXPUBLIC) {
2377 vfs_setpublicfs(NULL, NULL, NULL);
2378 mp->mnt_flag &= ~MNT_EXPUBLIC;
2379 }
2380 vfs_free_addrlist(nep);
2381 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2382 }
2383 if (argp->ex_flags & MNT_EXPORTED) {
2384 if (argp->ex_flags & MNT_EXPUBLIC) {
2385 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
2386 return (error);
2387 mp->mnt_flag |= MNT_EXPUBLIC;
2388 }
2389 if ((error = vfs_hang_addrlist(mp, nep, argp)))
2390 return (error);
2391 mp->mnt_flag |= MNT_EXPORTED;
2392 }
2393 return (0);
2394}
2395
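/*
 * Sketch of a typical caller (field names hypothetical): a
 * filesystem's mount routine forwards the user-supplied
 * export_args, e.g.
 *	error = vfs_export(mp, &ump->um_export, &args.export);
 * where um_export is that filesystem's netexport storage.
 */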
2396
2397/*
2398 * Set the publicly exported filesystem (WebNFS). Currently, only
2399 * one public filesystem is possible in the spec (RFC 2054 and 2055).
2400 */
2401int
2402vfs_setpublicfs(mp, nep, argp)
2403 struct mount *mp;
2404 struct netexport *nep;
2405 struct export_args *argp;
2406{
2407 int error;
2408 struct vnode *rvp;
2409 char *cp;
2410
2411 /*
2412 * mp == NULL -> invalidate the current info, the FS is
2413 * no longer exported. May be called from either vfs_export
2414 * or unmount, so check if it hasn't already been done.
2415 */
2416 if (mp == NULL) {
2417 if (nfs_pub.np_valid) {
2418 nfs_pub.np_valid = 0;
2419 if (nfs_pub.np_index != NULL) {
2420 FREE(nfs_pub.np_index, M_TEMP);
2421 nfs_pub.np_index = NULL;
2422 }
2423 }
2424 return (0);
2425 }
2426
2427 /*
2428 * Only one allowed at a time.
2429 */
2430 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
2431 return (EBUSY);
2432
2433 /*
2434 * Get real filehandle for root of exported FS.
2435 */
2436 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
2437 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
2438
2439 if ((error = VFS_ROOT(mp, &rvp)))
2440 return (error);
2441
2442 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
2443 return (error);
2444
2445 vput(rvp);
2446
2447 /*
2448 * If an indexfile was specified, pull it in.
2449 */
2450 if (argp->ex_indexfile != NULL) {
2451 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
2452 M_WAITOK);
2453 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
2454 MAXNAMLEN, (size_t *)0);
2455 if (!error) {
2456 /*
2457 * Check for illegal filenames.
2458 */
2459 for (cp = nfs_pub.np_index; *cp; cp++) {
2460 if (*cp == '/') {
2461 error = EINVAL;
2462 break;
2463 }
2464 }
2465 }
2466 if (error) {
2467 FREE(nfs_pub.np_index, M_TEMP);
2468 return (error);
2469 }
2470 }
2471
2472 nfs_pub.np_mount = mp;
2473 nfs_pub.np_valid = 1;
2474 return (0);
2475}
2476
2477struct netcred *
2478vfs_export_lookup(mp, nep, nam)
2479 register struct mount *mp;
2480 struct netexport *nep;
2481 struct sockaddr *nam;
2482{
2483 register struct netcred *np;
2484 register struct radix_node_head *rnh;
2485 struct sockaddr *saddr;
2486
2487 np = NULL;
2488 if (mp->mnt_flag & MNT_EXPORTED) {
2489 /*
2490 * Lookup in the export list first.
2491 */
2492 if (nam != NULL) {
2493 saddr = nam;
2494 rnh = nep->ne_rtable[saddr->sa_family];
2495 if (rnh != NULL) {
2496 np = (struct netcred *)
2497 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2498 rnh);
2499 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2500 np = NULL;
2501 }
2502 }
2503 /*
2504 * If no address match, use the default if it exists.
2505 */
2506 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2507 np = &nep->ne_defexported;
2508 }
2509 return (np);
2510}
2511
2512/*
2513 * Perform msync on all vnodes under a mount point.
2514 * The mount point must be locked.
2515 */
2516void
2517vfs_msync(struct mount *mp, int flags) {
2518 struct vnode *vp, *nvp;
2519 struct vm_object *obj;
2520 int anyio, tries;
2521
2522 tries = 5;
2523loop:
2524 anyio = 0;
2525 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
2526
2527 nvp = vp->v_mntvnodes.le_next;
2528
2529 if (vp->v_mount != mp) {
2530 goto loop;
2531 }
2532
2533 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */
2534 continue;
2535
2536 if (flags != MNT_WAIT) {
2537 obj = vp->v_object;
2538 if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
2539 continue;
2540 if (VOP_ISLOCKED(vp))
2541 continue;
2542 }
2543
2544 simple_lock(&vp->v_interlock);
2545 if (vp->v_object &&
2546 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
2547 if (!vget(vp,
2548 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
2549 if (vp->v_object) {
2550 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
2551 anyio = 1;
2552 }
2553 vput(vp);
2554 }
2555 } else {
2556 simple_unlock(&vp->v_interlock);
2557 }
2558 }
2559 if (anyio && (--tries > 0))
2560 goto loop;
2561}
2562
2563/*
2564 * Create the VM object needed for VMIO and mmap support. This
2565 * is done for all VREG files in the system. Some filesystems might
2566 * take advantage of the additional metadata buffering capability of
2567 * the VMIO code by making the device node VMIO-backed as well.
2568 *
2569 * vp must be locked when vfs_object_create is called.
2570 */
2571int
2572vfs_object_create(vp, p, cred)
2573 struct vnode *vp;
2574 struct proc *p;
2575 struct ucred *cred;
2576{
2577 struct vattr vat;
2578 vm_object_t object;
2579 int error = 0;
2580
2581 if ((vp->v_type != VREG) && (vp->v_type != VBLK))
2582 return 0;
2583
2584retry:
2585 if ((object = vp->v_object) == NULL) {
2586 if (vp->v_type == VREG) {
2587 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
2588 goto retn;
2589 object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
2590 } else if (major(vp->v_rdev) < nblkdev &&
2591 bdevsw(vp->v_rdev) != NULL) {
2592 /*
2593 * This simply allocates the biggest object possible
2594 * for a VBLK vnode. This should be fixed, but doesn't
2595 * cause any problems (yet).
2596 */
2597 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
2598 } else {
2599 goto retn;
2600 }
2601 /*
2602 * Dereference the reference we just created. This assumes
2603 * that the object is associated with the vp.
2604 */
2605 object->ref_count--;
2606 vp->v_usecount--;
2607 } else {
2608 if (object->flags & OBJ_DEAD) {
2609 VOP_UNLOCK(vp, 0, p);
2610 tsleep(object, PVM, "vodead", 0);
2611 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2612 goto retry;
2613 }
2614 }
2615
2616 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
2617 vp->v_flag |= VOBJBUF;
2618
2619retn:
2620 return error;
2621}
2622
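/*
 * Move a vnode onto the free list. VAGE vnodes are inserted at
 * the head of the list so that they are recycled first.
 */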
2623static void
2624vfree(vp)
2625 struct vnode *vp;
2626{
2627 int s;
2628
2629 s = splbio();
2630 simple_lock(&vnode_free_list_slock);
2631 if (vp->v_flag & VTBFREE) {
2632 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2633 vp->v_flag &= ~VTBFREE;
2634 }
2635 if (vp->v_flag & VAGE) {
2636 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
2637 } else {
2638 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2639 }
2640 freevnodes++;
2641 simple_unlock(&vnode_free_list_slock);
2642 vp->v_flag &= ~VAGE;
2643 vp->v_flag |= VFREE;
2644 splx(s);
2645}
2646
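/*
 * Take a vnode off the free (or to-be-freed) list because it is
 * coming back into use.
 */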
2647void
2648vbusy(vp)
2649 struct vnode *vp;
2650{
2651 int s;
2652
2653 s = splbio();
2654 simple_lock(&vnode_free_list_slock);
2655 if (vp->v_flag & VTBFREE) {
2656 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
2657 vp->v_flag &= ~VTBFREE;
2658 } else {
2659 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
2660 freevnodes--;
2661 }
2662 simple_unlock(&vnode_free_list_slock);
2663 vp->v_flag &= ~(VFREE|VAGE);
2664 splx(s);
2665}
2666
2667/*
2668 * Record a process's interest in events which might happen to
2669 * a vnode. Because poll uses the historic select-style interface
2670 * internally, this routine serves as both the ``check for any
2671 * pending events'' and the ``record my interest in future events''
2672 * functions. (These are done together, while the lock is held,
2673 * to avoid race conditions.)
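 *
 * A filesystem's VOP_POLL routine would typically end with
 * (sketch):
 *	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));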
2674 */
2675int
2676vn_pollrecord(vp, p, events)
2677 struct vnode *vp;
2678 struct proc *p;
2679 short events;
2680{
2681 simple_lock(&vp->v_pollinfo.vpi_lock);
2682 if (vp->v_pollinfo.vpi_revents & events) {
2683 /*
2684 * This leaves events we are not interested
2685 * in available for the other process which
2686 * presumably had requested them
2687 * (otherwise they would never have been
2688 * recorded).
2689 */
2690 events &= vp->v_pollinfo.vpi_revents;
2691 vp->v_pollinfo.vpi_revents &= ~events;
2692
2693 simple_unlock(&vp->v_pollinfo.vpi_lock);
2694 return events;
2695 }
2696 vp->v_pollinfo.vpi_events |= events;
2697 selrecord(p, &vp->v_pollinfo.vpi_selinfo);
2698 simple_unlock(&vp->v_pollinfo.vpi_lock);
2699 return 0;
2700}
2701
2702/*
2703 * Note the occurrence of an event. If the VN_POLLEVENT macro is used,
2704 * it is possible for us to miss an event due to race conditions, but
2705 * that condition is expected to be rare, so for the moment it is the
2706 * preferred interface.
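 *
 * For example, a filesystem that has just made data available
 * for reading might do
 *	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
 * to wake any interested pollers.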
2707 */
2708void
2709vn_pollevent(vp, events)
2710 struct vnode *vp;
2711 short events;
2712{
2713 simple_lock(&vp->v_pollinfo.vpi_lock);
2714 if (vp->v_pollinfo.vpi_events & events) {
2715 /*
2716 * We clear vpi_events so that we don't
2717 * call selwakeup() twice if two events are
2718 * posted before the polling process(es) is
2719 * awakened. This also ensures that we take at
2720 * most one selwakeup() if the polling process
2721 * is no longer interested. However, it does
2722 * mean that only one event can be noticed at
2723 * a time. (Perhaps we should only clear those
2724 * event bits which we note?) XXX
2725 */
2726 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */
2727 vp->v_pollinfo.vpi_revents |= events;
2728 selwakeup(&vp->v_pollinfo.vpi_selinfo);
2729 }
2730 simple_unlock(&vp->v_pollinfo.vpi_lock);
2731}
2732
2733/*
2734 * Wake up anyone polling on vp because it is being revoked.
2735 * This depends on dead_poll() returning POLLHUP for correct
2736 * behavior.
2737 */
2738void
2739vn_pollgone(vp)
2740 struct vnode *vp;
2741{
2742 simple_lock(&vp->v_pollinfo.vpi_lock);
2743 if (vp->v_pollinfo.vpi_events) {
2744 vp->v_pollinfo.vpi_events = 0;
2745 selwakeup(&vp->v_pollinfo.vpi_selinfo);
2746 }
2747 simple_unlock(&vp->v_pollinfo.vpi_lock);
2748}
2749
2750
2751
2752/*
2753 * Routine to create and manage a filesystem syncer vnode.
2754 */
2755#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
2756static int sync_fsync __P((struct vop_fsync_args *));
2757static int sync_inactive __P((struct vop_inactive_args *));
2758static int sync_reclaim __P((struct vop_reclaim_args *));
2759#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
2760#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
2761static int sync_print __P((struct vop_print_args *));
2762#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
2763
2764static vop_t **sync_vnodeop_p;
2765static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
2766 { &vop_default_desc, (vop_t *) vop_eopnotsupp },
2767 { &vop_close_desc, (vop_t *) sync_close }, /* close */
2768 { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */
2769 { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */
2770 { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */
2771 { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */
2772 { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */
2773 { &vop_print_desc, (vop_t *) sync_print }, /* print */
2774 { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */
2775 { NULL, NULL }
2776};
2777static struct vnodeopv_desc sync_vnodeop_opv_desc =
2778 { &sync_vnodeop_p, sync_vnodeop_entries };
2779
2780VNODEOP_SET(sync_vnodeop_opv_desc);
2781
2782/*
2783 * Create a new filesystem syncer vnode for the specified mount point.
2784 */
2785int
2786vfs_allocate_syncvnode(mp)
2787 struct mount *mp;
2788{
2789 struct vnode *vp;
2790 static long start, incr, next;
2791 int error;
2792
2793 /* Allocate a new vnode */
2794 if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
2795 mp->mnt_syncer = NULL;
2796 return (error);
2797 }
2798 vp->v_type = VNON;
2799 /*
2800 * Place the vnode onto the syncer worklist. We attempt to
2801 * scatter them about on the list so that they will go off
2802 * at evenly distributed times even if all the filesystems
2803 * are mounted at once.
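 *
 * For example, with syncer_maxdelay == 32 the successive offsets
 * handed out are 16, 8, 24, 4, 12, 20, 28, 2, ..., repeatedly
 * bisecting the remaining gaps.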
2804 */
2805 next += incr;
2806 if (next == 0 || next > syncer_maxdelay) {
2807 start /= 2;
2808 incr /= 2;
2809 if (start == 0) {
2810 start = syncer_maxdelay / 2;
2811 incr = syncer_maxdelay;
2812 }
2813 next = start;
2814 }
2815 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
2816 mp->mnt_syncer = vp;
2817 return (0);
2818}
2819
2820/*
2821 * Do a lazy sync of the filesystem.
2822 */
2823static int
2824sync_fsync(ap)
2825 struct vop_fsync_args /* {
2826 struct vnode *a_vp;
2827 struct ucred *a_cred;
2828 int a_waitfor;
2829 struct proc *a_p;
2830 } */ *ap;
2831{
2832 struct vnode *syncvp = ap->a_vp;
2833 struct mount *mp = syncvp->v_mount;
2834 struct proc *p = ap->a_p;
2835 int asyncflag;
2836
2837 /*
2838 * We only need to do something if this is a lazy evaluation.
2839 */
2840 if (ap->a_waitfor != MNT_LAZY)
2841 return (0);
2842
2843 /*
2844 * Move ourselves to the back of the sync list.
2845 */
2846 vn_syncer_add_to_worklist(syncvp, syncdelay);
2847
2848 /*
2849 * Walk the list of vnodes pushing all that are dirty and
2850 * not already on the sync list.
2851 */
2852 simple_lock(&mountlist_slock);
2853 if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
2854 simple_unlock(&mountlist_slock);
2855 return (0);
2856 }
2857 asyncflag = mp->mnt_flag & MNT_ASYNC;
2858 mp->mnt_flag &= ~MNT_ASYNC;
2859 vfs_msync(mp, MNT_NOWAIT);
2860 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
2861 if (asyncflag)
2862 mp->mnt_flag |= MNT_ASYNC;
2863 vfs_unbusy(mp, p);
2864 return (0);
2865}
2866
2867/*
2868 * The syncer vnode is no longer referenced.
2869 */
2870static int
2871sync_inactive(ap)
2872 struct vop_inactive_args /* {
2873 struct vnode *a_vp;
2874 struct proc *a_p;
2875 } */ *ap;
2876{
2877
2878 vgone(ap->a_vp);
2879 return (0);
2880}
2881
2882/*
2883 * The syncer vnode is no longer needed and is being decommissioned.
2884 *
2885 * Modifications to the worklist must be protected at splbio().
2886 */
2887static int
2888sync_reclaim(ap)
2889 struct vop_reclaim_args /* {
2890 struct vnode *a_vp;
2891 } */ *ap;
2892{
2893 struct vnode *vp = ap->a_vp;
2894 int s;
2895
2896 s = splbio();
2897 vp->v_mount->mnt_syncer = NULL;
2898 if (vp->v_flag & VONWORKLST) {
2899 LIST_REMOVE(vp, v_synclist);
2900 vp->v_flag &= ~VONWORKLST;
2901 }
2902 splx(s);
2903
2904 return (0);
2905}
2906
2907/*
2908 * Print out a syncer vnode.
2909 */
2910static int
2911sync_print(ap)
2912 struct vop_print_args /* {
2913 struct vnode *a_vp;
2914 } */ *ap;
2915{
2916 struct vnode *vp = ap->a_vp;
2917
2918 printf("syncer vnode");
2919 if (vp->v_vnlock != NULL)
2920 lockmgr_printinfo(vp->v_vnlock);
2921 printf("\n");
2922 return (0);
2923}