vfs_subr.c revision 32585
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $Id: vfs_subr.c,v 1.123 1998/01/12 03:15:01 dyson Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"
#include "opt_devfs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/poll.h>
#include <sys/domain.h>
#include <sys/dirent.h>
#include <sys/vmmeter.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

/* Forward declarations for file-local helpers defined below. */
static void	insmntque __P((struct vnode *vp, struct mount *mp));
#ifdef DDB
static void	printlockedvnodes __P((void));
#endif
static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
static void	vfree __P((struct vnode *));
static void	vgonel __P((struct vnode *vp, struct proc *p));

/*
 * Number of vnodes ever allocated by getnewvnode().
 * NOTE(review): declared unsigned long but exported with SYSCTL_INT;
 * on LP64 platforms the sysctl would read only part of the variable —
 * confirm and consider SYSCTL_LONG or an int counter.
 */
static unsigned long numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");

/* Map IFMT mode bits (mode >> 12) to a vnode type. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Map a vnode type (enum vtype index) back to IFMT mode bits. */
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct tobefreelist vnode_tobefree_list;	/* vnodes queued for deferred freeing */

/* Low-water mark of free vnodes we try to keep around (tunable). */
static u_long wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
/* Current count of vnodes on the free list. */
static u_long freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");

int vfs_ioopt = 0;
SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");

struct mntlist mountlist;	/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;
struct nfs_public nfs_pub;	/* publicly exported FS */

/* Target number of vnodes in the system; settable via kern.maxvnodes. */
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");

static void	vfs_free_addrlist __P((struct netexport *nep));
static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
				       struct export_args *argp));

/*
 * Initialize the vnode management data structures.
 * Called once at boot, before any vnodes exist.
 */
void
vntblinit()
{

	/* Scale the vnode target with process limit and physical memory. */
	desiredvnodes = maxproc + cnt.v_page_count / 4;
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_tobefree_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}
/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 *
 * Returns 0 with a shared busy lock held, or ENOENT if the filesystem
 * is being unmounted (after sleeping for the unmount unless LK_NOWAIT).
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp) {
			simple_unlock(interlkp);
		}
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		if (interlkp) {
			simple_lock(interlkp);
		}
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	/* A shared lock on a non-unmounting mount should never fail. */
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem: release the shared busy lock taken by vfs_busy().
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
206 */ 207int 208vfs_rootmountalloc(fstypename, devname, mpp) 209 char *fstypename; 210 char *devname; 211 struct mount **mpp; 212{ 213 struct proc *p = curproc; /* XXX */ 214 struct vfsconf *vfsp; 215 struct mount *mp; 216 217 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 218 if (!strcmp(vfsp->vfc_name, fstypename)) 219 break; 220 if (vfsp == NULL) 221 return (ENODEV); 222 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 223 bzero((char *)mp, (u_long)sizeof(struct mount)); 224 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 225 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 226 LIST_INIT(&mp->mnt_vnodelist); 227 mp->mnt_vfc = vfsp; 228 mp->mnt_op = vfsp->vfc_vfsops; 229 mp->mnt_flag = MNT_RDONLY; 230 mp->mnt_vnodecovered = NULLVP; 231 vfsp->vfc_refcount++; 232 mp->mnt_stat.f_type = vfsp->vfc_typenum; 233 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 234 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 235 mp->mnt_stat.f_mntonname[0] = '/'; 236 mp->mnt_stat.f_mntonname[1] = 0; 237 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 238 *mpp = mp; 239 return (0); 240} 241 242/* 243 * Find an appropriate filesystem to use for the root. If a filesystem 244 * has not been preselected, walk through the list of known filesystems 245 * trying those that have mountroot routines, and try them until one 246 * works or we have tried them all. 247 */ 248#ifdef notdef /* XXX JH */ 249int 250lite2_vfs_mountroot() 251{ 252 struct vfsconf *vfsp; 253 extern int (*lite2_mountroot) __P((void)); 254 int error; 255 256 if (lite2_mountroot != NULL) 257 return ((*lite2_mountroot)()); 258 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 259 if (vfsp->vfc_mountroot == NULL) 260 continue; 261 if ((error = (*vfsp->vfc_mountroot)()) == 0) 262 return (0); 263 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 264 } 265 return (ENODEV); 266} 267#endif 268 269/* 270 * Lookup a mount point by filesystem identifier. 
/*
 * Lookup a mount point by filesystem identifier.
 * Returns the matching mount, or NULL if no mounted filesystem has
 * the given fsid.  Holds mountlist_slock only for the duration of
 * the scan; the returned pointer is not otherwise protected.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *) 0);
}

/*
 * Get a new unique fsid for mp.
 *
 * val[0] encodes a pseudo device number (nblkdev + fs type, with the
 * minor acting as a per-type sequence number); val[1] is the fs type.
 * The sequence number is bumped until the candidate fsid collides with
 * no mounted filesystem.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;	/* per-boot sequence counter */

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/* Only need to probe for collisions if something is mounted. */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL (i.e. "not specified / don't change").
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern vop_t **dead_vnodeop_p;
/*
 * Return the next vnode from the free list.
 *
 * Reuses the least recently used free vnode when it has no cached
 * pages and no namecache entries; otherwise allocates a fresh vnode.
 * The new vnode is returned in *vpp with v_usecount == 1, type VNON,
 * and attached to mount mp (if non-NULL).  Always returns 0.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	vop_t **vops;
	struct vnode **vpp;
{
	int s;
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *tvp, *nvp;
	vm_object_t object;
	TAILQ_HEAD(freelst, vnode) vnode_tmp_list;

	/*
	 * We take the least recently used vnode from the freelist
	 * if we can get it and it has no cached pages, and no
	 * namecache entries are relative to it.
	 * Otherwise we allocate a new vnode
	 */

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_tmp_list);

	/* Migrate deferred-free vnodes onto the real free list first. */
	for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
		nvp = TAILQ_NEXT(vp, v_freelist);
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
		vp->v_flag |= VFREE;
		if (vp->v_usecount)
			panic("tobe free vnode isn't");
		freevnodes++;
	}

	if (wantfreevnodes && freevnodes < wantfreevnodes) {
		/* Below the free-list low-water mark: allocate instead. */
		vp = NULL;
	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
		/*
		 * XXX: this is only here to be backwards compatible
		 */
		vp = NULL;
	} else {
		/* Scan the free list for a recyclable vnode. */
		for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
			nvp = TAILQ_NEXT(vp, v_freelist);

			if (!simple_lock_try(&vp->v_interlock))
				continue;
			if (vp->v_usecount)
				panic("free vnode isn't");

			object = vp->v_object;
			if (object && (object->resident_page_count || object->ref_count)) {
				/* Don't recycle if it's caching some pages */
				TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
				TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
				continue;
			} else if (LIST_FIRST(&vp->v_cache_src)) {
				/* Don't recycle if active in the namecache */
				simple_unlock(&vp->v_interlock);
				continue;
			} else {
				/* Found a recyclable vnode; interlock stays held. */
				break;
			}
		}
	}

	/*
	 * Re-queue the page-caching vnodes we skipped and drop their
	 * interlocks (still held from simple_lock_try above).
	 */
	for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
		nvp = TAILQ_NEXT(tvp, v_freelist);
		TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
		simple_unlock(&tvp->v_interlock);
	}

	if (vp) {
		/* Recycle the chosen vnode. */
		vp->v_flag |= VDOOMED;
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
		simple_unlock(&vnode_free_list_slock);
		cache_purge(vp);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD) {
			/* vgonel consumes the interlock. */
			vgonel(vp, p);
		} else {
			simple_unlock(&vp->v_interlock);
		}

#ifdef DIAGNOSTIC
		{
			int s;

			if (vp->v_data)
				panic("cleaned vnode isn't");
			s = splbio();
			if (vp->v_numoutput)
				panic("Clean vnode has pending I/O's");
			splx(s);
		}
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
	} else {
		/* Nothing recyclable: allocate and zero a brand new vnode. */
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *) malloc((u_long) sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *) vp, sizeof *vp);
		simple_lock_init(&vp->v_interlock);
		vp->v_dd = vp;
		cache_purge(vp);
		LIST_INIT(&vp->v_cache_src);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	}

	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	splx(s);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * Passing mp == NULL simply removes the vnode from its current mount.
 */
static void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL) {
		simple_unlock(&mntvnode_slock);
		return;
	}
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called when a write on bp completes.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp)) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in vinvalbuf() waiting for I/O to drain. */
		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t) &vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * V_SAVE forces dirty data to disk first; V_SAVEMETA preserves
 * indirect-block buffers (negative b_lblkno).  Returns 0 or an
 * error from VOP_FSYNC/tsleep.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;
	vm_object_t object;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}

	s = splbio();
	for (;;) {
		/* With V_SAVEMETA, skip leading metadata (b_lblkno < 0). */
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				/* Buffer in use: wait and restart the scan. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t) bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				if (bp->b_vp == vp) {
					if (bp->b_flags & B_CLUSTEROK) {
						vfs_bio_awrite(bp);
					} else {
						bp->b_flags |= B_ASYNC;
						VOP_BWRITE(bp);
					}
				} else {
					(void) VOP_BWRITE(bp);
				}
				break;
			}
			bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
			brelse(bp);
		}
	}

	/* Wait for any in-flight writes to finish (see vwakeup()). */
	while (vp->v_numoutput > 0) {
		vp->v_flag |= VBWAIT;
		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
	}

	splx(s);

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	simple_lock(&vp->v_interlock);
	object = vp->v_object;
	if (object != NULL) {
		if (flags & V_SAVEMETA)
			vm_object_page_remove(object, 0, object->size,
			    (flags & V_SAVE) ? TRUE : FALSE);
		else
			vm_object_page_remove(object, 0, 0,
			    (flags & V_SAVE) ? TRUE : FALSE);
	}
	simple_unlock(&vp->v_interlock);

	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and queues bp on its clean list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("bgetvp: not free");
#endif
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	s = splbio();
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}
/*
 * Disassociate a buffer from a vnode.
 * Removes bp from the vnode's buffer list and drops the hold reference
 * taken by bgetvp().
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
#endif

	/*
	 * Delete from old vnode list, if on one.
	 */
	s = splbio();
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	splx(s);

	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	vdrop(vp);
}

/*
 * Associate a p-buffer with a vnode.
 * Unlike bgetvp(), takes no hold reference and does not queue the
 * buffer on the vnode's lists.
 */
void
pbgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
#if defined(DIAGNOSTIC)
	if (bp->b_vp)
		panic("pbgetvp: not free");
#endif
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(bp)
	register struct buf *bp;
{

#if defined(DIAGNOSTIC)
	if (bp->b_vp == (struct vnode *) 0)
		panic("pbrelvp: NULL");
#endif

	bp->b_vp = (struct vnode *) 0;
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	int s;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	s = splbio();
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST) {
		bufremvn(bp);
		vdrop(bp->b_vp);
	}
	/*
	 * If dirty, put on list of dirty buffers; otherwise insert onto list
	 * of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI) {
		struct buf *tbp;

		/* Keep the dirty list sorted by logical block number. */
		tbp = newvp->v_dirtyblkhd.lh_first;
		if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
			bufinsvn(bp, &newvp->v_dirtyblkhd);
		} else {
			while (tbp->b_vnbufs.le_next &&
			    (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
				tbp = tbp->b_vnbufs.le_next;
			}
			LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
		}
	} else {
		bufinsvn(bp, &newvp->v_cleanblkhd);
	}
	bp->b_vp = newvp;
	vhold(bp->b_vp);
	splx(s);
}

#ifndef DEVFS_ROOT
/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias already exists for this device, use it instead. */
	if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
#endif /* !DEVFS_ROOT */
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP when nvp was simply entered on the alias chain,
 * or the reused alias vnode (with nvp marked VNON) otherwise.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			/* vget slept; the chain may have changed — rescan. */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/* No reusable alias: put nvp on the hash chain. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Reuse the existing VT_NON alias: clean it and hand it back. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
883 */ 884int 885vget(vp, flags, p) 886 register struct vnode *vp; 887 int flags; 888 struct proc *p; 889{ 890 int error; 891 892 /* 893 * If the vnode is in the process of being cleaned out for 894 * another use, we wait for the cleaning to finish and then 895 * return failure. Cleaning is determined by checking that 896 * the VXLOCK flag is set. 897 */ 898 if ((flags & LK_INTERLOCK) == 0) { 899 simple_lock(&vp->v_interlock); 900 } 901 if (vp->v_flag & VXLOCK) { 902 vp->v_flag |= VXWANT; 903 simple_unlock(&vp->v_interlock); 904 tsleep((caddr_t)vp, PINOD, "vget", 0); 905 return (ENOENT); 906 } 907 908 vp->v_usecount++; 909 910 if (VSHOULDBUSY(vp)) 911 vbusy(vp); 912 /* 913 * Create the VM object, if needed 914 */ 915 if ((flags & LK_NOOBJ) == 0 && 916 (vp->v_type == VREG) && 917 ((vp->v_object == NULL) || 918 (vp->v_object->flags & OBJ_DEAD))) { 919 vfs_object_create(vp, curproc, curproc->p_ucred, 0); 920 } 921 if (flags & LK_TYPE_MASK) { 922 if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) 923 vrele(vp); 924 return (error); 925 } 926 simple_unlock(&vp->v_interlock); 927 return (0); 928} 929 930void 931vref(struct vnode *vp) 932{ 933 simple_lock(&vp->v_interlock); 934 vp->v_usecount++; 935 simple_unlock(&vp->v_interlock); 936} 937 938/* 939 * Vnode put/release. 940 * If count drops to zero, call inactive routine and return to freelist. 941 */ 942void 943vrele(vp) 944 struct vnode *vp; 945{ 946 struct proc *p = curproc; /* XXX */ 947 948#ifdef DIAGNOSTIC 949 if (vp == NULL) 950 panic("vrele: null vp"); 951#endif 952 simple_lock(&vp->v_interlock); 953 954 if (vp->v_usecount > 1) { 955 956 vp->v_usecount--; 957 simple_unlock(&vp->v_interlock); 958 959 return; 960 } 961 962 if (vp->v_usecount == 1) { 963 964 vp->v_usecount--; 965 966 if (VSHOULDFREE(vp)) 967 vfree(vp); 968 /* 969 * If we are doing a vput, the node is already locked, and we must 970 * call VOP_INACTIVE with the node locked. 
So, in the case of 971 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 972 */ 973 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 974 VOP_INACTIVE(vp, p); 975 } 976 977 } else { 978#ifdef DIAGNOSTIC 979 vprint("vrele: negative ref count", vp); 980 simple_unlock(&vp->v_interlock); 981#endif 982 panic("vrele: negative ref cnt"); 983 } 984} 985 986void 987vput(vp) 988 struct vnode *vp; 989{ 990 struct proc *p = curproc; /* XXX */ 991 992#ifdef DIAGNOSTIC 993 if (vp == NULL) 994 panic("vput: null vp"); 995#endif 996 997 simple_lock(&vp->v_interlock); 998 999 if (vp->v_usecount > 1) { 1000 1001 vp->v_usecount--; 1002 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1003 return; 1004 1005 } 1006 1007 if (vp->v_usecount == 1) { 1008 1009 vp->v_usecount--; 1010 if (VSHOULDFREE(vp)) 1011 vfree(vp); 1012 /* 1013 * If we are doing a vput, the node is already locked, and we must 1014 * call VOP_INACTIVE with the node locked. So, in the case of 1015 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1016 */ 1017 simple_unlock(&vp->v_interlock); 1018 VOP_INACTIVE(vp, p); 1019 1020 } else { 1021#ifdef DIAGNOSTIC 1022 vprint("vput: negative ref count", vp); 1023#endif 1024 panic("vput: negative ref cnt"); 1025 } 1026} 1027 1028/* 1029 * Somebody doesn't want the vnode recycled. 1030 */ 1031void 1032vhold(vp) 1033 register struct vnode *vp; 1034{ 1035 1036 simple_lock(&vp->v_interlock); 1037 vp->v_holdcnt++; 1038 if (VSHOULDBUSY(vp)) 1039 vbusy(vp); 1040 simple_unlock(&vp->v_interlock); 1041} 1042 1043/* 1044 * One less who cares about this vnode. 1045 */ 1046void 1047vdrop(vp) 1048 register struct vnode *vp; 1049{ 1050 1051 simple_lock(&vp->v_interlock); 1052 if (vp->v_holdcnt <= 0) 1053 panic("holdrele: holdcnt"); 1054 vp->v_holdcnt--; 1055 if (VSHOULDFREE(vp)) 1056 vfree(vp); 1057 simple_unlock(&vp->v_interlock); 1058} 1059 1060/* 1061 * Remove any vnodes in the vnode table belonging to mount point mp. 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
static int busyprt = 0;		/* print out busy vnodes */
SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * Make sure this vnode wasn't reclaimed in getnewvnode().
		 * Start over if it has (it won't be on the list anymore).
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file vnodes
		 * open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}

		/*
		 * With v_usecount == 0, all we need to do is clear out the
		 * vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);	/* consumes the interlock */
			simple_lock(&mntvnode_slock);
			continue;
		}

		/*
		 * If FORCECLOSE is set, forcibly close the vnode. For block
		 * or character devices, revert to an anonymous device. For
		 * all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *) 0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * Called with the vnode interlock held; flags may include DOCLOSE.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use. If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount))
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or brought into use while we
	 * clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	if (vp->v_object) {
		if (vp->v_object->ref_count == 0) {
			/*
			 * This is a normal way of shutting down the object/vnode
			 * association.
			 */
			vm_object_terminate(vp->v_object);
		} else {
			/*
			 * Woe to the process that tries to page now :-).
			 */
			vm_pager_deallocate(vp->v_object);
		}
	}

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);	/* drop the extra reference taken above */
	cache_purge(vp);
	if (vp->v_vnlock) {
#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
#ifdef DIAGNOSTIC
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
#endif
#endif
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t) vp);
	}
}
1263 */ 1264int 1265vop_revoke(ap) 1266 struct vop_revoke_args /* { 1267 struct vnode *a_vp; 1268 int a_flags; 1269 } */ *ap; 1270{ 1271 struct vnode *vp, *vq; 1272 struct proc *p = curproc; /* XXX */ 1273 1274#ifdef DIAGNOSTIC 1275 if ((ap->a_flags & REVOKEALL) == 0) 1276 panic("vop_revoke"); 1277#endif 1278 1279 vp = ap->a_vp; 1280 simple_lock(&vp->v_interlock); 1281 1282 if (vp->v_flag & VALIASED) { 1283 /* 1284 * If a vgone (or vclean) is already in progress, 1285 * wait until it is done and return. 1286 */ 1287 if (vp->v_flag & VXLOCK) { 1288 vp->v_flag |= VXWANT; 1289 simple_unlock(&vp->v_interlock); 1290 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1291 return (0); 1292 } 1293 /* 1294 * Ensure that vp will not be vgone'd while we 1295 * are eliminating its aliases. 1296 */ 1297 vp->v_flag |= VXLOCK; 1298 simple_unlock(&vp->v_interlock); 1299 while (vp->v_flag & VALIASED) { 1300 simple_lock(&spechash_slock); 1301 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1302 if (vq->v_rdev != vp->v_rdev || 1303 vq->v_type != vp->v_type || vp == vq) 1304 continue; 1305 simple_unlock(&spechash_slock); 1306 vgone(vq); 1307 break; 1308 } 1309 if (vq == NULLVP) { 1310 simple_unlock(&spechash_slock); 1311 } 1312 } 1313 /* 1314 * Remove the lock so that vgone below will 1315 * really eliminate the vnode after which time 1316 * vgone will awaken any sleepers. 1317 */ 1318 simple_lock(&vp->v_interlock); 1319 vp->v_flag &= ~VXLOCK; 1320 if (vp->v_flag & VXWANT) { 1321 vp->v_flag &= ~VXWANT; 1322 wakeup(vp); 1323 } 1324 } 1325 vgonel(vp, p); 1326 return (0); 1327} 1328 1329/* 1330 * Recycle an unused vnode to the front of the free list. 1331 * Release the passed interlock if the vnode will be recycled. 
 */
/*
 * Returns 1 (and drops inter_lkp) if the vnode was unused and has been
 * vgone'd, 0 if it is still in use and nothing was done.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		/* vgonel consumes the interlock. */
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 * The interlock is released before returning.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		/* Unlink vp from its spec hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Count the remaining aliases; if exactly one is
			 * left (loop ends with vq == NULL), it is no longer
			 * aliased and its VALIASED flag is cleared too.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
		}
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 * Returns 1 and stores the vnode in *vpp on success, 0 if not found.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 * Sums v_usecount over all aliases; unused aliases are vgone'd along
 * the way, restarting the scan each time the hash lock is dropped.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
/*
 * Print out a description of a vnode.
1524 */ 1525static char *typename[] = 1526{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 1527 1528void 1529vprint(label, vp) 1530 char *label; 1531 register struct vnode *vp; 1532{ 1533 char buf[64]; 1534 1535 if (label != NULL) 1536 printf("%s: %x: ", label, vp); 1537 else 1538 printf("%x: ", vp); 1539 printf("type %s, usecount %d, writecount %d, refcount %ld,", 1540 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1541 vp->v_holdcnt); 1542 buf[0] = '\0'; 1543 if (vp->v_flag & VROOT) 1544 strcat(buf, "|VROOT"); 1545 if (vp->v_flag & VTEXT) 1546 strcat(buf, "|VTEXT"); 1547 if (vp->v_flag & VSYSTEM) 1548 strcat(buf, "|VSYSTEM"); 1549 if (vp->v_flag & VXLOCK) 1550 strcat(buf, "|VXLOCK"); 1551 if (vp->v_flag & VXWANT) 1552 strcat(buf, "|VXWANT"); 1553 if (vp->v_flag & VBWAIT) 1554 strcat(buf, "|VBWAIT"); 1555 if (vp->v_flag & VALIASED) 1556 strcat(buf, "|VALIASED"); 1557 if (vp->v_flag & VDOOMED) 1558 strcat(buf, "|VDOOMED"); 1559 if (vp->v_flag & VFREE) 1560 strcat(buf, "|VFREE"); 1561 if (vp->v_flag & VOBJBUF) 1562 strcat(buf, "|VOBJBUF"); 1563 if (buf[0] != '\0') 1564 printf(" flags (%s)", &buf[1]); 1565 if (vp->v_data == NULL) { 1566 printf("\n"); 1567 } else { 1568 printf("\n\t"); 1569 VOP_PRINT(vp); 1570 } 1571} 1572 1573#ifdef DDB 1574/* 1575 * List all of the locked vnodes in the system. 1576 * Called when debugging the kernel. 
 */
static void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Skip mounts we cannot busy without blocking. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		/* Re-take the list lock before stepping to the next mount. */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

/*
 * Handler for the vfs.generic sysctl subtree.  The name/namelen
 * adjustment below backs up one level so that the VFS_GENERIC
 * component itself is visible as name[0].
 */
static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#ifndef NO_COMPAT_PRELITE2
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#ifndef NO_COMPAT_PRELITE2

/*
 * Compatibility handler: emit the pre-Lite2 ovfsconf record for each
 * configured filesystem type.
 */
static int
sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* !NO_COMPAT_PRELITE2 */

static volatile int kinfo_vdebug = 1;

#if 0
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
1760 */ 1761int 1762vfs_mountedon(vp) 1763 struct vnode *vp; 1764{ 1765 struct vnode *vq; 1766 int error = 0; 1767 1768 if (vp->v_specflags & SI_MOUNTEDON) 1769 return (EBUSY); 1770 if (vp->v_flag & VALIASED) { 1771 simple_lock(&spechash_slock); 1772 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1773 if (vq->v_rdev != vp->v_rdev || 1774 vq->v_type != vp->v_type) 1775 continue; 1776 if (vq->v_specflags & SI_MOUNTEDON) { 1777 error = EBUSY; 1778 break; 1779 } 1780 } 1781 simple_unlock(&spechash_slock); 1782 } 1783 return (error); 1784} 1785 1786/* 1787 * Unmount all filesystems. The list is traversed in reverse order 1788 * of mounting to avoid dependencies. 1789 */ 1790void 1791vfs_unmountall() 1792{ 1793 struct mount *mp, *nmp; 1794 struct proc *p = initproc; /* XXX XXX should this be proc0? */ 1795 int error; 1796 1797 /* 1798 * Since this only runs when rebooting, it is not interlocked. 1799 */ 1800 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1801 nmp = mp->mnt_list.cqe_prev; 1802 error = dounmount(mp, MNT_FORCE, p); 1803 if (error) { 1804 printf("unmount of %s failed (", 1805 mp->mnt_stat.f_mntonname); 1806 if (error == EBUSY) 1807 printf("BUSY)\n"); 1808 else 1809 printf("%d)\n", error); 1810 } 1811 } 1812} 1813 1814/* 1815 * Build hash lists of net addresses and hang them off the mount point. 1816 * Called by ufs_mount() to set up the lists of export addresses. 
 */
/*
 * Add one export entry (from argp) to the netexport structure nep of
 * mount point mp.  With a zero-length address the entry becomes the
 * default export; otherwise the address (and optional mask) are copied
 * in from userland and inserted into the per-address-family radix tree.
 * Returns 0 on success or an errno; on failure the netcred allocation
 * is freed here.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* One allocation holds the netcred, the address and the mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
/*
 * rnh_walktree callback: delete one export entry from the radix tree
 * and free it.  w is the radix_node_head being torn down.
 */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export state of a mount point per argp->ex_flags:
 * MNT_DELEXPORT tears down all existing exports (and the public
 * filesystem registration if present); MNT_EXPORTED adds a new entry,
 * optionally registering the mount as the WebNFS public filesystem.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).
Currently, only 1954 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1955 */ 1956int 1957vfs_setpublicfs(mp, nep, argp) 1958 struct mount *mp; 1959 struct netexport *nep; 1960 struct export_args *argp; 1961{ 1962 int error; 1963 struct vnode *rvp; 1964 char *cp; 1965 1966 /* 1967 * mp == NULL -> invalidate the current info, the FS is 1968 * no longer exported. May be called from either vfs_export 1969 * or unmount, so check if it hasn't already been done. 1970 */ 1971 if (mp == NULL) { 1972 if (nfs_pub.np_valid) { 1973 nfs_pub.np_valid = 0; 1974 if (nfs_pub.np_index != NULL) { 1975 FREE(nfs_pub.np_index, M_TEMP); 1976 nfs_pub.np_index = NULL; 1977 } 1978 } 1979 return (0); 1980 } 1981 1982 /* 1983 * Only one allowed at a time. 1984 */ 1985 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1986 return (EBUSY); 1987 1988 /* 1989 * Get real filehandle for root of exported FS. 1990 */ 1991 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 1992 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1993 1994 if ((error = VFS_ROOT(mp, &rvp))) 1995 return (error); 1996 1997 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1998 return (error); 1999 2000 vput(rvp); 2001 2002 /* 2003 * If an indexfile was specified, pull it in. 2004 */ 2005 if (argp->ex_indexfile != NULL) { 2006 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2007 M_WAITOK); 2008 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2009 MAXNAMLEN, (size_t *)0); 2010 if (!error) { 2011 /* 2012 * Check for illegal filenames. 
2013 */ 2014 for (cp = nfs_pub.np_index; *cp; cp++) { 2015 if (*cp == '/') { 2016 error = EINVAL; 2017 break; 2018 } 2019 } 2020 } 2021 if (error) { 2022 FREE(nfs_pub.np_index, M_TEMP); 2023 return (error); 2024 } 2025 } 2026 2027 nfs_pub.np_mount = mp; 2028 nfs_pub.np_valid = 1; 2029 return (0); 2030} 2031 2032struct netcred * 2033vfs_export_lookup(mp, nep, nam) 2034 register struct mount *mp; 2035 struct netexport *nep; 2036 struct sockaddr *nam; 2037{ 2038 register struct netcred *np; 2039 register struct radix_node_head *rnh; 2040 struct sockaddr *saddr; 2041 2042 np = NULL; 2043 if (mp->mnt_flag & MNT_EXPORTED) { 2044 /* 2045 * Lookup in the export list first. 2046 */ 2047 if (nam != NULL) { 2048 saddr = nam; 2049 rnh = nep->ne_rtable[saddr->sa_family]; 2050 if (rnh != NULL) { 2051 np = (struct netcred *) 2052 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2053 rnh); 2054 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2055 np = NULL; 2056 } 2057 } 2058 /* 2059 * If no address match, use the default if it exists. 2060 */ 2061 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2062 np = &nep->ne_defexported; 2063 } 2064 return (np); 2065} 2066 2067/* 2068 * perform msync on all vnodes under a mount point 2069 * the mount point must be locked. 
2070 */ 2071void 2072vfs_msync(struct mount *mp, int flags) { 2073 struct vnode *vp, *nvp; 2074 int anyio, tries; 2075 2076 tries = 5; 2077loop: 2078 anyio = 0; 2079 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2080 2081 nvp = vp->v_mntvnodes.le_next; 2082 2083 if (vp->v_mount != mp) { 2084 goto loop; 2085 } 2086 2087 if ((vp->v_flag & VXLOCK) || 2088 (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { 2089 continue; 2090 } 2091 2092 simple_lock(&vp->v_interlock); 2093 if (vp->v_object && 2094 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2095 if (!vget(vp, 2096 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { 2097 if (vp->v_object) { 2098 vm_object_page_clean(vp->v_object, 0, 0, TRUE); 2099 anyio = 1; 2100 } 2101 vput(vp); 2102 } 2103 } else { 2104 simple_unlock(&vp->v_interlock); 2105 } 2106 } 2107 if (anyio && (--tries > 0)) 2108 goto loop; 2109} 2110 2111/* 2112 * Create the VM object needed for VMIO and mmap support. This 2113 * is done for all VREG files in the system. Some filesystems might 2114 * afford the additional metadata buffering capability of the 2115 * VMIO code by making the device node be VMIO mode also. 2116 * 2117 * If !waslocked, must be called with interlock. 
2118 */ 2119int 2120vfs_object_create(vp, p, cred, waslocked) 2121 struct vnode *vp; 2122 struct proc *p; 2123 struct ucred *cred; 2124 int waslocked; 2125{ 2126 struct vattr vat; 2127 vm_object_t object; 2128 int error = 0; 2129 2130 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { 2131 return 0; 2132 } 2133 2134 if (!waslocked) 2135 vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); 2136 2137retry: 2138 if ((object = vp->v_object) == NULL) { 2139 if (vp->v_type == VREG) { 2140 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2141 goto retn; 2142 object = vnode_pager_alloc(vp, 2143 OFF_TO_IDX(round_page(vat.va_size)), 0, 0); 2144 } else if (major(vp->v_rdev) < nblkdev) { 2145 /* 2146 * This simply allocates the biggest object possible 2147 * for a VBLK vnode. This should be fixed, but doesn't 2148 * cause any problems (yet). 2149 */ 2150 object = vnode_pager_alloc(vp, INT_MAX, 0, 0); 2151 } 2152 object->ref_count--; 2153 vp->v_usecount--; 2154 } else { 2155 if (object->flags & OBJ_DEAD) { 2156 VOP_UNLOCK(vp, 0, p); 2157 tsleep(object, PVM, "vodead", 0); 2158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2159 goto retry; 2160 } 2161 } 2162 2163 if (vp->v_object) { 2164 vp->v_flag |= VOBJBUF; 2165 } 2166 2167retn: 2168 if (!waslocked) { 2169 simple_lock(&vp->v_interlock); 2170 VOP_UNLOCK(vp, LK_INTERLOCK, p); 2171 } 2172 2173 return error; 2174} 2175 2176static void 2177vfree(vp) 2178 struct vnode *vp; 2179{ 2180 int s; 2181 2182 s = splbio(); 2183 simple_lock(&vnode_free_list_slock); 2184 if (vp->v_flag & VTBFREE) { 2185 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2186 vp->v_flag &= ~VTBFREE; 2187 } 2188 if (vp->v_flag & VAGE) { 2189 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2190 } else { 2191 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2192 } 2193 freevnodes++; 2194 simple_unlock(&vnode_free_list_slock); 2195 vp->v_flag &= ~VAGE; 2196 vp->v_flag |= VFREE; 2197 splx(s); 2198} 2199 2200void 2201vbusy(vp) 2202 struct vnode *vp; 2203{ 
2204 int s; 2205 2206 s = splbio(); 2207 simple_lock(&vnode_free_list_slock); 2208 if (vp->v_flag & VTBFREE) { 2209 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2210 vp->v_flag &= ~VTBFREE; 2211 } else { 2212 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2213 freevnodes--; 2214 } 2215 simple_unlock(&vnode_free_list_slock); 2216 vp->v_flag &= ~VFREE; 2217 splx(s); 2218} 2219 2220/* 2221 * Record a process's interest in events which might happen to 2222 * a vnode. Because poll uses the historic select-style interface 2223 * internally, this routine serves as both the ``check for any 2224 * pending events'' and the ``record my interest in future events'' 2225 * functions. (These are done together, while the lock is held, 2226 * to avoid race conditions.) 2227 */ 2228int 2229vn_pollrecord(vp, p, events) 2230 struct vnode *vp; 2231 struct proc *p; 2232 short events; 2233{ 2234 simple_lock(&vp->v_pollinfo.vpi_lock); 2235 if (vp->v_pollinfo.vpi_revents & events) { 2236 /* 2237 * This leaves events we are not interested 2238 * in available for the other process which 2239 * which presumably had requested them 2240 * (otherwise they would never have been 2241 * recorded). 2242 */ 2243 events &= vp->v_pollinfo.vpi_revents; 2244 vp->v_pollinfo.vpi_revents &= ~events; 2245 2246 simple_unlock(&vp->v_pollinfo.vpi_lock); 2247 return events; 2248 } 2249 vp->v_pollinfo.vpi_events |= events; 2250 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2251 simple_unlock(&vp->v_pollinfo.vpi_lock); 2252 return 0; 2253} 2254 2255/* 2256 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2257 * it is possible for us to miss an event due to race conditions, but 2258 * that condition is expected to be rare, so for the moment it is the 2259 * preferred interface. 
2260 */ 2261void 2262vn_pollevent(vp, events) 2263 struct vnode *vp; 2264 short events; 2265{ 2266 simple_lock(&vp->v_pollinfo.vpi_lock); 2267 if (vp->v_pollinfo.vpi_events & events) { 2268 /* 2269 * We clear vpi_events so that we don't 2270 * call selwakeup() twice if two events are 2271 * posted before the polling process(es) is 2272 * awakened. This also ensures that we take at 2273 * most one selwakeup() if the polling process 2274 * is no longer interested. However, it does 2275 * mean that only one event can be noticed at 2276 * a time. (Perhaps we should only clear those 2277 * event bits which we note?) XXX 2278 */ 2279 vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ 2280 vp->v_pollinfo.vpi_revents |= events; 2281 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2282 } 2283 simple_unlock(&vp->v_pollinfo.vpi_lock); 2284} 2285 2286/* 2287 * Wake up anyone polling on vp because it is being revoked. 2288 * This depends on dead_poll() returning POLLHUP for correct 2289 * behavior. 2290 */ 2291void 2292vn_pollgone(vp) 2293 struct vnode *vp; 2294{ 2295 simple_lock(&vp->v_pollinfo.vpi_lock); 2296 if (vp->v_pollinfo.vpi_events) { 2297 vp->v_pollinfo.vpi_events = 0; 2298 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2299 } 2300 simple_unlock(&vp->v_pollinfo.vpi_lock); 2301} 2302