vfs_subr.c revision 1.30
1/* $OpenBSD: vfs_subr.c,v 1.30 1998/12/28 19:35:35 art Exp $ */ 2/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ 3 4/* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 42 */ 43 44/* 45 * External virtual filesystem routines 46 */ 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/proc.h> 51#include <sys/mount.h> 52#include <sys/time.h> 53#include <sys/fcntl.h> 54#include <sys/kernel.h> 55#include <sys/vnode.h> 56#include <sys/stat.h> 57#include <sys/namei.h> 58#include <sys/ucred.h> 59#include <sys/buf.h> 60#include <sys/errno.h> 61#include <sys/malloc.h> 62#include <sys/domain.h> 63#include <sys/mbuf.h> 64#include <sys/syscallargs.h> 65 66#include <vm/vm.h> 67#include <sys/sysctl.h> 68 69#include <miscfs/specfs/specdev.h> 70 71enum vtype iftovt_tab[16] = { 72 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 73 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 74}; 75int vttoif_tab[9] = { 76 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 77 S_IFSOCK, S_IFIFO, S_IFMT, 78}; 79 80int doforce = 1; /* 1 => permit forcible unmounting */ 81int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 82int suid_clear = 1; /* 1 => clear SUID / SGID on owner change */ 83 84/* 85 * Insq/Remq for the vnode usage lists. 
86 */ 87#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 88#define bufremvn(bp) { \ 89 LIST_REMOVE(bp, b_vnbufs); \ 90 (bp)->b_vnbufs.le_next = NOLIST; \ 91} 92 93struct freelst vnode_hold_list; /* list of vnodes referencing buffers */ 94struct freelst vnode_free_list; /* vnode free list */ 95 96struct mntlist mountlist; /* mounted filesystem list */ 97struct simplelock mountlist_slock; 98static struct simplelock mntid_slock; 99struct simplelock mntvnode_slock; 100struct simplelock vnode_free_list_slock; 101struct simplelock spechash_slock; 102 103 104void insmntque __P((struct vnode *, struct mount *)); 105int getdevvp __P((dev_t, struct vnode **, enum vtype)); 106 107int vfs_hang_addrlist __P((struct mount *, struct netexport *, 108 struct export_args *)); 109int vfs_free_netcred __P((struct radix_node *, void *)); 110void vfs_free_addrlist __P((struct netexport *)); 111static __inline__ void vputonfreelist __P((struct vnode *)); 112 113#ifdef DEBUG 114void printlockedvnodes __P((void)); 115#endif 116 117/* 118 * Initialize the vnode management data structures. 119 */ 120void 121vntblinit() 122{ 123 124 simple_lock_init(&mntvnode_slock); 125 simple_lock_init(&mntid_slock); 126 simple_lock_init(&spechash_slock); 127 TAILQ_INIT(&vnode_hold_list); 128 TAILQ_INIT(&vnode_free_list); 129 simple_lock_init(&vnode_free_list_slock); 130 CIRCLEQ_INIT(&mountlist); 131 /* 132 * Initialize the filesystem syncer. 133 */ 134 vn_initialize_syncerd(); 135} 136 137 138/* 139 * Mark a mount point as busy. Used to synchronize access and to delay 140 * unmounting. Interlock is not released on failure. 141 */ 142 143int 144vfs_busy(mp, flags, interlkp, p) 145 struct mount *mp; 146 int flags; 147 struct simplelock *interlkp; 148 struct proc *p; 149{ 150 int lkflags; 151 152 if (mp->mnt_flag & MNT_UNMOUNT) { 153 if (flags & LK_NOWAIT) 154 return (ENOENT); 155 mp->mnt_flag |= MNT_MWAIT; 156 if (interlkp) 157 simple_unlock(interlkp); 158 /* 159 * Since all busy locks are shared except the exclusive 160 * lock granted when unmounting, the only place that a 161 * wakeup needs to be done is at the release of the 162 * exclusive lock at the end of dounmount. 163 */ 164 sleep((caddr_t)mp, PVFS); 165 if (interlkp) 166 simple_lock(interlkp); 167 return (ENOENT); 168 } 169 lkflags = LK_SHARED; 170 if (interlkp) 171 lkflags |= LK_INTERLOCK; 172 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 173 panic("vfs_busy: unexpected lock failure"); 174 return (0); 175} 176 177 178/* 179 * Free a busy file system 180 */ 181void 182vfs_unbusy(mp, p) 183 struct mount *mp; 184 struct proc *p; 185{ 186 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 187} 188 189/* 190 * Lookup a filesystem type, and if found allocate and initialize 191 * a mount structure for it. 192 * 193 * Devname is usually updated by mount(8) after booting. 
194 */ 195 196int 197vfs_rootmountalloc(fstypename, devname, mpp) 198 char *fstypename; 199 char *devname; 200 struct mount **mpp; 201{ 202 struct proc *p = curproc; /* XXX */ 203 struct vfsconf *vfsp; 204 struct mount *mp; 205 206 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 207 if (!strcmp(vfsp->vfc_name, fstypename)) 208 break; 209 if (vfsp == NULL) 210 return (ENODEV); 211 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 212 bzero((char *)mp, (u_long)sizeof(struct mount)); 213 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 214 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 215 LIST_INIT(&mp->mnt_vnodelist); 216 mp->mnt_vfc = vfsp; 217 mp->mnt_op = vfsp->vfc_vfsops; 218 mp->mnt_flag = MNT_RDONLY; 219 mp->mnt_vnodecovered = NULLVP; 220 vfsp->vfc_refcount++; 221 mp->mnt_stat.f_type = vfsp->vfc_typenum; 222 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 223 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 224 mp->mnt_stat.f_mntonname[0] = '/'; 225 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 226 *mpp = mp; 227 return (0); 228 } 229 230/* 231 * Find an appropriate filesystem to use for the root. If a filesystem 232 * has not been preselected, walk through the list of known filesystems 233 * trying those that have mountroot routines, and try them until one 234 * works or we have tried them all. 235 */ 236int 237vfs_mountroot() 238{ 239 struct vfsconf *vfsp; 240 extern int (*mountroot)(void); 241 int error; 242 243 if (mountroot != NULL) 244 return ((*mountroot)()); 245 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 246 if (vfsp->vfc_mountroot == NULL) 247 continue; 248 if ((error = (*vfsp->vfc_mountroot)()) == 0) 249 return (0); 250 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 251 } 252 return (ENODEV); 253} 254 255/* 256 * Lookup a mount point by filesystem identifier. 257 */ 258struct mount * 259vfs_getvfs(fsid) 260 fsid_t *fsid; 261{ 262 register struct mount *mp; 263 264 simple_lock(&mountlist_slock); 265 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 266 mp = mp->mnt_list.cqe_next) { 267 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 268 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 269 simple_unlock(&mountlist_slock); 270 return (mp); 271 } 272 } 273 simple_unlock(&mountlist_slock); 274 return ((struct mount *)0); 275} 276 277 278/* 279 * Get a new unique fsid 280 */ 281void 282vfs_getnewfsid(mp) 283 struct mount *mp; 284{ 285 static u_short xxxfs_mntid; 286 287 fsid_t tfsid; 288 int mtype; 289 290 simple_lock(&mntid_slock); 291 mtype = mp->mnt_vfc->vfc_typenum; 292 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 293 mp->mnt_stat.f_fsid.val[1] = mtype; 294 if (xxxfs_mntid == 0) 295 ++xxxfs_mntid; 296 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 297 tfsid.val[1] = mtype; 298 if (mountlist.cqh_first != (void *)&mountlist) { 299 while (vfs_getvfs(&tfsid)) { 300 tfsid.val[0]++; 301 xxxfs_mntid++; 302 } 303 } 304 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 305 simple_unlock(&mntid_slock); 306} 307 308/* 309 * Make a 'unique' number from a mount type name. 310 * Note that this is no longer used for ffs which 311 * now has an on-disk filesystem id. 
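 * (For example, the name "ffs" hashes to 0x78b.)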
312 */ 313long 314makefstype(type) 315 char *type; 316{ 317 long rv; 318 319 for (rv = 0; *type; type++) { 320 rv <<= 2; 321 rv ^= *type; 322 } 323 return rv; 324} 325 326/* 327 * Set vnode attributes to VNOVAL 328 */ 329void 330vattr_null(vap) 331 register struct vattr *vap; 332{ 333 334 vap->va_type = VNON; 335 /* XXX These next two used to be one line, but for a GCC bug. */ 336 vap->va_size = VNOVAL; 337 vap->va_bytes = VNOVAL; 338 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 339 vap->va_fsid = vap->va_fileid = 340 vap->va_blocksize = vap->va_rdev = 341 vap->va_atime.tv_sec = vap->va_atime.tv_nsec = 342 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = 343 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = 344 vap->va_flags = vap->va_gen = VNOVAL; 345 vap->va_vaflags = 0; 346} 347 348/* 349 * Routines having to do with the management of the vnode table. 350 */ 351extern int (**dead_vnodeop_p) __P((void *)); 352long numvnodes; 353 354/* 355 * Return the next vnode from the free list. 356 */ 357int 358getnewvnode(tag, mp, vops, vpp) 359 enum vtagtype tag; 360 struct mount *mp; 361 int (**vops) __P((void *)); 362 struct vnode **vpp; 363{ 364 struct proc *p = curproc; /* XXX */ 365 struct freelst *listhd; 366 static int toggle; 367 struct vnode *vp; 368#ifdef DIAGNOSTIC 369 int s; 370#endif 371 372 /* 373 * We must choose whether to allocate a new vnode or recycle an 374 * existing one. The criterion for allocating a new one is that 375 * the total number of vnodes is less than the number desired or 376 * there are no vnodes on either free list. Generally we only 377 * want to recycle vnodes that have no buffers associated with 378 * them, so we look first on the vnode_free_list. If it is empty, 379 * we next consider vnodes with referencing buffers on the 380 * vnode_hold_list. The toggle ensures that half the time we 381 * will use a buffer from the vnode_hold_list, and half the time 382 * we will allocate a new one unless the list has grown to twice 383 * the desired size. We are reticent to recycle vnodes from the 384 * vnode_hold_list because we will lose the identity of all its 385 * referencing buffers. 386 */ 387 toggle ^= 1; 388 if (numvnodes > 2 * desiredvnodes) 389 toggle = 0; 390 391 392 simple_lock(&vnode_free_list_slock); 393 if ((numvnodes < desiredvnodes) || 394 ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) && 395 ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) { 396 simple_unlock(&vnode_free_list_slock); 397 vp = (struct vnode *)malloc((u_long)sizeof *vp, 398 M_VNODE, M_WAITOK); 399 bzero((char *)vp, sizeof *vp); 400 numvnodes++; 401 } else { 402 for (vp = TAILQ_FIRST(listhd); vp != NULLVP; 403 vp = TAILQ_NEXT(vp, v_freelist)) { 404 if (simple_lock_try(&vp->v_interlock)) 405 break; 406 } 407 /* 408 * Unless this is a bad time of the month, at most 409 * the first NCPUS items on the free list are 410 * locked, so this is close enough to being empty. 
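		 * If no entry could be locked we give up and fail
		 * with ENFILE below.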
411 */ 412 if (vp == NULLVP) { 413 simple_unlock(&vnode_free_list_slock); 414 tablefull("vnode"); 415 *vpp = 0; 416 return (ENFILE); 417 } 418 if (vp->v_usecount) { 419 vprint("free vnode", vp); 420 panic("free vnode isn't"); 421 } 422 423 TAILQ_REMOVE(listhd, vp, v_freelist); 424 vp->v_flag &= ~VONFREELIST; 425 426 simple_unlock(&vnode_free_list_slock); 427 vp->v_lease = NULL; 428 if (vp->v_type != VBAD) 429 vgonel(vp, p); 430 else 431 simple_unlock(&vp->v_interlock); 432#ifdef DIAGNOSTIC 433 if (vp->v_data) { 434 vprint("cleaned vnode", vp); 435 panic("cleaned vnode isn't"); 436 } 437 s = splbio(); 438 if (vp->v_numoutput) 439 panic("Clean vnode has pending I/O's"); 440 splx(s); 441#endif 442 vp->v_flag = 0; 443 vp->v_lastr = 0; 444 vp->v_ralen = 0; 445 vp->v_maxra = 0; 446 vp->v_lastw = 0; 447 vp->v_lasta = 0; 448 vp->v_cstart = 0; 449 vp->v_clen = 0; 450 vp->v_socket = 0; 451 } 452 vp->v_type = VNON; 453 cache_purge(vp); 454 vp->v_tag = tag; 455 vp->v_op = vops; 456 insmntque(vp, mp); 457 *vpp = vp; 458 vp->v_usecount = 1; 459 vp->v_data = 0; 460 return (0); 461} 462 463/* 464 * Move a vnode from one mount queue to another. 465 */ 466void 467insmntque(vp, mp) 468 register struct vnode *vp; 469 register struct mount *mp; 470{ 471 simple_lock(&mntvnode_slock); 472 /* 473 * Delete from old mount point vnode list, if on one. 474 */ 475 476 if (vp->v_mount != NULL) 477 LIST_REMOVE(vp, v_mntvnodes); 478 /* 479 * Insert into list of vnodes for the new mount point, if available. 480 */ 481 if ((vp->v_mount = mp) != NULL) 482 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 483 simple_unlock(&mntvnode_slock); 484} 485 486 487/* 488 * Create a vnode for a block device. 489 * Used for root filesystem, argdev, and swap areas. 490 * Also used for memory file system special devices. 491 */ 492int 493bdevvp(dev, vpp) 494 dev_t dev; 495 struct vnode **vpp; 496{ 497 498 return (getdevvp(dev, vpp, VBLK)); 499} 500 501/* 502 * Create a vnode for a character device. 503 * Used for kernfs and some console handling. 504 */ 505int 506cdevvp(dev, vpp) 507 dev_t dev; 508 struct vnode **vpp; 509{ 510 511 return (getdevvp(dev, vpp, VCHR)); 512} 513 514/* 515 * Create a vnode for a device. 516 * Used by bdevvp (block device) for root file system etc., 517 * and by cdevvp (character device) for console and kernfs. 518 */ 519int 520getdevvp(dev, vpp, type) 521 dev_t dev; 522 struct vnode **vpp; 523 enum vtype type; 524{ 525 register struct vnode *vp; 526 struct vnode *nvp; 527 int error; 528 529 if (dev == NODEV) { 530 *vpp = NULLVP; 531 return (0); 532 } 533 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 534 if (error) { 535 *vpp = NULLVP; 536 return (error); 537 } 538 vp = nvp; 539 vp->v_type = type; 540 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 541 vput(vp); 542 vp = nvp; 543 } 544 *vpp = vp; 545 return (0); 546} 547 548/* 549 * Check to see if the new vnode represents a special device 550 * for which we already have a vnode (either because of 551 * bdevvp() or because of a different vnode representing 552 * the same block device). If such an alias exists, deallocate 553 * the existing contents and return the aliased vnode. The 554 * caller is responsible for filling it with its new contents. 
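 * NULLVP is returned when the caller should simply keep using nvp; a vnode
 * is handed back only if an alias created with bdevvp() (tag VT_NON, type
 * VBLK) was found.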
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		simple_lock(&vp->v_interlock);
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}


	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK.
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;
	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
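	 * When that happens ENOENT is returned and the caller must not
	 * touch the vnode.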
657 */ 658 if ((flags & LK_INTERLOCK) == 0) 659 simple_lock(&vp->v_interlock); 660 if (vp->v_flag & VXLOCK) { 661 vp->v_flag |= VXWANT; 662 simple_unlock(&vp->v_interlock); 663 tsleep((caddr_t)vp, PINOD, "vget", 0); 664 return (ENOENT); 665 } 666 if ((vp->v_flag & VONFREELIST) && (vp->v_usecount == 0)) { 667 simple_lock(&vnode_free_list_slock); 668 if (vp->v_holdcnt > 0) 669 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 670 else 671 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 672 simple_unlock(&vnode_free_list_slock); 673 vp->v_flag &= ~VONFREELIST; 674 } 675 vp->v_usecount++; 676 if (flags & LK_TYPE_MASK) { 677 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 678 vp->v_usecount--; 679 if (vp->v_usecount == 0) 680 vputonfreelist(vp); 681 682 simple_unlock(&vp->v_interlock); 683 } 684 return (error); 685 } 686 simple_unlock(&vp->v_interlock); 687 return (0); 688} 689 690 691#ifdef DIAGNOSTIC 692/* 693 * Vnode reference. 694 */ 695void 696vref(vp) 697 struct vnode *vp; 698{ 699 simple_lock(&vp->v_interlock); 700 if (vp->v_usecount <= 0) 701 panic("vref used where vget required"); 702 vp->v_usecount++; 703 simple_unlock(&vp->v_interlock); 704} 705#endif /* DIAGNOSTIC */ 706 707static __inline__ void 708vputonfreelist(vp) 709 struct vnode *vp; 710 711{ 712 struct freelst *lst; 713 714 /* 715 * insert at tail of LRU list 716 */ 717#ifdef DIAGNOSTIC 718 if (vp->v_usecount != 0) { 719 panic("Use count is not zero!"); 720 } 721 722 if (vp->v_flag & VONFREELIST) { 723 vprint ("vnode already on free list: ", vp); 724 panic ("vnode already on free list"); 725 return; 726 } 727#endif 728 729 vp->v_flag |= VONFREELIST; 730 731 simple_lock(&vnode_free_list_slock); 732 733 if (vp->v_holdcnt > 0) 734 lst = &vnode_hold_list; 735 else 736 lst = &vnode_free_list; 737 738 739 if (vp->v_type == VBAD) 740 TAILQ_INSERT_HEAD(lst, vp, v_freelist); 741 else 742 TAILQ_INSERT_TAIL(lst, vp, v_freelist); 743 744 simple_unlock(&vnode_free_list_slock); 745} 746 747/* 748 * vput(), just unlock and vrele() 749 */ 750void 751vput(vp) 752 register struct vnode *vp; 753{ 754 struct proc *p = curproc; /* XXX */ 755 756#ifdef DIAGNOSTIC 757 if (vp == NULL) 758 panic("vput: null vp"); 759#endif 760 simple_lock(&vp->v_interlock); 761 vp->v_usecount--; 762 if (vp->v_usecount > 0) { 763 simple_unlock(&vp->v_interlock); 764 VOP_UNLOCK(vp, 0, p); 765 return; 766 } 767#ifdef DIAGNOSTIC 768 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 769 vprint("vput: bad ref count", vp); 770 panic("vput: ref cnt"); 771 } 772#endif 773 vputonfreelist(vp); 774 775 VOP_INACTIVE(vp, p); 776 777 simple_unlock(&vp->v_interlock); 778} 779 780/* 781 * Vnode release - use for active VNODES. 782 * If count drops to zero, call inactive routine and return to freelist. 783 */ 784void 785vrele(vp) 786 register struct vnode *vp; 787{ 788 struct proc *p = curproc; /* XXX */ 789 790#ifdef DIAGNOSTIC 791 if (vp == NULL) 792 panic("vrele: null vp"); 793#endif 794 simple_lock(&vp->v_interlock); 795 vp->v_usecount--; 796 if (vp->v_usecount > 0) { 797 simple_unlock(&vp->v_interlock); 798 return; 799 } 800#ifdef DIAGNOSTIC 801 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 802 vprint("vrele: bad ref count", vp); 803 panic("vrele: ref cnt"); 804 } 805#endif 806 vputonfreelist(vp); 807 808 if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0) 809 VOP_INACTIVE(vp, p); 810} 811 812#ifdef DIAGNOSTIC 813/* 814 * Page or buffer structure gets a reference. 
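 * A held vnode that is otherwise free sits on vnode_hold_list instead of
 * vnode_free_list, so that getnewvnode() prefers to recycle other vnodes
 * first.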
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_flag & VONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_flag & VONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones;
 * return an error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
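		 * Reverting means the vnode is cleaned, switched to
		 * spec_vnodeop_p and taken off this mount, so the underlying
		 * device stays usable.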
930 */ 931 if (flags & FORCECLOSE) { 932 simple_unlock(&mntvnode_slock); 933 if (vp->v_type != VBLK && vp->v_type != VCHR) { 934 vgonel(vp, p); 935 } else { 936 vclean(vp, 0, p); 937 vp->v_op = spec_vnodeop_p; 938 insmntque(vp, (struct mount *)0); 939 } 940 simple_lock(&mntvnode_slock); 941 continue; 942 } 943#ifdef DEBUG 944 if (busyprt) 945 vprint("vflush: busy vnode", vp); 946#endif 947 simple_unlock(&vp->v_interlock); 948 busy++; 949 } 950 simple_unlock(&mntvnode_slock); 951 if (busy) 952 return (EBUSY); 953 return (0); 954} 955 956/* 957 * Disassociate the underlying file system from a vnode. 958 * The vnode interlock is held on entry. 959 */ 960void 961vclean(vp, flags, p) 962 register struct vnode *vp; 963 int flags; 964 struct proc *p; 965{ 966 int active; 967 968 /* 969 * Check to see if the vnode is in use. 970 * If so we have to reference it before we clean it out 971 * so that its count cannot fall to zero and generate a 972 * race against ourselves to recycle it. 973 */ 974 if ((active = vp->v_usecount) != 0) 975 vp->v_usecount++; 976 977 /* 978 * Prevent the vnode from being recycled or 979 * brought into use while we clean it out. 980 */ 981 if (vp->v_flag & VXLOCK) 982 panic("vclean: deadlock"); 983 vp->v_flag |= VXLOCK; 984 985 986 /* 987 * Even if the count is zero, the VOP_INACTIVE routine may still 988 * have the object locked while it cleans it out. The VOP_LOCK 989 * ensures that the VOP_INACTIVE routine is done with its work. 990 * For active vnodes, it ensures that no other activity can 991 * occur while the underlying object is being cleaned out. 992 */ 993 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 994 995 /* 996 * Clean out any buffers associated with the vnode. 997 */ 998 if (flags & DOCLOSE) 999 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1000 /* 1001 * If purging an active vnode, it must be closed and 1002 * deactivated before being reclaimed. Note that the 1003 * VOP_INACTIVE will unlock the vnode 1004 */ 1005 if (active) { 1006 if (flags & DOCLOSE) 1007 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1008 VOP_INACTIVE(vp, p); 1009 } else { 1010 /* 1011 * Any other processes trying to obtain this lock must first 1012 * wait for VXLOCK to clear, then call the new lock operation. 1013 */ 1014 VOP_UNLOCK(vp, 0, p); 1015 } 1016 1017 /* 1018 * Reclaim the vnode. 1019 */ 1020 if (VOP_RECLAIM(vp, p)) 1021 panic("vclean: cannot reclaim"); 1022 if (active) { 1023 simple_lock(&vp->v_interlock); 1024 1025 vp->v_usecount--; 1026 if (vp->v_usecount == 0) { 1027 if (vp->v_holdcnt > 0) 1028 panic("vclean: not clean"); 1029 vputonfreelist(vp); 1030 } 1031 1032 simple_unlock(&vp->v_interlock); 1033 } 1034 cache_purge(vp); 1035 if (vp->v_vnlock) { 1036 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1037 vprint("vclean: lock not drained", vp); 1038 FREE(vp->v_vnlock, M_VNODE); 1039 vp->v_vnlock = NULL; 1040 } 1041 1042 /* 1043 * Done with purge, notify sleepers of the grim news. 1044 */ 1045 vp->v_op = dead_vnodeop_p; 1046 vp->v_tag = VT_NON; 1047 vp->v_flag &= ~VXLOCK; 1048#ifdef DIAGNOSTIC 1049 vp->v_flag &= ~VLOCKSWORK; 1050#endif 1051 if (vp->v_flag & VXWANT) { 1052 vp->v_flag &= ~VXWANT; 1053 wakeup((caddr_t)vp); 1054 } 1055} 1056 1057 1058 1059/* 1060 * Recycle an unused vnode to the front of the free list. 1061 * Release the passed interlock if the vnode will be recycled. 
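 * Returns 1 if the vnode was recycled, 0 if it was still in use.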
1062 */ 1063int 1064vrecycle(vp, inter_lkp, p) 1065 struct vnode *vp; 1066 struct simplelock *inter_lkp; 1067 struct proc *p; 1068{ 1069 1070 simple_lock(&vp->v_interlock); 1071 if (vp->v_usecount == 0) { 1072 if (inter_lkp) 1073 simple_unlock(inter_lkp); 1074 vgonel(vp, p); 1075 return (1); 1076 } 1077 simple_unlock(&vp->v_interlock); 1078 return (0); 1079} 1080 1081/* 1082 * Eliminate all activity associated with a vnode 1083 * in preparation for reuse. 1084 */ 1085void 1086vgone(vp) 1087 register struct vnode *vp; 1088{ 1089 struct proc *p = curproc; 1090 1091 simple_lock (&vp->v_interlock); 1092 vgonel(vp, p); 1093} 1094 1095/* 1096 * vgone, with the vp interlock held. 1097 */ 1098void 1099vgonel(vp, p) 1100 struct vnode *vp; 1101 struct proc *p; 1102{ 1103 register struct vnode *vq; 1104 struct vnode *vx; 1105 1106 /* 1107 * If a vgone (or vclean) is already in progress, 1108 * wait until it is done and return. 1109 */ 1110 if (vp->v_flag & VXLOCK) { 1111 vp->v_flag |= VXWANT; 1112 simple_unlock(&vp->v_interlock); 1113 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1114 return; 1115 } 1116 /* 1117 * Clean out the filesystem specific data. 1118 */ 1119 vclean(vp, DOCLOSE, p); 1120 /* 1121 * Delete from old mount point vnode list, if on one. 1122 */ 1123 if (vp->v_mount != NULL) 1124 insmntque(vp, (struct mount *)0); 1125 /* 1126 * If special device, remove it from special device alias list 1127 * if it is on one. 1128 */ 1129 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1130 simple_lock(&spechash_slock); 1131 if (*vp->v_hashchain == vp) { 1132 *vp->v_hashchain = vp->v_specnext; 1133 } else { 1134 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1135 if (vq->v_specnext != vp) 1136 continue; 1137 vq->v_specnext = vp->v_specnext; 1138 break; 1139 } 1140 if (vq == NULL) 1141 panic("missing bdev"); 1142 } 1143 if (vp->v_flag & VALIASED) { 1144 vx = NULL; 1145 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1146 if (vq->v_rdev != vp->v_rdev || 1147 vq->v_type != vp->v_type) 1148 continue; 1149 if (vx) 1150 break; 1151 vx = vq; 1152 } 1153 if (vx == NULL) 1154 panic("missing alias"); 1155 if (vq == NULL) 1156 vx->v_flag &= ~VALIASED; 1157 vp->v_flag &= ~VALIASED; 1158 } 1159 simple_unlock(&spechash_slock); 1160 FREE(vp->v_specinfo, M_VNODE); 1161 vp->v_specinfo = NULL; 1162 } 1163 /* 1164 * If it is on the freelist and not already at the head, 1165 * move it to the head of the list. 1166 */ 1167 vp->v_type = VBAD; 1168 1169 if ((vp->v_flag & VONFREELIST) && 1170 vp->v_usecount == 0) { 1171 simple_lock(&vnode_free_list_slock); 1172 if (vp->v_holdcnt > 0) 1173 panic("vgonel: not clean"); 1174 if (TAILQ_FIRST(&vnode_free_list) != vp) { 1175 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1176 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1177 } 1178 simple_unlock(&vnode_free_list_slock); 1179 } 1180} 1181 1182/* 1183 * Lookup a vnode by device number. 1184 */ 1185int 1186vfinddev(dev, type, vpp) 1187 dev_t dev; 1188 enum vtype type; 1189 struct vnode **vpp; 1190{ 1191 register struct vnode *vp; 1192 int rc =0; 1193 1194 simple_lock(&spechash_slock); 1195 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1196 if (dev != vp->v_rdev || type != vp->v_type) 1197 continue; 1198 *vpp = vp; 1199 rc = 1; 1200 break; 1201 } 1202 simple_unlock(&spechash_slock); 1203 return (rc); 1204} 1205 1206/* 1207 * Calculate the total number of references to a special device. 
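 * When the device is aliased, the usecounts of all aliases are summed;
 * unreferenced aliases found along the way are flushed out.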
1208 */ 1209int 1210vcount(vp) 1211 struct vnode *vp; 1212{ 1213 struct vnode *vq, *vnext; 1214 int count; 1215 1216loop: 1217 if ((vp->v_flag & VALIASED) == 0) 1218 return (vp->v_usecount); 1219 simple_lock(&spechash_slock); 1220 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1221 vnext = vq->v_specnext; 1222 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1223 continue; 1224 /* 1225 * Alias, but not in use, so flush it out. 1226 */ 1227 if (vq->v_usecount == 0 && vq != vp) { 1228 simple_unlock(&spechash_slock); 1229 vgone(vq); 1230 goto loop; 1231 } 1232 count += vq->v_usecount; 1233 } 1234 simple_unlock(&spechash_slock); 1235 return (count); 1236} 1237 1238/* 1239 * Print out a description of a vnode. 1240 */ 1241static char *typename[] = 1242 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1243 1244void 1245vprint(label, vp) 1246 char *label; 1247 register struct vnode *vp; 1248{ 1249 char buf[64]; 1250 1251 if (label != NULL) 1252 printf("%s: ", label); 1253 printf("type %s, usecount %d, writecount %d, holdcount %ld,", 1254 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1255 vp->v_holdcnt); 1256 buf[0] = '\0'; 1257 if (vp->v_flag & VROOT) 1258 strcat(buf, "|VROOT"); 1259 if (vp->v_flag & VTEXT) 1260 strcat(buf, "|VTEXT"); 1261 if (vp->v_flag & VSYSTEM) 1262 strcat(buf, "|VSYSTEM"); 1263 if (vp->v_flag & VXLOCK) 1264 strcat(buf, "|VXLOCK"); 1265 if (vp->v_flag & VXWANT) 1266 strcat(buf, "|VXWANT"); 1267 if (vp->v_flag & VBWAIT) 1268 strcat(buf, "|VBWAIT"); 1269 if (vp->v_flag & VALIASED) 1270 strcat(buf, "|VALIASED"); 1271 if (buf[0] != '\0') 1272 printf(" flags (%s)", &buf[1]); 1273 if (vp->v_data == NULL) { 1274 printf("\n"); 1275 } else { 1276 printf("\n\t"); 1277 VOP_PRINT(vp); 1278 } 1279} 1280 1281#ifdef DEBUG 1282/* 1283 * List all of the locked vnodes in the system. 1284 * Called when debugging the kernel. 1285 */ 1286void 1287printlockedvnodes() 1288{ 1289 struct proc *p = curproc; 1290 register struct mount *mp, *nmp; 1291 register struct vnode *vp; 1292 1293 printf("Locked vnodes\n"); 1294 simple_lock(&mountlist_slock); 1295 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1296 mp = nmp) { 1297 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1298 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1299 continue; 1300 } 1301 for (vp = mp->mnt_vnodelist.lh_first; 1302 vp != NULL; 1303 vp = vp->v_mntvnodes.le_next) { 1304 if (VOP_ISLOCKED(vp)) 1305 vprint((char *)0, vp); 1306 } 1307 simple_lock(&mountlist_slock); 1308 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1309 vfs_unbusy(mp, p); 1310 } 1311 simple_unlock(&mountlist_slock); 1312 1313} 1314#endif 1315 1316/* 1317 * Top level filesystem related information gathering. 
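 * The first name component selects a filesystem type number (or VFS_GENERIC);
 * the remaining names are passed on to that filesystem's own vfs_sysctl.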
1318 */ 1319int 1320vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1321 int *name; 1322 u_int namelen; 1323 void *oldp; 1324 size_t *oldlenp; 1325 void *newp; 1326 size_t newlen; 1327 struct proc *p; 1328{ 1329 struct vfsconf *vfsp; 1330 1331 /* all sysctl names at this level are at least name and field */ 1332 if (namelen < 2) 1333 return (ENOTDIR); /* overloaded */ 1334 if (name[0] != VFS_GENERIC) { 1335 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1336 if (vfsp->vfc_typenum == name[0]) 1337 break; 1338 if (vfsp == NULL) 1339 return (EOPNOTSUPP); 1340 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1341 oldp, oldlenp, newp, newlen, p)); 1342 } 1343 switch (name[1]) { 1344 case VFS_MAXTYPENUM: 1345 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 1346 case VFS_CONF: 1347 if (namelen < 3) 1348 return (ENOTDIR); /* overloaded */ 1349 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1350 if (vfsp->vfc_typenum == name[2]) 1351 break; 1352 if (vfsp == NULL) 1353 return (EOPNOTSUPP); 1354 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 1355 sizeof(struct vfsconf))); 1356 } 1357 return (EOPNOTSUPP); 1358} 1359 1360 1361int kinfo_vdebug = 1; 1362int kinfo_vgetfailed; 1363#define KINFO_VNODESLOP 10 1364/* 1365 * Dump vnode list (via sysctl). 1366 * Copyout address of vnode followed by vnode. 1367 */ 1368/* ARGSUSED */ 1369int 1370sysctl_vnode(where, sizep, p) 1371 char *where; 1372 size_t *sizep; 1373 struct proc *p; 1374{ 1375 register struct mount *mp, *nmp; 1376 struct vnode *vp, *nvp; 1377 register char *bp = where, *savebp; 1378 char *ewhere; 1379 int error; 1380 1381#define VPTRSZ sizeof (struct vnode *) 1382#define VNODESZ sizeof (struct vnode) 1383 if (where == NULL) { 1384 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1385 return (0); 1386 } 1387 ewhere = where + *sizep; 1388 1389 simple_lock(&mountlist_slock); 1390 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1391 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1392 nmp = mp->mnt_list.cqe_next; 1393 continue; 1394 } 1395 savebp = bp; 1396again: 1397 for (vp = mp->mnt_vnodelist.lh_first; 1398 vp != NULL; 1399 vp = nvp) { 1400 /* 1401 * Check that the vp is still associated with 1402 * this filesystem. RACE: could have been 1403 * recycled onto the same filesystem. 1404 */ 1405 if (vp->v_mount != mp) { 1406 simple_unlock(&mntvnode_slock); 1407 if (kinfo_vdebug) 1408 printf("kinfo: vp changed\n"); 1409 bp = savebp; 1410 goto again; 1411 } 1412 nvp = vp->v_mntvnodes.le_next; 1413 if (bp + VPTRSZ + VNODESZ > ewhere) { 1414 simple_unlock(&mntvnode_slock); 1415 *sizep = bp - where; 1416 return (ENOMEM); 1417 } 1418 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1419 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1420 return (error); 1421 bp += VPTRSZ + VNODESZ; 1422 simple_lock(&mntvnode_slock); 1423 } 1424 1425 simple_unlock(&mntvnode_slock); 1426 simple_lock(&mountlist_slock); 1427 nmp = mp->mnt_list.cqe_next; 1428 vfs_unbusy(mp, p); 1429 } 1430 1431 simple_unlock(&mountlist_slock); 1432 1433 *sizep = bp - where; 1434 return (0); 1435} 1436 1437/* 1438 * Check to see if a filesystem is mounted on a block device. 
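 * EBUSY is returned if this vnode, or any alias of it, has a filesystem
 * mounted on it.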
1439 */ 1440int 1441vfs_mountedon(vp) 1442 register struct vnode *vp; 1443{ 1444 register struct vnode *vq; 1445 int error = 0; 1446 1447 if (vp->v_specmountpoint != NULL) 1448 return (EBUSY); 1449 if (vp->v_flag & VALIASED) { 1450 simple_lock(&spechash_slock); 1451 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1452 if (vq->v_rdev != vp->v_rdev || 1453 vq->v_type != vp->v_type) 1454 continue; 1455 if (vq->v_specmountpoint != NULL) { 1456 error = EBUSY; 1457 break; 1458 } 1459 } 1460 simple_unlock(&spechash_slock); 1461 } 1462 return (error); 1463} 1464 1465/* 1466 * Build hash lists of net addresses and hang them off the mount point. 1467 * Called by ufs_mount() to set up the lists of export addresses. 1468 */ 1469int 1470vfs_hang_addrlist(mp, nep, argp) 1471 struct mount *mp; 1472 struct netexport *nep; 1473 struct export_args *argp; 1474{ 1475 register struct netcred *np; 1476 register struct radix_node_head *rnh; 1477 register int i; 1478 struct radix_node *rn; 1479 struct sockaddr *saddr, *smask = 0; 1480 struct domain *dom; 1481 int error; 1482 1483 if (argp->ex_addrlen == 0) { 1484 if (mp->mnt_flag & MNT_DEFEXPORTED) 1485 return (EPERM); 1486 np = &nep->ne_defexported; 1487 np->netc_exflags = argp->ex_flags; 1488 np->netc_anon = argp->ex_anon; 1489 np->netc_anon.cr_ref = 1; 1490 mp->mnt_flag |= MNT_DEFEXPORTED; 1491 return (0); 1492 } 1493 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1494 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1495 bzero((caddr_t)np, i); 1496 saddr = (struct sockaddr *)(np + 1); 1497 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen); 1498 if (error) 1499 goto out; 1500 if (saddr->sa_len > argp->ex_addrlen) 1501 saddr->sa_len = argp->ex_addrlen; 1502 if (argp->ex_masklen) { 1503 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1504 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen); 1505 if (error) 1506 goto out; 1507 if (smask->sa_len > argp->ex_masklen) 1508 smask->sa_len = argp->ex_masklen; 1509 } 1510 i = saddr->sa_family; 1511 if ((rnh = nep->ne_rtable[i]) == 0) { 1512 /* 1513 * Seems silly to initialize every AF when most are not 1514 * used, do so on demand here 1515 */ 1516 for (dom = domains; dom; dom = dom->dom_next) 1517 if (dom->dom_family == i && dom->dom_rtattach) { 1518 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1519 dom->dom_rtoffset); 1520 break; 1521 } 1522 if ((rnh = nep->ne_rtable[i]) == 0) { 1523 error = ENOBUFS; 1524 goto out; 1525 } 1526 } 1527 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1528 np->netc_rnodes); 1529 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ 1530 error = EPERM; 1531 goto out; 1532 } 1533 np->netc_exflags = argp->ex_flags; 1534 np->netc_anon = argp->ex_anon; 1535 np->netc_anon.cr_ref = 1; 1536 return (0); 1537out: 1538 free(np, M_NETADDR); 1539 return (error); 1540} 1541 1542/* ARGSUSED */ 1543int 1544vfs_free_netcred(rn, w) 1545 struct radix_node *rn; 1546 void *w; 1547{ 1548 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1549 1550 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1551 free((caddr_t)rn, M_NETADDR); 1552 return (0); 1553} 1554 1555/* 1556 * Free the net address hash lists that are hanging off the mount points. 
1557 */ 1558void 1559vfs_free_addrlist(nep) 1560 struct netexport *nep; 1561{ 1562 register int i; 1563 register struct radix_node_head *rnh; 1564 1565 for (i = 0; i <= AF_MAX; i++) 1566 if ((rnh = nep->ne_rtable[i]) != NULL) { 1567 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1568 free((caddr_t)rnh, M_RTABLE); 1569 nep->ne_rtable[i] = 0; 1570 } 1571} 1572 1573int 1574vfs_export(mp, nep, argp) 1575 struct mount *mp; 1576 struct netexport *nep; 1577 struct export_args *argp; 1578{ 1579 int error; 1580 1581 if (argp->ex_flags & MNT_DELEXPORT) { 1582 vfs_free_addrlist(nep); 1583 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1584 } 1585 if (argp->ex_flags & MNT_EXPORTED) { 1586 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1587 return (error); 1588 mp->mnt_flag |= MNT_EXPORTED; 1589 } 1590 return (0); 1591} 1592 1593struct netcred * 1594vfs_export_lookup(mp, nep, nam) 1595 register struct mount *mp; 1596 struct netexport *nep; 1597 struct mbuf *nam; 1598{ 1599 register struct netcred *np; 1600 register struct radix_node_head *rnh; 1601 struct sockaddr *saddr; 1602 1603 np = NULL; 1604 if (mp->mnt_flag & MNT_EXPORTED) { 1605 /* 1606 * Lookup in the export list first. 1607 */ 1608 if (nam != NULL) { 1609 saddr = mtod(nam, struct sockaddr *); 1610 rnh = nep->ne_rtable[saddr->sa_family]; 1611 if (rnh != NULL) { 1612 np = (struct netcred *) 1613 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1614 rnh); 1615 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1616 np = NULL; 1617 } 1618 } 1619 /* 1620 * If no address match, use the default if it exists. 1621 */ 1622 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1623 np = &nep->ne_defexported; 1624 } 1625 return (np); 1626} 1627 1628/* 1629 * Do the usual access checking. 1630 * file_mode, uid and gid are from the vnode in question, 1631 * while acc_mode and cred are from the VOP_ACCESS parameter list 1632 */ 1633int 1634vaccess(file_mode, uid, gid, acc_mode, cred) 1635 mode_t file_mode; 1636 uid_t uid; 1637 gid_t gid; 1638 mode_t acc_mode; 1639 struct ucred *cred; 1640{ 1641 mode_t mask; 1642 1643 /* User id 0 always gets access. */ 1644 if (cred->cr_uid == 0) 1645 return 0; 1646 1647 mask = 0; 1648 1649 /* Otherwise, check the owner. */ 1650 if (cred->cr_uid == uid) { 1651 if (acc_mode & VEXEC) 1652 mask |= S_IXUSR; 1653 if (acc_mode & VREAD) 1654 mask |= S_IRUSR; 1655 if (acc_mode & VWRITE) 1656 mask |= S_IWUSR; 1657 return (file_mode & mask) == mask ? 0 : EACCES; 1658 } 1659 1660 /* Otherwise, check the groups. */ 1661 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1662 if (acc_mode & VEXEC) 1663 mask |= S_IXGRP; 1664 if (acc_mode & VREAD) 1665 mask |= S_IRGRP; 1666 if (acc_mode & VWRITE) 1667 mask |= S_IWGRP; 1668 return (file_mode & mask) == mask ? 0 : EACCES; 1669 } 1670 1671 /* Otherwise, check everyone else. */ 1672 if (acc_mode & VEXEC) 1673 mask |= S_IXOTH; 1674 if (acc_mode & VREAD) 1675 mask |= S_IROTH; 1676 if (acc_mode & VWRITE) 1677 mask |= S_IWOTH; 1678 return (file_mode & mask) == mask ? 0 : EACCES; 1679} 1680 1681/* 1682 * Unmount all file systems. 1683 * We traverse the list in reverse order under the assumption that doing so 1684 * will avoid needing to worry about dependencies. 
1685 */ 1686void 1687vfs_unmountall() 1688{ 1689 register struct mount *mp, *nmp; 1690 int allerror, error, again = 1; 1691 1692 retry: 1693 for (allerror = 0, 1694 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1695 nmp = mp->mnt_list.cqe_prev; 1696 if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) { 1697 printf("unmount of %s failed with error %d\n", 1698 mp->mnt_stat.f_mntonname, error); 1699 allerror = 1; 1700 } 1701 } 1702 1703 if (allerror) { 1704 printf("WARNING: some file systems would not unmount\n"); 1705 if (again) { 1706 printf("retrying\n"); 1707 again = 0; 1708 goto retry; 1709 } 1710 } 1711} 1712 1713/* 1714 * Sync and unmount file systems before shutting down. 1715 */ 1716void 1717vfs_shutdown() 1718{ 1719 register struct buf *bp; 1720 int iter, nbusy; 1721 1722 /* XXX Should suspend scheduling. */ 1723 (void) spl0(); 1724 1725 printf("syncing disks... "); 1726 1727 if (panicstr == 0) { 1728 /* Release inodes held by texts before update. */ 1729 vnode_pager_umount(NULL); 1730#ifdef notdef 1731 vnshutdown(); 1732#endif 1733 1734 /* Sync before unmount, in case we hang on something. */ 1735 sys_sync(&proc0, (void *)0, (register_t *)0); 1736 1737 /* Unmount file systems. */ 1738 vfs_unmountall(); 1739 } 1740 1741 /* Sync again after unmount, just in case. */ 1742 sys_sync(&proc0, (void *)0, (register_t *)0); 1743 1744 /* Wait for sync to finish. */ 1745 for (iter = 0; iter < 20; iter++) { 1746 nbusy = 0; 1747 for (bp = &buf[nbuf]; --bp >= buf; ) 1748 if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) 1749 nbusy++; 1750 if (nbusy == 0) 1751 break; 1752 printf("%d ", nbusy); 1753 DELAY(40000 * iter); 1754 } 1755 if (nbusy) 1756 printf("giving up\n"); 1757 else 1758 printf("done\n"); 1759} 1760 1761/* 1762 * posix file system related system variables. 1763 */ 1764int 1765fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1766 int *name; 1767 u_int namelen; 1768 void *oldp; 1769 size_t *oldlenp; 1770 void *newp; 1771 size_t newlen; 1772 struct proc *p; 1773{ 1774 /* all sysctl names at this level are terminal */ 1775 if (namelen != 1) 1776 return (ENOTDIR); 1777 1778 switch (name[0]) { 1779 case FS_POSIX_SETUID: 1780 if (newp && securelevel > 0) 1781 return (EPERM); 1782 return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear)); 1783 default: 1784 return (EOPNOTSUPP); 1785 } 1786 /* NOTREACHED */ 1787} 1788 1789/* 1790 * file system related system variables. 1791 */ 1792int 1793fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1794 int *name; 1795 u_int namelen; 1796 void *oldp; 1797 size_t *oldlenp; 1798 void *newp; 1799 size_t newlen; 1800 struct proc *p; 1801{ 1802 sysctlfn *fn; 1803 1804 switch (name[0]) { 1805 case FS_POSIX: 1806 fn = fs_posix_sysctl; 1807 break; 1808 default: 1809 return (EOPNOTSUPP); 1810 } 1811 return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p); 1812} 1813 1814 1815/* 1816 * Routines dealing with vnodes and buffers 1817 */ 1818 1819/* 1820 * Update outstanding I/O count and do wakeup if requested. 1821 * 1822 * Manipulates v_numoutput. 
Must be called at splbio() 1823 */ 1824void 1825vwakeup(bp) 1826 register struct buf *bp; 1827{ 1828 register struct vnode *vp; 1829 1830 bp->b_flags &= ~B_WRITEINPROG; 1831 if ((vp = bp->b_vp) != NULL) { 1832 if (--vp->v_numoutput < 0) 1833 panic("vwakeup: neg numoutput"); 1834 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 1835 vp->v_flag &= ~VBWAIT; 1836 wakeup((caddr_t)&vp->v_numoutput); 1837 } 1838 } 1839} 1840 1841/* 1842 * Flush out and invalidate all buffers associated with a vnode. 1843 * Called with the underlying object locked. 1844 */ 1845int 1846vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 1847 register struct vnode *vp; 1848 int flags; 1849 struct ucred *cred; 1850 struct proc *p; 1851 int slpflag, slptimeo; 1852{ 1853 register struct buf *bp; 1854 struct buf *nbp, *blist; 1855 int s, error; 1856 1857 if (flags & V_SAVE) { 1858 s = splbio(); 1859 while (vp->v_numoutput) { 1860 vp->v_flag |= VBWAIT; 1861 sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); 1862 } 1863 if (vp->v_dirtyblkhd.lh_first != NULL) { 1864 splx(s); 1865 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 1866 return (error); 1867 s = splbio(); 1868 if (vp->v_numoutput > 0 || 1869 vp->v_dirtyblkhd.lh_first != NULL) 1870 panic("vinvalbuf: dirty bufs"); 1871 } 1872 splx(s); 1873 } 1874loop: 1875 s = splbio(); 1876 for (;;) { 1877 if ((blist = vp->v_cleanblkhd.lh_first) && 1878 (flags & V_SAVEMETA)) 1879 while (blist && blist->b_lblkno < 0) 1880 blist = blist->b_vnbufs.le_next; 1881 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 1882 (flags & V_SAVEMETA)) 1883 while (blist && blist->b_lblkno < 0) 1884 blist = blist->b_vnbufs.le_next; 1885 if (!blist) 1886 break; 1887 1888 for (bp = blist; bp; bp = nbp) { 1889 nbp = bp->b_vnbufs.le_next; 1890 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 1891 continue; 1892 if (bp->b_flags & B_BUSY) { 1893 bp->b_flags |= B_WANTED; 1894 error = tsleep((caddr_t)bp, 1895 slpflag | (PRIBIO + 1), "vinvalbuf", 1896 slptimeo); 1897 if (error) { 1898 splx(s); 1899 return (error); 1900 } 1901 break; 1902 } 1903 bp->b_flags |= B_BUSY | B_VFLUSH; 1904 /* 1905 * XXX Since there are no node locks for NFS, I believe 1906 * there is a slight chance that a delayed write will 1907 * occur while sleeping just above, so check for it. 1908 */ 1909 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 1910 splx(s); 1911 (void) VOP_BWRITE(bp); 1912 goto loop; 1913 } 1914 bp->b_flags |= B_INVAL; 1915 brelse(bp); 1916 } 1917 } 1918 if (!(flags & V_SAVEMETA) && 1919 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 1920 panic("vinvalbuf: flush failed"); 1921 splx(s); 1922 return (0); 1923} 1924 1925void 1926vflushbuf(vp, sync) 1927 register struct vnode *vp; 1928 int sync; 1929{ 1930 register struct buf *bp, *nbp; 1931 int s; 1932 1933loop: 1934 s = splbio(); 1935 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 1936 nbp = bp->b_vnbufs.le_next; 1937 if ((bp->b_flags & B_BUSY)) 1938 continue; 1939 if ((bp->b_flags & B_DELWRI) == 0) 1940 panic("vflushbuf: not dirty"); 1941 bp->b_flags |= B_BUSY | B_VFLUSH; 1942 splx(s); 1943 /* 1944 * Wait for I/O associated with indirect blocks to complete, 1945 * since there is no way to quickly wait for them below. 
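		 * Such buffers hang off the device vnode (bp->b_vp != vp), so
		 * on a synchronous flush they are written with bwrite() below
		 * instead of bawrite().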
1946 */ 1947 if (bp->b_vp == vp || sync == 0) 1948 (void) bawrite(bp); 1949 else 1950 (void) bwrite(bp); 1951 goto loop; 1952 } 1953 if (sync == 0) { 1954 splx(s); 1955 return; 1956 } 1957 while (vp->v_numoutput) { 1958 vp->v_flag |= VBWAIT; 1959 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); 1960 } 1961 splx(s); 1962 if (vp->v_dirtyblkhd.lh_first != NULL) { 1963 vprint("vflushbuf: dirty", vp); 1964 goto loop; 1965 } 1966} 1967 1968/* 1969 * Associate a buffer with a vnode. 1970 * 1971 * Manipulates buffer vnode queues. Must be called at splbio(). 1972 */ 1973void 1974bgetvp(vp, bp) 1975 register struct vnode *vp; 1976 register struct buf *bp; 1977{ 1978 1979 if (bp->b_vp) 1980 panic("bgetvp: not free"); 1981 VHOLD(vp); 1982 bp->b_vp = vp; 1983 if (vp->v_type == VBLK || vp->v_type == VCHR) 1984 bp->b_dev = vp->v_rdev; 1985 else 1986 bp->b_dev = NODEV; 1987 /* 1988 * Insert onto list for new vnode. 1989 */ 1990 bufinsvn(bp, &vp->v_cleanblkhd); 1991} 1992 1993/* 1994 * Disassociate a buffer from a vnode. 1995 * 1996 * Manipulates vnode buffer queues. Must be called at splbio(). 1997 */ 1998void 1999brelvp(bp) 2000 register struct buf *bp; 2001{ 2002 struct vnode *vp; 2003 struct buf *wasdirty; 2004 2005 if ((vp = bp->b_vp) == (struct vnode *) 0) 2006 panic("brelvp: NULL"); 2007 /* 2008 * Delete from old vnode list, if on one. 2009 */ 2010 wasdirty = vp->v_dirtyblkhd.lh_first; 2011 if (bp->b_vnbufs.le_next != NOLIST) 2012 bufremvn(bp); 2013 if (wasdirty && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) 2014 LIST_REMOVE(vp, v_synclist); 2015 bp->b_vp = (struct vnode *) 0; 2016 HOLDRELE(vp); 2017} 2018 2019/* 2020 * Reassign a buffer from one vnode to another. Used to assign buffers 2021 * to the appropriate clean or dirty list and to add newly dirty vnodes 2022 * to the appropriate filesystem syncer list. 2023 * 2024 * Manipulates vnode buffer queues. Must be called at splbio(). 2025 */ 2026void 2027reassignbuf(bp, newvp) 2028 register struct buf *bp; 2029 register struct vnode *newvp; 2030{ 2031 struct buflists *listheadp; 2032 struct buf *wasdirty; 2033 int delay; 2034 2035 if (newvp == NULL) { 2036 printf("reassignbuf: NULL"); 2037 return; 2038 } 2039 /* 2040 * Delete from old vnode list, if on one. 2041 */ 2042 wasdirty = newvp->v_dirtyblkhd.lh_first; 2043 if (bp->b_vnbufs.le_next != NOLIST) 2044 bufremvn(bp); 2045 /* 2046 * If dirty, put on list of dirty buffers; 2047 * otherwise insert onto list of clean buffers. 2048 */ 2049 if ((bp->b_flags & B_DELWRI) == 0) { 2050 listheadp = &newvp->v_cleanblkhd; 2051 if (wasdirty && LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) 2052 LIST_REMOVE(newvp, v_synclist); 2053 } else { 2054 listheadp = &newvp->v_dirtyblkhd; 2055 if (LIST_FIRST(listheadp) == NULL) { 2056 switch (newvp->v_type) { 2057 case VDIR: 2058 delay = syncdelay / 3; 2059 break; 2060 case VBLK: 2061 if (newvp->v_specmountpoint != NULL) { 2062 delay = syncdelay / 2; 2063 break; 2064 } 2065 /* fall through */ 2066 default: 2067 delay = syncdelay; 2068 } 2069 vn_syncer_add_to_worklist(newvp, delay); 2070 } 2071 } 2072 bufinsvn(bp, listheadp); 2073} 2074