1/* $NetBSD: vfs_mount.c,v 1.105 2024/04/19 00:45:41 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 * @(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.105 2024/04/19 00:45:41 riastradh Exp $");

#include "veriexec.h"

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/verified_exec.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_swap.h>

/*
 * The global mount list is a queue of mountlist_entry nodes.  Real
 * mounts (ME_MOUNT) and iterator markers (ME_MARKER) are interleaved
 * on the same queue so iteration can survive concurrent changes.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *	rootvnode;

/* Mounted filesystem list. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

kmutex_t vfs_list_lock __cacheline_aligned;

static specificdata_domain_t mount_specificdata_domain;
static kmutex_t mntid_lock;	/* guards xxxfs_mntid in vfs_getnewfsid() */

/* mountgen_lock guards mountgen, a monotonic mount generation counter. */
static kmutex_t mountgen_lock __cacheline_aligned;
static uint64_t mountgen;

/*
 * vfs_mount_sysinit: one-time boot initialization of the mount
 * subsystem: the mount list and its lock, the vfs list lock, the
 * per-mount specificdata domain and the fsid/generation counters.
 */
void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}

/*
 * vfs_mountalloc: allocate and minimally initialize a mount structure
 * for file system type "vfsops", covering vnode "vp" (may be NULL for
 * a root mount).  Returned with one reference, donated to the caller,
 * and a fresh generation number taken from mountgen.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;	/* reference donated to the caller */
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	/* Attach fstrans state; expected to succeed for a fresh mount. */
	error = fstrans_mount(mp);
	KASSERT(error == 0);

	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;
	int error __diagused;

	/* Find the named file system type and take a reference to it. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	error = vfs_busy(mp);
	KASSERT(error == 0);
	mp->mnt_flag = MNT_RDONLY;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	/* copystr() below does not terminate; guarantee NUL at the end. */
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return 0;
}

/*
 * vfs_getnewfsid: get a new unique fsid.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	struct mountlist_entry *me;
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	mtype = makefstype(mp->mnt_op->vfs_name);
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	/* Always increment to not return the same fsid to parallel mounts. */
	xxxfs_mntid++;

	/*
	 * Directly walk mountlist to prevent deadlock through
	 * mountlist_iterator_next() -> vfs_busy().
	 */
	mutex_enter(&mountlist_lock);
	for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) {
		if (me->me_type == ME_MOUNT &&
		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] ==
		    tfsid.__fsid_val[0] &&
		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] ==
		    tfsid.__fsid_val[1]) {
			/* Collision: bump the candidate and rescan the list. */
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
			me = TAILQ_FIRST(&mountlist);
		} else {
			me = TAILQ_NEXT(me, me_list);
		}
	}
	mutex_exit(&mountlist_lock);

	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	mount_iterator_t *iter;
	struct mount *mp;

	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			mountlist_iterator_destroy(iter);
			return mp;
		}
	}
	mountlist_iterator_destroy(iter);
	return NULL;
}

/*
 * Take a reference to a mount structure.
 */
void
vfs_ref(struct mount *mp)
{

	/*
	 * Callers must either already hold a reference, or hold
	 * mountlist_lock (which keeps a listed mount alive).
	 */
	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

	/* release/acquire pair orders prior use before the teardown below. */
	membar_release();
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}
	membar_acquire();

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_obj_free(mp->mnt_updating);
	mutex_obj_free(mp->mnt_renamelock);
	mutex_obj_free(mp->mnt_vnodelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	fstrans_unmount(mp);
	/*
	 * Final free of mp gets done from fstrans_mount_dtor().
	 *
	 * Prevents this memory to be reused as a mount before
	 * fstrans releases all references to it.
	 */
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * vfs_busy can be called multiple times and by multiple threads
 * and must be accompanied by the same number of vfs_unbusy calls.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
static inline int
_vfs_busy(struct mount *mp, bool wait)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (wait) {
		fstrans_start(mp);
	} else {
		if (fstrans_start_nowait(mp))
			return EBUSY;
	}
	/* IMNT_GONE means dounmount() already tore the mount down. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		fstrans_done(mp);
		return ENOENT;
	}
	vfs_ref(mp);
	return 0;
}

/* Busy a mount, sleeping if necessary (see _vfs_busy()). */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}

/* Busy a mount without sleeping; returns EBUSY if it would block. */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}

/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}

/*
 * Change a file systems lower mount.
 * Both the current and the new lower mount may be NULL.
 * The caller
 * guarantees exclusive access to the mount and holds a pre-existing
 * reference to the new lower mount.
 */
int
vfs_set_lowermount(struct mount *mp, struct mount *lowermp)
{
	struct mount *oldlowermp;
	int error;

#ifdef DEBUG
	/*
	 * Limit the depth of file system stack so kernel sanitizers
	 * may stress mount/unmount without exhausting the kernel stack.
	 */
	int depth;
	struct mount *mp2;

	for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) {
		if (depth == 23)
			return EINVAL;
	}
#endif

	if (lowermp) {
		if (lowermp == dead_rootmount)
			return ENOENT;
		/* Busy briefly to make sure it is not being unmounted. */
		error = vfs_busy(lowermp);
		if (error)
			return error;
		vfs_ref(lowermp);
	}

	oldlowermp = mp->mnt_lower;
	mp->mnt_lower = lowermp;

	if (lowermp)
		vfs_unbusy(lowermp);

	if (oldlowermp)
		vfs_rele(oldlowermp);

	return 0;
}

/* A vnode iterator is a marker vnode threaded onto mnt_vnodelist. */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};

/*
 * vfs_vnode_iterator_init: begin iterating over the vnodes of mount
 * "mp" by inserting a marker vnode at the head of its vnode list.
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	vp->v_usecount = 1;	/* v_usecount != 0 <=> marker is on the list */
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}

/*
 * vfs_vnode_iterator_destroy: unhook the marker from the vnode list
 * (if it is still on it) and free it.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	/* Non-zero usecount means the marker is still on the list. */
	if (vrefcnt(mvp) != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}

/*
 * vfs_vnode_iterator_next1: return the next vnode after the marker
 * that passes filter "f" (if given), referenced via vcache_vget(),
 * and re-insert the marker after it.  With do_wait true, wait for
 * vnodes in a changing/dying state; otherwise skip them.  Returns
 * NULL at the end of the list.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		/* Skip other markers, dead/dying vnodes and filter misses. */
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		/* vcache_vget() may fail with ENOENT if vp died; retry. */
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}

/* Public iterator step: does not wait for dying vnodes. */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;
	kmutex_t *lock;

	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL) {
		lock = omp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL) {
		lock = mp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * vflushnext: fetch the next vnode for vflush(), yielding the CPU
 * roughly every hz/10 ticks so a long flush does not hog the system.
 */
static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{
	if (getticks() > *when) {
		yield();
		*when = getticks() + hz / 10;
	}
	preempt_point();
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}

/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Not open for write and still linked: leave it alone. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}

/*
 * vflush: flush all vnodes of mount "mp" except "skipvp", honouring
 * the flags described above.  Retries a few times while vnodes stay
 * busy; returns 0 on success, EBUSY if busy vnodes remain, or the
 * first fatal error from vflush_one().
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}

/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted. If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/* If only our caller references olddp there is nothing to update. */
	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Hold the cwdinfo so proc_lock can be dropped. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}

/*
 * Start extended attributes
 */
static int
start_extattr(struct mount *mp)
{
	int error;

	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
	if (error)
		printf("%s: failed to start extattr: error = %d\n",
		    mp->mnt_stat.f_mntonname, error);

	return error;
}

/*
 * mount_domount: mount a new file system of type "vfsops" on the
 * directory vnode *vpp at "path".  On success the caller's vnode
 * reference is consumed and *vpp is set to NULL; on failure the
 * caller keeps its reference.  vfsops is always released.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, error2;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons. Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0) {
		vfs_rele(mp);
		return error;
	}

	/* Suspend new file system before taking mnt_updating. */
	do {
		error2 = vfs_suspend(mp, 0);
	} while (error2 == EINTR || error2 == ERESTART);
	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
	mutex_enter(mp->mnt_updating);

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the covered vnode... */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* ...and nobody may have mounted on it meanwhile. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* Undo the successful VFS_MOUNT() done above. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);
	vfs_set_lowermount(mp, NULL);
	vfs_rele(mp);

	return error;
}

/*
 * Do the actual file system unmount. File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	struct vnode *coveredvp, *vp;
	struct vnode_impl *vip;
	int error, async, used_syncer, used_extattr;
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* Unmount failed: roll everything back and stay mounted. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * mark filesystem as gone to prevent further umounts
	 * after mnt_umounting lock is gone, this also prevents
	 * vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		coveredvp->v_mountedhere = NULL;
	}
	if (!was_suspended)
		vfs_resume(mp);

	mountlist_remove(mp);

	/* All vnodes must be gone by now, or the unmount was incomplete. */
	if ((vip = TAILQ_FIRST(&mp->mnt_vnodelist)) != NULL) {
		vp = VIMPL_TO_VNODE(vip);
		vprint("dangling", vp);
		panic("unmount: dangling vnode");
	}
	vfs_hooks_unmount(mp);

	vfs_set_lowermount(mp, NULL);
	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	return vfs_unmountall1(l, true, true);
}

/* Report one completed unmount; pfx distinguishes forced unmounts. */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}

/*
 * Return the mount with the highest generation less than "gen".
 */
static struct mount *
vfs_unmount_next(uint64_t gen)
{
	mount_iterator_t *iter;
	struct mount *mp, *nmp;

	nmp = NULL;

	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
		    mp->mnt_gen < gen) {
			/* New best candidate; swap the held reference. */
			if (nmp != NULL)
				vfs_rele(nmp);
			nmp = mp;
			vfs_ref(nmp);
		}
	}
	mountlist_iterator_destroy(iter);

	return nmp;
}

/*
 * vfs_unmount_forceone: forcibly unmount the most recently mounted
 * file system; returns true if one was unmounted.  Used as a last
 * resort during shutdown.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp;
	int error;

	mp = vfs_unmount_next(mountgen);
	if (mp == NULL) {
		return false;
	}

#ifdef DEBUG
	printf("forcefully unmounting %s (%s)...\n",
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
	/* On success dounmount() consumed our reference. */
	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(mp, "forcefully ");
		return true;
	} else {
		vfs_rele(mp);
	}

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    mp->mnt_stat.f_mntonname, error);
#endif

	return false;
}

/*
 * vfs_unmountall1: unmount all file systems in reverse mount order
 * (newest generation first).  With "force" pass MNT_FORCE to each
 * unmount; with "verbose" log progress and failures.  Returns true
 * if at least one file system was unmounted.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp;
	mount_iterator_t *iter;
	bool any_error = false, progress = false;
	uint64_t gen;
	int error;

	gen = mountgen;
	for (;;) {
		mp = vfs_unmount_next(gen);
		if (mp == NULL)
			break;
		gen = mp->mnt_gen;

#ifdef DEBUG
		printf("unmounting %p %s (%s)...\n",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			vfs_rele(mp);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose) {
		printf("unmounting done\n");
	}
	if (any_error && verbose) {
		printf("WARNING: some file systems would not unmount\n");
	}
	/* If the mountlist is empty it is time to remove swap. */
	mountlist_iterator_init(&iter);
	if (mountlist_iterator_next(iter) == NULL) {
		uvm_swap_shutdown(l);
	}
	mountlist_iterator_destroy(iter);

	return progress;
}

/*
 * vfs_sync_all: sync all disks for shutdown; suspends scheduling of
 * user processes first and waits for the sync to complete.
 */
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void)spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	do_sys_sync(l);

	/* Wait for sync to finish. */
	if (vfs_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	lwp_t *l = curlwp;

	vfs_sync_all(l);

	/*
	 * If we have panicked - do not make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL) {
		return;
	}

	/* Unmount file systems. */
	vfs_unmountall(l);
}

/*
 * Print a list of supported file system types (used by vfs_mountroot)
 */
static void
vfs_print_fstypes(void)
{
	struct vfsops *v;
	int cnt = 0;

	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list)
		++cnt;
	mutex_exit(&vfs_list_lock);

	if (cnt == 0) {
		printf("WARNING: No file system modules have been loaded.\n");
		return;
	}

	printf("Supported file systems:");
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		printf(" %s", v->vfs_name);
	}
	mutex_exit(&vfs_list_lock);
	printf("\n");
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (device_class(root_device)) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it. Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/* Hold the vfs while trying it with vfs_list_lock dropped. */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	if (error && device_class(root_device) == DV_DISK) {
		/* Undo the VOP_OPEN() done above for the DV_DISK case. */
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		vrele(rootvp);
	}
	if (error == 0) {
		mount_iterator_t *iter;
		struct mount *mp;

		/* The root mount is the first (and only) list entry. */
		mountlist_iterator_init(&iter);
		mp = mountlist_iterator_next(iter);
		KASSERT(mp != NULL);
		mountlist_iterator_destroy(iter);

		mp->mnt_flag |= MNT_ROOTFS;
		mp->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
		 * reference it, and donate it the reference grabbed
		 * with VFS_ROOT().
		 */
		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info. All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		vref(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
		/*
		 * Enable loading of modules from the filesystem
		 */
		module_load_vfs_init();

	}
	return (error);
}

/*
 * mount_specific_key_create --
 *	Create a key for subsystem mount-specific data.
 */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
}

/*
 * mount_specific_key_delete --
 *	Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *	Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
	int error __diagused;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *	Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *	Return mount-specific data corresponding to the specified key.
 */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

	return specificdata_getspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key);
}

/*
 * mount_setspecific --
 *	Set mount-specific data corresponding to the specified key.
 */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

	specificdata_setspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key, data);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 *
 * Returns ENOTBLK if vp is not a block device, EBUSY if a file
 * system is mounted on it (or on its alias), 0 otherwise.
 */
int
vfs_mountedon(vnode_t *vp)
{
	vnode_t *vq;
	int error = 0;

	if (vp->v_type != VBLK)
		return ENOTBLK;
	if (spec_node_getmountedfs(vp) != NULL)
		return EBUSY;
	/* Also check an aliased vnode for the same device, if any. */
	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
	    == 0) {
		if (spec_node_getmountedfs(vq) != NULL)
			error = EBUSY;
		vrele(vq);
	}

	return error;
}

/*
 * Check if a device pointed to by vp is mounted.
 *
 * Returns:
 *   EINVAL	if it's not a disk
 *   EBUSY	if it's a disk and mounted
 *   0		if it's a disk and not mounted
 */
int
rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
{
	vnode_t *bvp;
	dev_t dev;
	int d_type;

	bvp = NULL;
	d_type = D_OTHER;

	if (iskmemvp(vp))
		return EINVAL;

	switch (vp->v_type) {
	case VCHR: {
		const struct cdevsw *cdev;

		/*
		 * Character device: map it to its block twin and look
		 * up the corresponding block vnode.
		 */
		dev = vp->v_rdev;
		cdev = cdevsw_lookup(dev);
		if (cdev != NULL) {
			dev_t blkdev;

			blkdev = devsw_chr2blk(dev);
			if (blkdev != NODEV) {
				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
					d_type = (cdev->d_flag & D_TYPEMASK);
					/* XXX: what if bvp disappears? */
					vrele(bvp);
				}
			}
		}

		break;
		}

	case VBLK: {
		const struct bdevsw *bdev;

		dev = vp->v_rdev;
		bdev = bdevsw_lookup(dev);
		if (bdev != NULL)
			d_type = (bdev->d_flag & D_TYPEMASK);

		bvp = vp;

		break;
		}

	default:
		break;
	}

	if (d_type != D_DISK)
		return EINVAL;

	if (bvpp != NULL)
		*bvpp = bvp;

	/*
	 * XXX: This is bogus. We should be failing the request
	 * XXX: not only if this specific slice is mounted, but
	 * XXX: if it's on a disk with any other mounted slice.
	 */
	if (vfs_mountedon(bvp))
		return EBUSY;

	return 0;
}

/*
 * Make a 'unique' number from a mount type name.
 * (A hash, really: distinct names can collide.)
 */
long
makefstype(const char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Allocate a mount list entry (marker or real mount) for "mp".
 */
static struct mountlist_entry *
mountlist_alloc(enum mountlist_type type, struct mount *mp)
{
	struct mountlist_entry *me;

	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
	me->me_mount = mp;
	me->me_type = type;

	return me;
}

/*
 * Free a mount list entry.
 */
static void
mountlist_free(struct mountlist_entry *me)
{

	kmem_free(me, sizeof(*me));
}

/*
 * Create an iterator: insert a marker entry at the head of the
 * mount list.  The marker itself is the iterator handle.
 */
void
mountlist_iterator_init(mount_iterator_t **mip)
{
	struct mountlist_entry *me;

	me = mountlist_alloc(ME_MARKER, NULL);
	mutex_enter(&mountlist_lock);
	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
	*mip = (mount_iterator_t *)me;
}

/*
 * Destroy an iterator: unbusy the mount it currently holds, if any,
 * then unlink and free the marker.
 */
void
mountlist_iterator_destroy(mount_iterator_t *mi)
{
	struct mountlist_entry *marker = &mi->mi_entry;

	if (marker->me_mount != NULL)
		vfs_unbusy(marker->me_mount);

	mutex_enter(&mountlist_lock);
	TAILQ_REMOVE(&mountlist, marker, me_list);
	mutex_exit(&mountlist_lock);

	mountlist_free(marker);

}

/*
 * Return the next mount or NULL for this iterator.
 * Mark it busy on success.
 */
static inline struct mount *
_mountlist_iterator_next(mount_iterator_t *mi, bool wait)
{
	struct mountlist_entry *me, *marker = &mi->mi_entry;
	struct mount *mp;
	int error;

	/* Release the mount returned by the previous call. */
	if (marker->me_mount != NULL) {
		vfs_unbusy(marker->me_mount);
		marker->me_mount = NULL;
	}

	mutex_enter(&mountlist_lock);
	for (;;) {
		KASSERT(marker->me_type == ME_MARKER);

		me = TAILQ_NEXT(marker, me_list);
		if (me == NULL) {
			/* End of list: keep marker and return. */
			mutex_exit(&mountlist_lock);
			return NULL;
		}
		/*
		 * Advance the marker past "me" first, so our position
		 * is preserved even if we drop the lock below.
		 */
		TAILQ_REMOVE(&mountlist, marker, me_list);
		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);

		/* Skip other markers. */
		if (me->me_type != ME_MOUNT)
			continue;

		/* Take an initial reference for vfs_busy() below. */
		mp = me->me_mount;
		KASSERT(mp != NULL);
		vfs_ref(mp);
		mutex_exit(&mountlist_lock);

		/* Try to mark this mount busy and return on success. */
		if (wait)
			error = vfs_busy(mp);
		else
			error = vfs_trybusy(mp);
		if (error == 0) {
			/* Busy ref held via marker; drop the initial ref. */
			vfs_rele(mp);
			marker->me_mount = mp;
			return mp;
		}
		vfs_rele(mp);
		mutex_enter(&mountlist_lock);
	}
}

/*
 * Iterate to the next mount, waiting for it to become unbusy.
 */
struct mount *
mountlist_iterator_next(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, true);
}

/*
 * Iterate to the next mount, skipping mounts that are busy.
 */
struct mount *
mountlist_iterator_trynext(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, false);
}

/*
 * Attach new mount to the end of the mount list.
1670 */ 1671void 1672mountlist_append(struct mount *mp) 1673{ 1674 struct mountlist_entry *me; 1675 1676 me = mountlist_alloc(ME_MOUNT, mp); 1677 mutex_enter(&mountlist_lock); 1678 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1679 mutex_exit(&mountlist_lock); 1680} 1681 1682/* 1683 * Remove mount from mount list. 1684 */void 1685mountlist_remove(struct mount *mp) 1686{ 1687 struct mountlist_entry *me; 1688 1689 mutex_enter(&mountlist_lock); 1690 TAILQ_FOREACH(me, &mountlist, me_list) 1691 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1692 break; 1693 KASSERT(me != NULL); 1694 TAILQ_REMOVE(&mountlist, me, me_list); 1695 mutex_exit(&mountlist_lock); 1696 mountlist_free(me); 1697} 1698 1699/* 1700 * Unlocked variant to traverse the mountlist. 1701 * To be used from DDB only. 1702 */ 1703struct mount * 1704_mountlist_next(struct mount *mp) 1705{ 1706 struct mountlist_entry *me; 1707 1708 if (mp == NULL) { 1709 me = TAILQ_FIRST(&mountlist); 1710 } else { 1711 TAILQ_FOREACH(me, &mountlist, me_list) 1712 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1713 break; 1714 if (me != NULL) 1715 me = TAILQ_NEXT(me, me_list); 1716 } 1717 1718 while (me != NULL && me->me_type != ME_MOUNT) 1719 me = TAILQ_NEXT(me, me_list); 1720 1721 return (me ? me->me_mount : NULL); 1722} 1723