vfs_mount.c revision 184588
155992Swpaul/*- 255992Swpaul * Copyright (c) 1999-2004 Poul-Henning Kamp 355992Swpaul * Copyright (c) 1999 Michael Smith 455992Swpaul * Copyright (c) 1989, 1993 555992Swpaul * The Regents of the University of California. All rights reserved. 655992Swpaul * (c) UNIX System Laboratories, Inc. 755992Swpaul * All or some portions of this file are derived from material licensed 855992Swpaul * to the University of California by American Telephone and Telegraph 955992Swpaul * Co. or Unix System Laboratories, Inc. and are reproduced herein with 1055992Swpaul * the permission of UNIX System Laboratories, Inc. 1155992Swpaul * 1255992Swpaul * Redistribution and use in source and binary forms, with or without 1355992Swpaul * modification, are permitted provided that the following conditions 1455992Swpaul * are met: 1555992Swpaul * 1. Redistributions of source code must retain the above copyright 1655992Swpaul * notice, this list of conditions and the following disclaimer. 1755992Swpaul * 2. Redistributions in binary form must reproduce the above copyright 1855992Swpaul * notice, this list of conditions and the following disclaimer in the 1955992Swpaul * documentation and/or other materials provided with the distribution. 2055992Swpaul * 4. Neither the name of the University nor the names of its contributors 2155992Swpaul * may be used to endorse or promote products derived from this software 2255992Swpaul * without specific prior written permission. 2355992Swpaul * 2455992Swpaul * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2555992Swpaul * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2655992Swpaul * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2755992Swpaul * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2855992Swpaul * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2955992Swpaul * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3055992Swpaul * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3155992Swpaul * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3255992Swpaul * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3355992Swpaul * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3455992Swpaul * SUCH DAMAGE. 3555992Swpaul */ 3655992Swpaul 3755992Swpaul#include <sys/cdefs.h> 3855992Swpaul__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 184588 2008-11-03 10:38:00Z dfr $"); 3955992Swpaul 4055992Swpaul#include <sys/param.h> 4155992Swpaul#include <sys/conf.h> 4255992Swpaul#include <sys/fcntl.h> 4355992Swpaul#include <sys/jail.h> 4455992Swpaul#include <sys/kernel.h> 4555992Swpaul#include <sys/libkern.h> 4655992Swpaul#include <sys/malloc.h> 4755992Swpaul#include <sys/mount.h> 4855992Swpaul#include <sys/mutex.h> 4955992Swpaul#include <sys/namei.h> 5055992Swpaul#include <sys/priv.h> 5155992Swpaul#include <sys/proc.h> 5255992Swpaul#include <sys/filedesc.h> 5355992Swpaul#include <sys/reboot.h> 5455992Swpaul#include <sys/syscallsubr.h> 5555992Swpaul#include <sys/sysproto.h> 5655992Swpaul#include <sys/sx.h> 5755992Swpaul#include <sys/sysctl.h> 5855992Swpaul#include <sys/sysent.h> 5955992Swpaul#include <sys/systm.h> 6055992Swpaul#include <sys/vnode.h> 6155992Swpaul#include <vm/uma.h> 6255992Swpaul 6355992Swpaul#include <geom/geom.h> 6455992Swpaul 6555992Swpaul#include <machine/stdarg.h> 6655992Swpaul 6755992Swpaul#include <security/audit/audit.h> 6855992Swpaul#include <security/mac/mac_framework.h> 6955992Swpaul 7055992Swpaul#include "opt_rootdevname.h" 7155992Swpaul#include "opt_mac.h" 7284811Sjhb 7367365Sjhb#define ROOTNAME "root_device" 7455992Swpaul#define VFS_MOUNTARG_SIZE_MAX (1024 * 64) 7555992Swpaul 7655992Swpaulstatic int vfs_domount(struct thread *td, const char *fstype, 7755992Swpaul char *fspath, int fsflags, void *fsdata); 7855992Swpaulstatic int vfs_mountroot_ask(void); 7955992Swpaulstatic int vfs_mountroot_try(const char *mountfrom); 8077217Sphkstatic void free_mntarg(struct mntarg *ma); 8155992Swpaulstatic int vfs_getopt_pos(struct vfsoptlist *opts, const char *name); 8255992Swpaul 8355992Swpaulstatic int usermount = 0; 8455992SwpaulSYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 8555992Swpaul "Unprivileged users may mount and unmount file systems"); 8655992Swpaul 8755992SwpaulMALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); 8855992SwpaulMALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); 8955992Swpaulstatic uma_zone_t mount_zone; 9055992Swpaul 9155992Swpaul/* List of mounted filesystems. */ 9255992Swpaulstruct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); 9355992Swpaul 9455992Swpaul/* For any iteration/modification of mountlist */ 9555992Swpaulstruct mtx mountlist_mtx; 9655992SwpaulMTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); 9755992Swpaul 9855992SwpaulTAILQ_HEAD(vfsoptlist, vfsopt); 9955992Swpaulstruct vfsopt { 10081221Sbrooks TAILQ_ENTRY(vfsopt) link; 10155992Swpaul char *name; 10255992Swpaul void *value; 10355992Swpaul int len; 104108401Sambrisko}; 10555992Swpaul 10655992Swpaul/* 10755992Swpaul * The vnode of the system's root (/ in the filesystem, without chroot 10855992Swpaul * active.) 10981221Sbrooks */ 11055992Swpaulstruct vnode *rootvnode; 11155992Swpaul 11255992Swpaul/* 11355992Swpaul * The root filesystem is detailed in the kernel environment variable 11455992Swpaul * vfs.root.mountfrom, which is expected to be in the general format 11555992Swpaul * 11692739Salfred * <vfsname>:[<path>] 11792739Salfred * vfsname := the name of a VFS known to the kernel and capable 11892739Salfred * of being mounted as root 119110362Sambrisko * path := disk device name or other data used by the filesystem 120110362Sambrisko * to locate its physical store 12155992Swpaul */ 12283270Sbrooks 12383270Sbrooks/* 12455992Swpaul * Global opts, taken by all filesystems 12555992Swpaul */ 12655992Swpaulstatic const char *global_opts[] = { 12755992Swpaul "errmsg", 12855992Swpaul "fstype", 12983270Sbrooks "fspath", 13055992Swpaul "ro", 13156051Swpaul "rw", 13255992Swpaul "nosuid", 13355992Swpaul "noexec", 13455992Swpaul NULL 13555992Swpaul}; 13655992Swpaul 13755992Swpaul/* 138108401Sambrisko * The root specifiers we will try if RB_CDROM is specified. 139108401Sambrisko */ 140108401Sambriskostatic char *cdrom_rootdevnames[] = { 141108401Sambrisko "cd9660:cd0", 142108401Sambrisko "cd9660:acd0", 143108401Sambrisko NULL 14455992Swpaul}; 14555992Swpaul 14655992Swpaul/* legacy find-root code */ 14783270Sbrookschar *rootdevnames[2] = {NULL, NULL}; 14883270Sbrooks#ifndef ROOTDEVNAME 14955992Swpaul# define ROOTDEVNAME NULL 15055992Swpaul#endif 15155992Swpaulstatic const char *ctrootdevname = ROOTDEVNAME; 15255992Swpaul 15355992Swpaul/* 15455992Swpaul * --------------------------------------------------------------------- 15555992Swpaul * Functions for building and sanitizing the mount options 15655992Swpaul */ 15755992Swpaul 15855992Swpaul/* Remove one mount option. */ 15983270Sbrooksstatic void 160108401Sambriskovfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) 161108401Sambrisko{ 162108401Sambrisko 163108401Sambrisko TAILQ_REMOVE(opts, opt, link); 164108401Sambrisko free(opt->name, M_MOUNT); 165108401Sambrisko if (opt->value != NULL) 166108401Sambrisko free(opt->value, M_MOUNT); 167108401Sambrisko#ifdef INVARIANTS 168108401Sambrisko else if (opt->len != 0) 169108401Sambrisko panic("%s: mount option with NULL value but length != 0", 170108401Sambrisko __func__); 171108401Sambrisko#endif 17255992Swpaul free(opt, M_MOUNT); 173108401Sambrisko} 174108401Sambrisko 175108401Sambrisko/* Release all resources related to the mount options. */ 176108401Sambriskovoid 177108401Sambriskovfs_freeopts(struct vfsoptlist *opts) 178108401Sambrisko{ 17955992Swpaul struct vfsopt *opt; 18055992Swpaul 18155992Swpaul while (!TAILQ_EMPTY(opts)) { 18255992Swpaul opt = TAILQ_FIRST(opts); 18355992Swpaul vfs_freeopt(opts, opt); 18455992Swpaul } 18555992Swpaul free(opts, M_MOUNT); 18655992Swpaul} 18755992Swpaul 18855992Swpaulvoid 18955992Swpaulvfs_deleteopt(struct vfsoptlist *opts, const char *name) 190108401Sambrisko{ 191108401Sambrisko struct vfsopt *opt, *temp; 192108401Sambrisko 193108401Sambrisko if (opts == NULL) 194108401Sambrisko return; 195108401Sambrisko TAILQ_FOREACH_SAFE(opt, opts, link, temp) { 196108401Sambrisko if (strcmp(opt->name, name) == 0) 197108401Sambrisko vfs_freeopt(opts, opt); 198108401Sambrisko } 199108401Sambrisko} 200108401Sambrisko 201108401Sambrisko/* 202108401Sambrisko * Check if options are equal (with or without the "no" prefix). 203108401Sambrisko */ 204108401Sambriskostatic int 205108401Sambriskovfs_equalopts(const char *opt1, const char *opt2) 206108401Sambrisko{ 207108401Sambrisko 208108401Sambrisko /* "opt" vs. "opt" or "noopt" vs. "noopt" */ 209108401Sambrisko if (strcmp(opt1, opt2) == 0) 210108401Sambrisko return (1); 211108401Sambrisko /* "noopt" vs. "opt" */ 212108401Sambrisko if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) 213108401Sambrisko return (1); 214108401Sambrisko /* "opt" vs. "noopt" */ 215108401Sambrisko if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) 216108401Sambrisko return (1); 217108401Sambrisko return (0); 218108401Sambrisko} 219108401Sambrisko 220108401Sambrisko/* 221108401Sambrisko * If a mount option is specified several times, 222108401Sambrisko * (with or without the "no" prefix) only keep 223108401Sambrisko * the last occurence of it. 224108401Sambrisko */ 225108401Sambriskostatic void 226108401Sambriskovfs_sanitizeopts(struct vfsoptlist *opts) 227108401Sambrisko{ 228108401Sambrisko struct vfsopt *opt, *opt2, *tmp; 229108401Sambrisko 23055992Swpaul TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { 23155992Swpaul opt2 = TAILQ_PREV(opt, vfsoptlist, link); 23255992Swpaul while (opt2 != NULL) { 23355992Swpaul if (vfs_equalopts(opt->name, opt2->name)) { 23455992Swpaul tmp = TAILQ_PREV(opt2, vfsoptlist, link); 23583270Sbrooks vfs_freeopt(opts, opt2); 23655992Swpaul opt2 = tmp; 23755992Swpaul } else { 23855992Swpaul opt2 = TAILQ_PREV(opt2, vfsoptlist, link); 23955992Swpaul } 24055992Swpaul } 24155992Swpaul } 24267096Swpaul} 24355992Swpaul 24455992Swpaul/* 24555992Swpaul * Build a linked list of mount options from a struct uio. 246108401Sambrisko */ 247108401Sambriskostatic int 24855992Swpaulvfs_buildopts(struct uio *auio, struct vfsoptlist **options) 24955992Swpaul{ 25055992Swpaul struct vfsoptlist *opts; 25183270Sbrooks struct vfsopt *opt; 25255992Swpaul size_t memused; 25355992Swpaul unsigned int i, iovcnt; 25455992Swpaul int error, namelen, optlen; 25555992Swpaul 25655992Swpaul opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 25755992Swpaul TAILQ_INIT(opts); 25877217Sphk memused = 0; 259106937Ssam iovcnt = auio->uio_iovcnt; 26055992Swpaul for (i = 0; i < iovcnt; i += 2) { 26155992Swpaul opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 26255992Swpaul namelen = auio->uio_iov[i].iov_len; 26355992Swpaul optlen = auio->uio_iov[i + 1].iov_len; 26455992Swpaul opt->name = malloc(namelen, M_MOUNT, M_WAITOK); 26555992Swpaul opt->value = NULL; 266110362Sambrisko opt->len = 0; 267110362Sambrisko 268110362Sambrisko /* 269110362Sambrisko * Do this early, so jumps to "bad" will free the current 270110362Sambrisko * option. 271110362Sambrisko */ 272110362Sambrisko TAILQ_INSERT_TAIL(opts, opt, link); 273110362Sambrisko memused += sizeof(struct vfsopt) + optlen + namelen; 274110362Sambrisko 275110362Sambrisko /* 276110362Sambrisko * Avoid consuming too much memory, and attempts to overflow 277110362Sambrisko * memused. 278110362Sambrisko */ 279110362Sambrisko if (memused > VFS_MOUNTARG_SIZE_MAX || 280110362Sambrisko optlen > VFS_MOUNTARG_SIZE_MAX || 281110362Sambrisko namelen > VFS_MOUNTARG_SIZE_MAX) { 28255992Swpaul error = EINVAL; 28355992Swpaul goto bad; 28455992Swpaul } 28555992Swpaul 28655992Swpaul if (auio->uio_segflg == UIO_SYSSPACE) { 28755992Swpaul bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); 288110362Sambrisko } else { 289110362Sambrisko error = copyin(auio->uio_iov[i].iov_base, opt->name, 29055992Swpaul namelen); 29155992Swpaul if (error) 29255992Swpaul goto bad; 29355992Swpaul } 29455992Swpaul /* Ensure names are null-terminated strings. */ 29555992Swpaul if (opt->name[namelen - 1] != '\0') { 29655992Swpaul error = EINVAL; 29755992Swpaul goto bad; 29855992Swpaul } 29955992Swpaul if (optlen != 0) { 30055992Swpaul opt->len = optlen; 30155992Swpaul opt->value = malloc(optlen, M_MOUNT, M_WAITOK); 302 if (auio->uio_segflg == UIO_SYSSPACE) { 303 bcopy(auio->uio_iov[i + 1].iov_base, opt->value, 304 optlen); 305 } else { 306 error = copyin(auio->uio_iov[i + 1].iov_base, 307 opt->value, optlen); 308 if (error) 309 goto bad; 310 } 311 } 312 } 313 vfs_sanitizeopts(opts); 314 *options = opts; 315 return (0); 316bad: 317 vfs_freeopts(opts); 318 return (error); 319} 320 321/* 322 * Merge the old mount options with the new ones passed 323 * in the MNT_UPDATE case. 324 * 325 * XXX This function will keep a "nofoo" option in the 326 * new options if there is no matching "foo" option 327 * to be cancelled in the old options. This is a bug 328 * if the option's canonical name is "foo". E.g., "noro" 329 * shouldn't end up in the mount point's active options, 330 * but it can. 331 */ 332static void 333vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) 334{ 335 struct vfsopt *opt, *opt2, *new; 336 337 TAILQ_FOREACH(opt, opts, link) { 338 /* 339 * Check that this option hasn't been redefined 340 * nor cancelled with a "no" mount option. 341 */ 342 opt2 = TAILQ_FIRST(toopts); 343 while (opt2 != NULL) { 344 if (strcmp(opt2->name, opt->name) == 0) 345 goto next; 346 if (strncmp(opt2->name, "no", 2) == 0 && 347 strcmp(opt2->name + 2, opt->name) == 0) { 348 vfs_freeopt(toopts, opt2); 349 goto next; 350 } 351 opt2 = TAILQ_NEXT(opt2, link); 352 } 353 /* We want this option, duplicate it. */ 354 new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 355 new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); 356 strcpy(new->name, opt->name); 357 if (opt->len != 0) { 358 new->value = malloc(opt->len, M_MOUNT, M_WAITOK); 359 bcopy(opt->value, new->value, opt->len); 360 } else { 361 new->value = NULL; 362 } 363 new->len = opt->len; 364 TAILQ_INSERT_TAIL(toopts, new, link); 365next: 366 continue; 367 } 368} 369 370/* 371 * Mount a filesystem. 372 */ 373int 374nmount(td, uap) 375 struct thread *td; 376 struct nmount_args /* { 377 struct iovec *iovp; 378 unsigned int iovcnt; 379 int flags; 380 } */ *uap; 381{ 382 struct uio *auio; 383 struct iovec *iov; 384 unsigned int i; 385 int error; 386 u_int iovcnt; 387 388 AUDIT_ARG(fflags, uap->flags); 389 390 /* 391 * Filter out MNT_ROOTFS. We do not want clients of nmount() in 392 * userspace to set this flag, but we must filter it out if we want 393 * MNT_UPDATE on the root file system to work. 394 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 395 */ 396 uap->flags &= ~MNT_ROOTFS; 397 398 iovcnt = uap->iovcnt; 399 /* 400 * Check that we have an even number of iovec's 401 * and that we have at least two options. 402 */ 403 if ((iovcnt & 1) || (iovcnt < 4)) 404 return (EINVAL); 405 406 error = copyinuio(uap->iovp, iovcnt, &auio); 407 if (error) 408 return (error); 409 iov = auio->uio_iov; 410 for (i = 0; i < iovcnt; i++) { 411 if (iov->iov_len > MMAXOPTIONLEN) { 412 free(auio, M_IOV); 413 return (EINVAL); 414 } 415 iov++; 416 } 417 error = vfs_donmount(td, uap->flags, auio); 418 419 free(auio, M_IOV); 420 return (error); 421} 422 423/* 424 * --------------------------------------------------------------------- 425 * Various utility functions 426 */ 427 428void 429vfs_ref(struct mount *mp) 430{ 431 432 MNT_ILOCK(mp); 433 MNT_REF(mp); 434 MNT_IUNLOCK(mp); 435} 436 437void 438vfs_rel(struct mount *mp) 439{ 440 441 MNT_ILOCK(mp); 442 MNT_REL(mp); 443 MNT_IUNLOCK(mp); 444} 445 446static int 447mount_init(void *mem, int size, int flags) 448{ 449 struct mount *mp; 450 451 mp = (struct mount *)mem; 452 mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); 453 lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); 454 return (0); 455} 456 457static void 458mount_fini(void *mem, int size) 459{ 460 struct mount *mp; 461 462 mp = (struct mount *)mem; 463 lockdestroy(&mp->mnt_explock); 464 mtx_destroy(&mp->mnt_mtx); 465} 466 467/* 468 * Allocate and initialize the mount point struct. 469 */ 470struct mount * 471vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, 472 struct ucred *cred) 473{ 474 struct mount *mp; 475 476 mp = uma_zalloc(mount_zone, M_WAITOK); 477 bzero(&mp->mnt_startzero, 478 __rangeof(struct mount, mnt_startzero, mnt_endzero)); 479 TAILQ_INIT(&mp->mnt_nvnodelist); 480 mp->mnt_nvnodelistsize = 0; 481 mp->mnt_ref = 0; 482 (void) vfs_busy(mp, MBF_NOWAIT); 483 mp->mnt_op = vfsp->vfc_vfsops; 484 mp->mnt_vfc = vfsp; 485 vfsp->vfc_refcount++; /* XXX Unlocked */ 486 mp->mnt_stat.f_type = vfsp->vfc_typenum; 487 mp->mnt_gen++; 488 strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 489 mp->mnt_vnodecovered = vp; 490 mp->mnt_cred = crdup(cred); 491 mp->mnt_stat.f_owner = cred->cr_uid; 492 strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); 493 mp->mnt_iosize_max = DFLTPHYS; 494#ifdef MAC 495 mac_mount_init(mp); 496 mac_mount_create(cred, mp); 497#endif 498 arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); 499 return (mp); 500} 501 502/* 503 * Destroy the mount struct previously allocated by vfs_mount_alloc(). 504 */ 505void 506vfs_mount_destroy(struct mount *mp) 507{ 508 509 MNT_ILOCK(mp); 510 while (mp->mnt_ref) 511 msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); 512 if (mp->mnt_holdcnt != 0) { 513 printf("Waiting for mount point to be unheld\n"); 514 while (mp->mnt_holdcnt != 0) { 515 mp->mnt_holdcntwaiters++; 516 msleep(&mp->mnt_holdcnt, MNT_MTX(mp), 517 PZERO, "mntdestroy", 0); 518 mp->mnt_holdcntwaiters--; 519 } 520 printf("mount point unheld\n"); 521 } 522 if (mp->mnt_writeopcount > 0) { 523 printf("Waiting for mount point write ops\n"); 524 while (mp->mnt_writeopcount > 0) { 525 mp->mnt_kern_flag |= MNTK_SUSPEND; 526 msleep(&mp->mnt_writeopcount, 527 MNT_MTX(mp), 528 PZERO, "mntdestroy2", 0); 529 } 530 printf("mount point write ops completed\n"); 531 } 532 if (mp->mnt_secondary_writes > 0) { 533 printf("Waiting for mount point secondary write ops\n"); 534 while (mp->mnt_secondary_writes > 0) { 535 mp->mnt_kern_flag |= MNTK_SUSPEND; 536 msleep(&mp->mnt_secondary_writes, 537 MNT_MTX(mp), 538 PZERO, "mntdestroy3", 0); 539 } 540 printf("mount point secondary write ops completed\n"); 541 } 542 MNT_IUNLOCK(mp); 543 mp->mnt_vfc->vfc_refcount--; 544 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { 545 struct vnode *vp; 546 547 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) 548 vprint("", vp); 549 panic("unmount: dangling vnode"); 550 } 551 MNT_ILOCK(mp); 552 if (mp->mnt_kern_flag & MNTK_MWAIT) 553 wakeup(mp); 554 if (mp->mnt_writeopcount != 0) 555 panic("vfs_mount_destroy: nonzero writeopcount"); 556 if (mp->mnt_secondary_writes != 0) 557 panic("vfs_mount_destroy: nonzero secondary_writes"); 558 if (mp->mnt_nvnodelistsize != 0) 559 panic("vfs_mount_destroy: nonzero nvnodelistsize"); 560 mp->mnt_writeopcount = -1000; 561 mp->mnt_nvnodelistsize = -1000; 562 mp->mnt_secondary_writes = -1000; 563 MNT_IUNLOCK(mp); 564#ifdef MAC 565 mac_mount_destroy(mp); 566#endif 567 if (mp->mnt_opt != NULL) 568 vfs_freeopts(mp->mnt_opt); 569 crfree(mp->mnt_cred); 570 uma_zfree(mount_zone, mp); 571} 572 573int 574vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) 575{ 576 struct vfsoptlist *optlist; 577 struct vfsopt *opt, *noro_opt, *tmp_opt; 578 char *fstype, *fspath, *errmsg; 579 int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; 580 int has_rw, has_noro; 581 582 errmsg = fspath = NULL; 583 errmsg_len = has_noro = has_rw = fspathlen = 0; 584 errmsg_pos = -1; 585 586 error = vfs_buildopts(fsoptions, &optlist); 587 if (error) 588 return (error); 589 590 if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) 591 errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); 592 593 /* 594 * We need these two options before the others, 595 * and they are mandatory for any filesystem. 596 * Ensure they are NUL terminated as well. 597 */ 598 fstypelen = 0; 599 error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); 600 if (error || fstype[fstypelen - 1] != '\0') { 601 error = EINVAL; 602 if (errmsg != NULL) 603 strncpy(errmsg, "Invalid fstype", errmsg_len); 604 goto bail; 605 } 606 fspathlen = 0; 607 error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); 608 if (error || fspath[fspathlen - 1] != '\0') { 609 error = EINVAL; 610 if (errmsg != NULL) 611 strncpy(errmsg, "Invalid fspath", errmsg_len); 612 goto bail; 613 } 614 615 /* 616 * We need to see if we have the "update" option 617 * before we call vfs_domount(), since vfs_domount() has special 618 * logic based on MNT_UPDATE. This is very important 619 * when we want to update the root filesystem. 620 */ 621 TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { 622 if (strcmp(opt->name, "update") == 0) { 623 fsflags |= MNT_UPDATE; 624 vfs_freeopt(optlist, opt); 625 } 626 else if (strcmp(opt->name, "async") == 0) 627 fsflags |= MNT_ASYNC; 628 else if (strcmp(opt->name, "force") == 0) { 629 fsflags |= MNT_FORCE; 630 vfs_freeopt(optlist, opt); 631 } 632 else if (strcmp(opt->name, "reload") == 0) { 633 fsflags |= MNT_RELOAD; 634 vfs_freeopt(optlist, opt); 635 } 636 else if (strcmp(opt->name, "multilabel") == 0) 637 fsflags |= MNT_MULTILABEL; 638 else if (strcmp(opt->name, "noasync") == 0) 639 fsflags &= ~MNT_ASYNC; 640 else if (strcmp(opt->name, "noatime") == 0) 641 fsflags |= MNT_NOATIME; 642 else if (strcmp(opt->name, "atime") == 0) { 643 free(opt->name, M_MOUNT); 644 opt->name = strdup("nonoatime", M_MOUNT); 645 } 646 else if (strcmp(opt->name, "noclusterr") == 0) 647 fsflags |= MNT_NOCLUSTERR; 648 else if (strcmp(opt->name, "clusterr") == 0) { 649 free(opt->name, M_MOUNT); 650 opt->name = strdup("nonoclusterr", M_MOUNT); 651 } 652 else if (strcmp(opt->name, "noclusterw") == 0) 653 fsflags |= MNT_NOCLUSTERW; 654 else if (strcmp(opt->name, "clusterw") == 0) { 655 free(opt->name, M_MOUNT); 656 opt->name = strdup("nonoclusterw", M_MOUNT); 657 } 658 else if (strcmp(opt->name, "noexec") == 0) 659 fsflags |= MNT_NOEXEC; 660 else if (strcmp(opt->name, "exec") == 0) { 661 free(opt->name, M_MOUNT); 662 opt->name = strdup("nonoexec", M_MOUNT); 663 } 664 else if (strcmp(opt->name, "nosuid") == 0) 665 fsflags |= MNT_NOSUID; 666 else if (strcmp(opt->name, "suid") == 0) { 667 free(opt->name, M_MOUNT); 668 opt->name = strdup("nonosuid", M_MOUNT); 669 } 670 else if (strcmp(opt->name, "nosymfollow") == 0) 671 fsflags |= MNT_NOSYMFOLLOW; 672 else if (strcmp(opt->name, "symfollow") == 0) { 673 free(opt->name, M_MOUNT); 674 opt->name = strdup("nonosymfollow", M_MOUNT); 675 } 676 else if (strcmp(opt->name, "noro") == 0) { 677 fsflags &= ~MNT_RDONLY; 678 has_noro = 1; 679 } 680 else if (strcmp(opt->name, "rw") == 0) { 681 fsflags &= ~MNT_RDONLY; 682 has_rw = 1; 683 } 684 else if (strcmp(opt->name, "ro") == 0) 685 fsflags |= MNT_RDONLY; 686 else if (strcmp(opt->name, "rdonly") == 0) { 687 free(opt->name, M_MOUNT); 688 opt->name = strdup("ro", M_MOUNT); 689 fsflags |= MNT_RDONLY; 690 } 691 else if (strcmp(opt->name, "suiddir") == 0) 692 fsflags |= MNT_SUIDDIR; 693 else if (strcmp(opt->name, "sync") == 0) 694 fsflags |= MNT_SYNCHRONOUS; 695 else if (strcmp(opt->name, "union") == 0) 696 fsflags |= MNT_UNION; 697 } 698 699 /* 700 * If "rw" was specified as a mount option, and we 701 * are trying to update a mount-point from "ro" to "rw", 702 * we need a mount option "noro", since in vfs_mergeopts(), 703 * "noro" will cancel "ro", but "rw" will not do anything. 704 */ 705 if (has_rw && !has_noro) { 706 noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); 707 noro_opt->name = strdup("noro", M_MOUNT); 708 noro_opt->value = NULL; 709 noro_opt->len = 0; 710 TAILQ_INSERT_TAIL(optlist, noro_opt, link); 711 } 712 713 /* 714 * Be ultra-paranoid about making sure the type and fspath 715 * variables will fit in our mp buffers, including the 716 * terminating NUL. 717 */ 718 if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { 719 error = ENAMETOOLONG; 720 goto bail; 721 } 722 723 mtx_lock(&Giant); 724 error = vfs_domount(td, fstype, fspath, fsflags, optlist); 725 mtx_unlock(&Giant); 726bail: 727 /* copyout the errmsg */ 728 if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) 729 && errmsg_len > 0 && errmsg != NULL) { 730 if (fsoptions->uio_segflg == UIO_SYSSPACE) { 731 bcopy(errmsg, 732 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 733 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 734 } else { 735 copyout(errmsg, 736 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, 737 fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); 738 } 739 } 740 741 if (error != 0) 742 vfs_freeopts(optlist); 743 return (error); 744} 745 746/* 747 * Old mount API. 748 */ 749#ifndef _SYS_SYSPROTO_H_ 750struct mount_args { 751 char *type; 752 char *path; 753 int flags; 754 caddr_t data; 755}; 756#endif 757/* ARGSUSED */ 758int 759mount(td, uap) 760 struct thread *td; 761 struct mount_args /* { 762 char *type; 763 char *path; 764 int flags; 765 caddr_t data; 766 } */ *uap; 767{ 768 char *fstype; 769 struct vfsconf *vfsp = NULL; 770 struct mntarg *ma = NULL; 771 int error; 772 773 AUDIT_ARG(fflags, uap->flags); 774 775 /* 776 * Filter out MNT_ROOTFS. We do not want clients of mount() in 777 * userspace to set this flag, but we must filter it out if we want 778 * MNT_UPDATE on the root file system to work. 779 * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). 780 */ 781 uap->flags &= ~MNT_ROOTFS; 782 783 fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); 784 error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); 785 if (error) { 786 free(fstype, M_TEMP); 787 return (error); 788 } 789 790 AUDIT_ARG(text, fstype); 791 mtx_lock(&Giant); 792 vfsp = vfs_byname_kld(fstype, td, &error); 793 free(fstype, M_TEMP); 794 if (vfsp == NULL) { 795 mtx_unlock(&Giant); 796 return (ENOENT); 797 } 798 if (vfsp->vfc_vfsops->vfs_cmount == NULL) { 799 mtx_unlock(&Giant); 800 return (EOPNOTSUPP); 801 } 802 803 ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); 804 ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); 805 ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); 806 ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); 807 ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); 808 809 error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td); 810 mtx_unlock(&Giant); 811 return (error); 812} 813 814 815/* 816 * vfs_domount(): actually attempt a filesystem mount. 817 */ 818static int 819vfs_domount( 820 struct thread *td, /* Calling thread. */ 821 const char *fstype, /* Filesystem type. */ 822 char *fspath, /* Mount path. */ 823 int fsflags, /* Flags common to all filesystems. */ 824 void *fsdata /* Options local to the filesystem. */ 825 ) 826{ 827 struct vnode *vp; 828 struct mount *mp; 829 struct vfsconf *vfsp; 830 struct oexport_args oexport; 831 struct export_args export; 832 int error, flag = 0; 833 struct vattr va; 834 struct nameidata nd; 835 836 mtx_assert(&Giant, MA_OWNED); 837 /* 838 * Be ultra-paranoid about making sure the type and fspath 839 * variables will fit in our mp buffers, including the 840 * terminating NUL. 841 */ 842 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) 843 return (ENAMETOOLONG); 844 845 if (jailed(td->td_ucred) || usermount == 0) { 846 if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) 847 return (error); 848 } 849 850 /* 851 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. 852 */ 853 if (fsflags & MNT_EXPORTED) { 854 error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); 855 if (error) 856 return (error); 857 } 858 if (fsflags & MNT_SUIDDIR) { 859 error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); 860 if (error) 861 return (error); 862 } 863 /* 864 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. 865 */ 866 if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { 867 if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) 868 fsflags |= MNT_NOSUID | MNT_USER; 869 } 870 871 /* Load KLDs before we lock the covered vnode to avoid reversals. */ 872 vfsp = NULL; 873 if ((fsflags & MNT_UPDATE) == 0) { 874 /* Don't try to load KLDs if we're mounting the root. */ 875 if (fsflags & MNT_ROOTFS) 876 vfsp = vfs_byname(fstype); 877 else 878 vfsp = vfs_byname_kld(fstype, td, &error); 879 if (vfsp == NULL) 880 return (ENODEV); 881 if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) 882 return (EPERM); 883 } 884 /* 885 * Get vnode to be covered 886 */ 887 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, 888 fspath, td); 889 if ((error = namei(&nd)) != 0) 890 return (error); 891 NDFREE(&nd, NDF_ONLY_PNBUF); 892 vp = nd.ni_vp; 893 if (fsflags & MNT_UPDATE) { 894 if ((vp->v_vflag & VV_ROOT) == 0) { 895 vput(vp); 896 return (EINVAL); 897 } 898 mp = vp->v_mount; 899 MNT_ILOCK(mp); 900 flag = mp->mnt_flag; 901 /* 902 * We only allow the filesystem to be reloaded if it 903 * is currently mounted read-only. 904 */ 905 if ((fsflags & MNT_RELOAD) && 906 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 907 MNT_IUNLOCK(mp); 908 vput(vp); 909 return (EOPNOTSUPP); /* Needs translation */ 910 } 911 MNT_IUNLOCK(mp); 912 /* 913 * Only privileged root, or (if MNT_USER is set) the user that 914 * did the original mount is permitted to update it. 915 */ 916 error = vfs_suser(mp, td); 917 if (error) { 918 vput(vp); 919 return (error); 920 } 921 if (vfs_busy(mp, MBF_NOWAIT)) { 922 vput(vp); 923 return (EBUSY); 924 } 925 VI_LOCK(vp); 926 if ((vp->v_iflag & VI_MOUNT) != 0 || 927 vp->v_mountedhere != NULL) { 928 VI_UNLOCK(vp); 929 vfs_unbusy(mp); 930 vput(vp); 931 return (EBUSY); 932 } 933 vp->v_iflag |= VI_MOUNT; 934 VI_UNLOCK(vp); 935 MNT_ILOCK(mp); 936 mp->mnt_flag |= fsflags & 937 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS); 938 MNT_IUNLOCK(mp); 939 VOP_UNLOCK(vp, 0); 940 mp->mnt_optnew = fsdata; 941 vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); 942 } else { 943 /* 944 * If the user is not root, ensure that they own the directory 945 * onto which we are attempting to mount. 946 */ 947 error = VOP_GETATTR(vp, &va, td->td_ucred); 948 if (error) { 949 vput(vp); 950 return (error); 951 } 952 if (va.va_uid != td->td_ucred->cr_uid) { 953 error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 954 0); 955 if (error) { 956 vput(vp); 957 return (error); 958 } 959 } 960 error = vinvalbuf(vp, V_SAVE, 0, 0); 961 if (error != 0) { 962 vput(vp); 963 return (error); 964 } 965 if (vp->v_type != VDIR) { 966 vput(vp); 967 return (ENOTDIR); 968 } 969 VI_LOCK(vp); 970 if ((vp->v_iflag & VI_MOUNT) != 0 || 971 vp->v_mountedhere != NULL) { 972 VI_UNLOCK(vp); 973 vput(vp); 974 return (EBUSY); 975 } 976 vp->v_iflag |= VI_MOUNT; 977 VI_UNLOCK(vp); 978 979 /* 980 * Allocate and initialize the filesystem. 981 */ 982 mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); 983 VOP_UNLOCK(vp, 0); 984 985 /* XXXMAC: pass to vfs_mount_alloc? */ 986 mp->mnt_optnew = fsdata; 987 } 988 989 /* 990 * Set the mount level flags. 991 */ 992 MNT_ILOCK(mp); 993 mp->mnt_flag = (mp->mnt_flag & ~MNT_UPDATEMASK) | 994 (fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS | 995 MNT_RDONLY)); 996 if ((mp->mnt_flag & MNT_ASYNC) == 0) 997 mp->mnt_kern_flag &= ~MNTK_ASYNC; 998 MNT_IUNLOCK(mp); 999 /* 1000 * Mount the filesystem. 1001 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 1002 * get. No freeing of cn_pnbuf. 1003 */ 1004 error = VFS_MOUNT(mp, td); 1005 1006 /* 1007 * Process the export option only if we are 1008 * updating mount options. 1009 */ 1010 if (!error && (fsflags & MNT_UPDATE)) { 1011 if (vfs_copyopt(mp->mnt_optnew, "export", &export, 1012 sizeof(export)) == 0) 1013 error = vfs_export(mp, &export); 1014 else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport, 1015 sizeof(oexport)) == 0) { 1016 export.ex_flags = oexport.ex_flags; 1017 export.ex_root = oexport.ex_root; 1018 export.ex_anon = oexport.ex_anon; 1019 export.ex_addr = oexport.ex_addr; 1020 export.ex_addrlen = oexport.ex_addrlen; 1021 export.ex_mask = oexport.ex_mask; 1022 export.ex_masklen = oexport.ex_masklen; 1023 export.ex_indexfile = oexport.ex_indexfile; 1024 export.ex_numsecflavors = 0; 1025 error = vfs_export(mp, &export); 1026 } 1027 } 1028 1029 if (!error) { 1030 if (mp->mnt_opt != NULL) 1031 vfs_freeopts(mp->mnt_opt); 1032 mp->mnt_opt = mp->mnt_optnew; 1033 (void)VFS_STATFS(mp, &mp->mnt_stat, td); 1034 } 1035 /* 1036 * Prevent external consumers of mount options from reading 1037 * mnt_optnew. 1038 */ 1039 mp->mnt_optnew = NULL; 1040 if (mp->mnt_flag & MNT_UPDATE) { 1041 MNT_ILOCK(mp); 1042 if (error) 1043 mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | 1044 (flag & ~MNT_QUOTA); 1045 else 1046 mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | 1047 MNT_FORCE | MNT_SNAPSHOT); 1048 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1049 mp->mnt_kern_flag |= MNTK_ASYNC; 1050 else 1051 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1052 MNT_IUNLOCK(mp); 1053 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1054 if (mp->mnt_syncer == NULL) 1055 error = vfs_allocate_syncvnode(mp); 1056 } else { 1057 if (mp->mnt_syncer != NULL) 1058 vrele(mp->mnt_syncer); 1059 mp->mnt_syncer = NULL; 1060 } 1061 vfs_unbusy(mp); 1062 VI_LOCK(vp); 1063 vp->v_iflag &= ~VI_MOUNT; 1064 VI_UNLOCK(vp); 1065 vrele(vp); 1066 return (error); 1067 } 1068 MNT_ILOCK(mp); 1069 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1070 mp->mnt_kern_flag |= MNTK_ASYNC; 1071 else 1072 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1073 MNT_IUNLOCK(mp); 1074 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1075 /* 1076 * Put the new filesystem on the mount list after root. 1077 */ 1078 cache_purge(vp); 1079 if (!error) { 1080 struct vnode *newdp; 1081 1082 VI_LOCK(vp); 1083 vp->v_iflag &= ~VI_MOUNT; 1084 VI_UNLOCK(vp); 1085 vp->v_mountedhere = mp; 1086 mtx_lock(&mountlist_mtx); 1087 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1088 mtx_unlock(&mountlist_mtx); 1089 vfs_event_signal(NULL, VQ_MOUNT, 0); 1090 if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td)) 1091 panic("mount: lost mount"); 1092 mountcheckdirs(vp, newdp); 1093 vput(newdp); 1094 VOP_UNLOCK(vp, 0); 1095 if ((mp->mnt_flag & MNT_RDONLY) == 0) 1096 error = vfs_allocate_syncvnode(mp); 1097 vfs_unbusy(mp); 1098 if (error) 1099 vrele(vp); 1100 } else { 1101 VI_LOCK(vp); 1102 vp->v_iflag &= ~VI_MOUNT; 1103 VI_UNLOCK(vp); 1104 vfs_unbusy(mp); 1105 vfs_mount_destroy(mp); 1106 vput(vp); 1107 } 1108 return (error); 1109} 1110 1111/* 1112 * Unmount a filesystem. 1113 * 1114 * Note: unmount takes a path to the vnode mounted on as argument, not 1115 * special file (as before). 1116 */ 1117#ifndef _SYS_SYSPROTO_H_ 1118struct unmount_args { 1119 char *path; 1120 int flags; 1121}; 1122#endif 1123/* ARGSUSED */ 1124int 1125unmount(td, uap) 1126 struct thread *td; 1127 register struct unmount_args /* { 1128 char *path; 1129 int flags; 1130 } */ *uap; 1131{ 1132 struct mount *mp; 1133 char *pathbuf; 1134 int error, id0, id1; 1135 1136 if (jailed(td->td_ucred) || usermount == 0) { 1137 error = priv_check(td, PRIV_VFS_UNMOUNT); 1138 if (error) 1139 return (error); 1140 } 1141 1142 pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); 1143 error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); 1144 if (error) { 1145 free(pathbuf, M_TEMP); 1146 return (error); 1147 } 1148 AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1); 1149 mtx_lock(&Giant); 1150 if (uap->flags & MNT_BYFSID) { 1151 /* Decode the filesystem ID. */ 1152 if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { 1153 mtx_unlock(&Giant); 1154 free(pathbuf, M_TEMP); 1155 return (EINVAL); 1156 } 1157 1158 mtx_lock(&mountlist_mtx); 1159 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1160 if (mp->mnt_stat.f_fsid.val[0] == id0 && 1161 mp->mnt_stat.f_fsid.val[1] == id1) 1162 break; 1163 } 1164 mtx_unlock(&mountlist_mtx); 1165 } else { 1166 mtx_lock(&mountlist_mtx); 1167 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 1168 if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) 1169 break; 1170 } 1171 mtx_unlock(&mountlist_mtx); 1172 } 1173 free(pathbuf, M_TEMP); 1174 if (mp == NULL) { 1175 /* 1176 * Previously we returned ENOENT for a nonexistent path and 1177 * EINVAL for a non-mountpoint. We cannot tell these apart 1178 * now, so in the !MNT_BYFSID case return the more likely 1179 * EINVAL for compatibility. 1180 */ 1181 mtx_unlock(&Giant); 1182 return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); 1183 } 1184 1185 /* 1186 * Don't allow unmounting the root filesystem. 1187 */ 1188 if (mp->mnt_flag & MNT_ROOTFS) { 1189 mtx_unlock(&Giant); 1190 return (EINVAL); 1191 } 1192 error = dounmount(mp, uap->flags, td); 1193 mtx_unlock(&Giant); 1194 return (error); 1195} 1196 1197/* 1198 * Do the actual filesystem unmount. 1199 */ 1200int 1201dounmount(mp, flags, td) 1202 struct mount *mp; 1203 int flags; 1204 struct thread *td; 1205{ 1206 struct vnode *coveredvp, *fsrootvp; 1207 int error; 1208 int async_flag; 1209 int mnt_gen_r; 1210 1211 mtx_assert(&Giant, MA_OWNED); 1212 1213 if ((coveredvp = mp->mnt_vnodecovered) != NULL) { 1214 mnt_gen_r = mp->mnt_gen; 1215 VI_LOCK(coveredvp); 1216 vholdl(coveredvp); 1217 vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); 1218 vdrop(coveredvp); 1219 /* 1220 * Check for mp being unmounted while waiting for the 1221 * covered vnode lock. 1222 */ 1223 if (coveredvp->v_mountedhere != mp || 1224 coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { 1225 VOP_UNLOCK(coveredvp, 0); 1226 return (EBUSY); 1227 } 1228 } 1229 /* 1230 * Only privileged root, or (if MNT_USER is set) the user that did the 1231 * original mount is permitted to unmount this filesystem. 1232 */ 1233 error = vfs_suser(mp, td); 1234 if (error) { 1235 if (coveredvp) 1236 VOP_UNLOCK(coveredvp, 0); 1237 return (error); 1238 } 1239 1240 MNT_ILOCK(mp); 1241 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1242 MNT_IUNLOCK(mp); 1243 if (coveredvp) 1244 VOP_UNLOCK(coveredvp, 0); 1245 return (EBUSY); 1246 } 1247 mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ; 1248 /* Allow filesystems to detect that a forced unmount is in progress. */ 1249 if (flags & MNT_FORCE) 1250 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 1251 error = 0; 1252 if (mp->mnt_lockref) { 1253 if (flags & MNT_FORCE) { 1254 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ | 1255 MNTK_UNMOUNTF); 1256 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1257 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1258 wakeup(mp); 1259 } 1260 MNT_IUNLOCK(mp); 1261 if (coveredvp) 1262 VOP_UNLOCK(coveredvp, 0); 1263 return (EBUSY); 1264 } 1265 mp->mnt_kern_flag |= MNTK_DRAINING; 1266 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, 1267 "mount drain", 0); 1268 } 1269 MNT_IUNLOCK(mp); 1270 KASSERT(mp->mnt_lockref == 0, 1271 ("%s: invalid lock refcount in the drain path @ %s:%d", 1272 __func__, __FILE__, __LINE__)); 1273 KASSERT(error == 0, 1274 ("%s: invalid return value for msleep in the drain path @ %s:%d", 1275 __func__, __FILE__, __LINE__)); 1276 vn_start_write(NULL, &mp, V_WAIT); 1277 1278 if (mp->mnt_flag & MNT_EXPUBLIC) 1279 vfs_setpublicfs(NULL, NULL, NULL); 1280 1281 vfs_msync(mp, MNT_WAIT); 1282 MNT_ILOCK(mp); 1283 async_flag = mp->mnt_flag & MNT_ASYNC; 1284 mp->mnt_flag &= ~MNT_ASYNC; 1285 mp->mnt_kern_flag &= ~MNTK_ASYNC; 1286 MNT_IUNLOCK(mp); 1287 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1288 if (mp->mnt_syncer != NULL) 1289 vrele(mp->mnt_syncer); 1290 /* 1291 * For forced unmounts, move process cdir/rdir refs on the fs root 1292 * vnode to the covered vnode. For non-forced unmounts we want 1293 * such references to cause an EBUSY error. 1294 */ 1295 if ((flags & MNT_FORCE) && 1296 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1297 if (mp->mnt_vnodecovered != NULL) 1298 mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); 1299 if (fsrootvp == rootvnode) { 1300 vrele(rootvnode); 1301 rootvnode = NULL; 1302 } 1303 vput(fsrootvp); 1304 } 1305 if (((mp->mnt_flag & MNT_RDONLY) || 1306 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 1307 (flags & MNT_FORCE)) { 1308 error = VFS_UNMOUNT(mp, flags, td); 1309 } 1310 vn_finished_write(mp); 1311 /* 1312 * If we failed to flush the dirty blocks for this mount point, 1313 * undo all the cdir/rdir and rootvnode changes we made above. 1314 * Unless we failed to do so because the device is reporting that 1315 * it doesn't exist anymore. 1316 */ 1317 if (error && error != ENXIO) { 1318 if ((flags & MNT_FORCE) && 1319 VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) { 1320 if (mp->mnt_vnodecovered != NULL) 1321 mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); 1322 if (rootvnode == NULL) { 1323 rootvnode = fsrootvp; 1324 vref(rootvnode); 1325 } 1326 vput(fsrootvp); 1327 } 1328 MNT_ILOCK(mp); 1329 mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; 1330 if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) { 1331 MNT_IUNLOCK(mp); 1332 (void) vfs_allocate_syncvnode(mp); 1333 MNT_ILOCK(mp); 1334 } 1335 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 1336 mp->mnt_flag |= async_flag; 1337 if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) 1338 mp->mnt_kern_flag |= MNTK_ASYNC; 1339 if (mp->mnt_kern_flag & MNTK_MWAIT) { 1340 mp->mnt_kern_flag &= ~MNTK_MWAIT; 1341 wakeup(mp); 1342 } 1343 MNT_IUNLOCK(mp); 1344 if (coveredvp) 1345 VOP_UNLOCK(coveredvp, 0); 1346 return (error); 1347 } 1348 mtx_lock(&mountlist_mtx); 1349 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1350 mtx_unlock(&mountlist_mtx); 1351 if (coveredvp != NULL) { 1352 coveredvp->v_mountedhere = NULL; 1353 vput(coveredvp); 1354 } 1355 vfs_event_signal(NULL, VQ_UNMOUNT, 0); 1356 vfs_mount_destroy(mp); 1357 return (0); 1358} 1359 1360/* 1361 * --------------------------------------------------------------------- 1362 * Mounting of root filesystem 1363 * 1364 */ 1365 1366struct root_hold_token { 1367 const char *who; 1368 LIST_ENTRY(root_hold_token) list; 1369}; 1370 1371static LIST_HEAD(, root_hold_token) root_holds = 1372 LIST_HEAD_INITIALIZER(&root_holds); 1373 1374static int root_mount_complete; 1375 1376/* 1377 * Hold root mount. 1378 */ 1379struct root_hold_token * 1380root_mount_hold(const char *identifier) 1381{ 1382 struct root_hold_token *h; 1383 1384 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); 1385 h->who = identifier; 1386 mtx_lock(&mountlist_mtx); 1387 LIST_INSERT_HEAD(&root_holds, h, list); 1388 mtx_unlock(&mountlist_mtx); 1389 return (h); 1390} 1391 1392/* 1393 * Release root mount. 1394 */ 1395void 1396root_mount_rel(struct root_hold_token *h) 1397{ 1398 1399 mtx_lock(&mountlist_mtx); 1400 LIST_REMOVE(h, list); 1401 wakeup(&root_holds); 1402 mtx_unlock(&mountlist_mtx); 1403 free(h, M_DEVBUF); 1404} 1405 1406/* 1407 * Wait for all subsystems to release root mount. 1408 */ 1409static void 1410root_mount_prepare(void) 1411{ 1412 struct root_hold_token *h; 1413 1414 for (;;) { 1415 DROP_GIANT(); 1416 g_waitidle(); 1417 PICKUP_GIANT(); 1418 mtx_lock(&mountlist_mtx); 1419 if (LIST_EMPTY(&root_holds)) { 1420 mtx_unlock(&mountlist_mtx); 1421 break; 1422 } 1423 printf("Root mount waiting for:"); 1424 LIST_FOREACH(h, &root_holds, list) 1425 printf(" %s", h->who); 1426 printf("\n"); 1427 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold", 1428 hz); 1429 } 1430} 1431 1432/* 1433 * Root was mounted, share the good news. 1434 */ 1435static void 1436root_mount_done(void) 1437{ 1438 1439 /* 1440 * Use a mutex to prevent the wakeup being missed and waiting for 1441 * an extra 1 second sleep. 1442 */ 1443 mtx_lock(&mountlist_mtx); 1444 root_mount_complete = 1; 1445 wakeup(&root_mount_complete); 1446 mtx_unlock(&mountlist_mtx); 1447} 1448 1449/* 1450 * Return true if root is already mounted. 1451 */ 1452int 1453root_mounted(void) 1454{ 1455 1456 /* No mutex is acquired here because int stores are atomic. */ 1457 return (root_mount_complete); 1458} 1459 1460/* 1461 * Wait until root is mounted. 1462 */ 1463void 1464root_mount_wait(void) 1465{ 1466 1467 /* 1468 * Panic on an obvious deadlock - the function can't be called from 1469 * a thread which is doing the whole SYSINIT stuff. 1470 */ 1471 KASSERT(curthread->td_proc->p_pid != 0, 1472 ("root_mount_wait: cannot be called from the swapper thread")); 1473 mtx_lock(&mountlist_mtx); 1474 while (!root_mount_complete) { 1475 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait", 1476 hz); 1477 } 1478 mtx_unlock(&mountlist_mtx); 1479} 1480 1481static void 1482set_rootvnode(struct thread *td) 1483{ 1484 struct proc *p; 1485 1486 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td)) 1487 panic("Cannot find root vnode"); 1488 1489 p = td->td_proc; 1490 FILEDESC_XLOCK(p->p_fd); 1491 1492 if (p->p_fd->fd_cdir != NULL) 1493 vrele(p->p_fd->fd_cdir); 1494 p->p_fd->fd_cdir = rootvnode; 1495 VREF(rootvnode); 1496 1497 if (p->p_fd->fd_rdir != NULL) 1498 vrele(p->p_fd->fd_rdir); 1499 p->p_fd->fd_rdir = rootvnode; 1500 VREF(rootvnode); 1501 1502 FILEDESC_XUNLOCK(p->p_fd); 1503 1504 VOP_UNLOCK(rootvnode, 0); 1505 1506 EVENTHANDLER_INVOKE(mountroot); 1507} 1508 1509/* 1510 * Mount /devfs as our root filesystem, but do not put it on the mountlist 1511 * yet. Create a /dev -> / symlink so that absolute pathnames will lookup. 1512 */ 1513 1514static void 1515devfs_first(void) 1516{ 1517 struct thread *td = curthread; 1518 struct vfsoptlist *opts; 1519 struct vfsconf *vfsp; 1520 struct mount *mp = NULL; 1521 int error; 1522 1523 vfsp = vfs_byname("devfs"); 1524 KASSERT(vfsp != NULL, ("Could not find devfs by name")); 1525 if (vfsp == NULL) 1526 return; 1527 1528 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); 1529 1530 error = VFS_MOUNT(mp, td); 1531 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 1532 if (error) 1533 return; 1534 1535 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 1536 TAILQ_INIT(opts); 1537 mp->mnt_opt = opts; 1538 1539 mtx_lock(&mountlist_mtx); 1540 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 1541 mtx_unlock(&mountlist_mtx); 1542 1543 set_rootvnode(td); 1544 1545 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 1546 if (error) 1547 printf("kern_symlink /dev -> / returns %d\n", error); 1548} 1549 1550/* 1551 * Surgically move our devfs to be mounted on /dev. 1552 */ 1553 1554static void 1555devfs_fixup(struct thread *td) 1556{ 1557 struct nameidata nd; 1558 int error; 1559 struct vnode *vp, *dvp; 1560 struct mount *mp; 1561 1562 /* Remove our devfs mount from the mountlist and purge the cache */ 1563 mtx_lock(&mountlist_mtx); 1564 mp = TAILQ_FIRST(&mountlist); 1565 TAILQ_REMOVE(&mountlist, mp, mnt_list); 1566 mtx_unlock(&mountlist_mtx); 1567 cache_purgevfs(mp); 1568 1569 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td); 1570 VI_LOCK(dvp); 1571 dvp->v_iflag &= ~VI_MOUNT; 1572 VI_UNLOCK(dvp); 1573 dvp->v_mountedhere = NULL; 1574 1575 /* Set up the real rootvnode, and purge the cache */ 1576 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 1577 set_rootvnode(td); 1578 cache_purgevfs(rootvnode->v_mount); 1579 1580 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 1581 error = namei(&nd); 1582 if (error) { 1583 printf("Lookup of /dev for devfs, error: %d\n", error); 1584 return; 1585 } 1586 NDFREE(&nd, NDF_ONLY_PNBUF); 1587 vp = nd.ni_vp; 1588 if (vp->v_type != VDIR) { 1589 vput(vp); 1590 } 1591 error = vinvalbuf(vp, V_SAVE, 0, 0); 1592 if (error) { 1593 vput(vp); 1594 } 1595 cache_purge(vp); 1596 mp->mnt_vnodecovered = vp; 1597 vp->v_mountedhere = mp; 1598 mtx_lock(&mountlist_mtx); 1599 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1600 mtx_unlock(&mountlist_mtx); 1601 VOP_UNLOCK(vp, 0); 1602 vput(dvp); 1603 vfs_unbusy(mp); 1604 1605 /* Unlink the no longer needed /dev/dev -> / symlink */ 1606 kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 1607} 1608 1609/* 1610 * Report errors during filesystem mounting. 1611 */ 1612void 1613vfs_mount_error(struct mount *mp, const char *fmt, ...) 1614{ 1615 struct vfsoptlist *moptlist = mp->mnt_optnew; 1616 va_list ap; 1617 int error, len; 1618 char *errmsg; 1619 1620 error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); 1621 if (error || errmsg == NULL || len <= 0) 1622 return; 1623 1624 va_start(ap, fmt); 1625 vsnprintf(errmsg, (size_t)len, fmt, ap); 1626 va_end(ap); 1627} 1628 1629/* 1630 * Find and mount the root filesystem 1631 */ 1632void 1633vfs_mountroot(void) 1634{ 1635 char *cp; 1636 int error, i, asked = 0; 1637 1638 root_mount_prepare(); 1639 1640 mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), 1641 NULL, NULL, mount_init, mount_fini, 1642 UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1643 devfs_first(); 1644 1645 /* 1646 * We are booted with instructions to prompt for the root filesystem. 1647 */ 1648 if (boothowto & RB_ASKNAME) { 1649 if (!vfs_mountroot_ask()) 1650 goto mounted; 1651 asked = 1; 1652 } 1653 1654 /* 1655 * The root filesystem information is compiled in, and we are 1656 * booted with instructions to use it. 1657 */ 1658 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 1659 if (!vfs_mountroot_try(ctrootdevname)) 1660 goto mounted; 1661 ctrootdevname = NULL; 1662 } 1663 1664 /* 1665 * We've been given the generic "use CDROM as root" flag. This is 1666 * necessary because one media may be used in many different 1667 * devices, so we need to search for them. 1668 */ 1669 if (boothowto & RB_CDROM) { 1670 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 1671 if (!vfs_mountroot_try(cdrom_rootdevnames[i])) 1672 goto mounted; 1673 } 1674 } 1675 1676 /* 1677 * Try to use the value read by the loader from /etc/fstab, or 1678 * supplied via some other means. This is the preferred 1679 * mechanism. 1680 */ 1681 cp = getenv("vfs.root.mountfrom"); 1682 if (cp != NULL) { 1683 error = vfs_mountroot_try(cp); 1684 freeenv(cp); 1685 if (!error) 1686 goto mounted; 1687 } 1688 1689 /* 1690 * Try values that may have been computed by code during boot 1691 */ 1692 if (!vfs_mountroot_try(rootdevnames[0])) 1693 goto mounted; 1694 if (!vfs_mountroot_try(rootdevnames[1])) 1695 goto mounted; 1696 1697 /* 1698 * If we (still) have a compiled-in default, try it. 1699 */ 1700 if (ctrootdevname != NULL) 1701 if (!vfs_mountroot_try(ctrootdevname)) 1702 goto mounted; 1703 /* 1704 * Everything so far has failed, prompt on the console if we haven't 1705 * already tried that. 1706 */ 1707 if (!asked) 1708 if (!vfs_mountroot_ask()) 1709 goto mounted; 1710 1711 panic("Root mount failed, startup aborted."); 1712 1713mounted: 1714 root_mount_done(); 1715} 1716 1717/* 1718 * Mount (mountfrom) as the root filesystem. 1719 */ 1720static int 1721vfs_mountroot_try(const char *mountfrom) 1722{ 1723 struct mount *mp; 1724 char *vfsname, *path; 1725 time_t timebase; 1726 int error; 1727 char patt[32]; 1728 1729 vfsname = NULL; 1730 path = NULL; 1731 mp = NULL; 1732 error = EINVAL; 1733 1734 if (mountfrom == NULL) 1735 return (error); /* don't complain */ 1736 printf("Trying to mount root from %s\n", mountfrom); 1737 1738 /* parse vfs name and path */ 1739 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 1740 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 1741 vfsname[0] = path[0] = 0; 1742 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 1743 if (sscanf(mountfrom, patt, vfsname, path) < 1) 1744 goto out; 1745 1746 if (path[0] == '\0') 1747 strcpy(path, ROOTNAME); 1748 1749 error = kernel_vmount( 1750 MNT_RDONLY | MNT_ROOTFS, 1751 "fstype", vfsname, 1752 "fspath", "/", 1753 "from", path, 1754 NULL); 1755 if (error == 0) { 1756 /* 1757 * We mount devfs prior to mounting the / FS, so the first 1758 * entry will typically be devfs. 1759 */ 1760 mp = TAILQ_FIRST(&mountlist); 1761 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 1762 1763 /* 1764 * Iterate over all currently mounted file systems and use 1765 * the time stamp found to check and/or initialize the RTC. 1766 * Typically devfs has no time stamp and the only other FS 1767 * is the actual / FS. 1768 * Call inittodr() only once and pass it the largest of the 1769 * timestamps we encounter. 1770 */ 1771 timebase = 0; 1772 do { 1773 if (mp->mnt_time > timebase) 1774 timebase = mp->mnt_time; 1775 mp = TAILQ_NEXT(mp, mnt_list); 1776 } while (mp != NULL); 1777 inittodr(timebase); 1778 1779 devfs_fixup(curthread); 1780 } 1781out: 1782 free(path, M_MOUNT); 1783 free(vfsname, M_MOUNT); 1784 return (error); 1785} 1786 1787/* 1788 * --------------------------------------------------------------------- 1789 * Interactive root filesystem selection code. 1790 */ 1791 1792static int 1793vfs_mountroot_ask(void) 1794{ 1795 char name[128]; 1796 1797 for(;;) { 1798 printf("\nManual root filesystem specification:\n"); 1799 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 1800#if defined(__amd64__) || defined(__i386__) || defined(__ia64__) 1801 printf(" eg. ufs:da0s1a\n"); 1802#else 1803 printf(" eg. ufs:/dev/da0a\n"); 1804#endif 1805 printf(" ? List valid disk boot devices\n"); 1806 printf(" <empty line> Abort manual input\n"); 1807 printf("\nmountroot> "); 1808 gets(name, sizeof(name), 1); 1809 if (name[0] == '\0') 1810 return (1); 1811 if (name[0] == '?') { 1812 printf("\nList of GEOM managed disk devices:\n "); 1813 g_dev_print(); 1814 continue; 1815 } 1816 if (!vfs_mountroot_try(name)) 1817 return (0); 1818 } 1819} 1820 1821/* 1822 * --------------------------------------------------------------------- 1823 * Functions for querying mount options/arguments from filesystems. 1824 */ 1825 1826/* 1827 * Check that no unknown options are given 1828 */ 1829int 1830vfs_filteropt(struct vfsoptlist *opts, const char **legal) 1831{ 1832 struct vfsopt *opt; 1833 char errmsg[255]; 1834 const char **t, *p, *q; 1835 int ret = 0; 1836 1837 TAILQ_FOREACH(opt, opts, link) { 1838 p = opt->name; 1839 q = NULL; 1840 if (p[0] == 'n' && p[1] == 'o') 1841 q = p + 2; 1842 for(t = global_opts; *t != NULL; t++) { 1843 if (strcmp(*t, p) == 0) 1844 break; 1845 if (q != NULL) { 1846 if (strcmp(*t, q) == 0) 1847 break; 1848 } 1849 } 1850 if (*t != NULL) 1851 continue; 1852 for(t = legal; *t != NULL; t++) { 1853 if (strcmp(*t, p) == 0) 1854 break; 1855 if (q != NULL) { 1856 if (strcmp(*t, q) == 0) 1857 break; 1858 } 1859 } 1860 if (*t != NULL) 1861 continue; 1862 snprintf(errmsg, sizeof(errmsg), 1863 "mount option <%s> is unknown", p); 1864 printf("%s\n", errmsg); 1865 ret = EINVAL; 1866 } 1867 if (ret != 0) { 1868 TAILQ_FOREACH(opt, opts, link) { 1869 if (strcmp(opt->name, "errmsg") == 0) { 1870 strncpy((char *)opt->value, errmsg, opt->len); 1871 } 1872 } 1873 } 1874 return (ret); 1875} 1876 1877/* 1878 * Get a mount option by its name. 1879 * 1880 * Return 0 if the option was found, ENOENT otherwise. 1881 * If len is non-NULL it will be filled with the length 1882 * of the option. If buf is non-NULL, it will be filled 1883 * with the address of the option. 1884 */ 1885int 1886vfs_getopt(opts, name, buf, len) 1887 struct vfsoptlist *opts; 1888 const char *name; 1889 void **buf; 1890 int *len; 1891{ 1892 struct vfsopt *opt; 1893 1894 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1895 1896 TAILQ_FOREACH(opt, opts, link) { 1897 if (strcmp(name, opt->name) == 0) { 1898 if (len != NULL) 1899 *len = opt->len; 1900 if (buf != NULL) 1901 *buf = opt->value; 1902 return (0); 1903 } 1904 } 1905 return (ENOENT); 1906} 1907 1908static int 1909vfs_getopt_pos(struct vfsoptlist *opts, const char *name) 1910{ 1911 struct vfsopt *opt; 1912 int i; 1913 1914 if (opts == NULL) 1915 return (-1); 1916 1917 i = 0; 1918 TAILQ_FOREACH(opt, opts, link) { 1919 if (strcmp(name, opt->name) == 0) 1920 return (i); 1921 ++i; 1922 } 1923 return (-1); 1924} 1925 1926char * 1927vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) 1928{ 1929 struct vfsopt *opt; 1930 1931 *error = 0; 1932 TAILQ_FOREACH(opt, opts, link) { 1933 if (strcmp(name, opt->name) != 0) 1934 continue; 1935 if (((char *)opt->value)[opt->len - 1] != '\0') { 1936 *error = EINVAL; 1937 return (NULL); 1938 } 1939 return (opt->value); 1940 } 1941 *error = ENOENT; 1942 return (NULL); 1943} 1944 1945int 1946vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) 1947{ 1948 struct vfsopt *opt; 1949 1950 TAILQ_FOREACH(opt, opts, link) { 1951 if (strcmp(name, opt->name) == 0) { 1952 if (w != NULL) 1953 *w |= val; 1954 return (1); 1955 } 1956 } 1957 if (w != NULL) 1958 *w &= ~val; 1959 return (0); 1960} 1961 1962int 1963vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) 1964{ 1965 va_list ap; 1966 struct vfsopt *opt; 1967 int ret; 1968 1969 KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); 1970 1971 TAILQ_FOREACH(opt, opts, link) { 1972 if (strcmp(name, opt->name) != 0) 1973 continue; 1974 if (opt->len == 0 || opt->value == NULL) 1975 return (0); 1976 if (((char *)opt->value)[opt->len - 1] != '\0') 1977 return (0); 1978 va_start(ap, fmt); 1979 ret = vsscanf(opt->value, fmt, ap); 1980 va_end(ap); 1981 return (ret); 1982 } 1983 return (0); 1984} 1985 1986/* 1987 * Find and copy a mount option. 1988 * 1989 * The size of the buffer has to be specified 1990 * in len, if it is not the same length as the 1991 * mount option, EINVAL is returned. 1992 * Returns ENOENT if the option is not found. 1993 */ 1994int 1995vfs_copyopt(opts, name, dest, len) 1996 struct vfsoptlist *opts; 1997 const char *name; 1998 void *dest; 1999 int len; 2000{ 2001 struct vfsopt *opt; 2002 2003 KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); 2004 2005 TAILQ_FOREACH(opt, opts, link) { 2006 if (strcmp(name, opt->name) == 0) { 2007 if (len != opt->len) 2008 return (EINVAL); 2009 bcopy(opt->value, dest, opt->len); 2010 return (0); 2011 } 2012 } 2013 return (ENOENT); 2014} 2015 2016/* 2017 * This is a helper function for filesystems to traverse their 2018 * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h 2019 */ 2020 2021struct vnode * 2022__mnt_vnode_next(struct vnode **mvp, struct mount *mp) 2023{ 2024 struct vnode *vp; 2025 2026 mtx_assert(MNT_MTX(mp), MA_OWNED); 2027 2028 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2029 if ((*mvp)->v_yield++ == 500) { 2030 MNT_IUNLOCK(mp); 2031 (*mvp)->v_yield = 0; 2032 uio_yield(); 2033 MNT_ILOCK(mp); 2034 } 2035 vp = TAILQ_NEXT(*mvp, v_nmntvnodes); 2036 while (vp != NULL && vp->v_type == VMARKER) 2037 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2038 2039 /* Check if we are done */ 2040 if (vp == NULL) { 2041 __mnt_vnode_markerfree(mvp, mp); 2042 return (NULL); 2043 } 2044 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2045 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2046 return (vp); 2047} 2048 2049struct vnode * 2050__mnt_vnode_first(struct vnode **mvp, struct mount *mp) 2051{ 2052 struct vnode *vp; 2053 2054 mtx_assert(MNT_MTX(mp), MA_OWNED); 2055 2056 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2057 while (vp != NULL && vp->v_type == VMARKER) 2058 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2059 2060 /* Check if we are done */ 2061 if (vp == NULL) { 2062 *mvp = NULL; 2063 return (NULL); 2064 } 2065 mp->mnt_holdcnt++; 2066 MNT_IUNLOCK(mp); 2067 *mvp = (struct vnode *) malloc(sizeof(struct vnode), 2068 M_VNODE_MARKER, 2069 M_WAITOK | M_ZERO); 2070 MNT_ILOCK(mp); 2071 (*mvp)->v_type = VMARKER; 2072 2073 vp = TAILQ_FIRST(&mp->mnt_nvnodelist); 2074 while (vp != NULL && vp->v_type == VMARKER) 2075 vp = TAILQ_NEXT(vp, v_nmntvnodes); 2076 2077 /* Check if we are done */ 2078 if (vp == NULL) { 2079 MNT_IUNLOCK(mp); 2080 free(*mvp, M_VNODE_MARKER); 2081 MNT_ILOCK(mp); 2082 *mvp = NULL; 2083 mp->mnt_holdcnt--; 2084 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0) 2085 wakeup(&mp->mnt_holdcnt); 2086 return (NULL); 2087 } 2088 (*mvp)->v_mount = mp; 2089 TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); 2090 return (vp); 2091} 2092 2093 2094void 2095__mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp) 2096{ 2097 2098 if (*mvp == NULL) 2099 return; 2100 2101 mtx_assert(MNT_MTX(mp), MA_OWNED); 2102 2103 KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); 2104 TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); 2105 MNT_IUNLOCK(mp); 2106 free(*mvp, M_VNODE_MARKER); 2107 MNT_ILOCK(mp); 2108 *mvp = NULL; 2109 2110 mp->mnt_holdcnt--; 2111 if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0) 2112 wakeup(&mp->mnt_holdcnt); 2113} 2114 2115 2116int 2117__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) 2118{ 2119 int error; 2120 2121 error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td); 2122 if (sbp != &mp->mnt_stat) 2123 *sbp = mp->mnt_stat; 2124 return (error); 2125} 2126 2127void 2128vfs_mountedfrom(struct mount *mp, const char *from) 2129{ 2130 2131 bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); 2132 strlcpy(mp->mnt_stat.f_mntfromname, from, 2133 sizeof mp->mnt_stat.f_mntfromname); 2134} 2135 2136/* 2137 * --------------------------------------------------------------------- 2138 * This is the api for building mount args and mounting filesystems from 2139 * inside the kernel. 2140 * 2141 * The API works by accumulation of individual args. First error is 2142 * latched. 2143 * 2144 * XXX: should be documented in new manpage kernel_mount(9) 2145 */ 2146 2147/* A memory allocation which must be freed when we are done */ 2148struct mntaarg { 2149 SLIST_ENTRY(mntaarg) next; 2150}; 2151 2152/* The header for the mount arguments */ 2153struct mntarg { 2154 struct iovec *v; 2155 int len; 2156 int error; 2157 SLIST_HEAD(, mntaarg) list; 2158}; 2159 2160/* 2161 * Add a boolean argument. 2162 * 2163 * flag is the boolean value. 2164 * name must start with "no". 2165 */ 2166struct mntarg * 2167mount_argb(struct mntarg *ma, int flag, const char *name) 2168{ 2169 2170 KASSERT(name[0] == 'n' && name[1] == 'o', 2171 ("mount_argb(...,%s): name must start with 'no'", name)); 2172 2173 return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); 2174} 2175 2176/* 2177 * Add an argument printf style 2178 */ 2179struct mntarg * 2180mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) 2181{ 2182 va_list ap; 2183 struct mntaarg *maa; 2184 struct sbuf *sb; 2185 int len; 2186 2187 if (ma == NULL) { 2188 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2189 SLIST_INIT(&ma->list); 2190 } 2191 if (ma->error) 2192 return (ma); 2193 2194 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2195 M_MOUNT, M_WAITOK); 2196 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2197 ma->v[ma->len].iov_len = strlen(name) + 1; 2198 ma->len++; 2199 2200 sb = sbuf_new_auto(); 2201 va_start(ap, fmt); 2202 sbuf_vprintf(sb, fmt, ap); 2203 va_end(ap); 2204 sbuf_finish(sb); 2205 len = sbuf_len(sb) + 1; 2206 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2207 SLIST_INSERT_HEAD(&ma->list, maa, next); 2208 bcopy(sbuf_data(sb), maa + 1, len); 2209 sbuf_delete(sb); 2210 2211 ma->v[ma->len].iov_base = maa + 1; 2212 ma->v[ma->len].iov_len = len; 2213 ma->len++; 2214 2215 return (ma); 2216} 2217 2218/* 2219 * Add an argument which is a userland string. 2220 */ 2221struct mntarg * 2222mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) 2223{ 2224 struct mntaarg *maa; 2225 char *tbuf; 2226 2227 if (val == NULL) 2228 return (ma); 2229 if (ma == NULL) { 2230 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2231 SLIST_INIT(&ma->list); 2232 } 2233 if (ma->error) 2234 return (ma); 2235 maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); 2236 SLIST_INSERT_HEAD(&ma->list, maa, next); 2237 tbuf = (void *)(maa + 1); 2238 ma->error = copyinstr(val, tbuf, len, NULL); 2239 return (mount_arg(ma, name, tbuf, -1)); 2240} 2241 2242/* 2243 * Plain argument. 2244 * 2245 * If length is -1, treat value as a C string. 2246 */ 2247struct mntarg * 2248mount_arg(struct mntarg *ma, const char *name, const void *val, int len) 2249{ 2250 2251 if (ma == NULL) { 2252 ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); 2253 SLIST_INIT(&ma->list); 2254 } 2255 if (ma->error) 2256 return (ma); 2257 2258 ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), 2259 M_MOUNT, M_WAITOK); 2260 ma->v[ma->len].iov_base = (void *)(uintptr_t)name; 2261 ma->v[ma->len].iov_len = strlen(name) + 1; 2262 ma->len++; 2263 2264 ma->v[ma->len].iov_base = (void *)(uintptr_t)val; 2265 if (len < 0) 2266 ma->v[ma->len].iov_len = strlen(val) + 1; 2267 else 2268 ma->v[ma->len].iov_len = len; 2269 ma->len++; 2270 return (ma); 2271} 2272 2273/* 2274 * Free a mntarg structure 2275 */ 2276static void 2277free_mntarg(struct mntarg *ma) 2278{ 2279 struct mntaarg *maa; 2280 2281 while (!SLIST_EMPTY(&ma->list)) { 2282 maa = SLIST_FIRST(&ma->list); 2283 SLIST_REMOVE_HEAD(&ma->list, next); 2284 free(maa, M_MOUNT); 2285 } 2286 free(ma->v, M_MOUNT); 2287 free(ma, M_MOUNT); 2288} 2289 2290/* 2291 * Mount a filesystem 2292 */ 2293int 2294kernel_mount(struct mntarg *ma, int flags) 2295{ 2296 struct uio auio; 2297 int error; 2298 2299 KASSERT(ma != NULL, ("kernel_mount NULL ma")); 2300 KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); 2301 KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); 2302 2303 auio.uio_iov = ma->v; 2304 auio.uio_iovcnt = ma->len; 2305 auio.uio_segflg = UIO_SYSSPACE; 2306 2307 error = ma->error; 2308 if (!error) 2309 error = vfs_donmount(curthread, flags, &auio); 2310 free_mntarg(ma); 2311 return (error); 2312} 2313 2314/* 2315 * A printflike function to mount a filesystem. 2316 */ 2317int 2318kernel_vmount(int flags, ...) 2319{ 2320 struct mntarg *ma = NULL; 2321 va_list ap; 2322 const char *cp; 2323 const void *vp; 2324 int error; 2325 2326 va_start(ap, flags); 2327 for (;;) { 2328 cp = va_arg(ap, const char *); 2329 if (cp == NULL) 2330 break; 2331 vp = va_arg(ap, const void *); 2332 ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); 2333 } 2334 va_end(ap); 2335 2336 error = kernel_mount(ma, flags); 2337 return (error); 2338} 2339