vfs_mount.c revision 161584
11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1999-2004 Poul-Henning Kamp
31541Srgrimes * Copyright (c) 1999 Michael Smith
41541Srgrimes * Copyright (c) 1989, 1993
51541Srgrimes *	The Regents of the University of California.  All rights reserved.
61541Srgrimes * (c) UNIX System Laboratories, Inc.
71541Srgrimes * All or some portions of this file are derived from material licensed
81541Srgrimes * to the University of California by American Telephone and Telegraph
91541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
101541Srgrimes * the permission of UNIX System Laboratories, Inc.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes */
361541Srgrimes
37116182Sobrien#include <sys/cdefs.h>
38116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 161584 2006-08-24 18:52:28Z marius $");
39116182Sobrien
40170075Semaste#include <sys/param.h>
41150968Sglebius#include <sys/conf.h>
42147565Speter#include <sys/jail.h>
4344666Sphk#include <sys/kernel.h>
44116874Ssmkelly#include <sys/libkern.h>
4544666Sphk#include <sys/mac.h>
461541Srgrimes#include <sys/malloc.h>
471541Srgrimes#include <sys/mount.h>
481541Srgrimes#include <sys/mutex.h>
49131927Smarcel#include <sys/namei.h>
501541Srgrimes#include <sys/proc.h>
51201879Sattilio#include <sys/filedesc.h>
52201879Sattilio#include <sys/reboot.h>
5374914Sjhb#include <sys/syscallsubr.h>
5467365Sjhb#include <sys/sysproto.h>
551541Srgrimes#include <sys/sx.h>
56111024Sjeff#include <sys/sysctl.h>
571541Srgrimes#include <sys/sysent.h>
58104964Sjeff#include <sys/systm.h>
593308Sphk#include <sys/vnode.h>
60201879Sattilio#include <vm/uma.h>
6176078Sjhb
622320Sdg#include <geom/geom.h>
6312662Sdg
6412662Sdg#include <machine/stdarg.h>
653308Sphk
6667551Sjhb#include <security/audit/audit.h>
6767551Sjhb
68114216Skan#include "opt_rootdevname.h"
69102926Sphk#include "opt_ddb.h"
701541Srgrimes#include "opt_mac.h"
711541Srgrimes
721541Srgrimes#ifdef DDB
731541Srgrimes#include <ddb/ddb.h>
741541Srgrimes#endif
75146799Sjkoshy
76146799Sjkoshy#define	ROOTNAME		"root_device"
77233628Sfabient#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
78233628Sfabient
79146799Sjkoshystatic int	vfs_domount(struct thread *td, const char *fstype,
80146799Sjkoshy		    char *fspath, int fsflags, void *fsdata);
8187902Sluigistatic struct mount *vfs_mount_alloc(struct vnode *dvp, struct vfsconf *vfsp,
8287902Sluigi		    const char *fspath, struct thread *td);
8387902Sluigistatic int	vfs_mountroot_ask(void);
8431639Sfsmpstatic int	vfs_mountroot_try(const char *mountfrom);
8592723Salfredstatic int	vfs_donmount(struct thread *td, int fsflags,
86177253Srwatson		    struct uio *fsoptions);
8710358Sjulianstatic void	free_mntarg(struct mntarg *ma);
88169803Sjeffstatic void	vfs_mount_destroy(struct mount *);
89170468Sattiliostatic int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
90169803Sjeff
91147692Speterstatic int	usermount = 0;
92147692SpeterSYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
93147692Speter    "Unprivileged users may mount and unmount file systems");
94147692Speter
95174070SpeterMALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
96147703SpsMALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
97147692Speterstatic uma_zone_t mount_zone;
98147692Speter
99174070Speter/* List of mounted filesystems. */
100157822Sjhbstruct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
101174070Speter
102174070Speter/* For any iteration/modification of mountlist */
103147703Spsstruct mtx mountlist_mtx;
104147692SpeterMTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
105147692Speter
106147692SpeterTAILQ_HEAD(vfsoptlist, vfsopt);
107147692Speterstruct vfsopt {
108147692Speter	TAILQ_ENTRY(vfsopt) link;
109147692Speter	char	*name;
110147692Speter	void	*value;
111147692Speter	int	len;
112147692Speter};
113147692Speter
114147692Speter/*
115147692Speter * The vnode of the system's root (/ in the filesystem, without chroot
116147692Speter * active.)
117147692Speter */
118147692Speterstruct vnode	*rootvnode;
119192304Sed
120147692Speter/*
121147692Speter * The root filesystem is detailed in the kernel environment variable
122174070Speter * vfs.root.mountfrom, which is expected to be in the general format
123174070Speter *
124174070Speter * <vfsname>:[<path>]
125174070Speter * vfsname   := the name of a VFS known to the kernel and capable
126174070Speter *              of being mounted as root
127174070Speter * path      := disk device name or other data used by the filesystem
128174070Speter *              to locate its physical store
129174072Srwatson */
130174070Speter
131174070Speter/*
132174070Speter * Global opts, taken by all filesystems
133174072Srwatson */
134174070Speterstatic const char *global_opts[] = {
135174070Speter	"errmsg",
136174070Speter	"fstype",
137174070Speter	"fspath",
138174070Speter	"rdonly",
139174070Speter	"ro",
140174070Speter	"rw",
141174070Speter	"suid",
142174070Speter	"exec",
143174070Speter	"update",
144174070Speter	NULL
145174070Speter};
146174070Speter
147174070Speter/*
148174070Speter * The root specifiers we will try if RB_CDROM is specified.
149174070Speter */
150174070Speterstatic char *cdrom_rootdevnames[] = {
151174070Speter	"cd9660:cd0",
152174070Speter	"cd9660:acd0",
153174070Speter	NULL
154174070Speter};
155174070Speter
156174070Speter/* legacy find-root code */
157174070Speterchar		*rootdevnames[2] = {NULL, NULL};
158174070Speter#ifndef ROOTDEVNAME
159174070Speter#  define ROOTDEVNAME NULL
160174070Speter#endif
161174070Speterstatic const char	*ctrootdevname = ROOTDEVNAME;
162174070Speter
163192304Sed/*
164174070Speter * ---------------------------------------------------------------------
165174070Speter * Functions for building and sanitizing the mount options
166201879Sattilio */
167206482Sattilio
168206879Sattilio/* Remove one mount option. */
169206482Sattiliostatic void
170206482Sattiliovfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
171206482Sattilio{
172206482Sattilio
173201879Sattilio	TAILQ_REMOVE(opts, opt, link);
174201879Sattilio	free(opt->name, M_MOUNT);
175201879Sattilio	if (opt->value != NULL)
176201879Sattilio		free(opt->value, M_MOUNT);
177201879Sattilio#ifdef INVARIANTS
178201879Sattilio	else if (opt->len != 0)
179201879Sattilio		panic("%s: mount option with NULL value but length != 0",
180201879Sattilio		    __func__);
181201879Sattilio#endif
182201879Sattilio	free(opt, M_MOUNT);
183206482Sattilio}
184201879Sattilio
185201879Sattilio/* Release all resources related to the mount options. */
186201879Sattiliostatic void
187201879Sattiliovfs_freeopts(struct vfsoptlist *opts)
188201879Sattilio{
189201879Sattilio	struct vfsopt *opt;
190201879Sattilio
191201879Sattilio	while (!TAILQ_EMPTY(opts)) {
192201879Sattilio		opt = TAILQ_FIRST(opts);
193201879Sattilio		vfs_freeopt(opts, opt);
194201879Sattilio	}
195201879Sattilio	free(opts, M_MOUNT);
196201879Sattilio}
197201879Sattilio
/*
 * Decide whether two option names refer to the same option, treating a
 * leading "no" on either side as a negation of the other name.
 * Returns 1 when they match and 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{
	int negated1, negated2;

	/* Same spelling on both sides: "opt"/"opt" or "noopt"/"noopt". */
	if (strcmp(opt1, opt2) == 0)
		return (1);

	negated1 = (strncmp(opt1, "no", 2) == 0);
	negated2 = (strncmp(opt2, "no", 2) == 0);

	/* "noopt" on the left against "opt" on the right. */
	if (negated1 && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* "opt" on the left against "noopt" on the right. */
	if (negated2 && strcmp(opt1, opt2 + 2) == 0)
		return (1);
	return (0);
}
216209761Sattilio
217201879Sattilio/*
218209761Sattilio * If a mount option is specified several times,
219201879Sattilio * (with or without the "no" prefix) only keep
220201879Sattilio * the last occurence of it.
221201879Sattilio */
222201879Sattiliostatic void
223201879Sattiliovfs_sanitizeopts(struct vfsoptlist *opts)
224201879Sattilio{
225201879Sattilio	struct vfsopt *opt, *opt2, *tmp;
226206482Sattilio
227201879Sattilio	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
228201879Sattilio		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
229201879Sattilio		while (opt2 != NULL) {
230201879Sattilio			if (vfs_equalopts(opt->name, opt2->name)) {
231201879Sattilio				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
232201879Sattilio				vfs_freeopt(opts, opt2);
233201879Sattilio				opt2 = tmp;
234201879Sattilio			} else {
235201879Sattilio				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
236201879Sattilio			}
237201879Sattilio		}
238201879Sattilio	}
239201879Sattilio}
240201879Sattilio
241201879Sattilio/*
242209761Sattilio * Build a linked list of mount options from a struct uio.
243209761Sattilio */
244209761Sattiliostatic int
245201879Sattiliovfs_buildopts(struct uio *auio, struct vfsoptlist **options)
246201879Sattilio{
247201879Sattilio	struct vfsoptlist *opts;
248201879Sattilio	struct vfsopt *opt;
249201879Sattilio	size_t memused;
250201879Sattilio	unsigned int i, iovcnt;
251201879Sattilio	int error, namelen, optlen;
252201879Sattilio
253201879Sattilio	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
254201879Sattilio	TAILQ_INIT(opts);
255201879Sattilio	memused = 0;
256201879Sattilio	iovcnt = auio->uio_iovcnt;
257201879Sattilio	for (i = 0; i < iovcnt; i += 2) {
258201879Sattilio		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
259201879Sattilio		namelen = auio->uio_iov[i].iov_len;
260201879Sattilio		optlen = auio->uio_iov[i + 1].iov_len;
261201879Sattilio		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
262201879Sattilio		opt->value = NULL;
263201879Sattilio		opt->len = 0;
264201879Sattilio
265201879Sattilio		/*
266206482Sattilio		 * Do this early, so jumps to "bad" will free the current
267206482Sattilio		 * option.
268206482Sattilio		 */
269206482Sattilio		TAILQ_INSERT_TAIL(opts, opt, link);
270201879Sattilio		memused += sizeof(struct vfsopt) + optlen + namelen;
271206482Sattilio
272206482Sattilio		/*
273206482Sattilio		 * Avoid consuming too much memory, and attempts to overflow
274206482Sattilio		 * memused.
275206482Sattilio		 */
276206482Sattilio		if (memused > VFS_MOUNTARG_SIZE_MAX ||
277206482Sattilio		    optlen > VFS_MOUNTARG_SIZE_MAX ||
278206482Sattilio		    namelen > VFS_MOUNTARG_SIZE_MAX) {
279206482Sattilio			error = EINVAL;
280206482Sattilio			goto bad;
281206482Sattilio		}
282206482Sattilio
283206482Sattilio		if (auio->uio_segflg == UIO_SYSSPACE) {
284201879Sattilio			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
285201879Sattilio		} else {
286201879Sattilio			error = copyin(auio->uio_iov[i].iov_base, opt->name,
287201879Sattilio			    namelen);
288201879Sattilio			if (error)
289201879Sattilio				goto bad;
290201879Sattilio		}
291201879Sattilio		/* Ensure names are null-terminated strings. */
292201879Sattilio		if (opt->name[namelen - 1] != '\0') {
293201879Sattilio			error = EINVAL;
294201879Sattilio			goto bad;
295201879Sattilio		}
296201879Sattilio		if (optlen != 0) {
297214682Sjhb			opt->len = optlen;
298201879Sattilio			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
299201879Sattilio			if (auio->uio_segflg == UIO_SYSSPACE) {
300201879Sattilio				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
301201879Sattilio				    optlen);
302201879Sattilio			} else {
303201879Sattilio				error = copyin(auio->uio_iov[i + 1].iov_base,
304201879Sattilio				    opt->value, optlen);
305201879Sattilio				if (error)
306201879Sattilio					goto bad;
307201879Sattilio			}
308201879Sattilio		}
309227309Sed	}
310227309Sed	vfs_sanitizeopts(opts);
311201879Sattilio	*options = opts;
312201879Sattilio	return (0);
313201879Sattiliobad:
314201879Sattilio	vfs_freeopts(opts);
315201879Sattilio	return (error);
316201879Sattilio}
317201879Sattilio
318201879Sattilio/*
319201879Sattilio * Merge the old mount options with the new ones passed
320201879Sattilio * in the MNT_UPDATE case.
321174070Speter */
322174070Speterstatic void
323174070Spetervfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
324174070Speter{
325174070Speter	struct vfsopt *opt, *opt2, *new;
326174070Speter
327174070Speter	TAILQ_FOREACH(opt, opts, link) {
328174070Speter		/*
329209059Sjhb		 * Check that this option hasn't been redefined
330174070Speter		 * nor cancelled with a "no" mount option.
331174070Speter		 */
332174070Speter		opt2 = TAILQ_FIRST(toopts);
333174070Speter		while (opt2 != NULL) {
334174070Speter			if (strcmp(opt2->name, opt->name) == 0)
335174070Speter				goto next;
336126383Sphk			if (strncmp(opt2->name, "no", 2) == 0 &&
337126383Sphk			    strcmp(opt2->name + 2, opt->name) == 0) {
338116874Ssmkelly				vfs_freeopt(toopts, opt2);
339126383Sphk				goto next;
340116874Ssmkelly			}
341126383Sphk			opt2 = TAILQ_NEXT(opt2, link);
342126383Sphk		}
343126383Sphk		/* We want this option, duplicate it. */
344116874Ssmkelly		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
3451541Srgrimes		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
3461541Srgrimes		strcpy(new->name, opt->name);
3471541Srgrimes		if (opt->len != 0) {
34834618Sphk			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
34934618Sphk			bcopy(opt->value, new->value, opt->len);
35033690Sphk		} else {
35134618Sphk			new->value = NULL;
35234618Sphk		}
35333690Sphk		new->len = opt->len;
35434618Sphk		TAILQ_INSERT_TAIL(toopts, new, link);
35534618Sphknext:
35634618Sphk		continue;
35734618Sphk	}
3581541Srgrimes}
3591541Srgrimes
36034618Sphk/*
36134618Sphk * ---------------------------------------------------------------------
36234618Sphk * Mount a filesystem
36334618Sphk */
36434618Sphkint
3651541Srgrimesnmount(td, uap)
36634618Sphk	struct thread *td;
36734618Sphk	struct nmount_args /* {
3681541Srgrimes		struct iovec *iovp;
36934618Sphk		unsigned int iovcnt;
37034618Sphk		int flags;
37134618Sphk	} */ *uap;
37233690Sphk{
37333690Sphk	struct uio *auio;
37433690Sphk	struct iovec *iov;
3751541Srgrimes	unsigned int i;
3761541Srgrimes	int error;
3771541Srgrimes	u_int iovcnt;
3781541Srgrimes
379110296Sjake	AUDIT_ARG(fflags, uap->flags);
3801541Srgrimes
381110296Sjake	/* Kick out MNT_ROOTFS early as it is legal internally */
3821541Srgrimes	if (uap->flags & MNT_ROOTFS)
383215701Sdim		return (EINVAL);
384212601Smav
385208494Smav	iovcnt = uap->iovcnt;
3861541Srgrimes	/*
3871541Srgrimes	 * Check that we have an even number of iovec's
3881541Srgrimes	 * and that we have at least two options.
38910358Sjulian	 */
39010358Sjulian	if ((iovcnt & 1) || (iovcnt < 4))
39112569Sbde		return (EINVAL);
39212569Sbde
3931541Srgrimes	error = copyinuio(uap->iovp, iovcnt, &auio);
3941541Srgrimes	if (error)
3951541Srgrimes		return (error);
3961541Srgrimes	iov = auio->uio_iov;
3971541Srgrimes	for (i = 0; i < iovcnt; i++) {
3981541Srgrimes		if (iov->iov_len > MMAXOPTIONLEN) {
3991541Srgrimes			free(auio, M_IOV);
400209371Smav			return (EINVAL);
4011541Srgrimes		}
4021541Srgrimes		iov++;
4031541Srgrimes	}
4041541Srgrimes	error = vfs_donmount(td, uap->flags, auio);
4051541Srgrimes
4061541Srgrimes	free(auio, M_IOV);
4071541Srgrimes	return (error);
4081541Srgrimes}
4091541Srgrimes
410126383Sphk/*
411126383Sphk * ---------------------------------------------------------------------
412126383Sphk * Various utility functions
4131541Srgrimes */
4141541Srgrimes
/*
 * Apply MNT_REF() to the mount point while holding its interlock.
 */
void
vfs_ref(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}
42376078Sjhb
/*
 * Apply MNT_REL() to the mount point while holding its interlock.
 */
void
vfs_rel(struct mount *mp)
{

	MNT_ILOCK(mp);
	MNT_REL(mp);
	MNT_IUNLOCK(mp);
}
432172207Sjeff
433163709Sjbstatic int
434170297Sjeffmount_init(void *mem, int size, int flags)
435170297Sjeff{
436172207Sjeff	struct mount *mp;
437172207Sjeff
438170297Sjeff	mp = (struct mount *)mem;
439163709Sjb	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
440170297Sjeff	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
441170297Sjeff	return (0);
442172207Sjeff}
443172207Sjeff
444170297Sjeffstatic void
445170297Sjeffmount_fini(void *mem, int size)
446170297Sjeff{
447212541Smav	struct mount *mp;
448172207Sjeff
449170297Sjeff	mp = (struct mount *)mem;
450146799Sjkoshy	lockdestroy(&mp->mnt_lock);
451233628Sfabient	mtx_destroy(&mp->mnt_mtx);
452146799Sjkoshy}
453146799Sjkoshy
454233628Sfabient/*
455233628Sfabient * Allocate and initialize the mount point struct.
456146799Sjkoshy */
457177859Sjeffstatic struct mount *
45876078Sjhbvfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
45976078Sjhb    const char *fspath, struct thread *td)
46076078Sjhb{
4611541Srgrimes	struct mount *mp;
4621541Srgrimes
4631541Srgrimes	mp = uma_zalloc(mount_zone, M_WAITOK);
464153666Sjhb	bzero(&mp->mnt_startzero,
4651541Srgrimes	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
4661541Srgrimes	TAILQ_INIT(&mp->mnt_nvnodelist);
467177859Sjeff	mp->mnt_nvnodelistsize = 0;
468153666Sjhb	mp->mnt_ref = 0;
469212603Smav	(void) vfs_busy(mp, LK_NOWAIT, 0, td);
470212541Smav	mp->mnt_op = vfsp->vfc_vfsops;
4711541Srgrimes	mp->mnt_vfc = vfsp;
4721541Srgrimes	vfsp->vfc_refcount++;	/* XXX Unlocked */
47376078Sjhb	mp->mnt_stat.f_type = vfsp->vfc_typenum;
47476078Sjhb	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
4751541Srgrimes	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
476110296Sjake	mp->mnt_vnodecovered = vp;
477153666Sjhb	mp->mnt_cred = crdup(td->td_ucred);
478153666Sjhb	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
479110296Sjake	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
48087902Sluigi	mp->mnt_iosize_max = DFLTPHYS;
48190550Sluigi#ifdef MAC
48287902Sluigi	mac_init_mount(mp);
483126383Sphk	mac_create_mount(td->td_ucred, mp);
484126383Sphk#endif
485116874Ssmkelly	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
486126383Sphk	return (mp);
4871541Srgrimes}
4881541Srgrimes
489212541Smav/*
490232783Smav * Destroy the mount struct previously allocated by vfs_mount_alloc().
491212541Smav */
492212541Smavstatic void
493212541Smavvfs_mount_destroy(struct mount *mp)
494212541Smav{
495212541Smav	int i;
496212601Smav
497212601Smav	MNT_ILOCK(mp);
498212601Smav	for (i = 0; mp->mnt_ref && i < 3; i++)
499212601Smav		msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
500212541Smav	/*
501212541Smav	 * This will always cause a 3 second delay in rebooting due to
502212541Smav	 * refs on the root mountpoint that never go away.  Most of these
503212541Smav	 * are held by init which never exits.
504212541Smav	 */
505212541Smav	if (i == 3 && (!rebooting || bootverbose))
506212541Smav		printf("Mount point %s had %d dangling refs\n",
507212541Smav		    mp->mnt_stat.f_mntonname, mp->mnt_ref);
508212541Smav	if (mp->mnt_holdcnt != 0) {
509212541Smav		printf("Waiting for mount point to be unheld\n");
510212541Smav		while (mp->mnt_holdcnt != 0) {
511212541Smav			mp->mnt_holdcntwaiters++;
512212541Smav			msleep(&mp->mnt_holdcnt, MNT_MTX(mp),
513212541Smav			       PZERO, "mntdestroy", 0);
514212541Smav			mp->mnt_holdcntwaiters--;
515212541Smav		}
516212541Smav		printf("mount point unheld\n");
517212541Smav	}
518212541Smav	if (mp->mnt_writeopcount > 0) {
519212541Smav		printf("Waiting for mount point write ops\n");
520212541Smav		while (mp->mnt_writeopcount > 0) {
521212541Smav			mp->mnt_kern_flag |= MNTK_SUSPEND;
522212541Smav			msleep(&mp->mnt_writeopcount,
523212541Smav			       MNT_MTX(mp),
524212541Smav			       PZERO, "mntdestroy2", 0);
525212541Smav		}
526212541Smav		printf("mount point write ops completed\n");
527212541Smav	}
528212541Smav	if (mp->mnt_secondary_writes > 0) {
529212541Smav		printf("Waiting for mount point secondary write ops\n");
530212541Smav		while (mp->mnt_secondary_writes > 0) {
531212541Smav			mp->mnt_kern_flag |= MNTK_SUSPEND;
532212541Smav			msleep(&mp->mnt_secondary_writes,
533212541Smav			       MNT_MTX(mp),
534212541Smav			       PZERO, "mntdestroy3", 0);
535212541Smav		}
536212541Smav		printf("mount point secondary write ops completed\n");
537212541Smav	}
538212541Smav	MNT_IUNLOCK(mp);
539212541Smav	mp->mnt_vfc->vfc_refcount--;
540212541Smav	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
541212541Smav		struct vnode *vp;
542212541Smav
543212541Smav		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
544233628Sfabient			vprint("", vp);
545233628Sfabient		panic("unmount: dangling vnode");
546212541Smav	}
547212541Smav	MNT_ILOCK(mp);
548212541Smav	if (mp->mnt_kern_flag & MNTK_MWAIT)
549212541Smav		wakeup(mp);
550212601Smav	if (mp->mnt_writeopcount != 0)
551212601Smav		panic("vfs_mount_destroy: nonzero writeopcount");
552212603Smav	if (mp->mnt_secondary_writes != 0)
553212541Smav		panic("vfs_mount_destroy: nonzero secondary_writes");
554212601Smav	if (mp->mnt_nvnodelistsize != 0)
555212601Smav		panic("vfs_mount_destroy: nonzero nvnodelistsize");
556212541Smav	mp->mnt_writeopcount = -1000;
557212601Smav	mp->mnt_nvnodelistsize = -1000;
558212601Smav	mp->mnt_secondary_writes = -1000;
559212541Smav	MNT_IUNLOCK(mp);
560212541Smav#ifdef MAC
561212601Smav	mac_destroy_mount(mp);
562212601Smav#endif
563212541Smav	if (mp->mnt_opt != NULL)
564212541Smav		vfs_freeopts(mp->mnt_opt);
565212541Smav	crfree(mp->mnt_cred);
566212541Smav	uma_zfree(mount_zone, mp);
567212541Smav}
568212541Smav
569212541Smavstatic int
570212541Smavvfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
571212541Smav{
572212541Smav	struct vfsoptlist *optlist;
573212541Smav	struct vfsopt *opt, *noro_opt;
574212541Smav	char *fstype, *fspath, *errmsg;
575212541Smav	int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
576212541Smav	int has_rw, has_noro;
577212541Smav
578212541Smav	errmsg_len = 0;
5791541Srgrimes	errmsg_pos = -1;
58034961Sphk	has_rw = 0;
5811541Srgrimes	has_noro = 0;
5821541Srgrimes
58334961Sphk	error = vfs_buildopts(fsoptions, &optlist);
5841541Srgrimes	if (error)
5851541Srgrimes		return (error);
5865081Sbde
5875081Sbde	if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
5881541Srgrimes		errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
5891541Srgrimes	else
5905081Sbde		errmsg_len = 0;
5915081Sbde
5925081Sbde	/*
5935081Sbde	 * We need these two options before the others,
5945081Sbde	 * and they are mandatory for any filesystem.
5955081Sbde	 * Ensure they are NUL terminated as well.
5961541Srgrimes	 */
5975081Sbde	fstypelen = 0;
5985081Sbde	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
5995081Sbde	if (error || fstype[fstypelen - 1] != '\0') {
6005081Sbde		error = EINVAL;
6015081Sbde		if (errmsg != NULL)
6025081Sbde			strncpy(errmsg, "Invalid fstype", errmsg_len);
6035081Sbde		goto bail;
6045081Sbde	}
6055081Sbde	fspathlen = 0;
6065081Sbde	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
6075081Sbde	if (error || fspath[fspathlen - 1] != '\0') {
6081541Srgrimes		error = EINVAL;
60934961Sphk		if (errmsg != NULL)
61034961Sphk			strncpy(errmsg, "Invalid fspath", errmsg_len);
6115081Sbde		goto bail;
6125081Sbde	}
6135081Sbde
6145081Sbde	/*
6155081Sbde	 * We need to see if we have the "update" option
6165081Sbde	 * before we call vfs_domount(), since vfs_domount() has special
61734618Sphk	 * logic based on MNT_UPDATE.  This is very important
61833690Sphk	 * when we want to update the root filesystem.
61933690Sphk	 */
62033690Sphk	TAILQ_FOREACH(opt, optlist, link) {
62134961Sphk		if (strcmp(opt->name, "update") == 0)
6225081Sbde			fsflags |= MNT_UPDATE;
6235081Sbde		else if (strcmp(opt->name, "async") == 0)
6245081Sbde			fsflags |= MNT_ASYNC;
6255081Sbde		else if (strcmp(opt->name, "force") == 0)
6265081Sbde			fsflags |= MNT_FORCE;
6275081Sbde		else if (strcmp(opt->name, "multilabel") == 0)
6285081Sbde			fsflags |= MNT_MULTILABEL;
6295081Sbde		else if (strcmp(opt->name, "noasync") == 0)
6305081Sbde			fsflags &= ~MNT_ASYNC;
6311541Srgrimes		else if (strcmp(opt->name, "noatime") == 0)
6325081Sbde			fsflags |= MNT_NOATIME;
6335081Sbde		else if (strcmp(opt->name, "noclusterr") == 0)
6345081Sbde			fsflags |= MNT_NOCLUSTERR;
63540012Salex		else if (strcmp(opt->name, "noclusterw") == 0)
6361541Srgrimes			fsflags |= MNT_NOCLUSTERW;
6371541Srgrimes		else if (strcmp(opt->name, "noexec") == 0)
6381541Srgrimes			fsflags |= MNT_NOEXEC;
6391541Srgrimes		else if (strcmp(opt->name, "nosuid") == 0)
6401541Srgrimes			fsflags |= MNT_NOSUID;
6411541Srgrimes		else if (strcmp(opt->name, "nosymfollow") == 0)
6421541Srgrimes			fsflags |= MNT_NOSYMFOLLOW;
6431541Srgrimes		else if (strcmp(opt->name, "noro") == 0) {
6441541Srgrimes			fsflags &= ~MNT_RDONLY;
6451541Srgrimes			has_noro = 1;
6461541Srgrimes		}
6471541Srgrimes		else if (strcmp(opt->name, "rw") == 0) {
6481541Srgrimes			fsflags &= ~MNT_RDONLY;
649113874Sjhb			has_rw = 1;
650113874Sjhb		}
651110530Sjulian		else if (strcmp(opt->name, "ro") == 0 ||
652113874Sjhb		    strcmp(opt->name, "rdonly") == 0)
653113874Sjhb			fsflags |= MNT_RDONLY;
654209371Smav		else if (strcmp(opt->name, "snapshot") == 0)
655110296Sjake			fsflags |= MNT_SNAPSHOT;
656110296Sjake		else if (strcmp(opt->name, "suiddir") == 0)
657209371Smav			fsflags |= MNT_SUIDDIR;
6581541Srgrimes		else if (strcmp(opt->name, "sync") == 0)
6591541Srgrimes			fsflags |= MNT_SYNCHRONOUS;
6601541Srgrimes		else if (strcmp(opt->name, "union") == 0)
6611541Srgrimes			fsflags |= MNT_UNION;
6621541Srgrimes	}
6631541Srgrimes
6641541Srgrimes	/*
6651541Srgrimes	 * If "rw" was specified as a mount option, and we
6661541Srgrimes	 * are trying to update a mount-point from "ro" to "rw",
6671541Srgrimes	 * we need a mount option "noro", since in vfs_mergeopts(),
6681541Srgrimes	 * "noro" will cancel "ro", but "rw" will not do anything.
669110530Sjulian	 */
670113874Sjhb	if (has_rw && !has_noro) {
671113874Sjhb		noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
672113874Sjhb		noro_opt->name = strdup("noro", M_MOUNT);
673113874Sjhb		noro_opt->value = NULL;
674113874Sjhb		noro_opt->len = 0;
675123740Speter		TAILQ_INSERT_TAIL(optlist, noro_opt, link);
676113874Sjhb	}
677110530Sjulian
678128852Scperciva	/*
679128852Scperciva	 * Be ultra-paranoid about making sure the type and fspath
680113874Sjhb	 * variables will fit in our mp buffers, including the
681209371Smav	 * terminating NUL.
682110296Sjake	 */
683110296Sjake	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
684209371Smav		error = ENAMETOOLONG;
6851541Srgrimes		goto bail;
6861541Srgrimes	}
6871541Srgrimes
6881541Srgrimes	mtx_lock(&Giant);
689170174Sjeff	error = vfs_domount(td, fstype, fspath, fsflags, optlist);
690170174Sjeff	mtx_unlock(&Giant);
691170174Sjeffbail:
692110296Sjake	/* copyout the errmsg */
6931541Srgrimes	if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
6941541Srgrimes	    && errmsg_len > 0 && errmsg != NULL) {
695153666Sjhb		if (fsoptions->uio_segflg == UIO_SYSSPACE) {
6961541Srgrimes			bcopy(errmsg,
697232783Smav			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
698232783Smav			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
699232783Smav		} else {
700232783Smav			copyout(errmsg,
701232783Smav			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
702232783Smav			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
703232783Smav		}
70417342Sbde	}
70517342Sbde
706110296Sjake	if (error != 0)
707110296Sjake		vfs_freeopts(optlist);
708110296Sjake	return (error);
709174070Speter}
7101541Srgrimes
711110296Sjake/*
712110296Sjake * ---------------------------------------------------------------------
713110296Sjake * Old mount API.
714174070Speter */
715153666Sjhb#ifndef _SYS_SYSPROTO_H_
71653751Sbdestruct mount_args {
71753751Sbde	char	*type;
7181541Srgrimes	char	*path;
719232783Smav	int	flags;
720130551Sjulian	caddr_t	data;
721232783Smav};
7221541Srgrimes#endif
723232783Smav/* ARGSUSED */
7241541Srgrimesint
7251541Srgrimesmount(td, uap)
7261541Srgrimes	struct thread *td;
7271541Srgrimes	struct mount_args /* {
7281541Srgrimes		char *type;
7291541Srgrimes		char *path;
7301541Srgrimes		int flags;
7311541Srgrimes		caddr_t data;
7321541Srgrimes	} */ *uap;
7331541Srgrimes{
7341541Srgrimes	char *fstype;
7351541Srgrimes	struct vfsconf *vfsp = NULL;
7361541Srgrimes	struct mntarg *ma = NULL;
737151658Sjhb	int error;
738151658Sjhb
739232783Smav	AUDIT_ARG(fflags, uap->flags);
740232783Smav
74165557Sjasone	/* Kick out MNT_ROOTFS early as it is legal internally */
742232783Smav	uap->flags &= ~MNT_ROOTFS;
743232783Smav
744167327Sjulian	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
745232783Smav	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
74665557Sjasone	if (error) {
747232783Smav		free(fstype, M_TEMP);
74865557Sjasone		return (error);
7491541Srgrimes	}
7501541Srgrimes
75165782Sjhb	AUDIT_ARG(text, fstype);
752131436Sjhb	mtx_lock(&Giant);
753131436Sjhb	vfsp = vfs_byname_kld(fstype, td, &error);
754170174Sjeff	free(fstype, M_TEMP);
755232783Smav	if (vfsp == NULL) {
756232783Smav		mtx_unlock(&Giant);
757232783Smav		return (ENOENT);
758131436Sjhb	}
759131436Sjhb	if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
760131436Sjhb		mtx_unlock(&Giant);
761187357Sjeff		return (EOPNOTSUPP);
762187357Sjeff	}
763174070Speter
764232783Smav	ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
765232783Smav	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
766170297Sjeff	ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
767233628Sfabient	ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
768233628Sfabient	ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
769233628Sfabient
770233628Sfabient	error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
77176078Sjhb	mtx_unlock(&Giant);
77266716Sjhb	return (error);
77376078Sjhb}
774153666Sjhb
77576078Sjhb
776232783Smav/*
777232783Smav * vfs_domount(): actually attempt a filesystem mount.
778232783Smav */
779232783Smavstatic int
780232783Smavvfs_domount(
781232783Smav	struct thread *td,	/* Flags common to all filesystems. */
782232783Smav	const char *fstype,	/* Filesystem type. */
783110296Sjake	char *fspath,		/* Mount path. */
784110296Sjake	int fsflags,		/* Flags common to all filesystems. */
785110296Sjake	void *fsdata		/* Options local to the filesystem. */
786153490Sjhb	)
787110296Sjake{
78876078Sjhb	struct vnode *vp;
789111032Sjulian	struct mount *mp;
790153666Sjhb	struct vfsconf *vfsp;
791110296Sjake	struct export_args export;
792110296Sjake	int error, flag = 0, kern_flag = 0;
793110296Sjake	struct vattr va;
794110530Sjulian	struct nameidata nd;
795110530Sjulian
796110296Sjake	mtx_assert(&Giant, MA_OWNED);
797113874Sjhb	/*
798232783Smav	 * Be ultra-paranoid about making sure the type and fspath
799110296Sjake	 * variables will fit in our mp buffers, including the
800110296Sjake	 * terminating NUL.
801110296Sjake	 */
802110296Sjake	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
803110296Sjake		return (ENAMETOOLONG);
804110296Sjake
805110296Sjake	if (jailed(td->td_ucred))
806153666Sjhb		return (EPERM);
807153666Sjhb	if (usermount == 0) {
808110296Sjake		if ((error = suser(td)) != 0)
809232783Smav			return (error);
810110296Sjake	}
811110296Sjake
812110296Sjake	/*
813110296Sjake	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
8141541Srgrimes	 */
8151541Srgrimes	if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
8161541Srgrimes		if ((error = suser(td)) != 0)
8171541Srgrimes			return (error);
8181541Srgrimes	}
81912152Sphk	/*
82062573Sphk	 * Silently enforce MNT_NOSUID and MNT_USER for
8211541Srgrimes	 * unprivileged users.
8221541Srgrimes	 */
8231541Srgrimes	if (suser(td) != 0)
8241541Srgrimes		fsflags |= MNT_NOSUID | MNT_USER;
8251541Srgrimes
82696052Sbde	/* Load KLDs before we lock the covered vnode to avoid reversals. */
8271541Srgrimes	vfsp = NULL;
8281541Srgrimes	if ((fsflags & MNT_UPDATE) == 0) {
8291541Srgrimes		/* Don't try to load KLDs if we're mounting the root. */
8301541Srgrimes		if (fsflags & MNT_ROOTFS)
83112243Sphk			vfsp = vfs_byname(fstype);
8321541Srgrimes		else
8332858Swollman			vfsp = vfs_byname_kld(fstype, td, &error);
834192304Sed		if (vfsp == NULL)
835192304Sed			return (ENODEV);
83688019Sluigi	}
83788019Sluigi	/*
838116874Ssmkelly	 * Get vnode to be covered
839126383Sphk	 */
840126383Sphk	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE,
841126383Sphk	    fspath, td);
842165260Sn_hibma	if ((error = namei(&nd)) != 0)
843116874Ssmkelly		return (error);
844126383Sphk	NDFREE(&nd, NDF_ONLY_PNBUF);
845116874Ssmkelly	vp = nd.ni_vp;
846126386Sphk	if (fsflags & MNT_UPDATE) {
847165260Sn_hibma		if ((vp->v_vflag & VV_ROOT) == 0) {
848126383Sphk			vput(vp);
849126383Sphk			return (EINVAL);
850165260Sn_hibma		}
851126383Sphk		mp = vp->v_mount;
852126383Sphk		flag = mp->mnt_flag;
853126383Sphk		kern_flag = mp->mnt_kern_flag;
854116874Ssmkelly		/*
855116874Ssmkelly		 * We only allow the filesystem to be reloaded if it
856116874Ssmkelly		 * is currently mounted read-only.
857116874Ssmkelly		 */
858170075Semaste		if ((fsflags & MNT_RELOAD) &&
859116874Ssmkelly		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
860116874Ssmkelly			vput(vp);
861116874Ssmkelly			return (EOPNOTSUPP);	/* Needs translation */
862116874Ssmkelly		}
863116874Ssmkelly		/*
864209390Sed		 * Only privileged root, or (if MNT_USER is set) the user that
865116874Ssmkelly		 * did the original mount is permitted to update it.
866116874Ssmkelly		 */
867116874Ssmkelly		error = vfs_suser(mp, td);
868116874Ssmkelly		if (error) {
869116874Ssmkelly			vput(vp);
870116874Ssmkelly			return (error);
871225788Smav		}
872157822Sjhb		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
873116874Ssmkelly			vput(vp);
874116874Ssmkelly			return (EBUSY);
875116874Ssmkelly		}
876116874Ssmkelly		VI_LOCK(vp);
877116874Ssmkelly		if ((vp->v_iflag & VI_MOUNT) != 0 ||
878116874Ssmkelly		    vp->v_mountedhere != NULL) {
879116874Ssmkelly			VI_UNLOCK(vp);
880116908Ssmkelly			vfs_unbusy(mp, td);
881170075Semaste			vput(vp);
882170075Semaste			return (EBUSY);
883170075Semaste		}
884174898Srwatson		vp->v_iflag |= VI_MOUNT;
885170075Semaste		VI_UNLOCK(vp);
886116874Ssmkelly		mp->mnt_flag |= fsflags &
887170075Semaste		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
888116874Ssmkelly		VOP_UNLOCK(vp, 0, td);
889116874Ssmkelly		mp->mnt_optnew = fsdata;
890126383Sphk		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
891	} else {
892		/*
893		 * If the user is not root, ensure that they own the directory
894		 * onto which we are attempting to mount.
895		 */
896		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
897		if (error) {
898			vput(vp);
899			return (error);
900		}
901		if (va.va_uid != td->td_ucred->cr_uid) {
902			if ((error = suser(td)) != 0) {
903				vput(vp);
904				return (error);
905			}
906		}
907		error = vinvalbuf(vp, V_SAVE, td, 0, 0);
908		if (error != 0) {
909			vput(vp);
910			return (error);
911		}
912		if (vp->v_type != VDIR) {
913			vput(vp);
914			return (ENOTDIR);
915		}
916		VI_LOCK(vp);
917		if ((vp->v_iflag & VI_MOUNT) != 0 ||
918		    vp->v_mountedhere != NULL) {
919			VI_UNLOCK(vp);
920			vput(vp);
921			return (EBUSY);
922		}
923		vp->v_iflag |= VI_MOUNT;
924		VI_UNLOCK(vp);
925
926		/*
927		 * Allocate and initialize the filesystem.
928		 */
929		mp = vfs_mount_alloc(vp, vfsp, fspath, td);
930		VOP_UNLOCK(vp, 0, td);
931
932		/* XXXMAC: pass to vfs_mount_alloc? */
933		mp->mnt_optnew = fsdata;
934	}
935
936	/*
937	 * Set the mount level flags.
938	 */
939	if (fsflags & MNT_RDONLY)
940		mp->mnt_flag |= MNT_RDONLY;
941	mp->mnt_flag &=~ MNT_UPDATEMASK;
942	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
943	/*
944	 * Mount the filesystem.
945	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
946	 * get.  No freeing of cn_pnbuf.
947	 */
948        error = VFS_MOUNT(mp, td);
949
950	/*
951	 * Process the export option only if we are
952	 * updating mount options.
953	 */
954	if (!error && (fsflags & MNT_UPDATE)) {
955		if (vfs_copyopt(mp->mnt_optnew, "export", &export,
956		    sizeof(export)) == 0)
957			error = vfs_export(mp, &export);
958	}
959
960	if (!error) {
961		if (mp->mnt_opt != NULL)
962			vfs_freeopts(mp->mnt_opt);
963		mp->mnt_opt = mp->mnt_optnew;
964		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
965	}
966	/*
967	 * Prevent external consumers of mount options from reading
968	 * mnt_optnew.
969	*/
970	mp->mnt_optnew = NULL;
971	if (mp->mnt_flag & MNT_UPDATE) {
972		mp->mnt_flag &=
973		    ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
974		if (error) {
975			mp->mnt_flag = flag;
976			mp->mnt_kern_flag = kern_flag;
977		}
978		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
979			if (mp->mnt_syncer == NULL)
980				error = vfs_allocate_syncvnode(mp);
981		} else {
982			if (mp->mnt_syncer != NULL)
983				vrele(mp->mnt_syncer);
984			mp->mnt_syncer = NULL;
985		}
986		vfs_unbusy(mp, td);
987		VI_LOCK(vp);
988		vp->v_iflag &= ~VI_MOUNT;
989		VI_UNLOCK(vp);
990		vrele(vp);
991		return (error);
992	}
993	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
994	/*
995	 * Put the new filesystem on the mount list after root.
996	 */
997	cache_purge(vp);
998	if (!error) {
999		struct vnode *newdp;
1000
1001		VI_LOCK(vp);
1002		vp->v_iflag &= ~VI_MOUNT;
1003		VI_UNLOCK(vp);
1004		vp->v_mountedhere = mp;
1005		mtx_lock(&mountlist_mtx);
1006		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1007		mtx_unlock(&mountlist_mtx);
1008		vfs_event_signal(NULL, VQ_MOUNT, 0);
1009		if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp, td))
1010			panic("mount: lost mount");
1011		mountcheckdirs(vp, newdp);
1012		vput(newdp);
1013		VOP_UNLOCK(vp, 0, td);
1014		if ((mp->mnt_flag & MNT_RDONLY) == 0)
1015			error = vfs_allocate_syncvnode(mp);
1016		vfs_unbusy(mp, td);
1017		if (error)
1018			vrele(vp);
1019	} else {
1020		VI_LOCK(vp);
1021		vp->v_iflag &= ~VI_MOUNT;
1022		VI_UNLOCK(vp);
1023		vfs_unbusy(mp, td);
1024		vfs_mount_destroy(mp);
1025		vput(vp);
1026	}
1027	return (error);
1028}
1029
1030/*
1031 * ---------------------------------------------------------------------
1032 * Unmount a filesystem.
1033 *
1034 * Note: unmount takes a path to the vnode mounted on as argument,
1035 * not special file (as before).
1036 */
1037#ifndef _SYS_SYSPROTO_H_
1038struct unmount_args {
1039	char	*path;
1040	int	flags;
1041};
1042#endif
1043/* ARGSUSED */
1044int
1045unmount(td, uap)
1046	struct thread *td;
1047	register struct unmount_args /* {
1048		char *path;
1049		int flags;
1050	} */ *uap;
1051{
1052	struct mount *mp;
1053	char *pathbuf;
1054	int error, id0, id1;
1055
1056	if (jailed(td->td_ucred))
1057		return (EPERM);
1058	if (usermount == 0) {
1059		if ((error = suser(td)) != 0)
1060			return (error);
1061	}
1062
1063	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1064	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
1065	if (error) {
1066		free(pathbuf, M_TEMP);
1067		return (error);
1068	}
1069	AUDIT_ARG(upath, td, pathbuf, ARG_UPATH1);
1070	mtx_lock(&Giant);
1071	if (uap->flags & MNT_BYFSID) {
1072		/* Decode the filesystem ID. */
1073		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
1074			mtx_unlock(&Giant);
1075			free(pathbuf, M_TEMP);
1076			return (EINVAL);
1077		}
1078
1079		mtx_lock(&mountlist_mtx);
1080		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1081			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
1082			    mp->mnt_stat.f_fsid.val[1] == id1)
1083				break;
1084		}
1085		mtx_unlock(&mountlist_mtx);
1086	} else {
1087		mtx_lock(&mountlist_mtx);
1088		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
1089			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
1090				break;
1091		}
1092		mtx_unlock(&mountlist_mtx);
1093	}
1094	free(pathbuf, M_TEMP);
1095	if (mp == NULL) {
1096		/*
1097		 * Previously we returned ENOENT for a nonexistent path and
1098		 * EINVAL for a non-mountpoint.  We cannot tell these apart
1099		 * now, so in the !MNT_BYFSID case return the more likely
1100		 * EINVAL for compatibility.
1101		 */
1102		mtx_unlock(&Giant);
1103		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
1104	}
1105
1106	/*
1107	 * Only privileged root, or (if MNT_USER is set) the user that did the
1108	 * original mount is permitted to unmount this filesystem.
1109	 */
1110	error = vfs_suser(mp, td);
1111	if (error) {
1112		mtx_unlock(&Giant);
1113		return (error);
1114	}
1115
1116	/*
1117	 * Don't allow unmounting the root filesystem.
1118	 */
1119	if (mp->mnt_flag & MNT_ROOTFS) {
1120		mtx_unlock(&Giant);
1121		return (EINVAL);
1122	}
1123	error = dounmount(mp, uap->flags, td);
1124	mtx_unlock(&Giant);
1125	return (error);
1126}
1127
1128/*
1129 * Do the actual filesystem unmount.
1130 */
1131int
1132dounmount(mp, flags, td)
1133	struct mount *mp;
1134	int flags;
1135	struct thread *td;
1136{
1137	struct vnode *coveredvp, *fsrootvp;
1138	int error;
1139	int async_flag;
1140
1141	mtx_assert(&Giant, MA_OWNED);
1142
1143	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1144		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY, td);
1145	MNT_ILOCK(mp);
1146	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1147		MNT_IUNLOCK(mp);
1148		if (coveredvp)
1149			VOP_UNLOCK(coveredvp, 0, td);
1150		return (EBUSY);
1151	}
1152	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1153	/* Allow filesystems to detect that a forced unmount is in progress. */
1154	if (flags & MNT_FORCE)
1155		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1156	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1157	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp), td);
1158	if (error) {
1159		MNT_ILOCK(mp);
1160		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1161		if (mp->mnt_kern_flag & MNTK_MWAIT)
1162			wakeup(mp);
1163		MNT_IUNLOCK(mp);
1164		if (coveredvp)
1165			VOP_UNLOCK(coveredvp, 0, td);
1166		return (error);
1167	}
1168	vn_start_write(NULL, &mp, V_WAIT);
1169
1170	if (mp->mnt_flag & MNT_EXPUBLIC)
1171		vfs_setpublicfs(NULL, NULL, NULL);
1172
1173	vfs_msync(mp, MNT_WAIT);
1174	async_flag = mp->mnt_flag & MNT_ASYNC;
1175	mp->mnt_flag &= ~MNT_ASYNC;
1176	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1177	if (mp->mnt_syncer != NULL)
1178		vrele(mp->mnt_syncer);
1179	/*
1180	 * For forced unmounts, move process cdir/rdir refs on the fs root
1181	 * vnode to the covered vnode.  For non-forced unmounts we want
1182	 * such references to cause an EBUSY error.
1183	 */
1184	if ((flags & MNT_FORCE) &&
1185	    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
1186		if (mp->mnt_vnodecovered != NULL)
1187			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
1188		if (fsrootvp == rootvnode) {
1189			vrele(rootvnode);
1190			rootvnode = NULL;
1191		}
1192		vput(fsrootvp);
1193	}
1194	if (((mp->mnt_flag & MNT_RDONLY) ||
1195	     (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) ||
1196	    (flags & MNT_FORCE)) {
1197		error = VFS_UNMOUNT(mp, flags, td);
1198	}
1199	vn_finished_write(mp);
1200	if (error) {
1201		/* Undo cdir/rdir and rootvnode changes made above. */
1202		if ((flags & MNT_FORCE) &&
1203		    VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp, td) == 0) {
1204			if (mp->mnt_vnodecovered != NULL)
1205				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
1206			if (rootvnode == NULL) {
1207				rootvnode = fsrootvp;
1208				vref(rootvnode);
1209			}
1210			vput(fsrootvp);
1211		}
1212		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1213			(void) vfs_allocate_syncvnode(mp);
1214		MNT_ILOCK(mp);
1215		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1216		mp->mnt_flag |= async_flag;
1217		lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
1218		if (mp->mnt_kern_flag & MNTK_MWAIT)
1219			wakeup(mp);
1220		MNT_IUNLOCK(mp);
1221		if (coveredvp)
1222			VOP_UNLOCK(coveredvp, 0, td);
1223		return (error);
1224	}
1225	mtx_lock(&mountlist_mtx);
1226	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1227	mtx_unlock(&mountlist_mtx);
1228	if (coveredvp != NULL) {
1229		coveredvp->v_mountedhere = NULL;
1230		vput(coveredvp);
1231	}
1232	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
1233	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
1234	vfs_mount_destroy(mp);
1235	return (0);
1236}
1237
1238/*
1239 * ---------------------------------------------------------------------
1240 * Mounting of root filesystem
1241 *
1242 */
1243
1244struct root_hold_token {
1245	const char 			*who;
1246	LIST_ENTRY(root_hold_token)	list;
1247};
1248
1249static LIST_HEAD(, root_hold_token)	root_holds =
1250    LIST_HEAD_INITIALIZER(&root_holds);
1251
1252struct root_hold_token *
1253root_mount_hold(const char *identifier)
1254{
1255	struct root_hold_token *h;
1256
1257	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
1258	h->who = identifier;
1259	mtx_lock(&mountlist_mtx);
1260	LIST_INSERT_HEAD(&root_holds, h, list);
1261	mtx_unlock(&mountlist_mtx);
1262	return (h);
1263}
1264
1265void
1266root_mount_rel(struct root_hold_token *h)
1267{
1268
1269	mtx_lock(&mountlist_mtx);
1270	LIST_REMOVE(h, list);
1271	wakeup(&root_holds);
1272	mtx_unlock(&mountlist_mtx);
1273	free(h, M_DEVBUF);
1274}
1275
1276static void
1277root_mount_wait(void)
1278{
1279	struct root_hold_token *h;
1280
1281	for (;;) {
1282		DROP_GIANT();
1283		g_waitidle();
1284		PICKUP_GIANT();
1285		mtx_lock(&mountlist_mtx);
1286		if (LIST_EMPTY(&root_holds)) {
1287			mtx_unlock(&mountlist_mtx);
1288			break;
1289		}
1290		printf("Root mount waiting for:");
1291		LIST_FOREACH(h, &root_holds, list)
1292			printf(" %s", h->who);
1293		printf("\n");
1294		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
1295		    hz);
1296	}
1297}
1298
1299static void
1300set_rootvnode(struct thread *td)
1301{
1302	struct proc *p;
1303
1304	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode, td))
1305		panic("Cannot find root vnode");
1306
1307	p = td->td_proc;
1308	FILEDESC_LOCK(p->p_fd);
1309
1310	if (p->p_fd->fd_cdir != NULL)
1311		vrele(p->p_fd->fd_cdir);
1312	p->p_fd->fd_cdir = rootvnode;
1313	VREF(rootvnode);
1314
1315	if (p->p_fd->fd_rdir != NULL)
1316		vrele(p->p_fd->fd_rdir);
1317	p->p_fd->fd_rdir = rootvnode;
1318	VREF(rootvnode);
1319
1320	FILEDESC_UNLOCK(p->p_fd);
1321
1322	VOP_UNLOCK(rootvnode, 0, td);
1323}
1324
1325/*
1326 * Mount /devfs as our root filesystem, but do not put it on the mountlist
1327 * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
1328 */
1329
1330static void
1331devfs_first(void)
1332{
1333	struct thread *td = curthread;
1334	struct vfsoptlist *opts;
1335	struct vfsconf *vfsp;
1336	struct mount *mp = NULL;
1337	int error;
1338
1339	vfsp = vfs_byname("devfs");
1340	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1341	if (vfsp == NULL)
1342		return;
1343
1344	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td);
1345
1346	error = VFS_MOUNT(mp, td);
1347	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1348	if (error)
1349		return;
1350
1351	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
1352	TAILQ_INIT(opts);
1353	mp->mnt_opt = opts;
1354
1355	mtx_lock(&mountlist_mtx);
1356	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1357	mtx_unlock(&mountlist_mtx);
1358
1359	set_rootvnode(td);
1360
1361	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1362	if (error)
1363		printf("kern_symlink /dev -> / returns %d\n", error);
1364}
1365
1366/*
1367 * Surgically move our devfs to be mounted on /dev.
1368 */
1369
1370static void
1371devfs_fixup(struct thread *td)
1372{
1373	struct nameidata nd;
1374	int error;
1375	struct vnode *vp, *dvp;
1376	struct mount *mp;
1377
1378	/* Remove our devfs mount from the mountlist and purge the cache */
1379	mtx_lock(&mountlist_mtx);
1380	mp = TAILQ_FIRST(&mountlist);
1381	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1382	mtx_unlock(&mountlist_mtx);
1383	cache_purgevfs(mp);
1384
1385	VFS_ROOT(mp, LK_EXCLUSIVE, &dvp, td);
1386	VI_LOCK(dvp);
1387	dvp->v_iflag &= ~VI_MOUNT;
1388	dvp->v_mountedhere = NULL;
1389	VI_UNLOCK(dvp);
1390
1391	/* Set up the real rootvnode, and purge the cache */
1392	TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1393	set_rootvnode(td);
1394	cache_purgevfs(rootvnode->v_mount);
1395
1396	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1397	error = namei(&nd);
1398	if (error) {
1399		printf("Lookup of /dev for devfs, error: %d\n", error);
1400		return;
1401	}
1402	NDFREE(&nd, NDF_ONLY_PNBUF);
1403	vp = nd.ni_vp;
1404	if (vp->v_type != VDIR) {
1405		vput(vp);
1406	}
1407	error = vinvalbuf(vp, V_SAVE, td, 0, 0);
1408	if (error) {
1409		vput(vp);
1410	}
1411	cache_purge(vp);
1412	mp->mnt_vnodecovered = vp;
1413	vp->v_mountedhere = mp;
1414	mtx_lock(&mountlist_mtx);
1415	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1416	mtx_unlock(&mountlist_mtx);
1417	VOP_UNLOCK(vp, 0, td);
1418	vput(dvp);
1419	vfs_unbusy(mp, td);
1420
1421	/* Unlink the no longer needed /dev/dev -> / symlink */
1422	kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1423}
1424
1425/*
1426 * Report errors during filesystem mounting.
1427 */
1428void
1429vfs_mount_error(struct mount *mp, const char *fmt, ...)
1430{
1431	struct vfsoptlist *moptlist = mp->mnt_optnew;
1432	va_list ap;
1433	int error, len;
1434	char *errmsg;
1435
1436	error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len);
1437	if (error || errmsg == NULL || len <= 0)
1438		return;
1439
1440	va_start(ap, fmt);
1441	vsnprintf(errmsg, (size_t)len, fmt, ap);
1442	va_end(ap);
1443}
1444
1445/*
1446 * Find and mount the root filesystem
1447 */
1448void
1449vfs_mountroot(void)
1450{
1451	char *cp;
1452	int error, i, asked = 0;
1453
1454	root_mount_wait();
1455
1456	mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount),
1457	    NULL, NULL, mount_init, mount_fini,
1458	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1459	devfs_first();
1460
1461	/*
1462	 * We are booted with instructions to prompt for the root filesystem.
1463	 */
1464	if (boothowto & RB_ASKNAME) {
1465		if (!vfs_mountroot_ask())
1466			return;
1467		asked = 1;
1468	}
1469
1470	/*
1471	 * The root filesystem information is compiled in, and we are
1472	 * booted with instructions to use it.
1473	 */
1474	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1475		if (!vfs_mountroot_try(ctrootdevname))
1476			return;
1477		ctrootdevname = NULL;
1478	}
1479
1480	/*
1481	 * We've been given the generic "use CDROM as root" flag.  This is
1482	 * necessary because one media may be used in many different
1483	 * devices, so we need to search for them.
1484	 */
1485	if (boothowto & RB_CDROM) {
1486		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1487			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1488				return;
1489		}
1490	}
1491
1492	/*
1493	 * Try to use the value read by the loader from /etc/fstab, or
1494	 * supplied via some other means.  This is the preferred
1495	 * mechanism.
1496	 */
1497	cp = getenv("vfs.root.mountfrom");
1498	if (cp != NULL) {
1499		error = vfs_mountroot_try(cp);
1500		freeenv(cp);
1501		if (!error)
1502			return;
1503	}
1504
1505	/*
1506	 * Try values that may have been computed by code during boot
1507	 */
1508	if (!vfs_mountroot_try(rootdevnames[0]))
1509		return;
1510	if (!vfs_mountroot_try(rootdevnames[1]))
1511		return;
1512
1513	/*
1514	 * If we (still) have a compiled-in default, try it.
1515	 */
1516	if (ctrootdevname != NULL)
1517		if (!vfs_mountroot_try(ctrootdevname))
1518			return;
1519	/*
1520	 * Everything so far has failed, prompt on the console if we haven't
1521	 * already tried that.
1522	 */
1523	if (!asked)
1524		if (!vfs_mountroot_ask())
1525			return;
1526
1527	panic("Root mount failed, startup aborted.");
1528}
1529
1530/*
1531 * Mount (mountfrom) as the root filesystem.
1532 */
1533static int
1534vfs_mountroot_try(const char *mountfrom)
1535{
1536	struct mount	*mp;
1537	char		*vfsname, *path;
1538	time_t		timebase;
1539	int		error;
1540	char		patt[32];
1541
1542	vfsname = NULL;
1543	path    = NULL;
1544	mp      = NULL;
1545	error   = EINVAL;
1546
1547	if (mountfrom == NULL)
1548		return (error);		/* don't complain */
1549	printf("Trying to mount root from %s\n", mountfrom);
1550
1551	/* parse vfs name and path */
1552	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1553	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1554	vfsname[0] = path[0] = 0;
1555	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1556	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1557		goto out;
1558
1559	if (path[0] == '\0')
1560		strcpy(path, ROOTNAME);
1561
1562	error = kernel_vmount(
1563	    MNT_RDONLY | MNT_ROOTFS,
1564	    "fstype", vfsname,
1565	    "fspath", "/",
1566	    "from", path,
1567	    NULL);
1568	if (error == 0) {
1569		/*
1570		 * We mount devfs prior to mounting the / FS, so the first
1571		 * entry will typically be devfs.
1572		 */
1573		mp = TAILQ_FIRST(&mountlist);
1574		KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
1575
1576		/*
1577		 * Iterate over all currently mounted file systems and use
1578		 * the time stamp found to check and/or initialize the RTC.
1579		 * Typically devfs has no time stamp and the only other FS
1580		 * is the actual / FS.
1581		 * Call inittodr() only once and pass it the largest of the
1582		 * timestamps we encounter.
1583		 */
1584		timebase = 0;
1585		do {
1586			if (mp->mnt_time > timebase)
1587				timebase = mp->mnt_time;
1588			mp = TAILQ_NEXT(mp, mnt_list);
1589		} while (mp != NULL);
1590		inittodr(timebase);
1591
1592		devfs_fixup(curthread);
1593	}
1594out:
1595	free(path, M_MOUNT);
1596	free(vfsname, M_MOUNT);
1597	return (error);
1598}
1599
1600/*
1601 * ---------------------------------------------------------------------
1602 * Interactive root filesystem selection code.
1603 */
1604
/*
 * Prompt on the console for a root filesystem specification until one
 * can be mounted.  Returns 0 once a root filesystem has been mounted
 * and 1 when the user aborts with an empty line.
 */
static int
vfs_mountroot_ask(void)
{
	char answer[128];

	for (;;) {
		printf("\nManual root filesystem specification:\n");
		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
#if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
		printf("                       eg. ufs:da0s1a\n");
#else
		printf("                       eg. ufs:/dev/da0a\n");
#endif
		printf("  ?                  List valid disk boot devices\n");
		printf("  <empty line>       Abort manual input\n");
		printf("\nmountroot> ");
		gets(answer, sizeof(answer), 1);

		switch (answer[0]) {
		case '\0':
			/* Empty line: the user gave up. */
			return (1);
		case '?':
			printf("\nList of GEOM managed disk devices:\n  ");
			g_dev_print();
			break;
		default:
			if (vfs_mountroot_try(answer) == 0)
				return (0);
			break;
		}
	}
}
1633
1634/*
1635 * ---------------------------------------------------------------------
1636 * Functions for querying mount options/arguments from filesystems.
1637 */
1638
1639/*
1640 * Check that no unknown options are given
1641 */
1642int
1643vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1644{
1645	struct vfsopt *opt;
1646	const char **t, *p;
1647
1648
1649	TAILQ_FOREACH(opt, opts, link) {
1650		p = opt->name;
1651		if (p[0] == 'n' && p[1] == 'o')
1652			p += 2;
1653		for(t = global_opts; *t != NULL; t++)
1654			if (!strcmp(*t, p))
1655				break;
1656		if (*t != NULL)
1657			continue;
1658		for(t = legal; *t != NULL; t++)
1659			if (!strcmp(*t, p))
1660				break;
1661		if (*t != NULL)
1662			continue;
1663		printf("mount option <%s> is unknown\n", p);
1664		return (EINVAL);
1665	}
1666	return (0);
1667}
1668
1669/*
1670 * Get a mount option by its name.
1671 *
1672 * Return 0 if the option was found, ENOENT otherwise.
1673 * If len is non-NULL it will be filled with the length
1674 * of the option. If buf is non-NULL, it will be filled
1675 * with the address of the option.
1676 */
1677int
1678vfs_getopt(opts, name, buf, len)
1679	struct vfsoptlist *opts;
1680	const char *name;
1681	void **buf;
1682	int *len;
1683{
1684	struct vfsopt *opt;
1685
1686	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1687
1688	TAILQ_FOREACH(opt, opts, link) {
1689		if (strcmp(name, opt->name) == 0) {
1690			if (len != NULL)
1691				*len = opt->len;
1692			if (buf != NULL)
1693				*buf = opt->value;
1694			return (0);
1695		}
1696	}
1697	return (ENOENT);
1698}
1699
1700static int
1701vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
1702{
1703	struct vfsopt *opt;
1704	int i;
1705
1706	if (opts == NULL)
1707		return (-1);
1708
1709	i = 0;
1710	TAILQ_FOREACH(opt, opts, link) {
1711		if (strcmp(name, opt->name) == 0)
1712			return (i);
1713		++i;
1714	}
1715	return (-1);
1716}
1717
1718char *
1719vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1720{
1721	struct vfsopt *opt;
1722
1723	*error = 0;
1724	TAILQ_FOREACH(opt, opts, link) {
1725		if (strcmp(name, opt->name) != 0)
1726			continue;
1727		if (((char *)opt->value)[opt->len - 1] != '\0') {
1728			*error = EINVAL;
1729			return (NULL);
1730		}
1731		return (opt->value);
1732	}
1733	return (NULL);
1734}
1735
1736int
1737vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1738{
1739	struct vfsopt *opt;
1740
1741	TAILQ_FOREACH(opt, opts, link) {
1742		if (strcmp(name, opt->name) == 0) {
1743			if (w != NULL)
1744				*w |= val;
1745			return (1);
1746		}
1747	}
1748	if (w != NULL)
1749		*w &= ~val;
1750	return (0);
1751}
1752
1753int
1754vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1755{
1756	va_list ap;
1757	struct vfsopt *opt;
1758	int ret;
1759
1760	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1761
1762	TAILQ_FOREACH(opt, opts, link) {
1763		if (strcmp(name, opt->name) != 0)
1764			continue;
1765		if (((char *)opt->value)[opt->len - 1] != '\0')
1766			return (0);
1767		va_start(ap, fmt);
1768		ret = vsscanf(opt->value, fmt, ap);
1769		va_end(ap);
1770		return (ret);
1771	}
1772	return (0);
1773}
1774
1775/*
1776 * Find and copy a mount option.
1777 *
1778 * The size of the buffer has to be specified
1779 * in len, if it is not the same length as the
1780 * mount option, EINVAL is returned.
1781 * Returns ENOENT if the option is not found.
1782 */
1783int
1784vfs_copyopt(opts, name, dest, len)
1785	struct vfsoptlist *opts;
1786	const char *name;
1787	void *dest;
1788	int len;
1789{
1790	struct vfsopt *opt;
1791
1792	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1793
1794	TAILQ_FOREACH(opt, opts, link) {
1795		if (strcmp(name, opt->name) == 0) {
1796			if (len != opt->len)
1797				return (EINVAL);
1798			bcopy(opt->value, dest, opt->len);
1799			return (0);
1800		}
1801	}
1802	return (ENOENT);
1803}
1804
1805/*
1806 * This is a helper function for filesystems to traverse their
1807 * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
1808 */
1809
1810struct vnode *
1811__mnt_vnode_next(struct vnode **mvp, struct mount *mp)
1812{
1813	struct vnode *vp;
1814
1815	mtx_assert(MNT_MTX(mp), MA_OWNED);
1816
1817	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
1818	vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
1819	while (vp != NULL && vp->v_type == VMARKER)
1820		vp = TAILQ_NEXT(vp, v_nmntvnodes);
1821
1822	/* Check if we are done */
1823	if (vp == NULL) {
1824		__mnt_vnode_markerfree(mvp, mp);
1825		return (NULL);
1826	}
1827	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
1828	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
1829	return (vp);
1830}
1831
1832struct vnode *
1833__mnt_vnode_first(struct vnode **mvp, struct mount *mp)
1834{
1835	struct vnode *vp;
1836
1837	mtx_assert(MNT_MTX(mp), MA_OWNED);
1838
1839	vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1840	while (vp != NULL && vp->v_type == VMARKER)
1841		vp = TAILQ_NEXT(vp, v_nmntvnodes);
1842
1843	/* Check if we are done */
1844	if (vp == NULL) {
1845		*mvp = NULL;
1846		return (NULL);
1847	}
1848	mp->mnt_holdcnt++;
1849	MNT_IUNLOCK(mp);
1850	*mvp = (struct vnode *) malloc(sizeof(struct vnode),
1851				       M_VNODE_MARKER,
1852				       M_WAITOK | M_ZERO);
1853	MNT_ILOCK(mp);
1854	(*mvp)->v_type = VMARKER;
1855
1856	vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1857	while (vp != NULL && vp->v_type == VMARKER)
1858		vp = TAILQ_NEXT(vp, v_nmntvnodes);
1859
1860	/* Check if we are done */
1861	if (vp == NULL) {
1862		MNT_IUNLOCK(mp);
1863		free(*mvp, M_VNODE_MARKER);
1864		MNT_ILOCK(mp);
1865		*mvp = NULL;
1866		mp->mnt_holdcnt--;
1867		if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
1868			wakeup(&mp->mnt_holdcnt);
1869		return (NULL);
1870	}
1871	mp->mnt_markercnt++;
1872	(*mvp)->v_mount = mp;
1873	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
1874	return (vp);
1875}
1876
1877
1878void
1879__mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp)
1880{
1881
1882	if (*mvp == NULL)
1883		return;
1884
1885	mtx_assert(MNT_MTX(mp), MA_OWNED);
1886
1887	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
1888	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
1889	MNT_IUNLOCK(mp);
1890	free(*mvp, M_VNODE_MARKER);
1891	MNT_ILOCK(mp);
1892	*mvp = NULL;
1893
1894	mp->mnt_markercnt--;
1895	mp->mnt_holdcnt--;
1896	if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
1897		wakeup(&mp->mnt_holdcnt);
1898}
1899
1900
1901int
1902__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
1903{
1904	int error;
1905
1906	error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
1907	if (sbp != &mp->mnt_stat)
1908		*sbp = mp->mnt_stat;
1909	return (error);
1910}
1911
1912void
1913vfs_mountedfrom(struct mount *mp, const char *from)
1914{
1915
1916	bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
1917	strlcpy(mp->mnt_stat.f_mntfromname, from,
1918	    sizeof mp->mnt_stat.f_mntfromname);
1919}
1920
1921/*
1922 * ---------------------------------------------------------------------
1923 * This is the api for building mount args and mounting filesystems from
1924 * inside the kernel.
1925 *
1926 * The API works by accumulation of individual args.  First error is
1927 * latched.
1928 *
1929 * XXX: should be documented in new manpage kernel_mount(9)
1930 */
1931
1932/* A memory allocation which must be freed when we are done */
1933struct mntaarg {
1934	SLIST_ENTRY(mntaarg)	next;
1935};
1936
1937/* The header for the mount arguments */
1938struct mntarg {
1939	struct iovec *v;
1940	int len;
1941	int error;
1942	SLIST_HEAD(, mntaarg)	list;
1943};
1944
/*
 * Add a boolean argument.
 *
 * name must start with "no" and is the negative form of the option.
 * When flag is true the option is added with the "no" prefix stripped;
 * when flag is false it is added under the full name.  No value is
 * attached either way.
 */
struct mntarg *
mount_argb(struct mntarg *ma, int flag, const char *name)
{
	const char *optname;

	KASSERT(name[0] == 'n' && name[1] == 'o',
	    ("mount_argb(...,%s): name must start with 'no'", name));

	optname = name;
	if (flag)
		optname += 2;
	return (mount_arg(ma, optname, NULL, 0));
}
1960
1961/*
1962 * Add an argument printf style
1963 */
1964struct mntarg *
1965mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
1966{
1967	va_list ap;
1968	struct mntaarg *maa;
1969	struct sbuf *sb;
1970	int len;
1971
1972	if (ma == NULL) {
1973		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1974		SLIST_INIT(&ma->list);
1975	}
1976	if (ma->error)
1977		return (ma);
1978
1979	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1980	    M_MOUNT, M_WAITOK);
1981	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1982	ma->v[ma->len].iov_len = strlen(name) + 1;
1983	ma->len++;
1984
1985	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1986	va_start(ap, fmt);
1987	sbuf_vprintf(sb, fmt, ap);
1988	va_end(ap);
1989	sbuf_finish(sb);
1990	len = sbuf_len(sb) + 1;
1991	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1992	SLIST_INSERT_HEAD(&ma->list, maa, next);
1993	bcopy(sbuf_data(sb), maa + 1, len);
1994	sbuf_delete(sb);
1995
1996	ma->v[ma->len].iov_base = maa + 1;
1997	ma->v[ma->len].iov_len = len;
1998	ma->len++;
1999
2000	return (ma);
2001}
2002
2003/*
2004 * Add an argument which is a userland string.
2005 */
2006struct mntarg *
2007mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
2008{
2009	struct mntaarg *maa;
2010	char *tbuf;
2011
2012	if (val == NULL)
2013		return (ma);
2014	if (ma == NULL) {
2015		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2016		SLIST_INIT(&ma->list);
2017	}
2018	if (ma->error)
2019		return (ma);
2020	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
2021	SLIST_INSERT_HEAD(&ma->list, maa, next);
2022	tbuf = (void *)(maa + 1);
2023	ma->error = copyinstr(val, tbuf, len, NULL);
2024	return (mount_arg(ma, name, tbuf, -1));
2025}
2026
2027/*
2028 * Plain argument.
2029 *
2030 * If length is -1, use printf.
2031 */
2032struct mntarg *
2033mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
2034{
2035
2036	if (ma == NULL) {
2037		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
2038		SLIST_INIT(&ma->list);
2039	}
2040	if (ma->error)
2041		return (ma);
2042
2043	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
2044	    M_MOUNT, M_WAITOK);
2045	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
2046	ma->v[ma->len].iov_len = strlen(name) + 1;
2047	ma->len++;
2048
2049	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
2050	if (len < 0)
2051		ma->v[ma->len].iov_len = strlen(val) + 1;
2052	else
2053		ma->v[ma->len].iov_len = len;
2054	ma->len++;
2055	return (ma);
2056}
2057
2058/*
2059 * Free a mntarg structure
2060 */
2061static void
2062free_mntarg(struct mntarg *ma)
2063{
2064	struct mntaarg *maa;
2065
2066	while (!SLIST_EMPTY(&ma->list)) {
2067		maa = SLIST_FIRST(&ma->list);
2068		SLIST_REMOVE_HEAD(&ma->list, next);
2069		free(maa, M_MOUNT);
2070	}
2071	free(ma->v, M_MOUNT);
2072	free(ma, M_MOUNT);
2073}
2074
2075/*
2076 * Mount a filesystem
2077 */
2078int
2079kernel_mount(struct mntarg *ma, int flags)
2080{
2081	struct uio auio;
2082	int error;
2083
2084	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
2085	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
2086	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
2087
2088	auio.uio_iov = ma->v;
2089	auio.uio_iovcnt = ma->len;
2090	auio.uio_segflg = UIO_SYSSPACE;
2091
2092	error = ma->error;
2093	if (!error)
2094		error = vfs_donmount(curthread, flags, &auio);
2095	free_mntarg(ma);
2096	return (error);
2097}
2098
/*
 * Mount a filesystem from a variable argument list.
 *
 * After flags the caller passes (const char *name, const void *value)
 * pairs, terminated by a NULL name.  A non-NULL value is taken to be a
 * NUL-terminated string; a NULL value adds a value-less option of length
 * zero instead of being handed to strlen().  At least one pair must be
 * supplied, since kernel_mount() asserts on a NULL argument list.
 *
 * Returns the result of kernel_mount().
 */
int
kernel_vmount(int flags, ...)
{
	struct mntarg *ma = NULL;
	va_list ap;
	const char *cp;
	const void *vp;
	int error;

	va_start(ap, flags);
	for (;;) {
		cp = va_arg(ap, const char *);
		if (cp == NULL)
			break;
		vp = va_arg(ap, const void *);
		/* len -1 asks mount_arg() to strlen() the value. */
		ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
	}
	va_end(ap);

	error = kernel_mount(ma, flags);
	return (error);
}
2124