vfs_mount.c revision 139804
1133819Stjr/*-
2133819Stjr * Copyright (c) 1999-2004 Poul-Henning Kamp
3133819Stjr * Copyright (c) 1999 Michael Smith
4133819Stjr * Copyright (c) 1989, 1993
5133819Stjr *	The Regents of the University of California.  All rights reserved.
6133819Stjr * (c) UNIX System Laboratories, Inc.
7133819Stjr * All or some portions of this file are derived from material licensed
8133819Stjr * to the University of California by American Telephone and Telegraph
9133819Stjr * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10133819Stjr * the permission of UNIX System Laboratories, Inc.
11133819Stjr *
12133819Stjr * Redistribution and use in source and binary forms, with or without
13133819Stjr * modification, are permitted provided that the following conditions
14133819Stjr * are met:
15133819Stjr * 1. Redistributions of source code must retain the above copyright
16133819Stjr *    notice, this list of conditions and the following disclaimer.
17133819Stjr * 2. Redistributions in binary form must reproduce the above copyright
18133819Stjr *    notice, this list of conditions and the following disclaimer in the
19133819Stjr *    documentation and/or other materials provided with the distribution.
20133819Stjr * 4. Neither the name of the University nor the names of its contributors
21133819Stjr *    may be used to endorse or promote products derived from this software
22133819Stjr *    without specific prior written permission.
23133819Stjr *
24133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25133819Stjr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26133819Stjr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27133819Stjr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28133819Stjr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29133819Stjr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30133819Stjr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31133819Stjr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32133819Stjr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33133819Stjr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34133819Stjr * SUCH DAMAGE.
35133819Stjr */
36133819Stjr
37165832Snetchild#include <sys/cdefs.h>
38165832Snetchild__FBSDID("$FreeBSD: head/sys/kern/vfs_mount.c 139804 2005-01-06 23:35:40Z imp $");
39162954Sphk
40142057Sjhb#include <sys/param.h>
41161310Snetchild#include <sys/conf.h>
42133819Stjr#include <sys/cons.h>
43133819Stjr#include <sys/jail.h>
44133819Stjr#include <sys/kernel.h>
45133819Stjr#include <sys/mac.h>
46166729Sjkim#include <sys/malloc.h>
47133819Stjr#include <sys/mount.h>
48133819Stjr#include <sys/mutex.h>
49133819Stjr#include <sys/namei.h>
50166188Sjeff#include <sys/proc.h>
51133819Stjr#include <sys/filedesc.h>
52133819Stjr#include <sys/reboot.h>
53133819Stjr#include <sys/syscallsubr.h>
54133819Stjr#include <sys/sysproto.h>
55133819Stjr#include <sys/sx.h>
56168035Sjkim#include <sys/sysctl.h>
57166729Sjkim#include <sys/sysent.h>
58168035Sjkim#include <sys/systm.h>
59168035Sjkim#include <sys/vnode.h>
60133819Stjr
61133819Stjr#include <geom/geom.h>
62133819Stjr
63133819Stjr#include <machine/stdarg.h>
64133819Stjr
65210431Skib#include "opt_rootdevname.h"
66133819Stjr#include "opt_ddb.h"
67133819Stjr#include "opt_mac.h"
68133819Stjr
69133819Stjr#ifdef DDB
70133819Stjr#include <ddb/ddb.h>
71161474Snetchild#endif
72133819Stjr
/* NOTE(review): no user of ROOTNAME is visible in this part of the file. */
#define	ROOTNAME		"root_device"
/*
 * Upper bound, in bytes, that vfs_buildopts() accepts for a single option
 * name, a single option value, and the running total of one option set.
 */
#define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
75133819Stjr
/* Forward declarations of file-local functions. */
static void	gets(char *cp);
static int	vfs_domount(struct thread *td, const char *fstype,
		    char *fspath, int fsflags, void *fsdata);
static int	vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
		    const char *fspath, struct thread *td, struct mount **mpp);
static int	vfs_mountroot_ask(void);
static int	vfs_mountroot_try(const char *mountfrom);
static int	vfs_donmount(struct thread *td, int fsflags,
		    struct uio *fsoptions);
85133819Stjr
86133819Stjrstatic int	usermount = 0;
87133819StjrSYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
88133819Stjr    "Unprivileged users may mount and unmount file systems");
89133819Stjr
90133819StjrMALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
91133819Stjr
92133819Stjr/* List of mounted filesystems. */
93198554Sjhbstruct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
94198554Sjhb
95198554Sjhb/* For any iteration/modification of mountlist */
96198554Sjhbstruct mtx mountlist_mtx;
97133819Stjr
98133819StjrTAILQ_HEAD(vfsoptlist, vfsopt);
99133819Stjrstruct vfsopt {
100133819Stjr	TAILQ_ENTRY(vfsopt) link;
101133819Stjr	char	*name;
102133819Stjr	void	*value;
103133819Stjr	int	len;
104133819Stjr};
105133819Stjr
/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode	*rootvnode;
111142057Sjhb
112142057Sjhb/*
113142057Sjhb * The root filesystem is detailed in the kernel environment variable
114142057Sjhb * vfs.root.mountfrom, which is expected to be in the general format
115142057Sjhb *
116142057Sjhb * <vfsname>:[<path>]
117142057Sjhb * vfsname   := the name of a VFS known to the kernel and capable
118142057Sjhb *              of being mounted as root
119142057Sjhb * path      := disk device name or other data used by the filesystem
120142057Sjhb *              to locate its physical store
121142057Sjhb */
122142057Sjhb
/*
 * Global opts, taken by all filesystems.
 * The table is terminated by a NULL entry.
 */
static const char *global_opts[] = {
	"fstype",
	"fspath",
	"ro",
	"suid",
	"exec",
	NULL
};
134161474Snetchild
/*
 * The root specifiers we will try if RB_CDROM is specified.
 * Entries use the "<vfsname>:<path>" format; the table is NULL-terminated.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};
143133819Stjr
/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
/* ROOTDEVNAME may be supplied at build time; default to "none". */
#ifndef ROOTDEVNAME
#  define ROOTDEVNAME NULL
#endif
const char	*ctrootdevname = ROOTDEVNAME;
150144449Sjhb
151144449Sjhb/*
152133819Stjr * ---------------------------------------------------------------------
153144449Sjhb * Functions for building and sanitizing the mount options
154168844Sjkim */
155144449Sjhb
156144449Sjhb/* Remove one mount option. */
157185438Skibstatic void
158144449Sjhbvfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
159144449Sjhb{
160144449Sjhb
161144449Sjhb	TAILQ_REMOVE(opts, opt, link);
162144449Sjhb	free(opt->name, M_MOUNT);
163144449Sjhb	if (opt->value != NULL)
164133819Stjr		free(opt->value, M_MOUNT);
165144449Sjhb#ifdef INVARIANTS
166144449Sjhb	else if (opt->len != 0)
167144449Sjhb		panic("%s: mount option with NULL value but length != 0",
168144449Sjhb		    __func__);
169144449Sjhb#endif
170144449Sjhb	free(opt, M_MOUNT);
171144449Sjhb}
172144449Sjhb
173144449Sjhb/* Release all resources related to the mount options. */
174144449Sjhbstatic void
175144449Sjhbvfs_freeopts(struct vfsoptlist *opts)
176144449Sjhb{
177144449Sjhb	struct vfsopt *opt;
178144449Sjhb
179144449Sjhb	while (!TAILQ_EMPTY(opts)) {
180144449Sjhb		opt = TAILQ_FIRST(opts);
181133819Stjr		vfs_freeopt(opts, opt);
182144449Sjhb	}
183185438Skib	free(opts, M_MOUNT);
184185438Skib}
185185438Skib
/*
 * Check if options are equal (with or without the "no" prefix).
 *
 * Returns 1 when opt1 and opt2 name the same option, i.e. the strings are
 * identical or one is the other prefixed with "no"; returns 0 otherwise.
 */
static int
vfs_equalopts(const char *opt1, const char *opt2)
{

	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
	if (strcmp(opt1, opt2) == 0)
		return (1);
	/* "noopt" vs. "opt" */
	if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
		return (1);
	/* "opt" vs. "noopt" */
	if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
		return (1);
	return (0);
}
204185438Skib
205185438Skib/*
206185438Skib * If a mount option is specified several times,
207185438Skib * (with or without the "no" prefix) only keep
208185438Skib * the last occurence of it.
209185438Skib */
210185438Skibstatic void
211144449Sjhbvfs_sanitizeopts(struct vfsoptlist *opts)
212144449Sjhb{
213144449Sjhb	struct vfsopt *opt, *opt2, *tmp;
214144449Sjhb
215133819Stjr	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
216144449Sjhb		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
217144449Sjhb		while (opt2 != NULL) {
218144449Sjhb			if (vfs_equalopts(opt->name, opt2->name)) {
219144449Sjhb				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
220144449Sjhb				vfs_freeopt(opts, opt2);
221133819Stjr				opt2 = tmp;
222133819Stjr			} else {
223133819Stjr				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
224133819Stjr			}
225133819Stjr		}
226133819Stjr	}
227144449Sjhb}
228144449Sjhb
229133819Stjr/*
230144449Sjhb * Build a linked list of mount options from a struct uio.
231144449Sjhb */
232144449Sjhbstatic int
233144449Sjhbvfs_buildopts(struct uio *auio, struct vfsoptlist **options)
234144449Sjhb{
235133819Stjr	struct vfsoptlist *opts;
236133819Stjr	struct vfsopt *opt;
237133819Stjr	size_t memused;
238133819Stjr	unsigned int i, iovcnt;
239133819Stjr	int error, namelen, optlen;
240133819Stjr
241133819Stjr	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
242133819Stjr	TAILQ_INIT(opts);
243133819Stjr	memused = 0;
244133819Stjr	iovcnt = auio->uio_iovcnt;
245133819Stjr	for (i = 0; i < iovcnt; i += 2) {
246133819Stjr		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
247133819Stjr		namelen = auio->uio_iov[i].iov_len;
248133819Stjr		optlen = auio->uio_iov[i + 1].iov_len;
249133819Stjr		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
250133819Stjr		opt->value = NULL;
251133819Stjr		opt->len = 0;
252133819Stjr
253133819Stjr		/*
254133819Stjr		 * Do this early, so jumps to "bad" will free the current
255133819Stjr		 * option.
256133819Stjr		 */
257133819Stjr		TAILQ_INSERT_TAIL(opts, opt, link);
258133819Stjr		memused += sizeof(struct vfsopt) + optlen + namelen;
259133819Stjr
260133819Stjr		/*
261133819Stjr		 * Avoid consuming too much memory, and attempts to overflow
262133819Stjr		 * memused.
263133819Stjr		 */
264133819Stjr		if (memused > VFS_MOUNTARG_SIZE_MAX ||
265133819Stjr		    optlen > VFS_MOUNTARG_SIZE_MAX ||
266133819Stjr		    namelen > VFS_MOUNTARG_SIZE_MAX) {
267133819Stjr			error = EINVAL;
268133819Stjr			goto bad;
269133819Stjr		}
270133819Stjr
271133819Stjr		if (auio->uio_segflg == UIO_SYSSPACE) {
272133819Stjr			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
273133819Stjr		} else {
274133819Stjr			error = copyin(auio->uio_iov[i].iov_base, opt->name,
275133819Stjr			    namelen);
276133819Stjr			if (error)
277133819Stjr				goto bad;
278133819Stjr		}
279133819Stjr		/* Ensure names are null-terminated strings. */
280133819Stjr		if (opt->name[namelen - 1] != '\0') {
281133819Stjr			error = EINVAL;
282133819Stjr			goto bad;
283133819Stjr		}
284133819Stjr		if (optlen != 0) {
285133819Stjr			opt->len = optlen;
286133819Stjr			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
287133819Stjr			if (auio->uio_segflg == UIO_SYSSPACE) {
288133819Stjr				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
289133819Stjr				    optlen);
290133819Stjr			} else {
291133819Stjr				error = copyin(auio->uio_iov[i + 1].iov_base,
292133819Stjr				    opt->value, optlen);
293133819Stjr				if (error)
294133819Stjr					goto bad;
295133819Stjr			}
296133819Stjr		}
297133819Stjr	}
298133819Stjr	vfs_sanitizeopts(opts);
299133819Stjr	*options = opts;
300133819Stjr	return (0);
301133819Stjrbad:
302133819Stjr	vfs_freeopts(opts);
303133819Stjr	return (error);
304133819Stjr}
305133819Stjr
306133819Stjr/*
307133819Stjr * Merge the old mount options with the new ones passed
308133819Stjr * in the MNT_UPDATE case.
309133819Stjr */
310133819Stjrstatic void
311133819Stjrvfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
312133819Stjr{
313133819Stjr	struct vfsopt *opt, *opt2, *new;
314133819Stjr
315133819Stjr	TAILQ_FOREACH(opt, opts, link) {
316133819Stjr		/*
317133819Stjr		 * Check that this option hasn't been redefined
318133819Stjr		 * nor cancelled with a "no" mount option.
319133819Stjr		 */
320133819Stjr		opt2 = TAILQ_FIRST(toopts);
321133819Stjr		while (opt2 != NULL) {
322133819Stjr			if (strcmp(opt2->name, opt->name) == 0)
323133819Stjr				goto next;
324133819Stjr			if (strncmp(opt2->name, "no", 2) == 0 &&
325133819Stjr			    strcmp(opt2->name + 2, opt->name) == 0) {
326133819Stjr				vfs_freeopt(toopts, opt2);
327133819Stjr				goto next;
328133819Stjr			}
329144441Sjhb			opt2 = TAILQ_NEXT(opt2, link);
330133819Stjr		}
331133819Stjr		/* We want this option, duplicate it. */
332133819Stjr		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
333133819Stjr		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
334133819Stjr		strcpy(new->name, opt->name);
335133819Stjr		if (opt->len != 0) {
336133819Stjr			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
337133819Stjr			bcopy(opt->value, new->value, opt->len);
338133819Stjr		} else {
339133819Stjr			new->value = NULL;
340133819Stjr		}
341133819Stjr		new->len = opt->len;
342133819Stjr		TAILQ_INSERT_TAIL(toopts, new, link);
343133819Stjrnext:
344133819Stjr		continue;
345133819Stjr	}
346133819Stjr}
347133819Stjr
348133819Stjr/*
349133819Stjr * ---------------------------------------------------------------------
350133819Stjr * Mount a filesystem
351133819Stjr */
352133819Stjrint
353133819Stjrnmount(td, uap)
354133819Stjr	struct thread *td;
355133819Stjr	struct nmount_args /* {
356133819Stjr		struct iovec *iovp;
357133819Stjr		unsigned int iovcnt;
358133819Stjr		int flags;
359133819Stjr	} */ *uap;
360133819Stjr{
361133819Stjr	struct uio *auio;
362133819Stjr	struct iovec *iov;
363133819Stjr	unsigned int i;
364133819Stjr	int error;
365133819Stjr	u_int iovcnt;
366133819Stjr
367133819Stjr	/* Kick out MNT_ROOTFS early as it is legal internally */
368133819Stjr	if (uap->flags & MNT_ROOTFS)
369133819Stjr		return (EINVAL);
370133819Stjr
371133819Stjr	iovcnt = uap->iovcnt;
372133819Stjr	/*
373133819Stjr	 * Check that we have an even number of iovec's
374133819Stjr	 * and that we have at least two options.
375133819Stjr	 */
376133819Stjr	if ((iovcnt & 1) || (iovcnt < 4))
377133819Stjr		return (EINVAL);
378133819Stjr
379133819Stjr	error = copyinuio(uap->iovp, iovcnt, &auio);
380133819Stjr	if (error)
381133819Stjr		return (error);
382133819Stjr	iov = auio->uio_iov;
383133819Stjr	for (i = 0; i < iovcnt; i++) {
384133819Stjr		if (iov->iov_len > MMAXOPTIONLEN) {
385133819Stjr			free(auio, M_IOV);
386133819Stjr			return (EINVAL);
387133819Stjr		}
388133819Stjr		iov++;
389166150Snetchild	}
390166150Snetchild	error = vfs_donmount(td, uap->flags, auio);
391133819Stjr	free(auio, M_IOV);
392133819Stjr	return (error);
393133819Stjr}
394133819Stjr
395133819Stjr/*
396133819Stjr * ---------------------------------------------------------------------
397166150Snetchild * Various utility functions
398133819Stjr */
399168063Sjkim
400166150Snetchild/*
401166150Snetchild * Allocate and initialize the mount point struct.
402166150Snetchild */
403166150Snetchildstatic int
404133819Stjrvfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp,
405133819Stjr    const char *fspath, struct thread *td, struct mount **mpp)
406133819Stjr{
407161474Snetchild	struct mount *mp;
408161474Snetchild
409161474Snetchild	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
410161474Snetchild	TAILQ_INIT(&mp->mnt_nvnodelist);
411166150Snetchild	mp->mnt_nvnodelistsize = 0;
412166150Snetchild	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
413168063Sjkim	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
414168063Sjkim	vfs_busy(mp, LK_NOWAIT, 0, td);
415168063Sjkim	mp->mnt_op = vfsp->vfc_vfsops;
416170307Sjeff	mp->mnt_vfc = vfsp;
417166150Snetchild	vfsp->vfc_refcount++;
418166188Sjeff	mp->mnt_stat.f_type = vfsp->vfc_typenum;
419170307Sjeff	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
420166150Snetchild	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
421133819Stjr	mp->mnt_vnodecovered = vp;
422133819Stjr	mp->mnt_cred = crdup(td->td_ucred);
423133819Stjr	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
424133819Stjr	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
425133819Stjr	mp->mnt_iosize_max = DFLTPHYS;
426133819Stjr#ifdef MAC
427133819Stjr	mac_init_mount(mp);
428161611Snetchild	mac_create_mount(td->td_ucred, mp);
429166150Snetchild#endif
430133819Stjr	*mpp = mp;
431133819Stjr	return (0);
432133819Stjr}
433133819Stjr
434133819Stjr/*
435133819Stjr * Destroy the mount struct previously allocated by vfs_mount_alloc().
436168063Sjkim */
437166150Snetchildvoid
438133819Stjrvfs_mount_destroy(struct mount *mp, struct thread *td)
439161611Snetchild{
440161611Snetchild
441161611Snetchild	mp->mnt_vfc->vfc_refcount--;
442161611Snetchild	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
443133819Stjr		panic("unmount: dangling vnode");
444133819Stjr	vfs_unbusy(mp,td);
445133819Stjr	lockdestroy(&mp->mnt_lock);
446161474Snetchild	mtx_destroy(&mp->mnt_mtx);
447161474Snetchild	if (mp->mnt_kern_flag & MNTK_MWAIT)
448161474Snetchild		wakeup(mp);
449166150Snetchild#ifdef MAC
450166150Snetchild	mac_destroy_mount(mp);
451166150Snetchild#endif
452166150Snetchild	if (mp->mnt_opt != NULL)
453166150Snetchild		vfs_freeopts(mp->mnt_opt);
454166150Snetchild	crfree(mp->mnt_cred);
455168063Sjkim	free(mp, M_MOUNT);
456168848Sjkim}
457168848Sjkim
458168848Sjkimstatic int
459170307Sjeffvfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions)
460166150Snetchild{
461166188Sjeff	struct vfsoptlist *optlist;
462170307Sjeff	char *fstype, *fspath;
463166150Snetchild	int error, fstypelen, fspathlen;
464161611Snetchild
465161611Snetchild	error = vfs_buildopts(fsoptions, &optlist);
466161611Snetchild	if (error)
467188750Skib		return (error);
468161611Snetchild
469168063Sjkim	/*
470133819Stjr	 * We need these two options before the others,
471133819Stjr	 * and they are mandatory for any filesystem.
472133819Stjr	 * Ensure they are NUL terminated as well.
473133819Stjr	 */
474133819Stjr	fstypelen = 0;
475133819Stjr	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
476133819Stjr	if (error || fstype[fstypelen - 1] != '\0') {
477133819Stjr		error = EINVAL;
478133819Stjr		goto bail;
479133819Stjr	}
480161474Snetchild	fspathlen = 0;
481133819Stjr	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
482133819Stjr	if (error || fspath[fspathlen - 1] != '\0') {
483133819Stjr		error = EINVAL;
484168063Sjkim		goto bail;
485168063Sjkim	}
486168063Sjkim
487133819Stjr	/*
488133819Stjr	 * Be ultra-paranoid about making sure the type and fspath
489133819Stjr	 * variables will fit in our mp buffers, including the
490133819Stjr	 * terminating NUL.
491169458Skan	 */
492169458Skan	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
493169458Skan		error = ENAMETOOLONG;
494169458Skan		goto bail;
495169458Skan	}
496133819Stjr
497133819Stjr	mtx_lock(&Giant);
498167157Sjkim	error = vfs_domount(td, fstype, fspath, fsflags, optlist);
499133819Stjr	mtx_unlock(&Giant);
500167157Sjkimbail:
501133819Stjr	if (error)
502168063Sjkim		vfs_freeopts(optlist);
503168063Sjkim	return (error);
504168063Sjkim}
505168848Sjkim
506168848Sjkim/*
507163371Snetchild * ---------------------------------------------------------------------
508167157Sjkim * Old mount API.
509133819Stjr */
510133819Stjr#ifndef _SYS_SYSPROTO_H_
511161474Snetchildstruct mount_args {
512161474Snetchild	char	*type;
513161474Snetchild	char	*path;
514161474Snetchild	int	flags;
515161474Snetchild	caddr_t	data;
516166944Snetchild};
517161474Snetchild#endif
518161474Snetchild/* ARGSUSED */
519161474Snetchildint
520161474Snetchildmount(td, uap)
521161474Snetchild	struct thread *td;
522161474Snetchild	struct mount_args /* {
523161474Snetchild		char *type;
524167157Sjkim		char *path;
525161474Snetchild		int flags;
526161474Snetchild		caddr_t data;
527168056Sjkim	} */ *uap;
528168056Sjkim{
529168056Sjkim	char *fstype;
530168056Sjkim	struct vfsconf *vfsp = NULL;
531133819Stjr	struct mntarg *ma = NULL;
532133819Stjr	int error;
533133819Stjr
534166395Skib	/* Kick out MNT_ROOTFS early as it is legal internally */
535167157Sjkim	uap->flags &= ~MNT_ROOTFS;
536166395Skib
537166395Skib	if (uap->data == NULL)
538166395Skib		return (EINVAL);
539166395Skib
540166395Skib	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
541166395Skib	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
542168063Sjkim	if (!error) {
543161474Snetchild		mtx_lock(&Giant);	/* XXX ? */
544161474Snetchild		vfsp = vfs_byname_kld(fstype, td, &error);
545161474Snetchild		mtx_unlock(&Giant);
546165867Snetchild	}
547161474Snetchild	free(fstype, M_TEMP);
548161474Snetchild	if (error)
549133819Stjr		return (error);
550167157Sjkim	if (vfsp == NULL)
551161474Snetchild		return (ENOENT);
552161696Snetchild	if (vfsp->vfc_vfsops->vfs_cmount == NULL)
553161474Snetchild		return (EOPNOTSUPP);
554161696Snetchild
555161474Snetchild	ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN);
556168063Sjkim	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
557161474Snetchild	ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro");
558161474Snetchild	ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid");
559167157Sjkim	ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec");
560161474Snetchild
561161474Snetchild	error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags, td);
562161474Snetchild	return (error);
563161474Snetchild}
564167157Sjkim
565161474Snetchild
566161474Snetchild/*
567161474Snetchild * vfs_domount(): actually attempt a filesystem mount.
568161696Snetchild */
569161474Snetchildstatic int
570161474Snetchildvfs_domount(
571168056Sjkim	struct thread *td,	/* Flags common to all filesystems. */
572168056Sjkim	const char *fstype,	/* Filesystem type. */
573168056Sjkim	char *fspath,		/* Mount path. */
574168056Sjkim	int fsflags,		/* Flags common to all filesystems. */
575168056Sjkim	void *fsdata		/* Options local to the filesystem. */
576168056Sjkim	)
577168056Sjkim{
578133819Stjr	struct vnode *vp;
579133819Stjr	struct mount *mp;
580133819Stjr	struct vfsconf *vfsp;
581133819Stjr	int error, flag = 0, kern_flag = 0;
582168063Sjkim	struct vattr va;
583168063Sjkim	struct nameidata nd;
584168063Sjkim
585168063Sjkim	mtx_assert(&Giant, MA_OWNED);
586161310Snetchild
587161310Snetchild	/*
588168063Sjkim	 * Be ultra-paranoid about making sure the type and fspath
589133819Stjr	 * variables will fit in our mp buffers, including the
590167157Sjkim	 * terminating NUL.
591168035Sjkim	 */
592168035Sjkim	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
593216634Sjkim		return (ENAMETOOLONG);
594168848Sjkim
595168035Sjkim	if (jailed(td->td_ucred))
596168848Sjkim		return (EPERM);
597168035Sjkim	if (usermount == 0) {
598168035Sjkim		if ((error = suser(td)) != 0)
599168035Sjkim			return (error);
600168035Sjkim	}
601168035Sjkim
602168848Sjkim	/*
603168035Sjkim	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
604168035Sjkim	 */
605168035Sjkim	if (fsflags & (MNT_EXPORTED | MNT_SUIDDIR)) {
606168035Sjkim		if ((error = suser(td)) != 0)
607168035Sjkim			return (error);
608168035Sjkim	}
609168035Sjkim	/*
610168035Sjkim	 * Silently enforce MNT_NOSUID and MNT_USER for
611168035Sjkim	 * unprivileged users.
612168035Sjkim	 */
613168035Sjkim	if (suser(td) != 0)
614168035Sjkim		fsflags |= MNT_NOSUID | MNT_USER;
615168035Sjkim	/*
616168035Sjkim	 * Get vnode to be covered
617168035Sjkim	 */
618168035Sjkim	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
619168035Sjkim	if ((error = namei(&nd)) != 0)
620168035Sjkim		return (error);
621168035Sjkim	NDFREE(&nd, NDF_ONLY_PNBUF);
622168035Sjkim	vp = nd.ni_vp;
623216634Sjkim	if (fsflags & MNT_UPDATE) {
624216634Sjkim		if ((vp->v_vflag & VV_ROOT) == 0) {
625216634Sjkim			vput(vp);
626190620Skib			return (EINVAL);
627216634Sjkim		}
628168035Sjkim		mp = vp->v_mount;
629161474Snetchild		flag = mp->mnt_flag;
630161474Snetchild		kern_flag = mp->mnt_kern_flag;
631133819Stjr		/*
632133819Stjr		 * We only allow the filesystem to be reloaded if it
633168063Sjkim		 * is currently mounted read-only.
634168063Sjkim		 */
635168063Sjkim		if ((fsflags & MNT_RELOAD) &&
636133819Stjr		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
637167157Sjkim			vput(vp);
638166150Snetchild			return (EOPNOTSUPP);	/* Needs translation */
639166150Snetchild		}
640166150Snetchild		/*
641166150Snetchild		 * Only privileged root, or (if MNT_USER is set) the user that
642133819Stjr		 * did the original mount is permitted to update it.
643133819Stjr		 */
644133819Stjr		error = vfs_suser(mp, td);
645133819Stjr		if (error) {
646170307Sjeff			vput(vp);
647133819Stjr			return (error);
648166188Sjeff		}
649170307Sjeff		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
650133819Stjr			vput(vp);
651133819Stjr			return (EBUSY);
652133819Stjr		}
653168063Sjkim		VI_LOCK(vp);
654167157Sjkim		if ((vp->v_iflag & VI_MOUNT) != 0 ||
655168063Sjkim		    vp->v_mountedhere != NULL) {
656168063Sjkim			VI_UNLOCK(vp);
657163374Snetchild			vfs_unbusy(mp, td);
658188750Skib			vput(vp);
659163374Snetchild			return (EBUSY);
660163374Snetchild		}
661163374Snetchild		vp->v_iflag |= VI_MOUNT;
662133819Stjr		VI_UNLOCK(vp);
663133819Stjr		mp->mnt_flag |= fsflags &
664133819Stjr		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS);
665133819Stjr		VOP_UNLOCK(vp, 0, td);
666133819Stjr		mp->mnt_optnew = fsdata;
667133819Stjr		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
668133819Stjr	} else {
669133819Stjr		/*
670133819Stjr		 * If the user is not root, ensure that they own the directory
671133819Stjr		 * onto which we are attempting to mount.
672133819Stjr		 */
673133819Stjr		error = VOP_GETATTR(vp, &va, td->td_ucred, td);
674168063Sjkim		if (error) {
675168063Sjkim			vput(vp);
676133819Stjr			return (error);
677133819Stjr		}
678133819Stjr		if (va.va_uid != td->td_ucred->cr_uid) {
679198554Sjhb			if ((error = suser(td)) != 0) {
680198554Sjhb				vput(vp);
681198554Sjhb				return (error);
682133819Stjr			}
683133819Stjr		}
684133819Stjr		error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0);
685133819Stjr		if (error != 0) {
686133819Stjr			vput(vp);
687133819Stjr			return (error);
688133819Stjr		}
689133819Stjr		if (vp->v_type != VDIR) {
690133819Stjr			vput(vp);
691133819Stjr			return (ENOTDIR);
692133819Stjr		}
693133819Stjr		vfsp = vfs_byname_kld(fstype, td, &error);
694133819Stjr		if (vfsp == NULL) {
695133819Stjr			vput(vp);
696168063Sjkim			return (error);
697168063Sjkim		}
698168063Sjkim		VI_LOCK(vp);
699133819Stjr		if ((vp->v_iflag & VI_MOUNT) != 0 ||
700133819Stjr		    vp->v_mountedhere != NULL) {
701198554Sjhb			VI_UNLOCK(vp);
702198554Sjhb			vput(vp);
703198554Sjhb			return (EBUSY);
704133819Stjr		}
705133819Stjr		vp->v_iflag |= VI_MOUNT;
706133819Stjr		VI_UNLOCK(vp);
707198554Sjhb
708198554Sjhb		/*
709133819Stjr		 * Allocate and initialize the filesystem.
710133819Stjr		 */
711133819Stjr		error = vfs_mount_alloc(vp, vfsp, fspath, td, &mp);
712133819Stjr		if (error) {
713133819Stjr			vput(vp);
714133819Stjr			return (error);
715133819Stjr		}
716133819Stjr		VOP_UNLOCK(vp, 0, td);
717133819Stjr
718133819Stjr		/* XXXMAC: pass to vfs_mount_alloc? */
719133819Stjr		mp->mnt_optnew = fsdata;
720133819Stjr	}
721165832Snetchild
722133819Stjr	/*
723133819Stjr	 * Set the mount level flags.
724133819Stjr	 */
725165832Snetchild	if (fsflags & MNT_RDONLY)
726165832Snetchild		mp->mnt_flag |= MNT_RDONLY;
727165832Snetchild	mp->mnt_flag &=~ MNT_UPDATEMASK;
728165832Snetchild	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE | MNT_ROOTFS);
729165832Snetchild	/*
730165832Snetchild	 * Mount the filesystem.
731198554Sjhb	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
732165832Snetchild	 * get.  No freeing of cn_pnbuf.
733165832Snetchild	 */
734198554Sjhb        error = VFS_MOUNT(mp, td);
735133819Stjr	if (!error) {
736198554Sjhb		if (mp->mnt_opt != NULL)
737133819Stjr			vfs_freeopts(mp->mnt_opt);
738198554Sjhb		mp->mnt_opt = mp->mnt_optnew;
739133819Stjr		VFS_STATFS(mp, &mp->mnt_stat, td);
740208994Skan	}
741208994Skan	/*
742208994Skan	 * Prevent external consumers of mount options from reading
743208994Skan	 * mnt_optnew.
744208994Skan	*/
745133819Stjr	mp->mnt_optnew = NULL;
746208994Skan	if (mp->mnt_flag & MNT_UPDATE) {
747133819Stjr		mp->mnt_flag &=
748198554Sjhb		    ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
749133819Stjr		if (error) {
750133819Stjr			mp->mnt_flag = flag;
751166727Sjkim			mp->mnt_kern_flag = kern_flag;
752166727Sjkim		}
753166727Sjkim		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
754166727Sjkim			if (mp->mnt_syncer == NULL)
755166727Sjkim				error = vfs_allocate_syncvnode(mp);
756198554Sjhb		} else {
757166727Sjkim			if (mp->mnt_syncer != NULL)
758166727Sjkim				vrele(mp->mnt_syncer);
759166727Sjkim			mp->mnt_syncer = NULL;
760167048Sjkim		}
761198554Sjhb		vfs_unbusy(mp, td);
762167048Sjkim		VI_LOCK(vp);
763166727Sjkim		vp->v_iflag &= ~VI_MOUNT;
764166727Sjkim		VI_UNLOCK(vp);
765166727Sjkim		vrele(vp);
766166727Sjkim		return (error);
767166727Sjkim	}
768166727Sjkim	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
769166727Sjkim	/*
770167048Sjkim	 * Put the new filesystem on the mount list after root.
771166727Sjkim	 */
772166727Sjkim	cache_purge(vp);
773166727Sjkim	if (!error) {
774166727Sjkim		struct vnode *newdp;
775166727Sjkim
776166727Sjkim		VI_LOCK(vp);
777166727Sjkim		vp->v_iflag &= ~VI_MOUNT;
778166727Sjkim		VI_UNLOCK(vp);
779166727Sjkim		vp->v_mountedhere = mp;
780166727Sjkim		mtx_lock(&mountlist_mtx);
781166727Sjkim		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
782166727Sjkim		mtx_unlock(&mountlist_mtx);
783166727Sjkim		vfs_event_signal(NULL, VQ_MOUNT, 0);
784166727Sjkim		if (VFS_ROOT(mp, &newdp, td))
785166727Sjkim			panic("mount: lost mount");
786198554Sjhb		mountcheckdirs(vp, newdp);
787168063Sjkim		vput(newdp);
788168063Sjkim		VOP_UNLOCK(vp, 0, td);
789133819Stjr		if ((mp->mnt_flag & MNT_RDONLY) == 0)
790133819Stjr			error = vfs_allocate_syncvnode(mp);
791133819Stjr		vfs_unbusy(mp, td);
792168063Sjkim		if (error || (error = VFS_START(mp, 0, td)) != 0)
793133819Stjr			vrele(vp);
794168063Sjkim	} else {
795133819Stjr		VI_LOCK(vp);
796133819Stjr		vp->v_iflag &= ~VI_MOUNT;
797133819Stjr		VI_UNLOCK(vp);
798133819Stjr		vfs_mount_destroy(mp, td);
799133819Stjr		vput(vp);
800168848Sjkim	}
801166944Snetchild	return (error);
802133819Stjr}
803133819Stjr
804133819Stjr/*
805133819Stjr * ---------------------------------------------------------------------
806133819Stjr * Unmount a filesystem.
807133819Stjr *
808133819Stjr * Note: unmount takes a path to the vnode mounted on as argument,
809198554Sjhb * not special file (as before).
810168063Sjkim */
/* Argument layout of the unmount system call, when sysproto.h is absent. */
#ifndef _SYS_SYSPROTO_H_
struct unmount_args {
	char	*path;
	int	flags;
};
#endif
817133819Stjr/* ARGSUSED */
818133819Stjrint
819133819Stjrunmount(td, uap)
820133819Stjr	struct thread *td;
821133819Stjr	register struct unmount_args /* {
822133819Stjr		char *path;
823133819Stjr		int flags;
824133819Stjr	} */ *uap;
825133819Stjr{
826133819Stjr	struct mount *mp;
827166727Sjkim	char *pathbuf;
828133819Stjr	int error, id0, id1;
829133819Stjr
830133819Stjr	if (jailed(td->td_ucred))
831133819Stjr		return (EPERM);
832168063Sjkim	if (usermount == 0) {
833176193Sjkim		if ((error = suser(td)) != 0)
834176193Sjkim			return (error);
835176193Sjkim	}
836176193Sjkim
837176193Sjkim	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
838176193Sjkim	error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL);
839133819Stjr	if (error) {
840198554Sjhb		free(pathbuf, M_TEMP);
841198554Sjhb		return (error);
842198554Sjhb	}
843176193Sjkim	if (uap->flags & MNT_BYFSID) {
844198554Sjhb		/* Decode the filesystem ID. */
845198554Sjhb		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
846176193Sjkim			free(pathbuf, M_TEMP);
847176193Sjkim			return (EINVAL);
848133819Stjr		}
849198554Sjhb
850198554Sjhb		mtx_lock(&mountlist_mtx);
851133819Stjr		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
852198554Sjhb			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
853133819Stjr			    mp->mnt_stat.f_fsid.val[1] == id1)
854133819Stjr				break;
855133819Stjr		}
856133819Stjr		mtx_unlock(&mountlist_mtx);
857133819Stjr	} else {
858133843Sobrien		mtx_lock(&mountlist_mtx);
859133819Stjr		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
860133819Stjr			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0)
861133819Stjr				break;
862133819Stjr		}
863133819Stjr		mtx_unlock(&mountlist_mtx);
864133819Stjr	}
865133819Stjr	free(pathbuf, M_TEMP);
866133819Stjr	if (mp == NULL) {
867133819Stjr		/*
868133819Stjr		 * Previously we returned ENOENT for a nonexistent path and
869133819Stjr		 * EINVAL for a non-mountpoint.  We cannot tell these apart
870133819Stjr		 * now, so in the !MNT_BYFSID case return the more likely
871168035Sjkim		 * EINVAL for compatibility.
872168035Sjkim		 */
873168035Sjkim		return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
874168035Sjkim	}
875168035Sjkim
876168035Sjkim	/*
877168035Sjkim	 * Only privileged root, or (if MNT_USER is set) the user that did the
878168035Sjkim	 * original mount is permitted to unmount this filesystem.
879168035Sjkim	 */
880168035Sjkim	error = vfs_suser(mp, td);
881168035Sjkim	if (error)
882168035Sjkim		return (error);
883168035Sjkim
884166729Sjkim	/*
885166729Sjkim	 * Don't allow unmounting the root filesystem.
886166729Sjkim	 */
887166729Sjkim	if (mp->mnt_flag & MNT_ROOTFS)
888166729Sjkim		return (EINVAL);
889166729Sjkim	mtx_lock(&Giant);
890166729Sjkim	error = dounmount(mp, uap->flags, td);
891166729Sjkim	mtx_unlock(&Giant);
892166729Sjkim	return (error);
893166729Sjkim}
894166729Sjkim
895166729Sjkim/*
896166729Sjkim * Do the actual filesystem unmount.
897166729Sjkim */
898166729Sjkimint
899166729Sjkimdounmount(mp, flags, td)
900166729Sjkim	struct mount *mp;
901133819Stjr	int flags;
902133819Stjr	struct thread *td;
903133819Stjr{
904184849Sed	struct vnode *coveredvp, *fsrootvp;
905133819Stjr	int error;
906133819Stjr	int async_flag;
907133819Stjr
908133819Stjr	mtx_assert(&Giant, MA_OWNED);
909133819Stjr
910133819Stjr	mtx_lock(&mountlist_mtx);
911184849Sed	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
912184849Sed		mtx_unlock(&mountlist_mtx);
913133819Stjr		return (EBUSY);
914133819Stjr	}
915184849Sed	mp->mnt_kern_flag |= MNTK_UNMOUNT;
916184849Sed	/* Allow filesystems to detect that a forced unmount is in progress. */
917133819Stjr	if (flags & MNT_FORCE)
918166731Sjkim		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
919133819Stjr	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
920133819Stjr	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
921133819Stjr	if (error) {
922133819Stjr		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
923133819Stjr		if (mp->mnt_kern_flag & MNTK_MWAIT)
924133819Stjr			wakeup(mp);
925133819Stjr		return (error);
926133819Stjr	}
927133819Stjr	vn_start_write(NULL, &mp, V_WAIT);
928133819Stjr
929133819Stjr	if (mp->mnt_flag & MNT_EXPUBLIC)
930133819Stjr		vfs_setpublicfs(NULL, NULL, NULL);
931133819Stjr
932133819Stjr	vfs_msync(mp, MNT_WAIT);
933133819Stjr	async_flag = mp->mnt_flag & MNT_ASYNC;
934133819Stjr	mp->mnt_flag &= ~MNT_ASYNC;
935133819Stjr	cache_purgevfs(mp);	/* remove cache entries for this file sys */
936133819Stjr	if (mp->mnt_syncer != NULL)
937133819Stjr		vrele(mp->mnt_syncer);
938133819Stjr	/*
939133819Stjr	 * For forced unmounts, move process cdir/rdir refs on the fs root
940133819Stjr	 * vnode to the covered vnode.  For non-forced unmounts we want
941133819Stjr	 * such references to cause an EBUSY error.
942133819Stjr	 */
943133819Stjr	if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
944133819Stjr		if (mp->mnt_vnodecovered != NULL)
945133819Stjr			mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
946133819Stjr		if (fsrootvp == rootvnode) {
947133819Stjr			vrele(rootvnode);
948133819Stjr			rootvnode = NULL;
949133819Stjr		}
950133819Stjr		vput(fsrootvp);
951133819Stjr	}
952133819Stjr	if (((mp->mnt_flag & MNT_RDONLY) ||
953133819Stjr	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
954133819Stjr	    (flags & MNT_FORCE)) {
955133819Stjr		error = VFS_UNMOUNT(mp, flags, td);
956133819Stjr	}
957133819Stjr	vn_finished_write(mp);
958168063Sjkim	if (error) {
959133819Stjr		/* Undo cdir/rdir and rootvnode changes made above. */
960133819Stjr		if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) {
961133819Stjr			if (mp->mnt_vnodecovered != NULL)
962133819Stjr				mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
963133819Stjr			if (rootvnode == NULL) {
964133819Stjr				rootvnode = fsrootvp;
965133819Stjr				vref(rootvnode);
966133819Stjr			}
967133819Stjr			vput(fsrootvp);
968133819Stjr		}
969133819Stjr		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
970133819Stjr			(void) vfs_allocate_syncvnode(mp);
971133819Stjr		mtx_lock(&mountlist_mtx);
972133819Stjr		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
973133819Stjr		mp->mnt_flag |= async_flag;
974133819Stjr		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
975133819Stjr		    &mountlist_mtx, td);
976133819Stjr		if (mp->mnt_kern_flag & MNTK_MWAIT)
977133819Stjr			wakeup(mp);
978133819Stjr		return (error);
979133819Stjr	}
980133819Stjr	mtx_lock(&mountlist_mtx);
981133819Stjr	TAILQ_REMOVE(&mountlist, mp, mnt_list);
982133819Stjr	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
983133819Stjr		coveredvp->v_mountedhere = NULL;
984133819Stjr	mtx_unlock(&mountlist_mtx);
985133819Stjr	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
986133819Stjr	vfs_mount_destroy(mp, td);
987133819Stjr	if (coveredvp != NULL)
988133819Stjr		vrele(coveredvp);
989133819Stjr	return (0);
990133819Stjr}
991133819Stjr
/*
 * ---------------------------------------------------------------------
 * Mounting of the root filesystem
 *
 */
997133819Stjr
998133819Stjrstatic void
999133819Stjrset_rootvnode(struct thread *td)
1000133819Stjr{
1001133819Stjr	struct proc *p;
1002133819Stjr
1003133819Stjr	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
1004133819Stjr		panic("Cannot find root vnode");
1005133819Stjr
1006133819Stjr	p = td->td_proc;
1007133819Stjr	FILEDESC_LOCK(p->p_fd);
1008133819Stjr
1009133819Stjr	if (p->p_fd->fd_cdir != NULL)
1010133819Stjr		vrele(p->p_fd->fd_cdir);
1011133819Stjr	p->p_fd->fd_cdir = rootvnode;
1012133819Stjr	VREF(rootvnode);
1013133819Stjr
1014133819Stjr	if (p->p_fd->fd_rdir != NULL)
1015133819Stjr		vrele(p->p_fd->fd_rdir);
1016133819Stjr	p->p_fd->fd_rdir = rootvnode;
1017133819Stjr	VREF(rootvnode);
1018133819Stjr
1019133819Stjr	FILEDESC_UNLOCK(p->p_fd);
1020133819Stjr
1021133819Stjr	VOP_UNLOCK(rootvnode, 0, td);
1022133819Stjr}
1023133819Stjr
1024133819Stjr/*
1025133819Stjr * Mount /devfs as our root filesystem, but do not put it on the mountlist
1026133819Stjr * yet.  Create a /dev -> / symlink so that absolute pathnames will lookup.
1027133819Stjr */
1028133819Stjr
1029133819Stjrstatic struct mount *
1030133819Stjrdevfs_first(void)
1031133819Stjr{
1032133819Stjr	struct thread *td = curthread;
1033133819Stjr	struct vfsconf *vfsp;
1034133819Stjr	struct mount *mp = NULL;
1035133819Stjr	int error;
1036133819Stjr
1037133819Stjr	vfsp = vfs_byname("devfs");
1038133819Stjr	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
1039133819Stjr	if (vfsp == NULL)
1040133819Stjr		return(NULL);
1041134269Sjhb
1042134269Sjhb	error = vfs_mount_alloc(NULLVP, vfsp, "/dev", td, &mp);
1043133819Stjr	KASSERT(error == 0, ("vfs_mount_alloc failed %d", error));
1044133819Stjr	if (error)
1045133819Stjr		return (NULL);
1046133819Stjr
1047133819Stjr	error = VFS_MOUNT(mp, curthread);
1048133819Stjr	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
1049133819Stjr	if (error)
1050133819Stjr		return (NULL);
1051133819Stjr
1052133819Stjr	VFS_START(mp, 0, td);
1053133819Stjr
1054133819Stjr	mtx_lock(&mountlist_mtx);
1055133819Stjr	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1056133819Stjr	mtx_unlock(&mountlist_mtx);
1057133819Stjr
1058133819Stjr	set_rootvnode(td);
1059133819Stjr
1060133819Stjr	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
1061133819Stjr	if (error)
1062133819Stjr		printf("kern_symlink /dev -> / returns %d\n", error);
1063133819Stjr
1064133819Stjr	return (mp);
1065133819Stjr}
1066133819Stjr
1067133819Stjr/*
1068133819Stjr * Surgically move our devfs to be mounted on /dev.
1069133819Stjr */
1070133819Stjr
1071133819Stjrstatic void
1072133819Stjrdevfs_fixup(struct thread *td)
1073133819Stjr{
1074133819Stjr	struct nameidata nd;
1075133819Stjr	int error;
1076133819Stjr	struct vnode *vp, *dvp;
1077133819Stjr	struct mount *mp;
1078133819Stjr
1079133819Stjr	/* Remove our devfs mount from the mountlist and purge the cache */
1080133819Stjr	mtx_lock(&mountlist_mtx);
1081168844Sjkim	mp = TAILQ_FIRST(&mountlist);
1082133819Stjr	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1083133819Stjr	mtx_unlock(&mountlist_mtx);
1084133819Stjr	cache_purgevfs(mp);
1085133819Stjr
1086168844Sjkim	VFS_ROOT(mp, &dvp, td);
1087133819Stjr	VI_LOCK(dvp);
1088133819Stjr	dvp->v_iflag &= ~VI_MOUNT;
1089133819Stjr	dvp->v_mountedhere = NULL;
1090133819Stjr	VI_UNLOCK(dvp);
1091133819Stjr
1092168843Sjkim	/* Set up the real rootvnode, and purge the cache */
1093168843Sjkim	TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
1094168843Sjkim	set_rootvnode(td);
1095168843Sjkim	cache_purgevfs(rootvnode->v_mount);
1096168843Sjkim
1097168843Sjkim	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
1098168843Sjkim	error = namei(&nd);
1099168843Sjkim	if (error) {
1100168843Sjkim		printf("Lookup /dev -> %d\n", error);
1101168843Sjkim		return;
1102168843Sjkim	}
1103168843Sjkim	NDFREE(&nd, NDF_ONLY_PNBUF);
1104168843Sjkim	vp = nd.ni_vp;
1105168843Sjkim	if (vp->v_type != VDIR) {
1106168843Sjkim		vput(vp);
1107168843Sjkim	}
1108168843Sjkim	error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0);
1109168843Sjkim	if (error) {
1110168843Sjkim		vput(vp);
1111168843Sjkim	}
1112168843Sjkim	cache_purge(vp);
1113168843Sjkim	mp->mnt_vnodecovered = vp;
1114168843Sjkim	vp->v_mountedhere = mp;
1115168843Sjkim	mtx_lock(&mountlist_mtx);
1116168843Sjkim	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1117168843Sjkim	mtx_unlock(&mountlist_mtx);
1118168843Sjkim	VOP_UNLOCK(vp, 0, td);
1119133819Stjr	vfs_unbusy(mp, td);
1120133819Stjr	vput(dvp);
1121136152Sjhb
1122136152Sjhb	/* Unlink the no longer needed /dev/dev -> / symlink */
1123133819Stjr	kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
1124133819Stjr}
1125136152Sjhb
1126133819Stjr/*
1127133819Stjr * Find and mount the root filesystem
1128136152Sjhb */
1129133819Stjrvoid
1130133819Stjrvfs_mountroot(void)
1131133819Stjr{
1132133819Stjr	char *cp;
1133133819Stjr	int error, i, asked = 0;
1134133819Stjr	struct mount *mp;
1135133819Stjr
1136133819Stjr	/*
1137133819Stjr	 * Wait for GEOM to settle down
1138133819Stjr	 */
1139133819Stjr	DROP_GIANT();
1140133819Stjr	g_waitidle();
1141133819Stjr	PICKUP_GIANT();
1142133819Stjr
1143133819Stjr	mp = devfs_first();
1144133819Stjr
1145133819Stjr	/*
1146133819Stjr	 * We are booted with instructions to prompt for the root filesystem.
1147136152Sjhb	 */
1148133819Stjr	if (boothowto & RB_ASKNAME) {
1149133819Stjr		if (!vfs_mountroot_ask())
1150133819Stjr			return;
1151133819Stjr		asked = 1;
1152133819Stjr	}
1153133819Stjr
1154133819Stjr	/*
1155133819Stjr	 * The root filesystem information is compiled in, and we are
1156133819Stjr	 * booted with instructions to use it.
1157133819Stjr	 */
1158133819Stjr	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
1159133819Stjr		if (!vfs_mountroot_try(ctrootdevname))
1160144449Sjhb			return;
1161133819Stjr		ctrootdevname = NULL;
1162133819Stjr	}
1163133819Stjr
1164133819Stjr	/*
1165133819Stjr	 * We've been given the generic "use CDROM as root" flag.  This is
1166133819Stjr	 * necessary because one media may be used in many different
1167133819Stjr	 * devices, so we need to search for them.
1168133819Stjr	 */
1169168035Sjkim	if (boothowto & RB_CDROM) {
1170168035Sjkim		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1171133819Stjr			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1172168035Sjkim				return;
1173168035Sjkim		}
1174216634Sjkim	}
1175168035Sjkim
1176168035Sjkim	/*
1177133819Stjr	 * Try to use the value read by the loader from /etc/fstab, or
1178168035Sjkim	 * supplied via some other means.  This is the preferred
1179168035Sjkim	 * mechanism.
1180168035Sjkim	 */
1181168035Sjkim	cp = getenv("vfs.root.mountfrom");
1182168035Sjkim	if (cp != NULL) {
1183168035Sjkim		error = vfs_mountroot_try(cp);
1184168848Sjkim		freeenv(cp);
1185168035Sjkim		if (!error)
1186168035Sjkim			return;
1187168035Sjkim	}
1188168035Sjkim
1189168035Sjkim	/*
1190168035Sjkim	 * Try values that may have been computed by code during boot
1191168035Sjkim	 */
1192168035Sjkim	if (!vfs_mountroot_try(rootdevnames[0]))
1193168035Sjkim		return;
1194168035Sjkim	if (!vfs_mountroot_try(rootdevnames[1]))
1195168035Sjkim		return;
1196168035Sjkim
1197168035Sjkim	/*
1198168035Sjkim	 * If we (still) have a compiled-in default, try it.
1199168035Sjkim	 */
1200168035Sjkim	if (ctrootdevname != NULL)
1201168035Sjkim		if (!vfs_mountroot_try(ctrootdevname))
1202168035Sjkim			return;
1203168848Sjkim	/*
1204168848Sjkim	 * Everything so far has failed, prompt on the console if we haven't
1205168035Sjkim	 * already tried that.
1206168848Sjkim	 */
1207168035Sjkim	if (!asked)
1208168035Sjkim		if (!vfs_mountroot_ask())
1209168035Sjkim			return;
1210168035Sjkim
1211168035Sjkim	panic("Root mount failed, startup aborted.");
1212168035Sjkim}
1213168035Sjkim
1214168035Sjkim/*
1215168035Sjkim * Mount (mountfrom) as the root filesystem.
1216168035Sjkim */
1217168035Sjkimstatic int
1218168035Sjkimvfs_mountroot_try(const char *mountfrom)
1219168035Sjkim{
1220168035Sjkim        struct mount	*mp;
1221168035Sjkim	char		*vfsname, *path;
1222168035Sjkim	int		error;
1223168035Sjkim	char		patt[32];
1224168035Sjkim	int		s;
1225168035Sjkim
1226168035Sjkim	vfsname = NULL;
1227168848Sjkim	path    = NULL;
1228168848Sjkim	mp      = NULL;
1229168848Sjkim	error   = EINVAL;
1230168035Sjkim
1231168035Sjkim	if (mountfrom == NULL)
1232168035Sjkim		return (error);		/* don't complain */
1233168035Sjkim
1234168035Sjkim	s = splcam();			/* Overkill, but annoying without it */
1235168035Sjkim	printf("Trying to mount root from %s\n", mountfrom);
1236168035Sjkim	splx(s);
1237168035Sjkim
1238168035Sjkim	/* parse vfs name and path */
1239168035Sjkim	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1240168035Sjkim	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1241168035Sjkim	vfsname[0] = path[0] = 0;
1242168035Sjkim	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1243168035Sjkim	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1244168035Sjkim		return (error);
1245168035Sjkim
1246168035Sjkim	if (path[0] == '\0')
1247168035Sjkim		strcpy(path, ROOTNAME);
1248168035Sjkim
1249168035Sjkim	error = kernel_vmount(
1250168035Sjkim	    MNT_RDONLY | MNT_ROOTFS,
1251168035Sjkim	    "fstype", vfsname,
1252168035Sjkim	    "fspath", "/",
1253168035Sjkim	    "from", path,
1254168035Sjkim	    NULL);
1255168035Sjkim	if (error == 0) {
1256168035Sjkim		mp = TAILQ_FIRST(&mountlist);
1257168035Sjkim
1258168035Sjkim		/* sanity check system clock against root fs timestamp */
1259168035Sjkim		inittodr(mp->mnt_time);
1260168035Sjkim		vfs_unbusy(mp, curthread);
1261168035Sjkim		error = VFS_START(mp, 0, curthread);
1262168035Sjkim
1263216634Sjkim		devfs_fixup(curthread);
1264216634Sjkim	}
1265216634Sjkim	return (error);
1266190620Skib}
1267168035Sjkim
1268168035Sjkim/*
1269133819Stjr * ---------------------------------------------------------------------
1270 * Interactive root filesystem selection code.
1271 */
1272
1273static int
1274vfs_mountroot_ask(void)
1275{
1276	char name[128];
1277
1278	for(;;) {
1279		printf("\nManual root filesystem specification:\n");
1280		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1281#if defined(__i386__) || defined(__ia64__)
1282		printf("                       eg. ufs:da0s1a\n");
1283#else
1284		printf("                       eg. ufs:/dev/da0a\n");
1285#endif
1286		printf("  ?                  List valid disk boot devices\n");
1287		printf("  <empty line>       Abort manual input\n");
1288		printf("\nmountroot> ");
1289		gets(name);
1290		if (name[0] == '\0')
1291			return (1);
1292		if (name[0] == '?') {
1293			printf("\nList of GEOM managed disk devices:\n  ");
1294			g_dev_print();
1295			continue;
1296		}
1297		if (!vfs_mountroot_try(name))
1298			return (0);
1299	}
1300}
1301
1302/*
1303 * Local helper function for vfs_mountroot_ask.
1304 */
1305static void
1306gets(char *cp)
1307{
1308	char *lp;
1309	int c;
1310
1311	lp = cp;
1312	for (;;) {
1313		printf("%c", c = cngetc() & 0177);
1314		switch (c) {
1315		case -1:
1316		case '\n':
1317		case '\r':
1318			*lp++ = '\0';
1319			return;
1320		case '\b':
1321		case '\177':
1322			if (lp > cp) {
1323				printf(" \b");
1324				lp--;
1325			}
1326			continue;
1327		case '#':
1328			lp--;
1329			if (lp < cp)
1330				lp = cp;
1331			continue;
1332		case '@':
1333		case 'u' & 037:
1334			lp = cp;
1335			printf("%c", '\n');
1336			continue;
1337		default:
1338			*lp++ = c;
1339		}
1340	}
1341}
1342
/*
 * ---------------------------------------------------------------------
 * Functions for querying mount options/arguments from filesystems.
 */
1347
1348/*
1349 * Check that no unknown options are given
1350 */
1351int
1352vfs_filteropt(struct vfsoptlist *opts, const char **legal)
1353{
1354	struct vfsopt *opt;
1355	const char **t, *p;
1356
1357
1358	TAILQ_FOREACH(opt, opts, link) {
1359		p = opt->name;
1360		if (p[0] == 'n' && p[1] == 'o')
1361			p += 2;
1362		for(t = global_opts; *t != NULL; t++)
1363			if (!strcmp(*t, p))
1364				break;
1365		if (*t != NULL)
1366			continue;
1367		for(t = legal; *t != NULL; t++)
1368			if (!strcmp(*t, p))
1369				break;
1370		if (*t != NULL)
1371			continue;
1372		printf("mount option <%s> is unknown\n", p);
1373		return (EINVAL);
1374	}
1375	return (0);
1376}
1377
1378/*
1379 * Get a mount option by its name.
1380 *
1381 * Return 0 if the option was found, ENOENT otherwise.
1382 * If len is non-NULL it will be filled with the length
1383 * of the option. If buf is non-NULL, it will be filled
1384 * with the address of the option.
1385 */
1386int
1387vfs_getopt(opts, name, buf, len)
1388	struct vfsoptlist *opts;
1389	const char *name;
1390	void **buf;
1391	int *len;
1392{
1393	struct vfsopt *opt;
1394
1395	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1396
1397	TAILQ_FOREACH(opt, opts, link) {
1398		if (strcmp(name, opt->name) == 0) {
1399			if (len != NULL)
1400				*len = opt->len;
1401			if (buf != NULL)
1402				*buf = opt->value;
1403			return (0);
1404		}
1405	}
1406	return (ENOENT);
1407}
1408
1409char *
1410vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
1411{
1412	struct vfsopt *opt;
1413
1414	*error = 0;
1415	TAILQ_FOREACH(opt, opts, link) {
1416		if (strcmp(name, opt->name) != 0)
1417			continue;
1418		if (((char *)opt->value)[opt->len - 1] != '\0') {
1419			*error = EINVAL;
1420			return (NULL);
1421		}
1422		return (opt->value);
1423	}
1424	return (NULL);
1425}
1426
1427int
1428vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val)
1429{
1430	struct vfsopt *opt;
1431
1432	TAILQ_FOREACH(opt, opts, link) {
1433		if (strcmp(name, opt->name) == 0) {
1434			if (w != NULL)
1435				*w |= val;
1436			return (1);
1437		}
1438	}
1439	if (w != NULL)
1440		*w &= ~val;
1441	return (0);
1442}
1443
1444int
1445vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
1446{
1447	va_list ap;
1448	struct vfsopt *opt;
1449	int ret;
1450
1451	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
1452
1453	TAILQ_FOREACH(opt, opts, link) {
1454		if (strcmp(name, opt->name) != 0)
1455			continue;
1456		if (((char *)opt->value)[opt->len - 1] != '\0')
1457			return (0);
1458		va_start(ap, fmt);
1459		ret = vsscanf(opt->value, fmt, ap);
1460		va_end(ap);
1461		return (ret);
1462	}
1463	return (0);
1464}
1465
1466/*
1467 * Find and copy a mount option.
1468 *
1469 * The size of the buffer has to be specified
1470 * in len, if it is not the same length as the
1471 * mount option, EINVAL is returned.
1472 * Returns ENOENT if the option is not found.
1473 */
1474int
1475vfs_copyopt(opts, name, dest, len)
1476	struct vfsoptlist *opts;
1477	const char *name;
1478	void *dest;
1479	int len;
1480{
1481	struct vfsopt *opt;
1482
1483	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
1484
1485	TAILQ_FOREACH(opt, opts, link) {
1486		if (strcmp(name, opt->name) == 0) {
1487			if (len != opt->len)
1488				return (EINVAL);
1489			bcopy(opt->value, dest, opt->len);
1490			return (0);
1491		}
1492	}
1493	return (ENOENT);
1494}
1495
1496/*
1497 * This is a helper function for filesystems to traverse their
1498 * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
1499 */
1500
1501struct vnode *
1502__mnt_vnode_next(struct vnode **nvp, struct mount *mp)
1503{
1504	struct vnode *vp;
1505
1506	mtx_assert(&mp->mnt_mtx, MA_OWNED);
1507
1508	vp = *nvp;
1509	/* Check if we are done */
1510	if (vp == NULL)
1511		return (NULL);
1512	/* If our next vnode is no longer ours, start over */
1513	if (vp->v_mount != mp)
1514		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
1515	/* Save pointer to next vnode in list */
1516	if (vp != NULL)
1517		*nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1518	else
1519		*nvp = NULL;
1520	return (vp);
1521}
1522
1523int
1524__vfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
1525{
1526	int error;
1527
1528	error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat, td);
1529	if (sbp != &mp->mnt_stat)
1530		*sbp = mp->mnt_stat;
1531	return (error);
1532}
1533
1534void
1535vfs_mountedfrom(struct mount *mp, const char *from)
1536{
1537
1538	bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname);
1539	strlcpy(mp->mnt_stat.f_mntfromname, from,
1540	    sizeof mp->mnt_stat.f_mntfromname);
1541}
1542
/*
 * ---------------------------------------------------------------------
 * This is the API for building mount args and mounting filesystems from
 * inside the kernel.
 *
 * The API works by accumulating individual args.  The first error is
 * latched.
 *
 * XXX: should be documented in a new manpage, kernel_mount(9)
 */
1553
1554/* A memory allocation which must be freed when we are done */
1555struct mntaarg {
1556	SLIST_ENTRY(mntaarg)	next;
1557};
1558
1559/* The header for the mount arguments */
1560struct mntarg {
1561	struct iovec *v;
1562	int len;
1563	int error;
1564	SLIST_HEAD(, mntaarg)	list;
1565};
1566
/*
 * Add a boolean argument.
 *
 * flag is the boolean value.
 * name must start with "no"; when flag is true that prefix is dropped
 * and the positive form of the option is added, otherwise the name is
 * added as given.  The option carries no value.
 */
struct mntarg *
mount_argb(struct mntarg *ma, int flag, const char *name)
{
	const char *optname;

	KASSERT(name[0] == 'n' && name[1] == 'o',
	    ("mount_argb(...,%s): name must start with 'no'", name));

	optname = flag ? name + 2 : name;
	return (mount_arg(ma, optname, NULL, 0));
}
1582
1583/*
1584 * Add an argument printf style
1585 */
1586struct mntarg *
1587mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
1588{
1589	va_list ap;
1590	struct mntaarg *maa;
1591	struct sbuf *sb;
1592	int len;
1593
1594	if (ma == NULL) {
1595		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1596		SLIST_INIT(&ma->list);
1597	}
1598	if (ma->error)
1599		return (ma);
1600
1601	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1602	    M_MOUNT, M_WAITOK);
1603	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1604	ma->v[ma->len].iov_len = strlen(name) + 1;
1605	ma->len++;
1606
1607	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1608	va_start(ap, fmt);
1609	sbuf_vprintf(sb, fmt, ap);
1610	va_end(ap);
1611	sbuf_finish(sb);
1612	len = sbuf_len(sb) + 1;
1613	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1614	SLIST_INSERT_HEAD(&ma->list, maa, next);
1615	bcopy(sbuf_data(sb), maa + 1, len);
1616	sbuf_delete(sb);
1617
1618	ma->v[ma->len].iov_base = maa + 1;
1619	ma->v[ma->len].iov_len = len;
1620	ma->len++;
1621
1622	return (ma);
1623}
1624
1625/*
1626 * Add an argument which is a userland string.
1627 */
1628struct mntarg *
1629mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
1630{
1631	struct mntaarg *maa;
1632	char *tbuf;
1633
1634	if (val == NULL)
1635		return (ma);
1636	if (ma == NULL) {
1637		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1638		SLIST_INIT(&ma->list);
1639	}
1640	if (ma->error)
1641		return (ma);
1642	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
1643	SLIST_INSERT_HEAD(&ma->list, maa, next);
1644	tbuf = (void *)(maa + 1);
1645	ma->error = copyinstr(val, tbuf, len, NULL);
1646	return (mount_arg(ma, name, tbuf, -1));
1647}
1648
1649/*
1650 * Plain argument.
1651 *
1652 * If length is -1, use printf.
1653 */
1654struct mntarg *
1655mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
1656{
1657
1658	if (ma == NULL) {
1659		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
1660		SLIST_INIT(&ma->list);
1661	}
1662	if (ma->error)
1663		return (ma);
1664
1665	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
1666	    M_MOUNT, M_WAITOK);
1667	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
1668	ma->v[ma->len].iov_len = strlen(name) + 1;
1669	ma->len++;
1670
1671	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
1672	if (len < 0)
1673		ma->v[ma->len].iov_len = strlen(val) + 1;
1674	else
1675		ma->v[ma->len].iov_len = len;
1676	ma->len++;
1677	return (ma);
1678}
1679
1680/*
1681 * Free a mntarg structure
1682 */
1683void
1684free_mntarg(struct mntarg *ma)
1685{
1686	struct mntaarg *maa;
1687
1688	while (!SLIST_EMPTY(&ma->list)) {
1689		maa = SLIST_FIRST(&ma->list);
1690		SLIST_REMOVE_HEAD(&ma->list, next);
1691		free(maa, M_MOUNT);
1692	}
1693	free(ma->v, M_MOUNT);
1694	free(ma, M_MOUNT);
1695}
1696
1697/*
1698 * Mount a filesystem
1699 */
1700int
1701kernel_mount(struct mntarg *ma, int flags)
1702{
1703	struct uio auio;
1704	int error;
1705
1706	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
1707	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
1708	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
1709
1710	auio.uio_iov = ma->v;
1711	auio.uio_iovcnt = ma->len;
1712	auio.uio_segflg = UIO_SYSSPACE;
1713
1714	error = ma->error;
1715	if (!error)
1716		error = vfs_donmount(curthread, flags, &auio);
1717	free_mntarg(ma);
1718	return (error);
1719}
1720
/*
 * A printflike function to mount a filesystem.
 *
 * The variable arguments are (name, value) pairs of NUL terminated
 * strings, ended by a single NULL in name position.  Returns the result
 * of kernel_mount().
 */
int
kernel_vmount(int flags, ...)
{
	struct mntarg *ma;
	va_list ap;
	const char *optname;
	const void *optval;

	ma = NULL;
	va_start(ap, flags);
	while ((optname = va_arg(ap, const char *)) != NULL) {
		optval = va_arg(ap, const void *);
		ma = mount_arg(ma, optname, optval, -1);
	}
	va_end(ap);

	return (kernel_mount(ma, flags));
}
1746