vfs_mount.c revision 100363
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * Copyright (c) 1999 Michael Smith
39 * All rights reserved.
40 * Copyright (c) 1999 Poul-Henning Kamp
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * $FreeBSD: head/sys/kern/vfs_mount.c 100363 2002-07-19 16:05:31Z mux $
65 */
66
67#include <sys/param.h>
68#include <sys/conf.h>
69#include <sys/cons.h>
70#include <sys/kernel.h>
71#include <sys/linker.h>
72#include <sys/malloc.h>
73#include <sys/mount.h>
74#include <sys/mutex.h>
75#include <sys/namei.h>
76#include <sys/proc.h>
77#include <sys/reboot.h>
78#include <sys/sysproto.h>
79#include <sys/sx.h>
80#include <sys/sysctl.h>
81#include <sys/sysent.h>
82#include <sys/systm.h>
83#include <sys/vnode.h>
84
85#include <machine/stdarg.h>
86
87#include "opt_rootdevname.h"
88#include "opt_ddb.h"
89
90#ifdef DDB
91#include <ddb/ddb.h>
92#endif
93
94#define ROOTNAME	"root_device"
95
96static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
97static int	vfs_nmount(struct thread *td, int, struct uio *);
98static int	vfs_mountroot_try(char *mountfrom);
99static int	vfs_mountroot_ask(void);
100static void	gets(char *cp);
101
102static int	usermount = 0;	/* if 1, non-root can mount fs. */
103SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
104
105MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
106
107/* List of mounted filesystems. */
108struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
109
110/* For any iteration/modification of mountlist */
111struct mtx mountlist_mtx;
112
113/* For any iteration/modification of mnt_vnodelist */
114struct mtx mntvnode_mtx;
115
116/*
117 * The vnode of the system's root (/ in the filesystem, without chroot
118 * active.)
119 */
120struct vnode	*rootvnode;
121
122/*
123 * The root filesystem is detailed in the kernel environment variable
124 * vfs.root.mountfrom, which is expected to be in the general format
125 *
126 * <vfsname>:[<path>]
127 * vfsname   := the name of a VFS known to the kernel and capable
128 *              of being mounted as root
129 * path      := disk device name or other data used by the filesystem
130 *              to locate its physical store
131 */
132
133/*
134 * The root specifiers we will try if RB_CDROM is specified.
135 */
136static char *cdrom_rootdevnames[] = {
137	"cd9660:cd0a",
138	"cd9660:acd0a",
139	"cd9660:wcd0a",
140	NULL
141};
142
143/* legacy find-root code */
144char		*rootdevnames[2] = {NULL, NULL};
145static int	setrootbyname(char *name);
146dev_t		rootdev = NODEV;
147
148/* Remove one mount option. */
149static void
150vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
151{
152
153	TAILQ_REMOVE(opts, opt, link);
154	free(opt->name, M_MOUNT);
155	if (opt->value != NULL)
156		free(opt->value, M_MOUNT);
157#ifdef INVARIANTS
158	else if (opt->len != 0)
159		panic("%s: mount option with NULL value but length != 0",
160		    __func__);
161#endif
162	free(opt, M_MOUNT);
163}
164
165/* Release all resources related to the mount options. */
166static void
167vfs_freeopts(struct vfsoptlist *opts)
168{
169	struct vfsopt *opt;
170
171	while (!TAILQ_EMPTY(opts)) {
172		opt = TAILQ_FIRST(opts);
173		vfs_freeopt(opts, opt);
174	}
175	free(opts, M_MOUNT);
176}
177
178/*
 * If a mount option is specified several times
 * (with or without the "no" prefix), only keep
 * the last occurrence of it.
182 */
183static void
184vfs_sanitizeopts(struct vfsoptlist *opts)
185{
186	struct vfsopt *opt, *opt2, *tmp;
187	int noopt;
188
189	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
190		if (strncmp(opt->name, "no", 2) == 0)
191			noopt = 1;
192		else
193			noopt = 0;
194		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
195		while (opt2 != NULL) {
196			if (strcmp(opt2->name, opt->name) == 0 ||
197			    (noopt && strcmp(opt->name + 2, opt2->name) == 0) ||
198			    (!noopt && strncmp(opt2->name, "no", 2) == 0 &&
199			    strcmp(opt2->name + 2, opt->name) == 0)) {
200				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
201				vfs_freeopt(opts, opt2);
202				opt2 = tmp;
203			} else {
204				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
205			}
206		}
207	}
208}
209
210/*
211 * Build a linked list of mount options from a struct uio.
212 */
213static int
214vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
215{
216	struct vfsoptlist *opts;
217	struct vfsopt *opt;
218	unsigned int i, iovcnt;
219	int error, namelen, optlen;
220
221	iovcnt = auio->uio_iovcnt;
222	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
223	TAILQ_INIT(opts);
224	for (i = 0; i < iovcnt; i += 2) {
225		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
226		namelen = auio->uio_iov[i].iov_len;
227		optlen = auio->uio_iov[i + 1].iov_len;
228		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
229		opt->len = optlen;
230		if (optlen == 0) {
231			opt->value = NULL;
232		} else {
233			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
234			if (auio->uio_segflg == UIO_SYSSPACE) {
235				bcopy(auio->uio_iov[i].iov_base, opt->name,
236				    namelen);
237				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
238				    optlen);
239			} else {
240				error = copyin(auio->uio_iov[i].iov_base,
241				    opt->name, namelen);
242				if (error)
243					goto bad;
244				error = copyin(auio->uio_iov[i + 1].iov_base,
245				    opt->value, optlen);
246				if (error)
247					goto bad;
248			}
249		}
250		TAILQ_INSERT_TAIL(opts, opt, link);
251	}
252	vfs_sanitizeopts(opts);
253	*options = opts;
254	return (0);
255bad:
256	vfs_freeopts(opts);
257	return (error);
258}
259
260/*
261 * Merge the old mount options with the new ones passed
262 * in the MNT_UPDATE case.
263 */
264static void
265vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
266{
267	struct vfsopt *opt, *opt2, *new;
268
269	TAILQ_FOREACH(opt, opts, link) {
270		/*
271		 * Check that this option hasn't been redefined
272		 * nor cancelled with a "no" mount option.
273		 */
274		opt2 = TAILQ_FIRST(toopts);
275		while (opt2 != NULL) {
276			if (strcmp(opt2->name, opt->name) == 0)
277				goto next;
278			if (strncmp(opt2->name, "no", 2) == 0 &&
279			    strcmp(opt2->name + 2, opt->name) == 0) {
280				vfs_freeopt(toopts, opt2);
281				goto next;
282			}
283			opt2 = TAILQ_NEXT(opt2, link);
284		}
285		/* We want this option, duplicate it. */
286		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
287		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
288		strcpy(new->name, opt->name);
289		if (opt->len != 0) {
290			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
291			bcopy(opt->value, new->value, opt->len);
292		} else {
293			new->value = NULL;
294		}
295		new->len = opt->len;
296		TAILQ_INSERT_TAIL(toopts, new, link);
297next:
298		continue;
299	}
300}
301
302/*
303 * New mount API.
304 */
305int
306nmount(td, uap)
307	struct thread *td;
308	struct nmount_args /* {
309		syscallarg(struct iovec *) iovp;
310		syscallarg(unsigned int) iovcnt;
311		syscallarg(int) flags;
312	} */ *uap;
313{
314	struct uio auio;
315	struct iovec *iov, *needfree;
316	struct iovec aiov[UIO_SMALLIOV];
317	unsigned int i;
318	int error;
319	u_int iovlen, iovcnt;
320
321	iovcnt = SCARG(uap, iovcnt);
322	iovlen = iovcnt * sizeof (struct iovec);
323	/*
324	 * Check that we have an even number of iovec's
325	 * and that we have at least two options.
326	 */
327	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
328		return (EINVAL);
329
330	if (iovcnt > UIO_SMALLIOV) {
331		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
332		needfree = iov;
333	} else {
334		iov = aiov;
335		needfree = NULL;
336	}
337	auio.uio_iov = iov;
338	auio.uio_iovcnt = iovcnt;
339	auio.uio_segflg = UIO_USERSPACE;
340	if ((error = copyin(uap->iovp, iov, iovlen)))
341		goto finish;
342
343	for (i = 0; i < iovcnt; i++) {
344		if (iov->iov_len > MMAXOPTIONLEN) {
345			error = EINVAL;
346			goto finish;
347		}
348		iov++;
349	}
350	error = vfs_nmount(td, SCARG(uap, flags), &auio);
351finish:
352	if (needfree != NULL)
353		free(needfree, M_TEMP);
354	return (error);
355}
356
357int
358kernel_mount(iovp, iovcnt, flags)
359	struct iovec *iovp;
360	unsigned int iovcnt;
361	int flags;
362{
363	struct uio auio;
364	int error;
365
366	/*
367	 * Check that we have an even number of iovec's
368	 * and that we have at least two options.
369	 */
370	if ((iovcnt & 1) || (iovcnt < 4))
371		return (EINVAL);
372
373	auio.uio_iov = iovp;
374	auio.uio_iovcnt = iovcnt;
375	auio.uio_segflg = UIO_SYSSPACE;
376
377	error = vfs_nmount(curthread, flags, &auio);
378	return (error);
379}
380
381int
382kernel_vmount(int flags, ...)
383{
384	struct iovec *iovp;
385	struct uio auio;
386	va_list ap;
387	unsigned int iovcnt, iovlen, len;
388	const char *cp;
389	char *buf, *pos;
390	size_t n;
391	int error, i;
392
393	len = 0;
394	va_start(ap, flags);
395	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
396		len += strlen(cp) + 1;
397	va_end(ap);
398
399	if (iovcnt < 4 || iovcnt & 1)
400		return (EINVAL);
401
402	iovlen = iovcnt * sizeof (struct iovec);
403	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
404	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
405	pos = buf;
406	va_start(ap, flags);
407	for (i = 0; i < iovcnt; i++) {
408		cp = va_arg(ap, const char *);
409		copystr(cp, pos, len - (pos - buf), &n);
410		iovp[i].iov_base = pos;
411		iovp[i].iov_len = n;
412		pos += n;
413	}
414	va_end(ap);
415
416	auio.uio_iov = iovp;
417	auio.uio_iovcnt = iovcnt;
418	auio.uio_segflg = UIO_SYSSPACE;
419
420	error = vfs_nmount(curthread, flags, &auio);
421	FREE(iovp, M_MOUNT);
422	FREE(buf, M_MOUNT);
423	return (error);
424}
425
426/*
427 * vfs_nmount(): actually attempt a filesystem mount.
428 */
429static int
430vfs_nmount(td, fsflags, fsoptions)
431	struct thread *td;
432	int fsflags;		/* Flags common to all filesystems. */
433	struct uio *fsoptions;	/* Options local to the filesystem. */
434{
435	linker_file_t lf;
436	struct vnode *vp;
437	struct mount *mp;
438	struct vfsconf *vfsp;
439	struct vfsoptlist *optlist;
440	char *fstype, *fspath;
441	int error, flag = 0, kern_flag = 0;
442	int fstypelen, fspathlen;
443	struct vattr va;
444	struct nameidata nd;
445
446	error = vfs_buildopts(fsoptions, &optlist);
447	if (error)
448		return (error);
449
450	/*
451	 * We need these two options before the others,
452	 * and they are mandatory for any filesystem.
453	 * Ensure they are NUL terminated as well.
454	 */
455	fstypelen = 0;
456	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
457	if (error || fstype[fstypelen - 1] != '\0') {
458		error = EINVAL;
459		goto bad;
460	}
461	fspathlen = 0;
462	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
463	if (error || fspath[fspathlen - 1] != '\0') {
464		error = EINVAL;
465		goto bad;
466	}
467
468	/*
469	 * Be ultra-paranoid about making sure the type and fspath
470	 * variables will fit in our mp buffers, including the
471	 * terminating NUL.
472	 */
473	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
474		error = ENAMETOOLONG;
475		goto bad;
476	}
477
478	if (usermount == 0) {
479	       	error = suser(td);
480		if (error)
481			goto bad;
482	}
483	/*
484	 * Do not allow NFS export by non-root users.
485	 */
486	if (fsflags & MNT_EXPORTED) {
487		error = suser(td);
488		if (error)
489			goto bad;
490	}
491	/*
492	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
493	 */
494	if (suser(td))
495		fsflags |= MNT_NOSUID | MNT_NODEV;
496	/*
497	 * Get vnode to be covered
498	 */
499	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
500	if ((error = namei(&nd)) != 0)
501		goto bad;
502	NDFREE(&nd, NDF_ONLY_PNBUF);
503	vp = nd.ni_vp;
504	if (fsflags & MNT_UPDATE) {
505		if ((vp->v_flag & VROOT) == 0) {
506			vput(vp);
507			error = EINVAL;
508			goto bad;
509		}
510		mp = vp->v_mount;
511		flag = mp->mnt_flag;
512		kern_flag = mp->mnt_kern_flag;
513		/*
514		 * We only allow the filesystem to be reloaded if it
515		 * is currently mounted read-only.
516		 */
517		if ((fsflags & MNT_RELOAD) &&
518		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
519			vput(vp);
520			error = EOPNOTSUPP;	/* Needs translation */
521			goto bad;
522		}
523		/*
524		 * Only root, or the user that did the original mount is
525		 * permitted to update it.
526		 */
527		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
528			error = suser(td);
529			if (error) {
530				vput(vp);
531				goto bad;
532			}
533		}
534		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
535			vput(vp);
536			error = EBUSY;
537			goto bad;
538		}
539		mtx_lock(&vp->v_interlock);
540		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
541			mtx_unlock(&vp->v_interlock);
542			vfs_unbusy(mp, td);
543			vput(vp);
544			error = EBUSY;
545			goto bad;
546		}
547		vp->v_flag |= VMOUNT;
548		mtx_unlock(&vp->v_interlock);
549		mp->mnt_flag |= fsflags &
550		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
551		VOP_UNLOCK(vp, 0, td);
552		mp->mnt_optnew = optlist;
553		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
554		goto update;
555	}
556	/*
557	 * If the user is not root, ensure that they own the directory
558	 * onto which we are attempting to mount.
559	 */
560	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
561	if (error) {
562		vput(vp);
563		goto bad;
564	}
565	if (va.va_uid != td->td_ucred->cr_uid) {
566		error = suser(td);
567		if (error) {
568			vput(vp);
569			goto bad;
570		}
571	}
572	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
573		vput(vp);
574		goto bad;
575	}
576	if (vp->v_type != VDIR) {
577		vput(vp);
578		error = ENOTDIR;
579		goto bad;
580	}
581	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
582		if (!strcmp(vfsp->vfc_name, fstype))
583			break;
584	if (vfsp == NULL) {
585		/* Only load modules for root (very important!). */
586		error = suser(td);
587		if (error) {
588			vput(vp);
589			goto bad;
590		}
591		error = securelevel_gt(td->td_ucred, 0);
592		if (error) {
593			vput(vp);
594			goto bad;
595		}
596		error = linker_load_file(fstype, &lf);
597		if (error || lf == NULL) {
598			vput(vp);
599			if (lf == NULL)
600				error = ENODEV;
601			goto bad;
602		}
603		lf->userrefs++;
604		/* Look up again to see if the VFS was loaded. */
605		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
606			if (!strcmp(vfsp->vfc_name, fstype))
607				break;
608		if (vfsp == NULL) {
609			lf->userrefs--;
610			linker_file_unload(lf);
611			vput(vp);
612			error = ENODEV;
613			goto bad;
614		}
615	}
616	mtx_lock(&vp->v_interlock);
617	if ((vp->v_flag & VMOUNT) != 0 ||
618	    vp->v_mountedhere != NULL) {
619		mtx_unlock(&vp->v_interlock);
620		vput(vp);
621		error = EBUSY;
622		goto bad;
623	}
624	vp->v_flag |= VMOUNT;
625	mtx_unlock(&vp->v_interlock);
626
627	/*
628	 * Allocate and initialize the filesystem.
629	 */
630	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
631	TAILQ_INIT(&mp->mnt_nvnodelist);
632	TAILQ_INIT(&mp->mnt_reservedvnlist);
633	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
634	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
635	mp->mnt_op = vfsp->vfc_vfsops;
636	mp->mnt_vfc = vfsp;
637	vfsp->vfc_refcount++;
638	mp->mnt_stat.f_type = vfsp->vfc_typenum;
639	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
640	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
641	mp->mnt_vnodecovered = vp;
642	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
643	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
644	mp->mnt_iosize_max = DFLTPHYS;
645	VOP_UNLOCK(vp, 0, td);
646	mp->mnt_optnew = optlist;
647
648update:
649	/*
650	 * Check if the fs implements the new VFS_NMOUNT()
651	 * function, since the new system call was used.
652	 */
653	if (mp->mnt_op->vfs_mount != NULL) {
654		printf("%s doesn't support the new mount syscall\n",
655		    mp->mnt_vfc->vfc_name);
656		mtx_lock(&vp->v_interlock);
657		vp->v_flag &= ~VMOUNT;
658		mtx_unlock(&vp->v_interlock);
659		if (mp->mnt_flag & MNT_UPDATE)
660			vfs_unbusy(mp, td);
661		else {
662			mp->mnt_vfc->vfc_refcount--;
663			vfs_unbusy(mp, td);
664			free(mp, M_MOUNT);
665		}
666		vrele(vp);
667		error = EOPNOTSUPP;
668		goto bad;
669	}
670
671	/*
672	 * Set the mount level flags.
673	 */
674	if (fsflags & MNT_RDONLY)
675		mp->mnt_flag |= MNT_RDONLY;
676	else if (mp->mnt_flag & MNT_RDONLY)
677		mp->mnt_kern_flag |= MNTK_WANTRDWR;
678	mp->mnt_flag &=~ MNT_UPDATEMASK;
679	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
680	/*
681	 * Mount the filesystem.
682	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
683	 * get.  No freeing of cn_pnbuf.
684	 */
685	error = VFS_NMOUNT(mp, &nd, td);
686	if (!error) {
687		if (mp->mnt_opt != NULL)
688			vfs_freeopts(mp->mnt_opt);
689		mp->mnt_opt = mp->mnt_optnew;
690	}
691	/*
692	 * Prevent external consumers of mount
693	 * options to read mnt_optnew.
694	 */
695	mp->mnt_optnew = NULL;
696	if (mp->mnt_flag & MNT_UPDATE) {
697		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
698			mp->mnt_flag &= ~MNT_RDONLY;
699		mp->mnt_flag &=~
700		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
701		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
702		if (error) {
703			mp->mnt_flag = flag;
704			mp->mnt_kern_flag = kern_flag;
705		}
706		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
707			if (mp->mnt_syncer == NULL)
708				error = vfs_allocate_syncvnode(mp);
709		} else {
710			if (mp->mnt_syncer != NULL)
711				vput(mp->mnt_syncer);
712			mp->mnt_syncer = NULL;
713		}
714		vfs_unbusy(mp, td);
715		mtx_lock(&vp->v_interlock);
716		vp->v_flag &= ~VMOUNT;
717		mtx_unlock(&vp->v_interlock);
718		vrele(vp);
719		return (error);
720	}
721	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
722	/*
723	 * Put the new filesystem on the mount list after root.
724	 */
725	cache_purge(vp);
726	if (!error) {
727		struct vnode *newdp;
728
729		mtx_lock(&vp->v_interlock);
730		vp->v_flag &= ~VMOUNT;
731		vp->v_mountedhere = mp;
732		mtx_unlock(&vp->v_interlock);
733		mtx_lock(&mountlist_mtx);
734		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
735		mtx_unlock(&mountlist_mtx);
736		if (VFS_ROOT(mp, &newdp))
737			panic("mount: lost mount");
738		checkdirs(vp, newdp);
739		vput(newdp);
740		VOP_UNLOCK(vp, 0, td);
741		if ((mp->mnt_flag & MNT_RDONLY) == 0)
742			error = vfs_allocate_syncvnode(mp);
743		vfs_unbusy(mp, td);
744		if ((error = VFS_START(mp, 0, td)) != 0) {
745			vrele(vp);
746			goto bad;
747		}
748	} else {
749		mtx_lock(&vp->v_interlock);
750		vp->v_flag &= ~VMOUNT;
751		mtx_unlock(&vp->v_interlock);
752		mp->mnt_vfc->vfc_refcount--;
753		vfs_unbusy(mp, td);
754		free(mp, M_MOUNT);
755		vput(vp);
756		goto bad;
757	}
758	return (0);
759bad:
760	vfs_freeopts(optlist);
761	return (error);
762}
763
764/*
765 * Old mount API.
766 */
767#ifndef _SYS_SYSPROTO_H_
768struct mount_args {
769	char	*type;
770	char	*path;
771	int	flags;
772	caddr_t	data;
773};
774#endif
775/* ARGSUSED */
776int
777mount(td, uap)
778	struct thread *td;
779	struct mount_args /* {
780		syscallarg(char *) type;
781		syscallarg(char *) path;
782		syscallarg(int) flags;
783		syscallarg(caddr_t) data;
784	} */ *uap;
785{
786	char *fstype;
787	char *fspath;
788	int error;
789
790	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
791	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
792
793	/*
794	 * vfs_mount() actually takes a kernel string for `type' and
795	 * `path' now, so extract them.
796	 */
797	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
798	if (error)
799		goto finish;
800	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
801	if (error)
802		goto finish;
803	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
804	    SCARG(uap, data));
805finish:
806	free(fstype, M_TEMP);
807	free(fspath, M_TEMP);
808	return (error);
809}
810
811/*
812 * vfs_mount(): actually attempt a filesystem mount.
813 *
814 * This routine is designed to be a "generic" entry point for routines
815 * that wish to mount a filesystem. All parameters except `fsdata' are
816 * pointers into kernel space. `fsdata' is currently still a pointer
817 * into userspace.
818 */
819int
820vfs_mount(td, fstype, fspath, fsflags, fsdata)
821	struct thread *td;
822	const char *fstype;
823	char *fspath;
824	int fsflags;
825	void *fsdata;
826{
827	linker_file_t lf;
828	struct vnode *vp;
829	struct mount *mp;
830	struct vfsconf *vfsp;
831	int error, flag = 0, kern_flag = 0;
832	struct vattr va;
833	struct nameidata nd;
834
835	/*
836	 * Be ultra-paranoid about making sure the type and fspath
837	 * variables will fit in our mp buffers, including the
838	 * terminating NUL.
839	 */
840	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
841		return (ENAMETOOLONG);
842
843	if (usermount == 0) {
844		error = suser(td);
845		if (error)
846			return (error);
847	}
848	/*
849	 * Do not allow NFS export by non-root users.
850	 */
851	if (fsflags & MNT_EXPORTED) {
852		error = suser(td);
853		if (error)
854			return (error);
855	}
856	/*
857	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
858	 */
859	if (suser(td))
860		fsflags |= MNT_NOSUID | MNT_NODEV;
861	/*
862	 * Get vnode to be covered
863	 */
864	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
865	if ((error = namei(&nd)) != 0)
866		return (error);
867	NDFREE(&nd, NDF_ONLY_PNBUF);
868	vp = nd.ni_vp;
869	if (fsflags & MNT_UPDATE) {
870		if ((vp->v_flag & VROOT) == 0) {
871			vput(vp);
872			return (EINVAL);
873		}
874		mp = vp->v_mount;
875		flag = mp->mnt_flag;
876		kern_flag = mp->mnt_kern_flag;
877		/*
878		 * We only allow the filesystem to be reloaded if it
879		 * is currently mounted read-only.
880		 */
881		if ((fsflags & MNT_RELOAD) &&
882		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
883			vput(vp);
884			return (EOPNOTSUPP);	/* Needs translation */
885		}
886		/*
887		 * Only root, or the user that did the original mount is
888		 * permitted to update it.
889		 */
890		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
891			error = suser(td);
892			if (error) {
893				vput(vp);
894				return (error);
895			}
896		}
897		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
898			vput(vp);
899			return (EBUSY);
900		}
901		mtx_lock(&vp->v_interlock);
902		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
903			mtx_unlock(&vp->v_interlock);
904			vfs_unbusy(mp, td);
905			vput(vp);
906			return (EBUSY);
907		}
908		vp->v_flag |= VMOUNT;
909		mtx_unlock(&vp->v_interlock);
910		mp->mnt_flag |= fsflags &
911		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
912		VOP_UNLOCK(vp, 0, td);
913		goto update;
914	}
915	/*
916	 * If the user is not root, ensure that they own the directory
917	 * onto which we are attempting to mount.
918	 */
919	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
920	if (error) {
921		vput(vp);
922		return (error);
923	}
924	if (va.va_uid != td->td_ucred->cr_uid) {
925		error = suser(td);
926		if (error) {
927			vput(vp);
928			return (error);
929		}
930	}
931	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
932		vput(vp);
933		return (error);
934	}
935	if (vp->v_type != VDIR) {
936		vput(vp);
937		return (ENOTDIR);
938	}
939	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
940		if (!strcmp(vfsp->vfc_name, fstype))
941			break;
942	if (vfsp == NULL) {
943		/* Only load modules for root (very important!). */
944		error = suser(td);
945		if (error) {
946			vput(vp);
947			return (error);
948		}
949		error = securelevel_gt(td->td_ucred, 0);
950		if (error) {
951			vput(vp);
952			return (error);
953		}
954		error = linker_load_file(fstype, &lf);
955		if (error || lf == NULL) {
956			vput(vp);
957			if (lf == NULL)
958				error = ENODEV;
959			return (error);
960		}
961		lf->userrefs++;
962		/* Look up again to see if the VFS was loaded. */
963		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
964			if (!strcmp(vfsp->vfc_name, fstype))
965				break;
966		if (vfsp == NULL) {
967			lf->userrefs--;
968			linker_file_unload(lf);
969			vput(vp);
970			return (ENODEV);
971		}
972	}
973	mtx_lock(&vp->v_interlock);
974	if ((vp->v_flag & VMOUNT) != 0 ||
975	    vp->v_mountedhere != NULL) {
976		mtx_unlock(&vp->v_interlock);
977		vput(vp);
978		return (EBUSY);
979	}
980	vp->v_flag |= VMOUNT;
981	mtx_unlock(&vp->v_interlock);
982
983	/*
984	 * Allocate and initialize the filesystem.
985	 */
986	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
987	TAILQ_INIT(&mp->mnt_nvnodelist);
988	TAILQ_INIT(&mp->mnt_reservedvnlist);
989	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
990	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
991	mp->mnt_op = vfsp->vfc_vfsops;
992	mp->mnt_vfc = vfsp;
993	vfsp->vfc_refcount++;
994	mp->mnt_stat.f_type = vfsp->vfc_typenum;
995	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
996	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
997	mp->mnt_vnodecovered = vp;
998	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
999	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
1000	mp->mnt_iosize_max = DFLTPHYS;
1001	VOP_UNLOCK(vp, 0, td);
1002update:
1003	/*
1004	 * Check if the fs implements the old VFS_MOUNT()
1005	 * function, since the old system call was used.
1006	 */
1007	if (mp->mnt_op->vfs_mount == NULL) {
1008		printf("%s doesn't support the old mount syscall\n",
1009		    mp->mnt_vfc->vfc_name);
1010		mtx_lock(&vp->v_interlock);
1011		vp->v_flag &= ~VMOUNT;
1012		mtx_unlock(&vp->v_interlock);
1013		if (mp->mnt_flag & MNT_UPDATE)
1014			vfs_unbusy(mp, td);
1015		else {
1016			mp->mnt_vfc->vfc_refcount--;
1017			vfs_unbusy(mp, td);
1018			free(mp, M_MOUNT);
1019		}
1020		vrele(vp);
1021		return (EOPNOTSUPP);
1022	}
1023
1024	/*
1025	 * Set the mount level flags.
1026	 */
1027	if (fsflags & MNT_RDONLY)
1028		mp->mnt_flag |= MNT_RDONLY;
1029	else if (mp->mnt_flag & MNT_RDONLY)
1030		mp->mnt_kern_flag |= MNTK_WANTRDWR;
1031	mp->mnt_flag &=~ MNT_UPDATEMASK;
1032	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
1033	/*
1034	 * Mount the filesystem.
1035	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
1036	 * get.  No freeing of cn_pnbuf.
1037	 */
1038	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
1039	if (mp->mnt_flag & MNT_UPDATE) {
1040		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
1041			mp->mnt_flag &= ~MNT_RDONLY;
1042		mp->mnt_flag &=~
1043		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
1044		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
1045		if (error) {
1046			mp->mnt_flag = flag;
1047			mp->mnt_kern_flag = kern_flag;
1048		}
1049		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1050			if (mp->mnt_syncer == NULL)
1051				error = vfs_allocate_syncvnode(mp);
1052		} else {
1053			if (mp->mnt_syncer != NULL)
1054				vput(mp->mnt_syncer);
1055			mp->mnt_syncer = NULL;
1056		}
1057		vfs_unbusy(mp, td);
1058		mtx_lock(&vp->v_interlock);
1059		vp->v_flag &= ~VMOUNT;
1060		mtx_unlock(&vp->v_interlock);
1061		vrele(vp);
1062		return (error);
1063	}
1064	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1065	/*
1066	 * Put the new filesystem on the mount list after root.
1067	 */
1068	cache_purge(vp);
1069	if (!error) {
1070		struct vnode *newdp;
1071
1072		mtx_lock(&vp->v_interlock);
1073		vp->v_flag &= ~VMOUNT;
1074		vp->v_mountedhere = mp;
1075		mtx_unlock(&vp->v_interlock);
1076		mtx_lock(&mountlist_mtx);
1077		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1078		mtx_unlock(&mountlist_mtx);
1079		if (VFS_ROOT(mp, &newdp))
1080			panic("mount: lost mount");
1081		checkdirs(vp, newdp);
1082		vput(newdp);
1083		VOP_UNLOCK(vp, 0, td);
1084		if ((mp->mnt_flag & MNT_RDONLY) == 0)
1085			error = vfs_allocate_syncvnode(mp);
1086		vfs_unbusy(mp, td);
1087		if ((error = VFS_START(mp, 0, td)) != 0)
1088			vrele(vp);
1089	} else {
1090		mtx_lock(&vp->v_interlock);
1091		vp->v_flag &= ~VMOUNT;
1092		mtx_unlock(&vp->v_interlock);
1093		mp->mnt_vfc->vfc_refcount--;
1094		vfs_unbusy(mp, td);
1095		free(mp, M_MOUNT);
1096		vput(vp);
1097	}
1098	return (error);
1099}
1100
1101/*
1102 * Scan all active processes to see if any of them have a current
1103 * or root directory of `olddp'. If so, replace them with the new
1104 * mount point.
1105 */
1106static void
1107checkdirs(olddp, newdp)
1108	struct vnode *olddp, *newdp;
1109{
1110	struct filedesc *fdp;
1111	struct proc *p;
1112	int nrele;
1113
1114	if (olddp->v_usecount == 1)
1115		return;
1116	sx_slock(&allproc_lock);
1117	LIST_FOREACH(p, &allproc, p_list) {
1118		PROC_LOCK(p);
1119		fdp = p->p_fd;
1120		if (fdp == NULL) {
1121			PROC_UNLOCK(p);
1122			continue;
1123		}
1124		nrele = 0;
1125		FILEDESC_LOCK(fdp);
1126		if (fdp->fd_cdir == olddp) {
1127			VREF(newdp);
1128			fdp->fd_cdir = newdp;
1129			nrele++;
1130		}
1131		if (fdp->fd_rdir == olddp) {
1132			VREF(newdp);
1133			fdp->fd_rdir = newdp;
1134			nrele++;
1135		}
1136		FILEDESC_UNLOCK(fdp);
1137		PROC_UNLOCK(p);
1138		while (nrele--)
1139			vrele(olddp);
1140	}
1141	sx_sunlock(&allproc_lock);
1142	if (rootvnode == olddp) {
1143		vrele(rootvnode);
1144		VREF(newdp);
1145		rootvnode = newdp;
1146	}
1147}
1148
1149/*
1150 * Unmount a filesystem.
1151 *
1152 * Note: unmount takes a path to the vnode mounted on as argument,
1153 * not special file (as before).
1154 */
1155#ifndef _SYS_SYSPROTO_H_
1156struct unmount_args {
1157	char	*path;
1158	int	flags;
1159};
1160#endif
1161/* ARGSUSED */
1162int
1163unmount(td, uap)
1164	struct thread *td;
1165	register struct unmount_args /* {
1166		syscallarg(char *) path;
1167		syscallarg(int) flags;
1168	} */ *uap;
1169{
1170	register struct vnode *vp;
1171	struct mount *mp;
1172	int error;
1173	struct nameidata nd;
1174
1175	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1176	    SCARG(uap, path), td);
1177	if ((error = namei(&nd)) != 0)
1178		return (error);
1179	vp = nd.ni_vp;
1180	NDFREE(&nd, NDF_ONLY_PNBUF);
1181	mp = vp->v_mount;
1182
1183	/*
1184	 * Only root, or the user that did the original mount is
1185	 * permitted to unmount this filesystem.
1186	 */
1187	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1188		error = suser(td);
1189		if (error) {
1190			vput(vp);
1191			return (error);
1192		}
1193	}
1194
1195	/*
1196	 * Don't allow unmounting the root filesystem.
1197	 */
1198	if (mp->mnt_flag & MNT_ROOTFS) {
1199		vput(vp);
1200		return (EINVAL);
1201	}
1202
1203	/*
1204	 * Must be the root of the filesystem
1205	 */
1206	if ((vp->v_flag & VROOT) == 0) {
1207		vput(vp);
1208		return (EINVAL);
1209	}
1210	vput(vp);
1211	return (dounmount(mp, SCARG(uap, flags), td));
1212}
1213
1214/*
1215 * Do the actual filesystem unmount.
1216 */
1217int
1218dounmount(mp, flags, td)
1219	struct mount *mp;
1220	int flags;
1221	struct thread *td;
1222{
1223	struct vnode *coveredvp, *fsrootvp;
1224	int error;
1225	int async_flag;
1226
1227	mtx_lock(&mountlist_mtx);
1228	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1229		mtx_unlock(&mountlist_mtx);
1230		return (EBUSY);
1231	}
1232	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1233	/* Allow filesystems to detect that a forced unmount is in progress. */
1234	if (flags & MNT_FORCE)
1235		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1236	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1237	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1238	if (error) {
1239		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1240		if (mp->mnt_kern_flag & MNTK_MWAIT)
1241			wakeup(mp);
1242		return (error);
1243	}
1244	vn_start_write(NULL, &mp, V_WAIT);
1245
1246	if (mp->mnt_flag & MNT_EXPUBLIC)
1247		vfs_setpublicfs(NULL, NULL, NULL);
1248
1249	vfs_msync(mp, MNT_WAIT);
1250	async_flag = mp->mnt_flag & MNT_ASYNC;
1251	mp->mnt_flag &=~ MNT_ASYNC;
1252	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1253	if (mp->mnt_syncer != NULL)
1254		vput(mp->mnt_syncer);
1255	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1256	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1257		if (mp->mnt_vnodecovered != NULL)
1258			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1259		if (fsrootvp == rootvnode) {
1260			vrele(rootvnode);
1261			rootvnode = NULL;
1262		}
1263		vput(fsrootvp);
1264	}
1265	if (((mp->mnt_flag & MNT_RDONLY) ||
1266	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1267	    (flags & MNT_FORCE)) {
1268		error = VFS_UNMOUNT(mp, flags, td);
1269	}
1270	vn_finished_write(mp);
1271	if (error) {
1272		/* Undo cdir/rdir and rootvnode changes made above. */
1273		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1274			if (mp->mnt_vnodecovered != NULL)
1275				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1276			if (rootvnode == NULL) {
1277				rootvnode = fsrootvp;
1278				vref(rootvnode);
1279			}
1280			vput(fsrootvp);
1281		}
1282		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1283			(void) vfs_allocate_syncvnode(mp);
1284		mtx_lock(&mountlist_mtx);
1285		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1286		mp->mnt_flag |= async_flag;
1287		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1288		    &mountlist_mtx, td);
1289		if (mp->mnt_kern_flag & MNTK_MWAIT)
1290			wakeup(mp);
1291		return (error);
1292	}
1293	mtx_lock(&mountlist_mtx);
1294	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1295	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1296		coveredvp->v_mountedhere = NULL;
1297	mp->mnt_vfc->vfc_refcount--;
1298	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1299		panic("unmount: dangling vnode");
1300	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1301	lockdestroy(&mp->mnt_lock);
1302	if (coveredvp != NULL)
1303		vrele(coveredvp);
1304	if (mp->mnt_kern_flag & MNTK_MWAIT)
1305		wakeup(mp);
1306	if (mp->mnt_op->vfs_mount == NULL)
1307		vfs_freeopts(mp->mnt_opt);
1308	free(mp, M_MOUNT);
1309	return (0);
1310}
1311
1312/*
1313 * Lookup a filesystem type, and if found allocate and initialize
1314 * a mount structure for it.
1315 *
1316 * Devname is usually updated by mount(8) after booting.
1317 */
1318int
1319vfs_rootmountalloc(fstypename, devname, mpp)
1320	char *fstypename;
1321	char *devname;
1322	struct mount **mpp;
1323{
1324	struct thread *td = curthread;	/* XXX */
1325	struct vfsconf *vfsp;
1326	struct mount *mp;
1327
1328	if (fstypename == NULL)
1329		return (ENODEV);
1330	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1331		if (!strcmp(vfsp->vfc_name, fstypename))
1332			break;
1333	if (vfsp == NULL)
1334		return (ENODEV);
1335	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1336	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1337	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1338	TAILQ_INIT(&mp->mnt_nvnodelist);
1339	TAILQ_INIT(&mp->mnt_reservedvnlist);
1340	mp->mnt_vfc = vfsp;
1341	mp->mnt_op = vfsp->vfc_vfsops;
1342	mp->mnt_flag = MNT_RDONLY;
1343	mp->mnt_vnodecovered = NULLVP;
1344	vfsp->vfc_refcount++;
1345	mp->mnt_iosize_max = DFLTPHYS;
1346	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1347	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1348	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1349	mp->mnt_stat.f_mntonname[0] = '/';
1350	mp->mnt_stat.f_mntonname[1] = 0;
1351	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
1352	*mpp = mp;
1353	return (0);
1354}
1355
1356/*
1357 * Find and mount the root filesystem
1358 */
1359void
1360vfs_mountroot(void)
1361{
1362	char		*cp;
1363	int		i, error;
1364
1365	/*
1366	 * The root filesystem information is compiled in, and we are
1367	 * booted with instructions to use it.
1368	 */
1369#ifdef ROOTDEVNAME
1370	if ((boothowto & RB_DFLTROOT) &&
1371	    !vfs_mountroot_try(ROOTDEVNAME))
1372		return;
1373#endif
1374	/*
1375	 * We are booted with instructions to prompt for the root filesystem,
1376	 * or to use the compiled-in default when it doesn't exist.
1377	 */
1378	if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1379		if (!vfs_mountroot_ask())
1380			return;
1381	}
1382
1383	/*
1384	 * We've been given the generic "use CDROM as root" flag.  This is
1385	 * necessary because one media may be used in many different
1386	 * devices, so we need to search for them.
1387	 */
1388	if (boothowto & RB_CDROM) {
1389		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1390			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1391				return;
1392		}
1393	}
1394
1395	/*
1396	 * Try to use the value read by the loader from /etc/fstab, or
1397	 * supplied via some other means.  This is the preferred
1398	 * mechanism.
1399	 */
1400	if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1401		error = vfs_mountroot_try(cp);
1402		freeenv(cp);
1403		if (!error)
1404			return;
1405	}
1406
1407	/*
1408	 * Try values that may have been computed by the machine-dependant
1409	 * legacy code.
1410	 */
1411	if (!vfs_mountroot_try(rootdevnames[0]))
1412		return;
1413	if (!vfs_mountroot_try(rootdevnames[1]))
1414		return;
1415
1416	/*
1417	 * If we have a compiled-in default, and haven't already tried it, try
1418	 * it now.
1419	 */
1420#ifdef ROOTDEVNAME
1421	if (!(boothowto & RB_DFLTROOT))
1422		if (!vfs_mountroot_try(ROOTDEVNAME))
1423			return;
1424#endif
1425
1426	/*
1427	 * Everything so far has failed, prompt on the console if we haven't
1428	 * already tried that.
1429	 */
1430	if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1431		return;
1432	panic("Root mount failed, startup aborted.");
1433}
1434
1435/*
1436 * Mount (mountfrom) as the root filesystem.
1437 */
1438static int
1439vfs_mountroot_try(char *mountfrom)
1440{
1441        struct mount	*mp;
1442	char		*vfsname, *path;
1443	int		error;
1444	char		patt[32];
1445	int		s;
1446
1447	vfsname = NULL;
1448	path    = NULL;
1449	mp      = NULL;
1450	error   = EINVAL;
1451
1452	if (mountfrom == NULL)
1453		return(error);		/* don't complain */
1454
1455	s = splcam();			/* Overkill, but annoying without it */
1456	printf("Mounting root from %s\n", mountfrom);
1457	splx(s);
1458
1459	/* parse vfs name and path */
1460	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1461	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1462	vfsname[0] = path[0] = 0;
1463	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1464	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1465		goto done;
1466
1467	/* allocate a root mount */
1468	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1469				   &mp);
1470	if (error != 0) {
1471		printf("Can't allocate root mount for filesystem '%s': %d\n",
1472		       vfsname, error);
1473		goto done;
1474	}
1475	mp->mnt_flag |= MNT_ROOTFS;
1476
1477	/* do our best to set rootdev */
1478	if ((path[0] != 0) && setrootbyname(path))
1479		printf("setrootbyname failed\n");
1480
1481	/* If the root device is a type "memory disk", mount RW */
1482	if (rootdev != NODEV && devsw(rootdev) &&
1483	    (devsw(rootdev)->d_flags & D_MEMDISK))
1484		mp->mnt_flag &= ~MNT_RDONLY;
1485
1486	/*
1487	 * Set the mount path to be something useful, because the
1488	 * filesystem code isn't responsible now for initialising
1489	 * f_mntonname unless they want to override the default
1490	 * (which is `path'.)
1491	 */
1492	strncpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1493
1494	error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1495
1496done:
1497	if (vfsname != NULL)
1498		free(vfsname, M_MOUNT);
1499	if (path != NULL)
1500		free(path, M_MOUNT);
1501	if (error != 0) {
1502		if (mp != NULL) {
1503			vfs_unbusy(mp, curthread);
1504			free(mp, M_MOUNT);
1505		}
1506		printf("Root mount failed: %d\n", error);
1507	} else {
1508
1509		/* register with list of mounted filesystems */
1510		mtx_lock(&mountlist_mtx);
1511		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1512		mtx_unlock(&mountlist_mtx);
1513
1514		/* sanity check system clock against root fs timestamp */
1515		inittodr(mp->mnt_time);
1516		vfs_unbusy(mp, curthread);
1517		error = VFS_START(mp, 0, curthread);
1518	}
1519	return(error);
1520}
1521
1522/*
1523 * Spin prompting on the console for a suitable root filesystem
1524 */
1525static int
1526vfs_mountroot_ask(void)
1527{
1528	char name[128];
1529	int i;
1530	dev_t dev;
1531
1532	for(;;) {
1533		printf("\nManual root filesystem specification:\n");
1534		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1535#if defined(__i386__) || defined(__ia64__)
1536		printf("                       eg. ufs:da0s1a\n");
1537#else
1538		printf("                       eg. ufs:da0a\n");
1539#endif
1540		printf("  ?                  List valid disk boot devices\n");
1541		printf("  <empty line>       Abort manual input\n");
1542		printf("\nmountroot> ");
1543		gets(name);
1544		if (name[0] == 0)
1545			return(1);
1546		if (name[0] == '?') {
1547			printf("Possibly valid devices for 'ufs' root:\n");
1548			for (i = 0; i < NUMCDEVSW; i++) {
1549				dev = makedev(i, 0);
1550				if (devsw(dev) != NULL)
1551					printf(" \"%s\"", devsw(dev)->d_name);
1552			}
1553			printf("\n");
1554			continue;
1555		}
1556		if (!vfs_mountroot_try(name))
1557			return(0);
1558	}
1559}
1560
/*
 * Local helper function for vfs_mountroot_ask.
 *
 * Reads one line from the console with cngetc(), echoing each character
 * (masked to 7 bits), and stores it NUL-terminated in `cp'.  Newline or
 * carriage return ends the line.  Backspace, DEL and '#' erase the
 * previous character; '@' and ^U discard the whole line.
 *
 * No length limit is applied: the caller must supply a buffer large
 * enough for whatever is typed.
 */
static void
gets(char *cp)
{
	char *lp;
	int c;

	lp = cp;
	for (;;) {
		/*
		 * The mask keeps c within 0..127, so no "no character"
		 * (-1) case can be observed after it.
		 */
		c = cngetc() & 0177;
		printf("%c", c);
		switch (c) {
		case '\n':
		case '\r':
			*lp = '\0';
			return;
		case '\b':
		case '\177':
			if (lp > cp) {
				printf(" \b");
				lp--;
			}
			continue;
		case '#':
			/* Never step back past the start of the buffer. */
			if (lp > cp)
				lp--;
			continue;
		case '@':
		case 'u' & 037:
			lp = cp;
			printf("%c", '\n');
			continue;
		default:
			*lp++ = c;
		}
	}
}
1601
1602/*
1603 * Convert a given name to the dev_t of the disk-like device
1604 * it refers to.
1605 */
1606dev_t
1607getdiskbyname(char *name) {
1608	char *cp;
1609	dev_t dev;
1610
1611	cp = name;
1612	if (!bcmp(cp, "/dev/", 5))
1613		cp += 5;
1614
1615	dev = NODEV;
1616	EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1617	return (dev);
1618}
1619
1620/*
1621 * Set rootdev to match (name), given that we expect it to
1622 * refer to a disk-like device.
1623 */
1624static int
1625setrootbyname(char *name)
1626{
1627	dev_t diskdev;
1628
1629	diskdev = getdiskbyname(name);
1630	if (diskdev != NODEV) {
1631		rootdev = diskdev;
1632		return (0);
1633	}
1634
1635	return (1);
1636}
1637
/*
 * DDB "show disk/<devicename>" command: print the dev_t that
 * getdiskbyname() resolves the given modifier string to.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t found;

	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}
	found = getdiskbyname(modif);
	if (found == NODEV)
		db_printf("No disk device matched.\n");
	else
		db_printf("dev_t = %p\n", found);
}
#endif
1655
1656/*
1657 * Get a mount option by its name.
1658 *
1659 * Return 0 if the option was found, ENOENT otherwise.
1660 * If len is non-NULL it will be filled with the length
1661 * of the option. If buf is non-NULL, it will be filled
1662 * with the address of the option.
1663 */
1664int
1665vfs_getopt(opts, name, buf, len)
1666	struct vfsoptlist *opts;
1667	const char *name;
1668	void **buf;
1669	int *len;
1670{
1671	struct vfsopt *opt;
1672
1673	TAILQ_FOREACH(opt, opts, link) {
1674		if (strcmp(name, opt->name) == 0) {
1675			if (len != NULL)
1676				*len = opt->len;
1677			if (buf != NULL)
1678				*buf = opt->value;
1679			return (0);
1680		}
1681	}
1682	return (ENOENT);
1683}
1684
1685/*
1686 * Find and copy a mount option.
1687 *
1688 * The size of the buffer has to be specified
1689 * in len, if it is not the same length as the
1690 * mount option, EINVAL is returned.
1691 * Returns ENOENT if the option is not found.
1692 */
1693int
1694vfs_copyopt(opts, name, dest, len)
1695	struct vfsoptlist *opts;
1696	const char *name;
1697	void *dest;
1698	int len;
1699{
1700	struct vfsopt *opt;
1701
1702	TAILQ_FOREACH(opt, opts, link) {
1703		if (strcmp(name, opt->name) == 0) {
1704			if (len != opt->len)
1705				return (EINVAL);
1706			bcopy(opt->value, dest, opt->len);
1707			return (0);
1708		}
1709	}
1710	return (ENOENT);
1711}
1712