vfs_mount.c revision 101241
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * Copyright (c) 1999 Michael Smith
39 * All rights reserved.
40 * Copyright (c) 1999 Poul-Henning Kamp
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * $FreeBSD: head/sys/kern/vfs_mount.c 101241 2002-08-02 20:56:07Z mux $
65 */
66
67#include <sys/param.h>
68#include <sys/conf.h>
69#include <sys/cons.h>
70#include <sys/kernel.h>
71#include <sys/linker.h>
72#include <sys/mac.h>
73#include <sys/malloc.h>
74#include <sys/mount.h>
75#include <sys/mutex.h>
76#include <sys/namei.h>
77#include <sys/proc.h>
78#include <sys/reboot.h>
79#include <sys/sysproto.h>
80#include <sys/sx.h>
81#include <sys/sysctl.h>
82#include <sys/sysent.h>
83#include <sys/systm.h>
84#include <sys/vnode.h>
85
86#include <machine/stdarg.h>
87
88#include "opt_rootdevname.h"
89#include "opt_ddb.h"
90#include "opt_mac.h"
91
92#ifdef DDB
93#include <ddb/ddb.h>
94#endif
95
#define ROOTNAME	"root_device"

/* Forward declarations of helpers that are private to this file. */
static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
static int	vfs_nmount(struct thread *td, int, struct uio *);
static int	vfs_mountroot_try(char *mountfrom);
static int	vfs_mountroot_ask(void);
static void	gets(char *cp);

/*
 * Policy knob consulted by vfs_nmount() and vfs_mount(): while it is 0,
 * both require suser() to succeed before they go any further.  It is
 * registered read/write under the _vfs sysctl node.
 */
static int	usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/* Malloc type used for struct mount and for the mount option lists. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;

/* For any iteration/modification of mnt_vnodelist */
struct mtx mntvnode_mtx;

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode	*rootvnode;

/*
 * The root filesystem is detailed in the kernel environment variable
 * vfs.root.mountfrom, which is expected to be in the general format
 *
 * <vfsname>:[<path>]
 * vfsname   := the name of a VFS known to the kernel and capable
 *              of being mounted as root
 * path      := disk device name or other data used by the filesystem
 *              to locate its physical store
 */

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * Each entry uses the <vfsname>:<path> format described above; the
 * array is terminated by a NULL entry.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0a",
	"cd9660:acd0a",
	"cd9660:wcd0a",
	NULL
};

/* legacy find-root code */
char		*rootdevnames[2] = {NULL, NULL};
static int	setrootbyname(char *name);
dev_t		rootdev = NODEV;
149
150/* Remove one mount option. */
151static void
152vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
153{
154
155	TAILQ_REMOVE(opts, opt, link);
156	free(opt->name, M_MOUNT);
157	if (opt->value != NULL)
158		free(opt->value, M_MOUNT);
159#ifdef INVARIANTS
160	else if (opt->len != 0)
161		panic("%s: mount option with NULL value but length != 0",
162		    __func__);
163#endif
164	free(opt, M_MOUNT);
165}
166
167/* Release all resources related to the mount options. */
168static void
169vfs_freeopts(struct vfsoptlist *opts)
170{
171	struct vfsopt *opt;
172
173	while (!TAILQ_EMPTY(opts)) {
174		opt = TAILQ_FIRST(opts);
175		vfs_freeopt(opts, opt);
176	}
177	free(opts, M_MOUNT);
178}
179
180/*
181 * If a mount option is specified several times,
182 * (with or without the "no" prefix) only keep
183 * the last occurence of it.
184 */
185static void
186vfs_sanitizeopts(struct vfsoptlist *opts)
187{
188	struct vfsopt *opt, *opt2, *tmp;
189	int noopt;
190
191	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
192		if (strncmp(opt->name, "no", 2) == 0)
193			noopt = 1;
194		else
195			noopt = 0;
196		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
197		while (opt2 != NULL) {
198			if (strcmp(opt2->name, opt->name) == 0 ||
199			    (noopt && strcmp(opt->name + 2, opt2->name) == 0) ||
200			    (!noopt && strncmp(opt2->name, "no", 2) == 0 &&
201			    strcmp(opt2->name + 2, opt->name) == 0)) {
202				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
203				vfs_freeopt(opts, opt2);
204				opt2 = tmp;
205			} else {
206				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
207			}
208		}
209	}
210}
211
212/*
213 * Build a linked list of mount options from a struct uio.
214 */
215static int
216vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
217{
218	struct vfsoptlist *opts;
219	struct vfsopt *opt;
220	unsigned int i, iovcnt;
221	int error, namelen, optlen;
222
223	iovcnt = auio->uio_iovcnt;
224	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
225	TAILQ_INIT(opts);
226	for (i = 0; i < iovcnt; i += 2) {
227		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
228		namelen = auio->uio_iov[i].iov_len;
229		optlen = auio->uio_iov[i + 1].iov_len;
230		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
231		opt->value = NULL;
232		if (auio->uio_segflg == UIO_SYSSPACE) {
233			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
234		} else {
235			error = copyin(auio->uio_iov[i].iov_base, opt->name,
236			    namelen);
237			if (error)
238				goto bad;
239		}
240		opt->len = optlen;
241		if (optlen != 0) {
242			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
243			if (auio->uio_segflg == UIO_SYSSPACE) {
244				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
245				    optlen);
246			} else {
247				error = copyin(auio->uio_iov[i + 1].iov_base,
248				    opt->value, optlen);
249				if (error)
250					goto bad;
251			}
252		}
253		TAILQ_INSERT_TAIL(opts, opt, link);
254	}
255	vfs_sanitizeopts(opts);
256	*options = opts;
257	return (0);
258bad:
259	vfs_freeopts(opts);
260	return (error);
261}
262
263/*
264 * Merge the old mount options with the new ones passed
265 * in the MNT_UPDATE case.
266 */
267static void
268vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
269{
270	struct vfsopt *opt, *opt2, *new;
271
272	TAILQ_FOREACH(opt, opts, link) {
273		/*
274		 * Check that this option hasn't been redefined
275		 * nor cancelled with a "no" mount option.
276		 */
277		opt2 = TAILQ_FIRST(toopts);
278		while (opt2 != NULL) {
279			if (strcmp(opt2->name, opt->name) == 0)
280				goto next;
281			if (strncmp(opt2->name, "no", 2) == 0 &&
282			    strcmp(opt2->name + 2, opt->name) == 0) {
283				vfs_freeopt(toopts, opt2);
284				goto next;
285			}
286			opt2 = TAILQ_NEXT(opt2, link);
287		}
288		/* We want this option, duplicate it. */
289		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
290		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
291		strcpy(new->name, opt->name);
292		if (opt->len != 0) {
293			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
294			bcopy(opt->value, new->value, opt->len);
295		} else {
296			new->value = NULL;
297		}
298		new->len = opt->len;
299		TAILQ_INSERT_TAIL(toopts, new, link);
300next:
301		continue;
302	}
303}
304
305/*
306 * New mount API.
307 */
308int
309nmount(td, uap)
310	struct thread *td;
311	struct nmount_args /* {
312		syscallarg(struct iovec *) iovp;
313		syscallarg(unsigned int) iovcnt;
314		syscallarg(int) flags;
315	} */ *uap;
316{
317	struct uio auio;
318	struct iovec *iov, *needfree;
319	struct iovec aiov[UIO_SMALLIOV];
320	unsigned int i;
321	int error;
322	u_int iovlen, iovcnt;
323
324	iovcnt = SCARG(uap, iovcnt);
325	iovlen = iovcnt * sizeof (struct iovec);
326	/*
327	 * Check that we have an even number of iovec's
328	 * and that we have at least two options.
329	 */
330	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
331		return (EINVAL);
332
333	if (iovcnt > UIO_SMALLIOV) {
334		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
335		needfree = iov;
336	} else {
337		iov = aiov;
338		needfree = NULL;
339	}
340	auio.uio_iov = iov;
341	auio.uio_iovcnt = iovcnt;
342	auio.uio_segflg = UIO_USERSPACE;
343	if ((error = copyin(uap->iovp, iov, iovlen)))
344		goto finish;
345
346	for (i = 0; i < iovcnt; i++) {
347		if (iov->iov_len > MMAXOPTIONLEN) {
348			error = EINVAL;
349			goto finish;
350		}
351		iov++;
352	}
353	error = vfs_nmount(td, SCARG(uap, flags), &auio);
354finish:
355	if (needfree != NULL)
356		free(needfree, M_TEMP);
357	return (error);
358}
359
360int
361kernel_mount(iovp, iovcnt, flags)
362	struct iovec *iovp;
363	unsigned int iovcnt;
364	int flags;
365{
366	struct uio auio;
367	int error;
368
369	/*
370	 * Check that we have an even number of iovec's
371	 * and that we have at least two options.
372	 */
373	if ((iovcnt & 1) || (iovcnt < 4))
374		return (EINVAL);
375
376	auio.uio_iov = iovp;
377	auio.uio_iovcnt = iovcnt;
378	auio.uio_segflg = UIO_SYSSPACE;
379
380	error = vfs_nmount(curthread, flags, &auio);
381	return (error);
382}
383
384int
385kernel_vmount(int flags, ...)
386{
387	struct iovec *iovp;
388	struct uio auio;
389	va_list ap;
390	unsigned int iovcnt, iovlen, len;
391	const char *cp;
392	char *buf, *pos;
393	size_t n;
394	int error, i;
395
396	len = 0;
397	va_start(ap, flags);
398	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
399		len += strlen(cp) + 1;
400	va_end(ap);
401
402	if (iovcnt < 4 || iovcnt & 1)
403		return (EINVAL);
404
405	iovlen = iovcnt * sizeof (struct iovec);
406	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
407	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
408	pos = buf;
409	va_start(ap, flags);
410	for (i = 0; i < iovcnt; i++) {
411		cp = va_arg(ap, const char *);
412		copystr(cp, pos, len - (pos - buf), &n);
413		iovp[i].iov_base = pos;
414		iovp[i].iov_len = n;
415		pos += n;
416	}
417	va_end(ap);
418
419	auio.uio_iov = iovp;
420	auio.uio_iovcnt = iovcnt;
421	auio.uio_segflg = UIO_SYSSPACE;
422
423	error = vfs_nmount(curthread, flags, &auio);
424	FREE(iovp, M_MOUNT);
425	FREE(buf, M_MOUNT);
426	return (error);
427}
428
429/*
430 * vfs_nmount(): actually attempt a filesystem mount.
431 */
432static int
433vfs_nmount(td, fsflags, fsoptions)
434	struct thread *td;
435	int fsflags;		/* Flags common to all filesystems. */
436	struct uio *fsoptions;	/* Options local to the filesystem. */
437{
438	linker_file_t lf;
439	struct vnode *vp;
440	struct mount *mp;
441	struct vfsconf *vfsp;
442	struct vfsoptlist *optlist;
443	char *fstype, *fspath;
444	int error, flag = 0, kern_flag = 0;
445	int fstypelen, fspathlen;
446	struct vattr va;
447	struct nameidata nd;
448
449	error = vfs_buildopts(fsoptions, &optlist);
450	if (error)
451		return (error);
452
453	/*
454	 * We need these two options before the others,
455	 * and they are mandatory for any filesystem.
456	 * Ensure they are NUL terminated as well.
457	 */
458	fstypelen = 0;
459	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
460	if (error || fstype[fstypelen - 1] != '\0') {
461		error = EINVAL;
462		goto bad;
463	}
464	fspathlen = 0;
465	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
466	if (error || fspath[fspathlen - 1] != '\0') {
467		error = EINVAL;
468		goto bad;
469	}
470
471	/*
472	 * Be ultra-paranoid about making sure the type and fspath
473	 * variables will fit in our mp buffers, including the
474	 * terminating NUL.
475	 */
476	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
477		error = ENAMETOOLONG;
478		goto bad;
479	}
480
481	if (usermount == 0) {
482	       	error = suser(td);
483		if (error)
484			goto bad;
485	}
486	/*
487	 * Do not allow NFS export by non-root users.
488	 */
489	if (fsflags & MNT_EXPORTED) {
490		error = suser(td);
491		if (error)
492			goto bad;
493	}
494	/*
495	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
496	 */
497	if (suser(td))
498		fsflags |= MNT_NOSUID | MNT_NODEV;
499	/*
500	 * Get vnode to be covered
501	 */
502	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
503	if ((error = namei(&nd)) != 0)
504		goto bad;
505	NDFREE(&nd, NDF_ONLY_PNBUF);
506	vp = nd.ni_vp;
507	if (fsflags & MNT_UPDATE) {
508		if ((vp->v_flag & VROOT) == 0) {
509			vput(vp);
510			error = EINVAL;
511			goto bad;
512		}
513		mp = vp->v_mount;
514		flag = mp->mnt_flag;
515		kern_flag = mp->mnt_kern_flag;
516		/*
517		 * We only allow the filesystem to be reloaded if it
518		 * is currently mounted read-only.
519		 */
520		if ((fsflags & MNT_RELOAD) &&
521		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
522			vput(vp);
523			error = EOPNOTSUPP;	/* Needs translation */
524			goto bad;
525		}
526		/*
527		 * Only root, or the user that did the original mount is
528		 * permitted to update it.
529		 */
530		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
531			error = suser(td);
532			if (error) {
533				vput(vp);
534				goto bad;
535			}
536		}
537		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
538			vput(vp);
539			error = EBUSY;
540			goto bad;
541		}
542		mtx_lock(&vp->v_interlock);
543		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
544			mtx_unlock(&vp->v_interlock);
545			vfs_unbusy(mp, td);
546			vput(vp);
547			error = EBUSY;
548			goto bad;
549		}
550		vp->v_flag |= VMOUNT;
551		mtx_unlock(&vp->v_interlock);
552		mp->mnt_flag |= fsflags &
553		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
554		VOP_UNLOCK(vp, 0, td);
555		mp->mnt_optnew = optlist;
556		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
557		goto update;
558	}
559	/*
560	 * If the user is not root, ensure that they own the directory
561	 * onto which we are attempting to mount.
562	 */
563	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
564	if (error) {
565		vput(vp);
566		goto bad;
567	}
568	if (va.va_uid != td->td_ucred->cr_uid) {
569		error = suser(td);
570		if (error) {
571			vput(vp);
572			goto bad;
573		}
574	}
575	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
576		vput(vp);
577		goto bad;
578	}
579	if (vp->v_type != VDIR) {
580		vput(vp);
581		error = ENOTDIR;
582		goto bad;
583	}
584	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
585		if (!strcmp(vfsp->vfc_name, fstype))
586			break;
587	if (vfsp == NULL) {
588		/* Only load modules for root (very important!). */
589		error = suser(td);
590		if (error) {
591			vput(vp);
592			goto bad;
593		}
594		error = securelevel_gt(td->td_ucred, 0);
595		if (error) {
596			vput(vp);
597			goto bad;
598		}
599		error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
600		if (error || lf == NULL) {
601			vput(vp);
602			if (lf == NULL)
603				error = ENODEV;
604			goto bad;
605		}
606		lf->userrefs++;
607		/* Look up again to see if the VFS was loaded. */
608		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
609			if (!strcmp(vfsp->vfc_name, fstype))
610				break;
611		if (vfsp == NULL) {
612			lf->userrefs--;
613			linker_file_unload(lf);
614			vput(vp);
615			error = ENODEV;
616			goto bad;
617		}
618	}
619	mtx_lock(&vp->v_interlock);
620	if ((vp->v_flag & VMOUNT) != 0 ||
621	    vp->v_mountedhere != NULL) {
622		mtx_unlock(&vp->v_interlock);
623		vput(vp);
624		error = EBUSY;
625		goto bad;
626	}
627	vp->v_flag |= VMOUNT;
628	mtx_unlock(&vp->v_interlock);
629
630	/*
631	 * Allocate and initialize the filesystem.
632	 */
633	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
634	TAILQ_INIT(&mp->mnt_nvnodelist);
635	TAILQ_INIT(&mp->mnt_reservedvnlist);
636	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
637	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
638	mp->mnt_op = vfsp->vfc_vfsops;
639	mp->mnt_vfc = vfsp;
640	vfsp->vfc_refcount++;
641	mp->mnt_stat.f_type = vfsp->vfc_typenum;
642	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
643	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
644	mp->mnt_vnodecovered = vp;
645	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
646	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
647	mp->mnt_iosize_max = DFLTPHYS;
648#ifdef MAC
649	mac_init_mount(mp);
650	mac_create_mount(td->td_ucred, mp);
651#endif
652	VOP_UNLOCK(vp, 0, td);
653	mp->mnt_optnew = optlist;	/* XXXMAC: should this be above? */
654
655update:
656	/*
657	 * Check if the fs implements the new VFS_NMOUNT()
658	 * function, since the new system call was used.
659	 */
660	if (mp->mnt_op->vfs_mount != NULL) {
661		printf("%s doesn't support the new mount syscall\n",
662		    mp->mnt_vfc->vfc_name);
663		mtx_lock(&vp->v_interlock);
664		vp->v_flag &= ~VMOUNT;
665		mtx_unlock(&vp->v_interlock);
666		if (mp->mnt_flag & MNT_UPDATE)
667			vfs_unbusy(mp, td);
668		else {
669			mp->mnt_vfc->vfc_refcount--;
670			vfs_unbusy(mp, td);
671#ifdef MAC
672			mac_destroy_mount(mp);
673#endif
674			free(mp, M_MOUNT);
675		}
676		vrele(vp);
677		error = EOPNOTSUPP;
678		goto bad;
679	}
680
681	/*
682	 * Set the mount level flags.
683	 */
684	if (fsflags & MNT_RDONLY)
685		mp->mnt_flag |= MNT_RDONLY;
686	else if (mp->mnt_flag & MNT_RDONLY)
687		mp->mnt_kern_flag |= MNTK_WANTRDWR;
688	mp->mnt_flag &=~ MNT_UPDATEMASK;
689	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
690	/*
691	 * Mount the filesystem.
692	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
693	 * get.  No freeing of cn_pnbuf.
694	 */
695	error = VFS_NMOUNT(mp, &nd, td);
696	if (!error) {
697		if (mp->mnt_opt != NULL)
698			vfs_freeopts(mp->mnt_opt);
699		mp->mnt_opt = mp->mnt_optnew;
700	}
701	/*
702	 * Prevent external consumers of mount
703	 * options to read mnt_optnew.
704	 */
705	mp->mnt_optnew = NULL;
706	if (mp->mnt_flag & MNT_UPDATE) {
707		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
708			mp->mnt_flag &= ~MNT_RDONLY;
709		mp->mnt_flag &=~
710		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
711		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
712		if (error) {
713			mp->mnt_flag = flag;
714			mp->mnt_kern_flag = kern_flag;
715		}
716		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
717			if (mp->mnt_syncer == NULL)
718				error = vfs_allocate_syncvnode(mp);
719		} else {
720			if (mp->mnt_syncer != NULL)
721				vrele(mp->mnt_syncer);
722			mp->mnt_syncer = NULL;
723		}
724		vfs_unbusy(mp, td);
725		mtx_lock(&vp->v_interlock);
726		vp->v_flag &= ~VMOUNT;
727		mtx_unlock(&vp->v_interlock);
728		vrele(vp);
729		return (error);
730	}
731	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
732	/*
733	 * Put the new filesystem on the mount list after root.
734	 */
735	cache_purge(vp);
736	if (!error) {
737		struct vnode *newdp;
738
739		mtx_lock(&vp->v_interlock);
740		vp->v_flag &= ~VMOUNT;
741		vp->v_mountedhere = mp;
742		mtx_unlock(&vp->v_interlock);
743		mtx_lock(&mountlist_mtx);
744		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
745		mtx_unlock(&mountlist_mtx);
746		if (VFS_ROOT(mp, &newdp))
747			panic("mount: lost mount");
748		checkdirs(vp, newdp);
749		vput(newdp);
750		VOP_UNLOCK(vp, 0, td);
751		if ((mp->mnt_flag & MNT_RDONLY) == 0)
752			error = vfs_allocate_syncvnode(mp);
753		vfs_unbusy(mp, td);
754		if ((error = VFS_START(mp, 0, td)) != 0) {
755			vrele(vp);
756			goto bad;
757		}
758	} else {
759		mtx_lock(&vp->v_interlock);
760		vp->v_flag &= ~VMOUNT;
761		mtx_unlock(&vp->v_interlock);
762		mp->mnt_vfc->vfc_refcount--;
763		vfs_unbusy(mp, td);
764#ifdef MAC
765		mac_destroy_mount(mp);
766#endif
767		free(mp, M_MOUNT);
768		vput(vp);
769		goto bad;
770	}
771	return (0);
772bad:
773	vfs_freeopts(optlist);
774	return (error);
775}
776
777/*
778 * Old mount API.
779 */
780#ifndef _SYS_SYSPROTO_H_
781struct mount_args {
782	char	*type;
783	char	*path;
784	int	flags;
785	caddr_t	data;
786};
787#endif
788/* ARGSUSED */
789int
790mount(td, uap)
791	struct thread *td;
792	struct mount_args /* {
793		syscallarg(char *) type;
794		syscallarg(char *) path;
795		syscallarg(int) flags;
796		syscallarg(caddr_t) data;
797	} */ *uap;
798{
799	char *fstype;
800	char *fspath;
801	int error;
802
803	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
804	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
805
806	/*
807	 * vfs_mount() actually takes a kernel string for `type' and
808	 * `path' now, so extract them.
809	 */
810	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
811	if (error)
812		goto finish;
813	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
814	if (error)
815		goto finish;
816	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
817	    SCARG(uap, data));
818finish:
819	free(fstype, M_TEMP);
820	free(fspath, M_TEMP);
821	return (error);
822}
823
824/*
825 * vfs_mount(): actually attempt a filesystem mount.
826 *
827 * This routine is designed to be a "generic" entry point for routines
828 * that wish to mount a filesystem. All parameters except `fsdata' are
829 * pointers into kernel space. `fsdata' is currently still a pointer
830 * into userspace.
831 */
832int
833vfs_mount(td, fstype, fspath, fsflags, fsdata)
834	struct thread *td;
835	const char *fstype;
836	char *fspath;
837	int fsflags;
838	void *fsdata;
839{
840	linker_file_t lf;
841	struct vnode *vp;
842	struct mount *mp;
843	struct vfsconf *vfsp;
844	int error, flag = 0, kern_flag = 0;
845	struct vattr va;
846	struct nameidata nd;
847
848	/*
849	 * Be ultra-paranoid about making sure the type and fspath
850	 * variables will fit in our mp buffers, including the
851	 * terminating NUL.
852	 */
853	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
854		return (ENAMETOOLONG);
855
856	if (usermount == 0) {
857		error = suser(td);
858		if (error)
859			return (error);
860	}
861	/*
862	 * Do not allow NFS export by non-root users.
863	 */
864	if (fsflags & MNT_EXPORTED) {
865		error = suser(td);
866		if (error)
867			return (error);
868	}
869	/*
870	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
871	 */
872	if (suser(td))
873		fsflags |= MNT_NOSUID | MNT_NODEV;
874	/*
875	 * Get vnode to be covered
876	 */
877	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
878	if ((error = namei(&nd)) != 0)
879		return (error);
880	NDFREE(&nd, NDF_ONLY_PNBUF);
881	vp = nd.ni_vp;
882	if (fsflags & MNT_UPDATE) {
883		if ((vp->v_flag & VROOT) == 0) {
884			vput(vp);
885			return (EINVAL);
886		}
887		mp = vp->v_mount;
888		flag = mp->mnt_flag;
889		kern_flag = mp->mnt_kern_flag;
890		/*
891		 * We only allow the filesystem to be reloaded if it
892		 * is currently mounted read-only.
893		 */
894		if ((fsflags & MNT_RELOAD) &&
895		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
896			vput(vp);
897			return (EOPNOTSUPP);	/* Needs translation */
898		}
899		/*
900		 * Only root, or the user that did the original mount is
901		 * permitted to update it.
902		 */
903		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
904			error = suser(td);
905			if (error) {
906				vput(vp);
907				return (error);
908			}
909		}
910		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
911			vput(vp);
912			return (EBUSY);
913		}
914		mtx_lock(&vp->v_interlock);
915		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
916			mtx_unlock(&vp->v_interlock);
917			vfs_unbusy(mp, td);
918			vput(vp);
919			return (EBUSY);
920		}
921		vp->v_flag |= VMOUNT;
922		mtx_unlock(&vp->v_interlock);
923		mp->mnt_flag |= fsflags &
924		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
925		VOP_UNLOCK(vp, 0, td);
926		goto update;
927	}
928	/*
929	 * If the user is not root, ensure that they own the directory
930	 * onto which we are attempting to mount.
931	 */
932	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
933	if (error) {
934		vput(vp);
935		return (error);
936	}
937	if (va.va_uid != td->td_ucred->cr_uid) {
938		error = suser(td);
939		if (error) {
940			vput(vp);
941			return (error);
942		}
943	}
944	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
945		vput(vp);
946		return (error);
947	}
948	if (vp->v_type != VDIR) {
949		vput(vp);
950		return (ENOTDIR);
951	}
952	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
953		if (!strcmp(vfsp->vfc_name, fstype))
954			break;
955	if (vfsp == NULL) {
956		/* Only load modules for root (very important!). */
957		error = suser(td);
958		if (error) {
959			vput(vp);
960			return (error);
961		}
962		error = securelevel_gt(td->td_ucred, 0);
963		if (error) {
964			vput(vp);
965			return (error);
966		}
967		error = linker_load_module(NULL, fstype, NULL, NULL, &lf);
968		if (error || lf == NULL) {
969			vput(vp);
970			if (lf == NULL)
971				error = ENODEV;
972			return (error);
973		}
974		lf->userrefs++;
975		/* Look up again to see if the VFS was loaded. */
976		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
977			if (!strcmp(vfsp->vfc_name, fstype))
978				break;
979		if (vfsp == NULL) {
980			lf->userrefs--;
981			linker_file_unload(lf);
982			vput(vp);
983			return (ENODEV);
984		}
985	}
986	mtx_lock(&vp->v_interlock);
987	if ((vp->v_flag & VMOUNT) != 0 ||
988	    vp->v_mountedhere != NULL) {
989		mtx_unlock(&vp->v_interlock);
990		vput(vp);
991		return (EBUSY);
992	}
993	vp->v_flag |= VMOUNT;
994	mtx_unlock(&vp->v_interlock);
995
996	/*
997	 * Allocate and initialize the filesystem.
998	 */
999	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1000	TAILQ_INIT(&mp->mnt_nvnodelist);
1001	TAILQ_INIT(&mp->mnt_reservedvnlist);
1002	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1003	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1004	mp->mnt_op = vfsp->vfc_vfsops;
1005	mp->mnt_vfc = vfsp;
1006	vfsp->vfc_refcount++;
1007	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1008	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1009	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
1010	mp->mnt_vnodecovered = vp;
1011	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
1012	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
1013	mp->mnt_iosize_max = DFLTPHYS;
1014#ifdef MAC
1015	mac_init_mount(mp);
1016	mac_create_mount(td->td_ucred, mp);
1017#endif
1018	VOP_UNLOCK(vp, 0, td);
1019update:
1020	/*
1021	 * Check if the fs implements the old VFS_MOUNT()
1022	 * function, since the old system call was used.
1023	 */
1024	if (mp->mnt_op->vfs_mount == NULL) {
1025		printf("%s doesn't support the old mount syscall\n",
1026		    mp->mnt_vfc->vfc_name);
1027		mtx_lock(&vp->v_interlock);
1028		vp->v_flag &= ~VMOUNT;
1029		mtx_unlock(&vp->v_interlock);
1030		if (mp->mnt_flag & MNT_UPDATE)
1031			vfs_unbusy(mp, td);
1032		else {
1033			mp->mnt_vfc->vfc_refcount--;
1034			vfs_unbusy(mp, td);
1035#ifdef MAC
1036			mac_destroy_mount(mp);
1037#endif
1038			free(mp, M_MOUNT);
1039		}
1040		vrele(vp);
1041		return (EOPNOTSUPP);
1042	}
1043
1044	/*
1045	 * Set the mount level flags.
1046	 */
1047	if (fsflags & MNT_RDONLY)
1048		mp->mnt_flag |= MNT_RDONLY;
1049	else if (mp->mnt_flag & MNT_RDONLY)
1050		mp->mnt_kern_flag |= MNTK_WANTRDWR;
1051	mp->mnt_flag &=~ MNT_UPDATEMASK;
1052	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
1053	/*
1054	 * Mount the filesystem.
1055	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
1056	 * get.  No freeing of cn_pnbuf.
1057	 */
1058	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
1059	if (mp->mnt_flag & MNT_UPDATE) {
1060		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
1061			mp->mnt_flag &= ~MNT_RDONLY;
1062		mp->mnt_flag &=~
1063		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
1064		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
1065		if (error) {
1066			mp->mnt_flag = flag;
1067			mp->mnt_kern_flag = kern_flag;
1068		}
1069		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1070			if (mp->mnt_syncer == NULL)
1071				error = vfs_allocate_syncvnode(mp);
1072		} else {
1073			if (mp->mnt_syncer != NULL)
1074				vrele(mp->mnt_syncer);
1075			mp->mnt_syncer = NULL;
1076		}
1077		vfs_unbusy(mp, td);
1078		mtx_lock(&vp->v_interlock);
1079		vp->v_flag &= ~VMOUNT;
1080		mtx_unlock(&vp->v_interlock);
1081		vrele(vp);
1082		return (error);
1083	}
1084	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1085	/*
1086	 * Put the new filesystem on the mount list after root.
1087	 */
1088	cache_purge(vp);
1089	if (!error) {
1090		struct vnode *newdp;
1091
1092		mtx_lock(&vp->v_interlock);
1093		vp->v_flag &= ~VMOUNT;
1094		vp->v_mountedhere = mp;
1095		mtx_unlock(&vp->v_interlock);
1096		mtx_lock(&mountlist_mtx);
1097		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1098		mtx_unlock(&mountlist_mtx);
1099		if (VFS_ROOT(mp, &newdp))
1100			panic("mount: lost mount");
1101		checkdirs(vp, newdp);
1102		vput(newdp);
1103		VOP_UNLOCK(vp, 0, td);
1104		if ((mp->mnt_flag & MNT_RDONLY) == 0)
1105			error = vfs_allocate_syncvnode(mp);
1106		vfs_unbusy(mp, td);
1107		if ((error = VFS_START(mp, 0, td)) != 0)
1108			vrele(vp);
1109	} else {
1110		mtx_lock(&vp->v_interlock);
1111		vp->v_flag &= ~VMOUNT;
1112		mtx_unlock(&vp->v_interlock);
1113		mp->mnt_vfc->vfc_refcount--;
1114		vfs_unbusy(mp, td);
1115#ifdef MAC
1116		mac_destroy_mount(mp);
1117#endif
1118		free(mp, M_MOUNT);
1119		vput(vp);
1120	}
1121	return (error);
1122}
1123
1124/*
1125 * Scan all active processes to see if any of them have a current
1126 * or root directory of `olddp'. If so, replace them with the new
1127 * mount point.
1128 */
1129static void
1130checkdirs(olddp, newdp)
1131	struct vnode *olddp, *newdp;
1132{
1133	struct filedesc *fdp;
1134	struct proc *p;
1135	int nrele;
1136
1137	if (olddp->v_usecount == 1)
1138		return;
1139	sx_slock(&allproc_lock);
1140	LIST_FOREACH(p, &allproc, p_list) {
1141		PROC_LOCK(p);
1142		fdp = p->p_fd;
1143		if (fdp == NULL) {
1144			PROC_UNLOCK(p);
1145			continue;
1146		}
1147		nrele = 0;
1148		FILEDESC_LOCK(fdp);
1149		if (fdp->fd_cdir == olddp) {
1150			VREF(newdp);
1151			fdp->fd_cdir = newdp;
1152			nrele++;
1153		}
1154		if (fdp->fd_rdir == olddp) {
1155			VREF(newdp);
1156			fdp->fd_rdir = newdp;
1157			nrele++;
1158		}
1159		FILEDESC_UNLOCK(fdp);
1160		PROC_UNLOCK(p);
1161		while (nrele--)
1162			vrele(olddp);
1163	}
1164	sx_sunlock(&allproc_lock);
1165	if (rootvnode == olddp) {
1166		vrele(rootvnode);
1167		VREF(newdp);
1168		rootvnode = newdp;
1169	}
1170}
1171
1172/*
1173 * Unmount a filesystem.
1174 *
1175 * Note: unmount takes a path to the vnode mounted on as argument,
1176 * not special file (as before).
1177 */
1178#ifndef _SYS_SYSPROTO_H_
1179struct unmount_args {
1180	char	*path;
1181	int	flags;
1182};
1183#endif
1184/* ARGSUSED */
1185int
1186unmount(td, uap)
1187	struct thread *td;
1188	register struct unmount_args /* {
1189		syscallarg(char *) path;
1190		syscallarg(int) flags;
1191	} */ *uap;
1192{
1193	register struct vnode *vp;
1194	struct mount *mp;
1195	int error;
1196	struct nameidata nd;
1197
1198	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1199	    SCARG(uap, path), td);
1200	if ((error = namei(&nd)) != 0)
1201		return (error);
1202	vp = nd.ni_vp;
1203	NDFREE(&nd, NDF_ONLY_PNBUF);
1204	mp = vp->v_mount;
1205
1206	/*
1207	 * Only root, or the user that did the original mount is
1208	 * permitted to unmount this filesystem.
1209	 */
1210	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1211		error = suser(td);
1212		if (error) {
1213			vput(vp);
1214			return (error);
1215		}
1216	}
1217
1218	/*
1219	 * Don't allow unmounting the root filesystem.
1220	 */
1221	if (mp->mnt_flag & MNT_ROOTFS) {
1222		vput(vp);
1223		return (EINVAL);
1224	}
1225
1226	/*
1227	 * Must be the root of the filesystem
1228	 */
1229	if ((vp->v_flag & VROOT) == 0) {
1230		vput(vp);
1231		return (EINVAL);
1232	}
1233	vput(vp);
1234	return (dounmount(mp, SCARG(uap, flags), td));
1235}
1236
1237/*
1238 * Do the actual filesystem unmount.
1239 */
1240int
1241dounmount(mp, flags, td)
1242	struct mount *mp;
1243	int flags;
1244	struct thread *td;
1245{
1246	struct vnode *coveredvp, *fsrootvp;
1247	int error;
1248	int async_flag;
1249
1250	mtx_lock(&mountlist_mtx);
1251	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1252		mtx_unlock(&mountlist_mtx);
1253		return (EBUSY);
1254	}
1255	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1256	/* Allow filesystems to detect that a forced unmount is in progress. */
1257	if (flags & MNT_FORCE)
1258		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1259	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1260	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1261	if (error) {
1262		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1263		if (mp->mnt_kern_flag & MNTK_MWAIT)
1264			wakeup(mp);
1265		return (error);
1266	}
1267	vn_start_write(NULL, &mp, V_WAIT);
1268
1269	if (mp->mnt_flag & MNT_EXPUBLIC)
1270		vfs_setpublicfs(NULL, NULL, NULL);
1271
1272	vfs_msync(mp, MNT_WAIT);
1273	async_flag = mp->mnt_flag & MNT_ASYNC;
1274	mp->mnt_flag &=~ MNT_ASYNC;
1275	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1276	if (mp->mnt_syncer != NULL)
1277		vrele(mp->mnt_syncer);
1278	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1279	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1280		if (mp->mnt_vnodecovered != NULL)
1281			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1282		if (fsrootvp == rootvnode) {
1283			vrele(rootvnode);
1284			rootvnode = NULL;
1285		}
1286		vput(fsrootvp);
1287	}
1288	if (((mp->mnt_flag & MNT_RDONLY) ||
1289	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1290	    (flags & MNT_FORCE)) {
1291		error = VFS_UNMOUNT(mp, flags, td);
1292	}
1293	vn_finished_write(mp);
1294	if (error) {
1295		/* Undo cdir/rdir and rootvnode changes made above. */
1296		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1297			if (mp->mnt_vnodecovered != NULL)
1298				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1299			if (rootvnode == NULL) {
1300				rootvnode = fsrootvp;
1301				vref(rootvnode);
1302			}
1303			vput(fsrootvp);
1304		}
1305		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1306			(void) vfs_allocate_syncvnode(mp);
1307		mtx_lock(&mountlist_mtx);
1308		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1309		mp->mnt_flag |= async_flag;
1310		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1311		    &mountlist_mtx, td);
1312		if (mp->mnt_kern_flag & MNTK_MWAIT)
1313			wakeup(mp);
1314		return (error);
1315	}
1316	mtx_lock(&mountlist_mtx);
1317	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1318	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1319		coveredvp->v_mountedhere = NULL;
1320	mp->mnt_vfc->vfc_refcount--;
1321	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1322		panic("unmount: dangling vnode");
1323	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1324	lockdestroy(&mp->mnt_lock);
1325	if (coveredvp != NULL)
1326		vrele(coveredvp);
1327	if (mp->mnt_kern_flag & MNTK_MWAIT)
1328		wakeup(mp);
1329#ifdef MAC
1330	mac_destroy_mount(mp);
1331#endif
1332	if (mp->mnt_op->vfs_mount == NULL)
1333		vfs_freeopts(mp->mnt_opt);
1334	free(mp, M_MOUNT);
1335	return (0);
1336}
1337
1338/*
1339 * Lookup a filesystem type, and if found allocate and initialize
1340 * a mount structure for it.
1341 *
1342 * Devname is usually updated by mount(8) after booting.
1343 */
1344int
1345vfs_rootmountalloc(fstypename, devname, mpp)
1346	char *fstypename;
1347	char *devname;
1348	struct mount **mpp;
1349{
1350	struct thread *td = curthread;	/* XXX */
1351	struct vfsconf *vfsp;
1352	struct mount *mp;
1353
1354	if (fstypename == NULL)
1355		return (ENODEV);
1356	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1357		if (!strcmp(vfsp->vfc_name, fstypename))
1358			break;
1359	if (vfsp == NULL)
1360		return (ENODEV);
1361	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1362	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1363	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1364	TAILQ_INIT(&mp->mnt_nvnodelist);
1365	TAILQ_INIT(&mp->mnt_reservedvnlist);
1366	mp->mnt_vfc = vfsp;
1367	mp->mnt_op = vfsp->vfc_vfsops;
1368	mp->mnt_flag = MNT_RDONLY;
1369	mp->mnt_vnodecovered = NULLVP;
1370	vfsp->vfc_refcount++;
1371	mp->mnt_iosize_max = DFLTPHYS;
1372	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1373	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1374	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1375	mp->mnt_stat.f_mntonname[0] = '/';
1376	mp->mnt_stat.f_mntonname[1] = 0;
1377	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
1378#ifdef MAC
1379	mac_init_mount(mp);
1380	mac_create_mount(td->td_ucred, mp);
1381#endif
1382	*mpp = mp;
1383	return (0);
1384}
1385
1386/*
1387 * Find and mount the root filesystem
1388 */
1389void
1390vfs_mountroot(void)
1391{
1392	char		*cp;
1393	int		i, error;
1394
1395	/*
1396	 * The root filesystem information is compiled in, and we are
1397	 * booted with instructions to use it.
1398	 */
1399#ifdef ROOTDEVNAME
1400	if ((boothowto & RB_DFLTROOT) &&
1401	    !vfs_mountroot_try(ROOTDEVNAME))
1402		return;
1403#endif
1404	/*
1405	 * We are booted with instructions to prompt for the root filesystem,
1406	 * or to use the compiled-in default when it doesn't exist.
1407	 */
1408	if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1409		if (!vfs_mountroot_ask())
1410			return;
1411	}
1412
1413	/*
1414	 * We've been given the generic "use CDROM as root" flag.  This is
1415	 * necessary because one media may be used in many different
1416	 * devices, so we need to search for them.
1417	 */
1418	if (boothowto & RB_CDROM) {
1419		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1420			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1421				return;
1422		}
1423	}
1424
1425	/*
1426	 * Try to use the value read by the loader from /etc/fstab, or
1427	 * supplied via some other means.  This is the preferred
1428	 * mechanism.
1429	 */
1430	if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1431		error = vfs_mountroot_try(cp);
1432		freeenv(cp);
1433		if (!error)
1434			return;
1435	}
1436
1437	/*
1438	 * Try values that may have been computed by the machine-dependant
1439	 * legacy code.
1440	 */
1441	if (!vfs_mountroot_try(rootdevnames[0]))
1442		return;
1443	if (!vfs_mountroot_try(rootdevnames[1]))
1444		return;
1445
1446	/*
1447	 * If we have a compiled-in default, and haven't already tried it, try
1448	 * it now.
1449	 */
1450#ifdef ROOTDEVNAME
1451	if (!(boothowto & RB_DFLTROOT))
1452		if (!vfs_mountroot_try(ROOTDEVNAME))
1453			return;
1454#endif
1455
1456	/*
1457	 * Everything so far has failed, prompt on the console if we haven't
1458	 * already tried that.
1459	 */
1460	if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1461		return;
1462	panic("Root mount failed, startup aborted.");
1463}
1464
1465/*
1466 * Mount (mountfrom) as the root filesystem.
1467 */
1468static int
1469vfs_mountroot_try(char *mountfrom)
1470{
1471        struct mount	*mp;
1472	char		*vfsname, *path;
1473	int		error;
1474	char		patt[32];
1475	int		s;
1476
1477	vfsname = NULL;
1478	path    = NULL;
1479	mp      = NULL;
1480	error   = EINVAL;
1481
1482	if (mountfrom == NULL)
1483		return(error);		/* don't complain */
1484
1485	s = splcam();			/* Overkill, but annoying without it */
1486	printf("Mounting root from %s\n", mountfrom);
1487	splx(s);
1488
1489	/* parse vfs name and path */
1490	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1491	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1492	vfsname[0] = path[0] = 0;
1493	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1494	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1495		goto done;
1496
1497	/* allocate a root mount */
1498	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1499				   &mp);
1500	if (error != 0) {
1501		printf("Can't allocate root mount for filesystem '%s': %d\n",
1502		       vfsname, error);
1503		goto done;
1504	}
1505	mp->mnt_flag |= MNT_ROOTFS;
1506
1507	/* do our best to set rootdev */
1508	if ((path[0] != 0) && setrootbyname(path))
1509		printf("setrootbyname failed\n");
1510
1511	/* If the root device is a type "memory disk", mount RW */
1512	if (rootdev != NODEV && devsw(rootdev) &&
1513	    (devsw(rootdev)->d_flags & D_MEMDISK))
1514		mp->mnt_flag &= ~MNT_RDONLY;
1515
1516	/*
1517	 * Set the mount path to be something useful, because the
1518	 * filesystem code isn't responsible now for initialising
1519	 * f_mntonname unless they want to override the default
1520	 * (which is `path'.)
1521	 */
1522	strncpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1523
1524	error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1525
1526done:
1527	if (vfsname != NULL)
1528		free(vfsname, M_MOUNT);
1529	if (path != NULL)
1530		free(path, M_MOUNT);
1531	if (error != 0) {
1532		if (mp != NULL) {
1533			vfs_unbusy(mp, curthread);
1534#ifdef MAC
1535			mac_destroy_mount(mp);
1536#endif
1537			free(mp, M_MOUNT);
1538		}
1539		printf("Root mount failed: %d\n", error);
1540	} else {
1541
1542		/* register with list of mounted filesystems */
1543		mtx_lock(&mountlist_mtx);
1544		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1545		mtx_unlock(&mountlist_mtx);
1546
1547		/* sanity check system clock against root fs timestamp */
1548		inittodr(mp->mnt_time);
1549		vfs_unbusy(mp, curthread);
1550		error = VFS_START(mp, 0, curthread);
1551	}
1552	return(error);
1553}
1554
1555/*
1556 * Spin prompting on the console for a suitable root filesystem
1557 */
1558static int
1559vfs_mountroot_ask(void)
1560{
1561	char name[128];
1562	int i;
1563	dev_t dev;
1564
1565	for(;;) {
1566		printf("\nManual root filesystem specification:\n");
1567		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1568#if defined(__i386__) || defined(__ia64__)
1569		printf("                       eg. ufs:da0s1a\n");
1570#else
1571		printf("                       eg. ufs:da0a\n");
1572#endif
1573		printf("  ?                  List valid disk boot devices\n");
1574		printf("  <empty line>       Abort manual input\n");
1575		printf("\nmountroot> ");
1576		gets(name);
1577		if (name[0] == 0)
1578			return(1);
1579		if (name[0] == '?') {
1580			printf("Possibly valid devices for 'ufs' root:\n");
1581			for (i = 0; i < NUMCDEVSW; i++) {
1582				dev = makedev(i, 0);
1583				if (devsw(dev) != NULL)
1584					printf(" \"%s\"", devsw(dev)->d_name);
1585			}
1586			printf("\n");
1587			continue;
1588		}
1589		if (!vfs_mountroot_try(name))
1590			return(0);
1591	}
1592}
1593
1594/*
1595 * Local helper function for vfs_mountroot_ask.
1596 */
1597static void
1598gets(char *cp)
1599{
1600	char *lp;
1601	int c;
1602
1603	lp = cp;
1604	for (;;) {
1605		printf("%c", c = cngetc() & 0177);
1606		switch (c) {
1607		case -1:
1608		case '\n':
1609		case '\r':
1610			*lp++ = '\0';
1611			return;
1612		case '\b':
1613		case '\177':
1614			if (lp > cp) {
1615				printf(" \b");
1616				lp--;
1617			}
1618			continue;
1619		case '#':
1620			lp--;
1621			if (lp < cp)
1622				lp = cp;
1623			continue;
1624		case '@':
1625		case 'u' & 037:
1626			lp = cp;
1627			printf("%c", '\n');
1628			continue;
1629		default:
1630			*lp++ = c;
1631		}
1632	}
1633}
1634
1635/*
1636 * Convert a given name to the dev_t of the disk-like device
1637 * it refers to.
1638 */
1639dev_t
1640getdiskbyname(char *name) {
1641	char *cp;
1642	dev_t dev;
1643
1644	cp = name;
1645	if (!bcmp(cp, "/dev/", 5))
1646		cp += 5;
1647
1648	dev = NODEV;
1649	EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1650	return (dev);
1651}
1652
1653/*
1654 * Set rootdev to match (name), given that we expect it to
1655 * refer to a disk-like device.
1656 */
1657static int
1658setrootbyname(char *name)
1659{
1660	dev_t diskdev;
1661
1662	diskdev = getdiskbyname(name);
1663	if (diskdev != NODEV) {
1664		rootdev = diskdev;
1665		return (0);
1666	}
1667
1668	return (1);
1669}
1670
/*
 * DDB command "show disk/<devicename>": print the dev_t that
 * getdiskbyname() returns for the name given as the command modifier.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t dev;

	/* The device name travels in the modifier; it must not be empty. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	dev = getdiskbyname(modif);
	if (dev == NODEV) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("dev_t = %p\n", dev);
}
#endif
1688
1689/*
1690 * Get a mount option by its name.
1691 *
1692 * Return 0 if the option was found, ENOENT otherwise.
1693 * If len is non-NULL it will be filled with the length
1694 * of the option. If buf is non-NULL, it will be filled
1695 * with the address of the option.
1696 */
1697int
1698vfs_getopt(opts, name, buf, len)
1699	struct vfsoptlist *opts;
1700	const char *name;
1701	void **buf;
1702	int *len;
1703{
1704	struct vfsopt *opt;
1705
1706	TAILQ_FOREACH(opt, opts, link) {
1707		if (strcmp(name, opt->name) == 0) {
1708			if (len != NULL)
1709				*len = opt->len;
1710			if (buf != NULL)
1711				*buf = opt->value;
1712			return (0);
1713		}
1714	}
1715	return (ENOENT);
1716}
1717
1718/*
1719 * Find and copy a mount option.
1720 *
1721 * The size of the buffer has to be specified
1722 * in len, if it is not the same length as the
1723 * mount option, EINVAL is returned.
1724 * Returns ENOENT if the option is not found.
1725 */
1726int
1727vfs_copyopt(opts, name, dest, len)
1728	struct vfsoptlist *opts;
1729	const char *name;
1730	void *dest;
1731	int len;
1732{
1733	struct vfsopt *opt;
1734
1735	TAILQ_FOREACH(opt, opts, link) {
1736		if (strcmp(name, opt->name) == 0) {
1737			if (len != opt->len)
1738				return (EINVAL);
1739			bcopy(opt->value, dest, opt->len);
1740			return (0);
1741		}
1742	}
1743	return (ENOENT);
1744}
1745