vfs_mountroot.c revision 213365
1/*-
2 * Copyright (c) 1999-2004 Poul-Henning Kamp
3 * Copyright (c) 1999 Michael Smith
4 * Copyright (c) 1989, 1993
5 *      The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_mountroot.c 213365 2010-10-02 19:44:13Z marcel $");
39
40#include <sys/param.h>
41#include <sys/conf.h>
42#include <sys/fcntl.h>
43#include <sys/jail.h>
44#include <sys/kernel.h>
45#include <sys/libkern.h>
46#include <sys/malloc.h>
47#include <sys/mount.h>
48#include <sys/mutex.h>
49#include <sys/namei.h>
50#include <sys/priv.h>
51#include <sys/proc.h>
52#include <sys/filedesc.h>
53#include <sys/reboot.h>
54#include <sys/syscallsubr.h>
55#include <sys/sysproto.h>
56#include <sys/sx.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/systm.h>
60#include <sys/vnode.h>
61#include <vm/uma.h>
62
63#include <geom/geom.h>
64
65#include <machine/stdarg.h>
66
67#include "opt_rootdevname.h"
68
/*
 * Device name substituted by vfs_mountroot_try() when a root specifier
 * names a filesystem type but gives no path.
 */
#define	ROOTNAME		"root_device"

/* Helpers defined further down in this file. */
static int	vfs_mountroot_try(const char *mountfrom, const char *options);
static int	vfs_mountroot_ask(void);

/*
 * The vnode of the system's root ("/" in the filesystem when no chroot
 * is active).  It is filled in by set_rootvnode().
 */
struct vnode	*rootvnode;
79
80/*
81 * The root filesystem is detailed in the kernel environment variable
82 * vfs.root.mountfrom, which is expected to be in the general format
83 *
84 * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
85 * vfsname   := the name of a VFS known to the kernel and capable
86 *              of being mounted as root
87 * path      := disk device name or other data used by the filesystem
88 *              to locate its physical store
89 *
 * If the environment variable vfs.root.mountfrom is a space or tab separated
 * list, each list element is tried in turn and the root filesystem will be
 * mounted from the first one that succeeds.
93 *
94 * The environment variable vfs.root.mountfrom.options is a comma delimited
95 * set of string mount options.  These mount options must be parseable
96 * by nmount() in the kernel.
97 */
98
/*
 * The root specifiers we will try, in order, if RB_CDROM is specified.
 * The table is terminated by a NULL entry.  Both the table and the
 * strings it points at are read-only, so they are const-qualified.
 */
static const char * const cdrom_rootdevnames[] = {
	"cd9660:cd0",
	"cd9660:acd0",
	NULL
};
107
/*
 * Legacy find-root support.
 *
 * rootdevnames[] holds up to two root specifiers that vfs_mountroot()
 * tries after vfs.root.mountfrom; both start out NULL.
 * NOTE(review): presumably filled in by boot-time code outside this
 * file -- confirm against the other users of this symbol.
 *
 * ctrootdevname is the compiled-in default root specifier; it is NULL
 * unless ROOTDEVNAME was defined at build time.
 */
char		*rootdevnames[2] = { NULL, NULL };
#ifndef ROOTDEVNAME
#define	ROOTDEVNAME	NULL
#endif
static const char	*ctrootdevname = ROOTDEVNAME;
114
/*
 * One outstanding request to delay mounting of the root filesystem.
 * Tokens are created by root_mount_hold() and destroyed by
 * root_mount_rel().
 */
struct root_hold_token {
	const char			*who;	/* name printed while waiting */
	LIST_ENTRY(root_hold_token)	list;	/* linkage on root_holds */
};

/*
 * All holds that have not been released yet.  The list is modified and
 * walked with mountlist_mtx held.
 */
static LIST_HEAD(, root_hold_token)	root_holds =
    LIST_HEAD_INITIALIZER(root_holds);

/* Set to 1 by root_mount_done() once the root filesystem is mounted. */
static int root_mount_complete;
124
125struct root_hold_token *
126root_mount_hold(const char *identifier)
127{
128	struct root_hold_token *h;
129
130	if (root_mounted())
131		return (NULL);
132
133	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
134	h->who = identifier;
135	mtx_lock(&mountlist_mtx);
136	LIST_INSERT_HEAD(&root_holds, h, list);
137	mtx_unlock(&mountlist_mtx);
138	return (h);
139}
140
141void
142root_mount_rel(struct root_hold_token *h)
143{
144
145	if (h == NULL)
146		return;
147	mtx_lock(&mountlist_mtx);
148	LIST_REMOVE(h, list);
149	wakeup(&root_holds);
150	mtx_unlock(&mountlist_mtx);
151	free(h, M_DEVBUF);
152}
153
154static void
155root_mount_prepare(void)
156{
157	struct root_hold_token *h;
158	struct timeval lastfail;
159	int curfail = 0;
160
161	for (;;) {
162		DROP_GIANT();
163		g_waitidle();
164		PICKUP_GIANT();
165		mtx_lock(&mountlist_mtx);
166		if (LIST_EMPTY(&root_holds)) {
167			mtx_unlock(&mountlist_mtx);
168			break;
169		}
170		if (ppsratecheck(&lastfail, &curfail, 1)) {
171			printf("Root mount waiting for:");
172			LIST_FOREACH(h, &root_holds, list)
173				printf(" %s", h->who);
174			printf("\n");
175		}
176		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
177		    hz);
178	}
179}
180
181static void
182root_mount_done(void)
183{
184
185	/* Keep prison0's root in sync with the global rootvnode. */
186	mtx_lock(&prison0.pr_mtx);
187	prison0.pr_root = rootvnode;
188	vref(prison0.pr_root);
189	mtx_unlock(&prison0.pr_mtx);
190	/*
191	 * Use a mutex to prevent the wakeup being missed and waiting for
192	 * an extra 1 second sleep.
193	 */
194	mtx_lock(&mountlist_mtx);
195	root_mount_complete = 1;
196	wakeup(&root_mount_complete);
197	mtx_unlock(&mountlist_mtx);
198}
199
200int
201root_mounted(void)
202{
203
204	/* No mutex is acquired here because int stores are atomic. */
205	return (root_mount_complete);
206}
207
208void
209root_mount_wait(void)
210{
211
212	/*
213	 * Panic on an obvious deadlock - the function can't be called from
214	 * a thread which is doing the whole SYSINIT stuff.
215	 */
216	KASSERT(curthread->td_proc->p_pid != 0,
217	    ("root_mount_wait: cannot be called from the swapper thread"));
218	mtx_lock(&mountlist_mtx);
219	while (!root_mount_complete) {
220		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
221		    hz);
222	}
223	mtx_unlock(&mountlist_mtx);
224}
225
226static void
227set_rootvnode(void)
228{
229	struct proc *p;
230
231	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
232		panic("Cannot find root vnode");
233
234	VOP_UNLOCK(rootvnode, 0);
235
236	p = curthread->td_proc;
237	FILEDESC_XLOCK(p->p_fd);
238
239	if (p->p_fd->fd_cdir != NULL)
240		vrele(p->p_fd->fd_cdir);
241	p->p_fd->fd_cdir = rootvnode;
242	VREF(rootvnode);
243
244	if (p->p_fd->fd_rdir != NULL)
245		vrele(p->p_fd->fd_rdir);
246	p->p_fd->fd_rdir = rootvnode;
247	VREF(rootvnode);
248
249	FILEDESC_XUNLOCK(p->p_fd);
250
251	EVENTHANDLER_INVOKE(mountroot);
252}
253
254static void
255devfs_first(void)
256{
257	struct thread *td = curthread;
258	struct vfsoptlist *opts;
259	struct vfsconf *vfsp;
260	struct mount *mp = NULL;
261	int error;
262
263	vfsp = vfs_byname("devfs");
264	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
265	if (vfsp == NULL)
266		return;
267
268	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
269
270	error = VFS_MOUNT(mp);
271	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
272	if (error)
273		return;
274
275	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
276	TAILQ_INIT(opts);
277	mp->mnt_opt = opts;
278
279	mtx_lock(&mountlist_mtx);
280	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
281	mtx_unlock(&mountlist_mtx);
282
283	set_rootvnode();
284
285	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
286	if (error)
287		printf("kern_symlink /dev -> / returns %d\n", error);
288}
289
290static void
291devfs_fixup(struct thread *td)
292{
293	struct nameidata nd;
294	struct vnode *vp, *dvp;
295	struct mount *mp;
296	int error;
297
298	/* Remove our devfs mount from the mountlist and purge the cache */
299	mtx_lock(&mountlist_mtx);
300	mp = TAILQ_FIRST(&mountlist);
301	TAILQ_REMOVE(&mountlist, mp, mnt_list);
302	mtx_unlock(&mountlist_mtx);
303	cache_purgevfs(mp);
304
305	VFS_ROOT(mp, LK_EXCLUSIVE, &dvp);
306	VI_LOCK(dvp);
307	dvp->v_iflag &= ~VI_MOUNT;
308	VI_UNLOCK(dvp);
309	dvp->v_mountedhere = NULL;
310
311	/* Set up the real rootvnode, and purge the cache */
312	TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL;
313	set_rootvnode();
314	cache_purgevfs(rootvnode->v_mount);
315
316	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
317	error = namei(&nd);
318	if (error) {
319		printf("Lookup of /dev for devfs, error: %d\n", error);
320		vput(dvp);
321		vfs_unbusy(mp);
322		return;
323	}
324	NDFREE(&nd, NDF_ONLY_PNBUF);
325	vp = nd.ni_vp;
326	if (vp->v_type != VDIR) {
327		printf("/dev is not a directory\n");
328		vput(dvp);
329		vput(vp);
330		vfs_unbusy(mp);
331		return;
332	}
333	error = vinvalbuf(vp, V_SAVE, 0, 0);
334	if (error) {
335		printf("vinvalbuf() of /dev failed, error: %d\n", error);
336		vput(dvp);
337		vput(vp);
338		vfs_unbusy(mp);
339		return;
340	}
341	cache_purge(vp);
342	mp->mnt_vnodecovered = vp;
343	vp->v_mountedhere = mp;
344	mtx_lock(&mountlist_mtx);
345	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
346	mtx_unlock(&mountlist_mtx);
347	VOP_UNLOCK(vp, 0);
348	vput(dvp);
349	vfs_unbusy(mp);
350
351	/* Unlink the no longer needed /dev/dev -> / symlink */
352	error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
353	if (error)
354		printf("kern_unlink of /dev/dev failed, error: %d\n", error);
355}
356
357void
358vfs_mountroot(void)
359{
360	char *cp, *cpt, *options, *tmpdev;
361	int error, i, asked = 0;
362
363	options = NULL;
364
365	root_mount_prepare();
366
367	devfs_first();
368
369	/*
370	 * We are booted with instructions to prompt for the root filesystem.
371	 */
372	if (boothowto & RB_ASKNAME) {
373		if (!vfs_mountroot_ask())
374			goto mounted;
375		asked = 1;
376	}
377
378	options = getenv("vfs.root.mountfrom.options");
379
380	/*
381	 * The root filesystem information is compiled in, and we are
382	 * booted with instructions to use it.
383	 */
384	if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) {
385		if (!vfs_mountroot_try(ctrootdevname, options))
386			goto mounted;
387		ctrootdevname = NULL;
388	}
389
390	/*
391	 * We've been given the generic "use CDROM as root" flag.  This is
392	 * necessary because one media may be used in many different
393	 * devices, so we need to search for them.
394	 */
395	if (boothowto & RB_CDROM) {
396		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
397			if (!vfs_mountroot_try(cdrom_rootdevnames[i], options))
398				goto mounted;
399		}
400	}
401
402	/*
403	 * Try to use the value read by the loader from /etc/fstab, or
404	 * supplied via some other means.  This is the preferred
405	 * mechanism.
406	 */
407	cp = getenv("vfs.root.mountfrom");
408	if (cp != NULL) {
409		cpt = cp;
410		while ((tmpdev = strsep(&cpt, " \t")) != NULL) {
411			error = vfs_mountroot_try(tmpdev, options);
412			if (error == 0) {
413				freeenv(cp);
414				goto mounted;
415			}
416		}
417		freeenv(cp);
418	}
419
420	/*
421	 * Try values that may have been computed by code during boot
422	 */
423	if (!vfs_mountroot_try(rootdevnames[0], options))
424		goto mounted;
425	if (!vfs_mountroot_try(rootdevnames[1], options))
426		goto mounted;
427
428	/*
429	 * If we (still) have a compiled-in default, try it.
430	 */
431	if (ctrootdevname != NULL)
432		if (!vfs_mountroot_try(ctrootdevname, options))
433			goto mounted;
434	/*
435	 * Everything so far has failed, prompt on the console if we haven't
436	 * already tried that.
437	 */
438	if (!asked)
439		if (!vfs_mountroot_ask())
440			goto mounted;
441
442	panic("Root mount failed, startup aborted.");
443
444mounted:
445	root_mount_done();
446	freeenv(options);
447}
448
449static struct mntarg *
450parse_mountroot_options(struct mntarg *ma, const char *options)
451{
452	char *p;
453	char *name, *name_arg;
454	char *val, *val_arg;
455	char *opts;
456
457	if (options == NULL || options[0] == '\0')
458		return (ma);
459
460	p = opts = strdup(options, M_MOUNT);
461	if (opts == NULL) {
462		return (ma);
463	}
464
465	while((name = strsep(&p, ",")) != NULL) {
466		if (name[0] == '\0')
467			break;
468
469		val = strchr(name, '=');
470		if (val != NULL) {
471			*val = '\0';
472			++val;
473		}
474		if( strcmp(name, "rw") == 0 ||
475		    strcmp(name, "noro") == 0) {
476			/*
477			 * The first time we mount the root file system,
478			 * we need to mount 'ro', so We need to ignore
479			 * 'rw' and 'noro' mount options.
480			 */
481			continue;
482		}
483		name_arg = strdup(name, M_MOUNT);
484		val_arg = NULL;
485		if (val != NULL)
486			val_arg = strdup(val, M_MOUNT);
487
488		ma = mount_arg(ma, name_arg, val_arg,
489		    (val_arg != NULL ? -1 : 0));
490	}
491	free(opts, M_MOUNT);
492	return (ma);
493}
494
495/*
496 * Mount (mountfrom) as the root filesystem.
497 */
498static int
499vfs_mountroot_try(const char *mountfrom, const char *options)
500{
501	struct mount	*mp;
502	struct mntarg	*ma;
503	char		*vfsname, *path;
504	time_t		timebase;
505	int		error;
506	char		patt[32];
507	char		errmsg[255];
508
509	vfsname = NULL;
510	path    = NULL;
511	mp      = NULL;
512	ma	= NULL;
513	error   = EINVAL;
514	bzero(errmsg, sizeof(errmsg));
515
516	if (mountfrom == NULL)
517		return (error);		/* don't complain */
518	printf("Trying to mount root from %s\n", mountfrom);
519
520	/* parse vfs name and path */
521	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
522	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
523	vfsname[0] = path[0] = 0;
524	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
525	if (sscanf(mountfrom, patt, vfsname, path) < 1)
526		goto out;
527
528	if (path[0] == '\0')
529		strcpy(path, ROOTNAME);
530
531	ma = mount_arg(ma, "fstype", vfsname, -1);
532	ma = mount_arg(ma, "fspath", "/", -1);
533	ma = mount_arg(ma, "from", path, -1);
534	ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
535	ma = mount_arg(ma, "ro", NULL, 0);
536	ma = parse_mountroot_options(ma, options);
537	error = kernel_mount(ma, MNT_ROOTFS);
538
539	if (error == 0) {
540		/*
541		 * We mount devfs prior to mounting the / FS, so the first
542		 * entry will typically be devfs.
543		 */
544		mp = TAILQ_FIRST(&mountlist);
545		KASSERT(mp != NULL, ("%s: mountlist is empty", __func__));
546
547		/*
548		 * Iterate over all currently mounted file systems and use
549		 * the time stamp found to check and/or initialize the RTC.
550		 * Typically devfs has no time stamp and the only other FS
551		 * is the actual / FS.
552		 * Call inittodr() only once and pass it the largest of the
553		 * timestamps we encounter.
554		 */
555		timebase = 0;
556		do {
557			if (mp->mnt_time > timebase)
558				timebase = mp->mnt_time;
559			mp = TAILQ_NEXT(mp, mnt_list);
560		} while (mp != NULL);
561		inittodr(timebase);
562
563		devfs_fixup(curthread);
564	}
565
566	if (error != 0 ) {
567		printf("ROOT MOUNT ERROR: %s\n", errmsg);
568		printf("If you have invalid mount options, reboot, and ");
569		printf("first try the following from\n");
570		printf("the loader prompt:\n\n");
571		printf("     set vfs.root.mountfrom.options=rw\n\n");
572		printf("and then remove invalid mount options from ");
573		printf("/etc/fstab.\n\n");
574	}
575out:
576	free(path, M_MOUNT);
577	free(vfsname, M_MOUNT);
578	return (error);
579}
580
581static int
582vfs_mountroot_ask(void)
583{
584	char name[128];
585	char *mountfrom;
586	char *options;
587
588	for(;;) {
589		printf("Loader variables:\n");
590		printf("vfs.root.mountfrom=");
591		mountfrom = getenv("vfs.root.mountfrom");
592		if (mountfrom != NULL) {
593			printf("%s", mountfrom);
594		}
595		printf("\n");
596		printf("vfs.root.mountfrom.options=");
597		options = getenv("vfs.root.mountfrom.options");
598		if (options != NULL) {
599			printf("%s", options);
600		}
601		printf("\n");
602		freeenv(mountfrom);
603		freeenv(options);
604		printf("\nManual root filesystem specification:\n");
605		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
606		printf("                       eg. zfs:tank\n");
607		printf("                       eg. ufs:/dev/da0s1a\n");
608		printf("                       eg. cd9660:/dev/acd0\n");
609		printf("                       This is equivalent to: ");
610		printf("mount -t cd9660 /dev/acd0 /\n");
611		printf("\n");
612		printf("  ?                  List valid disk boot devices\n");
613		printf("  <empty line>       Abort manual input\n");
614		printf("\nmountroot> ");
615		gets(name, sizeof(name), 1);
616		if (name[0] == '\0')
617			return (1);
618		if (name[0] == '?') {
619			printf("\nList of GEOM managed disk devices:\n  ");
620			g_dev_print();
621			continue;
622		}
623		if (!vfs_mountroot_try(name, NULL))
624			return (0);
625	}
626}
627