vfs_mountroot.c revision 287107
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 2010 Marcel Moolenaar
31573Srgrimes * Copyright (c) 1999-2004 Poul-Henning Kamp
41573Srgrimes * Copyright (c) 1999 Michael Smith
51573Srgrimes * Copyright (c) 1989, 1993
61573Srgrimes *      The Regents of the University of California.  All rights reserved.
71573Srgrimes * (c) UNIX System Laboratories, Inc.
81573Srgrimes * All or some portions of this file are derived from material licensed
91573Srgrimes * to the University of California by American Telephone and Telegraph
101573Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
111573Srgrimes * the permission of UNIX System Laboratories, Inc.
121573Srgrimes *
131573Srgrimes * Redistribution and use in source and binary forms, with or without
141573Srgrimes * modification, are permitted provided that the following conditions
151573Srgrimes * are met:
16249808Semaste * 1. Redistributions of source code must retain the above copyright
171573Srgrimes *    notice, this list of conditions and the following disclaimer.
181573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
191573Srgrimes *    notice, this list of conditions and the following disclaimer in the
201573Srgrimes *    documentation and/or other materials provided with the distribution.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
3692986Sobrien */
3792986Sobrien
381573Srgrimes#include "opt_rootdevname.h"
3971579Sdeischen
401573Srgrimes#include <sys/cdefs.h>
411573Srgrimes__FBSDID("$FreeBSD: head/sys/kern/vfs_mountroot.c 287107 2015-08-24 13:18:13Z trasz $");
421573Srgrimes
431573Srgrimes#include <sys/param.h>
441573Srgrimes#include <sys/conf.h>
4571579Sdeischen#include <sys/cons.h>
461573Srgrimes#include <sys/fcntl.h>
47150040Sstefanf#include <sys/jail.h>
48150040Sstefanf#include <sys/kernel.h>
49150040Sstefanf#include <sys/malloc.h>
501573Srgrimes#include <sys/mdioctl.h>
511573Srgrimes#include <sys/mount.h>
521573Srgrimes#include <sys/mutex.h>
531573Srgrimes#include <sys/namei.h>
541573Srgrimes#include <sys/priv.h>
5571579Sdeischen#include <sys/proc.h>
561573Srgrimes#include <sys/filedesc.h>
571573Srgrimes#include <sys/reboot.h>
58249810Semaste#include <sys/sbuf.h>
591573Srgrimes#include <sys/stat.h>
6092889Sobrien#include <sys/syscallsubr.h>
6192889Sobrien#include <sys/sysproto.h>
621573Srgrimes#include <sys/sx.h>
631573Srgrimes#include <sys/sysctl.h>
641573Srgrimes#include <sys/sysent.h>
651573Srgrimes#include <sys/systm.h>
661573Srgrimes#include <sys/vnode.h>
671573Srgrimes
681573Srgrimes#include <geom/geom.h>
691573Srgrimes
701573Srgrimes/*
711573Srgrimes * The root filesystem is detailed in the kernel environment variable
721573Srgrimes * vfs.root.mountfrom, which is expected to be in the general format
731573Srgrimes *
741573Srgrimes * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
751573Srgrimes * vfsname   := the name of a VFS known to the kernel and capable
761573Srgrimes *              of being mounted as root
771573Srgrimes * path      := disk device name or other data used by the filesystem
781573Srgrimes *              to locate its physical store
791573Srgrimes *
801573Srgrimes * If the environment variable vfs.root.mountfrom is a space separated list,
811573Srgrimes * each list element is tried in turn and the root filesystem will be mounted
 * from the first one that succeeds.
831573Srgrimes *
841573Srgrimes * The environment variable vfs.root.mountfrom.options is a comma delimited
851573Srgrimes * set of string mount options.  These mount options must be parseable
861573Srgrimes * by nmount() in the kernel.
871573Srgrimes */
881573Srgrimes
/*
 * Forward declarations: both functions are defined further down in this
 * file but are called before their definitions.
 */
static int parse_mount(char **);
static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
911573Srgrimes
/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)
 */
struct vnode *rootvnode;

/*
 * Mount of the system's /dev.
 */
struct mount *rootdevmp;

/*
 * Optional root specifications; vfs_mountroot_conf0() appends every
 * non-NULL entry as a line of the generated mount configuration.
 */
char *rootdevnames[2] = {NULL, NULL};

/*
 * Protects the root_holds list and serves as the interlock for the sleeps
 * on &root_holds and &root_mount_complete.
 */
struct mtx root_holds_mtx;
MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF);
1071573Srgrimes
/*
 * One outstanding hold on the root mount, created by root_mount_hold()
 * and destroyed by root_mount_rel().
 */
struct root_hold_token {
	const char			*who;	/* Holder name; printed while waiting. */
	LIST_ENTRY(root_hold_token)	list;	/* Linkage on root_holds. */
};

/* All outstanding holds; vfs_mountroot_wait() waits until this is empty. */
static LIST_HEAD(, root_hold_token)	root_holds =
    LIST_HEAD_INITIALIZER(root_holds);
1151573Srgrimes
/* What vfs_mountroot_parse() does when no (new) root could be mounted. */
enum action {
	A_CONTINUE,	/* Hand the error back to the caller. */
	A_PANIC,	/* Call panic(). */
	A_REBOOT,	/* Call kern_reboot(RB_NOSYNC). */
	A_RETRY		/* Parse the configuration again from the start. */
};

/* Current failure policy; changed by the ".onfail" directive. */
static enum action root_mount_onfail = A_CONTINUE;

/* Unit of the md device attached by the ".md" directive, or -1 if none. */
static int root_mount_mddev;
/* Set to 1 by vfs_mountroot() once it has finished mounting root. */
static int root_mount_complete;

/* By default wait up to 3 seconds for devices to appear. */
static int root_mount_timeout = 3;
TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);
131
132struct root_hold_token *
133root_mount_hold(const char *identifier)
134{
135	struct root_hold_token *h;
136
137	if (root_mounted())
138		return (NULL);
139
140	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
141	h->who = identifier;
142	mtx_lock(&root_holds_mtx);
143	LIST_INSERT_HEAD(&root_holds, h, list);
144	mtx_unlock(&root_holds_mtx);
145	return (h);
146}
147
148void
149root_mount_rel(struct root_hold_token *h)
150{
151
152	if (h == NULL)
153		return;
154	mtx_lock(&root_holds_mtx);
155	LIST_REMOVE(h, list);
156	wakeup(&root_holds);
157	mtx_unlock(&root_holds_mtx);
158	free(h, M_DEVBUF);
159}
160
161int
162root_mounted(void)
163{
164
165	/* No mutex is acquired here because int stores are atomic. */
166	return (root_mount_complete);
167}
168
169void
170root_mount_wait(void)
171{
172
173	/*
174	 * Panic on an obvious deadlock - the function can't be called from
175	 * a thread which is doing the whole SYSINIT stuff.
176	 */
177	KASSERT(curthread->td_proc->p_pid != 0,
178	    ("root_mount_wait: cannot be called from the swapper thread"));
179	mtx_lock(&root_holds_mtx);
180	while (!root_mount_complete) {
181		msleep(&root_mount_complete, &root_holds_mtx, PZERO, "rootwait",
182		    hz);
183	}
184	mtx_unlock(&root_holds_mtx);
185}
186
187static void
188set_rootvnode(void)
189{
190	struct proc *p;
191
192	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
193		panic("Cannot find root vnode");
194
195	VOP_UNLOCK(rootvnode, 0);
196
197	p = curthread->td_proc;
198	FILEDESC_XLOCK(p->p_fd);
199
200	if (p->p_fd->fd_cdir != NULL)
201		vrele(p->p_fd->fd_cdir);
202	p->p_fd->fd_cdir = rootvnode;
203	VREF(rootvnode);
204
205	if (p->p_fd->fd_rdir != NULL)
206		vrele(p->p_fd->fd_rdir);
207	p->p_fd->fd_rdir = rootvnode;
208	VREF(rootvnode);
209
210	FILEDESC_XUNLOCK(p->p_fd);
211}
212
213static int
214vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
215{
216	struct vfsoptlist *opts;
217	struct vfsconf *vfsp;
218	struct mount *mp;
219	int error;
220
221	*mpp = NULL;
222
223	vfsp = vfs_byname("devfs");
224	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
225	if (vfsp == NULL)
226		return (ENOENT);
227
228	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
229
230	error = VFS_MOUNT(mp);
231	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
232	if (error)
233		return (error);
234
235	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
236	TAILQ_INIT(opts);
237	mp->mnt_opt = opts;
238
239	mtx_lock(&mountlist_mtx);
240	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
241	mtx_unlock(&mountlist_mtx);
242
243	*mpp = mp;
244	rootdevmp = mp;
245	set_rootvnode();
246
247	error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
248	if (error)
249		printf("kern_symlink /dev -> / returns %d\n", error);
250
251	return (error);
252}
253
/*
 * Rearrange the mount list and the vnode linkage after a new root file
 * system has been mounted.
 *
 * td      - thread used for the path lookups and the unlink below.
 * mpdevfs - the devfs mount created by vfs_mountroot_devfs().
 *
 * The mount that follows mpdevfs on mountlist ends up at the head of the
 * list and devfs is moved to the tail.  The previous root is detached from
 * the root position and, when it is not devfs itself, re-attached under
 * /.mount or /mnt.  devfs is then re-attached under /dev of the new root.
 * Failures to re-attach are only reported (when bootverbose is set); the
 * function always returns 0.
 */
static int
vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
{
	struct nameidata nd;
	struct mount *mporoot, *mpnroot;
	struct vnode *vp, *vporoot, *vpdevfs;
	char *fspath;
	int error;

	/*
	 * The new root is the mount right after devfs; vfs_mountroot_parse()
	 * only reports success once that entry exists.
	 * NOTE(review): this read happens before mountlist_mtx is taken.
	 */
	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);

	/* Shuffle the mountlist. */
	mtx_lock(&mountlist_mtx);
	mporoot = TAILQ_FIRST(&mountlist);
	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
	if (mporoot != mpdevfs) {
		/* Old root was not devfs: move the new root to the head. */
		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
	}
	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
	mtx_unlock(&mountlist_mtx);

	cache_purgevfs(mporoot);
	if (mporoot != mpdevfs)
		cache_purgevfs(mpdevfs);

	/* NOTE(review): the return value of VFS_ROOT() is not checked here. */
	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);

	/* Detach the old root: nothing is mounted on it, it covers nothing. */
	VI_LOCK(vporoot);
	vporoot->v_iflag &= ~VI_MOUNT;
	VI_UNLOCK(vporoot);
	vporoot->v_mountedhere = NULL;
	mporoot->mnt_flag &= ~MNT_ROOTFS;
	mporoot->mnt_vnodecovered = NULL;
	vput(vporoot);

	/* Set up the new rootvnode, and purge the cache */
	mpnroot->mnt_vnodecovered = NULL;
	set_rootvnode();
	cache_purgevfs(rootvnode->v_mount);

	if (mporoot != mpdevfs) {
		/* Remount old root under /.mount or /mnt */
		fspath = "/.mount";
		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
		    fspath, td);
		error = namei(&nd);
		if (error) {
			/* No /.mount in the new root; fall back to /mnt. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fspath = "/mnt";
			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
			    fspath, td);
			error = namei(&nd);
		}
		if (!error) {
			vp = nd.ni_vp;
			/* The mount point has to be a directory. */
			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
			if (!error)
				error = vinvalbuf(vp, V_SAVE, 0, 0);
			if (!error) {
				/* Hook the old root onto the directory vnode. */
				cache_purge(vp);
				mporoot->mnt_vnodecovered = vp;
				vp->v_mountedhere = mporoot;
				strlcpy(mporoot->mnt_stat.f_mntonname,
				    fspath, MNAMELEN);
				/* Unlock only: the reference stays with the mount. */
				VOP_UNLOCK(vp, 0);
			} else
				vput(vp);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);

		if (error && bootverbose)
			printf("mountroot: unable to remount previous root "
			    "under /.mount or /mnt (error %d).\n", error);
	}

	/* Remount devfs under /dev */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
	error = namei(&nd);
	if (!error) {
		vp = nd.ni_vp;
		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
		if (!error)
			error = vinvalbuf(vp, V_SAVE, 0, 0);
		if (!error) {
			/* Release whatever vnode devfs covered before. */
			vpdevfs = mpdevfs->mnt_vnodecovered;
			if (vpdevfs != NULL) {
				cache_purge(vpdevfs);
				vpdevfs->v_mountedhere = NULL;
				vrele(vpdevfs);
			}
			mpdevfs->mnt_vnodecovered = vp;
			vp->v_mountedhere = mpdevfs;
			VOP_UNLOCK(vp, 0);
		} else
			vput(vp);
	}
	if (error && bootverbose)
		printf("mountroot: unable to remount devfs under /dev "
		    "(error %d).\n", error);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	if (mporoot == mpdevfs) {
		/* devfs was the previous root (first pass). */
		vfs_unbusy(mpdevfs);
		/* Unlink the no longer needed /dev/dev -> / symlink */
		error = kern_unlinkat(td, AT_FDCWD, "/dev/dev",
		    UIO_SYSSPACE, 0);
		if (error && bootverbose)
			printf("mountroot: unable to unlink /dev/dev "
			    "(error %d)\n", error);
	}

	/* Re-attachment problems above are deliberately not propagated. */
	return (0);
}
368
369/*
370 * Configuration parser.
371 */
372
/*
 * Parser character classes.  These are passed to parse_skipto() in place
 * of a literal character; they are negative so that they cannot be
 * mistaken for one.  The replacement lists are parenthesized so that the
 * unary minus cannot combine with neighbouring operators at the point of
 * use.
 */
#define	CC_WHITESPACE		(-1)
#define	CC_NONWHITESPACE	(-2)

/*
 * Parse errors.  Negative, which lets callers tell them apart from the
 * (positive) errno values returned by mount attempts and directives.
 */
#define	PE_EOF			(-1)
#define	PE_EOL			(-2)
380
/* Return the character at the parse position without consuming it. */
static __inline int
parse_peek(char **conf)
{
	const char *cursor;

	cursor = *conf;
	return (*cursor);
}
387
/* Overwrite the character at the parse position; the position stays put. */
static __inline void
parse_poke(char **conf, int c)
{
	char *cursor;

	cursor = *conf;
	*cursor = c;
}
394
/* Move the parse position forward by one character. */
static __inline void
parse_advance(char **conf)
{

	*conf = *conf + 1;
}
401
402static int
403parse_skipto(char **conf, int mc)
404{
405	int c, match;
406
407	while (1) {
408		c = parse_peek(conf);
409		if (c == 0)
410			return (PE_EOF);
411		switch (mc) {
412		case CC_WHITESPACE:
413			match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
414			break;
415		case CC_NONWHITESPACE:
416			if (c == '\n')
417				return (PE_EOL);
418			match = (c != ' ' && c != '\t') ? 1 : 0;
419			break;
420		default:
421			match = (c == mc) ? 1 : 0;
422			break;
423		}
424		if (match)
425			break;
426		parse_advance(conf);
427	}
428	return (0);
429}
430
431static int
432parse_token(char **conf, char **tok)
433{
434	char *p;
435	size_t len;
436	int error;
437
438	*tok = NULL;
439	error = parse_skipto(conf, CC_NONWHITESPACE);
440	if (error)
441		return (error);
442	p = *conf;
443	error = parse_skipto(conf, CC_WHITESPACE);
444	len = *conf - p;
445	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
446	bcopy(p, *tok, len);
447	return (0);
448}
449
450static void
451parse_dir_ask_printenv(const char *var)
452{
453	char *val;
454
455	val = kern_getenv(var);
456	if (val != NULL) {
457		printf("  %s=%s\n", var, val);
458		freeenv(val);
459	}
460}
461
462static int
463parse_dir_ask(char **conf)
464{
465	char name[80];
466	char *mnt;
467	int error;
468
469	printf("\nLoader variables:\n");
470	parse_dir_ask_printenv("vfs.root.mountfrom");
471	parse_dir_ask_printenv("vfs.root.mountfrom.options");
472
473	printf("\nManual root filesystem specification:\n");
474	printf("  <fstype>:<device> [options]\n");
475	printf("      Mount <device> using filesystem <fstype>\n");
476	printf("      and with the specified (optional) option list.\n");
477	printf("\n");
478	printf("    eg. ufs:/dev/da0s1a\n");
479	printf("        zfs:tank\n");
480	printf("        cd9660:/dev/acd0 ro\n");
481	printf("          (which is equivalent to: ");
482	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
483	printf("\n");
484	printf("  ?               List valid disk boot devices\n");
485	printf("  .               Yield 1 second (for background tasks)\n");
486	printf("  <empty line>    Abort manual input\n");
487
488	do {
489		error = EINVAL;
490		printf("\nmountroot> ");
491		cngets(name, sizeof(name), GETS_ECHO);
492		if (name[0] == '\0')
493			break;
494		if (name[0] == '?' && name[1] == '\0') {
495			printf("\nList of GEOM managed disk devices:\n  ");
496			g_dev_print();
497			continue;
498		}
499		if (name[0] == '.' && name[1] == '\0') {
500			pause("rmask", hz);
501			continue;
502		}
503		mnt = name;
504		error = parse_mount(&mnt);
505		if (error == -1)
506			printf("Invalid file system specification.\n");
507	} while (error != 0);
508
509	return (error);
510}
511
512static int
513parse_dir_md(char **conf)
514{
515	struct stat sb;
516	struct thread *td;
517	struct md_ioctl *mdio;
518	char *path, *tok;
519	int error, fd, len;
520
521	td = curthread;
522
523	error = parse_token(conf, &tok);
524	if (error)
525		return (error);
526
527	len = strlen(tok);
528	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
529	path = (void *)(mdio + 1);
530	bcopy(tok, path, len);
531	free(tok, M_TEMP);
532
533	/* Get file status. */
534	error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL);
535	if (error)
536		goto out;
537
538	/* Open /dev/mdctl so that we can attach/detach. */
539	error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE,
540	    O_RDWR, 0);
541	if (error)
542		goto out;
543
544	fd = td->td_retval[0];
545	mdio->md_version = MDIOVERSION;
546	mdio->md_type = MD_VNODE;
547
548	if (root_mount_mddev != -1) {
549		mdio->md_unit = root_mount_mddev;
550		DROP_GIANT();
551		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
552		PICKUP_GIANT();
553		/* Ignore errors. We don't care. */
554		root_mount_mddev = -1;
555	}
556
557	mdio->md_file = (void *)(mdio + 1);
558	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
559	mdio->md_mediasize = sb.st_size;
560	mdio->md_unit = 0;
561	DROP_GIANT();
562	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
563	PICKUP_GIANT();
564	if (error)
565		goto out;
566
567	if (mdio->md_unit > 9) {
568		printf("rootmount: too many md units\n");
569		mdio->md_file = NULL;
570		mdio->md_options = 0;
571		mdio->md_mediasize = 0;
572		DROP_GIANT();
573		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
574		PICKUP_GIANT();
575		/* Ignore errors. We don't care. */
576		error = ERANGE;
577		goto out;
578	}
579
580	root_mount_mddev = mdio->md_unit;
581	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
582
583	error = kern_close(td, fd);
584
585 out:
586	free(mdio, M_TEMP);
587	return (error);
588}
589
590static int
591parse_dir_onfail(char **conf)
592{
593	char *action;
594	int error;
595
596	error = parse_token(conf, &action);
597	if (error)
598		return (error);
599
600	if (!strcmp(action, "continue"))
601		root_mount_onfail = A_CONTINUE;
602	else if (!strcmp(action, "panic"))
603		root_mount_onfail = A_PANIC;
604	else if (!strcmp(action, "reboot"))
605		root_mount_onfail = A_REBOOT;
606	else if (!strcmp(action, "retry"))
607		root_mount_onfail = A_RETRY;
608	else {
609		printf("rootmount: %s: unknown action\n", action);
610		error = EINVAL;
611	}
612
613	free(action, M_TEMP);
614	return (0);
615}
616
617static int
618parse_dir_timeout(char **conf)
619{
620	char *tok, *endtok;
621	long secs;
622	int error;
623
624	error = parse_token(conf, &tok);
625	if (error)
626		return (error);
627
628	secs = strtol(tok, &endtok, 0);
629	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
630	if (!error)
631		root_mount_timeout = secs;
632	free(tok, M_TEMP);
633	return (error);
634}
635
636static int
637parse_directive(char **conf)
638{
639	char *dir;
640	int error;
641
642	error = parse_token(conf, &dir);
643	if (error)
644		return (error);
645
646	if (strcmp(dir, ".ask") == 0)
647		error = parse_dir_ask(conf);
648	else if (strcmp(dir, ".md") == 0)
649		error = parse_dir_md(conf);
650	else if (strcmp(dir, ".onfail") == 0)
651		error = parse_dir_onfail(conf);
652	else if (strcmp(dir, ".timeout") == 0)
653		error = parse_dir_timeout(conf);
654	else {
655		printf("mountroot: invalid directive `%s'\n", dir);
656		/* Ignore the rest of the line. */
657		(void)parse_skipto(conf, '\n');
658		error = EINVAL;
659	}
660	free(dir, M_TEMP);
661	return (error);
662}
663
664static int
665parse_mount_dev_present(const char *dev)
666{
667	struct nameidata nd;
668	int error;
669
670	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
671	error = namei(&nd);
672	if (!error)
673		vput(nd.ni_vp);
674	NDFREE(&nd, NDF_ONLY_PNBUF);
675	return (error != 0) ? 0 : 1;
676}
677
678#define	ERRMSGL	255
679static int
680parse_mount(char **conf)
681{
682	char *errmsg;
683	struct mntarg *ma;
684	char *dev, *fs, *opts, *tok;
685	int delay, error, timeout;
686
687	error = parse_token(conf, &tok);
688	if (error)
689		return (error);
690	fs = tok;
691	error = parse_skipto(&tok, ':');
692	if (error) {
693		free(fs, M_TEMP);
694		return (error);
695	}
696	parse_poke(&tok, '\0');
697	parse_advance(&tok);
698	dev = tok;
699
700	if (root_mount_mddev != -1) {
701		/* Handle substitution for the md unit number. */
702		tok = strstr(dev, "md#");
703		if (tok != NULL)
704			tok[2] = '0' + root_mount_mddev;
705	}
706
707	/* Parse options. */
708	error = parse_token(conf, &tok);
709	opts = (error == 0) ? tok : NULL;
710
711	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
712	    (opts != NULL) ? opts : "");
713
714	errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
715
716	if (vfs_byname(fs) == NULL) {
717		strlcpy(errmsg, "unknown file system", ERRMSGL);
718		error = ENOENT;
719		goto out;
720	}
721
722	if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
723	    dev[0] != '\0' && !parse_mount_dev_present(dev)) {
724		printf("mountroot: waiting for device %s ...\n", dev);
725		delay = hz / 10;
726		timeout = root_mount_timeout * hz;
727		do {
728			pause("rmdev", delay);
729			timeout -= delay;
730		} while (timeout > 0 && !parse_mount_dev_present(dev));
731		if (timeout <= 0) {
732			error = ENODEV;
733			goto out;
734		}
735	}
736
737	ma = NULL;
738	ma = mount_arg(ma, "fstype", fs, -1);
739	ma = mount_arg(ma, "fspath", "/", -1);
740	ma = mount_arg(ma, "from", dev, -1);
741	ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
742	ma = mount_arg(ma, "ro", NULL, 0);
743	ma = parse_mountroot_options(ma, opts);
744	error = kernel_mount(ma, MNT_ROOTFS);
745
746 out:
747	if (error) {
748		printf("Mounting from %s:%s failed with error %d",
749		    fs, dev, error);
750		if (errmsg[0] != '\0')
751			printf(": %s", errmsg);
752		printf(".\n");
753	}
754	free(fs, M_TEMP);
755	free(errmsg, M_TEMP);
756	if (opts != NULL)
757		free(opts, M_TEMP);
758	/* kernel_mount can return -1 on error. */
759	return ((error < 0) ? EDOOFUS : error);
760}
761#undef ERRMSGL
762
/*
 * Work through the mount configuration in `sb' line by line until a file
 * system shows up behind mpdevfs on mountlist.
 *
 * Lines starting with '#' are skipped, lines starting with '.' are
 * directives, and everything else is a mount specification.
 *
 * Returns 0 as soon as a mount follows mpdevfs.  If the configuration is
 * exhausted first, the current ".onfail" policy is applied: panic,
 * reboot, start over from the top of the configuration, or return the
 * last error.
 */
static int
vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
{
	struct mount *mp;
	char *conf;
	int error;

	root_mount_mddev = -1;

retry:
	/* (Re)start at the top of the configuration with the default policy. */
	conf = sbuf_data(sb);
	mp = TAILQ_NEXT(mpdevfs, mnt_list);
	error = (mp == NULL) ? 0 : EDOOFUS;
	root_mount_onfail = A_CONTINUE;
	while (mp == NULL) {
		error = parse_skipto(&conf, CC_NONWHITESPACE);
		if (error == PE_EOL) {
			/* Blank line: step over the newline. */
			parse_advance(&conf);
			continue;
		}
		if (error < 0)
			break;
		switch (parse_peek(&conf)) {
		case '#':
			error = parse_skipto(&conf, '\n');
			break;
		case '.':
			error = parse_directive(&conf);
			break;
		default:
			error = parse_mount(&conf);
			break;
		}
		/*
		 * Only (negative) parse errors end the loop; a positive
		 * errno from a directive or a failed mount just moves on to
		 * the next line.
		 */
		if (error < 0)
			break;
		/* Ignore any trailing garbage on the line. */
		if (parse_peek(&conf) != '\n') {
			printf("mountroot: advancing to next directive...\n");
			(void)parse_skipto(&conf, '\n');
		}
		mp = TAILQ_NEXT(mpdevfs, mnt_list);
	}
	if (mp != NULL)
		return (0);

	/*
	 * We failed to mount (a new) root.
	 */
	switch (root_mount_onfail) {
	case A_CONTINUE:
		break;
	case A_PANIC:
		panic("mountroot: unable to (re-)mount root.");
		/* NOTREACHED */
	case A_RETRY:
		goto retry;
	case A_REBOOT:
		kern_reboot(RB_NOSYNC);
		/* NOTREACHED */
	}

	return (error);
}
826
827static void
828vfs_mountroot_conf0(struct sbuf *sb)
829{
830	char *s, *tok, *mnt, *opt;
831	int error;
832
833	sbuf_printf(sb, ".onfail panic\n");
834	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
835	if (boothowto & RB_ASKNAME)
836		sbuf_printf(sb, ".ask\n");
837#ifdef ROOTDEVNAME
838	if (boothowto & RB_DFLTROOT)
839		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
840#endif
841	if (boothowto & RB_CDROM) {
842		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
843		sbuf_printf(sb, ".timeout 0\n");
844		sbuf_printf(sb, "cd9660:/dev/acd0 ro\n");
845		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
846	}
847	s = kern_getenv("vfs.root.mountfrom");
848	if (s != NULL) {
849		opt = kern_getenv("vfs.root.mountfrom.options");
850		tok = s;
851		error = parse_token(&tok, &mnt);
852		while (!error) {
853			sbuf_printf(sb, "%s %s\n", mnt,
854			    (opt != NULL) ? opt : "");
855			free(mnt, M_TEMP);
856			error = parse_token(&tok, &mnt);
857		}
858		if (opt != NULL)
859			freeenv(opt);
860		freeenv(s);
861	}
862	if (rootdevnames[0] != NULL)
863		sbuf_printf(sb, "%s\n", rootdevnames[0]);
864	if (rootdevnames[1] != NULL)
865		sbuf_printf(sb, "%s\n", rootdevnames[1]);
866#ifdef ROOTDEVNAME
867	if (!(boothowto & RB_DFLTROOT))
868		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
869#endif
870	if (!(boothowto & RB_ASKNAME))
871		sbuf_printf(sb, ".ask\n");
872}
873
874static int
875vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
876{
877	static char buf[128];
878	struct nameidata nd;
879	off_t ofs;
880	ssize_t resid;
881	int error, flags, len;
882
883	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
884	flags = FREAD;
885	error = vn_open(&nd, &flags, 0, NULL);
886	if (error)
887		return (error);
888
889	NDFREE(&nd, NDF_ONLY_PNBUF);
890	ofs = 0;
891	len = sizeof(buf) - 1;
892	while (1) {
893		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
894		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
895		    NOCRED, &resid, td);
896		if (error)
897			break;
898		if (resid == len)
899			break;
900		buf[len - resid] = 0;
901		sbuf_printf(sb, "%s", buf);
902		ofs += len - resid;
903	}
904
905	VOP_UNLOCK(nd.ni_vp, 0);
906	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
907	return (error);
908}
909
910static void
911vfs_mountroot_wait(void)
912{
913	struct root_hold_token *h;
914	struct timeval lastfail;
915	int curfail;
916
917	curfail = 0;
918	while (1) {
919		DROP_GIANT();
920		g_waitidle();
921		PICKUP_GIANT();
922		mtx_lock(&root_holds_mtx);
923		if (LIST_EMPTY(&root_holds)) {
924			mtx_unlock(&root_holds_mtx);
925			break;
926		}
927		if (ppsratecheck(&lastfail, &curfail, 1)) {
928			printf("Root mount waiting for:");
929			LIST_FOREACH(h, &root_holds, list)
930				printf(" %s", h->who);
931			printf("\n");
932		}
933		msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold",
934		    hz);
935	}
936}
937
938void
939vfs_mountroot(void)
940{
941	struct mount *mp;
942	struct sbuf *sb;
943	struct thread *td;
944	time_t timebase;
945	int error;
946
947	td = curthread;
948
949	vfs_mountroot_wait();
950
951	sb = sbuf_new_auto();
952	vfs_mountroot_conf0(sb);
953	sbuf_finish(sb);
954
955	error = vfs_mountroot_devfs(td, &mp);
956	while (!error) {
957		error = vfs_mountroot_parse(sb, mp);
958		if (!error) {
959			error = vfs_mountroot_shuffle(td, mp);
960			if (!error) {
961				sbuf_clear(sb);
962				error = vfs_mountroot_readconf(td, sb);
963				sbuf_finish(sb);
964			}
965		}
966	}
967
968	sbuf_delete(sb);
969
970	/*
971	 * Iterate over all currently mounted file systems and use
972	 * the time stamp found to check and/or initialize the RTC.
973	 * Call inittodr() only once and pass it the largest of the
974	 * timestamps we encounter.
975	 */
976	timebase = 0;
977	mtx_lock(&mountlist_mtx);
978	mp = TAILQ_FIRST(&mountlist);
979	while (mp != NULL) {
980		if (mp->mnt_time > timebase)
981			timebase = mp->mnt_time;
982		mp = TAILQ_NEXT(mp, mnt_list);
983	}
984	mtx_unlock(&mountlist_mtx);
985	inittodr(timebase);
986
987	/* Keep prison0's root in sync with the global rootvnode. */
988	mtx_lock(&prison0.pr_mtx);
989	prison0.pr_root = rootvnode;
990	vref(prison0.pr_root);
991	mtx_unlock(&prison0.pr_mtx);
992
993	mtx_lock(&root_holds_mtx);
994	atomic_store_rel_int(&root_mount_complete, 1);
995	wakeup(&root_mount_complete);
996	mtx_unlock(&root_holds_mtx);
997
998	EVENTHANDLER_INVOKE(mountroot);
999}
1000
1001static struct mntarg *
1002parse_mountroot_options(struct mntarg *ma, const char *options)
1003{
1004	char *p;
1005	char *name, *name_arg;
1006	char *val, *val_arg;
1007	char *opts;
1008
1009	if (options == NULL || options[0] == '\0')
1010		return (ma);
1011
1012	p = opts = strdup(options, M_MOUNT);
1013	if (opts == NULL) {
1014		return (ma);
1015	}
1016
1017	while((name = strsep(&p, ",")) != NULL) {
1018		if (name[0] == '\0')
1019			break;
1020
1021		val = strchr(name, '=');
1022		if (val != NULL) {
1023			*val = '\0';
1024			++val;
1025		}
1026		if( strcmp(name, "rw") == 0 ||
1027		    strcmp(name, "noro") == 0) {
1028			/*
1029			 * The first time we mount the root file system,
1030			 * we need to mount 'ro', so We need to ignore
1031			 * 'rw' and 'noro' mount options.
1032			 */
1033			continue;
1034		}
1035		name_arg = strdup(name, M_MOUNT);
1036		val_arg = NULL;
1037		if (val != NULL)
1038			val_arg = strdup(val, M_MOUNT);
1039
1040		ma = mount_arg(ma, name_arg, val_arg,
1041		    (val_arg != NULL ? -1 : 0));
1042	}
1043	free(opts, M_MOUNT);
1044	return (ma);
1045}
1046