vfs_mountroot.c revision 213365
1/*- 2 * Copyright (c) 1999-2004 Poul-Henning Kamp 3 * Copyright (c) 1999 Michael Smith 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/kern/vfs_mountroot.c 213365 2010-10-02 19:44:13Z marcel $"); 39 40#include <sys/param.h> 41#include <sys/conf.h> 42#include <sys/fcntl.h> 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/libkern.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/priv.h> 51#include <sys/proc.h> 52#include <sys/filedesc.h> 53#include <sys/reboot.h> 54#include <sys/syscallsubr.h> 55#include <sys/sysproto.h> 56#include <sys/sx.h> 57#include <sys/sysctl.h> 58#include <sys/sysent.h> 59#include <sys/systm.h> 60#include <sys/vnode.h> 61#include <vm/uma.h> 62 63#include <geom/geom.h> 64 65#include <machine/stdarg.h> 66 67#include "opt_rootdevname.h" 68 69#define ROOTNAME "root_device" 70 71static int vfs_mountroot_ask(void); 72static int vfs_mountroot_try(const char *mountfrom, const char *options); 73 74/* 75 * The vnode of the system's root (/ in the filesystem, without chroot 76 * active.) 77 */ 78struct vnode *rootvnode; 79 80/* 81 * The root filesystem is detailed in the kernel environment variable 82 * vfs.root.mountfrom, which is expected to be in the general format 83 * 84 * <vfsname>:[<path>][ <vfsname>:[<path>] ...] 85 * vfsname := the name of a VFS known to the kernel and capable 86 * of being mounted as root 87 * path := disk device name or other data used by the filesystem 88 * to locate its physical store 89 * 90 * If the environment variable vfs.root.mountfrom is a space separated list, 91 * each list element is tried in turn and the root filesystem will be mounted 92 * from the first one that suceeds. 93 * 94 * The environment variable vfs.root.mountfrom.options is a comma delimited 95 * set of string mount options. These mount options must be parseable 96 * by nmount() in the kernel. 97 */ 98 99/* 100 * The root specifiers we will try if RB_CDROM is specified. 101 */ 102static char *cdrom_rootdevnames[] = { 103 "cd9660:cd0", 104 "cd9660:acd0", 105 NULL 106}; 107 108/* legacy find-root code */ 109char *rootdevnames[2] = {NULL, NULL}; 110#ifndef ROOTDEVNAME 111# define ROOTDEVNAME NULL 112#endif 113static const char *ctrootdevname = ROOTDEVNAME; 114 115struct root_hold_token { 116 const char *who; 117 LIST_ENTRY(root_hold_token) list; 118}; 119 120static LIST_HEAD(, root_hold_token) root_holds = 121 LIST_HEAD_INITIALIZER(root_holds); 122 123static int root_mount_complete; 124 125struct root_hold_token * 126root_mount_hold(const char *identifier) 127{ 128 struct root_hold_token *h; 129 130 if (root_mounted()) 131 return (NULL); 132 133 h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK); 134 h->who = identifier; 135 mtx_lock(&mountlist_mtx); 136 LIST_INSERT_HEAD(&root_holds, h, list); 137 mtx_unlock(&mountlist_mtx); 138 return (h); 139} 140 141void 142root_mount_rel(struct root_hold_token *h) 143{ 144 145 if (h == NULL) 146 return; 147 mtx_lock(&mountlist_mtx); 148 LIST_REMOVE(h, list); 149 wakeup(&root_holds); 150 mtx_unlock(&mountlist_mtx); 151 free(h, M_DEVBUF); 152} 153 154static void 155root_mount_prepare(void) 156{ 157 struct root_hold_token *h; 158 struct timeval lastfail; 159 int curfail = 0; 160 161 for (;;) { 162 DROP_GIANT(); 163 g_waitidle(); 164 PICKUP_GIANT(); 165 mtx_lock(&mountlist_mtx); 166 if (LIST_EMPTY(&root_holds)) { 167 mtx_unlock(&mountlist_mtx); 168 break; 169 } 170 if (ppsratecheck(&lastfail, &curfail, 1)) { 171 printf("Root mount waiting for:"); 172 LIST_FOREACH(h, &root_holds, list) 173 printf(" %s", h->who); 174 printf("\n"); 175 } 176 msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold", 177 hz); 178 } 179} 180 181static void 182root_mount_done(void) 183{ 184 185 /* Keep prison0's root in sync with the global rootvnode. */ 186 mtx_lock(&prison0.pr_mtx); 187 prison0.pr_root = rootvnode; 188 vref(prison0.pr_root); 189 mtx_unlock(&prison0.pr_mtx); 190 /* 191 * Use a mutex to prevent the wakeup being missed and waiting for 192 * an extra 1 second sleep. 193 */ 194 mtx_lock(&mountlist_mtx); 195 root_mount_complete = 1; 196 wakeup(&root_mount_complete); 197 mtx_unlock(&mountlist_mtx); 198} 199 200int 201root_mounted(void) 202{ 203 204 /* No mutex is acquired here because int stores are atomic. */ 205 return (root_mount_complete); 206} 207 208void 209root_mount_wait(void) 210{ 211 212 /* 213 * Panic on an obvious deadlock - the function can't be called from 214 * a thread which is doing the whole SYSINIT stuff. 215 */ 216 KASSERT(curthread->td_proc->p_pid != 0, 217 ("root_mount_wait: cannot be called from the swapper thread")); 218 mtx_lock(&mountlist_mtx); 219 while (!root_mount_complete) { 220 msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait", 221 hz); 222 } 223 mtx_unlock(&mountlist_mtx); 224} 225 226static void 227set_rootvnode(void) 228{ 229 struct proc *p; 230 231 if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode)) 232 panic("Cannot find root vnode"); 233 234 VOP_UNLOCK(rootvnode, 0); 235 236 p = curthread->td_proc; 237 FILEDESC_XLOCK(p->p_fd); 238 239 if (p->p_fd->fd_cdir != NULL) 240 vrele(p->p_fd->fd_cdir); 241 p->p_fd->fd_cdir = rootvnode; 242 VREF(rootvnode); 243 244 if (p->p_fd->fd_rdir != NULL) 245 vrele(p->p_fd->fd_rdir); 246 p->p_fd->fd_rdir = rootvnode; 247 VREF(rootvnode); 248 249 FILEDESC_XUNLOCK(p->p_fd); 250 251 EVENTHANDLER_INVOKE(mountroot); 252} 253 254static void 255devfs_first(void) 256{ 257 struct thread *td = curthread; 258 struct vfsoptlist *opts; 259 struct vfsconf *vfsp; 260 struct mount *mp = NULL; 261 int error; 262 263 vfsp = vfs_byname("devfs"); 264 KASSERT(vfsp != NULL, ("Could not find devfs by name")); 265 if (vfsp == NULL) 266 return; 267 268 mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred); 269 270 error = VFS_MOUNT(mp); 271 KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error)); 272 if (error) 273 return; 274 275 opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); 276 TAILQ_INIT(opts); 277 mp->mnt_opt = opts; 278 279 mtx_lock(&mountlist_mtx); 280 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 281 mtx_unlock(&mountlist_mtx); 282 283 set_rootvnode(); 284 285 error = kern_symlink(td, "/", "dev", UIO_SYSSPACE); 286 if (error) 287 printf("kern_symlink /dev -> / returns %d\n", error); 288} 289 290static void 291devfs_fixup(struct thread *td) 292{ 293 struct nameidata nd; 294 struct vnode *vp, *dvp; 295 struct mount *mp; 296 int error; 297 298 /* Remove our devfs mount from the mountlist and purge the cache */ 299 mtx_lock(&mountlist_mtx); 300 mp = TAILQ_FIRST(&mountlist); 301 TAILQ_REMOVE(&mountlist, mp, mnt_list); 302 mtx_unlock(&mountlist_mtx); 303 cache_purgevfs(mp); 304 305 VFS_ROOT(mp, LK_EXCLUSIVE, &dvp); 306 VI_LOCK(dvp); 307 dvp->v_iflag &= ~VI_MOUNT; 308 VI_UNLOCK(dvp); 309 dvp->v_mountedhere = NULL; 310 311 /* Set up the real rootvnode, and purge the cache */ 312 TAILQ_FIRST(&mountlist)->mnt_vnodecovered = NULL; 313 set_rootvnode(); 314 cache_purgevfs(rootvnode->v_mount); 315 316 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td); 317 error = namei(&nd); 318 if (error) { 319 printf("Lookup of /dev for devfs, error: %d\n", error); 320 vput(dvp); 321 vfs_unbusy(mp); 322 return; 323 } 324 NDFREE(&nd, NDF_ONLY_PNBUF); 325 vp = nd.ni_vp; 326 if (vp->v_type != VDIR) { 327 printf("/dev is not a directory\n"); 328 vput(dvp); 329 vput(vp); 330 vfs_unbusy(mp); 331 return; 332 } 333 error = vinvalbuf(vp, V_SAVE, 0, 0); 334 if (error) { 335 printf("vinvalbuf() of /dev failed, error: %d\n", error); 336 vput(dvp); 337 vput(vp); 338 vfs_unbusy(mp); 339 return; 340 } 341 cache_purge(vp); 342 mp->mnt_vnodecovered = vp; 343 vp->v_mountedhere = mp; 344 mtx_lock(&mountlist_mtx); 345 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 346 mtx_unlock(&mountlist_mtx); 347 VOP_UNLOCK(vp, 0); 348 vput(dvp); 349 vfs_unbusy(mp); 350 351 /* Unlink the no longer needed /dev/dev -> / symlink */ 352 error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE); 353 if (error) 354 printf("kern_unlink of /dev/dev failed, error: %d\n", error); 355} 356 357void 358vfs_mountroot(void) 359{ 360 char *cp, *cpt, *options, *tmpdev; 361 int error, i, asked = 0; 362 363 options = NULL; 364 365 root_mount_prepare(); 366 367 devfs_first(); 368 369 /* 370 * We are booted with instructions to prompt for the root filesystem. 371 */ 372 if (boothowto & RB_ASKNAME) { 373 if (!vfs_mountroot_ask()) 374 goto mounted; 375 asked = 1; 376 } 377 378 options = getenv("vfs.root.mountfrom.options"); 379 380 /* 381 * The root filesystem information is compiled in, and we are 382 * booted with instructions to use it. 383 */ 384 if (ctrootdevname != NULL && (boothowto & RB_DFLTROOT)) { 385 if (!vfs_mountroot_try(ctrootdevname, options)) 386 goto mounted; 387 ctrootdevname = NULL; 388 } 389 390 /* 391 * We've been given the generic "use CDROM as root" flag. This is 392 * necessary because one media may be used in many different 393 * devices, so we need to search for them. 394 */ 395 if (boothowto & RB_CDROM) { 396 for (i = 0; cdrom_rootdevnames[i] != NULL; i++) { 397 if (!vfs_mountroot_try(cdrom_rootdevnames[i], options)) 398 goto mounted; 399 } 400 } 401 402 /* 403 * Try to use the value read by the loader from /etc/fstab, or 404 * supplied via some other means. This is the preferred 405 * mechanism. 406 */ 407 cp = getenv("vfs.root.mountfrom"); 408 if (cp != NULL) { 409 cpt = cp; 410 while ((tmpdev = strsep(&cpt, " \t")) != NULL) { 411 error = vfs_mountroot_try(tmpdev, options); 412 if (error == 0) { 413 freeenv(cp); 414 goto mounted; 415 } 416 } 417 freeenv(cp); 418 } 419 420 /* 421 * Try values that may have been computed by code during boot 422 */ 423 if (!vfs_mountroot_try(rootdevnames[0], options)) 424 goto mounted; 425 if (!vfs_mountroot_try(rootdevnames[1], options)) 426 goto mounted; 427 428 /* 429 * If we (still) have a compiled-in default, try it. 430 */ 431 if (ctrootdevname != NULL) 432 if (!vfs_mountroot_try(ctrootdevname, options)) 433 goto mounted; 434 /* 435 * Everything so far has failed, prompt on the console if we haven't 436 * already tried that. 437 */ 438 if (!asked) 439 if (!vfs_mountroot_ask()) 440 goto mounted; 441 442 panic("Root mount failed, startup aborted."); 443 444mounted: 445 root_mount_done(); 446 freeenv(options); 447} 448 449static struct mntarg * 450parse_mountroot_options(struct mntarg *ma, const char *options) 451{ 452 char *p; 453 char *name, *name_arg; 454 char *val, *val_arg; 455 char *opts; 456 457 if (options == NULL || options[0] == '\0') 458 return (ma); 459 460 p = opts = strdup(options, M_MOUNT); 461 if (opts == NULL) { 462 return (ma); 463 } 464 465 while((name = strsep(&p, ",")) != NULL) { 466 if (name[0] == '\0') 467 break; 468 469 val = strchr(name, '='); 470 if (val != NULL) { 471 *val = '\0'; 472 ++val; 473 } 474 if( strcmp(name, "rw") == 0 || 475 strcmp(name, "noro") == 0) { 476 /* 477 * The first time we mount the root file system, 478 * we need to mount 'ro', so We need to ignore 479 * 'rw' and 'noro' mount options. 480 */ 481 continue; 482 } 483 name_arg = strdup(name, M_MOUNT); 484 val_arg = NULL; 485 if (val != NULL) 486 val_arg = strdup(val, M_MOUNT); 487 488 ma = mount_arg(ma, name_arg, val_arg, 489 (val_arg != NULL ? -1 : 0)); 490 } 491 free(opts, M_MOUNT); 492 return (ma); 493} 494 495/* 496 * Mount (mountfrom) as the root filesystem. 497 */ 498static int 499vfs_mountroot_try(const char *mountfrom, const char *options) 500{ 501 struct mount *mp; 502 struct mntarg *ma; 503 char *vfsname, *path; 504 time_t timebase; 505 int error; 506 char patt[32]; 507 char errmsg[255]; 508 509 vfsname = NULL; 510 path = NULL; 511 mp = NULL; 512 ma = NULL; 513 error = EINVAL; 514 bzero(errmsg, sizeof(errmsg)); 515 516 if (mountfrom == NULL) 517 return (error); /* don't complain */ 518 printf("Trying to mount root from %s\n", mountfrom); 519 520 /* parse vfs name and path */ 521 vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK); 522 path = malloc(MNAMELEN, M_MOUNT, M_WAITOK); 523 vfsname[0] = path[0] = 0; 524 sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN); 525 if (sscanf(mountfrom, patt, vfsname, path) < 1) 526 goto out; 527 528 if (path[0] == '\0') 529 strcpy(path, ROOTNAME); 530 531 ma = mount_arg(ma, "fstype", vfsname, -1); 532 ma = mount_arg(ma, "fspath", "/", -1); 533 ma = mount_arg(ma, "from", path, -1); 534 ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg)); 535 ma = mount_arg(ma, "ro", NULL, 0); 536 ma = parse_mountroot_options(ma, options); 537 error = kernel_mount(ma, MNT_ROOTFS); 538 539 if (error == 0) { 540 /* 541 * We mount devfs prior to mounting the / FS, so the first 542 * entry will typically be devfs. 543 */ 544 mp = TAILQ_FIRST(&mountlist); 545 KASSERT(mp != NULL, ("%s: mountlist is empty", __func__)); 546 547 /* 548 * Iterate over all currently mounted file systems and use 549 * the time stamp found to check and/or initialize the RTC. 550 * Typically devfs has no time stamp and the only other FS 551 * is the actual / FS. 552 * Call inittodr() only once and pass it the largest of the 553 * timestamps we encounter. 554 */ 555 timebase = 0; 556 do { 557 if (mp->mnt_time > timebase) 558 timebase = mp->mnt_time; 559 mp = TAILQ_NEXT(mp, mnt_list); 560 } while (mp != NULL); 561 inittodr(timebase); 562 563 devfs_fixup(curthread); 564 } 565 566 if (error != 0 ) { 567 printf("ROOT MOUNT ERROR: %s\n", errmsg); 568 printf("If you have invalid mount options, reboot, and "); 569 printf("first try the following from\n"); 570 printf("the loader prompt:\n\n"); 571 printf(" set vfs.root.mountfrom.options=rw\n\n"); 572 printf("and then remove invalid mount options from "); 573 printf("/etc/fstab.\n\n"); 574 } 575out: 576 free(path, M_MOUNT); 577 free(vfsname, M_MOUNT); 578 return (error); 579} 580 581static int 582vfs_mountroot_ask(void) 583{ 584 char name[128]; 585 char *mountfrom; 586 char *options; 587 588 for(;;) { 589 printf("Loader variables:\n"); 590 printf("vfs.root.mountfrom="); 591 mountfrom = getenv("vfs.root.mountfrom"); 592 if (mountfrom != NULL) { 593 printf("%s", mountfrom); 594 } 595 printf("\n"); 596 printf("vfs.root.mountfrom.options="); 597 options = getenv("vfs.root.mountfrom.options"); 598 if (options != NULL) { 599 printf("%s", options); 600 } 601 printf("\n"); 602 freeenv(mountfrom); 603 freeenv(options); 604 printf("\nManual root filesystem specification:\n"); 605 printf(" <fstype>:<device> Mount <device> using filesystem <fstype>\n"); 606 printf(" eg. zfs:tank\n"); 607 printf(" eg. ufs:/dev/da0s1a\n"); 608 printf(" eg. cd9660:/dev/acd0\n"); 609 printf(" This is equivalent to: "); 610 printf("mount -t cd9660 /dev/acd0 /\n"); 611 printf("\n"); 612 printf(" ? List valid disk boot devices\n"); 613 printf(" <empty line> Abort manual input\n"); 614 printf("\nmountroot> "); 615 gets(name, sizeof(name), 1); 616 if (name[0] == '\0') 617 return (1); 618 if (name[0] == '?') { 619 printf("\nList of GEOM managed disk devices:\n "); 620 g_dev_print(); 621 continue; 622 } 623 if (!vfs_mountroot_try(name, NULL)) 624 return (0); 625 } 626} 627