/* g_mirror.c revision 156421 */
1/*- 2 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 156421 2006-03-08 08:27:33Z pjd $"); 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/kernel.h> 33#include <sys/module.h> 34#include <sys/limits.h> 35#include <sys/lock.h> 36#include <sys/mutex.h> 37#include <sys/bio.h> 38#include <sys/sysctl.h> 39#include <sys/malloc.h> 40#include <sys/eventhandler.h> 41#include <vm/uma.h> 42#include <geom/geom.h> 43#include <sys/proc.h> 44#include <sys/kthread.h> 45#include <sys/sched.h> 46#include <geom/mirror/g_mirror.h> 47 48 49static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); 50 51SYSCTL_DECL(_kern_geom); 52SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff"); 53u_int g_mirror_debug = 0; 54TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug); 55SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0, 56 "Debug level"); 57static u_int g_mirror_timeout = 4; 58TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout); 59SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout, 60 0, "Time to wait on all mirror components"); 61static u_int g_mirror_idletime = 5; 62TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime); 63SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW, 64 &g_mirror_idletime, 0, "Mark components as clean when idling"); 65static u_int g_mirror_reqs_per_sync = 5; 66SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW, 67 &g_mirror_reqs_per_sync, 0, 68 "Number of regular I/O requests per synchronization request"); 69static u_int g_mirror_syncs_per_sec = 1000; 70SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW, 71 &g_mirror_syncs_per_sec, 0, 72 "Number of synchronizations requests per second"); 73static u_int g_mirror_disconnect_on_failure = 1; 74TUNABLE_INT("kern.geom.mirror.disconnect_on_failure", 75 &g_mirror_disconnect_on_failure); 76SYSCTL_UINT(_kern_geom_mirror, 
OID_AUTO, disconnect_on_failure, CTLFLAG_RW, 77 &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 78 79#define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 80 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 81 msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 82 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 83} while (0) 84 85static eventhandler_tag g_mirror_ehtag = NULL; 86 87static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp, 88 struct g_geom *gp); 89static g_taste_t g_mirror_taste; 90static void g_mirror_init(struct g_class *mp); 91static void g_mirror_fini(struct g_class *mp); 92 93struct g_class g_mirror_class = { 94 .name = G_MIRROR_CLASS_NAME, 95 .version = G_VERSION, 96 .ctlreq = g_mirror_config, 97 .taste = g_mirror_taste, 98 .destroy_geom = g_mirror_destroy_geom, 99 .init = g_mirror_init, 100 .fini = g_mirror_fini 101}; 102 103 104static void g_mirror_destroy_provider(struct g_mirror_softc *sc); 105static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); 106static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force); 107static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, 108 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 109static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); 110 111 112static const char * 113g_mirror_disk_state2str(int state) 114{ 115 116 switch (state) { 117 case G_MIRROR_DISK_STATE_NONE: 118 return ("NONE"); 119 case G_MIRROR_DISK_STATE_NEW: 120 return ("NEW"); 121 case G_MIRROR_DISK_STATE_ACTIVE: 122 return ("ACTIVE"); 123 case G_MIRROR_DISK_STATE_STALE: 124 return ("STALE"); 125 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 126 return ("SYNCHRONIZING"); 127 case G_MIRROR_DISK_STATE_DISCONNECTED: 128 return ("DISCONNECTED"); 129 case G_MIRROR_DISK_STATE_DESTROY: 130 return ("DESTROY"); 131 default: 132 return ("INVALID"); 133 } 134} 135 136static const char * 
137g_mirror_device_state2str(int state) 138{ 139 140 switch (state) { 141 case G_MIRROR_DEVICE_STATE_STARTING: 142 return ("STARTING"); 143 case G_MIRROR_DEVICE_STATE_RUNNING: 144 return ("RUNNING"); 145 default: 146 return ("INVALID"); 147 } 148} 149 150static const char * 151g_mirror_get_diskname(struct g_mirror_disk *disk) 152{ 153 154 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 155 return ("[unknown]"); 156 return (disk->d_name); 157} 158 159/* 160 * --- Events handling functions --- 161 * Events in geom_mirror are used to maintain disks and device status 162 * from one thread to simplify locking. 163 */ 164static void 165g_mirror_event_free(struct g_mirror_event *ep) 166{ 167 168 free(ep, M_MIRROR); 169} 170 171int 172g_mirror_event_send(void *arg, int state, int flags) 173{ 174 struct g_mirror_softc *sc; 175 struct g_mirror_disk *disk; 176 struct g_mirror_event *ep; 177 int error; 178 179 ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK); 180 G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep); 181 if ((flags & G_MIRROR_EVENT_DEVICE) != 0) { 182 disk = NULL; 183 sc = arg; 184 } else { 185 disk = arg; 186 sc = disk->d_softc; 187 } 188 ep->e_disk = disk; 189 ep->e_state = state; 190 ep->e_flags = flags; 191 ep->e_error = 0; 192 mtx_lock(&sc->sc_events_mtx); 193 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 194 mtx_unlock(&sc->sc_events_mtx); 195 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 196 mtx_lock(&sc->sc_queue_mtx); 197 wakeup(sc); 198 mtx_unlock(&sc->sc_queue_mtx); 199 if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0) 200 return (0); 201 g_topology_assert(); 202 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 203 g_topology_unlock(); 204 while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) { 205 mtx_lock(&sc->sc_events_mtx); 206 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", 207 hz * 5); 208 } 209 /* Don't even try to use 'sc' here, because it could be already dead. 
*/ 210 g_topology_lock(); 211 error = ep->e_error; 212 g_mirror_event_free(ep); 213 return (error); 214} 215 216static struct g_mirror_event * 217g_mirror_event_get(struct g_mirror_softc *sc) 218{ 219 struct g_mirror_event *ep; 220 221 mtx_lock(&sc->sc_events_mtx); 222 ep = TAILQ_FIRST(&sc->sc_events); 223 mtx_unlock(&sc->sc_events_mtx); 224 return (ep); 225} 226 227static void 228g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep) 229{ 230 231 mtx_lock(&sc->sc_events_mtx); 232 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 233 mtx_unlock(&sc->sc_events_mtx); 234} 235 236static void 237g_mirror_event_cancel(struct g_mirror_disk *disk) 238{ 239 struct g_mirror_softc *sc; 240 struct g_mirror_event *ep, *tmpep; 241 242 g_topology_assert(); 243 244 sc = disk->d_softc; 245 mtx_lock(&sc->sc_events_mtx); 246 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 247 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) 248 continue; 249 if (ep->e_disk != disk) 250 continue; 251 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 252 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 253 g_mirror_event_free(ep); 254 else { 255 ep->e_error = ECANCELED; 256 wakeup(ep); 257 } 258 } 259 mtx_unlock(&sc->sc_events_mtx); 260} 261 262/* 263 * Return the number of disks in given state. 264 * If state is equal to -1, count all connected disks. 265 */ 266u_int 267g_mirror_ndisks(struct g_mirror_softc *sc, int state) 268{ 269 struct g_mirror_disk *disk; 270 u_int n = 0; 271 272 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 273 if (state == -1 || disk->d_state == state) 274 n++; 275 } 276 return (n); 277} 278 279/* 280 * Find a disk in mirror by its disk ID. 
281 */ 282static struct g_mirror_disk * 283g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id) 284{ 285 struct g_mirror_disk *disk; 286 287 g_topology_assert(); 288 289 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 290 if (disk->d_id == id) 291 return (disk); 292 } 293 return (NULL); 294} 295 296static u_int 297g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) 298{ 299 struct bio *bp; 300 u_int nreqs = 0; 301 302 mtx_lock(&sc->sc_queue_mtx); 303 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 304 if (bp->bio_from == cp) 305 nreqs++; 306 } 307 mtx_unlock(&sc->sc_queue_mtx); 308 return (nreqs); 309} 310 311static int 312g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp) 313{ 314 315 if (cp->index > 0) { 316 G_MIRROR_DEBUG(2, 317 "I/O requests for %s exist, can't destroy it now.", 318 cp->provider->name); 319 return (1); 320 } 321 if (g_mirror_nrequests(sc, cp) > 0) { 322 G_MIRROR_DEBUG(2, 323 "I/O requests for %s in queue, can't destroy it now.", 324 cp->provider->name); 325 return (1); 326 } 327 return (0); 328} 329 330static void 331g_mirror_destroy_consumer(void *arg, int flags __unused) 332{ 333 struct g_consumer *cp; 334 335 cp = arg; 336 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 337 g_detach(cp); 338 g_destroy_consumer(cp); 339} 340 341static void 342g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 343{ 344 struct g_provider *pp; 345 int retaste_wait; 346 347 g_topology_assert(); 348 349 cp->private = NULL; 350 if (g_mirror_is_busy(sc, cp)) 351 return; 352 pp = cp->provider; 353 retaste_wait = 0; 354 if (cp->acw == 1) { 355 if ((pp->geom->flags & G_GEOM_WITHER) == 0) 356 retaste_wait = 1; 357 } 358 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 359 -cp->acw, -cp->ace, 0); 360 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 361 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 362 if (retaste_wait) { 363 /* 364 * After retaste event was send (inside g_access()), 
we can send 365 * event to detach and destroy consumer. 366 * A class, which has consumer to the given provider connected 367 * will not receive retaste event for the provider. 368 * This is the way how I ignore retaste events when I close 369 * consumers opened for write: I detach and destroy consumer 370 * after retaste event is sent. 371 */ 372 g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); 373 return; 374 } 375 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); 376 g_detach(cp); 377 g_destroy_consumer(cp); 378} 379 380static int 381g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) 382{ 383 struct g_consumer *cp; 384 int error; 385 386 g_topology_assert(); 387 KASSERT(disk->d_consumer == NULL, 388 ("Disk already connected (device %s).", disk->d_softc->sc_name)); 389 390 cp = g_new_consumer(disk->d_softc->sc_geom); 391 error = g_attach(cp, pp); 392 if (error != 0) { 393 g_destroy_consumer(cp); 394 return (error); 395 } 396 error = g_access(cp, 1, 1, 1); 397 if (error != 0) { 398 g_detach(cp); 399 g_destroy_consumer(cp); 400 G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", 401 pp->name, error); 402 return (error); 403 } 404 disk->d_consumer = cp; 405 disk->d_consumer->private = disk; 406 disk->d_consumer->index = 0; 407 408 G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); 409 return (0); 410} 411 412static void 413g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 414{ 415 416 g_topology_assert(); 417 418 if (cp == NULL) 419 return; 420 if (cp->provider != NULL) 421 g_mirror_kill_consumer(sc, cp); 422 else 423 g_destroy_consumer(cp); 424} 425 426/* 427 * Initialize disk. This means allocate memory, create consumer, attach it 428 * to the provider and open access (r1w1e1) to it. 
429 */ 430static struct g_mirror_disk * 431g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, 432 struct g_mirror_metadata *md, int *errorp) 433{ 434 struct g_mirror_disk *disk; 435 int error; 436 437 disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); 438 if (disk == NULL) { 439 error = ENOMEM; 440 goto fail; 441 } 442 disk->d_softc = sc; 443 error = g_mirror_connect_disk(disk, pp); 444 if (error != 0) 445 goto fail; 446 disk->d_id = md->md_did; 447 disk->d_state = G_MIRROR_DISK_STATE_NONE; 448 disk->d_priority = md->md_priority; 449 disk->d_delay.sec = 0; 450 disk->d_delay.frac = 0; 451 binuptime(&disk->d_last_used); 452 disk->d_flags = md->md_dflags; 453 if (md->md_provider[0] != '\0') 454 disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; 455 disk->d_sync.ds_consumer = NULL; 456 disk->d_sync.ds_offset = md->md_sync_offset; 457 disk->d_sync.ds_offset_done = md->md_sync_offset; 458 disk->d_sync.ds_resync = -1; 459 disk->d_genid = md->md_genid; 460 disk->d_sync.ds_syncid = md->md_syncid; 461 if (errorp != NULL) 462 *errorp = 0; 463 return (disk); 464fail: 465 if (errorp != NULL) 466 *errorp = error; 467 if (disk != NULL) 468 free(disk, M_MIRROR); 469 return (NULL); 470} 471 472static void 473g_mirror_destroy_disk(struct g_mirror_disk *disk) 474{ 475 struct g_mirror_softc *sc; 476 477 g_topology_assert(); 478 479 LIST_REMOVE(disk, d_next); 480 g_mirror_event_cancel(disk); 481 sc = disk->d_softc; 482 if (sc->sc_hint == disk) 483 sc->sc_hint = NULL; 484 switch (disk->d_state) { 485 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 486 g_mirror_sync_stop(disk, 1); 487 /* FALLTHROUGH */ 488 case G_MIRROR_DISK_STATE_NEW: 489 case G_MIRROR_DISK_STATE_STALE: 490 case G_MIRROR_DISK_STATE_ACTIVE: 491 g_mirror_disconnect_consumer(sc, disk->d_consumer); 492 free(disk, M_MIRROR); 493 break; 494 default: 495 KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 496 g_mirror_get_diskname(disk), 497 g_mirror_disk_state2str(disk->d_state))); 498 } 499} 500 501static void 
502g_mirror_destroy_device(struct g_mirror_softc *sc) 503{ 504 struct g_mirror_disk *disk; 505 struct g_mirror_event *ep; 506 struct g_geom *gp; 507 struct g_consumer *cp, *tmpcp; 508 509 g_topology_assert(); 510 511 gp = sc->sc_geom; 512 if (sc->sc_provider != NULL) 513 g_mirror_destroy_provider(sc); 514 for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL; 515 disk = LIST_FIRST(&sc->sc_disks)) { 516 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 517 g_mirror_update_metadata(disk); 518 g_mirror_destroy_disk(disk); 519 } 520 while ((ep = g_mirror_event_get(sc)) != NULL) { 521 g_mirror_event_remove(sc, ep); 522 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 523 g_mirror_event_free(ep); 524 else { 525 ep->e_error = ECANCELED; 526 ep->e_flags |= G_MIRROR_EVENT_DONE; 527 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); 528 mtx_lock(&sc->sc_events_mtx); 529 wakeup(ep); 530 mtx_unlock(&sc->sc_events_mtx); 531 } 532 } 533 callout_drain(&sc->sc_callout); 534 gp->softc = NULL; 535 536 LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) { 537 g_mirror_disconnect_consumer(sc, cp); 538 } 539 sc->sc_sync.ds_geom->softc = NULL; 540 g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 541 mtx_destroy(&sc->sc_queue_mtx); 542 mtx_destroy(&sc->sc_events_mtx); 543 G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name); 544 g_wither_geom(gp, ENXIO); 545} 546 547static void 548g_mirror_orphan(struct g_consumer *cp) 549{ 550 struct g_mirror_disk *disk; 551 552 g_topology_assert(); 553 554 disk = cp->private; 555 if (disk == NULL) 556 return; 557 disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 558 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 559 G_MIRROR_EVENT_DONTWAIT); 560} 561 562/* 563 * Function should return the next active disk on the list. 564 * It is possible that it will be the same disk as given. 565 * If there are no active disks on list, NULL is returned. 
566 */ 567static __inline struct g_mirror_disk * 568g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 569{ 570 struct g_mirror_disk *dp; 571 572 for (dp = LIST_NEXT(disk, d_next); dp != disk; 573 dp = LIST_NEXT(dp, d_next)) { 574 if (dp == NULL) 575 dp = LIST_FIRST(&sc->sc_disks); 576 if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) 577 break; 578 } 579 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 580 return (NULL); 581 return (dp); 582} 583 584static struct g_mirror_disk * 585g_mirror_get_disk(struct g_mirror_softc *sc) 586{ 587 struct g_mirror_disk *disk; 588 589 if (sc->sc_hint == NULL) { 590 sc->sc_hint = LIST_FIRST(&sc->sc_disks); 591 if (sc->sc_hint == NULL) 592 return (NULL); 593 } 594 disk = sc->sc_hint; 595 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { 596 disk = g_mirror_find_next(sc, disk); 597 if (disk == NULL) 598 return (NULL); 599 } 600 sc->sc_hint = g_mirror_find_next(sc, disk); 601 return (disk); 602} 603 604static int 605g_mirror_write_metadata(struct g_mirror_disk *disk, 606 struct g_mirror_metadata *md) 607{ 608 struct g_mirror_softc *sc; 609 struct g_consumer *cp; 610 off_t offset, length; 611 u_char *sector; 612 int error = 0; 613 614 g_topology_assert(); 615 616 sc = disk->d_softc; 617 cp = disk->d_consumer; 618 KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 619 KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 620 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 621 ("Consumer %s closed? 
(r%dw%de%d).", cp->provider->name, cp->acr, 622 cp->acw, cp->ace)); 623 length = cp->provider->sectorsize; 624 offset = cp->provider->mediasize - length; 625 sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO); 626 if (md != NULL) 627 mirror_metadata_encode(md, sector); 628 g_topology_unlock(); 629 error = g_write_data(cp, offset, sector, length); 630 g_topology_lock(); 631 free(sector, M_MIRROR); 632 if (error != 0) { 633 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 634 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 635 G_MIRROR_DEBUG(0, "Cannot write metadata on %s " 636 "(device=%s, error=%d).", 637 g_mirror_get_diskname(disk), sc->sc_name, error); 638 } else { 639 G_MIRROR_DEBUG(1, "Cannot write metadata on %s " 640 "(device=%s, error=%d).", 641 g_mirror_get_diskname(disk), sc->sc_name, error); 642 } 643 if (g_mirror_disconnect_on_failure && 644 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { 645 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 646 g_mirror_event_send(disk, 647 G_MIRROR_DISK_STATE_DISCONNECTED, 648 G_MIRROR_EVENT_DONTWAIT); 649 } 650 } 651 return (error); 652} 653 654static int 655g_mirror_clear_metadata(struct g_mirror_disk *disk) 656{ 657 int error; 658 659 g_topology_assert(); 660 error = g_mirror_write_metadata(disk, NULL); 661 if (error == 0) { 662 G_MIRROR_DEBUG(2, "Metadata on %s cleared.", 663 g_mirror_get_diskname(disk)); 664 } else { 665 G_MIRROR_DEBUG(0, 666 "Cannot clear metadata on disk %s (error=%d).", 667 g_mirror_get_diskname(disk), error); 668 } 669 return (error); 670} 671 672void 673g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, 674 struct g_mirror_metadata *md) 675{ 676 677 strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic)); 678 md->md_version = G_MIRROR_VERSION; 679 strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 680 md->md_mid = sc->sc_id; 681 md->md_all = sc->sc_ndisks; 682 md->md_slice = sc->sc_slice; 683 md->md_balance = sc->sc_balance; 684 md->md_genid = 
sc->sc_genid; 685 md->md_mediasize = sc->sc_mediasize; 686 md->md_sectorsize = sc->sc_sectorsize; 687 md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK); 688 bzero(md->md_provider, sizeof(md->md_provider)); 689 if (disk == NULL) { 690 md->md_did = arc4random(); 691 md->md_priority = 0; 692 md->md_syncid = 0; 693 md->md_dflags = 0; 694 md->md_sync_offset = 0; 695 md->md_provsize = 0; 696 } else { 697 md->md_did = disk->d_id; 698 md->md_priority = disk->d_priority; 699 md->md_syncid = disk->d_sync.ds_syncid; 700 md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK); 701 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 702 md->md_sync_offset = disk->d_sync.ds_offset_done; 703 else 704 md->md_sync_offset = 0; 705 if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) { 706 strlcpy(md->md_provider, 707 disk->d_consumer->provider->name, 708 sizeof(md->md_provider)); 709 } 710 md->md_provsize = disk->d_consumer->provider->mediasize; 711 } 712} 713 714void 715g_mirror_update_metadata(struct g_mirror_disk *disk) 716{ 717 struct g_mirror_metadata md; 718 int error; 719 720 g_topology_assert(); 721 g_mirror_fill_metadata(disk->d_softc, disk, &md); 722 error = g_mirror_write_metadata(disk, &md); 723 if (error == 0) { 724 G_MIRROR_DEBUG(2, "Metadata on %s updated.", 725 g_mirror_get_diskname(disk)); 726 } else { 727 G_MIRROR_DEBUG(0, 728 "Cannot update metadata on disk %s (error=%d).", 729 g_mirror_get_diskname(disk), error); 730 } 731} 732 733static void 734g_mirror_bump_syncid(struct g_mirror_softc *sc) 735{ 736 struct g_mirror_disk *disk; 737 738 g_topology_assert(); 739 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 740 ("%s called with no active disks (device=%s).", __func__, 741 sc->sc_name)); 742 743 sc->sc_syncid++; 744 G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 745 sc->sc_syncid); 746 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 747 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 748 disk->d_state == 
G_MIRROR_DISK_STATE_SYNCHRONIZING) { 749 disk->d_sync.ds_syncid = sc->sc_syncid; 750 g_mirror_update_metadata(disk); 751 } 752 } 753} 754 755static void 756g_mirror_bump_genid(struct g_mirror_softc *sc) 757{ 758 struct g_mirror_disk *disk; 759 760 g_topology_assert(); 761 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 762 ("%s called with no active disks (device=%s).", __func__, 763 sc->sc_name)); 764 765 sc->sc_genid++; 766 G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 767 sc->sc_genid); 768 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 769 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 770 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 771 disk->d_genid = sc->sc_genid; 772 g_mirror_update_metadata(disk); 773 } 774 } 775} 776 777static int 778g_mirror_idle(struct g_mirror_softc *sc, int from_access) 779{ 780 struct g_mirror_disk *disk; 781 int timeout; 782 783 if (sc->sc_provider == NULL) 784 return (0); 785 if (sc->sc_idle) 786 return (0); 787 if (sc->sc_writes > 0) 788 return (0); 789 if (!from_access && sc->sc_provider->acw > 0) { 790 timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write); 791 if (timeout > 0) 792 return (timeout); 793 } 794 sc->sc_idle = 1; 795 if (!from_access) 796 g_topology_lock(); 797 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 798 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 799 continue; 800 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 801 g_mirror_get_diskname(disk), sc->sc_name); 802 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 803 g_mirror_update_metadata(disk); 804 } 805 if (!from_access) 806 g_topology_unlock(); 807 return (0); 808} 809 810static void 811g_mirror_unidle(struct g_mirror_softc *sc) 812{ 813 struct g_mirror_disk *disk; 814 815 sc->sc_idle = 0; 816 sc->sc_last_write = time_uptime; 817 g_topology_lock(); 818 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 819 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 820 continue; 821 G_MIRROR_DEBUG(1, "Disk %s (device 
%s) marked as dirty.", 822 g_mirror_get_diskname(disk), sc->sc_name); 823 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 824 g_mirror_update_metadata(disk); 825 } 826 g_topology_unlock(); 827} 828 829static __inline int 830bintime_cmp(struct bintime *bt1, struct bintime *bt2) 831{ 832 833 if (bt1->sec < bt2->sec) 834 return (-1); 835 else if (bt1->sec > bt2->sec) 836 return (1); 837 if (bt1->frac < bt2->frac) 838 return (-1); 839 else if (bt1->frac > bt2->frac) 840 return (1); 841 return (0); 842} 843 844static void 845g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp) 846{ 847 848 if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD) 849 return; 850 binuptime(&disk->d_delay); 851 bintime_sub(&disk->d_delay, &bp->bio_t0); 852} 853 854static void 855g_mirror_done(struct bio *bp) 856{ 857 struct g_mirror_softc *sc; 858 859 sc = bp->bio_from->geom->softc; 860 bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR; 861 mtx_lock(&sc->sc_queue_mtx); 862 bioq_disksort(&sc->sc_queue, bp); 863 wakeup(sc); 864 mtx_unlock(&sc->sc_queue_mtx); 865} 866 867static void 868g_mirror_regular_request(struct bio *bp) 869{ 870 struct g_mirror_softc *sc; 871 struct g_mirror_disk *disk; 872 struct bio *pbp; 873 874 g_topology_assert_not(); 875 876 pbp = bp->bio_parent; 877 sc = pbp->bio_to->geom->softc; 878 bp->bio_from->index--; 879 if (bp->bio_cmd == BIO_WRITE) 880 sc->sc_writes--; 881 disk = bp->bio_from->private; 882 if (disk == NULL) { 883 g_topology_lock(); 884 g_mirror_kill_consumer(sc, bp->bio_from); 885 g_topology_unlock(); 886 } else { 887 g_mirror_update_delay(disk, bp); 888 } 889 890 pbp->bio_inbed++; 891 KASSERT(pbp->bio_inbed <= pbp->bio_children, 892 ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 893 pbp->bio_children)); 894 if (bp->bio_error == 0 && pbp->bio_error == 0) { 895 G_MIRROR_LOGREQ(3, bp, "Request delivered."); 896 g_destroy_bio(bp); 897 if (pbp->bio_children == pbp->bio_inbed) { 898 G_MIRROR_LOGREQ(3, pbp, "Request delivered."); 899 
pbp->bio_completed = pbp->bio_length; 900 g_io_deliver(pbp, pbp->bio_error); 901 } 902 return; 903 } else if (bp->bio_error != 0) { 904 if (pbp->bio_error == 0) 905 pbp->bio_error = bp->bio_error; 906 if (disk != NULL) { 907 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 908 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 909 G_MIRROR_LOGREQ(0, bp, 910 "Request failed (error=%d).", 911 bp->bio_error); 912 } else { 913 G_MIRROR_LOGREQ(1, bp, 914 "Request failed (error=%d).", 915 bp->bio_error); 916 } 917 if (g_mirror_disconnect_on_failure && 918 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) 919 { 920 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 921 g_mirror_event_send(disk, 922 G_MIRROR_DISK_STATE_DISCONNECTED, 923 G_MIRROR_EVENT_DONTWAIT); 924 } 925 } 926 switch (pbp->bio_cmd) { 927 case BIO_DELETE: 928 case BIO_WRITE: 929 pbp->bio_inbed--; 930 pbp->bio_children--; 931 break; 932 } 933 } 934 g_destroy_bio(bp); 935 936 switch (pbp->bio_cmd) { 937 case BIO_READ: 938 if (pbp->bio_inbed < pbp->bio_children) 939 break; 940 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1) 941 g_io_deliver(pbp, pbp->bio_error); 942 else { 943 pbp->bio_error = 0; 944 mtx_lock(&sc->sc_queue_mtx); 945 bioq_disksort(&sc->sc_queue, pbp); 946 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 947 wakeup(sc); 948 mtx_unlock(&sc->sc_queue_mtx); 949 } 950 break; 951 case BIO_DELETE: 952 case BIO_WRITE: 953 if (pbp->bio_children == 0) { 954 /* 955 * All requests failed. 956 */ 957 } else if (pbp->bio_inbed < pbp->bio_children) { 958 /* Do nothing. */ 959 break; 960 } else if (pbp->bio_children == pbp->bio_inbed) { 961 /* Some requests succeeded. 
*/ 962 pbp->bio_error = 0; 963 pbp->bio_completed = pbp->bio_length; 964 } 965 g_io_deliver(pbp, pbp->bio_error); 966 break; 967 default: 968 KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd)); 969 break; 970 } 971} 972 973static void 974g_mirror_sync_done(struct bio *bp) 975{ 976 struct g_mirror_softc *sc; 977 978 G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); 979 sc = bp->bio_from->geom->softc; 980 bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC; 981 mtx_lock(&sc->sc_queue_mtx); 982 bioq_disksort(&sc->sc_queue, bp); 983 wakeup(sc); 984 mtx_unlock(&sc->sc_queue_mtx); 985} 986 987static void 988g_mirror_kernel_dump(struct bio *bp) 989{ 990 struct g_mirror_softc *sc; 991 struct g_mirror_disk *disk; 992 struct bio *cbp; 993 struct g_kerneldump *gkd; 994 995 /* 996 * We configure dumping to the first component, because this component 997 * will be used for reading with 'prefer' balance algorithm. 998 * If the component with the higest priority is currently disconnected 999 * we will not be able to read the dump after the reboot if it will be 1000 * connected and synchronized later. Can we do something better? 1001 */ 1002 sc = bp->bio_to->geom->softc; 1003 disk = LIST_FIRST(&sc->sc_disks); 1004 1005 gkd = (struct g_kerneldump *)bp->bio_data; 1006 if (gkd->length > bp->bio_to->mediasize) 1007 gkd->length = bp->bio_to->mediasize; 1008 cbp = g_clone_bio(bp); 1009 if (cbp == NULL) { 1010 g_io_deliver(bp, ENOMEM); 1011 return; 1012 } 1013 cbp->bio_done = g_std_done; 1014 g_io_request(cbp, disk->d_consumer); 1015 G_MIRROR_DEBUG(1, "Kernel dump will go to %s.", 1016 g_mirror_get_diskname(disk)); 1017} 1018 1019static void 1020g_mirror_start(struct bio *bp) 1021{ 1022 struct g_mirror_softc *sc; 1023 1024 sc = bp->bio_to->geom->softc; 1025 /* 1026 * If sc == NULL or there are no valid disks, provider's error 1027 * should be set and g_mirror_start() should not be called at all. 
 */
	/*
	 * NOTE(review): this is the tail of the provider start routine whose
	 * head lies above this chunk; `sc' and `bp' are presumably set up
	 * there — confirm against the full file.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
		/* Kernel dump configuration is the only attribute we serve. */
		if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	/*
	 * Queue the request for the worker thread and wake it up; the queue
	 * is sorted by disk offset to improve locality.
	 */
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Send one synchronization request.
 *
 * Issues a single BIO_READ against the mirror provider through the
 * dedicated sync consumer; g_mirror_sync_done() completes it and the
 * write-back half is handled in g_mirror_sync_request().  Allocation
 * failures are tolerated silently — the worker loop will call here
 * again on its next synchronization pass.
 */
static void
g_mirror_sync_one(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct bio *bp;

	sc = disk->d_softc;
	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));

	bp = g_new_bio();
	if (bp == NULL)
		return;
	bp->bio_parent = NULL;
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = disk->d_sync.ds_offset;
	/* Read up to MAXPHYS, clamped to the end of the media. */
	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
	bp->bio_cflags = 0;
	bp->bio_done = g_mirror_sync_done;
	bp->bio_data = disk->d_sync.ds_data;
	if (bp->bio_data == NULL) {
		g_destroy_bio(bp);
		return;
	}
	disk->d_sync.ds_offset += bp->bio_length;
	bp->bio_to = sc->sc_provider;
	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
	/* Track in-flight requests on the sync consumer. */
	disk->d_sync.ds_consumer->index++;
	g_io_request(bp, disk->d_sync.ds_consumer);
}

/*
 * Completion handler for synchronization bios (both halves).
 * A sync request is a read from the mirror provider followed by a write
 * to the disk being rebuilt; this function turns the finished read into
 * the write and accounts for the finished write.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Disk is gone; tear down the orphaned sync consumer. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		/* Reuse the same bio to write the data to the syncing disk. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			/* Write failure invalidates the component. */
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		/* A pending resync rewind takes precedence over completion. */
		if (sync->ds_resync != -1)
			break;
		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

/*
 * PREFER balance algorithm: always read from the first ACTIVE disk on
 * the list (the list is kept sorted by priority at insertion time).
 */
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * ROUND_ROBIN balance algorithm: rotate reads over the active disks;
 * g_mirror_get_disk() picks the next one.
 */
static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * LOAD balance algorithm: send the read to the active disk with the
 * smallest measured delay, or to any disk that has been idle for two
 * seconds or more.
 */
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	struct bintime curtime;

	binuptime(&curtime);
	/*
	 * Find a disk with the smallest load.
	 */
	disk = NULL;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		/* If disk wasn't used for more than 2 sec, use it. */
		if (curtime.sec - dp->d_last_used.sec >= 2) {
			disk = dp;
			break;
		}
		if (disk == NULL ||
		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
			disk = dp;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	/* Remember when this disk was last picked, for the 2 sec rule. */
	binuptime(&disk->d_last_used);
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * SPLIT balance algorithm: divide a large read into sector-aligned
 * slices and issue one slice to each active disk.  Requests no larger
 * than sc_slice fall back to round-robin.
 */
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	/* Round the slice up to a multiple of the sector size. */
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			/* Undo: destroy every clone queued so far. */
			for (cbp = bioq_first(&queue); cbp != NULL;
			    cbp = bioq_first(&queue)) {
				bioq_remove(&queue, cbp);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		/* Stash the disk for the send loop below. */
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	/* All clones allocated; now actually send them. */
	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
		bioq_remove(&queue, cbp);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

/*
 * Dispatch a regular (non-sync) request: reads go through the
 * configured balance algorithm; writes and deletes are replicated to
 * every usable component.
 */
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/* First write after idling marks components dirty again. */
		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				/* Skip regions not reached by the sync yet. */
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				else if (bp->bio_offset + bp->bio_length >
				    sync->ds_offset_done &&
				    (bp->bio_offset < sync->ds_resync ||
				     sync->ds_resync == -1)) {
					/*
					 * Write overlaps the in-flight sync
					 * window; schedule a resync from the
					 * MAXPHYS-aligned start of the write.
					 */
					sync->ds_resync = bp->bio_offset -
					    (bp->bio_offset % MAXPHYS);
				}
				break;
			default:
				continue;
			}
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				/* Undo: destroy every clone queued so far. */
				for (cbp = bioq_first(&queue); cbp != NULL;
				    cbp = bioq_first(&queue)) {
					bioq_remove(&queue, cbp);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		for (cbp = bioq_first(&queue); cbp != NULL;
		    cbp = bioq_first(&queue)) {
			bioq_remove(&queue, cbp);
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_topology_lock();
			g_mirror_bump_syncid(sc);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

/*
 * Return non-zero when no consumer of either the mirror geom or the
 * sync geom is still busy, i.e. the device may be torn down.
 * Topology lock must be held (asserted below).
 */
static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

/*
 * Attempt to destroy the device.  Returns 1 when the caller (the worker
 * thread) must exit, 0 when destruction has to be retried later because
 * I/O is still pending.  With the WAIT flag set, a thread sleeping in
 * g_mirror_destroy() performs the actual destruction and is woken here.
 */
static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_mirror_destroy_device(sc);
		g_topology_unlock();
		free(sc, M_MIRROR);
	}
	return (1);
}

/*
 * Worker thread.
1536 */ 1537static void 1538g_mirror_worker(void *arg) 1539{ 1540 struct g_mirror_softc *sc; 1541 struct g_mirror_disk *disk; 1542 struct g_mirror_disk_sync *sync; 1543 struct g_mirror_event *ep; 1544 struct bio *bp; 1545 u_int nreqs; 1546 int timeout; 1547 1548 sc = arg; 1549 mtx_lock_spin(&sched_lock); 1550 sched_prio(curthread, PRIBIO); 1551 mtx_unlock_spin(&sched_lock); 1552 1553 nreqs = 0; 1554 for (;;) { 1555 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); 1556 /* 1557 * First take a look at events. 1558 * This is important to handle events before any I/O requests. 1559 */ 1560 ep = g_mirror_event_get(sc); 1561 if (ep != NULL && g_topology_try_lock()) { 1562 g_mirror_event_remove(sc, ep); 1563 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { 1564 /* Update only device status. */ 1565 G_MIRROR_DEBUG(3, 1566 "Running event for device %s.", 1567 sc->sc_name); 1568 ep->e_error = 0; 1569 g_mirror_update_device(sc, 1); 1570 } else { 1571 /* Update disk status. */ 1572 G_MIRROR_DEBUG(3, "Running event for disk %s.", 1573 g_mirror_get_diskname(ep->e_disk)); 1574 ep->e_error = g_mirror_update_disk(ep->e_disk, 1575 ep->e_state); 1576 if (ep->e_error == 0) 1577 g_mirror_update_device(sc, 0); 1578 } 1579 g_topology_unlock(); 1580 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { 1581 KASSERT(ep->e_error == 0, 1582 ("Error cannot be handled.")); 1583 g_mirror_event_free(ep); 1584 } else { 1585 ep->e_flags |= G_MIRROR_EVENT_DONE; 1586 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1587 ep); 1588 mtx_lock(&sc->sc_events_mtx); 1589 wakeup(ep); 1590 mtx_unlock(&sc->sc_events_mtx); 1591 } 1592 if ((sc->sc_flags & 1593 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1594 if (g_mirror_try_destroy(sc)) 1595 kthread_exit(0); 1596 } 1597 G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); 1598 continue; 1599 } 1600 /* 1601 * Check if we can mark array as CLEAN and if we can't take 1602 * how much seconds should we wait. 
1603 */ 1604 timeout = g_mirror_idle(sc, 0); 1605 /* 1606 * Now I/O requests. 1607 */ 1608 /* Get first request from the queue. */ 1609 mtx_lock(&sc->sc_queue_mtx); 1610 bp = bioq_first(&sc->sc_queue); 1611 if (bp == NULL) { 1612 if (ep != NULL) { 1613 /* 1614 * No I/O requests and topology lock was 1615 * already held? Try again. 1616 */ 1617 mtx_unlock(&sc->sc_queue_mtx); 1618 continue; 1619 } 1620 if ((sc->sc_flags & 1621 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1622 mtx_unlock(&sc->sc_queue_mtx); 1623 if (g_mirror_try_destroy(sc)) 1624 kthread_exit(0); 1625 mtx_lock(&sc->sc_queue_mtx); 1626 } 1627 } 1628 if (sc->sc_sync.ds_ndisks > 0 && 1629 (bp == NULL || nreqs > g_mirror_reqs_per_sync)) { 1630 mtx_unlock(&sc->sc_queue_mtx); 1631 /* 1632 * It is time for synchronization... 1633 */ 1634 nreqs = 0; 1635 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1636 if (disk->d_state != 1637 G_MIRROR_DISK_STATE_SYNCHRONIZING) { 1638 continue; 1639 } 1640 sync = &disk->d_sync; 1641 if (sync->ds_offset >= 1642 sc->sc_provider->mediasize) { 1643 continue; 1644 } 1645 if (sync->ds_offset > sync->ds_offset_done) 1646 continue; 1647 if (sync->ds_resync != -1) { 1648 sync->ds_offset = sync->ds_resync; 1649 sync->ds_offset_done = sync->ds_resync; 1650 sync->ds_resync = -1; 1651 } 1652 g_mirror_sync_one(disk); 1653 } 1654 G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__); 1655 goto sleep; 1656 } 1657 if (bp == NULL) { 1658 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 1659 timeout * hz); 1660 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); 1661 continue; 1662 } 1663 nreqs++; 1664 bioq_remove(&sc->sc_queue, bp); 1665 mtx_unlock(&sc->sc_queue_mtx); 1666 1667 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) { 1668 g_mirror_regular_request(bp); 1669 } else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { 1670 u_int timeout, sps; 1671 1672 g_mirror_sync_request(bp); 1673sleep: 1674 sps = g_mirror_syncs_per_sec; 1675 if (sps == 0) { 1676 G_MIRROR_DEBUG(5, "%s: I'm here 6.", 
__func__); 1677 continue; 1678 } 1679 if (ep != NULL) { 1680 /* 1681 * We have some pending events, don't sleep now. 1682 */ 1683 G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__); 1684 continue; 1685 } 1686 mtx_lock(&sc->sc_queue_mtx); 1687 if (bioq_first(&sc->sc_queue) != NULL) { 1688 mtx_unlock(&sc->sc_queue_mtx); 1689 G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__); 1690 continue; 1691 } 1692 timeout = hz / sps; 1693 if (timeout == 0) 1694 timeout = 1; 1695 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3", 1696 timeout); 1697 } else { 1698 g_mirror_register_request(bp); 1699 } 1700 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); 1701 } 1702} 1703 1704static void 1705g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 1706{ 1707 1708 g_topology_assert(); 1709 if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { 1710 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.", 1711 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1712 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1713 } else if (sc->sc_idle && 1714 (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 1715 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 1716 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1717 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 1718 } 1719} 1720 1721static void 1722g_mirror_sync_start(struct g_mirror_disk *disk) 1723{ 1724 struct g_mirror_softc *sc; 1725 int error; 1726 1727 g_topology_assert(); 1728 1729 sc = disk->d_softc; 1730 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 1731 ("Device not in RUNNING state (%s, %u).", sc->sc_name, 1732 sc->sc_state)); 1733 1734 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 1735 g_mirror_get_diskname(disk)); 1736 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1737 KASSERT(disk->d_sync.ds_consumer == NULL, 1738 ("Sync consumer already exists (device=%s, disk=%s).", 1739 sc->sc_name, g_mirror_get_diskname(disk))); 1740 disk->d_sync.ds_consumer = 
g_new_consumer(sc->sc_sync.ds_geom);
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;
	/* The sync consumer reads from the mirror provider itself. */
	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
	    disk->d_softc->sc_name, error));
	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).",
	    disk->d_softc->sc_name, error));
	/* Copy buffer shared by all sync requests for this disk. */
	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
	sc->sc_sync.ds_ndisks++;
}

/*
 * Stop synchronization process.
 * type: 0 - synchronization finished
 *       1 - synchronization stopped
 * Releases the sync consumer and copy buffer created by
 * g_mirror_sync_start().  Topology lock must be held (asserted below).
 */
static void
g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
{

	g_topology_assert();
	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
	    g_mirror_disk_state2str(disk->d_state)));
	/* Nothing to tear down if the rebuild never actually started. */
	if (disk->d_sync.ds_consumer == NULL)
		return;

	if (type == 0) {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
	} else /* if (type == 1) */ {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
	}
	g_mirror_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
	free(disk->d_sync.ds_data, M_MIRROR);
	disk->d_sync.ds_consumer = NULL;
	disk->d_softc->sc_sync.ds_ndisks--;
	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
}

/*
 * Create and announce the mirror/<name> provider and kick off rebuilds
 * for any components already in the SYNCHRONIZING state.
 * Topology lock must be held (asserted below).
 */
static void
g_mirror_launch_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_provider *pp;

	g_topology_assert();

	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	sc->sc_provider = pp;
	g_error_provider(pp, 0);
	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
	    pp->name);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_start(disk);
	}
}

/*
 * Withdraw the mirror provider: fail all queued requests with ENXIO,
 * orphan the provider and stop any in-progress rebuilds.
 * Topology lock must be held (asserted below).
 */
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	g_error_provider(sc->sc_provider, ENXIO);
	/* Drain the request queue; nothing can be served any more. */
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
		bioq_remove(&sc->sc_queue, bp);
		g_io_deliver(bp, ENXIO);
	}
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
	    sc->sc_provider->name);
	sc->sc_provider->flags |= G_PF_WITHER;
	g_orphan_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}

/*
 * Callout handler: force device start when not all components showed up
 * before the kern.geom.mirror.timeout deadline.
 */
static void
g_mirror_go(void *arg)
{
	struct g_mirror_softc *sc;

	sc = arg;
	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
	g_mirror_event_send(sc, 0,
	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
}

/*
 * Decide the initial state of a newly arrived component by comparing
 * its syncid with the device's.  May destroy the disk (and return
 * G_MIRROR_DISK_STATE_NONE) when the disk is fresher than the running
 * device.
 */
static u_int
g_mirror_determine_state(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_MIRROR_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
			    (disk->d_flags &
			    G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_MIRROR_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because if it even was synchronized, it was
		 * synchronized to disks with different syncid.
		 */
		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_MIRROR_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a more fresh disk just arrived.
		 * If there were writes, the mirror is now inconsistent.
		 * The safest choice here is to not touch this disk and
		 * inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrives!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
		g_mirror_destroy_disk(disk);
		state = G_MIRROR_DISK_STATE_NONE;
		/* Return immediately, because disk was destroyed. */
		return (state);
	}
	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
	return (state);
}

/*
 * Update device state.
 */
/*
 * Device state machine: drives the transition STARTING -> RUNNING
 * (electing the freshest components) and, while RUNNING, launches or
 * destroys the provider as components come and go.  Called from the
 * worker thread with the topology lock held (asserted below).
 * 'force' is true when we were started by the g_mirror_go timeout.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_mirror_disk *disk;
	u_int state;

	g_topology_assert();

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk, *tdisk;
		u_int dirty, ndisks, genid, syncid;

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready? We are, if all disks are connected or
		 * if we have any disks and 'force' is true.
		 */
		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
			;
		} else if (g_mirror_ndisks(sc, -1) == 0) {
			/*
			 * Disks went down in starting phase, so destroy
			 * device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
			    sc->sc_rootmount);
			root_mount_rel(sc->sc_rootmount);
			sc->sc_rootmount = NULL;
			return;
		} else {
			/* Still waiting for more components. */
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling timeout.
			 */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Find the biggest genid.
		 */
		genid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_genid > genid)
				genid = disk->d_genid;
		}
		sc->sc_genid = genid;
		/*
		 * Remove all disks without the biggest genid.
		 */
		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
			if (disk->d_genid < genid) {
				G_MIRROR_DEBUG(0,
				    "Component %s (device %s) broken, skipping.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				g_mirror_destroy_disk(disk);
			}
		}

		/*
		 * Find the biggest syncid.
		 */
		syncid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid > syncid)
				syncid = disk->d_sync.ds_syncid;
		}

		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 */
		/*
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except one
			 * with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				/* Syncid 0 forces a rebuild of this disk. */
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		sc->sc_syncid = syncid;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE)
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No active disks or no disks at all,
			 * so destroy device.
			 */
			if (sc->sc_provider != NULL)
				g_mirror_destroy_provider(sc);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, launch provider if it doesn't
			 * exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
			if (sc->sc_rootmount != NULL) {
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
			}
		}
		/*
		 * Genid should be bumped immediately, so do it here.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
			g_mirror_bump_genid(sc);
		}
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}

/*
 * Update disk state and device state if needed.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
/*
 * Per-disk state machine: move 'disk' to 'state', validating the
 * transition with KASSERTs.  Returns 0 on success or an errno (only the
 * DESTROY path can fail, on metadata-clearing error).  Called from the
 * worker thread with the topology lock held (asserted below).
 */
static int
g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = disk->d_softc;
again:
	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
	    g_mirror_disk_state2str(state));
	switch (state) {
	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. New disk arrive.
		 */
		/* Previous state should be NONE. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_state = state;
		/* Keep the disk list sorted by descending priority. */
		if (LIST_EMPTY(&sc->sc_disks))
			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
		else {
			struct g_mirror_disk *dp;

			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
				if (disk->d_priority >= dp->d_priority) {
					LIST_INSERT_BEFORE(dp, disk, d_next);
					/* dp == NULL marks "already inserted". */
					dp = NULL;
					break;
				}
				if (LIST_NEXT(dp, d_next) == NULL)
					break;
			}
			if (dp != NULL)
				LIST_INSERT_AFTER(dp, disk, d_next);
		}
		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			break;
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/* Device already running: decide the disk's real state now. */
		state = g_mirror_determine_state(disk);
		if (state != G_MIRROR_DISK_STATE_NONE)
			goto again;
		break;
	case G_MIRROR_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. New disk does not need synchronization.
		 * 2. Synchronization process finished successfully.
		 */
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/* Previous state should be NEW or SYNCHRONIZING. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
			g_mirror_sync_stop(disk, 0);
		}
		disk->d_state = state;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		g_mirror_update_idle(sc, disk);
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. Stale disk was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * STALE state is only possible if device is marked
		 * NOAUTOSYNC.
		 */
		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. Disk which needs synchronization was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		/* Rebuild can only start once the provider exists. */
		if (sc->sc_provider != NULL) {
			g_mirror_sync_start(disk);
			g_mirror_update_metadata(disk);
		}
		break;
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but disk disappear.
		 * 2. Disk was active and disapppear.
		 * 3. Disk disappear during synchronization process.
		 */
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
			/*
			 * Previous state should be ACTIVE, STALE or
			 * SYNCHRONIZING.
			 */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
			/* Previous state should be NEW. */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
			/*
			 * Reset bumping syncid if disk disappeared in STARTING
			 * state.
			 */
			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
#ifdef	INVARIANTS
		} else {
			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
			    sc->sc_name,
			    g_mirror_device_state2str(sc->sc_state),
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
#endif
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		break;
	case G_MIRROR_DISK_STATE_DESTROY:
	    {
		int error;

		error = g_mirror_clear_metadata(disk);
		if (error != 0)
			return (error);
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		sc->sc_ndisks--;
		/* Record the smaller disk count on remaining components. */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			g_mirror_update_metadata(disk);
		}
		break;
	    }
	default:
		KASSERT(1 == 0, ("Unknown state (%u).", state));
		break;
	}
	return (0);
}
#undef	DISK_STATE_CHANGED

int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
	struct g_provider *pp;
	u_char *buf;
	int error;

g_topology_assert(); 2375 2376 error = g_access(cp, 1, 0, 0); 2377 if (error != 0) 2378 return (error); 2379 pp = cp->provider; 2380 g_topology_unlock(); 2381 /* Metadata are stored on last sector. */ 2382 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2383 &error); 2384 g_topology_lock(); 2385 g_access(cp, -1, 0, 0); 2386 if (buf == NULL) { 2387 G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2388 cp->provider->name, error); 2389 return (error); 2390 } 2391 2392 /* Decode metadata. */ 2393 error = mirror_metadata_decode(buf, md); 2394 g_free(buf); 2395 if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0) 2396 return (EINVAL); 2397 if (md->md_version > G_MIRROR_VERSION) { 2398 G_MIRROR_DEBUG(0, 2399 "Kernel module is too old to handle metadata from %s.", 2400 cp->provider->name); 2401 return (EINVAL); 2402 } 2403 if (error != 0) { 2404 G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2405 cp->provider->name); 2406 return (error); 2407 } 2408 2409 return (0); 2410} 2411 2412static int 2413g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, 2414 struct g_mirror_metadata *md) 2415{ 2416 2417 if (g_mirror_id2disk(sc, md->md_did) != NULL) { 2418 G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", 2419 pp->name, md->md_did); 2420 return (EEXIST); 2421 } 2422 if (md->md_all != sc->sc_ndisks) { 2423 G_MIRROR_DEBUG(1, 2424 "Invalid '%s' field on disk %s (device %s), skipping.", 2425 "md_all", pp->name, sc->sc_name); 2426 return (EINVAL); 2427 } 2428 if (md->md_slice != sc->sc_slice) { 2429 G_MIRROR_DEBUG(1, 2430 "Invalid '%s' field on disk %s (device %s), skipping.", 2431 "md_slice", pp->name, sc->sc_name); 2432 return (EINVAL); 2433 } 2434 if (md->md_balance != sc->sc_balance) { 2435 G_MIRROR_DEBUG(1, 2436 "Invalid '%s' field on disk %s (device %s), skipping.", 2437 "md_balance", pp->name, sc->sc_name); 2438 return (EINVAL); 2439 } 2440 if (md->md_mediasize != sc->sc_mediasize) { 2441 
G_MIRROR_DEBUG(1, 2442 "Invalid '%s' field on disk %s (device %s), skipping.", 2443 "md_mediasize", pp->name, sc->sc_name); 2444 return (EINVAL); 2445 } 2446 if (sc->sc_mediasize > pp->mediasize) { 2447 G_MIRROR_DEBUG(1, 2448 "Invalid size of disk %s (device %s), skipping.", pp->name, 2449 sc->sc_name); 2450 return (EINVAL); 2451 } 2452 if (md->md_sectorsize != sc->sc_sectorsize) { 2453 G_MIRROR_DEBUG(1, 2454 "Invalid '%s' field on disk %s (device %s), skipping.", 2455 "md_sectorsize", pp->name, sc->sc_name); 2456 return (EINVAL); 2457 } 2458 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2459 G_MIRROR_DEBUG(1, 2460 "Invalid sector size of disk %s (device %s), skipping.", 2461 pp->name, sc->sc_name); 2462 return (EINVAL); 2463 } 2464 if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) { 2465 G_MIRROR_DEBUG(1, 2466 "Invalid device flags on disk %s (device %s), skipping.", 2467 pp->name, sc->sc_name); 2468 return (EINVAL); 2469 } 2470 if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) { 2471 G_MIRROR_DEBUG(1, 2472 "Invalid disk flags on disk %s (device %s), skipping.", 2473 pp->name, sc->sc_name); 2474 return (EINVAL); 2475 } 2476 return (0); 2477} 2478 2479int 2480g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, 2481 struct g_mirror_metadata *md) 2482{ 2483 struct g_mirror_disk *disk; 2484 int error; 2485 2486 g_topology_assert(); 2487 G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); 2488 2489 error = g_mirror_check_metadata(sc, pp, md); 2490 if (error != 0) 2491 return (error); 2492 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && 2493 md->md_genid < sc->sc_genid) { 2494 G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", 2495 pp->name, sc->sc_name); 2496 return (EINVAL); 2497 } 2498 disk = g_mirror_init_disk(sc, pp, md, &error); 2499 if (disk == NULL) 2500 return (error); 2501 error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, 2502 G_MIRROR_EVENT_WAIT); 2503 if (error != 0) 2504 return (error); 2505 if 
(md->md_version < G_MIRROR_VERSION) { 2506 G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 2507 pp->name, md->md_version, G_MIRROR_VERSION); 2508 g_mirror_update_metadata(disk); 2509 } 2510 return (0); 2511} 2512 2513static int 2514g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) 2515{ 2516 struct g_mirror_softc *sc; 2517 int dcr, dcw, dce; 2518 2519 g_topology_assert(); 2520 G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 2521 acw, ace); 2522 2523 dcr = pp->acr + acr; 2524 dcw = pp->acw + acw; 2525 dce = pp->ace + ace; 2526 2527 sc = pp->geom->softc; 2528 if (sc == NULL || LIST_EMPTY(&sc->sc_disks) || 2529 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 2530 if (acr <= 0 && acw <= 0 && ace <= 0) 2531 return (0); 2532 else 2533 return (ENXIO); 2534 } 2535 if (dcw == 0 && !sc->sc_idle) 2536 g_mirror_idle(sc, 1); 2537 return (0); 2538} 2539 2540static struct g_geom * 2541g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) 2542{ 2543 struct g_mirror_softc *sc; 2544 struct g_geom *gp; 2545 int error, timeout; 2546 2547 g_topology_assert(); 2548 G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 2549 md->md_mid); 2550 2551 /* One disk is minimum. */ 2552 if (md->md_all < 1) 2553 return (NULL); 2554 /* 2555 * Action geom. 
2556 */ 2557 gp = g_new_geomf(mp, "%s", md->md_name); 2558 sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); 2559 gp->start = g_mirror_start; 2560 gp->orphan = g_mirror_orphan; 2561 gp->access = g_mirror_access; 2562 gp->dumpconf = g_mirror_dumpconf; 2563 2564 sc->sc_id = md->md_mid; 2565 sc->sc_slice = md->md_slice; 2566 sc->sc_balance = md->md_balance; 2567 sc->sc_mediasize = md->md_mediasize; 2568 sc->sc_sectorsize = md->md_sectorsize; 2569 sc->sc_ndisks = md->md_all; 2570 sc->sc_flags = md->md_mflags; 2571 sc->sc_bump_id = 0; 2572 sc->sc_idle = 1; 2573 sc->sc_last_write = time_uptime; 2574 sc->sc_writes = 0; 2575 bioq_init(&sc->sc_queue); 2576 mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); 2577 LIST_INIT(&sc->sc_disks); 2578 TAILQ_INIT(&sc->sc_events); 2579 mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); 2580 callout_init(&sc->sc_callout, CALLOUT_MPSAFE); 2581 sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; 2582 gp->softc = sc; 2583 sc->sc_geom = gp; 2584 sc->sc_provider = NULL; 2585 /* 2586 * Synchronization geom. 2587 */ 2588 gp = g_new_geomf(mp, "%s.sync", md->md_name); 2589 gp->softc = sc; 2590 gp->orphan = g_mirror_orphan; 2591 sc->sc_sync.ds_geom = gp; 2592 sc->sc_sync.ds_ndisks = 0; 2593 error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, 2594 "g_mirror %s", md->md_name); 2595 if (error != 0) { 2596 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", 2597 sc->sc_name); 2598 g_destroy_geom(sc->sc_sync.ds_geom); 2599 mtx_destroy(&sc->sc_events_mtx); 2600 mtx_destroy(&sc->sc_queue_mtx); 2601 g_destroy_geom(sc->sc_geom); 2602 free(sc, M_MIRROR); 2603 return (NULL); 2604 } 2605 2606 G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 2607 2608 sc->sc_rootmount = root_mount_hold("GMIRROR"); 2609 G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 2610 /* 2611 * Run timeout. 
2612 */ 2613 timeout = g_mirror_timeout * hz; 2614 callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); 2615 return (sc->sc_geom); 2616} 2617 2618int 2619g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force) 2620{ 2621 struct g_provider *pp; 2622 2623 g_topology_assert(); 2624 2625 if (sc == NULL) 2626 return (ENXIO); 2627 pp = sc->sc_provider; 2628 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 2629 if (force) { 2630 G_MIRROR_DEBUG(1, "Device %s is still open, so it " 2631 "can't be definitely removed.", pp->name); 2632 } else { 2633 G_MIRROR_DEBUG(1, 2634 "Device %s is still open (r%dw%de%d).", pp->name, 2635 pp->acr, pp->acw, pp->ace); 2636 return (EBUSY); 2637 } 2638 } 2639 2640 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2641 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT; 2642 g_topology_unlock(); 2643 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 2644 mtx_lock(&sc->sc_queue_mtx); 2645 wakeup(sc); 2646 mtx_unlock(&sc->sc_queue_mtx); 2647 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 2648 while (sc->sc_worker != NULL) 2649 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); 2650 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 2651 g_topology_lock(); 2652 g_mirror_destroy_device(sc); 2653 free(sc, M_MIRROR); 2654 return (0); 2655} 2656 2657static void 2658g_mirror_taste_orphan(struct g_consumer *cp) 2659{ 2660 2661 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 2662 cp->provider->name)); 2663} 2664 2665static struct g_geom * 2666g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 2667{ 2668 struct g_mirror_metadata md; 2669 struct g_mirror_softc *sc; 2670 struct g_consumer *cp; 2671 struct g_geom *gp; 2672 int error; 2673 2674 g_topology_assert(); 2675 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 2676 G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); 2677 2678 gp = g_new_geomf(mp, "mirror:taste"); 2679 /* 2680 * This orphan function 
should be never called. 2681 */ 2682 gp->orphan = g_mirror_taste_orphan; 2683 cp = g_new_consumer(gp); 2684 g_attach(cp, pp); 2685 error = g_mirror_read_metadata(cp, &md); 2686 g_detach(cp); 2687 g_destroy_consumer(cp); 2688 g_destroy_geom(gp); 2689 if (error != 0) 2690 return (NULL); 2691 gp = NULL; 2692 2693 if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0) 2694 return (NULL); 2695 if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 2696 return (NULL); 2697 if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { 2698 G_MIRROR_DEBUG(0, 2699 "Device %s: provider %s marked as inactive, skipping.", 2700 md.md_name, pp->name); 2701 return (NULL); 2702 } 2703 if (g_mirror_debug >= 2) 2704 mirror_metadata_dump(&md); 2705 2706 /* 2707 * Let's check if device already exists. 2708 */ 2709 sc = NULL; 2710 LIST_FOREACH(gp, &mp->geom, geom) { 2711 sc = gp->softc; 2712 if (sc == NULL) 2713 continue; 2714 if (sc->sc_sync.ds_geom == gp) 2715 continue; 2716 if (strcmp(md.md_name, sc->sc_name) != 0) 2717 continue; 2718 if (md.md_mid != sc->sc_id) { 2719 G_MIRROR_DEBUG(0, "Device %s already configured.", 2720 sc->sc_name); 2721 return (NULL); 2722 } 2723 break; 2724 } 2725 if (gp == NULL) { 2726 gp = g_mirror_create(mp, &md); 2727 if (gp == NULL) { 2728 G_MIRROR_DEBUG(0, "Cannot create device %s.", 2729 md.md_name); 2730 return (NULL); 2731 } 2732 sc = gp->softc; 2733 } 2734 G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 2735 error = g_mirror_add_disk(sc, pp, &md); 2736 if (error != 0) { 2737 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 2738 pp->name, gp->name, error); 2739 if (LIST_EMPTY(&sc->sc_disks)) 2740 g_mirror_destroy(sc, 1); 2741 return (NULL); 2742 } 2743 return (gp); 2744} 2745 2746static int 2747g_mirror_destroy_geom(struct gctl_req *req __unused, 2748 struct g_class *mp __unused, struct g_geom *gp) 2749{ 2750 2751 return (g_mirror_destroy(gp->softc, 0)); 2752} 2753 2754static void 2755g_mirror_dumpconf(struct 
sbuf *sb, const char *indent, struct g_geom *gp, 2756 struct g_consumer *cp, struct g_provider *pp) 2757{ 2758 struct g_mirror_softc *sc; 2759 2760 g_topology_assert(); 2761 2762 sc = gp->softc; 2763 if (sc == NULL) 2764 return; 2765 /* Skip synchronization geom. */ 2766 if (gp == sc->sc_sync.ds_geom) 2767 return; 2768 if (pp != NULL) { 2769 /* Nothing here. */ 2770 } else if (cp != NULL) { 2771 struct g_mirror_disk *disk; 2772 2773 disk = cp->private; 2774 if (disk == NULL) 2775 return; 2776 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id); 2777 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2778 sbuf_printf(sb, "%s<Synchronized>", indent); 2779 if (disk->d_sync.ds_offset_done == 0) 2780 sbuf_printf(sb, "0%%"); 2781 else { 2782 sbuf_printf(sb, "%u%%", 2783 (u_int)((disk->d_sync.ds_offset_done * 100) / 2784 sc->sc_provider->mediasize)); 2785 } 2786 sbuf_printf(sb, "</Synchronized>\n"); 2787 } 2788 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 2789 disk->d_sync.ds_syncid); 2790 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, 2791 disk->d_genid); 2792 sbuf_printf(sb, "%s<Flags>", indent); 2793 if (disk->d_flags == 0) 2794 sbuf_printf(sb, "NONE"); 2795 else { 2796 int first = 1; 2797 2798#define ADD_FLAG(flag, name) do { \ 2799 if ((disk->d_flags & (flag)) != 0) { \ 2800 if (!first) \ 2801 sbuf_printf(sb, ", "); \ 2802 else \ 2803 first = 0; \ 2804 sbuf_printf(sb, name); \ 2805 } \ 2806} while (0) 2807 ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); 2808 ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); 2809 ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); 2810 ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, 2811 "SYNCHRONIZING"); 2812 ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 2813 ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); 2814#undef ADD_FLAG 2815 } 2816 sbuf_printf(sb, "</Flags>\n"); 2817 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent, 2818 disk->d_priority); 2819 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 
2820 g_mirror_disk_state2str(disk->d_state)); 2821 } else { 2822 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 2823 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 2824 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 2825 sbuf_printf(sb, "%s<Flags>", indent); 2826 if (sc->sc_flags == 0) 2827 sbuf_printf(sb, "NONE"); 2828 else { 2829 int first = 1; 2830 2831#define ADD_FLAG(flag, name) do { \ 2832 if ((sc->sc_flags & (flag)) != 0) { \ 2833 if (!first) \ 2834 sbuf_printf(sb, ", "); \ 2835 else \ 2836 first = 0; \ 2837 sbuf_printf(sb, name); \ 2838 } \ 2839} while (0) 2840 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 2841#undef ADD_FLAG 2842 } 2843 sbuf_printf(sb, "</Flags>\n"); 2844 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent, 2845 (u_int)sc->sc_slice); 2846 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent, 2847 balance_name(sc->sc_balance)); 2848 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 2849 sc->sc_ndisks); 2850 sbuf_printf(sb, "%s<State>", indent); 2851 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2852 sbuf_printf(sb, "%s", "STARTING"); 2853 else if (sc->sc_ndisks == 2854 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) 2855 sbuf_printf(sb, "%s", "COMPLETE"); 2856 else 2857 sbuf_printf(sb, "%s", "DEGRADED"); 2858 sbuf_printf(sb, "</State>\n"); 2859 } 2860} 2861 2862static void 2863g_mirror_shutdown(void *arg, int howto) 2864{ 2865 struct g_class *mp; 2866 struct g_geom *gp, *gp2; 2867 2868 mp = arg; 2869 DROP_GIANT(); 2870 g_topology_lock(); 2871 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 2872 if (gp->softc == NULL) 2873 continue; 2874 g_mirror_destroy(gp->softc, 1); 2875 } 2876 g_topology_unlock(); 2877 PICKUP_GIANT(); 2878#if 0 2879 tsleep(&gp, PRIBIO, "m:shutdown", hz * 20); 2880#endif 2881} 2882 2883static void 2884g_mirror_init(struct g_class *mp) 2885{ 2886 2887 g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync, 2888 g_mirror_shutdown, mp, 
SHUTDOWN_PRI_FIRST); 2889 if (g_mirror_ehtag == NULL) 2890 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event."); 2891} 2892 2893static void 2894g_mirror_fini(struct g_class *mp) 2895{ 2896 2897 if (g_mirror_ehtag == NULL) 2898 return; 2899 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag); 2900} 2901 2902DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); 2903