/* g_mirror.c — FreeBSD GEOM mirror (RAID1) class, revision 137490. */
/*-
 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137490 2004-11-09 23:27:21Z pjd $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <geom/mirror/g_mirror.h>


static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

/* Tunables / sysctls under kern.geom.mirror. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");

/*
 * msleep() wrapper that logs the sleep and wakeup at debug level 4.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_ehtag = NULL;

static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;
static void g_mirror_init(struct g_class *mp);
static void g_mirror_fini(struct g_class *mp);

/* GEOM class descriptor registered with the GEOM framework. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state,
    int waitidle);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force,
    int waitidle);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);


/*
 * Translate a disk state constant into a human-readable string.
 */
static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

/*
 * Translate a device state constant into a human-readable string.
 */
static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	/* Consumer may already be detached; fall back to a placeholder. */
	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain disks and device status
 * from one thread to simplify locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

/*
 * Queue an event for the worker thread. 'arg' is the softc when
 * G_MIRROR_EVENT_DEVICE is set, otherwise the disk. Unless
 * G_MIRROR_EVENT_DONTWAIT is given, drop the topology lock and sleep
 * until the worker marks the event G_MIRROR_EVENT_DONE, then return
 * the worker's e_error.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		/* PDROP: the mutex is released again on each wakeup. */
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_mirror_event_free(ep);
	return (error);
}

/*
 * Pop the first pending event off the queue, or NULL if none.
 */
static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	if (ep != NULL)
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

/*
 * Cancel all pending events for the given disk: free fire-and-forget
 * events, fail waited-on ones with ECANCELED and wake their senders.
 */
static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	g_topology_assert();

	sc = disk->d_softc;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

/*
 * Count bios in the device queue that originated from the given consumer.
 */
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

/*
 * Return non-zero if the consumer still has I/O in flight (cp->index)
 * or queued requests, i.e. it cannot be destroyed yet.
 */
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

/*
 * Detach and destroy the consumer, but only once it has no pending I/O;
 * otherwise just clear its back-pointer and leave it for later.
 */
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

/*
 * Create a consumer for the disk and attach it to the given provider.
 */
static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	int error;

	g_topology_assert();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;
	error = g_attach(disk->d_consumer, pp);
	if (error != 0)
		return (error);
	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

/*
 * Drop any access counts the consumer holds and destroy it (deferred
 * via g_mirror_kill_consumer() if it is still busy).
 */
static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL) {
		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
			    0);
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
		}
		g_mirror_kill_consumer(sc, cp);
	} else {
		g_destroy_consumer(cp);
	}
}

/*
 * Initialize disk. This means allocate memory, create consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	/* Seed the disk state from the on-disk metadata. */
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_delay.sec = 0;
	disk->d_delay.frac = 0;
	binuptime(&disk->d_last_used);
	disk->d_flags = md->md_dflags;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_resync = -1;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL) {
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
	}
	return (NULL);
}

static void
g_mirror_destroy_disk(struct
g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	/* Unlink the disk, cancel its pending events and drop its consumer. */
	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	sc = disk->d_softc;
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

/*
 * Tear down the whole mirror device: provider, all disks (marked clean
 * first), pending events, the sync geom and the device geom itself.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert();

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	/* Fail every event still queued; wake up waiting senders. */
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);
	gp->softc = NULL;

	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	sc->sc_sync.ds_geom->softc = NULL;
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
}

/*
 * GEOM orphan method: underlying provider went away, disconnect the disk.
 */
static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * GEOM spoiled method: someone else opened the provider for writing,
 * so the component can no longer be trusted — disconnect it.
 */
static void
g_mirror_spoiled(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Function should return the next active disk on the list.
 * It is possible that it will be the same disk as given.
 * If there are no active disks on list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	/* Walk the list circularly, wrapping to the head at the tail. */
	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

/*
 * Round-robin selection of an active disk, remembered in sc_hint.
 */
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk
 *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int close = 0, error = 0;

	g_topology_assert();

	sc = disk->d_softc;
	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	/* Metadata lives in the last sector of the component. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	/*
	 * Open consumer if it wasn't opened and remember to close it.
	 */
	if (cp->acw == 0) {
		error = g_access(cp, 0, 1, 1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, 1, 1, error);
		if (error == 0)
			close = 1;
#ifdef INVARIANTS
	} else {
		KASSERT(cp->acw > 0 && cp->ace > 0,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
#endif
	}
	if (error == 0) {
		/* md == NULL means "clear metadata": write the zeroed sector. */
		if (md != NULL)
			mirror_metadata_encode(md, sector);
		g_topology_unlock();
		error = g_write_data(cp, offset, sector, length);
		g_topology_lock();
	}
	free(sector, M_MIRROR);
	if (close) {
		g_access(cp, 0, -1, -1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, -1, -1, 0);
	}
	if (error != 0) {
		/* Write failure: kick the disk out of the mirror. */
		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
	return (error);
}

/*
 * Erase the on-disk metadata by writing a zeroed metadata sector.
 */
static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert();
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

/*
 * Fill a metadata structure from the device softc and (optionally) a
 * specific disk; disk == NULL produces template metadata with a fresh
 * random disk ID.
 */
void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		/* Only a synchronizing disk records its resume offset. */
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
	}
}

/*
 * Rebuild the disk's metadata from current state and write it out.
 */
void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_metadata md;
	int error;

	g_topology_assert();
	g_mirror_fill_metadata(disk->d_softc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

/*
 * Increase the device sync ID and propagate it to the metadata of all
 * ACTIVE and SYNCHRONIZING disks.
 */
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc, int waitidle)
{
	struct g_mirror_disk *disk;

	g_topology_assert();
	KASSERT(g_mirror_ndisks(sc,
	    G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			if (waitidle)
				g_waitidlelock();
			g_mirror_update_metadata(disk);
		}
	}
}

/*
 * Device has been idle for g_mirror_idletime: clear the DIRTY flag on
 * all active disks so a crash will not force a full resynchronization.
 */
static void
g_mirror_idle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
		return;
	sc->sc_idle = 1;
	g_topology_lock();
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	g_topology_unlock();
}

/*
 * Writes resumed: mark all active disks dirty again before any data
 * reaches the components.
 */
static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	sc->sc_idle = 0;
	g_topology_lock();
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	g_topology_unlock();
}

/*
 * Return 1 if we should check if mirror is idling.
 */
static int
g_mirror_check_idle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_idle)
		return (0);
	if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
		return (0);
	/*
	 * Check if there are no in-flight requests.
	 */
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		if (disk->d_consumer->index > 0)
			return (0);
	}
	return (1);
}

/*
 * Three-way compare of two bintimes: -1, 0 or 1.
 */
static __inline int
bintime_cmp(struct bintime *bt1, struct bintime *bt2)
{

	if (bt1->sec < bt2->sec)
		return (-1);
	else if (bt1->sec > bt2->sec)
		return (1);
	if (bt1->frac < bt2->frac)
		return (-1);
	else if (bt1->frac > bt2->frac)
		return (1);
	return (0);
}

/*
 * Record the request's service time in d_delay (only meaningful for the
 * LOAD balance algorithm).
 */
static void
g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
{

	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
		return;
	binuptime(&disk->d_delay);
	bintime_sub(&disk->d_delay, &bp->bio_t0);
}

/*
 * Completion callback for regular (non-sync) component I/O: tag the bio
 * and hand it to the worker thread via the device queue.
 */
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Worker-side handling of a completed regular request. Accounts the
 * child bio into its parent, disconnects a disk on I/O error, and
 * delivers the parent (or requeues a failed READ for retry on another
 * component) once all children are in.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	bp->bio_from->index--;
	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned while the request was in flight. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/* Forget the failed child so success is judged on
			 * the remaining components. */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/* Requeue the READ to try another component. */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

/*
 * Completion callback for synchronization I/O: tag the bio and hand it
 * to the worker thread via the device queue.
 */
static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * GEOM start method: queue incoming READ/WRITE/DELETE requests for the
 * worker thread; everything else is rejected with EOPNOTSUPP.
 */
static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Send one synchronization request.
 */
static void
g_mirror_sync_one(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct bio *bp;

	sc = disk->d_softc;
	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));

	bp = g_new_bio();
	if (bp == NULL)
		return;
	bp->bio_parent = NULL;
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = disk->d_sync.ds_offset;
	/* Read up to MAXPHYS, clamped to the end of the mirror. */
	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
	bp->bio_cflags = 0;
	bp->bio_done = g_mirror_sync_done;
	bp->bio_data = disk->d_sync.ds_data;
	if (bp->bio_data == NULL) {
		g_destroy_bio(bp);
		return;
	}
	disk->d_sync.ds_offset += bp->bio_length;
	bp->bio_to = sc->sc_provider;
	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
	disk->d_sync.ds_consumer->index++;
	g_io_request(bp, disk->d_sync.ds_consumer);
}

/*
 * Worker-side handling of a completed synchronization bio: a finished
 * READ from the mirror is turned into a WRITE to the synchronizing
 * component; a finished WRITE advances ds_offset_done, periodically
 * flushing metadata, and activates the disk once it is up to date.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned while the request was in flight. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		/* Reuse the same bio to write the data to the sync target. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		if (sync->ds_resync != -1)
			break;
		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_waitidlelock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

/*
 * PREFER balance: route the request to the first ACTIVE disk on the
 * list (the list is expected to be priority-ordered by the caller
 * side; confirm against the insertion code elsewhere in this file).
 */
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr > 0 && cp->ace > 0,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * ROUND-ROBIN balance: rotate reads across active disks via
 * g_mirror_get_disk().
 */
static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr > 0 && cp->ace > 0,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * LOAD balance: pick the active disk with the smallest recorded service
 * delay, preferring any disk idle for two seconds or more.
 *
 * NOTE(review): unlike g_mirror_request_prefer() and
 * g_mirror_request_round_robin(), this function does not handle
 * disk == NULL (no ACTIVE disks) before dereferencing it — it appears
 * to rely on the caller guaranteeing an active disk exists; confirm.
 */
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	struct bintime curtime;

	binuptime(&curtime);
	/*
	 * Find a disk which the smallest load.
	 */
	disk = NULL;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		/* If disk wasn't used for more than 2 sec, use it. */
		if (curtime.sec - dp->d_last_used.sec >= 2) {
			disk = dp;
			break;
		}
		if (disk == NULL ||
		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
			disk = dp;
		}
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	binuptime(&disk->d_last_used);
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr > 0 && cp->ace > 0,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

/*
 * SPLIT balance: large requests are divided across all active disks;
 * small ones (<= sc_slice) fall back to round-robin.
 */
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in nice and clean way.
1227 */ 1228 left = bp->bio_length; 1229 offset = bp->bio_offset; 1230 data = bp->bio_data; 1231 bioq_init(&queue); 1232 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1233 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1234 continue; 1235 cbp = g_clone_bio(bp); 1236 if (cbp == NULL) { 1237 for (cbp = bioq_first(&queue); cbp != NULL; 1238 cbp = bioq_first(&queue)) { 1239 bioq_remove(&queue, cbp); 1240 g_destroy_bio(cbp); 1241 } 1242 if (bp->bio_error == 0) 1243 bp->bio_error = ENOMEM; 1244 g_io_deliver(bp, bp->bio_error); 1245 return; 1246 } 1247 bioq_insert_tail(&queue, cbp); 1248 cbp->bio_done = g_mirror_done; 1249 cbp->bio_caller1 = disk; 1250 cbp->bio_to = disk->d_consumer->provider; 1251 cbp->bio_offset = offset; 1252 cbp->bio_data = data; 1253 cbp->bio_length = MIN(left, slice); 1254 left -= cbp->bio_length; 1255 if (left == 0) 1256 break; 1257 offset += cbp->bio_length; 1258 data += cbp->bio_length; 1259 } 1260 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1261 bioq_remove(&queue, cbp); 1262 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1263 disk = cbp->bio_caller1; 1264 cbp->bio_caller1 = NULL; 1265 cp = disk->d_consumer; 1266 KASSERT(cp->acr > 0 && cp->ace > 0, 1267 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1268 cp->acr, cp->acw, cp->ace)); 1269 disk->d_consumer->index++; 1270 g_io_request(cbp, disk->d_consumer); 1271 } 1272} 1273 1274static void 1275g_mirror_register_request(struct bio *bp) 1276{ 1277 struct g_mirror_softc *sc; 1278 1279 sc = bp->bio_to->geom->softc; 1280 switch (bp->bio_cmd) { 1281 case BIO_READ: 1282 switch (sc->sc_balance) { 1283 case G_MIRROR_BALANCE_LOAD: 1284 g_mirror_request_load(sc, bp); 1285 break; 1286 case G_MIRROR_BALANCE_PREFER: 1287 g_mirror_request_prefer(sc, bp); 1288 break; 1289 case G_MIRROR_BALANCE_ROUND_ROBIN: 1290 g_mirror_request_round_robin(sc, bp); 1291 break; 1292 case G_MIRROR_BALANCE_SPLIT: 1293 g_mirror_request_split(sc, bp); 1294 break; 1295 } 1296 return; 1297 case 
BIO_WRITE: 1298 case BIO_DELETE: 1299 { 1300 struct g_mirror_disk *disk; 1301 struct g_mirror_disk_sync *sync; 1302 struct bio_queue_head queue; 1303 struct g_consumer *cp; 1304 struct bio *cbp; 1305 1306 if (sc->sc_idle) 1307 g_mirror_unidle(sc); 1308 /* 1309 * Allocate all bios before sending any request, so we can 1310 * return ENOMEM in nice and clean way. 1311 */ 1312 bioq_init(&queue); 1313 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1314 sync = &disk->d_sync; 1315 switch (disk->d_state) { 1316 case G_MIRROR_DISK_STATE_ACTIVE: 1317 break; 1318 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 1319 if (bp->bio_offset >= sync->ds_offset) 1320 continue; 1321 else if (bp->bio_offset + bp->bio_length > 1322 sync->ds_offset_done && 1323 (bp->bio_offset < sync->ds_resync || 1324 sync->ds_resync == -1)) { 1325 sync->ds_resync = bp->bio_offset - 1326 (bp->bio_offset % MAXPHYS); 1327 } 1328 break; 1329 default: 1330 continue; 1331 } 1332 cbp = g_clone_bio(bp); 1333 if (cbp == NULL) { 1334 for (cbp = bioq_first(&queue); cbp != NULL; 1335 cbp = bioq_first(&queue)) { 1336 bioq_remove(&queue, cbp); 1337 g_destroy_bio(cbp); 1338 } 1339 if (bp->bio_error == 0) 1340 bp->bio_error = ENOMEM; 1341 g_io_deliver(bp, bp->bio_error); 1342 return; 1343 } 1344 bioq_insert_tail(&queue, cbp); 1345 cbp->bio_done = g_mirror_done; 1346 cp = disk->d_consumer; 1347 cbp->bio_caller1 = cp; 1348 cbp->bio_to = cp->provider; 1349 KASSERT(cp->acw > 0 && cp->ace > 0, 1350 ("Consumer %s not opened (r%dw%de%d).", 1351 cp->provider->name, cp->acr, cp->acw, cp->ace)); 1352 } 1353 for (cbp = bioq_first(&queue); cbp != NULL; 1354 cbp = bioq_first(&queue)) { 1355 bioq_remove(&queue, cbp); 1356 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1357 cp = cbp->bio_caller1; 1358 cbp->bio_caller1 = NULL; 1359 cp->index++; 1360 g_io_request(cbp, cp); 1361 } 1362 /* 1363 * Bump syncid on first write. 
1364 */ 1365 if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) { 1366 sc->sc_bump_syncid = 0; 1367 g_topology_lock(); 1368 g_mirror_bump_syncid(sc, 1); 1369 g_topology_unlock(); 1370 } 1371 return; 1372 } 1373 default: 1374 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1375 bp->bio_cmd, sc->sc_name)); 1376 break; 1377 } 1378} 1379 1380static int 1381g_mirror_can_destroy(struct g_mirror_softc *sc) 1382{ 1383 struct g_geom *gp; 1384 struct g_consumer *cp; 1385 1386 g_topology_assert(); 1387 gp = sc->sc_geom; 1388 LIST_FOREACH(cp, &gp->consumer, consumer) { 1389 if (g_mirror_is_busy(sc, cp)) 1390 return (0); 1391 } 1392 gp = sc->sc_sync.ds_geom; 1393 LIST_FOREACH(cp, &gp->consumer, consumer) { 1394 if (g_mirror_is_busy(sc, cp)) 1395 return (0); 1396 } 1397 G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.", 1398 sc->sc_name); 1399 return (1); 1400} 1401 1402static int 1403g_mirror_try_destroy(struct g_mirror_softc *sc) 1404{ 1405 1406 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) { 1407 g_topology_lock(); 1408 if (!g_mirror_can_destroy(sc)) { 1409 g_topology_unlock(); 1410 return (0); 1411 } 1412 g_topology_unlock(); 1413 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1414 &sc->sc_worker); 1415 wakeup(&sc->sc_worker); 1416 sc->sc_worker = NULL; 1417 } else { 1418 g_topology_lock(); 1419 if (!g_mirror_can_destroy(sc)) { 1420 g_topology_unlock(); 1421 return (0); 1422 } 1423 g_mirror_destroy_device(sc); 1424 g_topology_unlock(); 1425 free(sc, M_MIRROR); 1426 } 1427 return (1); 1428} 1429 1430/* 1431 * Worker thread. 
1432 */ 1433static void 1434g_mirror_worker(void *arg) 1435{ 1436 struct g_mirror_softc *sc; 1437 struct g_mirror_disk *disk; 1438 struct g_mirror_disk_sync *sync; 1439 struct g_mirror_event *ep; 1440 struct bio *bp; 1441 u_int nreqs; 1442 1443 sc = arg; 1444 curthread->td_base_pri = PRIBIO; 1445 1446 nreqs = 0; 1447 for (;;) { 1448 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); 1449 /* 1450 * First take a look at events. 1451 * This is important to handle events before any I/O requests. 1452 */ 1453 ep = g_mirror_event_get(sc); 1454 if (ep != NULL) { 1455 int waitidle = 0; 1456 1457 g_topology_lock(); 1458 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 1459 waitidle = 1; 1460 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { 1461 /* Update only device status. */ 1462 G_MIRROR_DEBUG(3, 1463 "Running event for device %s.", 1464 sc->sc_name); 1465 ep->e_error = 0; 1466 g_mirror_update_device(sc, 1, waitidle); 1467 } else { 1468 /* Update disk status. */ 1469 G_MIRROR_DEBUG(3, "Running event for disk %s.", 1470 g_mirror_get_diskname(ep->e_disk)); 1471 ep->e_error = g_mirror_update_disk(ep->e_disk, 1472 ep->e_state, waitidle); 1473 if (ep->e_error == 0) 1474 g_mirror_update_device(sc, 0, waitidle); 1475 } 1476 g_topology_unlock(); 1477 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { 1478 KASSERT(ep->e_error == 0, 1479 ("Error cannot be handled.")); 1480 g_mirror_event_free(ep); 1481 } else { 1482 ep->e_flags |= G_MIRROR_EVENT_DONE; 1483 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1484 ep); 1485 mtx_lock(&sc->sc_events_mtx); 1486 wakeup(ep); 1487 mtx_unlock(&sc->sc_events_mtx); 1488 } 1489 if ((sc->sc_flags & 1490 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1491 if (g_mirror_try_destroy(sc)) 1492 kthread_exit(0); 1493 } 1494 G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); 1495 continue; 1496 } 1497 /* 1498 * Now I/O requests. 1499 */ 1500 /* Get first request from the queue. 
*/ 1501 mtx_lock(&sc->sc_queue_mtx); 1502 bp = bioq_first(&sc->sc_queue); 1503 if (bp == NULL) { 1504 if ((sc->sc_flags & 1505 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1506 mtx_unlock(&sc->sc_queue_mtx); 1507 if (g_mirror_try_destroy(sc)) 1508 kthread_exit(0); 1509 mtx_lock(&sc->sc_queue_mtx); 1510 } 1511 } 1512 if (sc->sc_sync.ds_ndisks > 0 && 1513 (bp == NULL || nreqs > g_mirror_reqs_per_sync)) { 1514 mtx_unlock(&sc->sc_queue_mtx); 1515 /* 1516 * It is time for synchronization... 1517 */ 1518 nreqs = 0; 1519 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1520 if (disk->d_state != 1521 G_MIRROR_DISK_STATE_SYNCHRONIZING) { 1522 continue; 1523 } 1524 sync = &disk->d_sync; 1525 if (sync->ds_offset >= 1526 sc->sc_provider->mediasize) { 1527 continue; 1528 } 1529 if (sync->ds_offset > sync->ds_offset_done) 1530 continue; 1531 if (sync->ds_resync != -1) { 1532 sync->ds_offset = sync->ds_resync; 1533 sync->ds_offset_done = sync->ds_resync; 1534 sync->ds_resync = -1; 1535 } 1536 g_mirror_sync_one(disk); 1537 } 1538 G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__); 1539 goto sleep; 1540 } 1541 if (bp == NULL) { 1542 if (g_mirror_check_idle(sc)) { 1543 u_int idletime; 1544 1545 idletime = g_mirror_idletime; 1546 if (idletime == 0) 1547 idletime = 1; 1548 idletime *= hz; 1549 if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, 1550 "m:w1", idletime) == EWOULDBLOCK) { 1551 G_MIRROR_DEBUG(5, "%s: I'm here 3.", 1552 __func__); 1553 /* 1554 * No I/O requests in 'idletime' seconds, 1555 * so mark components as clean. 
1556 */ 1557 g_mirror_idle(sc); 1558 } 1559 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); 1560 } else { 1561 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, 1562 "m:w2", 0); 1563 G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__); 1564 } 1565 continue; 1566 } 1567 nreqs++; 1568 bioq_remove(&sc->sc_queue, bp); 1569 mtx_unlock(&sc->sc_queue_mtx); 1570 1571 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) { 1572 g_mirror_regular_request(bp); 1573 } else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { 1574 u_int timeout, sps; 1575 1576 g_mirror_sync_request(bp); 1577sleep: 1578 sps = g_mirror_syncs_per_sec; 1579 if (sps == 0) { 1580 G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__); 1581 continue; 1582 } 1583 mtx_lock(&sc->sc_queue_mtx); 1584 if (bioq_first(&sc->sc_queue) != NULL) { 1585 mtx_unlock(&sc->sc_queue_mtx); 1586 G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__); 1587 continue; 1588 } 1589 timeout = hz / sps; 1590 if (timeout == 0) 1591 timeout = 1; 1592 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3", 1593 timeout); 1594 } else { 1595 g_mirror_register_request(bp); 1596 } 1597 G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__); 1598 } 1599} 1600 1601/* 1602 * Open disk's consumer if needed. 1603 */ 1604static void 1605g_mirror_update_access(struct g_mirror_disk *disk) 1606{ 1607 struct g_provider *pp; 1608 struct g_consumer *cp; 1609 int acr, acw, ace, cpw, error; 1610 1611 g_topology_assert(); 1612 1613 cp = disk->d_consumer; 1614 pp = disk->d_softc->sc_provider; 1615 if (pp == NULL) { 1616 acr = -cp->acr; 1617 acw = -cp->acw; 1618 ace = -cp->ace; 1619 } else { 1620 acr = pp->acr - cp->acr; 1621 acw = pp->acw - cp->acw; 1622 ace = pp->ace - cp->ace; 1623 /* Grab an extra "exclusive" bit. 
*/ 1624 if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) 1625 ace++; 1626 } 1627 if (acr == 0 && acw == 0 && ace == 0) 1628 return; 1629 cpw = cp->acw; 1630 error = g_access(cp, acr, acw, ace); 1631 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr, 1632 acw, ace, error); 1633 if (error != 0) { 1634 disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE; 1635 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 1636 G_MIRROR_EVENT_DONTWAIT); 1637 return; 1638 } 1639 if (cpw == 0 && cp->acw > 0) { 1640 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.", 1641 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1642 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1643 } else if (cpw > 0 && cp->acw == 0) { 1644 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 1645 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1646 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 1647 } 1648} 1649 1650static void 1651g_mirror_sync_start(struct g_mirror_disk *disk) 1652{ 1653 struct g_mirror_softc *sc; 1654 struct g_consumer *cp; 1655 int error; 1656 1657 g_topology_assert(); 1658 1659 sc = disk->d_softc; 1660 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 1661 ("Device not in RUNNING state (%s, %u).", sc->sc_name, 1662 sc->sc_state)); 1663 cp = disk->d_consumer; 1664 KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0, 1665 ("Consumer %s already opened.", cp->provider->name)); 1666 1667 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 1668 g_mirror_get_diskname(disk)); 1669 error = g_access(cp, 0, 1, 1); 1670 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1, 1671 1, error); 1672 if (error != 0) { 1673 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 1674 G_MIRROR_EVENT_DONTWAIT); 1675 return; 1676 } 1677 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1678 KASSERT(disk->d_sync.ds_consumer == NULL, 1679 ("Sync consumer already exists (device=%s, disk=%s).", 1680 sc->sc_name, 
g_mirror_get_diskname(disk))); 1681 disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom); 1682 disk->d_sync.ds_consumer->private = disk; 1683 disk->d_sync.ds_consumer->index = 0; 1684 error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider); 1685 KASSERT(error == 0, ("Cannot attach to %s (error=%d).", 1686 disk->d_softc->sc_name, error)); 1687 error = g_access(disk->d_sync.ds_consumer, 1, 0, 0); 1688 KASSERT(error == 0, ("Cannot open %s (error=%d).", 1689 disk->d_softc->sc_name, error)); 1690 disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK); 1691 sc->sc_sync.ds_ndisks++; 1692} 1693 1694/* 1695 * Stop synchronization process. 1696 * type: 0 - synchronization finished 1697 * 1 - synchronization stopped 1698 */ 1699static void 1700g_mirror_sync_stop(struct g_mirror_disk *disk, int type) 1701{ 1702 struct g_consumer *cp; 1703 1704 g_topology_assert(); 1705 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 1706 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 1707 g_mirror_disk_state2str(disk->d_state))); 1708 if (disk->d_sync.ds_consumer == NULL) 1709 return; 1710 1711 if (type == 0) { 1712 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.", 1713 disk->d_softc->sc_name, g_mirror_get_diskname(disk)); 1714 } else /* if (type == 1) */ { 1715 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 1716 disk->d_softc->sc_name, g_mirror_get_diskname(disk)); 1717 } 1718 cp = disk->d_sync.ds_consumer; 1719 g_access(cp, -1, 0, 0); 1720 g_mirror_kill_consumer(disk->d_softc, cp); 1721 free(disk->d_sync.ds_data, M_MIRROR); 1722 disk->d_sync.ds_consumer = NULL; 1723 disk->d_softc->sc_sync.ds_ndisks--; 1724 cp = disk->d_consumer; 1725 KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1, 1726 ("Consumer %s not opened.", cp->provider->name)); 1727 g_access(cp, 0, -1, -1); 1728 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1, 1729 -1, 0); 1730 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 
1731} 1732 1733static void 1734g_mirror_launch_provider(struct g_mirror_softc *sc) 1735{ 1736 struct g_mirror_disk *disk; 1737 struct g_provider *pp; 1738 1739 g_topology_assert(); 1740 1741 pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); 1742 pp->mediasize = sc->sc_mediasize; 1743 pp->sectorsize = sc->sc_sectorsize; 1744 sc->sc_provider = pp; 1745 g_error_provider(pp, 0); 1746 G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name, 1747 pp->name); 1748 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1749 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 1750 g_mirror_sync_start(disk); 1751 } 1752} 1753 1754static void 1755g_mirror_destroy_provider(struct g_mirror_softc *sc) 1756{ 1757 struct g_mirror_disk *disk; 1758 struct bio *bp; 1759 1760 g_topology_assert(); 1761 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 1762 sc->sc_name)); 1763 1764 g_error_provider(sc->sc_provider, ENXIO); 1765 mtx_lock(&sc->sc_queue_mtx); 1766 while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 1767 bioq_remove(&sc->sc_queue, bp); 1768 g_io_deliver(bp, ENXIO); 1769 } 1770 mtx_unlock(&sc->sc_queue_mtx); 1771 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 1772 sc->sc_provider->name); 1773 sc->sc_provider->flags |= G_PF_WITHER; 1774 g_orphan_provider(sc->sc_provider, ENXIO); 1775 sc->sc_provider = NULL; 1776 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1777 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 1778 g_mirror_sync_stop(disk, 1); 1779 } 1780} 1781 1782static void 1783g_mirror_go(void *arg) 1784{ 1785 struct g_mirror_softc *sc; 1786 1787 sc = arg; 1788 G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 1789 g_mirror_event_send(sc, 0, 1790 G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE); 1791} 1792 1793static u_int 1794g_mirror_determine_state(struct g_mirror_disk *disk) 1795{ 1796 struct g_mirror_softc *sc; 1797 u_int state; 1798 1799 sc = disk->d_softc; 1800 if (sc->sc_syncid == 
disk->d_sync.ds_syncid) { 1801 if ((disk->d_flags & 1802 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { 1803 /* Disk does not need synchronization. */ 1804 state = G_MIRROR_DISK_STATE_ACTIVE; 1805 } else { 1806 if ((sc->sc_flags & 1807 G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 1808 (disk->d_flags & 1809 G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 1810 /* 1811 * We can start synchronization from 1812 * the stored offset. 1813 */ 1814 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 1815 } else { 1816 state = G_MIRROR_DISK_STATE_STALE; 1817 } 1818 } 1819 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 1820 /* 1821 * Reset all synchronization data for this disk, 1822 * because if it even was synchronized, it was 1823 * synchronized to disks with different syncid. 1824 */ 1825 disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING; 1826 disk->d_sync.ds_offset = 0; 1827 disk->d_sync.ds_offset_done = 0; 1828 disk->d_sync.ds_syncid = sc->sc_syncid; 1829 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 1830 (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 1831 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 1832 } else { 1833 state = G_MIRROR_DISK_STATE_STALE; 1834 } 1835 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 1836 /* 1837 * Not good, NOT GOOD! 1838 * It means that mirror was started on stale disks 1839 * and more fresh disk just arrive. 1840 * If there were writes, mirror is fucked up, sorry. 1841 * I think the best choice here is don't touch 1842 * this disk and inform the user laudly. 1843 */ 1844 G_MIRROR_DEBUG(0, "Device %s was started before the freshest " 1845 "disk (%s) arrives!! It will not be connected to the " 1846 "running device.", sc->sc_name, 1847 g_mirror_get_diskname(disk)); 1848 g_mirror_destroy_disk(disk); 1849 state = G_MIRROR_DISK_STATE_NONE; 1850 /* Return immediately, because disk was destroyed. 
*/ 1851 return (state); 1852 } 1853 G_MIRROR_DEBUG(3, "State for %s disk: %s.", 1854 g_mirror_get_diskname(disk), g_mirror_disk_state2str(state)); 1855 return (state); 1856} 1857 1858/* 1859 * Update device state. 1860 */ 1861static void 1862g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force, int waitidle) 1863{ 1864 struct g_mirror_disk *disk; 1865 u_int state; 1866 1867 g_topology_assert(); 1868 1869 switch (sc->sc_state) { 1870 case G_MIRROR_DEVICE_STATE_STARTING: 1871 { 1872 struct g_mirror_disk *pdisk; 1873 u_int dirty, ndisks, syncid; 1874 1875 KASSERT(sc->sc_provider == NULL, 1876 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 1877 /* 1878 * Are we ready? We are, if all disks are connected or 1879 * if we have any disks and 'force' is true. 1880 */ 1881 if ((force && g_mirror_ndisks(sc, -1) > 0) || 1882 sc->sc_ndisks == g_mirror_ndisks(sc, -1)) { 1883 ; 1884 } else if (g_mirror_ndisks(sc, -1) == 0) { 1885 /* 1886 * Disks went down in starting phase, so destroy 1887 * device. 1888 */ 1889 callout_drain(&sc->sc_callout); 1890 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 1891 return; 1892 } else { 1893 return; 1894 } 1895 1896 /* 1897 * Activate all disks with the biggest syncid. 1898 */ 1899 if (force) { 1900 /* 1901 * If 'force' is true, we have been called due to 1902 * timeout, so don't bother canceling timeout. 1903 */ 1904 ndisks = 0; 1905 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1906 if ((disk->d_flags & 1907 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { 1908 ndisks++; 1909 } 1910 } 1911 if (ndisks == 0) { 1912 /* No valid disks found, destroy device. */ 1913 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 1914 return; 1915 } 1916 } else { 1917 /* Cancel timeout. */ 1918 callout_drain(&sc->sc_callout); 1919 } 1920 1921 /* 1922 * Find disk with the biggest syncid. 
1923 */ 1924 syncid = 0; 1925 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1926 if (disk->d_sync.ds_syncid > syncid) 1927 syncid = disk->d_sync.ds_syncid; 1928 } 1929 1930 /* 1931 * Here we need to look for dirty disks and if all disks 1932 * with the biggest syncid are dirty, we have to choose 1933 * one with the biggest priority and rebuild the rest. 1934 */ 1935 /* 1936 * Find the number of dirty disks with the biggest syncid. 1937 * Find the number of disks with the biggest syncid. 1938 * While here, find a disk with the biggest priority. 1939 */ 1940 dirty = ndisks = 0; 1941 pdisk = NULL; 1942 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1943 if (disk->d_sync.ds_syncid != syncid) 1944 continue; 1945 if ((disk->d_flags & 1946 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 1947 continue; 1948 } 1949 ndisks++; 1950 if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 1951 dirty++; 1952 if (pdisk == NULL || 1953 pdisk->d_priority < disk->d_priority) { 1954 pdisk = disk; 1955 } 1956 } 1957 } 1958 if (dirty == 0) { 1959 /* No dirty disks at all, great. */ 1960 } else if (dirty == ndisks) { 1961 /* 1962 * Force synchronization for all dirty disks except one 1963 * with the biggest priority. 1964 */ 1965 KASSERT(pdisk != NULL, ("pdisk == NULL")); 1966 G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " 1967 "master disk for synchronization.", 1968 g_mirror_get_diskname(pdisk), sc->sc_name); 1969 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1970 if (disk->d_sync.ds_syncid != syncid) 1971 continue; 1972 if ((disk->d_flags & 1973 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 1974 continue; 1975 } 1976 KASSERT((disk->d_flags & 1977 G_MIRROR_DISK_FLAG_DIRTY) != 0, 1978 ("Disk %s isn't marked as dirty.", 1979 g_mirror_get_diskname(disk))); 1980 /* Skip the disk with the biggest priority. */ 1981 if (disk == pdisk) 1982 continue; 1983 disk->d_sync.ds_syncid = 0; 1984 } 1985 } else if (dirty < ndisks) { 1986 /* 1987 * Force synchronization for all dirty disks. 
1988 * We have some non-dirty disks. 1989 */ 1990 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1991 if (disk->d_sync.ds_syncid != syncid) 1992 continue; 1993 if ((disk->d_flags & 1994 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 1995 continue; 1996 } 1997 if ((disk->d_flags & 1998 G_MIRROR_DISK_FLAG_DIRTY) == 0) { 1999 continue; 2000 } 2001 disk->d_sync.ds_syncid = 0; 2002 } 2003 } 2004 2005 /* Reset hint. */ 2006 sc->sc_hint = NULL; 2007 sc->sc_syncid = syncid; 2008 if (force) { 2009 /* Remember to bump syncid on first write. */ 2010 sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE; 2011 } 2012 state = G_MIRROR_DEVICE_STATE_RUNNING; 2013 G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", 2014 sc->sc_name, g_mirror_device_state2str(sc->sc_state), 2015 g_mirror_device_state2str(state)); 2016 sc->sc_state = state; 2017 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2018 state = g_mirror_determine_state(disk); 2019 g_mirror_event_send(disk, state, 2020 G_MIRROR_EVENT_DONTWAIT); 2021 if (state == G_MIRROR_DISK_STATE_STALE) { 2022 sc->sc_bump_syncid = 2023 G_MIRROR_BUMP_ON_FIRST_WRITE; 2024 } 2025 } 2026 wakeup(&g_mirror_class); 2027 break; 2028 } 2029 case G_MIRROR_DEVICE_STATE_RUNNING: 2030 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 && 2031 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2032 /* 2033 * No active disks or no disks at all, 2034 * so destroy device. 2035 */ 2036 if (sc->sc_provider != NULL) 2037 g_mirror_destroy_provider(sc); 2038 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2039 break; 2040 } else if (g_mirror_ndisks(sc, 2041 G_MIRROR_DISK_STATE_ACTIVE) > 0 && 2042 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2043 /* 2044 * We have active disks, launch provider if it doesn't 2045 * exist. 2046 */ 2047 if (sc->sc_provider == NULL) 2048 g_mirror_launch_provider(sc); 2049 } 2050 /* 2051 * Bump syncid here, if we need to do it immediately. 
2052 */ 2053 if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) { 2054 sc->sc_bump_syncid = 0; 2055 g_mirror_bump_syncid(sc, waitidle); 2056 } 2057 break; 2058 default: 2059 KASSERT(1 == 0, ("Wrong device state (%s, %s).", 2060 sc->sc_name, g_mirror_device_state2str(sc->sc_state))); 2061 break; 2062 } 2063} 2064 2065/* 2066 * Update disk state and device state if needed. 2067 */ 2068#define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ 2069 "Disk %s state changed from %s to %s (device %s).", \ 2070 g_mirror_get_diskname(disk), \ 2071 g_mirror_disk_state2str(disk->d_state), \ 2072 g_mirror_disk_state2str(state), sc->sc_name) 2073static int 2074g_mirror_update_disk(struct g_mirror_disk *disk, u_int state, int waitidle) 2075{ 2076 struct g_mirror_softc *sc; 2077 2078 g_topology_assert(); 2079 2080 sc = disk->d_softc; 2081again: 2082 G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", 2083 g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), 2084 g_mirror_disk_state2str(state)); 2085 switch (state) { 2086 case G_MIRROR_DISK_STATE_NEW: 2087 /* 2088 * Possible scenarios: 2089 * 1. New disk arrive. 2090 */ 2091 /* Previous state should be NONE. 
*/ 2092 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, 2093 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2094 g_mirror_disk_state2str(disk->d_state))); 2095 DISK_STATE_CHANGED(); 2096 2097 disk->d_state = state; 2098 if (LIST_EMPTY(&sc->sc_disks)) 2099 LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); 2100 else { 2101 struct g_mirror_disk *dp; 2102 2103 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 2104 if (disk->d_priority >= dp->d_priority) { 2105 LIST_INSERT_BEFORE(dp, disk, d_next); 2106 dp = NULL; 2107 break; 2108 } 2109 if (LIST_NEXT(dp, d_next) == NULL) 2110 break; 2111 } 2112 if (dp != NULL) 2113 LIST_INSERT_AFTER(dp, disk, d_next); 2114 } 2115 G_MIRROR_DEBUG(0, "Device %s: provider %s detected.", 2116 sc->sc_name, g_mirror_get_diskname(disk)); 2117 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2118 break; 2119 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2120 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2121 g_mirror_device_state2str(sc->sc_state), 2122 g_mirror_get_diskname(disk), 2123 g_mirror_disk_state2str(disk->d_state))); 2124 state = g_mirror_determine_state(disk); 2125 if (state != G_MIRROR_DISK_STATE_NONE) 2126 goto again; 2127 break; 2128 case G_MIRROR_DISK_STATE_ACTIVE: 2129 /* 2130 * Possible scenarios: 2131 * 1. New disk does not need synchronization. 2132 * 2. Synchronization process finished successfully. 2133 */ 2134 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2135 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2136 g_mirror_device_state2str(sc->sc_state), 2137 g_mirror_get_diskname(disk), 2138 g_mirror_disk_state2str(disk->d_state))); 2139 /* Previous state should be NEW or SYNCHRONIZING. 
*/
                KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
                    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
                    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                DISK_STATE_CHANGED();

                /*
                 * A brand new disk goes active clean; a disk that has just
                 * finished synchronizing drops its sync-related flags and
                 * stops the synchronization machinery first.
                 */
                if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
                        disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
                else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
                        disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
                        disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
                        g_mirror_sync_stop(disk, 0);
                }
                disk->d_state = state;
                disk->d_sync.ds_offset = 0;
                disk->d_sync.ds_offset_done = 0;
                /*
                 * NOTE(review): g_waitidlelock() is not defined in this file
                 * chunk; it presumably waits for the mirror to become idle
                 * before metadata is written -- confirm against its
                 * definition.
                 */
                if (waitidle)
                        g_waitidlelock();
                g_mirror_update_access(disk);
                g_mirror_update_metadata(disk);
                G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
                    sc->sc_name, g_mirror_get_diskname(disk));
                break;
        case G_MIRROR_DISK_STATE_STALE:
                /*
                 * Possible scenarios:
                 * 1. Stale disk was connected.
                 */
                /* Previous state should be NEW. */
                KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
                    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
                    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
                    g_mirror_device_state2str(sc->sc_state),
                    g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                /*
                 * STALE state is only possible if device is marked
                 * NOAUTOSYNC.
                 */
                KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
                    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
                    g_mirror_device_state2str(sc->sc_state),
                    g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                DISK_STATE_CHANGED();

                disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
                disk->d_state = state;
                if (waitidle)
                        g_waitidlelock();
                g_mirror_update_metadata(disk);
                G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
                    sc->sc_name, g_mirror_get_diskname(disk));
                break;
        case G_MIRROR_DISK_STATE_SYNCHRONIZING:
                /*
                 * Possible scenarios:
                 * 1. Disk which needs synchronization was connected.
                 */
                /* Previous state should be NEW. */
                KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
                    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
                    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
                    g_mirror_device_state2str(sc->sc_state),
                    g_mirror_get_diskname(disk),
                    g_mirror_disk_state2str(disk->d_state)));
                DISK_STATE_CHANGED();

                if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
                        disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
                disk->d_state = state;
                /*
                 * Synchronization can only start once the mirror's provider
                 * exists; until then the disk just sits in SYNCHRONIZING
                 * state.
                 */
                if (sc->sc_provider != NULL) {
                        if (waitidle)
                                g_waitidlelock();
                        g_mirror_sync_start(disk);
                        g_mirror_update_metadata(disk);
                }
                break;
        case G_MIRROR_DISK_STATE_DISCONNECTED:
                /*
                 * Possible scenarios:
                 * 1. Device wasn't running yet, but disk disappeared.
                 * 2. Disk was active and disappeared.
                 * 3. Disk disappeared during synchronization process.
                 */
                if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
                        /*
                         * Previous state should be ACTIVE, STALE or
                         * SYNCHRONIZING.
                         */
                        KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
                            disk->d_state == G_MIRROR_DISK_STATE_STALE ||
                            disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
                            ("Wrong disk state (%s, %s).",
                            g_mirror_get_diskname(disk),
                            g_mirror_disk_state2str(disk->d_state)));
                } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
                        /* Previous state should be NEW. */
                        KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
                            ("Wrong disk state (%s, %s).",
                            g_mirror_get_diskname(disk),
                            g_mirror_disk_state2str(disk->d_state)));
                        /*
                         * Reset bumping syncid if disk disappeared in STARTING
                         * state.
                         */
                        if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
                                sc->sc_bump_syncid = 0;
#ifdef INVARIANTS
                } else {
                        /* Any other device state here is a logic error. */
                        KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
                            sc->sc_name,
                            g_mirror_device_state2str(sc->sc_state),
                            g_mirror_get_diskname(disk),
                            g_mirror_disk_state2str(disk->d_state)));
#endif
                }
                DISK_STATE_CHANGED();
                G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
                    sc->sc_name, g_mirror_get_diskname(disk));

                g_mirror_destroy_disk(disk);
                break;
        case G_MIRROR_DISK_STATE_DESTROY:
            {
                int error;

                /*
                 * Wipe the on-disk metadata first; on failure the disk is
                 * left untouched and the error is propagated to the caller.
                 */
                error = g_mirror_clear_metadata(disk);
                if (error != 0)
                        return (error);
                DISK_STATE_CHANGED();
                G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
                    sc->sc_name, g_mirror_get_diskname(disk));

                g_mirror_destroy_disk(disk);
                sc->sc_ndisks--;
                /*
                 * The remaining components must record the reduced disk
                 * count in their metadata.
                 */
                LIST_FOREACH(disk, &sc->sc_disks, d_next) {
                        if (waitidle)
                                g_waitidlelock();
                        g_mirror_update_metadata(disk);
                }
                break;
            }
        default:
                KASSERT(1 == 0, ("Unknown state (%u).", state));
                break;
        }
        return (0);
}
#undef DISK_STATE_CHANGED

/*
 * Read and decode the gmirror metadata block stored in the last sector of
 * the given consumer's provider.  Returns 0 on success, EINVAL on a bad
 * magic string, or another errno value on I/O or checksum failure.
 */
static int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
        struct g_provider *pp;
        u_char *buf;
int error;

        g_topology_assert();

        /* Open the consumer for reading for the duration of the read. */
        error = g_access(cp, 1, 0, 0);
        if (error != 0)
                return (error);
        pp = cp->provider;
        /* The topology lock may not be held across actual I/O. */
        g_topology_unlock();
        /* Metadata are stored on last sector. */
        buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
            &error);
        g_topology_lock();
        if (buf == NULL) {
                g_access(cp, -1, 0, 0);
                return (error);
        }
        /*
         * NOTE(review): after a successful g_read_data() this error check is
         * presumably redundant (buf != NULL should imply error == 0), but it
         * is kept as defensive coding -- confirm against g_read_data(9).
         */
        if (error != 0) {
                g_access(cp, -1, 0, 0);
                g_free(buf);
                return (error);
        }
        error = g_access(cp, -1, 0, 0);
        KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));

        /* Decode metadata. */
        error = mirror_metadata_decode(buf, md);
        g_free(buf);
        if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
                return (EINVAL);
        if (error != 0) {
                G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
                    cp->provider->name);
                return (error);
        }

        return (0);
}

/*
 * Validate metadata read from one provider against the already-configured
 * mirror device: the disk ID must be unique and the geometry fields
 * (component count, slice, balance algorithm, media size, sector size) and
 * flags must match what the device was created with.  Returns 0 if the
 * disk may join, EEXIST or EINVAL otherwise.
 */
static int
g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{

        if (g_mirror_id2disk(sc, md->md_did) != NULL) {
                G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
                    pp->name, md->md_did);
                return (EEXIST);
        }
        if (md->md_all != sc->sc_ndisks) {
                G_MIRROR_DEBUG(1,
                    "Invalid '%s' field on disk %s (device %s), skipping.",
                    "md_all", pp->name, sc->sc_name);
                return (EINVAL);
        }
        if (md->md_slice != sc->sc_slice) {
                G_MIRROR_DEBUG(1,
                    "Invalid '%s' field on disk %s (device %s), skipping.",
                    "md_slice", pp->name, sc->sc_name);
                return (EINVAL);
        }
        if (md->md_balance != sc->sc_balance) {
                G_MIRROR_DEBUG(1,
                    "Invalid '%s' field on disk %s (device %s), skipping.",
                    "md_balance", pp->name, sc->sc_name);
                return (EINVAL);
        }
        if (md->md_mediasize != sc->sc_mediasize) {
                G_MIRROR_DEBUG(1,
                    "Invalid '%s' field on disk %s (device %s), skipping.",
                    "md_mediasize", pp->name, sc->sc_name);
                return (EINVAL);
        }
        /* The component must be large enough to hold the mirror. */
        if (sc->sc_mediasize > pp->mediasize) {
                G_MIRROR_DEBUG(1,
                    "Invalid size of disk %s (device %s), skipping.", pp->name,
                    sc->sc_name);
                return (EINVAL);
        }
        if (md->md_sectorsize != sc->sc_sectorsize) {
                G_MIRROR_DEBUG(1,
                    "Invalid '%s' field on disk %s (device %s), skipping.",
                    "md_sectorsize", pp->name, sc->sc_name);
                return (EINVAL);
        }
        /* The mirror's sector size must be a multiple of the component's. */
        if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
                G_MIRROR_DEBUG(1,
                    "Invalid sector size of disk %s (device %s), skipping.",
                    pp->name, sc->sc_name);
                return (EINVAL);
        }
        if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
                G_MIRROR_DEBUG(1,
                    "Invalid device flags on disk %s (device %s), skipping.",
                    pp->name, sc->sc_name);
                return (EINVAL);
        }
        if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
                G_MIRROR_DEBUG(1,
                    "Invalid disk flags on disk %s (device %s), skipping.",
                    pp->name, sc->sc_name);
                return (EINVAL);
        }
        return (0);
}

/*
 * Attach a new component (provider pp, with already-decoded metadata md)
 * to the mirror device: validate the metadata, allocate the disk
 * structure, and hand it to the worker thread via a NEW-state event.
 * Returns 0 on success or an errno value.
 */
static int
g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{
        struct g_mirror_disk *disk;
        int error;

        g_topology_assert();
        G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);

        error = g_mirror_check_metadata(sc, pp, md);
        if (error != 0)
                return (error);
        disk = g_mirror_init_disk(sc, pp, md, &error);
        if (disk == NULL)
                return (error);
        error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
            G_MIRROR_EVENT_WAIT);
        return (error);
}

/*
 * GEOM access method for the mirror provider.  Propagates the access
 * count deltas (acr/acw/ace) to every ACTIVE component and maintains the
 * per-component DIRTY flag across open/close transitions.
 */
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
        struct g_mirror_softc *sc;
        struct g_mirror_disk *disk;
        int dcr, dcw, dce, err, error;

        g_topology_assert();
        G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
acw, ace);

        /* Resulting access counts after applying the requested deltas. */
        dcr = pp->acr + acr;
        dcw = pp->acw + acw;
        dce = pp->ace + ace;

        /* On first open, grab an extra "exclusive" bit */
        if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
                ace++;
        /* ... and let go of it on last close */
        if (dcr == 0 && dcw == 0 && dce == 0)
                ace--;

        sc = pp->geom->softc;
        /*
         * With no softc, no components, or a device being destroyed, only
         * releases (non-positive deltas) can succeed.
         */
        if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
            (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
                if (acr <= 0 && acw <= 0 && ace <= 0)
                        return (0);
                else
                        return (ENXIO);
        }
        error = ENXIO;
        LIST_FOREACH(disk, &sc->sc_disks, d_next) {
                if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
                        continue;
                err = g_access(disk->d_consumer, acr, acw, ace);
                G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
                    g_mirror_get_diskname(disk), acr, acw, ace, err);
                if (err == 0) {
                        /*
                         * Mark disk as dirty on open and unmark on close.
                         */
                        if (pp->acw == 0 && dcw > 0) {
                                G_MIRROR_DEBUG(1,
                                    "Disk %s (device %s) marked as dirty.",
                                    g_mirror_get_diskname(disk), sc->sc_name);
                                disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
                                g_mirror_update_metadata(disk);
                        } else if (pp->acw > 0 && dcw == 0) {
                                G_MIRROR_DEBUG(1,
                                    "Disk %s (device %s) marked as clean.",
                                    g_mirror_get_diskname(disk), sc->sc_name);
                                disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
                                g_mirror_update_metadata(disk);
                        }
                        error = 0;
                } else {
                        /*
                         * The component refused the access change: schedule
                         * its disconnection and remember to bump the syncid
                         * on the next write.
                         */
                        sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
                        g_mirror_event_send(disk,
                            G_MIRROR_DISK_STATE_DISCONNECTED,
                            G_MIRROR_EVENT_DONTWAIT);
                }
        }
        /*
         * Be sure to return 0 for negative access requests.
         * In case of some HW problems, it is possible that we don't have
         * any active disk here, so loop above will be no-op and error will
         * be ENXIO.
         */
        if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
                error = 0;
        return (error);
}

/*
 * Create a new mirror device from decoded metadata: allocate the softc,
 * set up the action geom and the synchronization geom, start the worker
 * thread, and arm the startup timeout.  Returns the action geom, or NULL
 * on failure.
 */
static struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
{
        struct g_mirror_softc *sc;
        struct g_geom *gp;
        int error, timeout;

        g_topology_assert();
        G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
            md->md_mid);

        /* One disk is minimum. */
        if (md->md_all < 1)
                return (NULL);
        /*
         * Action geom.
         */
        gp = g_new_geomf(mp, "%s", md->md_name);
        sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
        gp->start = g_mirror_start;
        gp->spoiled = g_mirror_spoiled;
        gp->orphan = g_mirror_orphan;
        gp->access = g_mirror_access;
        gp->dumpconf = g_mirror_dumpconf;

        /* Copy the device-wide parameters out of the metadata. */
        sc->sc_id = md->md_mid;
        sc->sc_slice = md->md_slice;
        sc->sc_balance = md->md_balance;
        sc->sc_mediasize = md->md_mediasize;
        sc->sc_sectorsize = md->md_sectorsize;
        sc->sc_ndisks = md->md_all;
        sc->sc_flags = md->md_mflags;
        sc->sc_bump_syncid = 0;
        sc->sc_idle = 0;
        bioq_init(&sc->sc_queue);
        mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
        LIST_INIT(&sc->sc_disks);
        TAILQ_INIT(&sc->sc_events);
        mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
        callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
        sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
        gp->softc = sc;
        sc->sc_geom = gp;
        sc->sc_provider = NULL;
        /*
         * Synchronization geom.
2546 */ 2547 gp = g_new_geomf(mp, "%s.sync", md->md_name); 2548 gp->softc = sc; 2549 gp->orphan = g_mirror_orphan; 2550 sc->sc_sync.ds_geom = gp; 2551 sc->sc_sync.ds_ndisks = 0; 2552 error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, 2553 "g_mirror %s", md->md_name); 2554 if (error != 0) { 2555 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", 2556 sc->sc_name); 2557 g_destroy_geom(sc->sc_sync.ds_geom); 2558 mtx_destroy(&sc->sc_events_mtx); 2559 mtx_destroy(&sc->sc_queue_mtx); 2560 g_destroy_geom(sc->sc_geom); 2561 free(sc, M_MIRROR); 2562 return (NULL); 2563 } 2564 2565 G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 2566 2567 /* 2568 * Run timeout. 2569 */ 2570 timeout = g_mirror_timeout * hz; 2571 callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); 2572 return (sc->sc_geom); 2573} 2574 2575int 2576g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force) 2577{ 2578 struct g_provider *pp; 2579 2580 g_topology_assert(); 2581 2582 if (sc == NULL) 2583 return (ENXIO); 2584 pp = sc->sc_provider; 2585 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 2586 if (force) { 2587 G_MIRROR_DEBUG(0, "Device %s is still open, so it " 2588 "can't be definitely removed.", pp->name); 2589 } else { 2590 G_MIRROR_DEBUG(1, 2591 "Device %s is still open (r%dw%de%d).", pp->name, 2592 pp->acr, pp->acw, pp->ace); 2593 return (EBUSY); 2594 } 2595 } 2596 2597 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2598 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT; 2599 g_topology_unlock(); 2600 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 2601 mtx_lock(&sc->sc_queue_mtx); 2602 wakeup(sc); 2603 mtx_unlock(&sc->sc_queue_mtx); 2604 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 2605 while (sc->sc_worker != NULL) 2606 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); 2607 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 2608 g_topology_lock(); 2609 g_mirror_destroy_device(sc); 2610 
free(sc, M_MIRROR); 2611 return (0); 2612} 2613 2614static void 2615g_mirror_taste_orphan(struct g_consumer *cp) 2616{ 2617 2618 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 2619 cp->provider->name)); 2620} 2621 2622static struct g_geom * 2623g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 2624{ 2625 struct g_mirror_metadata md; 2626 struct g_mirror_softc *sc; 2627 struct g_consumer *cp; 2628 struct g_geom *gp; 2629 int error; 2630 2631 g_topology_assert(); 2632 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 2633 G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); 2634 2635 gp = g_new_geomf(mp, "mirror:taste"); 2636 /* 2637 * This orphan function should be never called. 2638 */ 2639 gp->orphan = g_mirror_taste_orphan; 2640 cp = g_new_consumer(gp); 2641 g_attach(cp, pp); 2642 error = g_mirror_read_metadata(cp, &md); 2643 g_detach(cp); 2644 g_destroy_consumer(cp); 2645 g_destroy_geom(gp); 2646 if (error != 0) 2647 return (NULL); 2648 gp = NULL; 2649 2650 if (md.md_version > G_MIRROR_VERSION) { 2651 printf("geom_mirror.ko module is too old to handle %s.\n", 2652 pp->name); 2653 return (NULL); 2654 } 2655 if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0) 2656 return (NULL); 2657 if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { 2658 G_MIRROR_DEBUG(0, 2659 "Device %s: provider %s marked as inactive, skipping.", 2660 md.md_name, pp->name); 2661 return (NULL); 2662 } 2663 if (g_mirror_debug >= 2) 2664 mirror_metadata_dump(&md); 2665 2666 /* 2667 * Let's check if device already exists. 
2668 */ 2669 sc = NULL; 2670 LIST_FOREACH(gp, &mp->geom, geom) { 2671 sc = gp->softc; 2672 if (sc == NULL) 2673 continue; 2674 if (sc->sc_sync.ds_geom == gp) 2675 continue; 2676 if (strcmp(md.md_name, sc->sc_name) != 0) 2677 continue; 2678 if (md.md_mid != sc->sc_id) { 2679 G_MIRROR_DEBUG(0, "Device %s already configured.", 2680 sc->sc_name); 2681 return (NULL); 2682 } 2683 break; 2684 } 2685 if (gp == NULL) { 2686 gp = g_mirror_create(mp, &md); 2687 if (gp == NULL) { 2688 G_MIRROR_DEBUG(0, "Cannot create device %s.", 2689 md.md_name); 2690 return (NULL); 2691 } 2692 sc = gp->softc; 2693 } 2694 G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 2695 error = g_mirror_add_disk(sc, pp, &md); 2696 if (error != 0) { 2697 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 2698 pp->name, gp->name, error); 2699 if (LIST_EMPTY(&sc->sc_disks)) 2700 g_mirror_destroy(sc, 1); 2701 return (NULL); 2702 } 2703 return (gp); 2704} 2705 2706static int 2707g_mirror_destroy_geom(struct gctl_req *req __unused, 2708 struct g_class *mp __unused, struct g_geom *gp) 2709{ 2710 2711 return (g_mirror_destroy(gp->softc, 0)); 2712} 2713 2714static void 2715g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2716 struct g_consumer *cp, struct g_provider *pp) 2717{ 2718 struct g_mirror_softc *sc; 2719 2720 g_topology_assert(); 2721 2722 sc = gp->softc; 2723 if (sc == NULL) 2724 return; 2725 /* Skip synchronization geom. */ 2726 if (gp == sc->sc_sync.ds_geom) 2727 return; 2728 if (pp != NULL) { 2729 /* Nothing here. 
*/ 2730 } else if (cp != NULL) { 2731 struct g_mirror_disk *disk; 2732 2733 disk = cp->private; 2734 if (disk == NULL) 2735 return; 2736 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id); 2737 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2738 sbuf_printf(sb, "%s<Synchronized>", indent); 2739 if (disk->d_sync.ds_offset_done == 0) 2740 sbuf_printf(sb, "0%%"); 2741 else { 2742 sbuf_printf(sb, "%u%%", 2743 (u_int)((disk->d_sync.ds_offset_done * 100) / 2744 sc->sc_provider->mediasize)); 2745 } 2746 sbuf_printf(sb, "</Synchronized>\n"); 2747 } 2748 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 2749 disk->d_sync.ds_syncid); 2750 sbuf_printf(sb, "%s<Flags>", indent); 2751 if (disk->d_flags == 0) 2752 sbuf_printf(sb, "NONE"); 2753 else { 2754 int first = 1; 2755 2756#define ADD_FLAG(flag, name) do { \ 2757 if ((disk->d_flags & (flag)) != 0) { \ 2758 if (!first) \ 2759 sbuf_printf(sb, ", "); \ 2760 else \ 2761 first = 0; \ 2762 sbuf_printf(sb, name); \ 2763 } \ 2764} while (0) 2765 ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); 2766 ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); 2767 ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); 2768 ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, 2769 "SYNCHRONIZING"); 2770 ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 2771#undef ADD_FLAG 2772 } 2773 sbuf_printf(sb, "</Flags>\n"); 2774 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent, 2775 disk->d_priority); 2776 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 2777 g_mirror_disk_state2str(disk->d_state)); 2778 } else { 2779 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 2780 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 2781 sbuf_printf(sb, "%s<Flags>", indent); 2782 if (sc->sc_flags == 0) 2783 sbuf_printf(sb, "NONE"); 2784 else { 2785 int first = 1; 2786 2787#define ADD_FLAG(flag, name) do { \ 2788 if ((sc->sc_flags & (flag)) != 0) { \ 2789 if (!first) \ 2790 sbuf_printf(sb, ", "); \ 2791 else \ 2792 first 
= 0; \ 2793 sbuf_printf(sb, name); \ 2794 } \ 2795} while (0) 2796 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 2797#undef ADD_FLAG 2798 } 2799 sbuf_printf(sb, "</Flags>\n"); 2800 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent, 2801 (u_int)sc->sc_slice); 2802 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent, 2803 balance_name(sc->sc_balance)); 2804 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 2805 sc->sc_ndisks); 2806 sbuf_printf(sb, "%s<State>", indent); 2807 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2808 sbuf_printf(sb, "%s", "STARTING"); 2809 else if (sc->sc_ndisks == 2810 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) 2811 sbuf_printf(sb, "%s", "COMPLETE"); 2812 else 2813 sbuf_printf(sb, "%s", "DEGRADED"); 2814 sbuf_printf(sb, "</State>\n"); 2815 } 2816} 2817 2818static void 2819g_mirror_shutdown(void *arg, int howto) 2820{ 2821 struct g_class *mp; 2822 struct g_geom *gp, *gp2; 2823 2824 mp = arg; 2825 DROP_GIANT(); 2826 g_topology_lock(); 2827 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 2828 if (gp->softc == NULL) 2829 continue; 2830 g_mirror_destroy(gp->softc, 1); 2831 } 2832 g_topology_unlock(); 2833 PICKUP_GIANT(); 2834#if 0 2835 tsleep(&gp, PRIBIO, "m:shutdown", hz * 20); 2836#endif 2837} 2838 2839static void 2840g_mirror_init(struct g_class *mp) 2841{ 2842 2843 g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync, 2844 g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST); 2845 if (g_mirror_ehtag == NULL) 2846 G_MIRROR_DEBUG(0, "Warning! 
Cannot register shutdown event."); 2847} 2848 2849static void 2850g_mirror_fini(struct g_class *mp) 2851{ 2852 2853 if (g_mirror_ehtag == NULL) 2854 return; 2855 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag); 2856} 2857 2858static int 2859g_mirror_can_go(void) 2860{ 2861 struct g_mirror_softc *sc; 2862 struct g_geom *gp; 2863 struct g_provider *pp; 2864 int can_go; 2865 2866 DROP_GIANT(); 2867 can_go = 1; 2868 g_topology_lock(); 2869 LIST_FOREACH(gp, &g_mirror_class.geom, geom) { 2870 sc = gp->softc; 2871 if (sc == NULL) { 2872 can_go = 0; 2873 break; 2874 } 2875 pp = sc->sc_provider; 2876 if (pp == NULL || pp->error != 0) { 2877 can_go = 0; 2878 break; 2879 } 2880 } 2881 g_topology_unlock(); 2882 PICKUP_GIANT(); 2883 return (can_go); 2884} 2885 2886static void 2887g_mirror_rootwait(void) 2888{ 2889 2890 /* 2891 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called, 2892 * HACK: before we get providers for tasting. 2893 */ 2894 tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3); 2895 /* 2896 * Wait for mirrors in degraded state. 2897 */ 2898 for (;;) { 2899 if (g_mirror_can_go()) 2900 break; 2901 tsleep(&g_mirror_class, PRIBIO, "mroot", hz); 2902 } 2903} 2904 2905SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL) 2906 2907DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); 2908