g_mirror.c revision 155545
161837Salfred/*- 261837Salfred * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org> 361837Salfred * All rights reserved. 461837Salfred * 561837Salfred * Redistribution and use in source and binary forms, with or without 661837Salfred * modification, are permitted provided that the following conditions 761837Salfred * are met: 861837Salfred * 1. Redistributions of source code must retain the above copyright 961837Salfred * notice, this list of conditions and the following disclaimer. 1061837Salfred * 2. Redistributions in binary form must reproduce the above copyright 1161837Salfred * notice, this list of conditions and the following disclaimer in the 1261837Salfred * documentation and/or other materials provided with the distribution. 1361837Salfred * 1461837Salfred * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 1561837Salfred * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1661837Salfred * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1761837Salfred * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 1861837Salfred * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1961837Salfred * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2061837Salfred * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2161837Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2261837Salfred * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2361837Salfred * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2461837Salfred * SUCH DAMAGE. 2561837Salfred */ 2661837Salfred 2761837Salfred#include <sys/cdefs.h> 2861837Salfred__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 155545 2006-02-11 17:39:29Z pjd $"); 2961837Salfred 3061837Salfred#include <sys/param.h> 3161837Salfred#include <sys/systm.h> 3261837Salfred#include <sys/kernel.h> 3361837Salfred#include <sys/module.h> 3461837Salfred#include <sys/limits.h> 3561837Salfred#include <sys/lock.h> 3661837Salfred#include <sys/mutex.h> 3761837Salfred#include <sys/bio.h> 3861837Salfred#include <sys/sysctl.h> 3961837Salfred#include <sys/malloc.h> 4061837Salfred#include <sys/eventhandler.h> 4161837Salfred#include <vm/uma.h> 4261837Salfred#include <geom/geom.h> 4361837Salfred#include <sys/proc.h> 4461837Salfred#include <sys/kthread.h> 4561837Salfred#include <sys/sched.h> 4661837Salfred#include <geom/mirror/g_mirror.h> 4761837Salfred 4861837Salfred 4961837Salfredstatic MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); 5061837Salfred 5161837SalfredSYSCTL_DECL(_kern_geom); 5261837SalfredSYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff"); 5361837Salfredu_int g_mirror_debug = 0; 5461837SalfredTUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug); 5561837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0, 5661837Salfred "Debug level"); 5761837Salfredstatic u_int g_mirror_timeout = 4; 5861837SalfredTUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout); 5961837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout, 6061837Salfred 0, "Time to wait on all mirror components"); 6161837Salfredstatic u_int g_mirror_idletime = 5; 6261837SalfredTUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime); 6361837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW, 6461837Salfred &g_mirror_idletime, 0, "Mark components as clean when idling"); 6561837Salfredstatic u_int g_mirror_reqs_per_sync = 5; 6661837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW, 6761837Salfred &g_mirror_reqs_per_sync, 0, 6861837Salfred "Number of regular I/O requests per synchronization request"); 6961837Salfredstatic u_int g_mirror_syncs_per_sec = 1000; 7061837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW, 7161837Salfred &g_mirror_syncs_per_sec, 0, 7261837Salfred "Number of synchronizations requests per second"); 7361837Salfredstatic u_int g_mirror_disconnect_on_failure = 1; 7461837SalfredTUNABLE_INT("kern.geom.mirror.disconnect_on_failure", 7561837Salfred &g_mirror_disconnect_on_failure); 7661837SalfredSYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW, 7761837Salfred &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 7861837Salfred 7961837Salfred#define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 8061837Salfred G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 8161837Salfred msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 8261837Salfred G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 8361837Salfred} while (0) 8461837Salfred 8561837Salfredstatic eventhandler_tag g_mirror_ehtag = NULL; 8661837Salfred 8761837Salfredstatic int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp, 8861837Salfred struct g_geom *gp); 8961837Salfredstatic g_taste_t g_mirror_taste; 9061837Salfredstatic void g_mirror_init(struct g_class *mp); 9161837Salfredstatic void g_mirror_fini(struct g_class *mp); 9261837Salfred 9361837Salfredstruct g_class g_mirror_class = { 9461837Salfred .name = G_MIRROR_CLASS_NAME, 9561837Salfred .version = G_VERSION, 9661837Salfred .ctlreq = g_mirror_config, 9761837Salfred .taste = g_mirror_taste, 9861837Salfred .destroy_geom = g_mirror_destroy_geom, 9961837Salfred .init = g_mirror_init, 10061837Salfred .fini = g_mirror_fini 10161837Salfred}; 10261837Salfred 10361837Salfred 10461837Salfredstatic void g_mirror_destroy_provider(struct g_mirror_softc *sc); 10561837Salfredstatic int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); 10661837Salfredstatic void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force); 10761837Salfredstatic void g_mirror_dumpconf(struct sbuf *sb, const char *indent, 10861837Salfred struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 10961837Salfredstatic void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); 11061837Salfred 11161837Salfred 11261837Salfredstatic const char * 11361837Salfredg_mirror_disk_state2str(int state) 11461837Salfred{ 11561837Salfred 11661837Salfred switch (state) { 11761837Salfred case G_MIRROR_DISK_STATE_NONE: 11861837Salfred return ("NONE"); 11961837Salfred case G_MIRROR_DISK_STATE_NEW: 12061837Salfred return ("NEW"); 12161837Salfred case G_MIRROR_DISK_STATE_ACTIVE: 12261837Salfred return ("ACTIVE"); 12361837Salfred case G_MIRROR_DISK_STATE_STALE: 12461837Salfred return ("STALE"); 12561837Salfred case G_MIRROR_DISK_STATE_SYNCHRONIZING: 12661837Salfred return ("SYNCHRONIZING"); 12761837Salfred case G_MIRROR_DISK_STATE_DISCONNECTED: 12861837Salfred return ("DISCONNECTED"); 12961837Salfred case G_MIRROR_DISK_STATE_DESTROY: 13061837Salfred return ("DESTROY"); 13161837Salfred default: 13261837Salfred return ("INVALID"); 13361837Salfred } 134} 135 136static const char * 137g_mirror_device_state2str(int state) 138{ 139 140 switch (state) { 141 case G_MIRROR_DEVICE_STATE_STARTING: 142 return ("STARTING"); 143 case G_MIRROR_DEVICE_STATE_RUNNING: 144 return ("RUNNING"); 145 default: 146 return ("INVALID"); 147 } 148} 149 150static const char * 151g_mirror_get_diskname(struct g_mirror_disk *disk) 152{ 153 154 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 155 return ("[unknown]"); 156 return (disk->d_name); 157} 158 159/* 160 * --- Events handling functions --- 161 * Events in geom_mirror are used to maintain disks and device status 162 * from one thread to simplify locking. 163 */ 164static void 165g_mirror_event_free(struct g_mirror_event *ep) 166{ 167 168 free(ep, M_MIRROR); 169} 170 171int 172g_mirror_event_send(void *arg, int state, int flags) 173{ 174 struct g_mirror_softc *sc; 175 struct g_mirror_disk *disk; 176 struct g_mirror_event *ep; 177 int error; 178 179 ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK); 180 G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep); 181 if ((flags & G_MIRROR_EVENT_DEVICE) != 0) { 182 disk = NULL; 183 sc = arg; 184 } else { 185 disk = arg; 186 sc = disk->d_softc; 187 } 188 ep->e_disk = disk; 189 ep->e_state = state; 190 ep->e_flags = flags; 191 ep->e_error = 0; 192 mtx_lock(&sc->sc_events_mtx); 193 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 194 mtx_unlock(&sc->sc_events_mtx); 195 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 196 mtx_lock(&sc->sc_queue_mtx); 197 wakeup(sc); 198 mtx_unlock(&sc->sc_queue_mtx); 199 if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0) 200 return (0); 201 g_topology_assert(); 202 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 203 g_topology_unlock(); 204 while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) { 205 mtx_lock(&sc->sc_events_mtx); 206 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", 207 hz * 5); 208 } 209 /* Don't even try to use 'sc' here, because it could be already dead. */ 210 g_topology_lock(); 211 error = ep->e_error; 212 g_mirror_event_free(ep); 213 return (error); 214} 215 216static struct g_mirror_event * 217g_mirror_event_get(struct g_mirror_softc *sc) 218{ 219 struct g_mirror_event *ep; 220 221 mtx_lock(&sc->sc_events_mtx); 222 ep = TAILQ_FIRST(&sc->sc_events); 223 mtx_unlock(&sc->sc_events_mtx); 224 return (ep); 225} 226 227static void 228g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep) 229{ 230 231 mtx_lock(&sc->sc_events_mtx); 232 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 233 mtx_unlock(&sc->sc_events_mtx); 234} 235 236static void 237g_mirror_event_cancel(struct g_mirror_disk *disk) 238{ 239 struct g_mirror_softc *sc; 240 struct g_mirror_event *ep, *tmpep; 241 242 g_topology_assert(); 243 244 sc = disk->d_softc; 245 mtx_lock(&sc->sc_events_mtx); 246 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 247 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) 248 continue; 249 if (ep->e_disk != disk) 250 continue; 251 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 252 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 253 g_mirror_event_free(ep); 254 else { 255 ep->e_error = ECANCELED; 256 wakeup(ep); 257 } 258 } 259 mtx_unlock(&sc->sc_events_mtx); 260} 261 262/* 263 * Return the number of disks in given state. 264 * If state is equal to -1, count all connected disks. 265 */ 266u_int 267g_mirror_ndisks(struct g_mirror_softc *sc, int state) 268{ 269 struct g_mirror_disk *disk; 270 u_int n = 0; 271 272 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 273 if (state == -1 || disk->d_state == state) 274 n++; 275 } 276 return (n); 277} 278 279/* 280 * Find a disk in mirror by its disk ID. 281 */ 282static struct g_mirror_disk * 283g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id) 284{ 285 struct g_mirror_disk *disk; 286 287 g_topology_assert(); 288 289 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 290 if (disk->d_id == id) 291 return (disk); 292 } 293 return (NULL); 294} 295 296static u_int 297g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) 298{ 299 struct bio *bp; 300 u_int nreqs = 0; 301 302 mtx_lock(&sc->sc_queue_mtx); 303 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 304 if (bp->bio_from == cp) 305 nreqs++; 306 } 307 mtx_unlock(&sc->sc_queue_mtx); 308 return (nreqs); 309} 310 311static int 312g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp) 313{ 314 315 if (cp->index > 0) { 316 G_MIRROR_DEBUG(2, 317 "I/O requests for %s exist, can't destroy it now.", 318 cp->provider->name); 319 return (1); 320 } 321 if (g_mirror_nrequests(sc, cp) > 0) { 322 G_MIRROR_DEBUG(2, 323 "I/O requests for %s in queue, can't destroy it now.", 324 cp->provider->name); 325 return (1); 326 } 327 return (0); 328} 329 330static void 331g_mirror_destroy_consumer(void *arg, int flags __unused) 332{ 333 struct g_consumer *cp; 334 335 cp = arg; 336 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 337 g_detach(cp); 338 g_destroy_consumer(cp); 339} 340 341static void 342g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 343{ 344 struct g_provider *pp; 345 int retaste_wait; 346 347 g_topology_assert(); 348 349 cp->private = NULL; 350 if (g_mirror_is_busy(sc, cp)) 351 return; 352 pp = cp->provider; 353 retaste_wait = 0; 354 if (cp->acw == 1) { 355 if ((pp->geom->flags & G_GEOM_WITHER) == 0) 356 retaste_wait = 1; 357 } 358 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 359 -cp->acw, -cp->ace, 0); 360 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 361 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 362 if (retaste_wait) { 363 /* 364 * After retaste event was send (inside g_access()), we can send 365 * event to detach and destroy consumer. 366 * A class, which has consumer to the given provider connected 367 * will not receive retaste event for the provider. 368 * This is the way how I ignore retaste events when I close 369 * consumers opened for write: I detach and destroy consumer 370 * after retaste event is sent. 371 */ 372 g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); 373 return; 374 } 375 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); 376 g_detach(cp); 377 g_destroy_consumer(cp); 378} 379 380static int 381g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) 382{ 383 struct g_consumer *cp; 384 int error; 385 386 g_topology_assert(); 387 KASSERT(disk->d_consumer == NULL, 388 ("Disk already connected (device %s).", disk->d_softc->sc_name)); 389 390 cp = g_new_consumer(disk->d_softc->sc_geom); 391 error = g_attach(cp, pp); 392 if (error != 0) { 393 g_destroy_consumer(cp); 394 return (error); 395 } 396 error = g_access(cp, 1, 1, 1); 397 if (error != 0) { 398 g_detach(cp); 399 g_destroy_consumer(cp); 400 G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", 401 pp->name, error); 402 return (error); 403 } 404 disk->d_consumer = cp; 405 disk->d_consumer->private = disk; 406 disk->d_consumer->index = 0; 407 408 G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); 409 return (0); 410} 411 412static void 413g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 414{ 415 416 g_topology_assert(); 417 418 if (cp == NULL) 419 return; 420 if (cp->provider != NULL) 421 g_mirror_kill_consumer(sc, cp); 422 else 423 g_destroy_consumer(cp); 424} 425 426/* 427 * Initialize disk. This means allocate memory, create consumer, attach it 428 * to the provider and open access (r1w1e1) to it. 429 */ 430static struct g_mirror_disk * 431g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, 432 struct g_mirror_metadata *md, int *errorp) 433{ 434 struct g_mirror_disk *disk; 435 int error; 436 437 disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); 438 if (disk == NULL) { 439 error = ENOMEM; 440 goto fail; 441 } 442 disk->d_softc = sc; 443 error = g_mirror_connect_disk(disk, pp); 444 if (error != 0) 445 goto fail; 446 disk->d_id = md->md_did; 447 disk->d_state = G_MIRROR_DISK_STATE_NONE; 448 disk->d_priority = md->md_priority; 449 disk->d_delay.sec = 0; 450 disk->d_delay.frac = 0; 451 binuptime(&disk->d_last_used); 452 disk->d_flags = md->md_dflags; 453 if (md->md_provider[0] != '\0') 454 disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; 455 disk->d_sync.ds_consumer = NULL; 456 disk->d_sync.ds_offset = md->md_sync_offset; 457 disk->d_sync.ds_offset_done = md->md_sync_offset; 458 disk->d_sync.ds_resync = -1; 459 disk->d_genid = md->md_genid; 460 disk->d_sync.ds_syncid = md->md_syncid; 461 if (errorp != NULL) 462 *errorp = 0; 463 return (disk); 464fail: 465 if (errorp != NULL) 466 *errorp = error; 467 if (disk != NULL) 468 free(disk, M_MIRROR); 469 return (NULL); 470} 471 472static void 473g_mirror_destroy_disk(struct g_mirror_disk *disk) 474{ 475 struct g_mirror_softc *sc; 476 477 g_topology_assert(); 478 479 LIST_REMOVE(disk, d_next); 480 g_mirror_event_cancel(disk); 481 sc = disk->d_softc; 482 if (sc->sc_hint == disk) 483 sc->sc_hint = NULL; 484 switch (disk->d_state) { 485 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 486 g_mirror_sync_stop(disk, 1); 487 /* FALLTHROUGH */ 488 case G_MIRROR_DISK_STATE_NEW: 489 case G_MIRROR_DISK_STATE_STALE: 490 case G_MIRROR_DISK_STATE_ACTIVE: 491 g_mirror_disconnect_consumer(sc, disk->d_consumer); 492 free(disk, M_MIRROR); 493 break; 494 default: 495 KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 496 g_mirror_get_diskname(disk), 497 g_mirror_disk_state2str(disk->d_state))); 498 } 499} 500 501static void 502g_mirror_destroy_device(struct g_mirror_softc *sc) 503{ 504 struct g_mirror_disk *disk; 505 struct g_mirror_event *ep; 506 struct g_geom *gp; 507 struct g_consumer *cp, *tmpcp; 508 509 g_topology_assert(); 510 511 gp = sc->sc_geom; 512 if (sc->sc_provider != NULL) 513 g_mirror_destroy_provider(sc); 514 for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL; 515 disk = LIST_FIRST(&sc->sc_disks)) { 516 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 517 g_mirror_update_metadata(disk); 518 g_mirror_destroy_disk(disk); 519 } 520 while ((ep = g_mirror_event_get(sc)) != NULL) { 521 g_mirror_event_remove(sc, ep); 522 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 523 g_mirror_event_free(ep); 524 else { 525 ep->e_error = ECANCELED; 526 ep->e_flags |= G_MIRROR_EVENT_DONE; 527 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep); 528 mtx_lock(&sc->sc_events_mtx); 529 wakeup(ep); 530 mtx_unlock(&sc->sc_events_mtx); 531 } 532 } 533 callout_drain(&sc->sc_callout); 534 gp->softc = NULL; 535 536 LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) { 537 g_mirror_disconnect_consumer(sc, cp); 538 } 539 sc->sc_sync.ds_geom->softc = NULL; 540 g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 541 mtx_destroy(&sc->sc_queue_mtx); 542 mtx_destroy(&sc->sc_events_mtx); 543 G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name); 544 g_wither_geom(gp, ENXIO); 545} 546 547static void 548g_mirror_orphan(struct g_consumer *cp) 549{ 550 struct g_mirror_disk *disk; 551 552 g_topology_assert(); 553 554 disk = cp->private; 555 if (disk == NULL) 556 return; 557 disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 558 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 559 G_MIRROR_EVENT_DONTWAIT); 560} 561 562/* 563 * Function should return the next active disk on the list. 564 * It is possible that it will be the same disk as given. 565 * If there are no active disks on list, NULL is returned. 566 */ 567static __inline struct g_mirror_disk * 568g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 569{ 570 struct g_mirror_disk *dp; 571 572 for (dp = LIST_NEXT(disk, d_next); dp != disk; 573 dp = LIST_NEXT(dp, d_next)) { 574 if (dp == NULL) 575 dp = LIST_FIRST(&sc->sc_disks); 576 if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) 577 break; 578 } 579 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 580 return (NULL); 581 return (dp); 582} 583 584static struct g_mirror_disk * 585g_mirror_get_disk(struct g_mirror_softc *sc) 586{ 587 struct g_mirror_disk *disk; 588 589 if (sc->sc_hint == NULL) { 590 sc->sc_hint = LIST_FIRST(&sc->sc_disks); 591 if (sc->sc_hint == NULL) 592 return (NULL); 593 } 594 disk = sc->sc_hint; 595 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { 596 disk = g_mirror_find_next(sc, disk); 597 if (disk == NULL) 598 return (NULL); 599 } 600 sc->sc_hint = g_mirror_find_next(sc, disk); 601 return (disk); 602} 603 604static int 605g_mirror_write_metadata(struct g_mirror_disk *disk, 606 struct g_mirror_metadata *md) 607{ 608 struct g_mirror_softc *sc; 609 struct g_consumer *cp; 610 off_t offset, length; 611 u_char *sector; 612 int error = 0; 613 614 g_topology_assert(); 615 616 sc = disk->d_softc; 617 cp = disk->d_consumer; 618 KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 619 KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 620 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 621 ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 622 cp->acw, cp->ace)); 623 length = cp->provider->sectorsize; 624 offset = cp->provider->mediasize - length; 625 sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO); 626 if (md != NULL) 627 mirror_metadata_encode(md, sector); 628 g_topology_unlock(); 629 error = g_write_data(cp, offset, sector, length); 630 g_topology_lock(); 631 free(sector, M_MIRROR); 632 if (error != 0) { 633 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 634 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 635 G_MIRROR_DEBUG(0, "Cannot write metadata on %s " 636 "(device=%s, error=%d).", 637 g_mirror_get_diskname(disk), sc->sc_name, error); 638 } else { 639 G_MIRROR_DEBUG(1, "Cannot write metadata on %s " 640 "(device=%s, error=%d).", 641 g_mirror_get_diskname(disk), sc->sc_name, error); 642 } 643 if (g_mirror_disconnect_on_failure && 644 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { 645 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 646 g_mirror_event_send(disk, 647 G_MIRROR_DISK_STATE_DISCONNECTED, 648 G_MIRROR_EVENT_DONTWAIT); 649 } 650 } 651 return (error); 652} 653 654static int 655g_mirror_clear_metadata(struct g_mirror_disk *disk) 656{ 657 int error; 658 659 g_topology_assert(); 660 error = g_mirror_write_metadata(disk, NULL); 661 if (error == 0) { 662 G_MIRROR_DEBUG(2, "Metadata on %s cleared.", 663 g_mirror_get_diskname(disk)); 664 } else { 665 G_MIRROR_DEBUG(0, 666 "Cannot clear metadata on disk %s (error=%d).", 667 g_mirror_get_diskname(disk), error); 668 } 669 return (error); 670} 671 672void 673g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, 674 struct g_mirror_metadata *md) 675{ 676 677 strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic)); 678 md->md_version = G_MIRROR_VERSION; 679 strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 680 md->md_mid = sc->sc_id; 681 md->md_all = sc->sc_ndisks; 682 md->md_slice = sc->sc_slice; 683 md->md_balance = sc->sc_balance; 684 md->md_genid = sc->sc_genid; 685 md->md_mediasize = sc->sc_mediasize; 686 md->md_sectorsize = sc->sc_sectorsize; 687 md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK); 688 bzero(md->md_provider, sizeof(md->md_provider)); 689 if (disk == NULL) { 690 md->md_did = arc4random(); 691 md->md_priority = 0; 692 md->md_syncid = 0; 693 md->md_dflags = 0; 694 md->md_sync_offset = 0; 695 md->md_provsize = 0; 696 } else { 697 md->md_did = disk->d_id; 698 md->md_priority = disk->d_priority; 699 md->md_syncid = disk->d_sync.ds_syncid; 700 md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK); 701 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 702 md->md_sync_offset = disk->d_sync.ds_offset_done; 703 else 704 md->md_sync_offset = 0; 705 if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) { 706 strlcpy(md->md_provider, 707 disk->d_consumer->provider->name, 708 sizeof(md->md_provider)); 709 } 710 md->md_provsize = disk->d_consumer->provider->mediasize; 711 } 712} 713 714void 715g_mirror_update_metadata(struct g_mirror_disk *disk) 716{ 717 struct g_mirror_metadata md; 718 int error; 719 720 g_topology_assert(); 721 g_mirror_fill_metadata(disk->d_softc, disk, &md); 722 error = g_mirror_write_metadata(disk, &md); 723 if (error == 0) { 724 G_MIRROR_DEBUG(2, "Metadata on %s updated.", 725 g_mirror_get_diskname(disk)); 726 } else { 727 G_MIRROR_DEBUG(0, 728 "Cannot update metadata on disk %s (error=%d).", 729 g_mirror_get_diskname(disk), error); 730 } 731} 732 733static void 734g_mirror_bump_syncid(struct g_mirror_softc *sc) 735{ 736 struct g_mirror_disk *disk; 737 738 g_topology_assert(); 739 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 740 ("%s called with no active disks (device=%s).", __func__, 741 sc->sc_name)); 742 743 sc->sc_syncid++; 744 G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 745 sc->sc_syncid); 746 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 747 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 748 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 749 disk->d_sync.ds_syncid = sc->sc_syncid; 750 g_mirror_update_metadata(disk); 751 } 752 } 753} 754 755static void 756g_mirror_bump_genid(struct g_mirror_softc *sc) 757{ 758 struct g_mirror_disk *disk; 759 760 g_topology_assert(); 761 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 762 ("%s called with no active disks (device=%s).", __func__, 763 sc->sc_name)); 764 765 sc->sc_genid++; 766 G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 767 sc->sc_genid); 768 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 769 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 770 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 771 disk->d_genid = sc->sc_genid; 772 g_mirror_update_metadata(disk); 773 } 774 } 775} 776 777static int 778g_mirror_idle(struct g_mirror_softc *sc, int from_access) 779{ 780 struct g_mirror_disk *disk; 781 int timeout; 782 783 if (sc->sc_provider == NULL) 784 return (0); 785 if (sc->sc_idle) 786 return (0); 787 if (sc->sc_writes > 0) 788 return (0); 789 if (!from_access && sc->sc_provider->acw > 0) { 790 timeout = g_mirror_idletime - (time_second - sc->sc_last_write); 791 if (timeout > 0) 792 return (timeout); 793 } 794 sc->sc_idle = 1; 795 if (!from_access) 796 g_topology_lock(); 797 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 798 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 799 continue; 800 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 801 g_mirror_get_diskname(disk), sc->sc_name); 802 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 803 g_mirror_update_metadata(disk); 804 } 805 if (!from_access) 806 g_topology_unlock(); 807 return (0); 808} 809 810static void 811g_mirror_unidle(struct g_mirror_softc *sc) 812{ 813 struct g_mirror_disk *disk; 814 815 sc->sc_idle = 0; 816 sc->sc_last_write = time_second; 817 g_topology_lock(); 818 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 819 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 820 continue; 821 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.", 822 g_mirror_get_diskname(disk), sc->sc_name); 823 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 824 g_mirror_update_metadata(disk); 825 } 826 g_topology_unlock(); 827} 828 829static __inline int 830bintime_cmp(struct bintime *bt1, struct bintime *bt2) 831{ 832 833 if (bt1->sec < bt2->sec) 834 return (-1); 835 else if (bt1->sec > bt2->sec) 836 return (1); 837 if (bt1->frac < bt2->frac) 838 return (-1); 839 else if (bt1->frac > bt2->frac) 840 return (1); 841 return (0); 842} 843 844static void 845g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp) 846{ 847 848 if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD) 849 return; 850 binuptime(&disk->d_delay); 851 bintime_sub(&disk->d_delay, &bp->bio_t0); 852} 853 854static void 855g_mirror_done(struct bio *bp) 856{ 857 struct g_mirror_softc *sc; 858 859 sc = bp->bio_from->geom->softc; 860 bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR; 861 mtx_lock(&sc->sc_queue_mtx); 862 bioq_disksort(&sc->sc_queue, bp); 863 wakeup(sc); 864 mtx_unlock(&sc->sc_queue_mtx); 865} 866 867static void 868g_mirror_regular_request(struct bio *bp) 869{ 870 struct g_mirror_softc *sc; 871 struct g_mirror_disk *disk; 872 struct bio *pbp; 873 874 g_topology_assert_not(); 875 876 pbp = bp->bio_parent; 877 sc = pbp->bio_to->geom->softc; 878 bp->bio_from->index--; 879 if (bp->bio_cmd == BIO_WRITE) 880 sc->sc_writes--; 881 disk = bp->bio_from->private; 882 if (disk == NULL) { 883 g_topology_lock(); 884 g_mirror_kill_consumer(sc, bp->bio_from); 885 g_topology_unlock(); 886 } else { 887 g_mirror_update_delay(disk, bp); 888 } 889 890 pbp->bio_inbed++; 891 KASSERT(pbp->bio_inbed <= pbp->bio_children, 892 ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 893 pbp->bio_children)); 894 if (bp->bio_error == 0 && pbp->bio_error == 0) { 895 G_MIRROR_LOGREQ(3, bp, "Request delivered."); 896 g_destroy_bio(bp); 897 if (pbp->bio_children == pbp->bio_inbed) { 898 G_MIRROR_LOGREQ(3, pbp, "Request delivered."); 899 pbp->bio_completed = pbp->bio_length; 900 g_io_deliver(pbp, pbp->bio_error); 901 } 902 return; 903 } else if (bp->bio_error != 0) { 904 if (pbp->bio_error == 0) 905 pbp->bio_error = bp->bio_error; 906 if (disk != NULL) { 907 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 908 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 909 G_MIRROR_LOGREQ(0, bp, 910 "Request failed (error=%d).", 911 bp->bio_error); 912 } else { 913 G_MIRROR_LOGREQ(1, bp, 914 "Request failed (error=%d).", 915 bp->bio_error); 916 } 917 if (g_mirror_disconnect_on_failure && 918 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) 919 { 920 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 921 g_mirror_event_send(disk, 922 G_MIRROR_DISK_STATE_DISCONNECTED, 923 G_MIRROR_EVENT_DONTWAIT); 924 } 925 } 926 switch (pbp->bio_cmd) { 927 case BIO_DELETE: 928 case BIO_WRITE: 929 pbp->bio_inbed--; 930 pbp->bio_children--; 931 break; 932 } 933 } 934 g_destroy_bio(bp); 935 936 switch (pbp->bio_cmd) { 937 case BIO_READ: 938 if (pbp->bio_inbed < pbp->bio_children) 939 break; 940 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1) 941 g_io_deliver(pbp, pbp->bio_error); 942 else { 943 pbp->bio_error = 0; 944 mtx_lock(&sc->sc_queue_mtx); 945 bioq_disksort(&sc->sc_queue, pbp); 946 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 947 wakeup(sc); 948 mtx_unlock(&sc->sc_queue_mtx); 949 } 950 break; 951 case BIO_DELETE: 952 case BIO_WRITE: 953 if (pbp->bio_children == 0) { 954 /* 955 * All requests failed. 956 */ 957 } else if (pbp->bio_inbed < pbp->bio_children) { 958 /* Do nothing. */ 959 break; 960 } else if (pbp->bio_children == pbp->bio_inbed) { 961 /* Some requests succeeded. */ 962 pbp->bio_error = 0; 963 pbp->bio_completed = pbp->bio_length; 964 } 965 g_io_deliver(pbp, pbp->bio_error); 966 break; 967 default: 968 KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd)); 969 break; 970 } 971} 972 973static void 974g_mirror_sync_done(struct bio *bp) 975{ 976 struct g_mirror_softc *sc; 977 978 G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); 979 sc = bp->bio_from->geom->softc; 980 bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC; 981 mtx_lock(&sc->sc_queue_mtx); 982 bioq_disksort(&sc->sc_queue, bp); 983 wakeup(sc); 984 mtx_unlock(&sc->sc_queue_mtx); 985} 986 987static void 988g_mirror_start(struct bio *bp) 989{ 990 struct g_mirror_softc *sc; 991 992 sc = bp->bio_to->geom->softc; 993 /* 994 * If sc == NULL or there are no valid disks, provider's error 995 * should be set and g_mirror_start() should not be called at all. 996 */ 997 KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 998 ("Provider's error should be set (error=%d)(mirror=%s).", 999 bp->bio_to->error, bp->bio_to->name)); 1000 G_MIRROR_LOGREQ(3, bp, "Request received."); 1001 1002 switch (bp->bio_cmd) { 1003 case BIO_READ: 1004 case BIO_WRITE: 1005 case BIO_DELETE: 1006 break; 1007 case BIO_GETATTR: 1008 default: 1009 g_io_deliver(bp, EOPNOTSUPP); 1010 return; 1011 } 1012 mtx_lock(&sc->sc_queue_mtx); 1013 bioq_disksort(&sc->sc_queue, bp); 1014 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1015 wakeup(sc); 1016 mtx_unlock(&sc->sc_queue_mtx); 1017} 1018 1019/* 1020 * Send one synchronization request. 1021 */ 1022static void 1023g_mirror_sync_one(struct g_mirror_disk *disk) 1024{ 1025 struct g_mirror_softc *sc; 1026 struct bio *bp; 1027 1028 sc = disk->d_softc; 1029 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 1030 ("Disk %s is not marked for synchronization.", 1031 g_mirror_get_diskname(disk))); 1032 1033 bp = g_new_bio(); 1034 if (bp == NULL) 1035 return; 1036 bp->bio_parent = NULL; 1037 bp->bio_cmd = BIO_READ; 1038 bp->bio_offset = disk->d_sync.ds_offset; 1039 bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 1040 bp->bio_cflags = 0; 1041 bp->bio_done = g_mirror_sync_done; 1042 bp->bio_data = disk->d_sync.ds_data; 1043 if (bp->bio_data == NULL) { 1044 g_destroy_bio(bp); 1045 return; 1046 } 1047 disk->d_sync.ds_offset += bp->bio_length; 1048 bp->bio_to = sc->sc_provider; 1049 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); 1050 disk->d_sync.ds_consumer->index++; 1051 g_io_request(bp, disk->d_sync.ds_consumer); 1052} 1053 1054static void 1055g_mirror_sync_request(struct bio *bp) 1056{ 1057 struct g_mirror_softc *sc; 1058 struct g_mirror_disk *disk; 1059 1060 bp->bio_from->index--; 1061 sc = bp->bio_from->geom->softc; 1062 disk = bp->bio_from->private; 1063 if (disk == NULL) { 1064 g_topology_lock(); 1065 g_mirror_kill_consumer(sc, bp->bio_from); 1066 g_topology_unlock(); 1067 g_destroy_bio(bp); 1068 return; 1069 } 1070 1071 /* 1072 * Synchronization request. 1073 */ 1074 switch (bp->bio_cmd) { 1075 case BIO_READ: 1076 { 1077 struct g_consumer *cp; 1078 1079 if (bp->bio_error != 0) { 1080 G_MIRROR_LOGREQ(0, bp, 1081 "Synchronization request failed (error=%d).", 1082 bp->bio_error); 1083 g_destroy_bio(bp); 1084 return; 1085 } 1086 G_MIRROR_LOGREQ(3, bp, 1087 "Synchronization request half-finished."); 1088 bp->bio_cmd = BIO_WRITE; 1089 bp->bio_cflags = 0; 1090 cp = disk->d_consumer; 1091 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1092 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1093 cp->acr, cp->acw, cp->ace)); 1094 cp->index++; 1095 g_io_request(bp, cp); 1096 return; 1097 } 1098 case BIO_WRITE: 1099 { 1100 struct g_mirror_disk_sync *sync; 1101 1102 if (bp->bio_error != 0) { 1103 G_MIRROR_LOGREQ(0, bp, 1104 "Synchronization request failed (error=%d).", 1105 bp->bio_error); 1106 g_destroy_bio(bp); 1107 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 1108 g_mirror_event_send(disk, 1109 G_MIRROR_DISK_STATE_DISCONNECTED, 1110 G_MIRROR_EVENT_DONTWAIT); 1111 return; 1112 } 1113 G_MIRROR_LOGREQ(3, bp, "Synchronization request finished."); 1114 sync = &disk->d_sync; 1115 sync->ds_offset_done = bp->bio_offset + bp->bio_length; 1116 g_destroy_bio(bp); 1117 if (sync->ds_resync != -1) 1118 break; 1119 if (sync->ds_offset_done == sc->sc_provider->mediasize) { 1120 /* 1121 * Disk up-to-date, activate it. 1122 */ 1123 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE, 1124 G_MIRROR_EVENT_DONTWAIT); 1125 return; 1126 } else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) { 1127 /* 1128 * Update offset_done on every 100 blocks. 1129 * XXX: This should be configurable. 1130 */ 1131 g_topology_lock(); 1132 g_mirror_update_metadata(disk); 1133 g_topology_unlock(); 1134 } 1135 return; 1136 } 1137 default: 1138 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1139 bp->bio_cmd, sc->sc_name)); 1140 break; 1141 } 1142} 1143 1144static void 1145g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp) 1146{ 1147 struct g_mirror_disk *disk; 1148 struct g_consumer *cp; 1149 struct bio *cbp; 1150 1151 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1152 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) 1153 break; 1154 } 1155 if (disk == NULL) { 1156 if (bp->bio_error == 0) 1157 bp->bio_error = ENXIO; 1158 g_io_deliver(bp, bp->bio_error); 1159 return; 1160 } 1161 cbp = g_clone_bio(bp); 1162 if (cbp == NULL) { 1163 if (bp->bio_error == 0) 1164 bp->bio_error = ENOMEM; 1165 g_io_deliver(bp, bp->bio_error); 1166 return; 1167 } 1168 /* 1169 * Fill in the component buf structure. 1170 */ 1171 cp = disk->d_consumer; 1172 cbp->bio_done = g_mirror_done; 1173 cbp->bio_to = cp->provider; 1174 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1175 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1176 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1177 cp->acw, cp->ace)); 1178 cp->index++; 1179 g_io_request(cbp, cp); 1180} 1181 1182static void 1183g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp) 1184{ 1185 struct g_mirror_disk *disk; 1186 struct g_consumer *cp; 1187 struct bio *cbp; 1188 1189 disk = g_mirror_get_disk(sc); 1190 if (disk == NULL) { 1191 if (bp->bio_error == 0) 1192 bp->bio_error = ENXIO; 1193 g_io_deliver(bp, bp->bio_error); 1194 return; 1195 } 1196 cbp = g_clone_bio(bp); 1197 if (cbp == NULL) { 1198 if (bp->bio_error == 0) 1199 bp->bio_error = ENOMEM; 1200 g_io_deliver(bp, bp->bio_error); 1201 return; 1202 } 1203 /* 1204 * Fill in the component buf structure. 1205 */ 1206 cp = disk->d_consumer; 1207 cbp->bio_done = g_mirror_done; 1208 cbp->bio_to = cp->provider; 1209 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1210 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1211 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1212 cp->acw, cp->ace)); 1213 cp->index++; 1214 g_io_request(cbp, cp); 1215} 1216 1217static void 1218g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) 1219{ 1220 struct g_mirror_disk *disk, *dp; 1221 struct g_consumer *cp; 1222 struct bio *cbp; 1223 struct bintime curtime; 1224 1225 binuptime(&curtime); 1226 /* 1227 * Find a disk which the smallest load. 1228 */ 1229 disk = NULL; 1230 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 1231 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1232 continue; 1233 /* If disk wasn't used for more than 2 sec, use it. */ 1234 if (curtime.sec - dp->d_last_used.sec >= 2) { 1235 disk = dp; 1236 break; 1237 } 1238 if (disk == NULL || 1239 bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) { 1240 disk = dp; 1241 } 1242 } 1243 KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name)); 1244 cbp = g_clone_bio(bp); 1245 if (cbp == NULL) { 1246 if (bp->bio_error == 0) 1247 bp->bio_error = ENOMEM; 1248 g_io_deliver(bp, bp->bio_error); 1249 return; 1250 } 1251 /* 1252 * Fill in the component buf structure. 1253 */ 1254 cp = disk->d_consumer; 1255 cbp->bio_done = g_mirror_done; 1256 cbp->bio_to = cp->provider; 1257 binuptime(&disk->d_last_used); 1258 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1259 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1260 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1261 cp->acw, cp->ace)); 1262 cp->index++; 1263 g_io_request(cbp, cp); 1264} 1265 1266static void 1267g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) 1268{ 1269 struct bio_queue_head queue; 1270 struct g_mirror_disk *disk; 1271 struct g_consumer *cp; 1272 struct bio *cbp; 1273 off_t left, mod, offset, slice; 1274 u_char *data; 1275 u_int ndisks; 1276 1277 if (bp->bio_length <= sc->sc_slice) { 1278 g_mirror_request_round_robin(sc, bp); 1279 return; 1280 } 1281 ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); 1282 slice = bp->bio_length / ndisks; 1283 mod = slice % sc->sc_provider->sectorsize; 1284 if (mod != 0) 1285 slice += sc->sc_provider->sectorsize - mod; 1286 /* 1287 * Allocate all bios before sending any request, so we can 1288 * return ENOMEM in nice and clean way. 1289 */ 1290 left = bp->bio_length; 1291 offset = bp->bio_offset; 1292 data = bp->bio_data; 1293 bioq_init(&queue); 1294 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1295 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1296 continue; 1297 cbp = g_clone_bio(bp); 1298 if (cbp == NULL) { 1299 for (cbp = bioq_first(&queue); cbp != NULL; 1300 cbp = bioq_first(&queue)) { 1301 bioq_remove(&queue, cbp); 1302 g_destroy_bio(cbp); 1303 } 1304 if (bp->bio_error == 0) 1305 bp->bio_error = ENOMEM; 1306 g_io_deliver(bp, bp->bio_error); 1307 return; 1308 } 1309 bioq_insert_tail(&queue, cbp); 1310 cbp->bio_done = g_mirror_done; 1311 cbp->bio_caller1 = disk; 1312 cbp->bio_to = disk->d_consumer->provider; 1313 cbp->bio_offset = offset; 1314 cbp->bio_data = data; 1315 cbp->bio_length = MIN(left, slice); 1316 left -= cbp->bio_length; 1317 if (left == 0) 1318 break; 1319 offset += cbp->bio_length; 1320 data += cbp->bio_length; 1321 } 1322 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1323 bioq_remove(&queue, cbp); 1324 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1325 disk = cbp->bio_caller1; 1326 cbp->bio_caller1 = NULL; 1327 cp = disk->d_consumer; 1328 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1329 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1330 cp->acr, cp->acw, cp->ace)); 1331 disk->d_consumer->index++; 1332 g_io_request(cbp, disk->d_consumer); 1333 } 1334} 1335 1336static void 1337g_mirror_register_request(struct bio *bp) 1338{ 1339 struct g_mirror_softc *sc; 1340 1341 sc = bp->bio_to->geom->softc; 1342 switch (bp->bio_cmd) { 1343 case BIO_READ: 1344 switch (sc->sc_balance) { 1345 case G_MIRROR_BALANCE_LOAD: 1346 g_mirror_request_load(sc, bp); 1347 break; 1348 case G_MIRROR_BALANCE_PREFER: 1349 g_mirror_request_prefer(sc, bp); 1350 break; 1351 case G_MIRROR_BALANCE_ROUND_ROBIN: 1352 g_mirror_request_round_robin(sc, bp); 1353 break; 1354 case G_MIRROR_BALANCE_SPLIT: 1355 g_mirror_request_split(sc, bp); 1356 break; 1357 } 1358 return; 1359 case BIO_WRITE: 1360 case BIO_DELETE: 1361 { 1362 struct g_mirror_disk *disk; 1363 struct g_mirror_disk_sync *sync; 1364 struct bio_queue_head queue; 1365 struct g_consumer *cp; 1366 struct bio *cbp; 1367 1368 if (sc->sc_idle) 1369 g_mirror_unidle(sc); 1370 else 1371 sc->sc_last_write = time_second; 1372 1373 /* 1374 * Allocate all bios before sending any request, so we can 1375 * return ENOMEM in nice and clean way. 1376 */ 1377 bioq_init(&queue); 1378 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1379 sync = &disk->d_sync; 1380 switch (disk->d_state) { 1381 case G_MIRROR_DISK_STATE_ACTIVE: 1382 break; 1383 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 1384 if (bp->bio_offset >= sync->ds_offset) 1385 continue; 1386 else if (bp->bio_offset + bp->bio_length > 1387 sync->ds_offset_done && 1388 (bp->bio_offset < sync->ds_resync || 1389 sync->ds_resync == -1)) { 1390 sync->ds_resync = bp->bio_offset - 1391 (bp->bio_offset % MAXPHYS); 1392 } 1393 break; 1394 default: 1395 continue; 1396 } 1397 cbp = g_clone_bio(bp); 1398 if (cbp == NULL) { 1399 for (cbp = bioq_first(&queue); cbp != NULL; 1400 cbp = bioq_first(&queue)) { 1401 bioq_remove(&queue, cbp); 1402 g_destroy_bio(cbp); 1403 } 1404 if (bp->bio_error == 0) 1405 bp->bio_error = ENOMEM; 1406 g_io_deliver(bp, bp->bio_error); 1407 return; 1408 } 1409 bioq_insert_tail(&queue, cbp); 1410 cbp->bio_done = g_mirror_done; 1411 cp = disk->d_consumer; 1412 cbp->bio_caller1 = cp; 1413 cbp->bio_to = cp->provider; 1414 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1, 1415 ("Consumer %s not opened (r%dw%de%d).", 1416 cp->provider->name, cp->acr, cp->acw, cp->ace)); 1417 } 1418 for (cbp = bioq_first(&queue); cbp != NULL; 1419 cbp = bioq_first(&queue)) { 1420 bioq_remove(&queue, cbp); 1421 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1422 cp = cbp->bio_caller1; 1423 cbp->bio_caller1 = NULL; 1424 cp->index++; 1425 sc->sc_writes++; 1426 g_io_request(cbp, cp); 1427 } 1428 /* 1429 * Bump syncid on first write. 1430 */ 1431 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) { 1432 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 1433 g_topology_lock(); 1434 g_mirror_bump_syncid(sc); 1435 g_topology_unlock(); 1436 } 1437 return; 1438 } 1439 default: 1440 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1441 bp->bio_cmd, sc->sc_name)); 1442 break; 1443 } 1444} 1445 1446static int 1447g_mirror_can_destroy(struct g_mirror_softc *sc) 1448{ 1449 struct g_geom *gp; 1450 struct g_consumer *cp; 1451 1452 g_topology_assert(); 1453 gp = sc->sc_geom; 1454 LIST_FOREACH(cp, &gp->consumer, consumer) { 1455 if (g_mirror_is_busy(sc, cp)) 1456 return (0); 1457 } 1458 gp = sc->sc_sync.ds_geom; 1459 LIST_FOREACH(cp, &gp->consumer, consumer) { 1460 if (g_mirror_is_busy(sc, cp)) 1461 return (0); 1462 } 1463 G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.", 1464 sc->sc_name); 1465 return (1); 1466} 1467 1468static int 1469g_mirror_try_destroy(struct g_mirror_softc *sc) 1470{ 1471 1472 if (sc->sc_rootmount != NULL) { 1473 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1474 sc->sc_rootmount); 1475 root_mount_rel(sc->sc_rootmount); 1476 sc->sc_rootmount = NULL; 1477 } 1478 g_topology_lock(); 1479 if (!g_mirror_can_destroy(sc)) { 1480 g_topology_unlock(); 1481 return (0); 1482 } 1483 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) { 1484 g_topology_unlock(); 1485 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1486 &sc->sc_worker); 1487 wakeup(&sc->sc_worker); 1488 sc->sc_worker = NULL; 1489 } else { 1490 g_mirror_destroy_device(sc); 1491 g_topology_unlock(); 1492 free(sc, M_MIRROR); 1493 } 1494 return (1); 1495} 1496 1497/* 1498 * Worker thread. 1499 */ 1500static void 1501g_mirror_worker(void *arg) 1502{ 1503 struct g_mirror_softc *sc; 1504 struct g_mirror_disk *disk; 1505 struct g_mirror_disk_sync *sync; 1506 struct g_mirror_event *ep; 1507 struct bio *bp; 1508 u_int nreqs; 1509 int timeout; 1510 1511 sc = arg; 1512 mtx_lock_spin(&sched_lock); 1513 sched_prio(curthread, PRIBIO); 1514 mtx_unlock_spin(&sched_lock); 1515 1516 nreqs = 0; 1517 for (;;) { 1518 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); 1519 /* 1520 * First take a look at events. 1521 * This is important to handle events before any I/O requests. 1522 */ 1523 ep = g_mirror_event_get(sc); 1524 if (ep != NULL && g_topology_try_lock()) { 1525 g_mirror_event_remove(sc, ep); 1526 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { 1527 /* Update only device status. */ 1528 G_MIRROR_DEBUG(3, 1529 "Running event for device %s.", 1530 sc->sc_name); 1531 ep->e_error = 0; 1532 g_mirror_update_device(sc, 1); 1533 } else { 1534 /* Update disk status. */ 1535 G_MIRROR_DEBUG(3, "Running event for disk %s.", 1536 g_mirror_get_diskname(ep->e_disk)); 1537 ep->e_error = g_mirror_update_disk(ep->e_disk, 1538 ep->e_state); 1539 if (ep->e_error == 0) 1540 g_mirror_update_device(sc, 0); 1541 } 1542 g_topology_unlock(); 1543 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { 1544 KASSERT(ep->e_error == 0, 1545 ("Error cannot be handled.")); 1546 g_mirror_event_free(ep); 1547 } else { 1548 ep->e_flags |= G_MIRROR_EVENT_DONE; 1549 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1550 ep); 1551 mtx_lock(&sc->sc_events_mtx); 1552 wakeup(ep); 1553 mtx_unlock(&sc->sc_events_mtx); 1554 } 1555 if ((sc->sc_flags & 1556 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1557 if (g_mirror_try_destroy(sc)) 1558 kthread_exit(0); 1559 } 1560 G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); 1561 continue; 1562 } 1563 /* 1564 * Check if we can mark array as CLEAN and if we can't take 1565 * how much seconds should we wait. 1566 */ 1567 timeout = g_mirror_idle(sc, 0); 1568 /* 1569 * Now I/O requests. 1570 */ 1571 /* Get first request from the queue. */ 1572 mtx_lock(&sc->sc_queue_mtx); 1573 bp = bioq_first(&sc->sc_queue); 1574 if (bp == NULL) { 1575 if (ep != NULL) { 1576 /* 1577 * No I/O requests and topology lock was 1578 * already held? Try again. 1579 */ 1580 mtx_unlock(&sc->sc_queue_mtx); 1581 continue; 1582 } 1583 if ((sc->sc_flags & 1584 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1585 mtx_unlock(&sc->sc_queue_mtx); 1586 if (g_mirror_try_destroy(sc)) 1587 kthread_exit(0); 1588 mtx_lock(&sc->sc_queue_mtx); 1589 } 1590 } 1591 if (sc->sc_sync.ds_ndisks > 0 && 1592 (bp == NULL || nreqs > g_mirror_reqs_per_sync)) { 1593 mtx_unlock(&sc->sc_queue_mtx); 1594 /* 1595 * It is time for synchronization... 1596 */ 1597 nreqs = 0; 1598 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1599 if (disk->d_state != 1600 G_MIRROR_DISK_STATE_SYNCHRONIZING) { 1601 continue; 1602 } 1603 sync = &disk->d_sync; 1604 if (sync->ds_offset >= 1605 sc->sc_provider->mediasize) { 1606 continue; 1607 } 1608 if (sync->ds_offset > sync->ds_offset_done) 1609 continue; 1610 if (sync->ds_resync != -1) { 1611 sync->ds_offset = sync->ds_resync; 1612 sync->ds_offset_done = sync->ds_resync; 1613 sync->ds_resync = -1; 1614 } 1615 g_mirror_sync_one(disk); 1616 } 1617 G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__); 1618 goto sleep; 1619 } 1620 if (bp == NULL) { 1621 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 1622 timeout * hz); 1623 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); 1624 continue; 1625 } 1626 nreqs++; 1627 bioq_remove(&sc->sc_queue, bp); 1628 mtx_unlock(&sc->sc_queue_mtx); 1629 1630 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) { 1631 g_mirror_regular_request(bp); 1632 } else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { 1633 u_int timeout, sps; 1634 1635 g_mirror_sync_request(bp); 1636sleep: 1637 sps = g_mirror_syncs_per_sec; 1638 if (sps == 0) { 1639 G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__); 1640 continue; 1641 } 1642 if (ep != NULL) { 1643 /* 1644 * We have some pending events, don't sleep now. 1645 */ 1646 G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__); 1647 continue; 1648 } 1649 mtx_lock(&sc->sc_queue_mtx); 1650 if (bioq_first(&sc->sc_queue) != NULL) { 1651 mtx_unlock(&sc->sc_queue_mtx); 1652 G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__); 1653 continue; 1654 } 1655 timeout = hz / sps; 1656 if (timeout == 0) 1657 timeout = 1; 1658 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3", 1659 timeout); 1660 } else { 1661 g_mirror_register_request(bp); 1662 } 1663 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); 1664 } 1665} 1666 1667static void 1668g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 1669{ 1670 1671 g_topology_assert(); 1672 if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { 1673 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.", 1674 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1675 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1676 } else if (sc->sc_idle && 1677 (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 1678 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.", 1679 g_mirror_get_diskname(disk), disk->d_softc->sc_name); 1680 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 1681 } 1682} 1683 1684static void 1685g_mirror_sync_start(struct g_mirror_disk *disk) 1686{ 1687 struct g_mirror_softc *sc; 1688 int error; 1689 1690 g_topology_assert(); 1691 1692 sc = disk->d_softc; 1693 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 1694 ("Device not in RUNNING state (%s, %u).", sc->sc_name, 1695 sc->sc_state)); 1696 1697 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 1698 g_mirror_get_diskname(disk)); 1699 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 1700 KASSERT(disk->d_sync.ds_consumer == NULL, 1701 ("Sync consumer already exists (device=%s, disk=%s).", 1702 sc->sc_name, g_mirror_get_diskname(disk))); 1703 disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom); 1704 disk->d_sync.ds_consumer->private = disk; 1705 disk->d_sync.ds_consumer->index = 0; 1706 error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider); 1707 KASSERT(error == 0, ("Cannot attach to %s (error=%d).", 1708 disk->d_softc->sc_name, error)); 1709 error = g_access(disk->d_sync.ds_consumer, 1, 0, 0); 1710 KASSERT(error == 0, ("Cannot open %s (error=%d).", 1711 disk->d_softc->sc_name, error)); 1712 disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK); 1713 sc->sc_sync.ds_ndisks++; 1714} 1715 1716/* 1717 * Stop synchronization process. 1718 * type: 0 - synchronization finished 1719 * 1 - synchronization stopped 1720 */ 1721static void 1722g_mirror_sync_stop(struct g_mirror_disk *disk, int type) 1723{ 1724 1725 g_topology_assert(); 1726 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 1727 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 1728 g_mirror_disk_state2str(disk->d_state))); 1729 if (disk->d_sync.ds_consumer == NULL) 1730 return; 1731 1732 if (type == 0) { 1733 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.", 1734 disk->d_softc->sc_name, g_mirror_get_diskname(disk)); 1735 } else /* if (type == 1) */ { 1736 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 1737 disk->d_softc->sc_name, g_mirror_get_diskname(disk)); 1738 } 1739 g_mirror_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer); 1740 free(disk->d_sync.ds_data, M_MIRROR); 1741 disk->d_sync.ds_consumer = NULL; 1742 disk->d_softc->sc_sync.ds_ndisks--; 1743 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 1744} 1745 1746static void 1747g_mirror_launch_provider(struct g_mirror_softc *sc) 1748{ 1749 struct g_mirror_disk *disk; 1750 struct g_provider *pp; 1751 1752 g_topology_assert(); 1753 1754 pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); 1755 pp->mediasize = sc->sc_mediasize; 1756 pp->sectorsize = sc->sc_sectorsize; 1757 sc->sc_provider = pp; 1758 g_error_provider(pp, 0); 1759 G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name, 1760 pp->name); 1761 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1762 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 1763 g_mirror_sync_start(disk); 1764 } 1765} 1766 1767static void 1768g_mirror_destroy_provider(struct g_mirror_softc *sc) 1769{ 1770 struct g_mirror_disk *disk; 1771 struct bio *bp; 1772 1773 g_topology_assert(); 1774 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 1775 sc->sc_name)); 1776 1777 g_error_provider(sc->sc_provider, ENXIO); 1778 mtx_lock(&sc->sc_queue_mtx); 1779 while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 1780 bioq_remove(&sc->sc_queue, bp); 1781 g_io_deliver(bp, ENXIO); 1782 } 1783 mtx_unlock(&sc->sc_queue_mtx); 1784 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 1785 sc->sc_provider->name); 1786 sc->sc_provider->flags |= G_PF_WITHER; 1787 g_orphan_provider(sc->sc_provider, ENXIO); 1788 sc->sc_provider = NULL; 1789 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1790 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 1791 g_mirror_sync_stop(disk, 1); 1792 } 1793} 1794 1795static void 1796g_mirror_go(void *arg) 1797{ 1798 struct g_mirror_softc *sc; 1799 1800 sc = arg; 1801 G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 1802 g_mirror_event_send(sc, 0, 1803 G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE); 1804} 1805 1806static u_int 1807g_mirror_determine_state(struct g_mirror_disk *disk) 1808{ 1809 struct g_mirror_softc *sc; 1810 u_int state; 1811 1812 sc = disk->d_softc; 1813 if (sc->sc_syncid == disk->d_sync.ds_syncid) { 1814 if ((disk->d_flags & 1815 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { 1816 /* Disk does not need synchronization. */ 1817 state = G_MIRROR_DISK_STATE_ACTIVE; 1818 } else { 1819 if ((sc->sc_flags & 1820 G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 1821 (disk->d_flags & 1822 G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 1823 /* 1824 * We can start synchronization from 1825 * the stored offset. 1826 */ 1827 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 1828 } else { 1829 state = G_MIRROR_DISK_STATE_STALE; 1830 } 1831 } 1832 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 1833 /* 1834 * Reset all synchronization data for this disk, 1835 * because if it even was synchronized, it was 1836 * synchronized to disks with different syncid. 1837 */ 1838 disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING; 1839 disk->d_sync.ds_offset = 0; 1840 disk->d_sync.ds_offset_done = 0; 1841 disk->d_sync.ds_syncid = sc->sc_syncid; 1842 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 1843 (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 1844 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 1845 } else { 1846 state = G_MIRROR_DISK_STATE_STALE; 1847 } 1848 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 1849 /* 1850 * Not good, NOT GOOD! 1851 * It means that mirror was started on stale disks 1852 * and more fresh disk just arrive. 1853 * If there were writes, mirror is fucked up, sorry. 1854 * I think the best choice here is don't touch 1855 * this disk and inform the user laudly. 1856 */ 1857 G_MIRROR_DEBUG(0, "Device %s was started before the freshest " 1858 "disk (%s) arrives!! It will not be connected to the " 1859 "running device.", sc->sc_name, 1860 g_mirror_get_diskname(disk)); 1861 g_mirror_destroy_disk(disk); 1862 state = G_MIRROR_DISK_STATE_NONE; 1863 /* Return immediately, because disk was destroyed. */ 1864 return (state); 1865 } 1866 G_MIRROR_DEBUG(3, "State for %s disk: %s.", 1867 g_mirror_get_diskname(disk), g_mirror_disk_state2str(state)); 1868 return (state); 1869} 1870 1871/* 1872 * Update device state. 1873 */ 1874static void 1875g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force) 1876{ 1877 struct g_mirror_disk *disk; 1878 u_int state; 1879 1880 g_topology_assert(); 1881 1882 switch (sc->sc_state) { 1883 case G_MIRROR_DEVICE_STATE_STARTING: 1884 { 1885 struct g_mirror_disk *pdisk, *tdisk; 1886 u_int dirty, ndisks, genid, syncid; 1887 1888 KASSERT(sc->sc_provider == NULL, 1889 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 1890 /* 1891 * Are we ready? We are, if all disks are connected or 1892 * if we have any disks and 'force' is true. 1893 */ 1894 if ((force && g_mirror_ndisks(sc, -1) > 0) || 1895 sc->sc_ndisks == g_mirror_ndisks(sc, -1)) { 1896 ; 1897 } else if (g_mirror_ndisks(sc, -1) == 0) { 1898 /* 1899 * Disks went down in starting phase, so destroy 1900 * device. 1901 */ 1902 callout_drain(&sc->sc_callout); 1903 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 1904 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1905 sc->sc_rootmount); 1906 root_mount_rel(sc->sc_rootmount); 1907 sc->sc_rootmount = NULL; 1908 return; 1909 } else { 1910 return; 1911 } 1912 1913 /* 1914 * Activate all disks with the biggest syncid. 1915 */ 1916 if (force) { 1917 /* 1918 * If 'force' is true, we have been called due to 1919 * timeout, so don't bother canceling timeout. 1920 */ 1921 ndisks = 0; 1922 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1923 if ((disk->d_flags & 1924 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { 1925 ndisks++; 1926 } 1927 } 1928 if (ndisks == 0) { 1929 /* No valid disks found, destroy device. */ 1930 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 1931 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", 1932 __LINE__, sc->sc_rootmount); 1933 root_mount_rel(sc->sc_rootmount); 1934 sc->sc_rootmount = NULL; 1935 return; 1936 } 1937 } else { 1938 /* Cancel timeout. */ 1939 callout_drain(&sc->sc_callout); 1940 } 1941 1942 /* 1943 * Find the biggest genid. 1944 */ 1945 genid = 0; 1946 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1947 if (disk->d_genid > genid) 1948 genid = disk->d_genid; 1949 } 1950 sc->sc_genid = genid; 1951 /* 1952 * Remove all disks without the biggest genid. 1953 */ 1954 LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 1955 if (disk->d_genid < genid) { 1956 G_MIRROR_DEBUG(0, 1957 "Component %s (device %s) broken, skipping.", 1958 g_mirror_get_diskname(disk), sc->sc_name); 1959 g_mirror_destroy_disk(disk); 1960 } 1961 } 1962 1963 /* 1964 * Find the biggest syncid. 1965 */ 1966 syncid = 0; 1967 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1968 if (disk->d_sync.ds_syncid > syncid) 1969 syncid = disk->d_sync.ds_syncid; 1970 } 1971 1972 /* 1973 * Here we need to look for dirty disks and if all disks 1974 * with the biggest syncid are dirty, we have to choose 1975 * one with the biggest priority and rebuild the rest. 1976 */ 1977 /* 1978 * Find the number of dirty disks with the biggest syncid. 1979 * Find the number of disks with the biggest syncid. 1980 * While here, find a disk with the biggest priority. 1981 */ 1982 dirty = ndisks = 0; 1983 pdisk = NULL; 1984 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1985 if (disk->d_sync.ds_syncid != syncid) 1986 continue; 1987 if ((disk->d_flags & 1988 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 1989 continue; 1990 } 1991 ndisks++; 1992 if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 1993 dirty++; 1994 if (pdisk == NULL || 1995 pdisk->d_priority < disk->d_priority) { 1996 pdisk = disk; 1997 } 1998 } 1999 } 2000 if (dirty == 0) { 2001 /* No dirty disks at all, great. */ 2002 } else if (dirty == ndisks) { 2003 /* 2004 * Force synchronization for all dirty disks except one 2005 * with the biggest priority. 2006 */ 2007 KASSERT(pdisk != NULL, ("pdisk == NULL")); 2008 G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " 2009 "master disk for synchronization.", 2010 g_mirror_get_diskname(pdisk), sc->sc_name); 2011 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2012 if (disk->d_sync.ds_syncid != syncid) 2013 continue; 2014 if ((disk->d_flags & 2015 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 2016 continue; 2017 } 2018 KASSERT((disk->d_flags & 2019 G_MIRROR_DISK_FLAG_DIRTY) != 0, 2020 ("Disk %s isn't marked as dirty.", 2021 g_mirror_get_diskname(disk))); 2022 /* Skip the disk with the biggest priority. */ 2023 if (disk == pdisk) 2024 continue; 2025 disk->d_sync.ds_syncid = 0; 2026 } 2027 } else if (dirty < ndisks) { 2028 /* 2029 * Force synchronization for all dirty disks. 2030 * We have some non-dirty disks. 2031 */ 2032 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2033 if (disk->d_sync.ds_syncid != syncid) 2034 continue; 2035 if ((disk->d_flags & 2036 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 2037 continue; 2038 } 2039 if ((disk->d_flags & 2040 G_MIRROR_DISK_FLAG_DIRTY) == 0) { 2041 continue; 2042 } 2043 disk->d_sync.ds_syncid = 0; 2044 } 2045 } 2046 2047 /* Reset hint. */ 2048 sc->sc_hint = NULL; 2049 sc->sc_syncid = syncid; 2050 if (force) { 2051 /* Remember to bump syncid on first write. */ 2052 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2053 } 2054 state = G_MIRROR_DEVICE_STATE_RUNNING; 2055 G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", 2056 sc->sc_name, g_mirror_device_state2str(sc->sc_state), 2057 g_mirror_device_state2str(state)); 2058 sc->sc_state = state; 2059 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2060 state = g_mirror_determine_state(disk); 2061 g_mirror_event_send(disk, state, 2062 G_MIRROR_EVENT_DONTWAIT); 2063 if (state == G_MIRROR_DISK_STATE_STALE) 2064 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2065 } 2066 break; 2067 } 2068 case G_MIRROR_DEVICE_STATE_RUNNING: 2069 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 && 2070 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2071 /* 2072 * No active disks or no disks at all, 2073 * so destroy device. 2074 */ 2075 if (sc->sc_provider != NULL) 2076 g_mirror_destroy_provider(sc); 2077 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2078 break; 2079 } else if (g_mirror_ndisks(sc, 2080 G_MIRROR_DISK_STATE_ACTIVE) > 0 && 2081 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2082 /* 2083 * We have active disks, launch provider if it doesn't 2084 * exist. 2085 */ 2086 if (sc->sc_provider == NULL) 2087 g_mirror_launch_provider(sc); 2088 if (sc->sc_rootmount != NULL) { 2089 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", 2090 __LINE__, sc->sc_rootmount); 2091 root_mount_rel(sc->sc_rootmount); 2092 sc->sc_rootmount = NULL; 2093 } 2094 } 2095 /* 2096 * Genid should be bumped immediately, so do it here. 2097 */ 2098 if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) { 2099 sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID; 2100 g_mirror_bump_genid(sc); 2101 } 2102 break; 2103 default: 2104 KASSERT(1 == 0, ("Wrong device state (%s, %s).", 2105 sc->sc_name, g_mirror_device_state2str(sc->sc_state))); 2106 break; 2107 } 2108} 2109 2110/* 2111 * Update disk state and device state if needed. 2112 */ 2113#define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ 2114 "Disk %s state changed from %s to %s (device %s).", \ 2115 g_mirror_get_diskname(disk), \ 2116 g_mirror_disk_state2str(disk->d_state), \ 2117 g_mirror_disk_state2str(state), sc->sc_name) 2118static int 2119g_mirror_update_disk(struct g_mirror_disk *disk, u_int state) 2120{ 2121 struct g_mirror_softc *sc; 2122 2123 g_topology_assert(); 2124 2125 sc = disk->d_softc; 2126again: 2127 G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", 2128 g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), 2129 g_mirror_disk_state2str(state)); 2130 switch (state) { 2131 case G_MIRROR_DISK_STATE_NEW: 2132 /* 2133 * Possible scenarios: 2134 * 1. New disk arrive. 2135 */ 2136 /* Previous state should be NONE. */ 2137 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, 2138 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2139 g_mirror_disk_state2str(disk->d_state))); 2140 DISK_STATE_CHANGED(); 2141 2142 disk->d_state = state; 2143 if (LIST_EMPTY(&sc->sc_disks)) 2144 LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); 2145 else { 2146 struct g_mirror_disk *dp; 2147 2148 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 2149 if (disk->d_priority >= dp->d_priority) { 2150 LIST_INSERT_BEFORE(dp, disk, d_next); 2151 dp = NULL; 2152 break; 2153 } 2154 if (LIST_NEXT(dp, d_next) == NULL) 2155 break; 2156 } 2157 if (dp != NULL) 2158 LIST_INSERT_AFTER(dp, disk, d_next); 2159 } 2160 G_MIRROR_DEBUG(0, "Device %s: provider %s detected.", 2161 sc->sc_name, g_mirror_get_diskname(disk)); 2162 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2163 break; 2164 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2165 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2166 g_mirror_device_state2str(sc->sc_state), 2167 g_mirror_get_diskname(disk), 2168 g_mirror_disk_state2str(disk->d_state))); 2169 state = g_mirror_determine_state(disk); 2170 if (state != G_MIRROR_DISK_STATE_NONE) 2171 goto again; 2172 break; 2173 case G_MIRROR_DISK_STATE_ACTIVE: 2174 /* 2175 * Possible scenarios: 2176 * 1. New disk does not need synchronization. 2177 * 2. Synchronization process finished successfully. 2178 */ 2179 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2180 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2181 g_mirror_device_state2str(sc->sc_state), 2182 g_mirror_get_diskname(disk), 2183 g_mirror_disk_state2str(disk->d_state))); 2184 /* Previous state should be NEW or SYNCHRONIZING. */ 2185 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW || 2186 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2187 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2188 g_mirror_disk_state2str(disk->d_state))); 2189 DISK_STATE_CHANGED(); 2190 2191 if (disk->d_state == G_MIRROR_DISK_STATE_NEW) 2192 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2193 else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2194 disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING; 2195 disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; 2196 g_mirror_sync_stop(disk, 0); 2197 } 2198 disk->d_state = state; 2199 disk->d_sync.ds_offset = 0; 2200 disk->d_sync.ds_offset_done = 0; 2201 g_mirror_update_idle(sc, disk); 2202 G_MIRROR_DEBUG(0, "Device %s: provider %s activated.", 2203 sc->sc_name, g_mirror_get_diskname(disk)); 2204 break; 2205 case G_MIRROR_DISK_STATE_STALE: 2206 /* 2207 * Possible scenarios: 2208 * 1. Stale disk was connected. 2209 */ 2210 /* Previous state should be NEW. */ 2211 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2212 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2213 g_mirror_disk_state2str(disk->d_state))); 2214 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2215 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2216 g_mirror_device_state2str(sc->sc_state), 2217 g_mirror_get_diskname(disk), 2218 g_mirror_disk_state2str(disk->d_state))); 2219 /* 2220 * STALE state is only possible if device is marked 2221 * NOAUTOSYNC. 2222 */ 2223 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0, 2224 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2225 g_mirror_device_state2str(sc->sc_state), 2226 g_mirror_get_diskname(disk), 2227 g_mirror_disk_state2str(disk->d_state))); 2228 DISK_STATE_CHANGED(); 2229 2230 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2231 disk->d_state = state; 2232 g_mirror_update_metadata(disk); 2233 G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.", 2234 sc->sc_name, g_mirror_get_diskname(disk)); 2235 break; 2236 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 2237 /* 2238 * Possible scenarios: 2239 * 1. Disk which needs synchronization was connected. 2240 */ 2241 /* Previous state should be NEW. */ 2242 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2243 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2244 g_mirror_disk_state2str(disk->d_state))); 2245 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2246 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2247 g_mirror_device_state2str(sc->sc_state), 2248 g_mirror_get_diskname(disk), 2249 g_mirror_disk_state2str(disk->d_state))); 2250 DISK_STATE_CHANGED(); 2251 2252 if (disk->d_state == G_MIRROR_DISK_STATE_NEW) 2253 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2254 disk->d_state = state; 2255 if (sc->sc_provider != NULL) { 2256 g_mirror_sync_start(disk); 2257 g_mirror_update_metadata(disk); 2258 } 2259 break; 2260 case G_MIRROR_DISK_STATE_DISCONNECTED: 2261 /* 2262 * Possible scenarios: 2263 * 1. Device wasn't running yet, but disk disappear. 2264 * 2. Disk was active and disapppear. 2265 * 3. Disk disappear during synchronization process. 2266 */ 2267 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) { 2268 /* 2269 * Previous state should be ACTIVE, STALE or 2270 * SYNCHRONIZING. 2271 */ 2272 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 2273 disk->d_state == G_MIRROR_DISK_STATE_STALE || 2274 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2275 ("Wrong disk state (%s, %s).", 2276 g_mirror_get_diskname(disk), 2277 g_mirror_disk_state2str(disk->d_state))); 2278 } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) { 2279 /* Previous state should be NEW. */ 2280 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2281 ("Wrong disk state (%s, %s).", 2282 g_mirror_get_diskname(disk), 2283 g_mirror_disk_state2str(disk->d_state))); 2284 /* 2285 * Reset bumping syncid if disk disappeared in STARTING 2286 * state. 2287 */ 2288 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) 2289 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 2290#ifdef INVARIANTS 2291 } else { 2292 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2293 sc->sc_name, 2294 g_mirror_device_state2str(sc->sc_state), 2295 g_mirror_get_diskname(disk), 2296 g_mirror_disk_state2str(disk->d_state))); 2297#endif 2298 } 2299 DISK_STATE_CHANGED(); 2300 G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.", 2301 sc->sc_name, g_mirror_get_diskname(disk)); 2302 2303 g_mirror_destroy_disk(disk); 2304 break; 2305 case G_MIRROR_DISK_STATE_DESTROY: 2306 { 2307 int error; 2308 2309 error = g_mirror_clear_metadata(disk); 2310 if (error != 0) 2311 return (error); 2312 DISK_STATE_CHANGED(); 2313 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", 2314 sc->sc_name, g_mirror_get_diskname(disk)); 2315 2316 g_mirror_destroy_disk(disk); 2317 sc->sc_ndisks--; 2318 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2319 g_mirror_update_metadata(disk); 2320 } 2321 break; 2322 } 2323 default: 2324 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2325 break; 2326 } 2327 return (0); 2328} 2329#undef DISK_STATE_CHANGED 2330 2331int 2332g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md) 2333{ 2334 struct g_provider *pp; 2335 u_char *buf; 2336 int error; 2337 2338 g_topology_assert(); 2339 2340 error = g_access(cp, 1, 0, 0); 2341 if (error != 0) 2342 return (error); 2343 pp = cp->provider; 2344 g_topology_unlock(); 2345 /* Metadata are stored on last sector. */ 2346 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2347 &error); 2348 g_topology_lock(); 2349 g_access(cp, -1, 0, 0); 2350 if (buf == NULL) { 2351 G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2352 cp->provider->name, error); 2353 return (error); 2354 } 2355 2356 /* Decode metadata. */ 2357 error = mirror_metadata_decode(buf, md); 2358 g_free(buf); 2359 if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0) 2360 return (EINVAL); 2361 if (md->md_version > G_MIRROR_VERSION) { 2362 G_MIRROR_DEBUG(0, 2363 "Kernel module is too old to handle metadata from %s.", 2364 cp->provider->name); 2365 return (EINVAL); 2366 } 2367 if (error != 0) { 2368 G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2369 cp->provider->name); 2370 return (error); 2371 } 2372 2373 return (0); 2374} 2375 2376static int 2377g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, 2378 struct g_mirror_metadata *md) 2379{ 2380 2381 if (g_mirror_id2disk(sc, md->md_did) != NULL) { 2382 G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", 2383 pp->name, md->md_did); 2384 return (EEXIST); 2385 } 2386 if (md->md_all != sc->sc_ndisks) { 2387 G_MIRROR_DEBUG(1, 2388 "Invalid '%s' field on disk %s (device %s), skipping.", 2389 "md_all", pp->name, sc->sc_name); 2390 return (EINVAL); 2391 } 2392 if (md->md_slice != sc->sc_slice) { 2393 G_MIRROR_DEBUG(1, 2394 "Invalid '%s' field on disk %s (device %s), skipping.", 2395 "md_slice", pp->name, sc->sc_name); 2396 return (EINVAL); 2397 } 2398 if (md->md_balance != sc->sc_balance) { 2399 G_MIRROR_DEBUG(1, 2400 "Invalid '%s' field on disk %s (device %s), skipping.", 2401 "md_balance", pp->name, sc->sc_name); 2402 return (EINVAL); 2403 } 2404 if (md->md_mediasize != sc->sc_mediasize) { 2405 G_MIRROR_DEBUG(1, 2406 "Invalid '%s' field on disk %s (device %s), skipping.", 2407 "md_mediasize", pp->name, sc->sc_name); 2408 return (EINVAL); 2409 } 2410 if (sc->sc_mediasize > pp->mediasize) { 2411 G_MIRROR_DEBUG(1, 2412 "Invalid size of disk %s (device %s), skipping.", pp->name, 2413 sc->sc_name); 2414 return (EINVAL); 2415 } 2416 if (md->md_sectorsize != sc->sc_sectorsize) { 2417 G_MIRROR_DEBUG(1, 2418 "Invalid '%s' field on disk %s (device %s), skipping.", 2419 "md_sectorsize", pp->name, sc->sc_name); 2420 return (EINVAL); 2421 } 2422 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2423 G_MIRROR_DEBUG(1, 2424 "Invalid sector size of disk %s (device %s), skipping.", 2425 pp->name, sc->sc_name); 2426 return (EINVAL); 2427 } 2428 if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) { 2429 G_MIRROR_DEBUG(1, 2430 "Invalid device flags on disk %s (device %s), skipping.", 2431 pp->name, sc->sc_name); 2432 return (EINVAL); 2433 } 2434 if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) { 2435 G_MIRROR_DEBUG(1, 2436 "Invalid disk flags on disk %s (device %s), skipping.", 2437 pp->name, sc->sc_name); 2438 return (EINVAL); 2439 } 2440 return (0); 2441} 2442 2443int 2444g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, 2445 struct g_mirror_metadata *md) 2446{ 2447 struct g_mirror_disk *disk; 2448 int error; 2449 2450 g_topology_assert(); 2451 G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); 2452 2453 error = g_mirror_check_metadata(sc, pp, md); 2454 if (error != 0) 2455 return (error); 2456 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING && 2457 md->md_genid < sc->sc_genid) { 2458 G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", 2459 pp->name, sc->sc_name); 2460 return (EINVAL); 2461 } 2462 disk = g_mirror_init_disk(sc, pp, md, &error); 2463 if (disk == NULL) 2464 return (error); 2465 error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, 2466 G_MIRROR_EVENT_WAIT); 2467 if (error != 0) 2468 return (error); 2469 if (md->md_version < G_MIRROR_VERSION) { 2470 G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 2471 pp->name, md->md_version, G_MIRROR_VERSION); 2472 g_mirror_update_metadata(disk); 2473 } 2474 return (0); 2475} 2476 2477static int 2478g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) 2479{ 2480 struct g_mirror_softc *sc; 2481 int dcr, dcw, dce; 2482 2483 g_topology_assert(); 2484 G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 2485 acw, ace); 2486 2487 dcr = pp->acr + acr; 2488 dcw = pp->acw + acw; 2489 dce = pp->ace + ace; 2490 2491 sc = pp->geom->softc; 2492 if (sc == NULL || LIST_EMPTY(&sc->sc_disks) || 2493 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 2494 if (acr <= 0 && acw <= 0 && ace <= 0) 2495 return (0); 2496 else 2497 return (ENXIO); 2498 } 2499 if (dcw == 0 && !sc->sc_idle) 2500 g_mirror_idle(sc, 1); 2501 return (0); 2502} 2503 2504static struct g_geom * 2505g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) 2506{ 2507 struct g_mirror_softc *sc; 2508 struct g_geom *gp; 2509 int error, timeout; 2510 2511 g_topology_assert(); 2512 G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 2513 md->md_mid); 2514 2515 /* One disk is minimum. */ 2516 if (md->md_all < 1) 2517 return (NULL); 2518 /* 2519 * Action geom. 2520 */ 2521 gp = g_new_geomf(mp, "%s", md->md_name); 2522 sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); 2523 gp->start = g_mirror_start; 2524 gp->orphan = g_mirror_orphan; 2525 gp->access = g_mirror_access; 2526 gp->dumpconf = g_mirror_dumpconf; 2527 2528 sc->sc_id = md->md_mid; 2529 sc->sc_slice = md->md_slice; 2530 sc->sc_balance = md->md_balance; 2531 sc->sc_mediasize = md->md_mediasize; 2532 sc->sc_sectorsize = md->md_sectorsize; 2533 sc->sc_ndisks = md->md_all; 2534 sc->sc_flags = md->md_mflags; 2535 sc->sc_bump_id = 0; 2536 sc->sc_idle = 1; 2537 sc->sc_last_write = time_second; 2538 sc->sc_writes = 0; 2539 bioq_init(&sc->sc_queue); 2540 mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); 2541 LIST_INIT(&sc->sc_disks); 2542 TAILQ_INIT(&sc->sc_events); 2543 mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); 2544 callout_init(&sc->sc_callout, CALLOUT_MPSAFE); 2545 sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; 2546 gp->softc = sc; 2547 sc->sc_geom = gp; 2548 sc->sc_provider = NULL; 2549 /* 2550 * Synchronization geom. 2551 */ 2552 gp = g_new_geomf(mp, "%s.sync", md->md_name); 2553 gp->softc = sc; 2554 gp->orphan = g_mirror_orphan; 2555 sc->sc_sync.ds_geom = gp; 2556 sc->sc_sync.ds_ndisks = 0; 2557 error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, 2558 "g_mirror %s", md->md_name); 2559 if (error != 0) { 2560 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", 2561 sc->sc_name); 2562 g_destroy_geom(sc->sc_sync.ds_geom); 2563 mtx_destroy(&sc->sc_events_mtx); 2564 mtx_destroy(&sc->sc_queue_mtx); 2565 g_destroy_geom(sc->sc_geom); 2566 free(sc, M_MIRROR); 2567 return (NULL); 2568 } 2569 2570 G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); 2571 2572 sc->sc_rootmount = root_mount_hold("GMIRROR"); 2573 G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 2574 /* 2575 * Run timeout. 2576 */ 2577 timeout = g_mirror_timeout * hz; 2578 callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); 2579 return (sc->sc_geom); 2580} 2581 2582int 2583g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force) 2584{ 2585 struct g_provider *pp; 2586 2587 g_topology_assert(); 2588 2589 if (sc == NULL) 2590 return (ENXIO); 2591 pp = sc->sc_provider; 2592 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 2593 if (force) { 2594 G_MIRROR_DEBUG(1, "Device %s is still open, so it " 2595 "can't be definitely removed.", pp->name); 2596 } else { 2597 G_MIRROR_DEBUG(1, 2598 "Device %s is still open (r%dw%de%d).", pp->name, 2599 pp->acr, pp->acw, pp->ace); 2600 return (EBUSY); 2601 } 2602 } 2603 2604 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2605 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT; 2606 g_topology_unlock(); 2607 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 2608 mtx_lock(&sc->sc_queue_mtx); 2609 wakeup(sc); 2610 mtx_unlock(&sc->sc_queue_mtx); 2611 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 2612 while (sc->sc_worker != NULL) 2613 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); 2614 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 2615 g_topology_lock(); 2616 g_mirror_destroy_device(sc); 2617 free(sc, M_MIRROR); 2618 return (0); 2619} 2620 2621static void 2622g_mirror_taste_orphan(struct g_consumer *cp) 2623{ 2624 2625 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 2626 cp->provider->name)); 2627} 2628 2629static struct g_geom * 2630g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 2631{ 2632 struct g_mirror_metadata md; 2633 struct g_mirror_softc *sc; 2634 struct g_consumer *cp; 2635 struct g_geom *gp; 2636 int error; 2637 2638 g_topology_assert(); 2639 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 2640 G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); 2641 2642 gp = g_new_geomf(mp, "mirror:taste"); 2643 /* 2644 * This orphan function should be never called. 2645 */ 2646 gp->orphan = g_mirror_taste_orphan; 2647 cp = g_new_consumer(gp); 2648 g_attach(cp, pp); 2649 error = g_mirror_read_metadata(cp, &md); 2650 g_detach(cp); 2651 g_destroy_consumer(cp); 2652 g_destroy_geom(gp); 2653 if (error != 0) 2654 return (NULL); 2655 gp = NULL; 2656 2657 if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0) 2658 return (NULL); 2659 if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 2660 return (NULL); 2661 if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { 2662 G_MIRROR_DEBUG(0, 2663 "Device %s: provider %s marked as inactive, skipping.", 2664 md.md_name, pp->name); 2665 return (NULL); 2666 } 2667 if (g_mirror_debug >= 2) 2668 mirror_metadata_dump(&md); 2669 2670 /* 2671 * Let's check if device already exists. 2672 */ 2673 sc = NULL; 2674 LIST_FOREACH(gp, &mp->geom, geom) { 2675 sc = gp->softc; 2676 if (sc == NULL) 2677 continue; 2678 if (sc->sc_sync.ds_geom == gp) 2679 continue; 2680 if (strcmp(md.md_name, sc->sc_name) != 0) 2681 continue; 2682 if (md.md_mid != sc->sc_id) { 2683 G_MIRROR_DEBUG(0, "Device %s already configured.", 2684 sc->sc_name); 2685 return (NULL); 2686 } 2687 break; 2688 } 2689 if (gp == NULL) { 2690 gp = g_mirror_create(mp, &md); 2691 if (gp == NULL) { 2692 G_MIRROR_DEBUG(0, "Cannot create device %s.", 2693 md.md_name); 2694 return (NULL); 2695 } 2696 sc = gp->softc; 2697 } 2698 G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 2699 error = g_mirror_add_disk(sc, pp, &md); 2700 if (error != 0) { 2701 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 2702 pp->name, gp->name, error); 2703 if (LIST_EMPTY(&sc->sc_disks)) 2704 g_mirror_destroy(sc, 1); 2705 return (NULL); 2706 } 2707 return (gp); 2708} 2709 2710static int 2711g_mirror_destroy_geom(struct gctl_req *req __unused, 2712 struct g_class *mp __unused, struct g_geom *gp) 2713{ 2714 2715 return (g_mirror_destroy(gp->softc, 0)); 2716} 2717 2718static void 2719g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2720 struct g_consumer *cp, struct g_provider *pp) 2721{ 2722 struct g_mirror_softc *sc; 2723 2724 g_topology_assert(); 2725 2726 sc = gp->softc; 2727 if (sc == NULL) 2728 return; 2729 /* Skip synchronization geom. */ 2730 if (gp == sc->sc_sync.ds_geom) 2731 return; 2732 if (pp != NULL) { 2733 /* Nothing here. */ 2734 } else if (cp != NULL) { 2735 struct g_mirror_disk *disk; 2736 2737 disk = cp->private; 2738 if (disk == NULL) 2739 return; 2740 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id); 2741 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2742 sbuf_printf(sb, "%s<Synchronized>", indent); 2743 if (disk->d_sync.ds_offset_done == 0) 2744 sbuf_printf(sb, "0%%"); 2745 else { 2746 sbuf_printf(sb, "%u%%", 2747 (u_int)((disk->d_sync.ds_offset_done * 100) / 2748 sc->sc_provider->mediasize)); 2749 } 2750 sbuf_printf(sb, "</Synchronized>\n"); 2751 } 2752 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 2753 disk->d_sync.ds_syncid); 2754 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, 2755 disk->d_genid); 2756 sbuf_printf(sb, "%s<Flags>", indent); 2757 if (disk->d_flags == 0) 2758 sbuf_printf(sb, "NONE"); 2759 else { 2760 int first = 1; 2761 2762#define ADD_FLAG(flag, name) do { \ 2763 if ((disk->d_flags & (flag)) != 0) { \ 2764 if (!first) \ 2765 sbuf_printf(sb, ", "); \ 2766 else \ 2767 first = 0; \ 2768 sbuf_printf(sb, name); \ 2769 } \ 2770} while (0) 2771 ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); 2772 ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); 2773 ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); 2774 ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, 2775 "SYNCHRONIZING"); 2776 ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 2777 ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); 2778#undef ADD_FLAG 2779 } 2780 sbuf_printf(sb, "</Flags>\n"); 2781 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent, 2782 disk->d_priority); 2783 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 2784 g_mirror_disk_state2str(disk->d_state)); 2785 } else { 2786 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 2787 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 2788 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 2789 sbuf_printf(sb, "%s<Flags>", indent); 2790 if (sc->sc_flags == 0) 2791 sbuf_printf(sb, "NONE"); 2792 else { 2793 int first = 1; 2794 2795#define ADD_FLAG(flag, name) do { \ 2796 if ((sc->sc_flags & (flag)) != 0) { \ 2797 if (!first) \ 2798 sbuf_printf(sb, ", "); \ 2799 else \ 2800 first = 0; \ 2801 sbuf_printf(sb, name); \ 2802 } \ 2803} while (0) 2804 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 2805#undef ADD_FLAG 2806 } 2807 sbuf_printf(sb, "</Flags>\n"); 2808 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent, 2809 (u_int)sc->sc_slice); 2810 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent, 2811 balance_name(sc->sc_balance)); 2812 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 2813 sc->sc_ndisks); 2814 sbuf_printf(sb, "%s<State>", indent); 2815 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2816 sbuf_printf(sb, "%s", "STARTING"); 2817 else if (sc->sc_ndisks == 2818 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) 2819 sbuf_printf(sb, "%s", "COMPLETE"); 2820 else 2821 sbuf_printf(sb, "%s", "DEGRADED"); 2822 sbuf_printf(sb, "</State>\n"); 2823 } 2824} 2825 2826static void 2827g_mirror_shutdown(void *arg, int howto) 2828{ 2829 struct g_class *mp; 2830 struct g_geom *gp, *gp2; 2831 2832 mp = arg; 2833 DROP_GIANT(); 2834 g_topology_lock(); 2835 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 2836 if (gp->softc == NULL) 2837 continue; 2838 g_mirror_destroy(gp->softc, 1); 2839 } 2840 g_topology_unlock(); 2841 PICKUP_GIANT(); 2842#if 0 2843 tsleep(&gp, PRIBIO, "m:shutdown", hz * 20); 2844#endif 2845} 2846 2847static void 2848g_mirror_init(struct g_class *mp) 2849{ 2850 2851 g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync, 2852 g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST); 2853 if (g_mirror_ehtag == NULL) 2854 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event."); 2855} 2856 2857static void 2858g_mirror_fini(struct g_class *mp) 2859{ 2860 2861 if (g_mirror_ehtag == NULL) 2862 return; 2863 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag); 2864} 2865 2866DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); 2867