/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/eventhandler.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
bool g_launch_mirror_before_timeout = true;
SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
    CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
    "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
    "before launching mirrors");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");

#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};

static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_timeout_drain(struct g_mirror_softc *sc);
static int g_mirror_refresh_device(struct g_mirror_softc *sc,
    const struct g_provider *pp, const struct g_mirror_metadata *md);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
    struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct g_mirror_softc *sc,
    struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);

static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device state
 * from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

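/*
 * Queue the event for the worker thread and wake it up.  Unless
 * G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock and sleep until the worker
 * marks the event done, then free the event and return its error status.
 */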
static int
g_mirror_event_dispatch(struct g_mirror_event *ep, void *arg, int state,
    int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int error;

	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_event *ep;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	return (g_mirror_event_dispatch(ep, arg, state, flags));
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

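/*
 * Count the bios pending in the device queue which originated from the given
 * consumer.
 */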
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

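/*
 * Return non-zero if the consumer still has I/O in flight (index > 0) or
 * sitting in the queue, in which case it must not be destroyed yet.
 */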
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

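/*
 * Close the consumer and destroy it, unless it is still busy.  If the
 * consumer was open for writing, destruction is deferred via an event so
 * that it happens only after the retaste event triggered by the close.
 */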
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can post an event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how we ignore retaste events when closing consumers
		 * opened for writing: the consumer is detached and destroyed
		 * only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize the disk: allocate memory, create a consumer, attach it to the
 * provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	disk->d_init_ndisks = md->md_all;
	disk->d_init_slice = md->md_slice;
	disk->d_init_balance = md->md_balance;
	disk->d_init_mediasize = md->md_mediasize;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	g_topology_lock();
	LIST_REMOVE(disk, d_next);
	g_topology_unlock();
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	g_topology_assert();

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	g_mirror_timeout_drain(sc);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

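/*
 * Orphan callback: the underlying provider of a component has gone away.
 * Request a syncid bump and disconnect the component.
 */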
static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

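/*
 * Round-robin selection: return the active disk pointed at by the hint and
 * advance the hint to the next active disk.
 */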
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider was
		 * reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

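/*
 * Bump the synchronization ID and store the new value in the metadata of all
 * active and synchronizing components.
 */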
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

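/*
 * Bump the generation ID and store the new value in the metadata of all
 * active and synchronizing components.
 */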
static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

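/*
 * Mark active components as clean once the device has seen no writes for the
 * idle period.  Return the number of seconds left before the device may be
 * marked idle, or 0 if there is nothing left to do.
 */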
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

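/*
 * Completion callback for regular requests: tag the bio and hand it back to
 * the worker thread for processing.
 */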
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request_error(struct g_mirror_softc *sc,
    struct g_mirror_disk *disk, struct bio *bp)
{

	if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) &&
	    bp->bio_error == EOPNOTSUPP)
		return;

	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
	} else {
		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
		    bp->bio_error);
	}
	if (g_mirror_disconnect_on_failure &&
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
		if (bp->bio_error == ENXIO &&
		    bp->bio_cmd == BIO_READ)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		else if (bp->bio_error == ENXIO)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
		else
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
}

static void
g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
	    ("regular request %p with unexpected origin", bp));

	pbp = bp->bio_parent;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
		break;
	case BIO_WRITE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);
		break;
	case BIO_DELETE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
		    bp->bio_error);
		break;
	case BIO_FLUSH:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
		    bp->bio_error);
		break;
	case BIO_SPEEDUP:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
		    bp->bio_error);
		break;
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL)
			g_mirror_regular_request_error(sc, disk, bp);
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
		case BIO_FLUSH:
		case BIO_SPEEDUP:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
	case BIO_FLUSH:
	case BIO_SPEEDUP:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
			/* Release delayed sync requests if possible. */
			g_mirror_sync_release(sc);
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

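/*
 * Completion callback for synchronization requests: tag the bio and hand it
 * back to the worker thread for processing.
 */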
static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = disk != NULL;
	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after a reboot
	 * if it is connected and synchronized later.  Can we do something
	 * better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put a regular request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Requeue delayed regular requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp;

	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
		return;
	if (g_mirror_sync_collision(sc, bp))
		return;

	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a read request
 * is sent to the mirror provider via the sync consumer. If that request
 * completes successfully, it is converted to a write and sent to the disk
 * being synchronized. If the write also completes successfully, the
 * synchronization offset is advanced and a new read request is submitted.
 */
static void
g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	KASSERT((bp->bio_cmd == BIO_READ &&
	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
	    ("Sync BIO %p with unexpected origin", bp));

	bp->bio_from->index--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	sync = &disk->d_sync;

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ: {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);

			/*
			 * The read error will trigger a syncid bump, so there's
			 * no need to do that here.
			 *
			 * The read error handling for regular requests will
			 * retry the read from all active mirrors before passing
			 * the error back up, so there's no need to retry here.
			 */
			g_mirror_sync_request_free(disk, bp);
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	}
	case BIO_WRITE: {
		off_t offset;
		int i;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
		sync->ds_offset += bp->bio_length;

		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;

		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Requeue delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	}
	default:
		panic("Invalid I/O request %p", bp);
	}
}

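/*
 * The 'prefer' balance algorithm: send the read to the first active
 * component on the list.
 */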
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

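/*
 * Parameters for the 'load' balance algorithm: a request within TRACK_SIZE of
 * a disk's last head position makes that disk preferred, and each disk's load
 * is maintained as a decaying average of its in-flight request count, scaled
 * by LOAD_SCALE.
 */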
#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

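/*
 * The 'split' balance algorithm: break a large read into sector-aligned
 * slices and issue one slice to each active component.  Requests no larger
 * than the slice size are served round-robin instead.
 */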
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct bio *cbp;
	struct g_consumer *cp;
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SA_XLOCKED);

	/*
	 * To avoid ordering issues, if a write is deferred because of a
	 * collision with a sync request, all I/O is deferred until that
	 * write is initiated.
	 */
	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
		g_mirror_regular_delay(sc, bp);
		return;
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= disk->d_sync.ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (TAILQ_EMPTY(&queue)) {
			KASSERT(bp->bio_cmd == BIO_DELETE,
			    ("No consumers for regular request %p", bp));
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
		return;
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cbp->bio_caller1 = disk;
			cbp->bio_to = disk->d_consumer->provider;
		}
		KASSERT(!TAILQ_EMPTY(&queue),
		    ("No consumers for regular request %p", bp));
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			disk = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp = disk->d_consumer;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
			    cp->acr, cp->acw, cp->ace));
			cp->index++;
			g_io_request(cbp, cp);
		}
		break;
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

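/*
 * Return non-zero if the device has no busy consumers and is not being
 * tasted, i.e., it is safe to destroy.
 */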
1845static int
1846g_mirror_can_destroy(struct g_mirror_softc *sc)
1847{
1848	struct g_geom *gp;
1849	struct g_consumer *cp;
1850
1851	g_topology_assert();
1852	gp = sc->sc_geom;
1853	if (gp->softc == NULL)
1854		return (1);
1855	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1856		return (0);
1857	LIST_FOREACH(cp, &gp->consumer, consumer) {
1858		if (g_mirror_is_busy(sc, cp))
1859			return (0);
1860	}
1861	gp = sc->sc_sync.ds_geom;
1862	LIST_FOREACH(cp, &gp->consumer, consumer) {
1863		if (g_mirror_is_busy(sc, cp))
1864			return (0);
1865	}
1866	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1867	    sc->sc_name);
1868	return (1);
1869}
1870
1871static int
1872g_mirror_try_destroy(struct g_mirror_softc *sc)
1873{
1874
1875	if (sc->sc_rootmount != NULL) {
1876		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1877		    sc->sc_rootmount);
1878		root_mount_rel(sc->sc_rootmount);
1879		sc->sc_rootmount = NULL;
1880	}
1881	g_topology_lock();
1882	if (!g_mirror_can_destroy(sc)) {
1883		g_topology_unlock();
1884		return (0);
1885	}
1886	sc->sc_geom->softc = NULL;
1887	sc->sc_sync.ds_geom->softc = NULL;
1888	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1889		g_topology_unlock();
1890		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1891		    &sc->sc_worker);
		sc->sc_worker = NULL;
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
1896	} else {
1897		g_topology_unlock();
1898		g_mirror_destroy_device(sc);
1899	}
1900	return (1);
1901}
1902
1903/*
1904 * Worker thread.
1905 */
1906static void
1907g_mirror_worker(void *arg)
1908{
1909	struct g_mirror_softc *sc;
1910	struct g_mirror_event *ep;
1911	struct bio *bp;
1912	int timeout;
1913
1914	sc = arg;
1915	thread_lock(curthread);
1916	sched_prio(curthread, PRIBIO);
1917	thread_unlock(curthread);
1918
1919	sx_xlock(&sc->sc_lock);
1920	for (;;) {
1921		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1922		/*
1923		 * First take a look at events.
1924		 * This is important to handle events before any I/O requests.
1925		 */
1926		ep = g_mirror_event_first(sc);
1927		if (ep != NULL) {
1928			g_mirror_event_remove(sc, ep);
1929			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1930				/* Update only device status. */
1931				G_MIRROR_DEBUG(3,
1932				    "Running event for device %s.",
1933				    sc->sc_name);
1934				ep->e_error = 0;
1935				g_mirror_update_device(sc, true);
1936			} else {
1937				/* Update disk status. */
1938				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1939				     g_mirror_get_diskname(ep->e_disk));
1940				ep->e_error = g_mirror_update_disk(ep->e_disk,
1941				    ep->e_state);
1942				if (ep->e_error == 0)
1943					g_mirror_update_device(sc, false);
1944			}
1945			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1946				KASSERT(ep->e_error == 0,
1947				    ("Error cannot be handled."));
1948				g_mirror_event_free(ep);
1949			} else {
1950				ep->e_flags |= G_MIRROR_EVENT_DONE;
1951				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1952				    ep);
1953				mtx_lock(&sc->sc_events_mtx);
1954				wakeup(ep);
1955				mtx_unlock(&sc->sc_events_mtx);
1956			}
1957			if ((sc->sc_flags &
1958			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1959				if (g_mirror_try_destroy(sc)) {
1960					curthread->td_pflags &= ~TDP_GEOM;
1961					G_MIRROR_DEBUG(1, "Thread exiting.");
1962					kproc_exit(0);
1963				}
1964			}
1965			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1966			continue;
1967		}
1968
		/*
		 * Check if we can mark the array as CLEAN and, if we can't,
		 * how many seconds we should wait before checking again.
		 */
1973		timeout = g_mirror_idle(sc, -1);
1974
1975		/*
1976		 * Handle I/O requests.
1977		 */
1978		mtx_lock(&sc->sc_queue_mtx);
1979		bp = TAILQ_FIRST(&sc->sc_queue);
1980		if (bp != NULL)
1981			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1982		else {
1983			if ((sc->sc_flags &
1984			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1985				mtx_unlock(&sc->sc_queue_mtx);
1986				if (g_mirror_try_destroy(sc)) {
1987					curthread->td_pflags &= ~TDP_GEOM;
1988					G_MIRROR_DEBUG(1, "Thread exiting.");
1989					kproc_exit(0);
1990				}
1991				mtx_lock(&sc->sc_queue_mtx);
1992				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1993					mtx_unlock(&sc->sc_queue_mtx);
1994					continue;
1995				}
1996			}
1997			if (g_mirror_event_first(sc) != NULL) {
1998				mtx_unlock(&sc->sc_queue_mtx);
1999				continue;
2000			}
2001			sx_xunlock(&sc->sc_lock);
2002			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
2003			    timeout * hz);
2004			sx_xlock(&sc->sc_lock);
2005			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
2006			continue;
2007		}
2008		mtx_unlock(&sc->sc_queue_mtx);
2009
2010		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2011		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2012			/*
2013			 * Handle completion of the first half (the read) of a
2014			 * block synchronization operation.
2015			 */
2016			g_mirror_sync_request(sc, bp);
2017		} else if (bp->bio_to != sc->sc_provider) {
2018			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2019				/*
2020				 * Handle completion of a regular I/O request.
2021				 */
2022				g_mirror_regular_request(sc, bp);
2023			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2024				/*
2025				 * Handle completion of the second half (the
2026				 * write) of a block synchronization operation.
2027				 */
2028				g_mirror_sync_request(sc, bp);
2029			else {
2030				KASSERT(0,
2031				    ("Invalid request cflags=0x%hx to=%s.",
2032				    bp->bio_cflags, bp->bio_to->name));
2033			}
2034		} else {
2035			/*
2036			 * Initiate an I/O request.
2037			 */
2038			g_mirror_register_request(sc, bp);
2039		}
2040		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2041	}
2042}
2043
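/*
 * Bring the disk's dirty flag in line with the device's idle state: mark
 * the disk dirty while the device is busy and clean once it has gone
 * idle.  Does nothing if the device is marked NOFAILSYNC.
 */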
2044static void
2045g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2046{
2047
2048	sx_assert(&sc->sc_lock, SX_LOCKED);
2049
2050	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2051		return;
2052	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2053		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2054		    g_mirror_get_diskname(disk), sc->sc_name);
2055		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2056	} else if (sc->sc_idle &&
2057	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2058		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2059		    g_mirror_get_diskname(disk), sc->sc_name);
2060		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2061	}
2062}
2063
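/*
 * Reinitialize a synchronization bio as a read of the given offset from
 * the mirror provider, preserving its data buffer and slot index.
 */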
2064static void
2065g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2066    off_t offset)
2067{
2068	void *data;
2069	int idx;
2070
2071	data = bp->bio_data;
2072	idx = (int)(uintptr_t)bp->bio_caller1;
2073	g_reset_bio(bp);
2074
2075	bp->bio_cmd = BIO_READ;
2076	bp->bio_data = data;
2077	bp->bio_done = g_mirror_sync_done;
2078	bp->bio_from = disk->d_sync.ds_consumer;
2079	bp->bio_to = disk->d_softc->sc_provider;
2080	bp->bio_caller1 = (void *)(uintptr_t)idx;
2081	bp->bio_offset = offset;
2082	bp->bio_length = MIN(maxphys,
2083	    disk->d_softc->sc_mediasize - bp->bio_offset);
2084}
2085
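/*
 * Start synchronization of a disk: attach a fresh consumer to the mirror
 * provider, allocate the synchronization bios and fire off the initial
 * batch of read requests.
 */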
2086static void
2087g_mirror_sync_start(struct g_mirror_disk *disk)
2088{
2089	struct g_mirror_softc *sc;
2090	struct g_mirror_disk_sync *sync;
2091	struct g_consumer *cp;
2092	struct bio *bp;
2093	int error, i;
2094
2095	g_topology_assert_not();
2096	sc = disk->d_softc;
2097	sync = &disk->d_sync;
2098	sx_assert(&sc->sc_lock, SX_LOCKED);
2099
2100	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2101	    ("Disk %s is not marked for synchronization.",
2102	    g_mirror_get_diskname(disk)));
2103	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2104	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2105	    sc->sc_state));
2106
2107	sx_xunlock(&sc->sc_lock);
2108	g_topology_lock();
2109	cp = g_new_consumer(sc->sc_sync.ds_geom);
2110	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2111	error = g_attach(cp, sc->sc_provider);
2112	KASSERT(error == 0,
2113	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2114	error = g_access(cp, 1, 0, 0);
2115	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2116	g_topology_unlock();
2117	sx_xlock(&sc->sc_lock);
2118
2119	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2120	    g_mirror_get_diskname(disk));
2121	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2122		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2123	KASSERT(sync->ds_consumer == NULL,
2124	    ("Sync consumer already exists (device=%s, disk=%s).",
2125	    sc->sc_name, g_mirror_get_diskname(disk)));
2126
2127	sync->ds_consumer = cp;
2128	sync->ds_consumer->private = disk;
2129	sync->ds_consumer->index = 0;
2130
2131	/*
2132	 * Allocate memory for synchronization bios and initialize them.
2133	 */
2134	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2135	    M_MIRROR, M_WAITOK);
2136	for (i = 0; i < g_mirror_syncreqs; i++) {
2137		bp = g_alloc_bio();
2138		sync->ds_bios[i] = bp;
2139
2140		bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK);
2141		bp->bio_caller1 = (void *)(uintptr_t)i;
2142		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2143		sync->ds_offset += bp->bio_length;
2144	}
2145
2146	/* Increase the number of disks in SYNCHRONIZING state. */
2147	sc->sc_sync.ds_ndisks++;
2148	/* Set the number of in-flight synchronization requests. */
2149	sync->ds_inflight = g_mirror_syncreqs;
2150
2151	/*
2152	 * Fire off first synchronization requests.
2153	 */
2154	for (i = 0; i < g_mirror_syncreqs; i++) {
2155		bp = sync->ds_bios[i];
2156		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2157		sync->ds_consumer->index++;
2158		/*
2159		 * Delay the request if it is colliding with a regular request.
2160		 */
2161		if (g_mirror_regular_collision(sc, bp))
2162			g_mirror_sync_delay(sc, bp);
2163		else
2164			g_io_request(bp, sync->ds_consumer);
2165	}
2166}
2167
2168/*
2169 * Stop synchronization process.
2170 * type: 0 - synchronization finished
2171 *       1 - synchronization stopped
2172 */
2173static void
2174g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2175{
2176	struct g_mirror_softc *sc;
2177	struct g_consumer *cp;
2178
2179	g_topology_assert_not();
2180	sc = disk->d_softc;
2181	sx_assert(&sc->sc_lock, SX_LOCKED);
2182
2183	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2184	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2185	    g_mirror_disk_state2str(disk->d_state)));
2186	if (disk->d_sync.ds_consumer == NULL)
2187		return;
2188
2189	if (type == 0) {
2190		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2191		    sc->sc_name, g_mirror_get_diskname(disk));
2192	} else /* if (type == 1) */ {
2193		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2194		    sc->sc_name, g_mirror_get_diskname(disk));
2195	}
2196	g_mirror_regular_release(sc);
2197	free(disk->d_sync.ds_bios, M_MIRROR);
2198	disk->d_sync.ds_bios = NULL;
2199	cp = disk->d_sync.ds_consumer;
2200	disk->d_sync.ds_consumer = NULL;
2201	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2202	sc->sc_sync.ds_ndisks--;
2203	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2204	g_topology_lock();
2205	g_mirror_kill_consumer(sc, cp);
2206	g_topology_unlock();
2207	sx_xlock(&sc->sc_lock);
2208}
2209
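/*
 * Create and launch the mirror/<name> provider.  The provider inherits
 * the largest stripe size among the components and accepts unmapped I/O
 * only if the balance algorithm and all underlying providers allow it.
 * Synchronization is started for every disk that needs it.
 */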
2210static void
2211g_mirror_launch_provider(struct g_mirror_softc *sc)
2212{
2213	struct g_mirror_disk *disk;
2214	struct g_provider *pp, *dp;
2215
2216	sx_assert(&sc->sc_lock, SX_LOCKED);
2217
2218	g_topology_lock();
2219	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2220	pp->flags |= G_PF_DIRECT_RECEIVE;
2221	pp->mediasize = sc->sc_mediasize;
2222	pp->sectorsize = sc->sc_sectorsize;
2223	pp->stripesize = 0;
2224	pp->stripeoffset = 0;
2225
	/* Splitting of unmapped BIOs could work but isn't implemented now. */
	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2229
2230	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2231		if (disk->d_consumer && disk->d_consumer->provider) {
2232			dp = disk->d_consumer->provider;
2233			if (dp->stripesize > pp->stripesize) {
2234				pp->stripesize = dp->stripesize;
2235				pp->stripeoffset = dp->stripeoffset;
2236			}
2237			/* A provider underneath us doesn't support unmapped */
2238			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2239				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2240				    "because of %s.", dp->name);
2241				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2242			}
2243		}
2244	}
2245	pp->private = sc;
2246	sc->sc_refcnt++;
2247	sc->sc_provider = pp;
2248	g_error_provider(pp, 0);
2249	g_topology_unlock();
2250	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2251	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2252	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2253		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2254			g_mirror_sync_start(disk);
2255	}
2256}
2257
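/*
 * Destroy the mirror provider: stop all synchronization, complete pending
 * requests that came from outside with ENXIO, free internally generated
 * ones, and wither the provider.
 */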
2258static void
2259g_mirror_destroy_provider(struct g_mirror_softc *sc)
2260{
2261	struct g_mirror_disk *disk;
2262	struct bio *bp;
2263
2264	g_topology_assert_not();
2265	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2266	    sc->sc_name));
2267
2268	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2269		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2270			g_mirror_sync_stop(disk, 1);
2271	}
2272
2273	g_topology_lock();
2274	g_error_provider(sc->sc_provider, ENXIO);
2275	mtx_lock(&sc->sc_queue_mtx);
2276	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2277		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2278		/*
2279		 * Abort any pending I/O that wasn't generated by us.
2280		 * Synchronization requests and requests destined for individual
2281		 * mirror components can be destroyed immediately.
2282		 */
2283		if (bp->bio_to == sc->sc_provider &&
2284		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2285			g_io_deliver(bp, ENXIO);
2286		} else {
2287			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2288				free(bp->bio_data, M_MIRROR);
2289			g_destroy_bio(bp);
2290		}
2291	}
2292	mtx_unlock(&sc->sc_queue_mtx);
2293	g_wither_provider(sc->sc_provider, ENXIO);
2294	sc->sc_provider = NULL;
2295	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2296	g_topology_unlock();
2297}
2298
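/*
 * The startup timeout has expired; force the device to run with whatever
 * components have been tasted so far.  Runs from a callout, so it uses
 * the event preallocated when the device was created.
 */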
2299static void
2300g_mirror_go(void *arg)
2301{
2302	struct g_mirror_softc *sc;
2303	struct g_mirror_event *ep;
2304
2305	sc = arg;
2306	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2307	ep = sc->sc_timeout_event;
2308	sc->sc_timeout_event = NULL;
2309	g_mirror_event_dispatch(ep, sc, 0,
2310	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2311}
2312
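/*
 * Cancel the startup timeout and free its preallocated event.
 */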
2313static void
2314g_mirror_timeout_drain(struct g_mirror_softc *sc)
2315{
2316	sx_assert(&sc->sc_lock, SX_XLOCKED);
2317
2318	callout_drain(&sc->sc_callout);
2319	g_mirror_event_free(sc->sc_timeout_event);
2320	sc->sc_timeout_event = NULL;
2321}
2322
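/*
 * Choose the initial state for a disk from how its syncid compares with
 * the device's: ACTIVE or SYNCHRONIZING for a matching or older syncid
 * (STALE instead if autosynchronization is disabled), and NONE when the
 * disk is fresher than the running device and had to be dropped.
 */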
2323static u_int
2324g_mirror_determine_state(struct g_mirror_disk *disk)
2325{
2326	struct g_mirror_softc *sc;
2327	u_int state;
2328
2329	sc = disk->d_softc;
2330	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2331		if ((disk->d_flags &
2332		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2333		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2334		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2335			/* Disk does not need synchronization. */
2336			state = G_MIRROR_DISK_STATE_ACTIVE;
2337		} else {
2338			if ((sc->sc_flags &
2339			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2340			    (disk->d_flags &
2341			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2342				/*
2343				 * We can start synchronization from
2344				 * the stored offset.
2345				 */
2346				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2347			} else {
2348				state = G_MIRROR_DISK_STATE_STALE;
2349			}
2350		}
2351	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk, because
		 * even if it was synchronized, it was synchronized against
		 * disks with a different syncid.
		 */
2357		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2358		disk->d_sync.ds_offset = 0;
2359		disk->d_sync.ds_offset_done = 0;
2360		disk->d_sync.ds_syncid = sc->sc_syncid;
2361		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2362		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2363			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2364		} else {
2365			state = G_MIRROR_DISK_STATE_STALE;
2366		}
2367	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were writes in the meantime, the mirror is
		 * broken, sorry.  The best choice here is to leave this
		 * disk untouched and inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
2380		g_mirror_destroy_disk(disk);
2381		state = G_MIRROR_DISK_STATE_NONE;
2382		/* Return immediately, because disk was destroyed. */
2383		return (state);
2384	}
2385	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2386	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2387	return (state);
2388}
2389
2390/*
2391 * Update device state.
2392 */
2393static void
2394g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2395{
2396	struct g_mirror_disk *disk;
2397	u_int state;
2398
2399	sx_assert(&sc->sc_lock, SX_XLOCKED);
2400
2401	switch (sc->sc_state) {
2402	case G_MIRROR_DEVICE_STATE_STARTING:
2403	    {
2404		struct g_mirror_disk *pdisk, *tdisk;
2405		const char *mismatch;
2406		uintmax_t found, newest;
2407		u_int dirty, ndisks;
2408
2409		/* Pre-flight checks */
2410		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2411			/*
2412			 * Confirm we already detected the newest genid.
2413			 */
2414			KASSERT(sc->sc_genid >= disk->d_genid,
2415			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
2416			    disk->d_genid, sc, sc->sc_genid));
2417
2418			/* Kick out any previously tasted stale components. */
2419			if (disk->d_genid < sc->sc_genid) {
2420				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
2421				    "(device %s) (component=%u latest=%u), skipping.",
2422				    g_mirror_get_diskname(disk), sc->sc_name,
2423				    disk->d_genid, sc->sc_genid);
2424				g_mirror_destroy_disk(disk);
2425				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2426				continue;
2427			}
2428
2429			/*
2430			 * Confirm we already detected the newest syncid.
2431			 */
2432			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
2433			    ("%s: found newer syncid %u (sc:%p had %u).",
2434			     __func__, disk->d_sync.ds_syncid, sc,
2435			     sc->sc_syncid));
2436
#define DETECT_MISMATCH(field, name) \
			if (mismatch == NULL &&					\
			    disk->d_init_ ## field != sc->sc_ ## field) {	\
				mismatch = name;				\
				found = (uintmax_t)disk->d_init_ ## field;	\
				newest = (uintmax_t)sc->sc_ ## field;		\
			}
2444			mismatch = NULL;
2445			DETECT_MISMATCH(ndisks, "md_all");
2446			DETECT_MISMATCH(balance, "md_balance");
2447			DETECT_MISMATCH(slice, "md_slice");
2448			DETECT_MISMATCH(mediasize, "md_mediasize");
2449#undef DETECT_MISMATCH
2450			if (mismatch != NULL) {
2451				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
2452				    "field on %s (device %s) (found=%ju "
2453				    "newest=%ju).", mismatch,
2454				    g_mirror_get_diskname(disk), sc->sc_name,
2455				    found, newest);
2456				g_mirror_destroy_disk(disk);
2457				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2458				continue;
2459			}
2460		}
2461
2462		KASSERT(sc->sc_provider == NULL,
2463		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2464		/*
2465		 * Are we ready? If the timeout (force is true) has expired, and
2466		 * any disks are present, then yes. If we're permitted to launch
2467		 * before the timeout has expired and the expected number of
2468		 * current-generation mirror disks have been tasted, then yes.
2469		 */
2470		ndisks = g_mirror_ndisks(sc, -1);
2471		if ((force && ndisks > 0) ||
2472		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
2473			;
2474		} else if (ndisks == 0) {
			/*
			 * Disks went down during the starting phase, so
			 * destroy the device.
			 */
2479			g_mirror_timeout_drain(sc);
2480			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2481			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2482			    sc->sc_rootmount);
2483			root_mount_rel(sc->sc_rootmount);
2484			sc->sc_rootmount = NULL;
2485			return;
2486		} else {
2487			return;
2488		}
2489
2490		/*
2491		 * Activate all disks with the biggest syncid.
2492		 */
2493		if (force) {
2494			/*
2495			 * If 'force' is true, we have been called due to
2496			 * timeout, so don't bother canceling timeout.
2497			 */
2498			ndisks = 0;
2499			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2500				if ((disk->d_flags &
2501				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2502					ndisks++;
2503				}
2504			}
2505			if (ndisks == 0) {
2506				/* No valid disks found, destroy device. */
2507				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2508				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2509				    __LINE__, sc->sc_rootmount);
2510				root_mount_rel(sc->sc_rootmount);
2511				sc->sc_rootmount = NULL;
2512				return;
2513			}
2514		} else {
2515			/* Cancel timeout. */
2516			g_mirror_timeout_drain(sc);
2517		}
2518
		/*
		 * Here we need to look for dirty disks: if all disks with
		 * the biggest syncid are dirty, we have to choose the one
		 * with the biggest priority and rebuild the rest.
		 *
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
2529		dirty = ndisks = 0;
2530		pdisk = NULL;
2531		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2532			if (disk->d_sync.ds_syncid != sc->sc_syncid)
2533				continue;
2534			if ((disk->d_flags &
2535			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2536				continue;
2537			}
2538			ndisks++;
2539			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2540				dirty++;
2541				if (pdisk == NULL ||
2542				    pdisk->d_priority < disk->d_priority) {
2543					pdisk = disk;
2544				}
2545			}
2546		}
2547		if (dirty == 0) {
2548			/* No dirty disks at all, great. */
2549		} else if (dirty == ndisks) {
2550			/*
2551			 * Force synchronization for all dirty disks except one
2552			 * with the biggest priority.
2553			 */
2554			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2555			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2556			    "master disk for synchronization.",
2557			    g_mirror_get_diskname(pdisk), sc->sc_name);
2558			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2559				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2560					continue;
2561				if ((disk->d_flags &
2562				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2563					continue;
2564				}
2565				KASSERT((disk->d_flags &
2566				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2567				    ("Disk %s isn't marked as dirty.",
2568				    g_mirror_get_diskname(disk)));
2569				/* Skip the disk with the biggest priority. */
2570				if (disk == pdisk)
2571					continue;
2572				disk->d_sync.ds_syncid = 0;
2573			}
2574		} else if (dirty < ndisks) {
2575			/*
2576			 * Force synchronization for all dirty disks.
2577			 * We have some non-dirty disks.
2578			 */
2579			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2580				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2581					continue;
2582				if ((disk->d_flags &
2583				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2584					continue;
2585				}
2586				if ((disk->d_flags &
2587				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2588					continue;
2589				}
2590				disk->d_sync.ds_syncid = 0;
2591			}
2592		}
2593
2594		/* Reset hint. */
2595		sc->sc_hint = NULL;
2596		if (force) {
2597			/* Remember to bump syncid on first write. */
2598			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2599		}
2600		state = G_MIRROR_DEVICE_STATE_RUNNING;
2601		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2602		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2603		    g_mirror_device_state2str(state));
2604		sc->sc_state = state;
2605		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2606			state = g_mirror_determine_state(disk);
2607			g_mirror_event_send(disk, state,
2608			    G_MIRROR_EVENT_DONTWAIT);
2609			if (state == G_MIRROR_DISK_STATE_STALE)
2610				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2611		}
2612		break;
2613	    }
2614	case G_MIRROR_DEVICE_STATE_RUNNING:
2615		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2616		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2617			/*
2618			 * No usable disks, so destroy the device.
2619			 */
2620			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2621			break;
2622		} else if (g_mirror_ndisks(sc,
2623		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2624		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2625			/*
2626			 * We have active disks, launch provider if it doesn't
2627			 * exist.
2628			 */
2629			if (sc->sc_provider == NULL)
2630				g_mirror_launch_provider(sc);
2631			if (sc->sc_rootmount != NULL) {
2632				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2633				    __LINE__, sc->sc_rootmount);
2634				root_mount_rel(sc->sc_rootmount);
2635				sc->sc_rootmount = NULL;
2636			}
2637		}
2638		/*
2639		 * Genid should be bumped immediately, so do it here.
2640		 */
2641		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2642			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2643			g_mirror_bump_genid(sc);
2644		}
2645		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2646			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2647			g_mirror_bump_syncid(sc);
2648		}
2649		break;
2650	default:
2651		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2652		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2653		break;
2654	}
2655}
2656
2657/*
2658 * Update disk state and device state if needed.
2659 */
2660#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2661	"Disk %s state changed from %s to %s (device %s).",		\
2662	g_mirror_get_diskname(disk),					\
2663	g_mirror_disk_state2str(disk->d_state),				\
2664	g_mirror_disk_state2str(state), sc->sc_name)
2665static int
2666g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2667{
2668	struct g_mirror_softc *sc;
2669
2670	sc = disk->d_softc;
2671	sx_assert(&sc->sc_lock, SX_XLOCKED);
2672
2673again:
2674	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2675	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2676	    g_mirror_disk_state2str(state));
2677	switch (state) {
2678	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. A new disk arrives.
		 */
2683		/* Previous state should be NONE. */
2684		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2685		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2686		    g_mirror_disk_state2str(disk->d_state)));
2687		DISK_STATE_CHANGED();
2688
2689		disk->d_state = state;
2690		g_topology_lock();
2691		if (LIST_EMPTY(&sc->sc_disks))
2692			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2693		else {
2694			struct g_mirror_disk *dp;
2695
2696			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2697				if (disk->d_priority >= dp->d_priority) {
2698					LIST_INSERT_BEFORE(dp, disk, d_next);
2699					dp = NULL;
2700					break;
2701				}
2702				if (LIST_NEXT(dp, d_next) == NULL)
2703					break;
2704			}
2705			if (dp != NULL)
2706				LIST_INSERT_AFTER(dp, disk, d_next);
2707		}
2708		g_topology_unlock();
2709		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2710		    sc->sc_name, g_mirror_get_diskname(disk));
2711		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2712			break;
2713		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2714		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2715		    g_mirror_device_state2str(sc->sc_state),
2716		    g_mirror_get_diskname(disk),
2717		    g_mirror_disk_state2str(disk->d_state)));
2718		state = g_mirror_determine_state(disk);
2719		if (state != G_MIRROR_DISK_STATE_NONE)
2720			goto again;
2721		break;
2722	case G_MIRROR_DISK_STATE_ACTIVE:
2723		/*
2724		 * Possible scenarios:
2725		 * 1. New disk does not need synchronization.
2726		 * 2. Synchronization process finished successfully.
2727		 */
2728		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2729		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2730		    g_mirror_device_state2str(sc->sc_state),
2731		    g_mirror_get_diskname(disk),
2732		    g_mirror_disk_state2str(disk->d_state)));
2733		/* Previous state should be NEW or SYNCHRONIZING. */
2734		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2735		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2736		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2737		    g_mirror_disk_state2str(disk->d_state)));
2738		DISK_STATE_CHANGED();
2739
2740		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2741			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2742			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2743			g_mirror_sync_stop(disk, 0);
2744		}
2745		disk->d_state = state;
2746		disk->d_sync.ds_offset = 0;
2747		disk->d_sync.ds_offset_done = 0;
2748		g_mirror_update_idle(sc, disk);
2749		g_mirror_update_metadata(disk);
2750		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2751		    sc->sc_name, g_mirror_get_diskname(disk));
2752		break;
2753	case G_MIRROR_DISK_STATE_STALE:
2754		/*
2755		 * Possible scenarios:
2756		 * 1. Stale disk was connected.
2757		 */
2758		/* Previous state should be NEW. */
2759		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2760		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2761		    g_mirror_disk_state2str(disk->d_state)));
2762		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2763		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2764		    g_mirror_device_state2str(sc->sc_state),
2765		    g_mirror_get_diskname(disk),
2766		    g_mirror_disk_state2str(disk->d_state)));
2767		/*
2768		 * STALE state is only possible if device is marked
2769		 * NOAUTOSYNC.
2770		 */
2771		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2772		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2773		    g_mirror_device_state2str(sc->sc_state),
2774		    g_mirror_get_diskname(disk),
2775		    g_mirror_disk_state2str(disk->d_state)));
2776		DISK_STATE_CHANGED();
2777
2778		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2779		disk->d_state = state;
2780		g_mirror_update_metadata(disk);
2781		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2782		    sc->sc_name, g_mirror_get_diskname(disk));
2783		break;
2784	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2785		/*
2786		 * Possible scenarios:
2787		 * 1. Disk which needs synchronization was connected.
2788		 */
2789		/* Previous state should be NEW. */
2790		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2791		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2792		    g_mirror_disk_state2str(disk->d_state)));
2793		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2794		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2795		    g_mirror_device_state2str(sc->sc_state),
2796		    g_mirror_get_diskname(disk),
2797		    g_mirror_disk_state2str(disk->d_state)));
2798		DISK_STATE_CHANGED();
2799
2800		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2801			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2802		disk->d_state = state;
2803		if (sc->sc_provider != NULL) {
2804			g_mirror_sync_start(disk);
2805			g_mirror_update_metadata(disk);
2806		}
2807		break;
2808	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but a disk disappeared.
		 * 2. Disk was active and disappeared.
		 * 3. Disk disappeared during the synchronization process.
		 */
2815		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2816			/*
2817			 * Previous state should be ACTIVE, STALE or
2818			 * SYNCHRONIZING.
2819			 */
2820			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2821			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2822			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2823			    ("Wrong disk state (%s, %s).",
2824			    g_mirror_get_diskname(disk),
2825			    g_mirror_disk_state2str(disk->d_state)));
2826		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2827			/* Previous state should be NEW. */
2828			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2829			    ("Wrong disk state (%s, %s).",
2830			    g_mirror_get_diskname(disk),
2831			    g_mirror_disk_state2str(disk->d_state)));
2832			/*
2833			 * Reset bumping syncid if disk disappeared in STARTING
2834			 * state.
2835			 */
2836			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2837				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2838#ifdef	INVARIANTS
2839		} else {
2840			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2841			    sc->sc_name,
2842			    g_mirror_device_state2str(sc->sc_state),
2843			    g_mirror_get_diskname(disk),
2844			    g_mirror_disk_state2str(disk->d_state)));
2845#endif
2846		}
2847		DISK_STATE_CHANGED();
2848		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2849		    sc->sc_name, g_mirror_get_diskname(disk));
2850
2851		g_mirror_destroy_disk(disk);
2852		break;
2853	case G_MIRROR_DISK_STATE_DESTROY:
2854	    {
2855		int error;
2856
2857		error = g_mirror_clear_metadata(disk);
2858		if (error != 0) {
2859			G_MIRROR_DEBUG(0,
2860			    "Device %s: failed to clear metadata on %s: %d.",
2861			    sc->sc_name, g_mirror_get_diskname(disk), error);
2862			break;
2863		}
2864		DISK_STATE_CHANGED();
2865		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2866		    sc->sc_name, g_mirror_get_diskname(disk));
2867
2868		g_mirror_destroy_disk(disk);
2869		sc->sc_ndisks--;
2870		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2871			g_mirror_update_metadata(disk);
2872		}
2873		break;
2874	    }
2875	default:
2876		KASSERT(1 == 0, ("Unknown state (%u).", state));
2877		break;
2878	}
2879	return (0);
2880}
2881#undef	DISK_STATE_CHANGED
2882
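/*
 * Read and decode the gmirror metadata from the last sector of the
 * consumer's provider.
 */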
2883int
2884g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2885{
2886	struct g_provider *pp;
2887	u_char *buf;
2888	int error;
2889
2890	g_topology_assert();
2891
2892	error = g_access(cp, 1, 0, 0);
2893	if (error != 0)
2894		return (error);
2895	pp = cp->provider;
2896	g_topology_unlock();
	/* Metadata is stored in the last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
2900	g_topology_lock();
2901	g_access(cp, -1, 0, 0);
2902	if (buf == NULL) {
2903		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2904		    cp->provider->name, error);
2905		return (error);
2906	}
2907
2908	/* Decode metadata. */
2909	error = mirror_metadata_decode(buf, md);
2910	g_free(buf);
2911	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2912		return (EINVAL);
2913	if (md->md_version > G_MIRROR_VERSION) {
2914		G_MIRROR_DEBUG(0,
2915		    "Kernel module is too old to handle metadata from %s.",
2916		    cp->provider->name);
2917		return (EINVAL);
2918	}
2919	if (error != 0) {
2920		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2921		    cp->provider->name);
2922		return (error);
2923	}
2924
2925	return (0);
2926}
2927
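/*
 * Sanity-check a component's metadata against the existing device
 * configuration before the component is added.
 */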
2928static int
2929g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2930    struct g_mirror_metadata *md)
2931{
2932
	G_MIRROR_DEBUG(2, "%s: md_did 0x%x disk %s device %s md_all 0x%x "
	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
	    "sc_state 0x%x.",
	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
	    md->md_genid, md->md_priority, sc->sc_state);
2945
2946	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2947		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2948		    pp->name, md->md_did);
2949		return (EEXIST);
2950	}
2951	if (sc->sc_mediasize > pp->mediasize) {
2952		G_MIRROR_DEBUG(1,
2953		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2954		    sc->sc_name);
2955		return (EINVAL);
2956	}
2957	if (md->md_sectorsize != sc->sc_sectorsize) {
2958		G_MIRROR_DEBUG(1,
2959		    "Invalid '%s' field on disk %s (device %s), skipping.",
2960		    "md_sectorsize", pp->name, sc->sc_name);
2961		return (EINVAL);
2962	}
2963	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2964		G_MIRROR_DEBUG(1,
2965		    "Invalid sector size of disk %s (device %s), skipping.",
2966		    pp->name, sc->sc_name);
2967		return (EINVAL);
2968	}
2969	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2970		G_MIRROR_DEBUG(1,
2971		    "Invalid device flags on disk %s (device %s), skipping.",
2972		    pp->name, sc->sc_name);
2973		return (EINVAL);
2974	}
2975	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2976		G_MIRROR_DEBUG(1,
2977		    "Invalid disk flags on disk %s (device %s), skipping.",
2978		    pp->name, sc->sc_name);
2979		return (EINVAL);
2980	}
2981	return (0);
2982}
2983
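/*
 * Add a tasted component to the device.  If the component carries newer
 * metadata than the STARTING device, the device is refreshed from it
 * first; stale or broken components are rejected.
 */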
2984int
2985g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2986    struct g_mirror_metadata *md)
2987{
2988	struct g_mirror_disk *disk;
2989	int error;
2990
2991	g_topology_assert_not();
2992	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2993
2994	error = g_mirror_check_metadata(sc, pp, md);
2995	if (error != 0)
2996		return (error);
2997
2998	if (md->md_genid < sc->sc_genid) {
2999		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
3000		    pp->name, sc->sc_name);
3001		return (EINVAL);
3002	}
3003
3004	/*
3005	 * If the component disk we're tasting has newer metadata than the
3006	 * STARTING gmirror device, refresh the device from the component.
3007	 */
3008	error = g_mirror_refresh_device(sc, pp, md);
3009	if (error != 0)
3010		return (error);
3011
3012	disk = g_mirror_init_disk(sc, pp, md, &error);
3013	if (disk == NULL)
3014		return (error);
3015	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
3016	    G_MIRROR_EVENT_WAIT);
3017	if (error != 0)
3018		return (error);
3019	if (md->md_version < G_MIRROR_VERSION) {
3020		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
3021		    pp->name, md->md_version, G_MIRROR_VERSION);
3022		g_mirror_update_metadata(disk);
3023	}
3024	return (0);
3025}
3026
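/*
 * GEOM event handler that completes a delayed (CLOSEWAIT) destruction
 * once the provider has been fully closed.
 */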
3027static void
3028g_mirror_destroy_delayed(void *arg, int flag)
3029{
3030	struct g_mirror_softc *sc;
3031	int error;
3032
3033	if (flag == EV_CANCEL) {
3034		G_MIRROR_DEBUG(1, "Destroying canceled.");
3035		return;
3036	}
3037	sc = arg;
3038	g_topology_unlock();
3039	sx_xlock(&sc->sc_lock);
3040	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3041	    ("DESTROY flag set on %s.", sc->sc_name));
3042	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3043	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3044	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3045	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3046	if (error != 0) {
3047		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3048		    sc->sc_name, error);
3049		sx_xunlock(&sc->sc_lock);
3050	}
3051	g_topology_lock();
3052}
3053
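/*
 * Access method for the mirror provider.  Tracks the provider's open
 * count in sc_provider_open and, when the CLOSEWAIT flag is set,
 * schedules the delayed destruction on last close.
 */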
3054static int
3055g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3056{
3057	struct g_mirror_softc *sc;
3058	int error = 0;
3059
3060	g_topology_assert();
3061	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3062	    acw, ace);
3063
3064	sc = pp->private;
3065	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3066
3067	g_topology_unlock();
3068	sx_xlock(&sc->sc_lock);
3069	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3070	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3071	    LIST_EMPTY(&sc->sc_disks)) {
3072		if (acr > 0 || acw > 0 || ace > 0)
3073			error = ENXIO;
3074		goto end;
3075	}
3076	sc->sc_provider_open += acr + acw + ace;
3077	if (pp->acw + acw == 0)
3078		g_mirror_idle(sc, 0);
3079	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3080	    sc->sc_provider_open == 0)
3081		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3082end:
3083	sx_xunlock(&sc->sc_lock);
3084	g_topology_lock();
3085	return (error);
3086}
3087
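/*
 * Load the device configuration from a metadata block.
 */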
3088static void
3089g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3090    const struct g_mirror_metadata *md)
3091{
3092
3093	sc->sc_genid = md->md_genid;
3094	sc->sc_syncid = md->md_syncid;
3095
3096	sc->sc_slice = md->md_slice;
3097	sc->sc_balance = md->md_balance;
3098	sc->sc_mediasize = md->md_mediasize;
3099	sc->sc_ndisks = md->md_all;
3100	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3101	sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3102}
3103
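/*
 * Create a mirror device: the action geom, the synchronization geom, the
 * worker thread and the startup timeout that will force the device to run
 * even if not all components show up.
 */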
3104struct g_geom *
3105g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3106    u_int type)
3107{
3108	struct g_mirror_softc *sc;
3109	struct g_geom *gp;
3110	int error, timeout;
3111
3112	g_topology_assert();
3113	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3114	    md->md_mid);
3115
	/* One disk is the minimum. */
	if (md->md_all < 1)
		return (NULL);
3119	/*
3120	 * Action geom.
3121	 */
3122	gp = g_new_geomf(mp, "%s", md->md_name);
3123	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3124	gp->start = g_mirror_start;
3125	gp->orphan = g_mirror_orphan;
3126	gp->access = g_mirror_access;
3127	gp->dumpconf = g_mirror_dumpconf;
3128
3129	sc->sc_type = type;
3130	sc->sc_id = md->md_mid;
3131	g_mirror_reinit_from_metadata(sc, md);
3132	sc->sc_sectorsize = md->md_sectorsize;
3133	sc->sc_bump_id = 0;
3134	sc->sc_idle = 1;
3135	sc->sc_last_write = time_uptime;
3136	sc->sc_writes = 0;
3137	sc->sc_refcnt = 1;
3138	sx_init(&sc->sc_lock, "gmirror:lock");
3139	TAILQ_INIT(&sc->sc_queue);
3140	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3141	TAILQ_INIT(&sc->sc_regular_delayed);
3142	TAILQ_INIT(&sc->sc_inflight);
3143	TAILQ_INIT(&sc->sc_sync_delayed);
3144	LIST_INIT(&sc->sc_disks);
3145	TAILQ_INIT(&sc->sc_events);
3146	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3147	callout_init(&sc->sc_callout, 1);
3148	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3149	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3150	gp->softc = sc;
3151	sc->sc_geom = gp;
3152	sc->sc_provider = NULL;
3153	sc->sc_provider_open = 0;
3154	/*
3155	 * Synchronization geom.
3156	 */
3157	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3158	gp->softc = sc;
3159	gp->orphan = g_mirror_orphan;
3160	sc->sc_sync.ds_geom = gp;
3161	sc->sc_sync.ds_ndisks = 0;
3162	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3163	    "g_mirror %s", md->md_name);
3164	if (error != 0) {
3165		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3166		    sc->sc_name);
3167		g_destroy_geom(sc->sc_sync.ds_geom);
3168		g_destroy_geom(sc->sc_geom);
3169		g_mirror_free_device(sc);
3170		return (NULL);
3171	}
3172
3173	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3174	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3175
3176	sc->sc_rootmount = root_mount_hold("GMIRROR");
3177	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3178
3179	/*
3180	 * Schedule startup timeout.
3181	 */
3182	timeout = g_mirror_timeout * hz;
3183	sc->sc_timeout_event = malloc(sizeof(struct g_mirror_event), M_MIRROR,
3184	    M_WAITOK);
3185	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3186	return (sc->sc_geom);
3187}
3188
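/*
 * Destroy the device.  G_MIRROR_DESTROY_SOFT fails if the provider is
 * still open, G_MIRROR_DESTROY_DELAYED defers destruction until the last
 * close, and G_MIRROR_DESTROY_HARD proceeds unconditionally.
 */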
3189int
3190g_mirror_destroy(struct g_mirror_softc *sc, int how)
3191{
3192	struct g_mirror_disk *disk;
3193
3194	g_topology_assert_not();
3195	sx_assert(&sc->sc_lock, SX_XLOCKED);
3196
3197	if (sc->sc_provider_open != 0) {
3198		switch (how) {
3199		case G_MIRROR_DESTROY_SOFT:
3200			G_MIRROR_DEBUG(1,
3201			    "Device %s is still open (%d).", sc->sc_name,
3202			    sc->sc_provider_open);
3203			return (EBUSY);
3204		case G_MIRROR_DESTROY_DELAYED:
3205			G_MIRROR_DEBUG(1,
3206			    "Device %s will be destroyed on last close.",
3207			    sc->sc_name);
3208			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3209				if (disk->d_state ==
3210				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3211					g_mirror_sync_stop(disk, 1);
3212				}
3213			}
3214			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3215			return (EBUSY);
3216		case G_MIRROR_DESTROY_HARD:
3217			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3218			    "can't be definitely removed.", sc->sc_name);
3219		}
3220	}
3221
3222	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3223		sx_xunlock(&sc->sc_lock);
3224		return (0);
3225	}
3226	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3227	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3228	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3229	sx_xunlock(&sc->sc_lock);
3230	mtx_lock(&sc->sc_queue_mtx);
3231	wakeup(sc);
3232	mtx_unlock(&sc->sc_queue_mtx);
3233	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3234	while (sc->sc_worker != NULL)
3235		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3236	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3237	sx_xlock(&sc->sc_lock);
3238	g_mirror_destroy_device(sc);
3239	return (0);
3240}
3241
3242static void
3243g_mirror_taste_orphan(struct g_consumer *cp)
3244{
3245
3246	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3247	    cp->provider->name));
3248}
3249
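/*
 * Taste a provider: read and validate its metadata, then either attach
 * it to an existing mirror device or create a new one for it.
 */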
3250static struct g_geom *
3251g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3252{
3253	struct g_mirror_metadata md;
3254	struct g_mirror_softc *sc;
3255	struct g_consumer *cp;
3256	struct g_geom *gp;
3257	int error;
3258
3259	g_topology_assert();
3260	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3261	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3262
3263	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should never be called.
	 */
3267	gp->orphan = g_mirror_taste_orphan;
3268	cp = g_new_consumer(gp);
3269	error = g_attach(cp, pp);
3270	if (error == 0) {
3271		error = g_mirror_read_metadata(cp, &md);
3272		g_detach(cp);
3273	}
3274	g_destroy_consumer(cp);
3275	g_destroy_geom(gp);
3276	if (error != 0)
3277		return (NULL);
3278	gp = NULL;
3279
3280	if (md.md_provider[0] != '\0' &&
3281	    !g_compare_names(md.md_provider, pp->name))
3282		return (NULL);
3283	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3284		return (NULL);
3285	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3286		G_MIRROR_DEBUG(0,
3287		    "Device %s: provider %s marked as inactive, skipping.",
3288		    md.md_name, pp->name);
3289		return (NULL);
3290	}
3291	if (g_mirror_debug >= 2)
3292		mirror_metadata_dump(&md);
3293
	/*
	 * Let's check if the device already exists.
	 */
3297	sc = NULL;
3298	LIST_FOREACH(gp, &mp->geom, geom) {
3299		sc = gp->softc;
3300		if (sc == NULL)
3301			continue;
3302		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3303			continue;
3304		if (sc->sc_sync.ds_geom == gp)
3305			continue;
3306		if (strcmp(md.md_name, sc->sc_name) != 0)
3307			continue;
3308		if (md.md_mid != sc->sc_id) {
3309			G_MIRROR_DEBUG(0, "Device %s already configured.",
3310			    sc->sc_name);
3311			return (NULL);
3312		}
3313		break;
3314	}
3315	if (gp == NULL) {
3316		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3317		if (gp == NULL) {
3318			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3319			    md.md_name);
3320			return (NULL);
3321		}
3322		sc = gp->softc;
3323	}
3324	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3325	g_topology_unlock();
3326	sx_xlock(&sc->sc_lock);
3327	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3328	error = g_mirror_add_disk(sc, pp, &md);
3329	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3330	if (error != 0) {
3331		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3332		    pp->name, gp->name, error);
3333		if (LIST_EMPTY(&sc->sc_disks)) {
3334			g_cancel_event(sc);
3335			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3336			g_topology_lock();
3337			return (NULL);
3338		}
3339		gp = NULL;
3340	}
3341	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3342		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3343		g_topology_lock();
3344		return (NULL);
3345	}
3346	sx_xunlock(&sc->sc_lock);
3347	g_topology_lock();
3348	return (gp);
3349}
3350
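/*
 * A component's provider has been resized; rewrite the metadata, which
 * lives in the provider's last sector.
 */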
3351static void
3352g_mirror_resize(struct g_consumer *cp)
3353{
3354	struct g_mirror_disk *disk;
3355
3356	g_topology_assert();
3357	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3358
3359	disk = cp->private;
3360	if (disk == NULL)
3361		return;
3362	g_topology_unlock();
3363	g_mirror_update_metadata(disk);
3364	g_topology_lock();
3365}
3366
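/*
 * Attempt a soft destruction of the device when GEOM asks for the geom
 * to be destroyed; fails if the provider is still open.
 */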
3367static int
3368g_mirror_destroy_geom(struct gctl_req *req __unused,
3369    struct g_class *mp __unused, struct g_geom *gp)
3370{
3371	struct g_mirror_softc *sc;
3372	int error;
3373
3374	g_topology_unlock();
3375	sc = gp->softc;
3376	sx_xlock(&sc->sc_lock);
3377	g_cancel_event(sc);
3378	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3379	if (error != 0)
3380		sx_xunlock(&sc->sc_lock);
3381	g_topology_lock();
3382	return (error);
3383}
3384
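/*
 * Dump device and disk configuration in XML form for the GEOM confxml
 * tree.
 */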
3385static void
3386g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3387    struct g_consumer *cp, struct g_provider *pp)
3388{
3389	struct g_mirror_softc *sc;
3390
3391	g_topology_assert();
3392
3393	sc = gp->softc;
3394	if (sc == NULL)
3395		return;
3396	/* Skip synchronization geom. */
3397	if (gp == sc->sc_sync.ds_geom)
3398		return;
3399	if (pp != NULL) {
3400		/* Nothing here. */
3401	} else if (cp != NULL) {
3402		struct g_mirror_disk *disk;
3403
3404		disk = cp->private;
3405		if (disk == NULL)
3406			return;
3407		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3408		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3409			sbuf_printf(sb, "%s<Synchronized>", indent);
3410			if (disk->d_sync.ds_offset == 0)
3411				sbuf_cat(sb, "0%");
3412			else
3413				sbuf_printf(sb, "%u%%",
3414				    (u_int)((disk->d_sync.ds_offset * 100) /
3415				    sc->sc_mediasize));
3416			sbuf_cat(sb, "</Synchronized>\n");
3417			if (disk->d_sync.ds_offset > 0)
3418				sbuf_printf(sb, "%s<BytesSynced>%jd"
3419				    "</BytesSynced>\n", indent,
3420				    (intmax_t)disk->d_sync.ds_offset);
3421		}
3422		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3423		    disk->d_sync.ds_syncid);
3424		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3425		    disk->d_genid);
3426		sbuf_printf(sb, "%s<Flags>", indent);
3427		if (disk->d_flags == 0)
3428			sbuf_cat(sb, "NONE");
3429		else {
3430			int first = 1;
3431
3432#define	ADD_FLAG(flag, name)	do {					\
3433	if ((disk->d_flags & (flag)) != 0) {				\
3434		if (!first)						\
3435			sbuf_cat(sb, ", ");				\
3436		else							\
3437			first = 0;					\
3438		sbuf_cat(sb, name);					\
3439	}								\
3440} while (0)
3441			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3442			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3443			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3444			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3445			    "SYNCHRONIZING");
3446			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3447			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3448#undef	ADD_FLAG
3449		}
3450		sbuf_cat(sb, "</Flags>\n");
3451		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3452		    disk->d_priority);
3453		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3454		    g_mirror_disk_state2str(disk->d_state));
3455	} else {
3456		sbuf_printf(sb, "%s<Type>", indent);
3457		switch (sc->sc_type) {
3458		case G_MIRROR_TYPE_AUTOMATIC:
3459			sbuf_cat(sb, "AUTOMATIC");
3460			break;
3461		case G_MIRROR_TYPE_MANUAL:
3462			sbuf_cat(sb, "MANUAL");
3463			break;
3464		default:
3465			sbuf_cat(sb, "UNKNOWN");
3466			break;
3467		}
3468		sbuf_cat(sb, "</Type>\n");
3469		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3470		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3471		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3472		sbuf_printf(sb, "%s<Flags>", indent);
3473		if (sc->sc_flags == 0)
3474			sbuf_cat(sb, "NONE");
3475		else {
3476			int first = 1;
3477
3478#define	ADD_FLAG(flag, name)	do {					\
3479	if ((sc->sc_flags & (flag)) != 0) {				\
3480		if (!first)						\
3481			sbuf_cat(sb, ", ");				\
3482		else							\
3483			first = 0;					\
3484		sbuf_cat(sb, name);					\
3485	}								\
3486} while (0)
3487			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3488			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3489#undef	ADD_FLAG
3490		}
3491		sbuf_cat(sb, "</Flags>\n");
3492		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3493		    (u_int)sc->sc_slice);
3494		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3495		    balance_name(sc->sc_balance));
3496		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3497		    sc->sc_ndisks);
3498		sbuf_printf(sb, "%s<State>", indent);
3499		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3500			sbuf_printf(sb, "%s", "STARTING");
3501		else if (sc->sc_ndisks ==
3502		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3503			sbuf_printf(sb, "%s", "COMPLETE");
3504		else
3505			sbuf_printf(sb, "%s", "DEGRADED");
3506		sbuf_cat(sb, "</State>\n");
3507	}
3508}
3509
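/*
 * Shutdown hook, run after file systems have been synced: let each device
 * go idle (marking its components clean) and schedule its delayed
 * destruction.
 */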
3510static void
3511g_mirror_shutdown_post_sync(void *arg, int howto)
3512{
3513	struct g_class *mp;
3514	struct g_geom *gp, *gp2;
3515	struct g_mirror_softc *sc;
3516	int error;
3517
3518	if (KERNEL_PANICKED())
3519		return;
3520
3521	mp = arg;
3522	g_topology_lock();
3523	g_mirror_shutdown = 1;
3524	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3525		if ((sc = gp->softc) == NULL)
3526			continue;
3527		/* Skip synchronization geom. */
3528		if (gp == sc->sc_sync.ds_geom)
3529			continue;
3530		g_topology_unlock();
3531		sx_xlock(&sc->sc_lock);
3532		g_mirror_idle(sc, -1);
3533		g_cancel_event(sc);
3534		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3535		if (error != 0)
3536			sx_xunlock(&sc->sc_lock);
3537		g_topology_lock();
3538	}
3539	g_topology_unlock();
3540}
3541
3542static void
3543g_mirror_init(struct g_class *mp)
3544{
3545
3546	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3547	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3548	if (g_mirror_post_sync == NULL)
3549		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3550}
3551
3552static void
3553g_mirror_fini(struct g_class *mp)
3554{
3555
3556	if (g_mirror_post_sync != NULL)
3557		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3558}
3559
3560/*
3561 * Refresh the mirror device's metadata when gmirror encounters a newer
3562 * generation as the individual components are being added to the mirror set.
3563 */
3564static int
3565g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3566    const struct g_mirror_metadata *md)
3567{
3568
3569	g_topology_assert_not();
3570	sx_assert(&sc->sc_lock, SX_XLOCKED);
3571
3572	KASSERT(sc->sc_genid <= md->md_genid,
3573	    ("%s: attempted to refresh from stale component %s (device %s) "
3574	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3575	    sc->sc_genid));
3576
3577	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3578	    sc->sc_syncid >= md->md_syncid))
3579		return (0);
3580
3581	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3582	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3583	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3584	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3585
3586	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3587		/* Probable data corruption detected */
3588		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3589		    "(device=%s genid=%u). A stale mirror device was launched.",
3590		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3591		    sc->sc_genid);
3592		return (EINVAL);
3593	}
3594
3595	/* Update softc */
3596	g_mirror_reinit_from_metadata(sc, md);
3597
3598	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3599	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3600	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3601	    md->md_syncid, (unsigned)md->md_all);
3602
3603	return (0);
3604}
3605
3606DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3607MODULE_VERSION(geom_mirror, 0);
3608