/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/eventhandler.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/geom_disk.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
bool g_launch_mirror_before_timeout = true;
SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
    CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
    "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
    "before launching mirrors");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");

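/* msleep(9) wrapper that logs the sleep and the wakeup at debug level 4. */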
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};

static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_timeout_drain(struct g_mirror_softc *sc);
static int g_mirror_refresh_device(struct g_mirror_softc *sc,
    const struct g_provider *pp, const struct g_mirror_metadata *md);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
    struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct g_mirror_softc *sc,
    struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);

static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to apply disk and device state changes
 * from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

static int
g_mirror_event_dispatch(struct g_mirror_event *ep, void *arg, int state,
    int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int error;

	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
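	/*
	 * Wait for the worker thread to mark the event done.  The bounded
	 * sleep re-checks the DONE flag every 5 seconds in case a wakeup
	 * is missed.
	 */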
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_event *ep;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	return (g_mirror_event_dispatch(ep, arg, state, flags));
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If the state is -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

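/*
 * Count the requests in the main queue that originate from the given
 * consumer.
 */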
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
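	/*
	 * Closing the last write access triggers a retaste of the provider
	 * (from within g_access()) unless its geom is already withering.
	 */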
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can post an event to detach and destroy the consumer.
		 * A class that has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are ignored when closing
		 * consumers opened for writing: the consumer is detached and
		 * destroyed only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it to the
 * provider, and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	error = g_getattr("GEOM::rotation_rate", disk->d_consumer,
		&disk->d_rotation_rate);
	if (error)
		disk->d_rotation_rate = DISK_RR_UNKNOWN;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	disk->d_init_ndisks = md->md_all;
	disk->d_init_slice = md->md_slice;
	disk->d_init_balance = md->md_balance;
	disk->d_init_mediasize = md->md_mediasize;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	g_topology_lock();
	LIST_REMOVE(disk, d_next);
	g_topology_unlock();
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	g_topology_assert();

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

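/*
 * The provider holds a reference on the softc; drop it when the provider
 * goes away, and free the device once the last reference is gone.
 */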
static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	g_mirror_timeout_drain(sc);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It may be the same disk that was passed in.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

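/*
 * Return an active disk using the round-robin hint, and advance the hint
 * to the next active disk.
 */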
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * was reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	bzero(md, sizeof(*md));
	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	if (disk == NULL) {
		md->md_did = arc4random();
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

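/*
 * Completion callback for regular requests: queue the bio for the worker
 * thread and wake it up.
 */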
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request_error(struct g_mirror_softc *sc,
    struct g_mirror_disk *disk, struct bio *bp)
{

	if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) &&
	    bp->bio_error == EOPNOTSUPP)
		return;

	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
	} else {
		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
		    bp->bio_error);
	}
	if (g_mirror_disconnect_on_failure &&
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
		if (bp->bio_error == ENXIO &&
		    bp->bio_cmd == BIO_READ)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		else if (bp->bio_error == ENXIO)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
		else
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
}

static void
g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
	    ("regular request %p with unexpected origin", bp));

	pbp = bp->bio_parent;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
		break;
	case BIO_WRITE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);
		break;
	case BIO_DELETE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
		    bp->bio_error);
		break;
	case BIO_FLUSH:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
		    bp->bio_error);
		break;
	case BIO_SPEEDUP:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
		    bp->bio_error);
		break;
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL)
			g_mirror_regular_request_error(sc, disk, bp);
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
		case BIO_FLUSH:
		case BIO_SPEEDUP:
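			/*
			 * Exclude the failed request from the completion
			 * accounting so that success on the remaining
			 * mirrors can still complete the parent request.
			 */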
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;

		/*
		 * If there is only one active disk we want to double-check that
		 * it is, in fact, the disk that we already tried.  This is
		 * necessary because we might have just lost a race with a
		 * removal of the tried disk (likely because of the same error)
		 * and the only remaining disk is still viable for a retry.
		 */
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1 &&
		    disk != NULL &&
		    disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) {
			g_io_deliver(pbp, pbp->bio_error);
		} else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
	case BIO_FLUSH:
	case BIO_SPEEDUP:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
			/* Release delayed sync requests if possible. */
			g_mirror_sync_release(sc);
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = disk != NULL;
	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after the
	 * reboot, even if that component is connected and synchronized
	 * later. Can we do something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_rotation_rate(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	bool first = true;
	uint16_t rr = DISK_RR_UNKNOWN;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (first)
			rr = disk->d_rotation_rate;
		else if (rr != disk->d_rotation_rate) {
			rr = DISK_RR_UNKNOWN;
			break;
		}
		first = false;
	}
	g_handleattr(bp, "GEOM::rotation_rate", &rr, sizeof(rr));
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		} else if (!strcmp(bp->bio_attribute, "GEOM::rotation_rate")) {
			g_mirror_rotation_rate(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put a regular request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Requeue delayed regular requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp;

	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
		return;
	if (g_mirror_sync_collision(sc, bp))
		return;

	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
	mtx_lock(&sc->sc_queue_mtx);
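	/*
	 * Prepend the delayed requests to the main queue: append the queue
	 * to the delayed list, then swap the two queue heads.
	 */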
	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Releases delayed sync requests which don't collide anymore with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a read request is
 * sent to the mirror provider via the sync consumer. If that request completes
 * successfully, it is converted to a write and sent to the disk being
 * synchronized. If the write also completes successfully, the synchronization
 * offset is advanced and a new read request is submitted.
 */
static void
g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	KASSERT((bp->bio_cmd == BIO_READ &&
	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
	    ("Sync BIO %p with unexpected origin", bp));

	bp->bio_from->index--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	sync = &disk->d_sync;

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ: {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);

			/*
			 * The read error will trigger a syncid bump, so there's
			 * no need to do that here.
			 *
			 * The read error handling for regular requests will
			 * retry the read from all active mirrors before passing
			 * the error back up, so there's no need to retry here.
			 */
			g_mirror_sync_request_free(disk, bp);
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	}
	case BIO_WRITE: {
		off_t offset;
		int i;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
		sync->ds_offset += bp->bio_length;

		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;

		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Requeue delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
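		/*
		 * Periodically record the smallest in-flight offset in the
		 * metadata, so that an interrupted synchronization can
		 * resume from that point.
		 */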
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	}
	default:
		panic("Invalid I/O request %p", bp);
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

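/*
 * Tunables for the "load" balance algorithm: LOAD_SCALE is the fixed-point
 * scale of the per-disk load average, and requests within TRACK_SIZE of a
 * disk's last serviced offset are preferred as likely sequential I/O.
 */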
#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp __diagused;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
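	/*
	 * Split the request evenly across the active disks, rounding each
	 * slice up to a multiple of the sector size.
	 */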
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any requests, so that ENOMEM
	 * can be returned in a clean way if an allocation fails.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct bio *cbp;
	struct g_consumer *cp;
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SA_XLOCKED);

	/*
	 * To avoid ordering issues, if a write is deferred because of a
	 * collision with a sync request, all I/O is deferred until that
	 * write is initiated.
	 */
	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
		g_mirror_regular_delay(sc, bp);
		return;
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}

		/*
		 * Allocate all bios before sending any requests, so that
		 * ENOMEM can be returned in a clean way if an allocation
		 * fails.
		 */
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= disk->d_sync.ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (TAILQ_EMPTY(&queue)) {
			KASSERT(bp->bio_cmd == BIO_DELETE,
			    ("No consumers for regular request %p", bp));
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so that new
		 * synchronization requests can be checked for collisions
		 * against it.
		 */
1829		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1830		return;
1831	case BIO_SPEEDUP:
1832	case BIO_FLUSH:
1833		TAILQ_INIT(&queue);
1834		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1835			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1836				continue;
1837			cbp = g_clone_bio(bp);
1838			if (cbp == NULL) {
1839				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1840					TAILQ_REMOVE(&queue, cbp, bio_queue);
1841					g_destroy_bio(cbp);
1842				}
1843				if (bp->bio_error == 0)
1844					bp->bio_error = ENOMEM;
1845				g_io_deliver(bp, bp->bio_error);
1846				return;
1847			}
1848			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1849			cbp->bio_done = g_mirror_done;
1850			cbp->bio_caller1 = disk;
1851			cbp->bio_to = disk->d_consumer->provider;
1852		}
1853		KASSERT(!TAILQ_EMPTY(&queue),
1854		    ("No consumers for regular request %p", bp));
1855		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1856			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1857			TAILQ_REMOVE(&queue, cbp, bio_queue);
1858			disk = cbp->bio_caller1;
1859			cbp->bio_caller1 = NULL;
1860			cp = disk->d_consumer;
1861			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1862			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1863			    cp->acr, cp->acw, cp->ace));
1864			cp->index++;
1865			g_io_request(cbp, cp);
1866		}
1867		break;
1868	default:
1869		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1870		    bp->bio_cmd, sc->sc_name));
1871		break;
1872	}
1873}
1874
1875static int
1876g_mirror_can_destroy(struct g_mirror_softc *sc)
1877{
1878	struct g_geom *gp;
1879	struct g_consumer *cp;
1880
1881	g_topology_assert();
1882	gp = sc->sc_geom;
1883	if (gp->softc == NULL)
1884		return (1);
1885	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1886		return (0);
1887	LIST_FOREACH(cp, &gp->consumer, consumer) {
1888		if (g_mirror_is_busy(sc, cp))
1889			return (0);
1890	}
1891	gp = sc->sc_sync.ds_geom;
1892	LIST_FOREACH(cp, &gp->consumer, consumer) {
1893		if (g_mirror_is_busy(sc, cp))
1894			return (0);
1895	}
1896	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1897	    sc->sc_name);
1898	return (1);
1899}
1900
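/*
 * Release the root mount hold and, if no I/O is outstanding, detach the
 * softc from both geoms and tear the device down.  With the DRAIN flag
 * set, the thread sleeping in g_mirror_destroy() is woken up to finish
 * the teardown instead.  Returns non-zero if the device was destroyed.
 */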
1901static int
1902g_mirror_try_destroy(struct g_mirror_softc *sc)
1903{
1904
1905	if (sc->sc_rootmount != NULL) {
1906		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1907		    sc->sc_rootmount);
1908		root_mount_rel(sc->sc_rootmount);
1909		sc->sc_rootmount = NULL;
1910	}
1911	g_topology_lock();
1912	if (!g_mirror_can_destroy(sc)) {
1913		g_topology_unlock();
1914		return (0);
1915	}
1916	sc->sc_geom->softc = NULL;
1917	sc->sc_sync.ds_geom->softc = NULL;
1918	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1919		g_topology_unlock();
1920		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1921		    &sc->sc_worker);
1922		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1923		sx_xunlock(&sc->sc_lock);
1924		wakeup(&sc->sc_worker);
1925		sc->sc_worker = NULL;
1926	} else {
1927		g_topology_unlock();
1928		g_mirror_destroy_device(sc);
1929	}
1930	return (1);
1931}
1932
1933/*
1934 * Worker thread.
1935 */
1936static void
1937g_mirror_worker(void *arg)
1938{
1939	struct g_mirror_softc *sc;
1940	struct g_mirror_event *ep;
1941	struct bio *bp;
1942	int timeout;
1943
1944	sc = arg;
1945	thread_lock(curthread);
1946	sched_prio(curthread, PRIBIO);
1947	thread_unlock(curthread);
1948
1949	sx_xlock(&sc->sc_lock);
1950	for (;;) {
1951		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1952		/*
1953		 * First take a look at events.
1954		 * This is important to handle events before any I/O requests.
1955		 */
1956		ep = g_mirror_event_first(sc);
1957		if (ep != NULL) {
1958			g_mirror_event_remove(sc, ep);
1959			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1960				/* Update only device status. */
1961				G_MIRROR_DEBUG(3,
1962				    "Running event for device %s.",
1963				    sc->sc_name);
1964				ep->e_error = 0;
1965				g_mirror_update_device(sc, true);
1966			} else {
1967				/* Update disk status. */
1968				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1969				     g_mirror_get_diskname(ep->e_disk));
1970				ep->e_error = g_mirror_update_disk(ep->e_disk,
1971				    ep->e_state);
1972				if (ep->e_error == 0)
1973					g_mirror_update_device(sc, false);
1974			}
1975			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1976				KASSERT(ep->e_error == 0,
1977				    ("Error cannot be handled."));
1978				g_mirror_event_free(ep);
1979			} else {
1980				ep->e_flags |= G_MIRROR_EVENT_DONE;
1981				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1982				    ep);
1983				mtx_lock(&sc->sc_events_mtx);
1984				wakeup(ep);
1985				mtx_unlock(&sc->sc_events_mtx);
1986			}
1987			if ((sc->sc_flags &
1988			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1989				if (g_mirror_try_destroy(sc)) {
1990					curthread->td_pflags &= ~TDP_GEOM;
1991					G_MIRROR_DEBUG(1, "Thread exiting.");
1992					kproc_exit(0);
1993				}
1994			}
1995			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1996			continue;
1997		}
1998
		/*
		 * Check if we can mark the array as CLEAN and, if we
		 * can't, how many seconds we should wait before retrying.
		 */
2003		timeout = g_mirror_idle(sc, -1);
2004
2005		/*
2006		 * Handle I/O requests.
2007		 */
2008		mtx_lock(&sc->sc_queue_mtx);
2009		bp = TAILQ_FIRST(&sc->sc_queue);
2010		if (bp != NULL)
2011			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2012		else {
2013			if ((sc->sc_flags &
2014			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
2015				mtx_unlock(&sc->sc_queue_mtx);
2016				if (g_mirror_try_destroy(sc)) {
2017					curthread->td_pflags &= ~TDP_GEOM;
2018					G_MIRROR_DEBUG(1, "Thread exiting.");
2019					kproc_exit(0);
2020				}
2021				mtx_lock(&sc->sc_queue_mtx);
2022				if (!TAILQ_EMPTY(&sc->sc_queue)) {
2023					mtx_unlock(&sc->sc_queue_mtx);
2024					continue;
2025				}
2026			}
2027			if (g_mirror_event_first(sc) != NULL) {
2028				mtx_unlock(&sc->sc_queue_mtx);
2029				continue;
2030			}
2031			sx_xunlock(&sc->sc_lock);
2032			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
2033			    timeout * hz);
2034			sx_xlock(&sc->sc_lock);
2035			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
2036			continue;
2037		}
2038		mtx_unlock(&sc->sc_queue_mtx);
2039
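		/*
		 * Dispatch the bio.  Besides fresh requests to the mirror
		 * provider, this queue also carries completions of our own
		 * cloned and synchronization bios, which are told apart by
		 * their origin and cflags.
		 */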
2040		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2041		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2042			/*
2043			 * Handle completion of the first half (the read) of a
2044			 * block synchronization operation.
2045			 */
2046			g_mirror_sync_request(sc, bp);
2047		} else if (bp->bio_to != sc->sc_provider) {
2048			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2049				/*
2050				 * Handle completion of a regular I/O request.
2051				 */
2052				g_mirror_regular_request(sc, bp);
2053			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2054				/*
2055				 * Handle completion of the second half (the
2056				 * write) of a block synchronization operation.
2057				 */
2058				g_mirror_sync_request(sc, bp);
2059			else {
2060				KASSERT(0,
2061				    ("Invalid request cflags=0x%hx to=%s.",
2062				    bp->bio_cflags, bp->bio_to->name));
2063			}
2064		} else {
2065			/*
2066			 * Initiate an I/O request.
2067			 */
2068			g_mirror_register_request(sc, bp);
2069		}
2070		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2071	}
2072}
2073
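/*
 * Propagate the device's idle state to a component: mark the disk dirty
 * while the device is not idle and clean again once it is, unless the
 * "nofailsync" flag disables dirty-bit handling altogether.
 */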
2074static void
2075g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2076{
2077
2078	sx_assert(&sc->sc_lock, SX_LOCKED);
2079
2080	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2081		return;
2082	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2083		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2084		    g_mirror_get_diskname(disk), sc->sc_name);
2085		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2086	} else if (sc->sc_idle &&
2087	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2088		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2089		    g_mirror_get_diskname(disk), sc->sc_name);
2090		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2091	}
2092}
2093
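/*
 * Reinitialize a synchronization bio for the next chunk: preserve its data
 * buffer and slot index across g_reset_bio(), then set it up as a read of
 * up to maxphys bytes from the mirror provider at the given offset.
 */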
2094static void
2095g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2096    off_t offset)
2097{
2098	void *data;
2099	int idx;
2100
2101	data = bp->bio_data;
2102	idx = (int)(uintptr_t)bp->bio_caller1;
2103	g_reset_bio(bp);
2104
2105	bp->bio_cmd = BIO_READ;
2106	bp->bio_data = data;
2107	bp->bio_done = g_mirror_sync_done;
2108	bp->bio_from = disk->d_sync.ds_consumer;
2109	bp->bio_to = disk->d_softc->sc_provider;
2110	bp->bio_caller1 = (void *)(uintptr_t)idx;
2111	bp->bio_offset = offset;
2112	bp->bio_length = MIN(maxphys,
2113	    disk->d_softc->sc_mediasize - bp->bio_offset);
2114}
2115
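/*
 * Start the synchronization process for a disk: attach a consumer on the
 * synchronization geom to our own provider, allocate g_mirror_syncreqs
 * parallel bios with maxphys-sized buffers and fire them off, delaying any
 * that collide with in-flight regular requests.
 */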
2116static void
2117g_mirror_sync_start(struct g_mirror_disk *disk)
2118{
2119	struct g_mirror_softc *sc;
2120	struct g_mirror_disk_sync *sync;
2121	struct g_consumer *cp;
2122	struct bio *bp;
2123	int error __diagused, i;
2124
2125	g_topology_assert_not();
2126	sc = disk->d_softc;
2127	sync = &disk->d_sync;
2128	sx_assert(&sc->sc_lock, SX_LOCKED);
2129
2130	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2131	    ("Disk %s is not marked for synchronization.",
2132	    g_mirror_get_diskname(disk)));
2133	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2134	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2135	    sc->sc_state));
2136
2137	sx_xunlock(&sc->sc_lock);
2138	g_topology_lock();
2139	cp = g_new_consumer(sc->sc_sync.ds_geom);
2140	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2141	error = g_attach(cp, sc->sc_provider);
2142	KASSERT(error == 0,
2143	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2144	error = g_access(cp, 1, 0, 0);
2145	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2146	g_topology_unlock();
2147	sx_xlock(&sc->sc_lock);
2148
2149	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2150	    g_mirror_get_diskname(disk));
2151	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2152		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2153	KASSERT(sync->ds_consumer == NULL,
2154	    ("Sync consumer already exists (device=%s, disk=%s).",
2155	    sc->sc_name, g_mirror_get_diskname(disk)));
2156
2157	sync->ds_consumer = cp;
2158	sync->ds_consumer->private = disk;
2159	sync->ds_consumer->index = 0;
2160
2161	/*
2162	 * Allocate memory for synchronization bios and initialize them.
2163	 */
2164	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2165	    M_MIRROR, M_WAITOK);
2166	for (i = 0; i < g_mirror_syncreqs; i++) {
2167		bp = g_alloc_bio();
2168		sync->ds_bios[i] = bp;
2169
2170		bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK);
2171		bp->bio_caller1 = (void *)(uintptr_t)i;
2172		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2173		sync->ds_offset += bp->bio_length;
2174	}
2175
2176	/* Increase the number of disks in SYNCHRONIZING state. */
2177	sc->sc_sync.ds_ndisks++;
2178	/* Set the number of in-flight synchronization requests. */
2179	sync->ds_inflight = g_mirror_syncreqs;
2180
2181	/*
2182	 * Fire off first synchronization requests.
2183	 */
2184	for (i = 0; i < g_mirror_syncreqs; i++) {
2185		bp = sync->ds_bios[i];
2186		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2187		sync->ds_consumer->index++;
2188		/*
2189		 * Delay the request if it is colliding with a regular request.
2190		 */
2191		if (g_mirror_regular_collision(sc, bp))
2192			g_mirror_sync_delay(sc, bp);
2193		else
2194			g_io_request(bp, sync->ds_consumer);
2195	}
2196}
2197
/*
 * Stop the synchronization process.
 * type: 0 - synchronization finished
 *       1 - synchronization stopped
 */
2203static void
2204g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2205{
2206	struct g_mirror_softc *sc;
2207	struct g_consumer *cp;
2208
2209	g_topology_assert_not();
2210	sc = disk->d_softc;
2211	sx_assert(&sc->sc_lock, SX_LOCKED);
2212
2213	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2214	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2215	    g_mirror_disk_state2str(disk->d_state)));
2216	if (disk->d_sync.ds_consumer == NULL)
2217		return;
2218
2219	if (type == 0) {
2220		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2221		    sc->sc_name, g_mirror_get_diskname(disk));
2222	} else /* if (type == 1) */ {
2223		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2224		    sc->sc_name, g_mirror_get_diskname(disk));
2225	}
2226	g_mirror_regular_release(sc);
2227	free(disk->d_sync.ds_bios, M_MIRROR);
2228	disk->d_sync.ds_bios = NULL;
2229	cp = disk->d_sync.ds_consumer;
2230	disk->d_sync.ds_consumer = NULL;
2231	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2232	sc->sc_sync.ds_ndisks--;
2233	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2234	g_topology_lock();
2235	g_mirror_kill_consumer(sc, cp);
2236	g_topology_unlock();
2237	sx_xlock(&sc->sc_lock);
2238}
2239
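/*
 * Create and announce the mirror/<name> provider.  It inherits the largest
 * stripe size among the components and accepts unmapped I/O only if every
 * component underneath does and the "split" balance algorithm is not in
 * use; synchronization is then started for any disks that need it.
 */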
2240static void
2241g_mirror_launch_provider(struct g_mirror_softc *sc)
2242{
2243	struct g_mirror_disk *disk;
2244	struct g_provider *pp, *dp;
2245
2246	sx_assert(&sc->sc_lock, SX_LOCKED);
2247
2248	g_topology_lock();
2249	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2250	pp->flags |= G_PF_DIRECT_RECEIVE;
2251	pp->mediasize = sc->sc_mediasize;
2252	pp->sectorsize = sc->sc_sectorsize;
2253	pp->stripesize = 0;
2254	pp->stripeoffset = 0;
2255
	/* Splitting of unmapped BIOs could work but isn't implemented now. */
2257	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2258		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2259
2260	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2261		if (disk->d_consumer && disk->d_consumer->provider) {
2262			dp = disk->d_consumer->provider;
2263			if (dp->stripesize > pp->stripesize) {
2264				pp->stripesize = dp->stripesize;
2265				pp->stripeoffset = dp->stripeoffset;
2266			}
			/* A provider underneath us doesn't support unmapped I/O. */
2268			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2269				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2270				    "because of %s.", dp->name);
2271				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2272			}
2273		}
2274	}
2275	pp->private = sc;
2276	sc->sc_refcnt++;
2277	sc->sc_provider = pp;
2278	g_error_provider(pp, 0);
2279	g_topology_unlock();
2280	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2281	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2282	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2283		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2284			g_mirror_sync_start(disk);
2285	}
2286}
2287
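/*
 * Tear the mirror provider down: stop any in-progress synchronization,
 * complete queued requests from our consumers with ENXIO, free our own
 * internally generated bios and wither the provider away.
 */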
2288static void
2289g_mirror_destroy_provider(struct g_mirror_softc *sc)
2290{
2291	struct g_mirror_disk *disk;
2292	struct bio *bp;
2293
2294	g_topology_assert_not();
2295	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2296	    sc->sc_name));
2297
2298	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2299		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2300			g_mirror_sync_stop(disk, 1);
2301	}
2302
2303	g_topology_lock();
2304	g_error_provider(sc->sc_provider, ENXIO);
2305	mtx_lock(&sc->sc_queue_mtx);
2306	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2307		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2308		/*
2309		 * Abort any pending I/O that wasn't generated by us.
2310		 * Synchronization requests and requests destined for individual
2311		 * mirror components can be destroyed immediately.
2312		 */
2313		if (bp->bio_to == sc->sc_provider &&
2314		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2315			g_io_deliver(bp, ENXIO);
2316		} else {
2317			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2318				free(bp->bio_data, M_MIRROR);
2319			g_destroy_bio(bp);
2320		}
2321	}
2322	mtx_unlock(&sc->sc_queue_mtx);
2323	g_wither_provider(sc->sc_provider, ENXIO);
2324	sc->sc_provider = NULL;
2325	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2326	g_topology_unlock();
2327}
2328
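/*
 * Callout handler for the startup timeout: dispatch the preallocated
 * device event that forces the STARTING -> RUNNING transition with
 * whatever components have been tasted so far.
 */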
2329static void
2330g_mirror_go(void *arg)
2331{
2332	struct g_mirror_softc *sc;
2333	struct g_mirror_event *ep;
2334
2335	sc = arg;
2336	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2337	ep = sc->sc_timeout_event;
2338	sc->sc_timeout_event = NULL;
2339	g_mirror_event_dispatch(ep, sc, 0,
2340	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2341}
2342
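/*
 * Cancel the startup timeout and free the event preallocated for it.
 */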
2343static void
2344g_mirror_timeout_drain(struct g_mirror_softc *sc)
2345{
2346	sx_assert(&sc->sc_lock, SX_XLOCKED);
2347
2348	callout_drain(&sc->sc_callout);
2349	g_mirror_event_free(sc->sc_timeout_event);
2350	sc->sc_timeout_event = NULL;
2351}
2352
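/*
 * Choose the state for a disk joining a running device by comparing its
 * syncid with the device's: an equal syncid yields ACTIVE, SYNCHRONIZING
 * or STALE depending on the dirty and autosync flags; an older syncid
 * forces a rebuild from offset zero; a newer syncid means the running
 * mirror itself is stale, so the disk is destroyed instead of connected.
 */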
2353static u_int
2354g_mirror_determine_state(struct g_mirror_disk *disk)
2355{
2356	struct g_mirror_softc *sc;
2357	u_int state;
2358
2359	sc = disk->d_softc;
2360	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2361		if ((disk->d_flags &
2362		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2363		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2364		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2365			/* Disk does not need synchronization. */
2366			state = G_MIRROR_DISK_STATE_ACTIVE;
2367		} else {
2368			if ((sc->sc_flags &
2369			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2370			    (disk->d_flags &
2371			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2372				/*
2373				 * We can start synchronization from
2374				 * the stored offset.
2375				 */
2376				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2377			} else {
2378				state = G_MIRROR_DISK_STATE_STALE;
2379			}
2380		}
2381	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk, because
		 * even if it was synchronized, it was synchronized against
		 * disks with a different syncid.
		 */
2387		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2388		disk->d_sync.ds_offset = 0;
2389		disk->d_sync.ds_offset_done = 0;
2390		disk->d_sync.ds_syncid = sc->sc_syncid;
2391		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2392		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2393			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2394		} else {
2395			state = G_MIRROR_DISK_STATE_STALE;
2396		}
2397	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were writes, the mirror is broken, sorry.
		 * The best choice here is to leave this disk alone and
		 * inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
2410		g_mirror_destroy_disk(disk);
2411		state = G_MIRROR_DISK_STATE_NONE;
2412		/* Return immediately, because disk was destroyed. */
2413		return (state);
2414	}
2415	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2416	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2417	return (state);
2418}
2419
2420/*
2421 * Update device state.
2422 */
2423static void
2424g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2425{
2426	struct g_mirror_disk *disk;
2427	u_int state;
2428
2429	sx_assert(&sc->sc_lock, SX_XLOCKED);
2430
2431	switch (sc->sc_state) {
2432	case G_MIRROR_DEVICE_STATE_STARTING:
2433	    {
2434		struct g_mirror_disk *pdisk, *tdisk;
2435		const char *mismatch;
2436		uintmax_t found, newest;
2437		u_int dirty, ndisks;
2438
2439		/* Pre-flight checks */
2440		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2441			/*
2442			 * Confirm we already detected the newest genid.
2443			 */
2444			KASSERT(sc->sc_genid >= disk->d_genid,
2445			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
2446			    disk->d_genid, sc, sc->sc_genid));
2447
2448			/* Kick out any previously tasted stale components. */
2449			if (disk->d_genid < sc->sc_genid) {
2450				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
2451				    "(device %s) (component=%u latest=%u), skipping.",
2452				    g_mirror_get_diskname(disk), sc->sc_name,
2453				    disk->d_genid, sc->sc_genid);
2454				g_mirror_destroy_disk(disk);
2455				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2456				continue;
2457			}
2458
2459			/*
2460			 * Confirm we already detected the newest syncid.
2461			 */
2462			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
2463			    ("%s: found newer syncid %u (sc:%p had %u).",
2464			     __func__, disk->d_sync.ds_syncid, sc,
2465			     sc->sc_syncid));
2466
2467#define DETECT_MISMATCH(field, name) \
2468			if (mismatch == NULL &&					\
2469			    disk->d_init_ ## field != sc->sc_ ## field) {	\
2470				mismatch = name;				\
				found = (uintmax_t)disk->d_init_ ## field;	\
				newest = (uintmax_t)sc->sc_ ## field;		\
2473			}
2474			mismatch = NULL;
2475			DETECT_MISMATCH(ndisks, "md_all");
2476			DETECT_MISMATCH(balance, "md_balance");
2477			DETECT_MISMATCH(slice, "md_slice");
2478			DETECT_MISMATCH(mediasize, "md_mediasize");
2479#undef DETECT_MISMATCH
2480			if (mismatch != NULL) {
2481				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
2482				    "field on %s (device %s) (found=%ju "
2483				    "newest=%ju).", mismatch,
2484				    g_mirror_get_diskname(disk), sc->sc_name,
2485				    found, newest);
2486				g_mirror_destroy_disk(disk);
2487				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2488				continue;
2489			}
2490		}
2491
2492		KASSERT(sc->sc_provider == NULL,
2493		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2494		/*
2495		 * Are we ready? If the timeout (force is true) has expired, and
2496		 * any disks are present, then yes. If we're permitted to launch
2497		 * before the timeout has expired and the expected number of
2498		 * current-generation mirror disks have been tasted, then yes.
2499		 */
2500		ndisks = g_mirror_ndisks(sc, -1);
2501		if ((force && ndisks > 0) ||
2502		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
2503			;
2504		} else if (ndisks == 0) {
			/*
			 * Disks went away in the starting phase, so destroy
			 * the device.
			 */
2509			g_mirror_timeout_drain(sc);
2510			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2511			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2512			    sc->sc_rootmount);
2513			root_mount_rel(sc->sc_rootmount);
2514			sc->sc_rootmount = NULL;
2515			return;
2516		} else {
2517			return;
2518		}
2519
2520		/*
2521		 * Activate all disks with the biggest syncid.
2522		 */
2523		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling the timeout.
			 */
2528			ndisks = 0;
2529			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2530				if ((disk->d_flags &
2531				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2532					ndisks++;
2533				}
2534			}
2535			if (ndisks == 0) {
2536				/* No valid disks found, destroy device. */
2537				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2538				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2539				    __LINE__, sc->sc_rootmount);
2540				root_mount_rel(sc->sc_rootmount);
2541				sc->sc_rootmount = NULL;
2542				return;
2543			}
2544		} else {
2545			/* Cancel timeout. */
2546			g_mirror_timeout_drain(sc);
2547		}
2548
		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 *
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
2559		dirty = ndisks = 0;
2560		pdisk = NULL;
2561		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2562			if (disk->d_sync.ds_syncid != sc->sc_syncid)
2563				continue;
2564			if ((disk->d_flags &
2565			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2566				continue;
2567			}
2568			ndisks++;
2569			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2570				dirty++;
2571				if (pdisk == NULL ||
2572				    pdisk->d_priority < disk->d_priority) {
2573					pdisk = disk;
2574				}
2575			}
2576		}
2577		if (dirty == 0) {
2578			/* No dirty disks at all, great. */
2579		} else if (dirty == ndisks) {
2580			/*
2581			 * Force synchronization for all dirty disks except one
2582			 * with the biggest priority.
2583			 */
2584			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2585			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2586			    "master disk for synchronization.",
2587			    g_mirror_get_diskname(pdisk), sc->sc_name);
2588			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2589				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2590					continue;
2591				if ((disk->d_flags &
2592				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2593					continue;
2594				}
2595				KASSERT((disk->d_flags &
2596				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2597				    ("Disk %s isn't marked as dirty.",
2598				    g_mirror_get_diskname(disk)));
2599				/* Skip the disk with the biggest priority. */
2600				if (disk == pdisk)
2601					continue;
2602				disk->d_sync.ds_syncid = 0;
2603			}
2604		} else if (dirty < ndisks) {
2605			/*
2606			 * Force synchronization for all dirty disks.
2607			 * We have some non-dirty disks.
2608			 */
2609			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2610				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2611					continue;
2612				if ((disk->d_flags &
2613				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2614					continue;
2615				}
2616				if ((disk->d_flags &
2617				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2618					continue;
2619				}
2620				disk->d_sync.ds_syncid = 0;
2621			}
2622		}
2623
2624		/* Reset hint. */
2625		sc->sc_hint = NULL;
2626		if (force) {
2627			/* Remember to bump syncid on first write. */
2628			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2629		}
2630		state = G_MIRROR_DEVICE_STATE_RUNNING;
2631		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2632		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2633		    g_mirror_device_state2str(state));
2634		sc->sc_state = state;
2635		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2636			state = g_mirror_determine_state(disk);
2637			g_mirror_event_send(disk, state,
2638			    G_MIRROR_EVENT_DONTWAIT);
2639			if (state == G_MIRROR_DISK_STATE_STALE)
2640				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2641		}
2642		break;
2643	    }
2644	case G_MIRROR_DEVICE_STATE_RUNNING:
2645		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2646		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2647			/*
2648			 * No usable disks, so destroy the device.
2649			 */
2650			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2651			break;
2652		} else if (g_mirror_ndisks(sc,
2653		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2654		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2655			/*
2656			 * We have active disks, launch provider if it doesn't
2657			 * exist.
2658			 */
2659			if (sc->sc_provider == NULL)
2660				g_mirror_launch_provider(sc);
2661			if (sc->sc_rootmount != NULL) {
2662				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2663				    __LINE__, sc->sc_rootmount);
2664				root_mount_rel(sc->sc_rootmount);
2665				sc->sc_rootmount = NULL;
2666			}
2667		}
2668		/*
2669		 * Genid should be bumped immediately, so do it here.
2670		 */
2671		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2672			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2673			g_mirror_bump_genid(sc);
2674		}
2675		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2676			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2677			g_mirror_bump_syncid(sc);
2678		}
2679		break;
2680	default:
2681		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2682		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2683		break;
2684	}
2685}
2686
2687/*
2688 * Update disk state and device state if needed.
2689 */
2690#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2691	"Disk %s state changed from %s to %s (device %s).",		\
2692	g_mirror_get_diskname(disk),					\
2693	g_mirror_disk_state2str(disk->d_state),				\
2694	g_mirror_disk_state2str(state), sc->sc_name)
2695static int
2696g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2697{
2698	struct g_mirror_softc *sc;
2699
2700	sc = disk->d_softc;
2701	sx_assert(&sc->sc_lock, SX_XLOCKED);
2702
2703again:
2704	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2705	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2706	    g_mirror_disk_state2str(state));
2707	switch (state) {
2708	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. New disk arrives.
		 */
2713		/* Previous state should be NONE. */
2714		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2715		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2716		    g_mirror_disk_state2str(disk->d_state)));
2717		DISK_STATE_CHANGED();
2718
2719		disk->d_state = state;
2720		g_topology_lock();
2721		if (LIST_EMPTY(&sc->sc_disks))
2722			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2723		else {
2724			struct g_mirror_disk *dp;
2725
2726			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2727				if (disk->d_priority >= dp->d_priority) {
2728					LIST_INSERT_BEFORE(dp, disk, d_next);
2729					dp = NULL;
2730					break;
2731				}
2732				if (LIST_NEXT(dp, d_next) == NULL)
2733					break;
2734			}
2735			if (dp != NULL)
2736				LIST_INSERT_AFTER(dp, disk, d_next);
2737		}
2738		g_topology_unlock();
2739		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2740		    sc->sc_name, g_mirror_get_diskname(disk));
2741		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2742			break;
2743		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2744		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2745		    g_mirror_device_state2str(sc->sc_state),
2746		    g_mirror_get_diskname(disk),
2747		    g_mirror_disk_state2str(disk->d_state)));
2748		state = g_mirror_determine_state(disk);
2749		if (state != G_MIRROR_DISK_STATE_NONE)
2750			goto again;
2751		break;
2752	case G_MIRROR_DISK_STATE_ACTIVE:
2753		/*
2754		 * Possible scenarios:
2755		 * 1. New disk does not need synchronization.
2756		 * 2. Synchronization process finished successfully.
2757		 */
2758		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2759		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2760		    g_mirror_device_state2str(sc->sc_state),
2761		    g_mirror_get_diskname(disk),
2762		    g_mirror_disk_state2str(disk->d_state)));
2763		/* Previous state should be NEW or SYNCHRONIZING. */
2764		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2765		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2766		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2767		    g_mirror_disk_state2str(disk->d_state)));
2768		DISK_STATE_CHANGED();
2769
2770		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2771			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2772			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2773			g_mirror_sync_stop(disk, 0);
2774		}
2775		disk->d_state = state;
2776		disk->d_sync.ds_offset = 0;
2777		disk->d_sync.ds_offset_done = 0;
2778		g_mirror_update_idle(sc, disk);
2779		g_mirror_update_metadata(disk);
2780		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2781		    sc->sc_name, g_mirror_get_diskname(disk));
2782		break;
2783	case G_MIRROR_DISK_STATE_STALE:
2784		/*
2785		 * Possible scenarios:
2786		 * 1. Stale disk was connected.
2787		 */
2788		/* Previous state should be NEW. */
2789		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2790		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2791		    g_mirror_disk_state2str(disk->d_state)));
2792		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2793		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2794		    g_mirror_device_state2str(sc->sc_state),
2795		    g_mirror_get_diskname(disk),
2796		    g_mirror_disk_state2str(disk->d_state)));
2797		/*
2798		 * STALE state is only possible if device is marked
2799		 * NOAUTOSYNC.
2800		 */
2801		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2802		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2803		    g_mirror_device_state2str(sc->sc_state),
2804		    g_mirror_get_diskname(disk),
2805		    g_mirror_disk_state2str(disk->d_state)));
2806		DISK_STATE_CHANGED();
2807
2808		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2809		disk->d_state = state;
2810		g_mirror_update_metadata(disk);
2811		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2812		    sc->sc_name, g_mirror_get_diskname(disk));
2813		break;
2814	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2815		/*
2816		 * Possible scenarios:
2817		 * 1. Disk which needs synchronization was connected.
2818		 */
2819		/* Previous state should be NEW. */
2820		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2821		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2822		    g_mirror_disk_state2str(disk->d_state)));
2823		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2824		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2825		    g_mirror_device_state2str(sc->sc_state),
2826		    g_mirror_get_diskname(disk),
2827		    g_mirror_disk_state2str(disk->d_state)));
2828		DISK_STATE_CHANGED();
2829
2830		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2831			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2832		disk->d_state = state;
2833		if (sc->sc_provider != NULL) {
2834			g_mirror_sync_start(disk);
2835			g_mirror_update_metadata(disk);
2836		}
2837		break;
2838	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but the disk disappeared.
		 * 2. Disk was active and disappeared.
		 * 3. Disk disappeared during the synchronization process.
		 */
2845		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2846			/*
2847			 * Previous state should be ACTIVE, STALE or
2848			 * SYNCHRONIZING.
2849			 */
2850			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2851			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2852			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2853			    ("Wrong disk state (%s, %s).",
2854			    g_mirror_get_diskname(disk),
2855			    g_mirror_disk_state2str(disk->d_state)));
2856		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2857			/* Previous state should be NEW. */
2858			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2859			    ("Wrong disk state (%s, %s).",
2860			    g_mirror_get_diskname(disk),
2861			    g_mirror_disk_state2str(disk->d_state)));
2862			/*
2863			 * Reset bumping syncid if disk disappeared in STARTING
2864			 * state.
2865			 */
2866			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2867				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2868#ifdef	INVARIANTS
2869		} else {
2870			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2871			    sc->sc_name,
2872			    g_mirror_device_state2str(sc->sc_state),
2873			    g_mirror_get_diskname(disk),
2874			    g_mirror_disk_state2str(disk->d_state)));
2875#endif
2876		}
2877		DISK_STATE_CHANGED();
2878		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2879		    sc->sc_name, g_mirror_get_diskname(disk));
2880
2881		g_mirror_destroy_disk(disk);
2882		break;
2883	case G_MIRROR_DISK_STATE_DESTROY:
2884	    {
2885		int error;
2886
2887		error = g_mirror_clear_metadata(disk);
2888		if (error != 0) {
2889			G_MIRROR_DEBUG(0,
2890			    "Device %s: failed to clear metadata on %s: %d.",
2891			    sc->sc_name, g_mirror_get_diskname(disk), error);
2892			break;
2893		}
2894		DISK_STATE_CHANGED();
2895		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2896		    sc->sc_name, g_mirror_get_diskname(disk));
2897
2898		g_mirror_destroy_disk(disk);
2899		sc->sc_ndisks--;
2900		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2901			g_mirror_update_metadata(disk);
2902		}
2903		break;
2904	    }
2905	default:
2906		KASSERT(1 == 0, ("Unknown state (%u).", state));
2907		break;
2908	}
2909	return (0);
2910}
2911#undef	DISK_STATE_CHANGED
2912
2913int
2914g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2915{
2916	struct g_provider *pp;
2917	u_char *buf;
2918	int error;
2919
2920	g_topology_assert();
2921
2922	error = g_access(cp, 1, 0, 0);
2923	if (error != 0)
2924		return (error);
2925	pp = cp->provider;
2926	g_topology_unlock();
	/* Metadata is stored in the last sector. */
2928	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2929	    &error);
2930	g_topology_lock();
2931	g_access(cp, -1, 0, 0);
2932	if (buf == NULL) {
2933		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2934		    cp->provider->name, error);
2935		return (error);
2936	}
2937
2938	/* Decode metadata. */
2939	error = mirror_metadata_decode(buf, md);
2940	g_free(buf);
2941	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2942		return (EINVAL);
2943	if (md->md_version > G_MIRROR_VERSION) {
2944		G_MIRROR_DEBUG(0,
2945		    "Kernel module is too old to handle metadata from %s.",
2946		    cp->provider->name);
2947		return (EINVAL);
2948	}
2949	if (error != 0) {
2950		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2951		    cp->provider->name);
2952		return (error);
2953	}
2954
2955	return (0);
2956}
2957
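/*
 * Sanity-check a component's metadata against the device before connecting
 * it: reject duplicate disk IDs, too-small providers, mismatched or
 * incompatible sector sizes and unknown flag bits.
 */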
2958static int
2959g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2960    struct g_mirror_metadata *md)
2961{
2962
	G_MIRROR_DEBUG(2, "%s: md_did %u disk %s device %s md_all 0x%x "
2964	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
2965	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
2966	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
2967	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
2968	    "sc_state 0x%x.",
2969	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
2970	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
2971	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
2972	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
2973	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
2974	    md->md_genid, md->md_priority, sc->sc_state);
2975
2976	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2977		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2978		    pp->name, md->md_did);
2979		return (EEXIST);
2980	}
2981	if (sc->sc_mediasize > pp->mediasize) {
2982		G_MIRROR_DEBUG(1,
2983		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2984		    sc->sc_name);
2985		return (EINVAL);
2986	}
2987	if (md->md_sectorsize != sc->sc_sectorsize) {
2988		G_MIRROR_DEBUG(1,
2989		    "Invalid '%s' field on disk %s (device %s), skipping.",
2990		    "md_sectorsize", pp->name, sc->sc_name);
2991		return (EINVAL);
2992	}
2993	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2994		G_MIRROR_DEBUG(1,
2995		    "Invalid sector size of disk %s (device %s), skipping.",
2996		    pp->name, sc->sc_name);
2997		return (EINVAL);
2998	}
2999	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
3000		G_MIRROR_DEBUG(1,
3001		    "Invalid device flags on disk %s (device %s), skipping.",
3002		    pp->name, sc->sc_name);
3003		return (EINVAL);
3004	}
3005	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
3006		G_MIRROR_DEBUG(1,
3007		    "Invalid disk flags on disk %s (device %s), skipping.",
3008		    pp->name, sc->sc_name);
3009		return (EINVAL);
3010	}
3011	return (0);
3012}
3013
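/*
 * Connect a tasted component to the device: validate its metadata, refresh
 * the device from the component if the component's metadata is newer, then
 * create the disk and queue a NEW-state event for it, upgrading on-disk
 * metadata written by older versions along the way.
 */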
3014int
3015g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
3016    struct g_mirror_metadata *md)
3017{
3018	struct g_mirror_disk *disk;
3019	int error;
3020
3021	g_topology_assert_not();
3022	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
3023
3024	error = g_mirror_check_metadata(sc, pp, md);
3025	if (error != 0)
3026		return (error);
3027
3028	if (md->md_genid < sc->sc_genid) {
3029		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
3030		    pp->name, sc->sc_name);
3031		return (EINVAL);
3032	}
3033
3034	/*
3035	 * If the component disk we're tasting has newer metadata than the
3036	 * STARTING gmirror device, refresh the device from the component.
3037	 */
3038	error = g_mirror_refresh_device(sc, pp, md);
3039	if (error != 0)
3040		return (error);
3041
3042	disk = g_mirror_init_disk(sc, pp, md, &error);
3043	if (disk == NULL)
3044		return (error);
3045	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
3046	    G_MIRROR_EVENT_WAIT);
3047	if (error != 0)
3048		return (error);
3049	if (md->md_version < G_MIRROR_VERSION) {
3050		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
3051		    pp->name, md->md_version, G_MIRROR_VERSION);
3052		g_mirror_update_metadata(disk);
3053	}
3054	return (0);
3055}
3056
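/*
 * Event callback finishing a destruction that was deferred with
 * G_MIRROR_DESTROY_DELAYED until the provider's last close.
 */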
3057static void
3058g_mirror_destroy_delayed(void *arg, int flag)
3059{
3060	struct g_mirror_softc *sc;
3061	int error;
3062
3063	if (flag == EV_CANCEL) {
3064		G_MIRROR_DEBUG(1, "Destroying canceled.");
3065		return;
3066	}
3067	sc = arg;
3068	g_topology_unlock();
3069	sx_xlock(&sc->sc_lock);
3070	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3071	    ("DESTROY flag set on %s.", sc->sc_name));
3072	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3073	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3074	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3075	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3076	if (error != 0) {
3077		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3078		    sc->sc_name, error);
3079		sx_xunlock(&sc->sc_lock);
3080	}
3081	g_topology_lock();
3082}
3083
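/*
 * Access method for the mirror provider.  New openers are refused while
 * the device is being destroyed or has no disks; otherwise the open count
 * is tracked, the device is marked idle on the last write close, and the
 * deferred destruction is scheduled once a CLOSEWAIT device is fully
 * closed.
 */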
3084static int
3085g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3086{
3087	struct g_mirror_softc *sc;
3088	int error = 0;
3089
3090	g_topology_assert();
3091	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3092	    acw, ace);
3093
3094	sc = pp->private;
3095	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3096
3097	g_topology_unlock();
3098	sx_xlock(&sc->sc_lock);
3099	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3100	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3101	    LIST_EMPTY(&sc->sc_disks)) {
3102		if (acr > 0 || acw > 0 || ace > 0)
3103			error = ENXIO;
3104		goto end;
3105	}
3106	sc->sc_provider_open += acr + acw + ace;
3107	if (pp->acw + acw == 0)
3108		g_mirror_idle(sc, 0);
3109	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3110	    sc->sc_provider_open == 0)
3111		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3112end:
3113	sx_xunlock(&sc->sc_lock);
3114	g_topology_lock();
3115	return (error);
3116}
3117
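/*
 * Reload the device-wide parameters (generation and sync IDs, slice,
 * balance algorithm, media size, component count and device flags) from
 * component metadata.
 */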
3118static void
3119g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3120    const struct g_mirror_metadata *md)
3121{
3122
3123	sc->sc_genid = md->md_genid;
3124	sc->sc_syncid = md->md_syncid;
3125
3126	sc->sc_slice = md->md_slice;
3127	sc->sc_balance = md->md_balance;
3128	sc->sc_mediasize = md->md_mediasize;
3129	sc->sc_ndisks = md->md_all;
3130	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3131	sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3132}
3133
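/*
 * Create a new mirror device from metadata: set up the action and
 * synchronization geoms, the request queues and the worker thread, take a
 * root mount hold and arm the startup timeout that will eventually force
 * the device to run even if some components never show up.
 */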
3134struct g_geom *
3135g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3136    u_int type)
3137{
3138	struct g_mirror_softc *sc;
3139	struct g_geom *gp;
3140	int error, timeout;
3141
3142	g_topology_assert();
3143	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3144	    md->md_mid);
3145
	/* One disk is the minimum. */
3147	if (md->md_all < 1)
3148		return (NULL);
3149	/*
3150	 * Action geom.
3151	 */
3152	gp = g_new_geomf(mp, "%s", md->md_name);
3153	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3154	gp->start = g_mirror_start;
3155	gp->orphan = g_mirror_orphan;
3156	gp->access = g_mirror_access;
3157	gp->dumpconf = g_mirror_dumpconf;
3158
3159	sc->sc_type = type;
3160	sc->sc_id = md->md_mid;
3161	g_mirror_reinit_from_metadata(sc, md);
3162	sc->sc_sectorsize = md->md_sectorsize;
3163	sc->sc_bump_id = 0;
3164	sc->sc_idle = 1;
3165	sc->sc_last_write = time_uptime;
3166	sc->sc_writes = 0;
3167	sc->sc_refcnt = 1;
3168	sx_init(&sc->sc_lock, "gmirror:lock");
3169	TAILQ_INIT(&sc->sc_queue);
3170	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3171	TAILQ_INIT(&sc->sc_regular_delayed);
3172	TAILQ_INIT(&sc->sc_inflight);
3173	TAILQ_INIT(&sc->sc_sync_delayed);
3174	LIST_INIT(&sc->sc_disks);
3175	TAILQ_INIT(&sc->sc_events);
3176	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3177	callout_init(&sc->sc_callout, 1);
3178	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3179	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3180	gp->softc = sc;
3181	sc->sc_geom = gp;
3182	sc->sc_provider = NULL;
3183	sc->sc_provider_open = 0;
3184	/*
3185	 * Synchronization geom.
3186	 */
3187	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3188	gp->softc = sc;
3189	gp->orphan = g_mirror_orphan;
3190	sc->sc_sync.ds_geom = gp;
3191	sc->sc_sync.ds_ndisks = 0;
3192	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3193	    "g_mirror %s", md->md_name);
3194	if (error != 0) {
3195		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3196		    sc->sc_name);
3197		g_destroy_geom(sc->sc_sync.ds_geom);
3198		g_destroy_geom(sc->sc_geom);
3199		g_mirror_free_device(sc);
3200		return (NULL);
3201	}
3202
3203	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3204	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3205
3206	sc->sc_rootmount = root_mount_hold("GMIRROR");
3207	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3208
3209	/*
3210	 * Schedule startup timeout.
3211	 */
3212	timeout = g_mirror_timeout * hz;
3213	sc->sc_timeout_event = malloc(sizeof(struct g_mirror_event), M_MIRROR,
3214	    M_WAITOK);
3215	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3216	return (sc->sc_geom);
3217}
3218
3219int
3220g_mirror_destroy(struct g_mirror_softc *sc, int how)
3221{
3222	struct g_mirror_disk *disk;
3223
3224	g_topology_assert_not();
3225	sx_assert(&sc->sc_lock, SX_XLOCKED);
3226
3227	if (sc->sc_provider_open != 0) {
3228		switch (how) {
3229		case G_MIRROR_DESTROY_SOFT:
3230			G_MIRROR_DEBUG(1,
3231			    "Device %s is still open (%d).", sc->sc_name,
3232			    sc->sc_provider_open);
3233			return (EBUSY);
3234		case G_MIRROR_DESTROY_DELAYED:
3235			G_MIRROR_DEBUG(1,
3236			    "Device %s will be destroyed on last close.",
3237			    sc->sc_name);
3238			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3239				if (disk->d_state ==
3240				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3241					g_mirror_sync_stop(disk, 1);
3242				}
3243			}
3244			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3245			return (EBUSY);
3246		case G_MIRROR_DESTROY_HARD:
3247			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3248			    "can't be definitely removed.", sc->sc_name);
3249		}
3250	}
3251
3252	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3253		sx_xunlock(&sc->sc_lock);
3254		return (0);
3255	}
3256	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3257	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3258	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3259	sx_xunlock(&sc->sc_lock);
3260	mtx_lock(&sc->sc_queue_mtx);
3261	wakeup(sc);
3262	mtx_unlock(&sc->sc_queue_mtx);
3263	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3264	while (sc->sc_worker != NULL)
3265		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3266	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3267	sx_xlock(&sc->sc_lock);
3268	g_mirror_destroy_device(sc);
3269	return (0);
3270}
3271
3272static void
3273g_mirror_taste_orphan(struct g_consumer *cp)
3274{
3275
3276	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3277	    cp->provider->name));
3278}
3279
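/*
 * Taste method: read and validate gmirror metadata from the provider,
 * find or create the matching device and hand the component over to
 * g_mirror_add_disk().  The taste geom and its consumer exist only long
 * enough to read the metadata.
 */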
3280static struct g_geom *
3281g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3282{
3283	struct g_mirror_metadata md;
3284	struct g_mirror_softc *sc;
3285	struct g_consumer *cp;
3286	struct g_geom *gp;
3287	int error;
3288
3289	g_topology_assert();
3290	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3291	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3292
3293	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should never be called.
	 */
3297	gp->orphan = g_mirror_taste_orphan;
3298	cp = g_new_consumer(gp);
3299	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
3300	error = g_attach(cp, pp);
3301	if (error == 0) {
3302		error = g_mirror_read_metadata(cp, &md);
3303		g_detach(cp);
3304	}
3305	g_destroy_consumer(cp);
3306	g_destroy_geom(gp);
3307	if (error != 0)
3308		return (NULL);
3309	gp = NULL;
3310
3311	if (md.md_provider[0] != '\0' &&
3312	    !g_compare_names(md.md_provider, pp->name))
3313		return (NULL);
3314	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3315		return (NULL);
3316	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3317		G_MIRROR_DEBUG(0,
3318		    "Device %s: provider %s marked as inactive, skipping.",
3319		    md.md_name, pp->name);
3320		return (NULL);
3321	}
3322	if (g_mirror_debug >= 2)
3323		mirror_metadata_dump(&md);
3324
	/*
	 * Let's check if the device already exists.
	 */
3328	sc = NULL;
3329	LIST_FOREACH(gp, &mp->geom, geom) {
3330		sc = gp->softc;
3331		if (sc == NULL)
3332			continue;
3333		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3334			continue;
3335		if (sc->sc_sync.ds_geom == gp)
3336			continue;
3337		if (strcmp(md.md_name, sc->sc_name) != 0)
3338			continue;
3339		if (md.md_mid != sc->sc_id) {
3340			G_MIRROR_DEBUG(0, "Device %s already configured.",
3341			    sc->sc_name);
3342			return (NULL);
3343		}
3344		break;
3345	}
3346	if (gp == NULL) {
3347		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3348		if (gp == NULL) {
3349			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3350			    md.md_name);
3351			return (NULL);
3352		}
3353		sc = gp->softc;
3354	}
3355	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3356	g_topology_unlock();
3357	sx_xlock(&sc->sc_lock);
3358	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3359	error = g_mirror_add_disk(sc, pp, &md);
3360	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3361	if (error != 0) {
3362		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3363		    pp->name, gp->name, error);
3364		if (LIST_EMPTY(&sc->sc_disks)) {
3365			g_cancel_event(sc);
3366			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3367			g_topology_lock();
3368			return (NULL);
3369		}
3370		gp = NULL;
3371	}
3372	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3373		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3374		g_topology_lock();
3375		return (NULL);
3376	}
3377	sx_xunlock(&sc->sc_lock);
3378	g_topology_lock();
3379	return (gp);
3380}
3381
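/*
 * A component's provider changed size; rewrite the disk's metadata so it
 * ends up in the new last sector.
 */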
3382static void
3383g_mirror_resize(struct g_consumer *cp)
3384{
3385	struct g_mirror_disk *disk;
3386
3387	g_topology_assert();
3388	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3389
3390	disk = cp->private;
3391	if (disk == NULL)
3392		return;
3393	g_topology_unlock();
3394	g_mirror_update_metadata(disk);
3395	g_topology_lock();
3396}
3397
3398static int
3399g_mirror_destroy_geom(struct gctl_req *req __unused,
3400    struct g_class *mp __unused, struct g_geom *gp)
3401{
3402	struct g_mirror_softc *sc;
3403	int error;
3404
3405	g_topology_unlock();
3406	sc = gp->softc;
3407	sx_xlock(&sc->sc_lock);
3408	g_cancel_event(sc);
3409	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3410	if (error != 0)
3411		sx_xunlock(&sc->sc_lock);
3412	g_topology_lock();
3413	return (error);
3414}
3415
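/*
 * Dump device and per-consumer state into the GEOM configuration XML
 * (the synchronization geom is skipped).
 */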
3416static void
3417g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3418    struct g_consumer *cp, struct g_provider *pp)
3419{
3420	struct g_mirror_softc *sc;
3421
3422	g_topology_assert();
3423
3424	sc = gp->softc;
3425	if (sc == NULL)
3426		return;
3427	/* Skip synchronization geom. */
3428	if (gp == sc->sc_sync.ds_geom)
3429		return;
3430	if (pp != NULL) {
3431		/* Nothing here. */
3432	} else if (cp != NULL) {
3433		struct g_mirror_disk *disk;
3434
3435		disk = cp->private;
3436		if (disk == NULL)
3437			return;
3438		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3439		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3440			sbuf_printf(sb, "%s<Synchronized>", indent);
3441			if (disk->d_sync.ds_offset == 0)
3442				sbuf_cat(sb, "0%");
3443			else
3444				sbuf_printf(sb, "%u%%",
3445				    (u_int)((disk->d_sync.ds_offset * 100) /
3446				    sc->sc_mediasize));
3447			sbuf_cat(sb, "</Synchronized>\n");
3448			if (disk->d_sync.ds_offset > 0)
3449				sbuf_printf(sb, "%s<BytesSynced>%jd"
3450				    "</BytesSynced>\n", indent,
3451				    (intmax_t)disk->d_sync.ds_offset);
3452		}
3453		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3454		    disk->d_sync.ds_syncid);
3455		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3456		    disk->d_genid);
3457		sbuf_printf(sb, "%s<Flags>", indent);
3458		if (disk->d_flags == 0)
3459			sbuf_cat(sb, "NONE");
3460		else {
3461			int first = 1;
3462
3463#define	ADD_FLAG(flag, name)	do {					\
3464	if ((disk->d_flags & (flag)) != 0) {				\
3465		if (!first)						\
3466			sbuf_cat(sb, ", ");				\
3467		else							\
3468			first = 0;					\
3469		sbuf_cat(sb, name);					\
3470	}								\
3471} while (0)
3472			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3473			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3474			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3475			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3476			    "SYNCHRONIZING");
3477			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3478			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3479#undef	ADD_FLAG
3480		}
3481		sbuf_cat(sb, "</Flags>\n");
3482		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3483		    disk->d_priority);
3484		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3485		    g_mirror_disk_state2str(disk->d_state));
3486	} else {
3487		sbuf_printf(sb, "%s<Type>", indent);
3488		switch (sc->sc_type) {
3489		case G_MIRROR_TYPE_AUTOMATIC:
3490			sbuf_cat(sb, "AUTOMATIC");
3491			break;
3492		case G_MIRROR_TYPE_MANUAL:
3493			sbuf_cat(sb, "MANUAL");
3494			break;
3495		default:
3496			sbuf_cat(sb, "UNKNOWN");
3497			break;
3498		}
3499		sbuf_cat(sb, "</Type>\n");
3500		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3501		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3502		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3503		sbuf_printf(sb, "%s<Flags>", indent);
3504		if (sc->sc_flags == 0)
3505			sbuf_cat(sb, "NONE");
3506		else {
3507			int first = 1;
3508
3509#define	ADD_FLAG(flag, name)	do {					\
3510	if ((sc->sc_flags & (flag)) != 0) {				\
3511		if (!first)						\
3512			sbuf_cat(sb, ", ");				\
3513		else							\
3514			first = 0;					\
3515		sbuf_cat(sb, name);					\
3516	}								\
3517} while (0)
3518			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3519			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3520#undef	ADD_FLAG
3521		}
3522		sbuf_cat(sb, "</Flags>\n");
3523		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3524		    (u_int)sc->sc_slice);
3525		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3526		    balance_name(sc->sc_balance));
3527		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3528		    sc->sc_ndisks);
3529		sbuf_printf(sb, "%s<State>", indent);
3530		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3531			sbuf_printf(sb, "%s", "STARTING");
3532		else if (sc->sc_ndisks ==
3533		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3534			sbuf_printf(sb, "%s", "COMPLETE");
3535		else
3536			sbuf_printf(sb, "%s", "DEGRADED");
3537		sbuf_cat(sb, "</State>\n");
3538	}
3539}
3540
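/*
 * Shutdown event handler, run after file systems have been synced: mark
 * every mirror idle and schedule its delayed destruction so components
 * are closed cleanly before the reboot.
 */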
3541static void
3542g_mirror_shutdown_post_sync(void *arg, int howto)
3543{
3544	struct g_class *mp;
3545	struct g_geom *gp, *gp2;
3546	struct g_mirror_softc *sc;
3547	int error;
3548
3549	if ((howto & RB_NOSYNC) != 0)
3550		return;
3551
3552	mp = arg;
3553	g_topology_lock();
3554	g_mirror_shutdown = 1;
3555	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3556		if ((sc = gp->softc) == NULL)
3557			continue;
3558		/* Skip synchronization geom. */
3559		if (gp == sc->sc_sync.ds_geom)
3560			continue;
3561		g_topology_unlock();
3562		sx_xlock(&sc->sc_lock);
3563		g_mirror_idle(sc, -1);
3564		g_cancel_event(sc);
3565		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3566		if (error != 0)
3567			sx_xunlock(&sc->sc_lock);
3568		g_topology_lock();
3569	}
3570	g_topology_unlock();
3571}
3572
3573static void
3574g_mirror_init(struct g_class *mp)
3575{
3576
3577	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3578	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3579	if (g_mirror_post_sync == NULL)
3580		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3581}
3582
3583static void
3584g_mirror_fini(struct g_class *mp)
3585{
3586
3587	if (g_mirror_post_sync != NULL)
3588		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3589}
3590
3591/*
3592 * Refresh the mirror device's metadata when gmirror encounters a newer
3593 * generation as the individual components are being added to the mirror set.
3594 */
3595static int
3596g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3597    const struct g_mirror_metadata *md)
3598{
3599
3600	g_topology_assert_not();
3601	sx_assert(&sc->sc_lock, SX_XLOCKED);
3602
3603	KASSERT(sc->sc_genid <= md->md_genid,
3604	    ("%s: attempted to refresh from stale component %s (device %s) "
3605	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3606	    sc->sc_genid));
3607
3608	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3609	    sc->sc_syncid >= md->md_syncid))
3610		return (0);
3611
3612	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3613	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3614	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3615	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3616
3617	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3618		/* Probable data corruption detected */
3619		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3620		    "(device=%s genid=%u). A stale mirror device was launched.",
3621		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3622		    sc->sc_genid);
3623		return (EINVAL);
3624	}
3625
3626	/* Update softc */
3627	g_mirror_reinit_from_metadata(sc, md);
3628
3629	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3630	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3631	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3632	    md->md_syncid, (unsigned)md->md_all);
3633
3634	return (0);
3635}
3636
3637DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3638MODULE_VERSION(geom_mirror, 0);
3639