/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/geom/mirror/g_mirror.c 324589 2017-10-13 09:14:05Z avg $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
    &g_mirror_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");

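/*
 * All of the knobs above are also loader tunables, so they can be preset
 * at boot, e.g. from /boot/loader.conf (sync_requests is CTLFLAG_RDTUN and
 * can only be set that way):
 *
 *	kern.geom.mirror.debug=2
 *	kern.geom.mirror.sync_requests=4
 */

/*
 * Debugging wrapper around msleep(9): log at debug level 4 before going to
 * sleep on "ident" and again after waking up.
 */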
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

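/*
 * Queue an event for the worker thread and wake it up.  The argument is
 * either a disk or, with G_MIRROR_EVENT_DEVICE, the softc itself.  Unless
 * G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock and sleep until the worker
 * marks the event done, then return the event's error status.
 */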
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

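/*
 * Return the first queued event without removing it from the queue, or
 * NULL if the queue is empty.
 */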
static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

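/*
 * Count the bios in the device queue which were issued from the given
 * consumer.
 */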
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

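/*
 * A consumer is busy while it has in-flight requests (cp->index) or
 * requests still sitting in the queue; a busy consumer must not be
 * destroyed yet.
 */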
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event was sent (inside g_access()), we
		 * can send an event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how we ignore retaste events when closing consumers
		 * opened for write: the consumer is detached and destroyed
		 * only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it to the
 * provider, and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

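/*
 * GEOM ->providergone method: drop the reference that the provider held on
 * the softc and free the device once the last reference is gone.
 */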
static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

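/*
 * GEOM ->orphan method: the underlying provider went away, so schedule
 * disconnection of the affected disk from the worker thread.
 */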
static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider was
		 * reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

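/*
 * Completion handler for the per-component BIO_FLUSH clones: accumulate
 * status in the parent bio and deliver it once all children have returned.
 */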
static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

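/*
 * Completion handler for regular requests: mark the bio as a completed
 * regular request and hand it back to the worker thread via the queue.
 */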
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				if (bp->bio_error == ENXIO &&
				    bp->bio_cmd == BIO_READ)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				else if (bp->bio_error == ENXIO)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
				else
					sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_insert_tail(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

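/*
 * Answer a GEOM::candelete query: report whether at least one component
 * supports BIO_DELETE.
 */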
static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected,
	 * we will not be able to read the dump after the reboot if it is
	 * connected and synchronized later. Can we do something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

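/*
 * Fan a BIO_FLUSH request out to all ACTIVE components; the clones are
 * collected again in g_mirror_flush_done().
 */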
static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request collides with an in-progress
 * synchronization request.
 */
static int
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (1);
		}
	}
	return (0);
}

/*
 * Return TRUE if the given sync request collides with an in-progress
 * regular request.
 */
static int
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Put a regular request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests which no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called from
		 * the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests which no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider, and then a WRITE request (with the data
 * just read) is sent to the provider being synchronized. When the WRITE
 * finishes, a new synchronization request is sent.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		free(bp->bio_data, M_MIRROR);
		g_destroy_bio(bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;
		off_t offset;
		void *data;
		int i;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			if (sync->ds_bios != NULL) {
				i = (int)(uintptr_t)bp->bio_caller1;
				sync->ds_bios[i] = NULL;
			}
			free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
			/* Update offset_done on every 100 blocks. */
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

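/*
 * "load" balance algorithm: pick the ACTIVE component with the lowest load.
 * A component whose head is exactly at the request offset gets a bonus of
 * 2 * LOAD_SCALE, one within TRACK_SIZE of it gets 1 * LOAD_SCALE, and each
 * component's load decays as an exponential moving average of its
 * outstanding requests (7/8 old load + 1/8 current).
 */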
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

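/*
 * Dispatch a regular request: READs go to a single component chosen by the
 * configured balance algorithm; WRITEs and DELETEs are mirrored to every
 * ACTIVE component, and to SYNCHRONIZING components within their already
 * synchronized region.
 */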
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (bioq_first(&queue) == NULL) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, 0);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Check if we can mark the array as CLEAN and, if we can't,
		 * how many seconds we should wait.
		 */
		timeout = g_mirror_idle(sc, -1);
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_takefirst(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
				mtx_lock(&sc->sc_queue_mtx);
			}
			sx_xunlock(&sc->sc_lock);
			/*
			 * XXX: We can miss an event here, because an event
			 *      can be added without the sx device lock and
			 *      without the queue mutex. Maybe we should just
			 *      stop using a dedicated mutex for event
			 *      synchronization and stick with the queue lock?
			 *      The event will hang here until the next I/O
			 *      request or the next event is received.
			 */
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
			    timeout * hz);
			sx_xlock(&sc->sc_lock);
			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
			continue;
		}
		mtx_unlock(&sc->sc_queue_mtx);

		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
			g_mirror_sync_request(bp);	/* READ */
		} else if (bp->bio_to != sc->sc_provider) {
			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
				g_mirror_regular_request(bp);
			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
				g_mirror_sync_request(bp);	/* WRITE */
			else {
				KASSERT(0,
				    ("Invalid request cflags=0x%hhx to=%s.",
				    bp->bio_cflags, bp->bio_to->name));
			}
		} else {
			g_mirror_register_request(bp);
		}
		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
	}
}

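/*
 * Track the component's DIRTY flag: mark the disk dirty when the device is
 * active and clean when it has gone idle, unless failed-component
 * synchronization is administratively disabled (NOFAILSYNC).
 */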
static void
g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{

	sx_assert(&sc->sc_lock, SX_LOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	} else if (sc->sc_idle &&
	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
	}
}

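/*
 * Start synchronization of the given disk: attach a fresh consumer from the
 * synchronization geom to the mirror provider and fire off the initial batch
 * of g_mirror_syncreqs parallel read requests.
 */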
static void
g_mirror_sync_start(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct bio *bp;
	int error, i;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));

	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	cp = g_new_consumer(sc->sc_sync.ds_geom);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, sc->sc_provider);
	KASSERT(error == 0,
	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
	error = g_access(cp, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));

	disk->d_sync.ds_consumer = cp;
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;

	/*
	 * Allocate memory for synchronization bios and initialize them.
	 */
	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
	    M_MIRROR, M_WAITOK);
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = g_alloc_bio();
		disk->d_sync.ds_bios[i] = bp;
		bp->bio_parent = NULL;
		bp->bio_cmd = BIO_READ;
		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
		bp->bio_cflags = 0;
		bp->bio_offset = disk->d_sync.ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		disk->d_sync.ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_from = disk->d_sync.ds_consumer;
		bp->bio_to = sc->sc_provider;
		bp->bio_caller1 = (void *)(uintptr_t)i;
	}

	/* Increase the number of disks in SYNCHRONIZING state. */
	sc->sc_sync.ds_ndisks++;
	/* Set the number of in-flight synchronization requests. */
	disk->d_sync.ds_inflight = g_mirror_syncreqs;

	/*
	 * Fire off the first synchronization requests.
	 */
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = disk->d_sync.ds_bios[i];
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		disk->d_sync.ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, disk->d_sync.ds_consumer);
	}
}

/*
 * Stop the synchronization process.
 * type: 0 - synchronization finished
 *       1 - synchronization stopped
 */
static void
g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
	    g_mirror_disk_state2str(disk->d_state)));
	if (disk->d_sync.ds_consumer == NULL)
		return;

	if (type == 0) {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
		    sc->sc_name, g_mirror_get_diskname(disk));
	} else /* if (type == 1) */ {
		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
		    sc->sc_name, g_mirror_get_diskname(disk));
	}
	free(disk->d_sync.ds_bios, M_MIRROR);
	disk->d_sync.ds_bios = NULL;
	cp = disk->d_sync.ds_consumer;
	disk->d_sync.ds_consumer = NULL;
	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
	sc->sc_sync.ds_ndisks--;
	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
	g_topology_lock();
	g_mirror_kill_consumer(sc, cp);
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
}

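/*
 * Create and launch the mirror provider.  Stripe geometry and unmapped-I/O
 * capability are inherited from the components, and synchronization is
 * kicked off for any disks that still need it.
 */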
static void
g_mirror_launch_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_provider *pp, *dp;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_topology_lock();
	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
	pp->flags |= G_PF_DIRECT_RECEIVE;
	pp->mediasize = sc->sc_mediasize;
	pp->sectorsize = sc->sc_sectorsize;
	pp->stripesize = 0;
	pp->stripeoffset = 0;

	/* Splitting of unmapped BIOs could work but isn't implemented now. */
	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
		pp->flags |= G_PF_ACCEPT_UNMAPPED;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_consumer && disk->d_consumer->provider) {
			dp = disk->d_consumer->provider;
			if (dp->stripesize > pp->stripesize) {
				pp->stripesize = dp->stripesize;
				pp->stripeoffset = dp->stripeoffset;
			}
			/* A provider underneath us doesn't support unmapped. */
			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
				G_MIRROR_DEBUG(0, "Cancelling unmapped "
				    "because of %s.", dp->name);
				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
			}
		}
	}
	pp->private = sc;
	sc->sc_refcnt++;
	sc->sc_provider = pp;
	g_error_provider(pp, 0);
	g_topology_unlock();
	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_start(disk);
	}
}

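/*
 * Tear down the mirror provider: fail it with ENXIO, drain the request
 * queue and stop any synchronization that is still in progress.
 */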
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	g_topology_lock();
	g_error_provider(sc->sc_provider, ENXIO);
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
		/*
		 * Abort any pending I/O that wasn't generated by us.
		 * Synchronization requests and requests destined for individual
		 * mirror components can be destroyed immediately.
		 */
		if (bp->bio_to == sc->sc_provider &&
		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
			g_io_deliver(bp, ENXIO);
		} else {
			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
				free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
		}
	}
	mtx_unlock(&sc->sc_queue_mtx);
	g_wither_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
	g_topology_unlock();
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}

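/*
 * Callout handler armed in g_mirror_create(): force the device to start
 * after the timeout expires, even if not all components have shown up.
 */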
static void
g_mirror_go(void *arg)
{
	struct g_mirror_softc *sc;

	sc = arg;
	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
	g_mirror_event_send(sc, 0,
	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
}

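/*
 * Determine the initial state for a connecting disk by comparing its syncid
 * with the device's: ACTIVE (up to date), SYNCHRONIZING (behind, autosync
 * allowed or forced), STALE (behind, autosync disabled) or NONE (the disk is
 * fresher than the running device and is destroyed instead of connected).
 */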
static u_int
g_mirror_determine_state(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_MIRROR_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
			    (disk->d_flags &
			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_MIRROR_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk, because even
		 * if it was synchronized, it was synchronized against disks
		 * with a different syncid.
		 */
		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_MIRROR_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were any writes, the mirror is broken, sorry.
		 * The best choice here is to leave this disk untouched
		 * and inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrived! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
		g_mirror_destroy_disk(disk);
		state = G_MIRROR_DISK_STATE_NONE;
		/* Return immediately, because the disk was destroyed. */
		return (state);
	}
	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
	return (state);
}

/*
 * Update device state.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_mirror_disk *disk;
	u_int state;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk, *tdisk;
		u_int dirty, ndisks, genid, syncid;

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready?  We are, if all disks are connected or
		 * if we have any disks and 'force' is true.
		 */
		ndisks = g_mirror_ndisks(sc, -1);
		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
			;
		} else if (ndisks == 0) {
			/*
			 * All disks went down in the starting phase,
			 * so destroy the device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
			    sc->sc_rootmount);
			root_mount_rel(sc->sc_rootmount);
			sc->sc_rootmount = NULL;
			return;
		} else {
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling the timeout.
			 */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Find the biggest genid.
		 */
		genid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_genid > genid)
				genid = disk->d_genid;
		}
		sc->sc_genid = genid;
		/*
		 * Remove all disks without the biggest genid.
		 */
		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
			if (disk->d_genid < genid) {
				G_MIRROR_DEBUG(0,
				    "Component %s (device %s) broken, skipping.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				g_mirror_destroy_disk(disk);
			}
		}

		/*
		 * Find the biggest syncid.
		 */
		syncid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid > syncid)
				syncid = disk->d_sync.ds_syncid;
		}

		/*
		 * Look for dirty disks: if all disks with the biggest syncid
		 * are dirty, choose the one with the biggest priority as the
		 * master and rebuild the rest.  While counting the dirty
		 * disks and the disks with the biggest syncid, also find the
		 * disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except
			 * the one with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		sc->sc_syncid = syncid;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE)
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No active disks or no disks at all,
			 * so destroy the device.
			 */
			if (sc->sc_provider != NULL)
				g_mirror_destroy_provider(sc);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, so launch the provider if it
			 * doesn't exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
			if (sc->sc_rootmount != NULL) {
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
			}
		}
		/*
		 * Genid should be bumped immediately, so do it here.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
			g_mirror_bump_genid(sc);
		}
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
			g_mirror_bump_syncid(sc);
		}
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}

/*
 * Update disk state and device state if needed.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
static int
g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
{
	struct g_mirror_softc *sc;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

again:
	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
	    g_mirror_disk_state2str(state));
	switch (state) {
	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. A new disk arrives.
		 */
		/* Previous state should be NONE. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_state = state;
		if (LIST_EMPTY(&sc->sc_disks))
			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
		else {
			struct g_mirror_disk *dp;

			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
				if (disk->d_priority >= dp->d_priority) {
					LIST_INSERT_BEFORE(dp, disk, d_next);
					dp = NULL;
					break;
				}
				if (LIST_NEXT(dp, d_next) == NULL)
					break;
			}
			if (dp != NULL)
				LIST_INSERT_AFTER(dp, disk, d_next);
		}
		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			break;
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		state = g_mirror_determine_state(disk);
		if (state != G_MIRROR_DISK_STATE_NONE)
			goto again;
		break;
	case G_MIRROR_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. A new disk does not need synchronization.
		 * 2. The synchronization process finished successfully.
		 */
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/* Previous state should be NEW or SYNCHRONIZING. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
			g_mirror_sync_stop(disk, 0);
		}
		disk->d_state = state;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		g_mirror_update_idle(sc, disk);
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. A stale disk was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * The STALE state is only possible if the device is marked
		 * NOAUTOSYNC.
		 */
		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. A disk which needs synchronization was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		if (sc->sc_provider != NULL) {
			g_mirror_sync_start(disk);
			g_mirror_update_metadata(disk);
		}
		break;
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. The device wasn't running yet, but a disk disappeared.
		 * 2. A disk was active and disappeared.
		 * 3. A disk disappeared during the synchronization process.
		 */
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
			/*
			 * Previous state should be ACTIVE, STALE or
			 * SYNCHRONIZING.
			 */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
			/* Previous state should be NEW. */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
			/*
			 * Reset the syncid bump if a disk disappeared in the
			 * STARTING state.
			 */
			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
#ifdef	INVARIANTS
		} else {
			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
			    sc->sc_name,
			    g_mirror_device_state2str(sc->sc_state),
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
#endif
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		break;
	case G_MIRROR_DISK_STATE_DESTROY:
	    {
		int error;

		error = g_mirror_clear_metadata(disk);
		if (error != 0) {
			G_MIRROR_DEBUG(0,
			    "Device %s: failed to clear metadata on %s: %d.",
			    sc->sc_name, g_mirror_get_diskname(disk), error);
			break;
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		sc->sc_ndisks--;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			g_mirror_update_metadata(disk);
		}
		break;
	    }
	default:
		KASSERT(1 == 0, ("Unknown state (%u).", state));
		break;
	}
	return (0);
}
#undef	DISK_STATE_CHANGED

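/*
 * Read and decode mirror metadata from the last sector of the consumer's
 * provider.  Returns 0 and fills in 'md' on success.
 */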
int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
	struct g_provider *pp;
	u_char *buf;
	int error;

	g_topology_assert();

	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	g_topology_unlock();
	/* Metadata are stored in the last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (buf == NULL) {
		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    cp->provider->name, error);
		return (error);
	}

	/* Decode metadata. */
	error = mirror_metadata_decode(buf, md);
	g_free(buf);
	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
		return (EINVAL);
	if (md->md_version > G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0,
		    "Kernel module is too old to handle metadata from %s.",
		    cp->provider->name);
		return (EINVAL);
	}
	if (error != 0) {
		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
		    cp->provider->name);
		return (error);
	}

	return (0);
}

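/*
 * Check that the metadata read from a component is consistent with the
 * existing device configuration: number of disks, slice size, balance
 * algorithm, media and sector sizes, and flags.
 */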
static int
g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{

	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
		    pp->name, md->md_did);
		return (EEXIST);
	}
	if (md->md_all != sc->sc_ndisks) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_all", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_slice != sc->sc_slice) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_slice", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if (md->md_balance != sc->sc_balance) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_balance", pp->name, sc->sc_name);
		return (EINVAL);
	}
#if 0
	if (md->md_mediasize != sc->sc_mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_mediasize", pp->name, sc->sc_name);
		return (EINVAL);
	}
#endif
	if (sc->sc_mediasize > pp->mediasize) {
		G_MIRROR_DEBUG(1,
		    "Invalid size of disk %s (device %s), skipping.", pp->name,
		    sc->sc_name);
		return (EINVAL);
	}
	if (md->md_sectorsize != sc->sc_sectorsize) {
		G_MIRROR_DEBUG(1,
		    "Invalid '%s' field on disk %s (device %s), skipping.",
		    "md_sectorsize", pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid sector size of disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid device flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
		G_MIRROR_DEBUG(1,
		    "Invalid disk flags on disk %s (device %s), skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	return (0);
}

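/*
 * Validate the metadata, create the disk structure and queue a NEW event
 * for it.  Old on-disk metadata is upgraded to the current version.
 */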
int
g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md)
{
	struct g_mirror_disk *disk;
	int error;

	g_topology_assert_not();
	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);

	error = g_mirror_check_metadata(sc, pp, md);
	if (error != 0)
		return (error);
	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
	    md->md_genid < sc->sc_genid) {
		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
		    pp->name, sc->sc_name);
		return (EINVAL);
	}
	disk = g_mirror_init_disk(sc, pp, md, &error);
	if (disk == NULL)
		return (error);
	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
	    G_MIRROR_EVENT_WAIT);
	if (error != 0)
		return (error);
	if (md->md_version < G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
		    pp->name, md->md_version, G_MIRROR_VERSION);
		g_mirror_update_metadata(disk);
	}
	return (0);
}

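/*
 * GEOM event handler posted from g_mirror_access() when the provider of a
 * device marked DESTROYING is closed for the last time: finish the delayed
 * destruction.
 */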
static void
g_mirror_destroy_delayed(void *arg, int flag)
{
	struct g_mirror_softc *sc;
	int error;

	if (flag == EV_CANCEL) {
		G_MIRROR_DEBUG(1, "Destroying canceled.");
		return;
	}
	sc = arg;
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
	    ("DESTROY flag set on %s.", sc->sc_name));
	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
	    ("DESTROYING flag not set on %s.", sc->sc_name));
	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
		    sc->sc_name, error);
		sx_xunlock(&sc->sc_lock);
	}
	g_topology_lock();
}

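/*
 * Provider access method.  Tracks the provider open count, lets the device
 * go idle on the last write close and schedules the delayed destruction
 * once a DESTROYING device is fully closed.
 */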
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
	struct g_mirror_softc *sc;
	int error = 0;

	g_topology_assert();
	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
	    acw, ace);

	sc = pp->private;
	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));

	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 ||
	    LIST_EMPTY(&sc->sc_disks)) {
		if (acr > 0 || acw > 0 || ace > 0)
			error = ENXIO;
		goto end;
	}
	sc->sc_provider_open += acr + acw + ace;
	if (pp->acw + acw == 0)
		g_mirror_idle(sc, 0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 &&
	    sc->sc_provider_open == 0)
		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
end:
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}

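/*
 * Create a new mirror device from the given metadata: allocate the softc,
 * set up the action and synchronization geoms, start the worker thread and
 * arm the startup timeout.
 */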
static struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_geom *gp;
	int error, timeout;

	g_topology_assert();
	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_mid);

	/* At least one disk is required. */
	if (md->md_all < 1)
		return (NULL);
	/*
	 * Action geom.
	 */
	gp = g_new_geomf(mp, "%s", md->md_name);
	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
	gp->start = g_mirror_start;
	gp->orphan = g_mirror_orphan;
	gp->access = g_mirror_access;
	gp->dumpconf = g_mirror_dumpconf;

	sc->sc_id = md->md_mid;
	sc->sc_slice = md->md_slice;
	sc->sc_balance = md->md_balance;
	sc->sc_mediasize = md->md_mediasize;
	sc->sc_sectorsize = md->md_sectorsize;
	sc->sc_ndisks = md->md_all;
	sc->sc_flags = md->md_mflags;
	sc->sc_bump_id = 0;
	sc->sc_idle = 1;
	sc->sc_last_write = time_uptime;
	sc->sc_writes = 0;
	sc->sc_refcnt = 1;
	sx_init(&sc->sc_lock, "gmirror:lock");
	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
	bioq_init(&sc->sc_regular_delayed);
	bioq_init(&sc->sc_inflight);
	bioq_init(&sc->sc_sync_delayed);
	LIST_INIT(&sc->sc_disks);
	TAILQ_INIT(&sc->sc_events);
	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, 1);
	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;
	sc->sc_provider_open = 0;
	/*
	 * Synchronization geom.
	 */
	gp = g_new_geomf(mp, "%s.sync", md->md_name);
	gp->softc = sc;
	gp->orphan = g_mirror_orphan;
	sc->sc_sync.ds_geom = gp;
	sc->sc_sync.ds_ndisks = 0;
	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
	    "g_mirror %s", md->md_name);
	if (error != 0) {
		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
		    sc->sc_name);
		g_destroy_geom(sc->sc_sync.ds_geom);
		g_destroy_geom(sc->sc_geom);
		g_mirror_free_device(sc);
		return (NULL);
	}

	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
	    sc->sc_name, sc->sc_ndisks, sc->sc_id);

	sc->sc_rootmount = root_mount_hold("GMIRROR");
	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
	/*
	 * Schedule the startup timeout.
	 */
	timeout = g_mirror_timeout * hz;
	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
	return (sc->sc_geom);
}

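/*
 * Destroy the device.  'how' selects the policy when the provider is still
 * open: fail with EBUSY (SOFT), defer destruction until the last close
 * (DELAYED) or proceed regardless (HARD).  On success the worker thread is
 * woken up and waited for before the device is torn down.
 */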
int
g_mirror_destroy(struct g_mirror_softc *sc, int how)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider_open != 0) {
		switch (how) {
		case G_MIRROR_DESTROY_SOFT:
			G_MIRROR_DEBUG(1,
			    "Device %s is still open (%d).", sc->sc_name,
			    sc->sc_provider_open);
			return (EBUSY);
		case G_MIRROR_DESTROY_DELAYED:
			G_MIRROR_DEBUG(1,
			    "Device %s will be destroyed on last close.",
			    sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_state ==
				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
					g_mirror_sync_stop(disk, 1);
				}
			}
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
			return (EBUSY);
		case G_MIRROR_DESTROY_HARD:
			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
			    "can't be definitely removed.", sc->sc_name);
		}
	}

	g_topology_lock();
	if (sc->sc_geom->softc == NULL) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	g_topology_unlock();

	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	sx_xunlock(&sc->sc_lock);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	sx_xlock(&sc->sc_lock);
	g_mirror_destroy_device(sc);
	return (0);
}

static void
g_mirror_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

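/*
 * Taste method: read the metadata from the offered provider and either add
 * the disk to an already existing device or create a new one for it.
 */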
static struct g_geom *
g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_mirror_metadata md;
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);

	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should never be called.
	 */
	gp->orphan = g_mirror_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_mirror_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (md.md_provider[0] != '\0' &&
	    !g_compare_names(md.md_provider, pp->name))
		return (NULL);
	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
		return (NULL);
	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
		G_MIRROR_DEBUG(0,
		    "Device %s: provider %s marked as inactive, skipping.",
		    md.md_name, pp->name);
		return (NULL);
	}
	if (g_mirror_debug >= 2)
		mirror_metadata_dump(&md);

	/*
	 * Let's check if the device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		if (md.md_mid != sc->sc_id) {
			G_MIRROR_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		gp = g_mirror_create(mp, &md);
		if (gp == NULL) {
			G_MIRROR_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
	error = g_mirror_add_disk(sc, pp, &md);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		if (LIST_EMPTY(&sc->sc_disks)) {
			g_cancel_event(sc);
			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
			g_topology_lock();
			return (NULL);
		}
		gp = NULL;
	}
	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
		g_topology_lock();
		return (NULL);
	}
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (gp);
}

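/*
 * Consumer resize method: rewrite the disk's metadata after the underlying
 * provider has changed size.
 */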
static void
g_mirror_resize(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);

	disk = cp->private;
	if (disk == NULL)
		return;
	g_topology_unlock();
	g_mirror_update_metadata(disk);
	g_topology_lock();
}

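/*
 * Class destroy_geom method: attempt a soft destroy of the device, which
 * fails if the provider is still open.
 */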
static int
g_mirror_destroy_geom(struct gctl_req *req __unused,
    struct g_class *mp __unused, struct g_geom *gp)
{
	struct g_mirror_softc *sc;
	int error;

	g_topology_unlock();
	sc = gp->softc;
	sx_xlock(&sc->sc_lock);
	g_cancel_event(sc);
	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
	if (error != 0)
		sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	return (error);
}

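/*
 * Dump the device and per-disk configuration (state, flags, synchronization
 * progress) as XML for the GEOM configuration interface.
 */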
static void
g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here. */
	} else if (cp != NULL) {
		struct g_mirror_disk *disk;

		disk = cp->private;
		if (disk == NULL)
			return;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			sbuf_printf(sb, "%s<Synchronized>", indent);
			if (disk->d_sync.ds_offset == 0)
				sbuf_printf(sb, "0%%");
			else {
				sbuf_printf(sb, "%u%%",
				    (u_int)((disk->d_sync.ds_offset * 100) /
				    sc->sc_provider->mediasize));
			}
			sbuf_printf(sb, "</Synchronized>\n");
			if (disk->d_sync.ds_offset > 0) {
				sbuf_printf(sb, "%s<BytesSynced>%jd"
				    "</BytesSynced>\n", indent,
				    (intmax_t)disk->d_sync.ds_offset);
			}
		}
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
		    disk->d_sync.ds_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
		    disk->d_genid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (disk->d_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

#define	ADD_FLAG(flag, name)	do {					\
	if ((disk->d_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
			    "SYNCHRONIZING");
			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
		    disk->d_priority);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_mirror_disk_state2str(disk->d_state));
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	} else {
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (sc->sc_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

#define	ADD_FLAG(flag, name)	do {					\
	if ((sc->sc_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
		    (u_int)sc->sc_slice);
		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
		    balance_name(sc->sc_balance));
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    sc->sc_ndisks);
		sbuf_printf(sb, "%s<State>", indent);
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			sbuf_printf(sb, "%s", "STARTING");
		else if (sc->sc_ndisks ==
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
			sbuf_printf(sb, "%s", "COMPLETE");
		else
			sbuf_printf(sb, "%s", "DEGRADED");
		sbuf_printf(sb, "</State>\n");
		sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
}

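/*
 * shutdown_post_sync event handler: mark all components clean and schedule
 * delayed destruction of every mirror device before the system goes down.
 */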
static void
g_mirror_shutdown_post_sync(void *arg, int howto)
{
	struct g_class *mp;
	struct g_geom *gp, *gp2;
	struct g_mirror_softc *sc;
	int error;

	mp = arg;
	DROP_GIANT();
	g_topology_lock();
	g_mirror_shutdown = 1;
	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
		if ((sc = gp->softc) == NULL)
			continue;
		/* Skip synchronization geom. */
		if (gp == sc->sc_sync.ds_geom)
			continue;
		g_topology_unlock();
		sx_xlock(&sc->sc_lock);
		g_mirror_idle(sc, -1);
		g_cancel_event(sc);
		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
		if (error != 0)
			sx_xunlock(&sc->sc_lock);
		g_topology_lock();
	}
	g_topology_unlock();
	PICKUP_GIANT();
}

static void
g_mirror_init(struct g_class *mp)
{

	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
	if (g_mirror_post_sync == NULL)
		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
}

static void
g_mirror_fini(struct g_class *mp)
{

	if (g_mirror_post_sync != NULL)
		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
}

DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
