/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 324401 2017-10-07 22:59:09Z ngie $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
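/*
 * Usage sketch (not part of this file): the CTLFLAG_RWTUN knobs above can
 * be changed at runtime or set as loader tunables, e.g.
 * `sysctl kern.geom.mirror.debug=2`, while sync_requests is CTLFLAG_RDTUN
 * and can only be set as a tunable, e.g.
 * `kern.geom.mirror.sync_requests=4` in /boot/loader.conf.
 */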
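/*
 * MSLEEP() wraps msleep(9) with debug tracing so that every sleep on and
 * wakeup of an identifier is visible at debug level 4.
 */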
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

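/*
 * The GEOM class descriptor; GEOM calls back through these methods for
 * tasting, configuration requests, resize and teardown.
 */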
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to update disk and device state from a
 * single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

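/*
 * Queue an event for the worker thread and wake it up.  Unless
 * G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock and sleep until the worker
 * marks the event G_MIRROR_EVENT_DONE, then return the event's error.
 */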
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

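/*
 * Drop our access counts on the consumer and destroy it.  If the consumer
 * is still busy with I/O, bail out and let a later call finish the job;
 * if closing the last write count will trigger a retaste, defer the detach
 * and destroy to an event, as explained in the comment below.
 */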
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can send the event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are ignored when consumers
		 * opened for writing are closed: the consumer is detached
		 * and destroyed only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it to the
 * provider, and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

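/*
 * Called by GEOM when the last provider reference goes away.  The softc is
 * reference-counted: both this path and g_mirror_destroy_device() drop
 * sc_refcnt, and whichever reaches zero frees the device.
 */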
static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that the same disk as the one given will be returned.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * has been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

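/*
 * Decide whether the mirror can be marked clean.  Returns 0 when the array
 * was idled (or cannot be), or the number of seconds to wait before trying
 * again while writes are still recent.
 */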
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_insert_tail(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after the
	 * reboot, even if that component is connected and synchronized
	 * later. Can we do something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

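/*
 * Broadcast a BIO_FLUSH to every active component; the parent bio is
 * delivered from g_mirror_flush_done() once all the clones have come back.
 */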
static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static int
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (1);
		}
	}
	return (0);
}

1209 * Return TRUE if the given sync request is colliding with a in-progress regular
1210 * request.
1211 */
1212static int
1213g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1214{
1215	off_t rstart, rend, sstart, send;
1216	struct bio *bp;
1217
1218	if (sc->sc_sync.ds_ndisks == 0)
1219		return (0);
1220	sstart = sbp->bio_offset;
1221	send = sbp->bio_offset + sbp->bio_length;
1222	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
1223		rstart = bp->bio_offset;
1224		rend = bp->bio_offset + bp->bio_length;
1225		if (rend > sstart && rstart < send)
1226			return (1);
1227	}
1228	return (0);
1229}
1230
/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

1243 * Puts synchronization request onto delayed queue.
1244 */
1245static void
1246g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1247{
1248
1249	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1250	bioq_insert_tail(&sc->sc_sync_delayed, bp);
1251}
1252
1253/*
1254 * Releases delayed regular requests which don't collide anymore with sync
1255 * requests.
1256 */
1257static void
1258g_mirror_regular_release(struct g_mirror_softc *sc)
1259{
1260	struct bio *bp, *bp2;
1261
1262	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
1263		if (g_mirror_sync_collision(sc, bp))
1264			continue;
1265		bioq_remove(&sc->sc_regular_delayed, bp);
1266		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
1267		mtx_lock(&sc->sc_queue_mtx);
1268		bioq_insert_head(&sc->sc_queue, bp);
1269#if 0
1270		/*
1271		 * wakeup() is not needed, because this function is called from
1272		 * the worker thread.
1273		 */
1274		wakeup(&sc->sc_queue);
1275#endif
1276		mtx_unlock(&sc->sc_queue_mtx);
1277	}
1278}
1279
/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

1300 * Handle synchronization requests.
1301 * Every synchronization request is two-steps process: first, READ request is
1302 * send to active provider and then WRITE request (with read data) to the provider
1303 * being synchronized. When WRITE is finished, new synchronization request is
1304 * send.
1305 */
1306static void
1307g_mirror_sync_request(struct bio *bp)
1308{
1309	struct g_mirror_softc *sc;
1310	struct g_mirror_disk *disk;
1311
1312	bp->bio_from->index--;
1313	sc = bp->bio_from->geom->softc;
1314	disk = bp->bio_from->private;
1315	if (disk == NULL) {
1316		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1317		g_topology_lock();
1318		g_mirror_kill_consumer(sc, bp->bio_from);
1319		g_topology_unlock();
1320		free(bp->bio_data, M_MIRROR);
1321		g_destroy_bio(bp);
1322		sx_xlock(&sc->sc_lock);
1323		return;
1324	}
1325
1326	/*
1327	 * Synchronization request.
1328	 */
1329	switch (bp->bio_cmd) {
1330	case BIO_READ:
1331	    {
1332		struct g_consumer *cp;
1333
1334		if (bp->bio_error != 0) {
1335			G_MIRROR_LOGREQ(0, bp,
1336			    "Synchronization request failed (error=%d).",
1337			    bp->bio_error);
1338			g_destroy_bio(bp);
1339			return;
1340		}
1341		G_MIRROR_LOGREQ(3, bp,
1342		    "Synchronization request half-finished.");
1343		bp->bio_cmd = BIO_WRITE;
1344		bp->bio_cflags = 0;
1345		cp = disk->d_consumer;
1346		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1347		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1348		    cp->acr, cp->acw, cp->ace));
1349		cp->index++;
1350		g_io_request(bp, cp);
1351		return;
1352	    }
1353	case BIO_WRITE:
1354	    {
1355		struct g_mirror_disk_sync *sync;
1356		off_t offset;
1357		void *data;
1358		int i;
1359
1360		if (bp->bio_error != 0) {
1361			G_MIRROR_LOGREQ(0, bp,
1362			    "Synchronization request failed (error=%d).",
1363			    bp->bio_error);
1364			g_destroy_bio(bp);
1365			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1366			g_mirror_event_send(disk,
1367			    G_MIRROR_DISK_STATE_DISCONNECTED,
1368			    G_MIRROR_EVENT_DONTWAIT);
1369			return;
1370		}
1371		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1372		sync = &disk->d_sync;
1373		if (sync->ds_offset >= sc->sc_mediasize ||
1374		    sync->ds_consumer == NULL ||
1375		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1376			/* Don't send more synchronization requests. */
1377			sync->ds_inflight--;
1378			if (sync->ds_bios != NULL) {
1379				i = (int)(uintptr_t)bp->bio_caller1;
1380				sync->ds_bios[i] = NULL;
1381			}
1382			free(bp->bio_data, M_MIRROR);
1383			g_destroy_bio(bp);
1384			if (sync->ds_inflight > 0)
1385				return;
1386			if (sync->ds_consumer == NULL ||
1387			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1388				return;
1389			}
1390			/* Disk up-to-date, activate it. */
1391			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1392			    G_MIRROR_EVENT_DONTWAIT);
1393			return;
1394		}
1395
1396		/* Send next synchronization request. */
1397		data = bp->bio_data;
1398		g_reset_bio(bp);
1399		bp->bio_cmd = BIO_READ;
1400		bp->bio_offset = sync->ds_offset;
1401		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1402		sync->ds_offset += bp->bio_length;
1403		bp->bio_done = g_mirror_sync_done;
1404		bp->bio_data = data;
1405		bp->bio_from = sync->ds_consumer;
1406		bp->bio_to = sc->sc_provider;
1407		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1408		sync->ds_consumer->index++;
1409		/*
1410		 * Delay the request if it is colliding with a regular request.
1411		 */
1412		if (g_mirror_regular_collision(sc, bp))
1413			g_mirror_sync_delay(sc, bp);
1414		else
1415			g_io_request(bp, sync->ds_consumer);
1416
1417		/* Release delayed requests if possible. */
1418		g_mirror_regular_release(sc);
1419
1420		/* Find the smallest offset */
1421		offset = sc->sc_mediasize;
1422		for (i = 0; i < g_mirror_syncreqs; i++) {
1423			bp = sync->ds_bios[i];
1424			if (bp->bio_offset < offset)
1425				offset = bp->bio_offset;
1426		}
1427		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
1428			/* Update offset_done on every 100 blocks. */
1429			sync->ds_offset_done = offset;
1430			g_mirror_update_metadata(disk);
1431		}
1432		return;
1433	    }
1434	default:
1435		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1436		    bp->bio_cmd, sc->sc_name));
1437		break;
1438	}
1439}
1440
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}
#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

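/*
 * LOAD balance: pick the active disk with the lowest load, where load is an
 * exponentially weighted moving average of the consumer's in-flight request
 * count (see the update below: load = (index * LOAD_SCALE + load * 7) / 8).
 * A disk whose last known head position matches the request offset exactly
 * gets a 2*LOAD_SCALE bonus; one within TRACK_SIZE (1 MB) gets 1*LOAD_SCALE.
 */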
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

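/*
 * SPLIT balance: a large read is divided into sector-aligned slices and the
 * pieces are issued to the active disks in parallel; reads at or below
 * sc_slice fall back to round-robin.
 */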
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

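/*
 * Entry point for requests accepted by the worker thread: reads are
 * dispatched according to the configured balance algorithm, while writes
 * and deletes are cloned to every active component (and, for the range
 * already synchronized, to synchronizing components as well).
 */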
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (bioq_first(&queue) == NULL) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * that new synchronization requests don't collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, true);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, false);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Check if we can mark the array as CLEAN and, if we cannot,
		 * how many seconds we should wait before trying again.
		 */
		timeout = g_mirror_idle(sc, -1);
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_takefirst(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
				mtx_lock(&sc->sc_queue_mtx);
				if (bioq_first(&sc->sc_queue) != NULL) {
					mtx_unlock(&sc->sc_queue_mtx);
					continue;
				}
			}
			sx_xunlock(&sc->sc_lock);
			/*
			 * XXX: We can miss an event here, because an event
			 *      can be added without the sx-device-lock and
			 *      without the mtx-queue-lock. Maybe I should
			 *      just stop using a dedicated mutex for event
			 *      synchronization and stick with the queue lock?
			 *      The event will hang here until the next I/O
			 *      request or the next event is received.
			 */
1916			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1917			    timeout * hz);
1918			sx_xlock(&sc->sc_lock);
1919			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1920			continue;
1921		}
1922		mtx_unlock(&sc->sc_queue_mtx);
1923
1924		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1925		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1926			g_mirror_sync_request(bp);	/* READ */
1927		} else if (bp->bio_to != sc->sc_provider) {
1928			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1929				g_mirror_regular_request(bp);
1930			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1931				g_mirror_sync_request(bp);	/* WRITE */
1932			else {
1933				KASSERT(0,
1934				    ("Invalid request cflags=0x%hx to=%s.",
1935				    bp->bio_cflags, bp->bio_to->name));
1936			}
1937		} else {
1938			g_mirror_register_request(bp);
1939		}
1940		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1941	}
1942}
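
/*
 * A rough sketch of the worker loop above (illustrative pseudocode,
 * not authoritative): events are always drained before I/O so that
 * state transitions are observed in order.
 *
 *	for (;;) {
 *		if ((ep = g_mirror_event_get(sc)) != NULL) {
 *			handle the event, maybe destroy the device;
 *			continue;
 *		}
 *		timeout = g_mirror_idle(sc, -1);
 *		if ((bp = bioq_takefirst(&sc->sc_queue)) == NULL) {
 *			sleep for at most 'timeout' seconds;
 *			continue;
 *		}
 *		dispatch bp to the sync, regular or register handler;
 *	}
 */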
1943
1944static void
1945g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1946{
1947
1948	sx_assert(&sc->sc_lock, SX_LOCKED);
1949
1950	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1951		return;
1952	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1953		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1954		    g_mirror_get_diskname(disk), sc->sc_name);
1955		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1956	} else if (sc->sc_idle &&
1957	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1958		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1959		    g_mirror_get_diskname(disk), sc->sc_name);
1960		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1961	}
1962}
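
/*
 * Note on the function above: a component is marked DIRTY while the
 * device is not idle (i.e. while writes may hit it) and is marked
 * clean again once the device has been idle long enough, so that
 * after a crash only dirty components need a resync.  This is a
 * summary of the logic above, not an additional mechanism.
 */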
1963
1964static void
1965g_mirror_sync_start(struct g_mirror_disk *disk)
1966{
1967	struct g_mirror_softc *sc;
1968	struct g_consumer *cp;
1969	struct bio *bp;
1970	int error, i;
1971
1972	g_topology_assert_not();
1973	sc = disk->d_softc;
1974	sx_assert(&sc->sc_lock, SX_LOCKED);
1975
1976	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1977	    ("Disk %s is not marked for synchronization.",
1978	    g_mirror_get_diskname(disk)));
1979	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1980	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1981	    sc->sc_state));
1982
1983	sx_xunlock(&sc->sc_lock);
1984	g_topology_lock();
1985	cp = g_new_consumer(sc->sc_sync.ds_geom);
1986	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1987	error = g_attach(cp, sc->sc_provider);
1988	KASSERT(error == 0,
1989	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1990	error = g_access(cp, 1, 0, 0);
1991	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1992	g_topology_unlock();
1993	sx_xlock(&sc->sc_lock);
1994
1995	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1996	    g_mirror_get_diskname(disk));
1997	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1998		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1999	KASSERT(disk->d_sync.ds_consumer == NULL,
2000	    ("Sync consumer already exists (device=%s, disk=%s).",
2001	    sc->sc_name, g_mirror_get_diskname(disk)));
2002
2003	disk->d_sync.ds_consumer = cp;
2004	disk->d_sync.ds_consumer->private = disk;
2005	disk->d_sync.ds_consumer->index = 0;
2006
2007	/*
2008	 * Allocate memory for synchronization bios and initialize them.
2009	 */
2010	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2011	    M_MIRROR, M_WAITOK);
2012	for (i = 0; i < g_mirror_syncreqs; i++) {
2013		bp = g_alloc_bio();
2014		disk->d_sync.ds_bios[i] = bp;
2015		bp->bio_parent = NULL;
2016		bp->bio_cmd = BIO_READ;
2017		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2018		bp->bio_cflags = 0;
2019		bp->bio_offset = disk->d_sync.ds_offset;
2020		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
2021		disk->d_sync.ds_offset += bp->bio_length;
2022		bp->bio_done = g_mirror_sync_done;
2023		bp->bio_from = disk->d_sync.ds_consumer;
2024		bp->bio_to = sc->sc_provider;
2025		bp->bio_caller1 = (void *)(uintptr_t)i;
2026	}
2027
2028	/* Increase the number of disks in SYNCHRONIZING state. */
2029	sc->sc_sync.ds_ndisks++;
2030	/* Set the number of in-flight synchronization requests. */
2031	disk->d_sync.ds_inflight = g_mirror_syncreqs;
2032
2033	/*
2034	 * Fire off first synchronization requests.
2035	 */
2036	for (i = 0; i < g_mirror_syncreqs; i++) {
2037		bp = disk->d_sync.ds_bios[i];
2038		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2039		disk->d_sync.ds_consumer->index++;
2040		/*
2041		 * Delay the request if it is colliding with a regular request.
2042		 */
2043		if (g_mirror_regular_collision(sc, bp))
2044			g_mirror_sync_delay(sc, bp);
2045		else
2046			g_io_request(bp, disk->d_sync.ds_consumer);
2047	}
2048}
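
/*
 * For illustration only: assuming a full rebuild starting at offset 0,
 * a MAXPHYS of 128 kB and two parallel sync requests, the loop above
 * queues reads such as:
 *
 *	ds_bios[0]: offset 0,      length 131072
 *	ds_bios[1]: offset 131072, length 131072
 *
 * Each completed request is re-issued at the next offset by the sync
 * request path, so the pipeline stays full until the whole provider
 * has been copied.
 */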
2049
2050/*
2051 * Stop synchronization process.
2052 * type: 0 - synchronization finished
2053 *       1 - synchronization stopped
2054 */
2055static void
2056g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2057{
2058	struct g_mirror_softc *sc;
2059	struct g_consumer *cp;
2060
2061	g_topology_assert_not();
2062	sc = disk->d_softc;
2063	sx_assert(&sc->sc_lock, SX_LOCKED);
2064
2065	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2066	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2067	    g_mirror_disk_state2str(disk->d_state)));
2068	if (disk->d_sync.ds_consumer == NULL)
2069		return;
2070
2071	if (type == 0) {
2072		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2073		    sc->sc_name, g_mirror_get_diskname(disk));
2074	} else /* if (type == 1) */ {
2075		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2076		    sc->sc_name, g_mirror_get_diskname(disk));
2077	}
2078	free(disk->d_sync.ds_bios, M_MIRROR);
2079	disk->d_sync.ds_bios = NULL;
2080	cp = disk->d_sync.ds_consumer;
2081	disk->d_sync.ds_consumer = NULL;
2082	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2083	sc->sc_sync.ds_ndisks--;
2084	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2085	g_topology_lock();
2086	g_mirror_kill_consumer(sc, cp);
2087	g_topology_unlock();
2088	sx_xlock(&sc->sc_lock);
2089}
2090
2091static void
2092g_mirror_launch_provider(struct g_mirror_softc *sc)
2093{
2094	struct g_mirror_disk *disk;
2095	struct g_provider *pp, *dp;
2096
2097	sx_assert(&sc->sc_lock, SX_LOCKED);
2098
2099	g_topology_lock();
2100	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2101	pp->flags |= G_PF_DIRECT_RECEIVE;
2102	pp->mediasize = sc->sc_mediasize;
2103	pp->sectorsize = sc->sc_sectorsize;
2104	pp->stripesize = 0;
2105	pp->stripeoffset = 0;
2106
2107	/* Splitting of unmapped BIOs could work but is not implemented yet. */
2108	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2109		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2110
2111	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2112		if (disk->d_consumer && disk->d_consumer->provider) {
2113			dp = disk->d_consumer->provider;
2114			if (dp->stripesize > pp->stripesize) {
2115				pp->stripesize = dp->stripesize;
2116				pp->stripeoffset = dp->stripeoffset;
2117			}
2118			/* A provider underneath us doesn't support unmapped BIOs. */
2119			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2120				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2121				    "because of %s.", dp->name);
2122				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2123			}
2124		}
2125	}
2126	pp->private = sc;
2127	sc->sc_refcnt++;
2128	sc->sc_provider = pp;
2129	g_error_provider(pp, 0);
2130	g_topology_unlock();
2131	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2132	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2133	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2134		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2135			g_mirror_sync_start(disk);
2136	}
2137}
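
/*
 * The provider created above inherits the largest stripesize found
 * among its components and accepts unmapped BIOs only if every
 * component does.  Once launched it appears under a name such as
 * (hypothetical example) /dev/mirror/gm0 and behaves like any other
 * GEOM provider.
 */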
2138
2139static void
2140g_mirror_destroy_provider(struct g_mirror_softc *sc)
2141{
2142	struct g_mirror_disk *disk;
2143	struct bio *bp;
2144
2145	g_topology_assert_not();
2146	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2147	    sc->sc_name));
2148
2149	g_topology_lock();
2150	g_error_provider(sc->sc_provider, ENXIO);
2151	mtx_lock(&sc->sc_queue_mtx);
2152	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
2153		/*
2154		 * Abort any pending I/O that wasn't generated by us.
2155		 * Synchronization requests and requests destined for individual
2156		 * mirror components can be destroyed immediately.
2157		 */
2158		if (bp->bio_to == sc->sc_provider &&
2159		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2160			g_io_deliver(bp, ENXIO);
2161		} else {
2162			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2163				free(bp->bio_data, M_MIRROR);
2164			g_destroy_bio(bp);
2165		}
2166	}
2167	mtx_unlock(&sc->sc_queue_mtx);
2168	g_wither_provider(sc->sc_provider, ENXIO);
2169	sc->sc_provider = NULL;
2170	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2171	g_topology_unlock();
2172	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2173		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2174			g_mirror_sync_stop(disk, 1);
2175	}
2176}
2177
2178static void
2179g_mirror_go(void *arg)
2180{
2181	struct g_mirror_softc *sc;
2182
2183	sc = arg;
2184	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2185	g_mirror_event_send(sc, 0,
2186	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2187}
2188
2189static u_int
2190g_mirror_determine_state(struct g_mirror_disk *disk)
2191{
2192	struct g_mirror_softc *sc;
2193	u_int state;
2194
2195	sc = disk->d_softc;
2196	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2197		if ((disk->d_flags &
2198		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2199		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2200		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2201			/* Disk does not need synchronization. */
2202			state = G_MIRROR_DISK_STATE_ACTIVE;
2203		} else {
2204			if ((sc->sc_flags &
2205			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2206			    (disk->d_flags &
2207			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2208				/*
2209				 * We can start synchronization from
2210				 * the stored offset.
2211				 */
2212				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2213			} else {
2214				state = G_MIRROR_DISK_STATE_STALE;
2215			}
2216		}
2217	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2218		/*
2219		 * Reset all synchronization data for this disk,
2220		 * because even if it was synchronized, it was
2221		 * synchronized against disks with a different syncid.
2222		 */
2223		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2224		disk->d_sync.ds_offset = 0;
2225		disk->d_sync.ds_offset_done = 0;
2226		disk->d_sync.ds_syncid = sc->sc_syncid;
2227		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2228		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2229			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2230		} else {
2231			state = G_MIRROR_DISK_STATE_STALE;
2232		}
2233	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2234		/*
2235		 * Not good, NOT GOOD!
2236		 * It means that the mirror was started on stale disks
2237		 * and a fresher disk has just arrived.
2238		 * If there were writes, the mirror is broken, sorry.
2239		 * I think the best choice here is to leave this disk
2240		 * alone and inform the user loudly.
2241		 */
2242		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2243		    "disk (%s) arrives!! It will not be connected to the "
2244		    "running device.", sc->sc_name,
2245		    g_mirror_get_diskname(disk));
2246		g_mirror_destroy_disk(disk);
2247		state = G_MIRROR_DISK_STATE_NONE;
2248		/* Return immediately, because disk was destroyed. */
2249		return (state);
2250	}
2251	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2252	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2253	return (state);
2254}
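
/*
 * A summary of the decision above, as a sketch (the code is
 * authoritative; FORCE_SYNC overrides NOAUTOSYNC in both sync cases):
 *
 *	disk syncid vs device  condition                  new state
 *	---------------------  -------------------------  -------------
 *	equal                  clean, or no ACTIVE disks  ACTIVE
 *	equal                  dirty, autosync allowed    SYNCHRONIZING
 *	equal                  dirty, NOAUTOSYNC set      STALE
 *	older                  autosync allowed           SYNCHRONIZING
 *	older                  NOAUTOSYNC set             STALE
 *	newer                  disk destroyed             NONE
 */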
2255
2256/*
2257 * Update device state.
2258 */
2259static void
2260g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2261{
2262	struct g_mirror_disk *disk;
2263	u_int state;
2264
2265	sx_assert(&sc->sc_lock, SX_XLOCKED);
2266
2267	switch (sc->sc_state) {
2268	case G_MIRROR_DEVICE_STATE_STARTING:
2269	    {
2270		struct g_mirror_disk *pdisk, *tdisk;
2271		u_int dirty, ndisks, genid, syncid;
2272		bool broken;
2273
2274		KASSERT(sc->sc_provider == NULL,
2275		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2276		/*
2277		 * Are we ready? We are, if all disks are connected or
2278		 * if we have any disks and 'force' is true.
2279		 */
2280		ndisks = g_mirror_ndisks(sc, -1);
2281		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2282			;
2283		} else if (ndisks == 0) {
2284			/*
2285			 * Disks went down in starting phase, so destroy
2286			 * device.
2287			 */
2288			callout_drain(&sc->sc_callout);
2289			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2290			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2291			    sc->sc_rootmount);
2292			root_mount_rel(sc->sc_rootmount);
2293			sc->sc_rootmount = NULL;
2294			return;
2295		} else {
2296			return;
2297		}
2298
2299		/*
2300		 * Activate all disks with the biggest syncid.
2301		 */
2302		if (force) {
2303			/*
2304			 * If 'force' is true, we have been called due to
2305			 * timeout, so don't bother canceling timeout.
2306			 */
2307			ndisks = 0;
2308			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2309				if ((disk->d_flags &
2310				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2311					ndisks++;
2312				}
2313			}
2314			if (ndisks == 0) {
2315				/* No valid disks found, destroy device. */
2316				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2317				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2318				    __LINE__, sc->sc_rootmount);
2319				root_mount_rel(sc->sc_rootmount);
2320				sc->sc_rootmount = NULL;
2321				return;
2322			}
2323		} else {
2324			/* Cancel timeout. */
2325			callout_drain(&sc->sc_callout);
2326		}
2327
2328		/*
2329		 * Find the biggest genid.
2330		 */
2331		genid = 0;
2332		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2333			if (disk->d_genid > genid)
2334				genid = disk->d_genid;
2335		}
2336		sc->sc_genid = genid;
2337		/*
2338		 * Remove all disks without the biggest genid.
2339		 */
2340		broken = false;
2341		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2342			if (disk->d_genid < genid) {
2343				G_MIRROR_DEBUG(0,
2344				    "Component %s (device %s) broken, skipping.",
2345				    g_mirror_get_diskname(disk), sc->sc_name);
2346				g_mirror_destroy_disk(disk);
2347				/*
2348				 * Bump the syncid in case we discover a healthy
2349				 * replacement disk after starting the mirror.
2350				 */
2351				broken = true;
2352			}
2353		}
2354
2355		/*
2356		 * Find the biggest syncid.
2357		 */
2358		syncid = 0;
2359		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2360			if (disk->d_sync.ds_syncid > syncid)
2361				syncid = disk->d_sync.ds_syncid;
2362		}
2363
2364		/*
2365		 * Here we need to look for dirty disks: if all disks with
2366		 * the biggest syncid are dirty, we have to choose the one
2367		 * with the biggest priority and rebuild the rest.
2368		 *
2369		 * Count the number of dirty disks with the biggest syncid,
2370		 * count the number of disks with the biggest syncid, and,
2371		 * while here, find the disk with the biggest priority
2372		 * among the dirty ones.
2373		 */
2374		dirty = ndisks = 0;
2375		pdisk = NULL;
2376		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2377			if (disk->d_sync.ds_syncid != syncid)
2378				continue;
2379			if ((disk->d_flags &
2380			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2381				continue;
2382			}
2383			ndisks++;
2384			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2385				dirty++;
2386				if (pdisk == NULL ||
2387				    pdisk->d_priority < disk->d_priority) {
2388					pdisk = disk;
2389				}
2390			}
2391		}
2392		if (dirty == 0) {
2393			/* No dirty disks at all, great. */
2394		} else if (dirty == ndisks) {
2395			/*
2396			 * Force synchronization for all dirty disks except one
2397			 * with the biggest priority.
2398			 */
2399			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2400			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2401			    "master disk for synchronization.",
2402			    g_mirror_get_diskname(pdisk), sc->sc_name);
2403			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2404				if (disk->d_sync.ds_syncid != syncid)
2405					continue;
2406				if ((disk->d_flags &
2407				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2408					continue;
2409				}
2410				KASSERT((disk->d_flags &
2411				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2412				    ("Disk %s isn't marked as dirty.",
2413				    g_mirror_get_diskname(disk)));
2414				/* Skip the disk with the biggest priority. */
2415				if (disk == pdisk)
2416					continue;
2417				disk->d_sync.ds_syncid = 0;
2418			}
2419		} else if (dirty < ndisks) {
2420			/*
2421			 * Force synchronization for all dirty disks.
2422			 * We have some non-dirty disks.
2423			 */
2424			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2425				if (disk->d_sync.ds_syncid != syncid)
2426					continue;
2427				if ((disk->d_flags &
2428				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2429					continue;
2430				}
2431				if ((disk->d_flags &
2432				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2433					continue;
2434				}
2435				disk->d_sync.ds_syncid = 0;
2436			}
2437		}
2438
2439		/* Reset hint. */
2440		sc->sc_hint = NULL;
2441		sc->sc_syncid = syncid;
2442		if (force || broken) {
2443			/* Remember to bump syncid on first write. */
2444			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2445		}
2446		state = G_MIRROR_DEVICE_STATE_RUNNING;
2447		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2448		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2449		    g_mirror_device_state2str(state));
2450		sc->sc_state = state;
2451		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2452			state = g_mirror_determine_state(disk);
2453			g_mirror_event_send(disk, state,
2454			    G_MIRROR_EVENT_DONTWAIT);
2455			if (state == G_MIRROR_DISK_STATE_STALE)
2456				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2457		}
2458		break;
2459	    }
2460	case G_MIRROR_DEVICE_STATE_RUNNING:
2461		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2462		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2463			/*
2464			 * No active disks or no disks at all,
2465			 * so destroy device.
2466			 */
2467			if (sc->sc_provider != NULL)
2468				g_mirror_destroy_provider(sc);
2469			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2470			break;
2471		} else if (g_mirror_ndisks(sc,
2472		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2473		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2474			/*
2475			 * We have active disks, launch provider if it doesn't
2476			 * exist.
2477			 */
2478			if (sc->sc_provider == NULL)
2479				g_mirror_launch_provider(sc);
2480			if (sc->sc_rootmount != NULL) {
2481				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2482				    __LINE__, sc->sc_rootmount);
2483				root_mount_rel(sc->sc_rootmount);
2484				sc->sc_rootmount = NULL;
2485			}
2486		}
2487		/*
2488		 * Genid should be bumped immediately, so do it here.
2489		 */
2490		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2491			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2492			g_mirror_bump_genid(sc);
2493		}
2494		break;
2495	default:
2496		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2497		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2498		break;
2499	}
2500}
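
/*
 * The STARTING arm above, restated briefly: pick the highest genid and
 * drop components below it, pick the highest syncid among the
 * survivors, and if every disk with that syncid is dirty, keep the
 * highest-priority one as the synchronization master and rebuild the
 * rest.  This is only a summary; the code above is authoritative.
 */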
2501
2502/*
2503 * Update disk state and device state if needed.
2504 */
2505#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2506	"Disk %s state changed from %s to %s (device %s).",		\
2507	g_mirror_get_diskname(disk),					\
2508	g_mirror_disk_state2str(disk->d_state),				\
2509	g_mirror_disk_state2str(state), sc->sc_name)
2510static int
2511g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2512{
2513	struct g_mirror_softc *sc;
2514
2515	sc = disk->d_softc;
2516	sx_assert(&sc->sc_lock, SX_XLOCKED);
2517
2518again:
2519	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2520	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2521	    g_mirror_disk_state2str(state));
2522	switch (state) {
2523	case G_MIRROR_DISK_STATE_NEW:
2524		/*
2525		 * Possible scenarios:
2526		 * 1. A new disk arrives.
2527		 */
2528		/* Previous state should be NONE. */
2529		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2530		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2531		    g_mirror_disk_state2str(disk->d_state)));
2532		DISK_STATE_CHANGED();
2533
2534		disk->d_state = state;
2535		if (LIST_EMPTY(&sc->sc_disks))
2536			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2537		else {
2538			struct g_mirror_disk *dp;
2539
2540			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2541				if (disk->d_priority >= dp->d_priority) {
2542					LIST_INSERT_BEFORE(dp, disk, d_next);
2543					dp = NULL;
2544					break;
2545				}
2546				if (LIST_NEXT(dp, d_next) == NULL)
2547					break;
2548			}
2549			if (dp != NULL)
2550				LIST_INSERT_AFTER(dp, disk, d_next);
2551		}
2552		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2553		    sc->sc_name, g_mirror_get_diskname(disk));
2554		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2555			break;
2556		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2557		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2558		    g_mirror_device_state2str(sc->sc_state),
2559		    g_mirror_get_diskname(disk),
2560		    g_mirror_disk_state2str(disk->d_state)));
2561		state = g_mirror_determine_state(disk);
2562		if (state != G_MIRROR_DISK_STATE_NONE)
2563			goto again;
2564		break;
2565	case G_MIRROR_DISK_STATE_ACTIVE:
2566		/*
2567		 * Possible scenarios:
2568		 * 1. A new disk does not need synchronization.
2569		 * 2. The synchronization process finished successfully.
2570		 */
2571		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2572		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2573		    g_mirror_device_state2str(sc->sc_state),
2574		    g_mirror_get_diskname(disk),
2575		    g_mirror_disk_state2str(disk->d_state)));
2576		/* Previous state should be NEW or SYNCHRONIZING. */
2577		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2578		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2579		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2580		    g_mirror_disk_state2str(disk->d_state)));
2581		DISK_STATE_CHANGED();
2582
2583		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2584			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2585			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2586			g_mirror_sync_stop(disk, 0);
2587		}
2588		disk->d_state = state;
2589		disk->d_sync.ds_offset = 0;
2590		disk->d_sync.ds_offset_done = 0;
2591		g_mirror_update_idle(sc, disk);
2592		g_mirror_update_metadata(disk);
2593		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2594		    sc->sc_name, g_mirror_get_diskname(disk));
2595		break;
2596	case G_MIRROR_DISK_STATE_STALE:
2597		/*
2598		 * Possible scenarios:
2599		 * 1. A stale disk was connected.
2600		 */
2601		/* Previous state should be NEW. */
2602		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2603		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2604		    g_mirror_disk_state2str(disk->d_state)));
2605		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2606		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2607		    g_mirror_device_state2str(sc->sc_state),
2608		    g_mirror_get_diskname(disk),
2609		    g_mirror_disk_state2str(disk->d_state)));
2610		/*
2611		 * The STALE state is only possible if the device is
2612		 * marked NOAUTOSYNC.
2613		 */
2614		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2615		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2616		    g_mirror_device_state2str(sc->sc_state),
2617		    g_mirror_get_diskname(disk),
2618		    g_mirror_disk_state2str(disk->d_state)));
2619		DISK_STATE_CHANGED();
2620
2621		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2622		disk->d_state = state;
2623		g_mirror_update_metadata(disk);
2624		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2625		    sc->sc_name, g_mirror_get_diskname(disk));
2626		break;
2627	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2628		/*
2629		 * Possible scenarios:
2630		 * 1. A disk which needs synchronization was connected.
2631		 */
2632		/* Previous state should be NEW. */
2633		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2634		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2635		    g_mirror_disk_state2str(disk->d_state)));
2636		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2637		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2638		    g_mirror_device_state2str(sc->sc_state),
2639		    g_mirror_get_diskname(disk),
2640		    g_mirror_disk_state2str(disk->d_state)));
2641		DISK_STATE_CHANGED();
2642
2643		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2644			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2645		disk->d_state = state;
2646		if (sc->sc_provider != NULL) {
2647			g_mirror_sync_start(disk);
2648			g_mirror_update_metadata(disk);
2649		}
2650		break;
2651	case G_MIRROR_DISK_STATE_DISCONNECTED:
2652		/*
2653		 * Possible scenarios:
2654		 * 1. Device wasn't running yet, but a disk disappeared.
2655		 * 2. Disk was active and disappeared.
2656		 * 3. Disk disappeared during the synchronization process.
2657		 */
2658		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2659			/*
2660			 * Previous state should be ACTIVE, STALE or
2661			 * SYNCHRONIZING.
2662			 */
2663			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2664			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2665			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2666			    ("Wrong disk state (%s, %s).",
2667			    g_mirror_get_diskname(disk),
2668			    g_mirror_disk_state2str(disk->d_state)));
2669		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2670			/* Previous state should be NEW. */
2671			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2672			    ("Wrong disk state (%s, %s).",
2673			    g_mirror_get_diskname(disk),
2674			    g_mirror_disk_state2str(disk->d_state)));
2675			/*
2676			 * Cancel the pending syncid bump if the disk
2677			 * disappeared in the STARTING state.
2678			 */
2679			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2680				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2681#ifdef	INVARIANTS
2682		} else {
2683			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2684			    sc->sc_name,
2685			    g_mirror_device_state2str(sc->sc_state),
2686			    g_mirror_get_diskname(disk),
2687			    g_mirror_disk_state2str(disk->d_state)));
2688#endif
2689		}
2690		DISK_STATE_CHANGED();
2691		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2692		    sc->sc_name, g_mirror_get_diskname(disk));
2693
2694		g_mirror_destroy_disk(disk);
2695		break;
2696	case G_MIRROR_DISK_STATE_DESTROY:
2697	    {
2698		int error;
2699
2700		error = g_mirror_clear_metadata(disk);
2701		if (error != 0) {
2702			G_MIRROR_DEBUG(0,
2703			    "Device %s: failed to clear metadata on %s: %d.",
2704			    sc->sc_name, g_mirror_get_diskname(disk), error);
2705			break;
2706		}
2707		DISK_STATE_CHANGED();
2708		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2709		    sc->sc_name, g_mirror_get_diskname(disk));
2710
2711		g_mirror_destroy_disk(disk);
2712		sc->sc_ndisks--;
2713		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2714			g_mirror_update_metadata(disk);
2715		}
2716		break;
2717	    }
2718	default:
2719		KASSERT(1 == 0, ("Unknown state (%u).", state));
2720		break;
2721	}
2722	return (0);
2723}
2724#undef	DISK_STATE_CHANGED
2725
2726int
2727g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2728{
2729	struct g_provider *pp;
2730	u_char *buf;
2731	int error;
2732
2733	g_topology_assert();
2734
2735	error = g_access(cp, 1, 0, 0);
2736	if (error != 0)
2737		return (error);
2738	pp = cp->provider;
2739	g_topology_unlock();
2740	/* Metadata are stored in the last sector. */
2741	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2742	    &error);
2743	g_topology_lock();
2744	g_access(cp, -1, 0, 0);
2745	if (buf == NULL) {
2746		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2747		    cp->provider->name, error);
2748		return (error);
2749	}
2750
2751	/* Decode metadata. */
2752	error = mirror_metadata_decode(buf, md);
2753	g_free(buf);
2754	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2755		return (EINVAL);
2756	if (md->md_version > G_MIRROR_VERSION) {
2757		G_MIRROR_DEBUG(0,
2758		    "Kernel module is too old to handle metadata from %s.",
2759		    cp->provider->name);
2760		return (EINVAL);
2761	}
2762	if (error != 0) {
2763		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2764		    cp->provider->name);
2765		return (error);
2766	}
2767
2768	return (0);
2769}
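
/*
 * A worked example of the metadata location read above: metadata
 * occupy the provider's last sector, so for a hypothetical component
 * with 512-byte sectors and a mediasize of 10737418240 bytes (10 GB),
 * the read starts at
 *
 *	offset = mediasize - sectorsize
 *	       = 10737418240 - 512 = 10737417728
 *
 * This reserved sector is also why the usable mirror is slightly
 * smaller than its smallest component.
 */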
2770
2771static int
2772g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2773    struct g_mirror_metadata *md)
2774{
2775
2776	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2777		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2778		    pp->name, md->md_did);
2779		return (EEXIST);
2780	}
2781	if (md->md_all != sc->sc_ndisks) {
2782		G_MIRROR_DEBUG(1,
2783		    "Invalid '%s' field on disk %s (device %s), skipping.",
2784		    "md_all", pp->name, sc->sc_name);
2785		return (EINVAL);
2786	}
2787	if (md->md_slice != sc->sc_slice) {
2788		G_MIRROR_DEBUG(1,
2789		    "Invalid '%s' field on disk %s (device %s), skipping.",
2790		    "md_slice", pp->name, sc->sc_name);
2791		return (EINVAL);
2792	}
2793	if (md->md_balance != sc->sc_balance) {
2794		G_MIRROR_DEBUG(1,
2795		    "Invalid '%s' field on disk %s (device %s), skipping.",
2796		    "md_balance", pp->name, sc->sc_name);
2797		return (EINVAL);
2798	}
2799#if 0
2800	if (md->md_mediasize != sc->sc_mediasize) {
2801		G_MIRROR_DEBUG(1,
2802		    "Invalid '%s' field on disk %s (device %s), skipping.",
2803		    "md_mediasize", pp->name, sc->sc_name);
2804		return (EINVAL);
2805	}
2806#endif
2807	if (sc->sc_mediasize > pp->mediasize) {
2808		G_MIRROR_DEBUG(1,
2809		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2810		    sc->sc_name);
2811		return (EINVAL);
2812	}
2813	if (md->md_sectorsize != sc->sc_sectorsize) {
2814		G_MIRROR_DEBUG(1,
2815		    "Invalid '%s' field on disk %s (device %s), skipping.",
2816		    "md_sectorsize", pp->name, sc->sc_name);
2817		return (EINVAL);
2818	}
2819	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2820		G_MIRROR_DEBUG(1,
2821		    "Invalid sector size of disk %s (device %s), skipping.",
2822		    pp->name, sc->sc_name);
2823		return (EINVAL);
2824	}
2825	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2826		G_MIRROR_DEBUG(1,
2827		    "Invalid device flags on disk %s (device %s), skipping.",
2828		    pp->name, sc->sc_name);
2829		return (EINVAL);
2830	}
2831	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2832		G_MIRROR_DEBUG(1,
2833		    "Invalid disk flags on disk %s (device %s), skipping.",
2834		    pp->name, sc->sc_name);
2835		return (EINVAL);
2836	}
2837	return (0);
2838}
2839
2840int
2841g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2842    struct g_mirror_metadata *md)
2843{
2844	struct g_mirror_disk *disk;
2845	int error;
2846
2847	g_topology_assert_not();
2848	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2849
2850	error = g_mirror_check_metadata(sc, pp, md);
2851	if (error != 0)
2852		return (error);
2853	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2854	    md->md_genid < sc->sc_genid) {
2855		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2856		    pp->name, sc->sc_name);
2857		return (EINVAL);
2858	}
2859	disk = g_mirror_init_disk(sc, pp, md, &error);
2860	if (disk == NULL)
2861		return (error);
2862	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2863	    G_MIRROR_EVENT_WAIT);
2864	if (error != 0)
2865		return (error);
2866	if (md->md_version < G_MIRROR_VERSION) {
2867		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2868		    pp->name, md->md_version, G_MIRROR_VERSION);
2869		g_mirror_update_metadata(disk);
2870	}
2871	return (0);
2872}
2873
2874static void
2875g_mirror_destroy_delayed(void *arg, int flag)
2876{
2877	struct g_mirror_softc *sc;
2878	int error;
2879
2880	if (flag == EV_CANCEL) {
2881		G_MIRROR_DEBUG(1, "Destroying canceled.");
2882		return;
2883	}
2884	sc = arg;
2885	g_topology_unlock();
2886	sx_xlock(&sc->sc_lock);
2887	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2888	    ("DESTROY flag set on %s.", sc->sc_name));
2889	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2890	    ("DESTROYING flag not set on %s.", sc->sc_name));
2891	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2892	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2893	if (error != 0) {
2894		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2895		    sc->sc_name, error);
2896		sx_xunlock(&sc->sc_lock);
2897	}
2898	g_topology_lock();
2899}
2900
2901static int
2902g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2903{
2904	struct g_mirror_softc *sc;
2905	int error = 0;
2906
2907	g_topology_assert();
2908	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2909	    acw, ace);
2910
2911	sc = pp->private;
2912	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2913
2914	g_topology_unlock();
2915	sx_xlock(&sc->sc_lock);
2916	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2917	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 ||
2918	    LIST_EMPTY(&sc->sc_disks)) {
2919		if (acr > 0 || acw > 0 || ace > 0)
2920			error = ENXIO;
2921		goto end;
2922	}
2923	sc->sc_provider_open += acr + acw + ace;
2924	if (pp->acw + acw == 0)
2925		g_mirror_idle(sc, 0);
2926	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 &&
2927	    sc->sc_provider_open == 0)
2928		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
2929end:
2930	sx_xunlock(&sc->sc_lock);
2931	g_topology_lock();
2932	return (error);
2933}
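
/*
 * Access-counting sketch for the function above: acr, acw and ace are
 * deltas, so a consumer opening the provider r1w1e1 adds 3 to
 * sc_provider_open and the matching r-1w-1e-1 close removes them; the
 * delayed-destroy event is posted only once the sum drops back to
 * zero.
 */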
2934
2935struct g_geom *
2936g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
2937    u_int type)
2938{
2939	struct g_mirror_softc *sc;
2940	struct g_geom *gp;
2941	int error, timeout;
2942
2943	g_topology_assert();
2944	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2945	    md->md_mid);
2946
2947	/* One disk is the minimum. */
2948	if (md->md_all < 1)
2949		return (NULL);
2950	/*
2951	 * Action geom.
2952	 */
2953	gp = g_new_geomf(mp, "%s", md->md_name);
2954	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2955	gp->start = g_mirror_start;
2956	gp->orphan = g_mirror_orphan;
2957	gp->access = g_mirror_access;
2958	gp->dumpconf = g_mirror_dumpconf;
2959
2960	sc->sc_type = type;
2961	sc->sc_id = md->md_mid;
2962	sc->sc_slice = md->md_slice;
2963	sc->sc_balance = md->md_balance;
2964	sc->sc_mediasize = md->md_mediasize;
2965	sc->sc_sectorsize = md->md_sectorsize;
2966	sc->sc_ndisks = md->md_all;
2967	sc->sc_flags = md->md_mflags;
2968	sc->sc_bump_id = 0;
2969	sc->sc_idle = 1;
2970	sc->sc_last_write = time_uptime;
2971	sc->sc_writes = 0;
2972	sc->sc_refcnt = 1;
2973	sx_init(&sc->sc_lock, "gmirror:lock");
2974	bioq_init(&sc->sc_queue);
2975	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2976	bioq_init(&sc->sc_regular_delayed);
2977	bioq_init(&sc->sc_inflight);
2978	bioq_init(&sc->sc_sync_delayed);
2979	LIST_INIT(&sc->sc_disks);
2980	TAILQ_INIT(&sc->sc_events);
2981	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2982	callout_init(&sc->sc_callout, 1);
2983	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
2984	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2985	gp->softc = sc;
2986	sc->sc_geom = gp;
2987	sc->sc_provider = NULL;
2988	sc->sc_provider_open = 0;
2989	/*
2990	 * Synchronization geom.
2991	 */
2992	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2993	gp->softc = sc;
2994	gp->orphan = g_mirror_orphan;
2995	sc->sc_sync.ds_geom = gp;
2996	sc->sc_sync.ds_ndisks = 0;
2997	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2998	    "g_mirror %s", md->md_name);
2999	if (error != 0) {
3000		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3001		    sc->sc_name);
3002		g_destroy_geom(sc->sc_sync.ds_geom);
3003		g_destroy_geom(sc->sc_geom);
3004		g_mirror_free_device(sc);
3005		return (NULL);
3006	}
3007
3008	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3009	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3010
3011	sc->sc_rootmount = root_mount_hold("GMIRROR");
3012	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3013	/*
3014	 * Run timeout.
3015	 */
3016	timeout = g_mirror_timeout * hz;
3017	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3018	return (sc->sc_geom);
3019}
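
/*
 * Two geoms back every mirror created above: the "action" geom owning
 * the mirror/<name> provider, and a "<name>.sync" geom whose consumers
 * are used only for synchronization reads.  The callout armed at the
 * end forces the device to start (via g_mirror_go) even if not all
 * components have shown up in time.
 */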
3020
3021int
3022g_mirror_destroy(struct g_mirror_softc *sc, int how)
3023{
3024	struct g_mirror_disk *disk;
3025
3026	g_topology_assert_not();
3027	sx_assert(&sc->sc_lock, SX_XLOCKED);
3028
3029	if (sc->sc_provider_open != 0) {
3030		switch (how) {
3031		case G_MIRROR_DESTROY_SOFT:
3032			G_MIRROR_DEBUG(1,
3033			    "Device %s is still open (%d).", sc->sc_name,
3034			    sc->sc_provider_open);
3035			return (EBUSY);
3036		case G_MIRROR_DESTROY_DELAYED:
3037			G_MIRROR_DEBUG(1,
3038			    "Device %s will be destroyed on last close.",
3039			    sc->sc_name);
3040			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3041				if (disk->d_state ==
3042				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3043					g_mirror_sync_stop(disk, 1);
3044				}
3045			}
3046			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
3047			return (EBUSY);
3048		case G_MIRROR_DESTROY_HARD:
3049			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3050			    "can't be definitely removed.", sc->sc_name);
3051		}
3052	}
3053
3054	g_topology_lock();
3055	if (sc->sc_geom->softc == NULL) {
3056		g_topology_unlock();
3057		return (0);
3058	}
3059	sc->sc_geom->softc = NULL;
3060	sc->sc_sync.ds_geom->softc = NULL;
3061	g_topology_unlock();
3062
3063	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3064	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
3065	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3066	sx_xunlock(&sc->sc_lock);
3067	mtx_lock(&sc->sc_queue_mtx);
3068	wakeup(sc);
3069	mtx_unlock(&sc->sc_queue_mtx);
3070	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3071	while (sc->sc_worker != NULL)
3072		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3073	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3074	sx_xlock(&sc->sc_lock);
3075	g_mirror_destroy_device(sc);
3076	return (0);
3077}
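
/*
 * The 'how' argument above selects one of three destroy flavors:
 * G_MIRROR_DESTROY_SOFT fails with EBUSY while the provider is open,
 * G_MIRROR_DESTROY_DELAYED stops synchronization and defers the
 * destroy to the last close (also returning EBUSY for now), and
 * G_MIRROR_DESTROY_HARD proceeds even with the provider open.
 */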
3078
3079static void
3080g_mirror_taste_orphan(struct g_consumer *cp)
3081{
3082
3083	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3084	    cp->provider->name));
3085}
3086
3087static struct g_geom *
3088g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3089{
3090	struct g_mirror_metadata md;
3091	struct g_mirror_softc *sc;
3092	struct g_consumer *cp;
3093	struct g_geom *gp;
3094	int error;
3095
3096	g_topology_assert();
3097	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3098	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3099
3100	gp = g_new_geomf(mp, "mirror:taste");
3101	/*
3102	 * This orphan function should never be called.
3103	 */
3104	gp->orphan = g_mirror_taste_orphan;
3105	cp = g_new_consumer(gp);
3106	g_attach(cp, pp);
3107	error = g_mirror_read_metadata(cp, &md);
3108	g_detach(cp);
3109	g_destroy_consumer(cp);
3110	g_destroy_geom(gp);
3111	if (error != 0)
3112		return (NULL);
3113	gp = NULL;
3114
3115	if (md.md_provider[0] != '\0' &&
3116	    !g_compare_names(md.md_provider, pp->name))
3117		return (NULL);
3118	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3119		return (NULL);
3120	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3121		G_MIRROR_DEBUG(0,
3122		    "Device %s: provider %s marked as inactive, skipping.",
3123		    md.md_name, pp->name);
3124		return (NULL);
3125	}
3126	if (g_mirror_debug >= 2)
3127		mirror_metadata_dump(&md);
3128
3129	/*
3130	 * Let's check if the device already exists.
3131	 */
3132	sc = NULL;
3133	LIST_FOREACH(gp, &mp->geom, geom) {
3134		sc = gp->softc;
3135		if (sc == NULL)
3136			continue;
3137		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3138			continue;
3139		if (sc->sc_sync.ds_geom == gp)
3140			continue;
3141		if (strcmp(md.md_name, sc->sc_name) != 0)
3142			continue;
3143		if (md.md_mid != sc->sc_id) {
3144			G_MIRROR_DEBUG(0, "Device %s already configured.",
3145			    sc->sc_name);
3146			return (NULL);
3147		}
3148		break;
3149	}
3150	if (gp == NULL) {
3151		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3152		if (gp == NULL) {
3153			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3154			    md.md_name);
3155			return (NULL);
3156		}
3157		sc = gp->softc;
3158	}
3159	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3160	g_topology_unlock();
3161	sx_xlock(&sc->sc_lock);
3162	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3163	error = g_mirror_add_disk(sc, pp, &md);
3164	if (error != 0) {
3165		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3166		    pp->name, gp->name, error);
3167		if (LIST_EMPTY(&sc->sc_disks)) {
3168			g_cancel_event(sc);
3169			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3170			g_topology_lock();
3171			return (NULL);
3172		}
3173		gp = NULL;
3174	}
3175	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3176	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3177		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3178		g_topology_lock();
3179		return (NULL);
3180	}
3181	sx_xunlock(&sc->sc_lock);
3182	g_topology_lock();
3183	return (gp);
3184}
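
/*
 * Tasting happens whenever a new provider shows up.  A typical way to
 * exercise it from userland (illustrative commands, assuming two
 * scratch disks) is:
 *
 *	# gmirror label -v gm0 /dev/md0 /dev/md1
 *
 * gmirror(8) writes metadata to the last sector of each disk and the
 * taste routine above then reads it back and creates or extends the
 * device.
 */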
3185
3186static void
3187g_mirror_resize(struct g_consumer *cp)
3188{
3189	struct g_mirror_disk *disk;
3190
3191	g_topology_assert();
3192	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3193
3194	disk = cp->private;
3195	if (disk == NULL)
3196		return;
3197	g_topology_unlock();
3198	g_mirror_update_metadata(disk);
3199	g_topology_lock();
3200}
3201
3202static int
3203g_mirror_destroy_geom(struct gctl_req *req __unused,
3204    struct g_class *mp __unused, struct g_geom *gp)
3205{
3206	struct g_mirror_softc *sc;
3207	int error;
3208
3209	g_topology_unlock();
3210	sc = gp->softc;
3211	sx_xlock(&sc->sc_lock);
3212	g_cancel_event(sc);
3213	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3214	if (error != 0)
3215		sx_xunlock(&sc->sc_lock);
3216	g_topology_lock();
3217	return (error);
3218}
3219
3220static void
3221g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3222    struct g_consumer *cp, struct g_provider *pp)
3223{
3224	struct g_mirror_softc *sc;
3225
3226	g_topology_assert();
3227
3228	sc = gp->softc;
3229	if (sc == NULL)
3230		return;
3231	/* Skip synchronization geom. */
3232	if (gp == sc->sc_sync.ds_geom)
3233		return;
3234	if (pp != NULL) {
3235		/* Nothing here. */
3236	} else if (cp != NULL) {
3237		struct g_mirror_disk *disk;
3238
3239		disk = cp->private;
3240		if (disk == NULL)
3241			return;
3242		g_topology_unlock();
3243		sx_xlock(&sc->sc_lock);
3244		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3245		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3246			sbuf_printf(sb, "%s<Synchronized>", indent);
3247			if (disk->d_sync.ds_offset == 0)
3248				sbuf_printf(sb, "0%%");
3249			else {
3250				sbuf_printf(sb, "%u%%",
3251				    (u_int)((disk->d_sync.ds_offset * 100) /
3252				    sc->sc_provider->mediasize));
3253			}
3254			sbuf_printf(sb, "</Synchronized>\n");
3255			if (disk->d_sync.ds_offset > 0) {
3256				sbuf_printf(sb, "%s<BytesSynced>%jd"
3257				    "</BytesSynced>\n", indent,
3258				    (intmax_t)disk->d_sync.ds_offset);
3259			}
3260		}
3261		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3262		    disk->d_sync.ds_syncid);
3263		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3264		    disk->d_genid);
3265		sbuf_printf(sb, "%s<Flags>", indent);
3266		if (disk->d_flags == 0)
3267			sbuf_printf(sb, "NONE");
3268		else {
3269			int first = 1;
3270
3271#define	ADD_FLAG(flag, name)	do {					\
3272	if ((disk->d_flags & (flag)) != 0) {				\
3273		if (!first)						\
3274			sbuf_printf(sb, ", ");				\
3275		else							\
3276			first = 0;					\
3277		sbuf_printf(sb, name);					\
3278	}								\
3279} while (0)
3280			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3281			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3282			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3283			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3284			    "SYNCHRONIZING");
3285			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3286			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3287#undef	ADD_FLAG
3288		}
3289		sbuf_printf(sb, "</Flags>\n");
3290		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3291		    disk->d_priority);
3292		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3293		    g_mirror_disk_state2str(disk->d_state));
3294		sx_xunlock(&sc->sc_lock);
3295		g_topology_lock();
3296	} else {
3297		g_topology_unlock();
3298		sx_xlock(&sc->sc_lock);
3299		sbuf_printf(sb, "%s<Type>", indent);
3300		switch (sc->sc_type) {
3301		case G_MIRROR_TYPE_AUTOMATIC:
3302			sbuf_printf(sb, "AUTOMATIC");
3303			break;
3304		case G_MIRROR_TYPE_MANUAL:
3305			sbuf_printf(sb, "MANUAL");
3306			break;
3307		default:
3308			sbuf_printf(sb, "UNKNOWN");
3309			break;
3310		}
3311		sbuf_printf(sb, "</Type>\n");
3312		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3313		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3314		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3315		sbuf_printf(sb, "%s<Flags>", indent);
3316		if (sc->sc_flags == 0)
3317			sbuf_printf(sb, "NONE");
3318		else {
3319			int first = 1;
3320
3321#define	ADD_FLAG(flag, name)	do {					\
3322	if ((sc->sc_flags & (flag)) != 0) {				\
3323		if (!first)						\
3324			sbuf_printf(sb, ", ");				\
3325		else							\
3326			first = 0;					\
3327		sbuf_printf(sb, name);					\
3328	}								\
3329} while (0)
3330			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3331			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3332#undef	ADD_FLAG
3333		}
3334		sbuf_printf(sb, "</Flags>\n");
3335		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3336		    (u_int)sc->sc_slice);
3337		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3338		    balance_name(sc->sc_balance));
3339		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3340		    sc->sc_ndisks);
3341		sbuf_printf(sb, "%s<State>", indent);
3342		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3343			sbuf_printf(sb, "%s", "STARTING");
3344		else if (sc->sc_ndisks ==
3345		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3346			sbuf_printf(sb, "%s", "COMPLETE");
3347		else
3348			sbuf_printf(sb, "%s", "DEGRADED");
3349		sbuf_printf(sb, "</State>\n");
3350		sx_xunlock(&sc->sc_lock);
3351		g_topology_lock();
3352	}
3353}
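
/*
 * The XML emitted above ends up in the kern.geom.confxml sysctl.  A
 * trimmed, hypothetical fragment for a healthy two-way mirror might
 * look like:
 *
 *	<Type>AUTOMATIC</Type>
 *	<SyncID>1</SyncID>
 *	<GenID>0</GenID>
 *	<Flags>NONE</Flags>
 *	<Balance>load</Balance>
 *	<Components>2</Components>
 *	<State>COMPLETE</State>
 */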
3354
3355static void
3356g_mirror_shutdown_post_sync(void *arg, int howto)
3357{
3358	struct g_class *mp;
3359	struct g_geom *gp, *gp2;
3360	struct g_mirror_softc *sc;
3361	int error;
3362
3363	if (panicstr != NULL)
3364		return;
3365
3366	mp = arg;
3367	g_topology_lock();
3368	g_mirror_shutdown = 1;
3369	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3370		if ((sc = gp->softc) == NULL)
3371			continue;
3372		/* Skip synchronization geom. */
3373		if (gp == sc->sc_sync.ds_geom)
3374			continue;
3375		g_topology_unlock();
3376		sx_xlock(&sc->sc_lock);
3377		g_mirror_idle(sc, -1);
3378		g_cancel_event(sc);
3379		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3380		if (error != 0)
3381			sx_xunlock(&sc->sc_lock);
3382		g_topology_lock();
3383	}
3384	g_topology_unlock();
3385}
3386
3387static void
3388g_mirror_init(struct g_class *mp)
3389{
3390
3391	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3392	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3393	if (g_mirror_post_sync == NULL)
3394		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3395}
3396
3397static void
3398g_mirror_fini(struct g_class *mp)
3399{
3400
3401	if (g_mirror_post_sync != NULL)
3402		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3403}
3404
3405DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3406