g_mirror.c revision 139650
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 139650 2005-01-03 19:42:37Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/eventhandler.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <sys/sched.h>
46#include <geom/mirror/g_mirror.h>
47
48
/* Malloc type passed to the malloc()/free() calls in this file. */
static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

/* Everything below hangs off the kern.geom.mirror sysctl node. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
/* Debug level; exported read-write and also registered as a tunable. */
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* Time to wait for all components (default 4); sysctl and tunable. */
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Idle threshold for marking components clean (default 5); sysctl and tunable. */
static u_int g_mirror_idletime = 5;
TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
/*
 * The two synchronization knobs below are exported through sysctl only;
 * no TUNABLE_INT() is registered for them here.
 */
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");
73
/*
 * msleep() wrapper that emits a level-4 debug message before sleeping on
 * 'ident' and another one after msleep() returns.  The value returned by
 * msleep() is discarded, so callers cannot tell a wakeup from a timeout.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
79
/* Event handler registration tag; NULL until something registers one. */
static eventhandler_tag g_mirror_ehtag = NULL;

/* Class methods referenced by g_mirror_class below. */
static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;
static void g_mirror_init(struct g_class *mp);
static void g_mirror_fini(struct g_class *mp);

/* GEOM class descriptor for the mirror class. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini
};


/* Forward declarations of helpers that are used before they are defined. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
105
106
107static const char *
108g_mirror_disk_state2str(int state)
109{
110
111	switch (state) {
112	case G_MIRROR_DISK_STATE_NONE:
113		return ("NONE");
114	case G_MIRROR_DISK_STATE_NEW:
115		return ("NEW");
116	case G_MIRROR_DISK_STATE_ACTIVE:
117		return ("ACTIVE");
118	case G_MIRROR_DISK_STATE_STALE:
119		return ("STALE");
120	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
121		return ("SYNCHRONIZING");
122	case G_MIRROR_DISK_STATE_DISCONNECTED:
123		return ("DISCONNECTED");
124	case G_MIRROR_DISK_STATE_DESTROY:
125		return ("DESTROY");
126	default:
127		return ("INVALID");
128	}
129}
130
131static const char *
132g_mirror_device_state2str(int state)
133{
134
135	switch (state) {
136	case G_MIRROR_DEVICE_STATE_STARTING:
137		return ("STARTING");
138	case G_MIRROR_DEVICE_STATE_RUNNING:
139		return ("RUNNING");
140	default:
141		return ("INVALID");
142	}
143}
144
145static const char *
146g_mirror_get_diskname(struct g_mirror_disk *disk)
147{
148
149	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
150		return ("[unknown]");
151	return (disk->d_name);
152}
153
154/*
155 * --- Events handling functions ---
156 * Events in geom_mirror are used to maintain disks and device status
157 * from one thread to simplify locking.
158 */
159static void
160g_mirror_event_free(struct g_mirror_event *ep)
161{
162
163	free(ep, M_MIRROR);
164}
165
166int
167g_mirror_event_send(void *arg, int state, int flags)
168{
169	struct g_mirror_softc *sc;
170	struct g_mirror_disk *disk;
171	struct g_mirror_event *ep;
172	int error;
173
174	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
175	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
176	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
177		disk = NULL;
178		sc = arg;
179	} else {
180		disk = arg;
181		sc = disk->d_softc;
182	}
183	ep->e_disk = disk;
184	ep->e_state = state;
185	ep->e_flags = flags;
186	ep->e_error = 0;
187	mtx_lock(&sc->sc_events_mtx);
188	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
189	mtx_unlock(&sc->sc_events_mtx);
190	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
191	mtx_lock(&sc->sc_queue_mtx);
192	wakeup(sc);
193	mtx_unlock(&sc->sc_queue_mtx);
194	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
195		return (0);
196	g_topology_assert();
197	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
198	g_topology_unlock();
199	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
200		mtx_lock(&sc->sc_events_mtx);
201		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
202		    hz * 5);
203	}
204	/* Don't even try to use 'sc' here, because it could be already dead. */
205	g_topology_lock();
206	error = ep->e_error;
207	g_mirror_event_free(ep);
208	return (error);
209}
210
211static struct g_mirror_event *
212g_mirror_event_get(struct g_mirror_softc *sc)
213{
214	struct g_mirror_event *ep;
215
216	mtx_lock(&sc->sc_events_mtx);
217	ep = TAILQ_FIRST(&sc->sc_events);
218	mtx_unlock(&sc->sc_events_mtx);
219	return (ep);
220}
221
222static void
223g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
224{
225
226	mtx_lock(&sc->sc_events_mtx);
227	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
228	mtx_unlock(&sc->sc_events_mtx);
229}
230
231static void
232g_mirror_event_cancel(struct g_mirror_disk *disk)
233{
234	struct g_mirror_softc *sc;
235	struct g_mirror_event *ep, *tmpep;
236
237	g_topology_assert();
238
239	sc = disk->d_softc;
240	mtx_lock(&sc->sc_events_mtx);
241	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
242		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
243			continue;
244		if (ep->e_disk != disk)
245			continue;
246		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
247		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
248			g_mirror_event_free(ep);
249		else {
250			ep->e_error = ECANCELED;
251			wakeup(ep);
252		}
253	}
254	mtx_unlock(&sc->sc_events_mtx);
255}
256
257/*
258 * Return the number of disks in given state.
259 * If state is equal to -1, count all connected disks.
260 */
261u_int
262g_mirror_ndisks(struct g_mirror_softc *sc, int state)
263{
264	struct g_mirror_disk *disk;
265	u_int n = 0;
266
267	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
268		if (state == -1 || disk->d_state == state)
269			n++;
270	}
271	return (n);
272}
273
274/*
275 * Find a disk in mirror by its disk ID.
276 */
277static struct g_mirror_disk *
278g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
279{
280	struct g_mirror_disk *disk;
281
282	g_topology_assert();
283
284	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
285		if (disk->d_id == id)
286			return (disk);
287	}
288	return (NULL);
289}
290
291static u_int
292g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
293{
294	struct bio *bp;
295	u_int nreqs = 0;
296
297	mtx_lock(&sc->sc_queue_mtx);
298	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
299		if (bp->bio_from == cp)
300			nreqs++;
301	}
302	mtx_unlock(&sc->sc_queue_mtx);
303	return (nreqs);
304}
305
306static int
307g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
308{
309
310	if (cp->index > 0) {
311		G_MIRROR_DEBUG(2,
312		    "I/O requests for %s exist, can't destroy it now.",
313		    cp->provider->name);
314		return (1);
315	}
316	if (g_mirror_nrequests(sc, cp) > 0) {
317		G_MIRROR_DEBUG(2,
318		    "I/O requests for %s in queue, can't destroy it now.",
319		    cp->provider->name);
320		return (1);
321	}
322	return (0);
323}
324
325static void
326g_mirror_destroy_consumer(void *arg, int flags __unused)
327{
328	struct g_consumer *cp;
329
330	cp = arg;
331	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
332	g_detach(cp);
333	g_destroy_consumer(cp);
334}
335
336static void
337g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
338{
339	struct g_provider *pp;
340	int retaste_wait;
341
342	g_topology_assert();
343
344	cp->private = NULL;
345	if (g_mirror_is_busy(sc, cp))
346		return;
347	pp = cp->provider;
348	retaste_wait = 0;
349	if (cp->acw == 1) {
350		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
351			retaste_wait = 1;
352	}
353	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
354	    -cp->acw, -cp->ace, 0);
355	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
356		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
357	if (retaste_wait) {
358		/*
359		 * After retaste event was send (inside g_access()), we can send
360		 * event to detach and destroy consumer.
361		 * A class, which has consumer to the given provider connected
362		 * will not receive retaste event for the provider.
363		 * This is the way how I ignore retaste events when I close
364		 * consumers opened for write: I detach and destroy consumer
365		 * after retaste event is sent.
366		 */
367		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
368		return;
369	}
370	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
371	g_detach(cp);
372	g_destroy_consumer(cp);
373}
374
375static int
376g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
377{
378	int error;
379
380	g_topology_assert();
381	KASSERT(disk->d_consumer == NULL,
382	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
383
384	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
385	disk->d_consumer->private = disk;
386	disk->d_consumer->index = 0;
387	error = g_attach(disk->d_consumer, pp);
388	if (error != 0)
389		return (error);
390	error = g_access(disk->d_consumer, 1, 1, 1);
391	if (error != 0) {
392		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
393		    pp->name, error);
394		return (error);
395	}
396
397	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
398	return (0);
399}
400
401static void
402g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
403{
404
405	g_topology_assert();
406
407	if (cp == NULL)
408		return;
409	if (cp->provider != NULL)
410		g_mirror_kill_consumer(sc, cp);
411	else
412		g_destroy_consumer(cp);
413}
414
415/*
416 * Initialize disk. This means allocate memory, create consumer, attach it
417 * to the provider and open access (r1w1e1) to it.
418 */
419static struct g_mirror_disk *
420g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
421    struct g_mirror_metadata *md, int *errorp)
422{
423	struct g_mirror_disk *disk;
424	int error;
425
426	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
427	if (disk == NULL) {
428		error = ENOMEM;
429		goto fail;
430	}
431	disk->d_softc = sc;
432	error = g_mirror_connect_disk(disk, pp);
433	if (error != 0)
434		goto fail;
435	disk->d_id = md->md_did;
436	disk->d_state = G_MIRROR_DISK_STATE_NONE;
437	disk->d_priority = md->md_priority;
438	disk->d_delay.sec = 0;
439	disk->d_delay.frac = 0;
440	binuptime(&disk->d_last_used);
441	disk->d_flags = md->md_dflags;
442	if (md->md_provider[0] != '\0')
443		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
444	disk->d_sync.ds_consumer = NULL;
445	disk->d_sync.ds_offset = md->md_sync_offset;
446	disk->d_sync.ds_offset_done = md->md_sync_offset;
447	disk->d_sync.ds_resync = -1;
448	disk->d_genid = md->md_genid;
449	disk->d_sync.ds_syncid = md->md_syncid;
450	if (errorp != NULL)
451		*errorp = 0;
452	return (disk);
453fail:
454	if (errorp != NULL)
455		*errorp = error;
456	if (disk != NULL) {
457		g_mirror_disconnect_consumer(sc, disk->d_consumer);
458		free(disk, M_MIRROR);
459	}
460	return (NULL);
461}
462
463static void
464g_mirror_destroy_disk(struct g_mirror_disk *disk)
465{
466	struct g_mirror_softc *sc;
467
468	g_topology_assert();
469
470	LIST_REMOVE(disk, d_next);
471	g_mirror_event_cancel(disk);
472	sc = disk->d_softc;
473	if (sc->sc_hint == disk)
474		sc->sc_hint = NULL;
475	switch (disk->d_state) {
476	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
477		g_mirror_sync_stop(disk, 1);
478		/* FALLTHROUGH */
479	case G_MIRROR_DISK_STATE_NEW:
480	case G_MIRROR_DISK_STATE_STALE:
481	case G_MIRROR_DISK_STATE_ACTIVE:
482		g_mirror_disconnect_consumer(sc, disk->d_consumer);
483		free(disk, M_MIRROR);
484		break;
485	default:
486		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
487		    g_mirror_get_diskname(disk),
488		    g_mirror_disk_state2str(disk->d_state)));
489	}
490}
491
492static void
493g_mirror_destroy_device(struct g_mirror_softc *sc)
494{
495	struct g_mirror_disk *disk;
496	struct g_mirror_event *ep;
497	struct g_geom *gp;
498	struct g_consumer *cp, *tmpcp;
499
500	g_topology_assert();
501
502	gp = sc->sc_geom;
503	if (sc->sc_provider != NULL)
504		g_mirror_destroy_provider(sc);
505	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
506	    disk = LIST_FIRST(&sc->sc_disks)) {
507		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
508		g_mirror_update_metadata(disk);
509		g_mirror_destroy_disk(disk);
510	}
511	while ((ep = g_mirror_event_get(sc)) != NULL) {
512		g_mirror_event_remove(sc, ep);
513		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
514			g_mirror_event_free(ep);
515		else {
516			ep->e_error = ECANCELED;
517			ep->e_flags |= G_MIRROR_EVENT_DONE;
518			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
519			mtx_lock(&sc->sc_events_mtx);
520			wakeup(ep);
521			mtx_unlock(&sc->sc_events_mtx);
522		}
523	}
524	callout_drain(&sc->sc_callout);
525	gp->softc = NULL;
526
527	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
528		g_mirror_disconnect_consumer(sc, cp);
529	}
530	sc->sc_sync.ds_geom->softc = NULL;
531	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
532	mtx_destroy(&sc->sc_queue_mtx);
533	mtx_destroy(&sc->sc_events_mtx);
534	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
535	g_wither_geom(gp, ENXIO);
536}
537
538static void
539g_mirror_orphan(struct g_consumer *cp)
540{
541	struct g_mirror_disk *disk;
542
543	g_topology_assert();
544
545	disk = cp->private;
546	if (disk == NULL)
547		return;
548	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_OFW;
549	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
550	    G_MIRROR_EVENT_DONTWAIT);
551}
552
553static void
554g_mirror_spoiled(struct g_consumer *cp)
555{
556	struct g_mirror_disk *disk;
557
558	g_topology_assert();
559
560	disk = cp->private;
561	if (disk == NULL)
562		return;
563	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_IMM;
564	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
565	    G_MIRROR_EVENT_DONTWAIT);
566}
567
568/*
569 * Function should return the next active disk on the list.
570 * It is possible that it will be the same disk as given.
571 * If there are no active disks on list, NULL is returned.
572 */
573static __inline struct g_mirror_disk *
574g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
575{
576	struct g_mirror_disk *dp;
577
578	for (dp = LIST_NEXT(disk, d_next); dp != disk;
579	    dp = LIST_NEXT(dp, d_next)) {
580		if (dp == NULL)
581			dp = LIST_FIRST(&sc->sc_disks);
582		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
583			break;
584	}
585	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
586		return (NULL);
587	return (dp);
588}
589
590static struct g_mirror_disk *
591g_mirror_get_disk(struct g_mirror_softc *sc)
592{
593	struct g_mirror_disk *disk;
594
595	if (sc->sc_hint == NULL) {
596		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
597		if (sc->sc_hint == NULL)
598			return (NULL);
599	}
600	disk = sc->sc_hint;
601	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
602		disk = g_mirror_find_next(sc, disk);
603		if (disk == NULL)
604			return (NULL);
605	}
606	sc->sc_hint = g_mirror_find_next(sc, disk);
607	return (disk);
608}
609
610static int
611g_mirror_write_metadata(struct g_mirror_disk *disk,
612    struct g_mirror_metadata *md)
613{
614	struct g_mirror_softc *sc;
615	struct g_consumer *cp;
616	off_t offset, length;
617	u_char *sector;
618	int error = 0;
619
620	g_topology_assert();
621
622	sc = disk->d_softc;
623	cp = disk->d_consumer;
624	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
625	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
626	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
627	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
628	    cp->acw, cp->ace));
629	length = cp->provider->sectorsize;
630	offset = cp->provider->mediasize - length;
631	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
632	if (md != NULL)
633		mirror_metadata_encode(md, sector);
634	g_topology_unlock();
635	error = g_write_data(cp, offset, sector, length);
636	g_topology_lock();
637	free(sector, M_MIRROR);
638	if (error != 0) {
639		disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_GENID_IMM;
640		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
641		    G_MIRROR_EVENT_DONTWAIT);
642	}
643	return (error);
644}
645
646static int
647g_mirror_clear_metadata(struct g_mirror_disk *disk)
648{
649	int error;
650
651	g_topology_assert();
652	error = g_mirror_write_metadata(disk, NULL);
653	if (error == 0) {
654		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
655		    g_mirror_get_diskname(disk));
656	} else {
657		G_MIRROR_DEBUG(0,
658		    "Cannot clear metadata on disk %s (error=%d).",
659		    g_mirror_get_diskname(disk), error);
660	}
661	return (error);
662}
663
664void
665g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
666    struct g_mirror_metadata *md)
667{
668
669	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
670	md->md_version = G_MIRROR_VERSION;
671	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
672	md->md_mid = sc->sc_id;
673	md->md_all = sc->sc_ndisks;
674	md->md_slice = sc->sc_slice;
675	md->md_balance = sc->sc_balance;
676	md->md_genid = sc->sc_genid;
677	md->md_mediasize = sc->sc_mediasize;
678	md->md_sectorsize = sc->sc_sectorsize;
679	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
680	bzero(md->md_provider, sizeof(md->md_provider));
681	if (disk == NULL) {
682		md->md_did = arc4random();
683		md->md_priority = 0;
684		md->md_syncid = 0;
685		md->md_dflags = 0;
686		md->md_sync_offset = 0;
687	} else {
688		md->md_did = disk->d_id;
689		md->md_priority = disk->d_priority;
690		md->md_syncid = disk->d_sync.ds_syncid;
691		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
692		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
693			md->md_sync_offset = disk->d_sync.ds_offset_done;
694		else
695			md->md_sync_offset = 0;
696		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
697			strlcpy(md->md_provider,
698			    disk->d_consumer->provider->name,
699			    sizeof(md->md_provider));
700		}
701	}
702}
703
704void
705g_mirror_update_metadata(struct g_mirror_disk *disk)
706{
707	struct g_mirror_metadata md;
708	int error;
709
710	g_topology_assert();
711	g_mirror_fill_metadata(disk->d_softc, disk, &md);
712	error = g_mirror_write_metadata(disk, &md);
713	if (error == 0) {
714		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
715		    g_mirror_get_diskname(disk));
716	} else {
717		G_MIRROR_DEBUG(0,
718		    "Cannot update metadata on disk %s (error=%d).",
719		    g_mirror_get_diskname(disk), error);
720	}
721}
722
723static void
724g_mirror_bump_syncid(struct g_mirror_softc *sc)
725{
726	struct g_mirror_disk *disk;
727
728	g_topology_assert();
729	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
730	    ("%s called with no active disks (device=%s).", __func__,
731	    sc->sc_name));
732
733	sc->sc_syncid++;
734	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
735	    sc->sc_syncid);
736	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
737		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
738		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
739			disk->d_sync.ds_syncid = sc->sc_syncid;
740			g_mirror_update_metadata(disk);
741		}
742	}
743}
744
745static void
746g_mirror_bump_genid(struct g_mirror_softc *sc)
747{
748	struct g_mirror_disk *disk;
749
750	g_topology_assert();
751	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
752	    ("%s called with no active disks (device=%s).", __func__,
753	    sc->sc_name));
754
755	sc->sc_genid++;
756	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
757	    sc->sc_genid);
758	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
759		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
760		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
761			disk->d_genid = sc->sc_genid;
762			g_mirror_update_metadata(disk);
763		}
764	}
765}
766
767static void
768g_mirror_idle(struct g_mirror_softc *sc)
769{
770	struct g_mirror_disk *disk;
771
772	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
773		return;
774	sc->sc_idle = 1;
775	g_topology_lock();
776	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
777		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
778			continue;
779		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
780		    g_mirror_get_diskname(disk), sc->sc_name);
781		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
782		g_mirror_update_metadata(disk);
783	}
784	g_topology_unlock();
785}
786
787static void
788g_mirror_unidle(struct g_mirror_softc *sc)
789{
790	struct g_mirror_disk *disk;
791
792	sc->sc_idle = 0;
793	g_topology_lock();
794	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
795		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
796			continue;
797		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
798		    g_mirror_get_diskname(disk), sc->sc_name);
799		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
800		g_mirror_update_metadata(disk);
801	}
802	g_topology_unlock();
803}
804
805/*
806 * Return 1 if we should check if mirror is idling.
807 */
808static int
809g_mirror_check_idle(struct g_mirror_softc *sc)
810{
811	struct g_mirror_disk *disk;
812
813	if (sc->sc_idle)
814		return (0);
815	if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
816		return (0);
817	/*
818	 * Check if there are no in-flight requests.
819	 */
820	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
821		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
822			continue;
823		if (disk->d_consumer->index > 0)
824			return (0);
825	}
826	return (1);
827}
828
829static __inline int
830bintime_cmp(struct bintime *bt1, struct bintime *bt2)
831{
832
833	if (bt1->sec < bt2->sec)
834		return (-1);
835	else if (bt1->sec > bt2->sec)
836		return (1);
837	if (bt1->frac < bt2->frac)
838		return (-1);
839	else if (bt1->frac > bt2->frac)
840		return (1);
841	return (0);
842}
843
844static void
845g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
846{
847
848	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
849		return;
850	binuptime(&disk->d_delay);
851	bintime_sub(&disk->d_delay, &bp->bio_t0);
852}
853
854static void
855g_mirror_done(struct bio *bp)
856{
857	struct g_mirror_softc *sc;
858
859	sc = bp->bio_from->geom->softc;
860	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
861	mtx_lock(&sc->sc_queue_mtx);
862	bioq_disksort(&sc->sc_queue, bp);
863	wakeup(sc);
864	mtx_unlock(&sc->sc_queue_mtx);
865}
866
/*
 * Handle a finished regular (non-synchronization) child request 'bp' and
 * account for it in its parent.
 *
 * Reads are sent to the parent's issuer on success and put back on the
 * device queue on failure.  Writes and deletes are reported as successful
 * when at least one child succeeded.  A failing child also queues a
 * DISCONNECTED event for its disk.  Must be called without the topology
 * lock.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	/* One request less in flight on this consumer. */
	bp->bio_from->index--;
	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer's private pointer was cleared (as
		 * g_mirror_kill_consumer() does), so the consumer no longer
		 * belongs to a disk; try again to get rid of it.
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		/* Nothing has failed so far; deliver once all children are in. */
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		/* Remember only the first error in the parent. */
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID_IMM;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/*
			 * Take the failed child out of both counters, so that
			 * bio_children ends up counting only children that
			 * succeeded or are still pending.
			 */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	/*
	 * Reached when this child failed, or when it succeeded but an earlier
	 * child had already recorded an error in the parent.
	 */
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/*
			 * Clear the error and put the parent back on the
			 * device queue - presumably so the worker retries it
			 * on another disk; the worker is outside this view.
			 */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 * Deliver the parent with the recorded error.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. Other children are still pending. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
954
955static void
956g_mirror_sync_done(struct bio *bp)
957{
958	struct g_mirror_softc *sc;
959
960	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
961	sc = bp->bio_from->geom->softc;
962	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
963	mtx_lock(&sc->sc_queue_mtx);
964	bioq_disksort(&sc->sc_queue, bp);
965	wakeup(sc);
966	mtx_unlock(&sc->sc_queue_mtx);
967}
968
969static void
970g_mirror_start(struct bio *bp)
971{
972	struct g_mirror_softc *sc;
973
974	sc = bp->bio_to->geom->softc;
975	/*
976	 * If sc == NULL or there are no valid disks, provider's error
977	 * should be set and g_mirror_start() should not be called at all.
978	 */
979	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
980	    ("Provider's error should be set (error=%d)(mirror=%s).",
981	    bp->bio_to->error, bp->bio_to->name));
982	G_MIRROR_LOGREQ(3, bp, "Request received.");
983
984	switch (bp->bio_cmd) {
985	case BIO_READ:
986	case BIO_WRITE:
987	case BIO_DELETE:
988		break;
989	case BIO_GETATTR:
990	default:
991		g_io_deliver(bp, EOPNOTSUPP);
992		return;
993	}
994	mtx_lock(&sc->sc_queue_mtx);
995	bioq_disksort(&sc->sc_queue, bp);
996	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
997	wakeup(sc);
998	mtx_unlock(&sc->sc_queue_mtx);
999}
1000
1001/*
1002 * Send one synchronization request.
1003 */
1004static void
1005g_mirror_sync_one(struct g_mirror_disk *disk)
1006{
1007	struct g_mirror_softc *sc;
1008	struct bio *bp;
1009
1010	sc = disk->d_softc;
1011	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1012	    ("Disk %s is not marked for synchronization.",
1013	    g_mirror_get_diskname(disk)));
1014
1015	bp = g_new_bio();
1016	if (bp == NULL)
1017		return;
1018	bp->bio_parent = NULL;
1019	bp->bio_cmd = BIO_READ;
1020	bp->bio_offset = disk->d_sync.ds_offset;
1021	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1022	bp->bio_cflags = 0;
1023	bp->bio_done = g_mirror_sync_done;
1024	bp->bio_data = disk->d_sync.ds_data;
1025	if (bp->bio_data == NULL) {
1026		g_destroy_bio(bp);
1027		return;
1028	}
1029	disk->d_sync.ds_offset += bp->bio_length;
1030	bp->bio_to = sc->sc_provider;
1031	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1032	disk->d_sync.ds_consumer->index++;
1033	g_io_request(bp, disk->d_sync.ds_consumer);
1034}
1035
/*
 * Handle a finished synchronization bio.
 *
 * A synchronization request goes through two stages using the same bio:
 * first a BIO_READ (issued by g_mirror_sync_one()), then a BIO_WRITE of
 * the data just read to the consumer of the disk being synchronized.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	/* One request less in flight on the consumer that carried it. */
	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer no longer belongs to a disk; try to get rid
		 * of it and drop the request.
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		/* A failed read is only logged; the request is dropped. */
		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		/* Turn the same bio into a write to the disk's own consumer. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;

		/* A failed write disconnects the disk. */
		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID_IMM;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		/* Record how far the disk is known to be synchronized. */
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		/*
		 * While ds_resync is set (not -1), skip the completion check
		 * and the periodic metadata update.
		 */
		if (sync->ds_resync != -1)
			break;
		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1125
1126static void
1127g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1128{
1129	struct g_mirror_disk *disk;
1130	struct g_consumer *cp;
1131	struct bio *cbp;
1132
1133	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1134		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1135			break;
1136	}
1137	if (disk == NULL) {
1138		if (bp->bio_error == 0)
1139			bp->bio_error = ENXIO;
1140		g_io_deliver(bp, bp->bio_error);
1141		return;
1142	}
1143	cbp = g_clone_bio(bp);
1144	if (cbp == NULL) {
1145		if (bp->bio_error == 0)
1146			bp->bio_error = ENOMEM;
1147		g_io_deliver(bp, bp->bio_error);
1148		return;
1149	}
1150	/*
1151	 * Fill in the component buf structure.
1152	 */
1153	cp = disk->d_consumer;
1154	cbp->bio_done = g_mirror_done;
1155	cbp->bio_to = cp->provider;
1156	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1157	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1158	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1159	    cp->acw, cp->ace));
1160	cp->index++;
1161	g_io_request(cbp, cp);
1162}
1163
1164static void
1165g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1166{
1167	struct g_mirror_disk *disk;
1168	struct g_consumer *cp;
1169	struct bio *cbp;
1170
1171	disk = g_mirror_get_disk(sc);
1172	if (disk == NULL) {
1173		if (bp->bio_error == 0)
1174			bp->bio_error = ENXIO;
1175		g_io_deliver(bp, bp->bio_error);
1176		return;
1177	}
1178	cbp = g_clone_bio(bp);
1179	if (cbp == NULL) {
1180		if (bp->bio_error == 0)
1181			bp->bio_error = ENOMEM;
1182		g_io_deliver(bp, bp->bio_error);
1183		return;
1184	}
1185	/*
1186	 * Fill in the component buf structure.
1187	 */
1188	cp = disk->d_consumer;
1189	cbp->bio_done = g_mirror_done;
1190	cbp->bio_to = cp->provider;
1191	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1192	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1193	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1194	    cp->acw, cp->ace));
1195	cp->index++;
1196	g_io_request(cbp, cp);
1197}
1198
1199static void
1200g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1201{
1202	struct g_mirror_disk *disk, *dp;
1203	struct g_consumer *cp;
1204	struct bio *cbp;
1205	struct bintime curtime;
1206
1207	binuptime(&curtime);
1208	/*
1209	 * Find a disk which the smallest load.
1210	 */
1211	disk = NULL;
1212	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1213		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1214			continue;
1215		/* If disk wasn't used for more than 2 sec, use it. */
1216		if (curtime.sec - dp->d_last_used.sec >= 2) {
1217			disk = dp;
1218			break;
1219		}
1220		if (disk == NULL ||
1221		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1222			disk = dp;
1223		}
1224	}
1225	cbp = g_clone_bio(bp);
1226	if (cbp == NULL) {
1227		if (bp->bio_error == 0)
1228			bp->bio_error = ENOMEM;
1229		g_io_deliver(bp, bp->bio_error);
1230		return;
1231	}
1232	/*
1233	 * Fill in the component buf structure.
1234	 */
1235	cp = disk->d_consumer;
1236	cbp->bio_done = g_mirror_done;
1237	cbp->bio_to = cp->provider;
1238	binuptime(&disk->d_last_used);
1239	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1240	KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1241	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1242	    cp->acw, cp->ace));
1243	cp->index++;
1244	g_io_request(cbp, cp);
1245}
1246
1247static void
1248g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1249{
1250	struct bio_queue_head queue;
1251	struct g_mirror_disk *disk;
1252	struct g_consumer *cp;
1253	struct bio *cbp;
1254	off_t left, mod, offset, slice;
1255	u_char *data;
1256	u_int ndisks;
1257
1258	if (bp->bio_length <= sc->sc_slice) {
1259		g_mirror_request_round_robin(sc, bp);
1260		return;
1261	}
1262	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1263	slice = bp->bio_length / ndisks;
1264	mod = slice % sc->sc_provider->sectorsize;
1265	if (mod != 0)
1266		slice += sc->sc_provider->sectorsize - mod;
1267	/*
1268	 * Allocate all bios before sending any request, so we can
1269	 * return ENOMEM in nice and clean way.
1270	 */
1271	left = bp->bio_length;
1272	offset = bp->bio_offset;
1273	data = bp->bio_data;
1274	bioq_init(&queue);
1275	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1276		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1277			continue;
1278		cbp = g_clone_bio(bp);
1279		if (cbp == NULL) {
1280			for (cbp = bioq_first(&queue); cbp != NULL;
1281			    cbp = bioq_first(&queue)) {
1282				bioq_remove(&queue, cbp);
1283				g_destroy_bio(cbp);
1284			}
1285			if (bp->bio_error == 0)
1286				bp->bio_error = ENOMEM;
1287			g_io_deliver(bp, bp->bio_error);
1288			return;
1289		}
1290		bioq_insert_tail(&queue, cbp);
1291		cbp->bio_done = g_mirror_done;
1292		cbp->bio_caller1 = disk;
1293		cbp->bio_to = disk->d_consumer->provider;
1294		cbp->bio_offset = offset;
1295		cbp->bio_data = data;
1296		cbp->bio_length = MIN(left, slice);
1297		left -= cbp->bio_length;
1298		if (left == 0)
1299			break;
1300		offset += cbp->bio_length;
1301		data += cbp->bio_length;
1302	}
1303	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1304		bioq_remove(&queue, cbp);
1305		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1306		disk = cbp->bio_caller1;
1307		cbp->bio_caller1 = NULL;
1308		cp = disk->d_consumer;
1309		KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1310		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1311		    cp->acr, cp->acw, cp->ace));
1312		disk->d_consumer->index++;
1313		g_io_request(cbp, disk->d_consumer);
1314	}
1315}
1316
1317static void
1318g_mirror_register_request(struct bio *bp)
1319{
1320	struct g_mirror_softc *sc;
1321
1322	sc = bp->bio_to->geom->softc;
1323	switch (bp->bio_cmd) {
1324	case BIO_READ:
1325		switch (sc->sc_balance) {
1326		case G_MIRROR_BALANCE_LOAD:
1327			g_mirror_request_load(sc, bp);
1328			break;
1329		case G_MIRROR_BALANCE_PREFER:
1330			g_mirror_request_prefer(sc, bp);
1331			break;
1332		case G_MIRROR_BALANCE_ROUND_ROBIN:
1333			g_mirror_request_round_robin(sc, bp);
1334			break;
1335		case G_MIRROR_BALANCE_SPLIT:
1336			g_mirror_request_split(sc, bp);
1337			break;
1338		}
1339		return;
1340	case BIO_WRITE:
1341	case BIO_DELETE:
1342	    {
1343		struct g_mirror_disk *disk;
1344		struct g_mirror_disk_sync *sync;
1345		struct bio_queue_head queue;
1346		struct g_consumer *cp;
1347		struct bio *cbp;
1348
1349		if (sc->sc_idle)
1350			g_mirror_unidle(sc);
1351		/*
1352		 * Allocate all bios before sending any request, so we can
1353		 * return ENOMEM in nice and clean way.
1354		 */
1355		bioq_init(&queue);
1356		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1357			sync = &disk->d_sync;
1358			switch (disk->d_state) {
1359			case G_MIRROR_DISK_STATE_ACTIVE:
1360				break;
1361			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1362				if (bp->bio_offset >= sync->ds_offset)
1363					continue;
1364				else if (bp->bio_offset + bp->bio_length >
1365				    sync->ds_offset_done &&
1366				    (bp->bio_offset < sync->ds_resync ||
1367				     sync->ds_resync == -1)) {
1368					sync->ds_resync = bp->bio_offset -
1369					    (bp->bio_offset % MAXPHYS);
1370				}
1371				break;
1372			default:
1373				continue;
1374			}
1375			cbp = g_clone_bio(bp);
1376			if (cbp == NULL) {
1377				for (cbp = bioq_first(&queue); cbp != NULL;
1378				    cbp = bioq_first(&queue)) {
1379					bioq_remove(&queue, cbp);
1380					g_destroy_bio(cbp);
1381				}
1382				if (bp->bio_error == 0)
1383					bp->bio_error = ENOMEM;
1384				g_io_deliver(bp, bp->bio_error);
1385				return;
1386			}
1387			bioq_insert_tail(&queue, cbp);
1388			cbp->bio_done = g_mirror_done;
1389			cp = disk->d_consumer;
1390			cbp->bio_caller1 = cp;
1391			cbp->bio_to = cp->provider;
1392			KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1393			    ("Consumer %s not opened (r%dw%de%d).",
1394			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1395		}
1396		for (cbp = bioq_first(&queue); cbp != NULL;
1397		    cbp = bioq_first(&queue)) {
1398			bioq_remove(&queue, cbp);
1399			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1400			cp = cbp->bio_caller1;
1401			cbp->bio_caller1 = NULL;
1402			cp->index++;
1403			g_io_request(cbp, cp);
1404		}
1405		/*
1406		 * Bump syncid on first write.
1407		 */
1408		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_OFW) != 0) {
1409			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1410			g_topology_lock();
1411			g_mirror_bump_syncid(sc);
1412			g_topology_unlock();
1413		}
1414		return;
1415	    }
1416	default:
1417		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1418		    bp->bio_cmd, sc->sc_name));
1419		break;
1420	}
1421}
1422
1423static int
1424g_mirror_can_destroy(struct g_mirror_softc *sc)
1425{
1426	struct g_geom *gp;
1427	struct g_consumer *cp;
1428
1429	g_topology_assert();
1430	gp = sc->sc_geom;
1431	LIST_FOREACH(cp, &gp->consumer, consumer) {
1432		if (g_mirror_is_busy(sc, cp))
1433			return (0);
1434	}
1435	gp = sc->sc_sync.ds_geom;
1436	LIST_FOREACH(cp, &gp->consumer, consumer) {
1437		if (g_mirror_is_busy(sc, cp))
1438			return (0);
1439	}
1440	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1441	    sc->sc_name);
1442	return (1);
1443}
1444
1445static int
1446g_mirror_try_destroy(struct g_mirror_softc *sc)
1447{
1448
1449	g_topology_lock();
1450	if (!g_mirror_can_destroy(sc)) {
1451		g_topology_unlock();
1452		return (0);
1453	}
1454	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1455		g_topology_unlock();
1456		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1457		    &sc->sc_worker);
1458		wakeup(&sc->sc_worker);
1459		sc->sc_worker = NULL;
1460	} else {
1461		g_mirror_destroy_device(sc);
1462		g_topology_unlock();
1463		free(sc, M_MIRROR);
1464	}
1465	return (1);
1466}
1467
1468/*
1469 * Worker thread.
1470 */
1471static void
1472g_mirror_worker(void *arg)
1473{
1474	struct g_mirror_softc *sc;
1475	struct g_mirror_disk *disk;
1476	struct g_mirror_disk_sync *sync;
1477	struct g_mirror_event *ep;
1478	struct bio *bp;
1479	u_int nreqs;
1480
1481	sc = arg;
1482	mtx_lock_spin(&sched_lock);
1483	sched_prio(curthread, PRIBIO);
1484	mtx_unlock_spin(&sched_lock);
1485
1486	nreqs = 0;
1487	for (;;) {
1488		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1489		/*
1490		 * First take a look at events.
1491		 * This is important to handle events before any I/O requests.
1492		 */
1493		ep = g_mirror_event_get(sc);
1494		if (ep != NULL && g_topology_try_lock()) {
1495			g_mirror_event_remove(sc, ep);
1496			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1497				/* Update only device status. */
1498				G_MIRROR_DEBUG(3,
1499				    "Running event for device %s.",
1500				    sc->sc_name);
1501				ep->e_error = 0;
1502				g_mirror_update_device(sc, 1);
1503			} else {
1504				/* Update disk status. */
1505				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1506				     g_mirror_get_diskname(ep->e_disk));
1507				ep->e_error = g_mirror_update_disk(ep->e_disk,
1508				    ep->e_state);
1509				if (ep->e_error == 0)
1510					g_mirror_update_device(sc, 0);
1511			}
1512			g_topology_unlock();
1513			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1514				KASSERT(ep->e_error == 0,
1515				    ("Error cannot be handled."));
1516				g_mirror_event_free(ep);
1517			} else {
1518				ep->e_flags |= G_MIRROR_EVENT_DONE;
1519				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1520				    ep);
1521				mtx_lock(&sc->sc_events_mtx);
1522				wakeup(ep);
1523				mtx_unlock(&sc->sc_events_mtx);
1524			}
1525			if ((sc->sc_flags &
1526			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1527				if (g_mirror_try_destroy(sc))
1528					kthread_exit(0);
1529			}
1530			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1531			continue;
1532		}
1533		/*
1534		 * Now I/O requests.
1535		 */
1536		/* Get first request from the queue. */
1537		mtx_lock(&sc->sc_queue_mtx);
1538		bp = bioq_first(&sc->sc_queue);
1539		if (bp == NULL) {
1540			if (ep != NULL) {
1541				/*
1542				 * No I/O requests and topology lock was
1543				 * already held? Try again.
1544				 */
1545				mtx_unlock(&sc->sc_queue_mtx);
1546				continue;
1547			}
1548			if ((sc->sc_flags &
1549			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1550				mtx_unlock(&sc->sc_queue_mtx);
1551				if (g_mirror_try_destroy(sc))
1552					kthread_exit(0);
1553				mtx_lock(&sc->sc_queue_mtx);
1554			}
1555		}
1556		if (sc->sc_sync.ds_ndisks > 0 &&
1557		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1558			mtx_unlock(&sc->sc_queue_mtx);
1559			/*
1560			 * It is time for synchronization...
1561			 */
1562			nreqs = 0;
1563			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1564				if (disk->d_state !=
1565				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1566					continue;
1567				}
1568				sync = &disk->d_sync;
1569				if (sync->ds_offset >=
1570				    sc->sc_provider->mediasize) {
1571					continue;
1572				}
1573				if (sync->ds_offset > sync->ds_offset_done)
1574					continue;
1575				if (sync->ds_resync != -1) {
1576					sync->ds_offset = sync->ds_resync;
1577					sync->ds_offset_done = sync->ds_resync;
1578					sync->ds_resync = -1;
1579				}
1580				g_mirror_sync_one(disk);
1581			}
1582			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1583			goto sleep;
1584		}
1585		if (bp == NULL) {
1586			if (g_mirror_check_idle(sc)) {
1587				u_int idletime;
1588
1589				idletime = g_mirror_idletime;
1590				if (idletime == 0)
1591					idletime = 1;
1592				idletime *= hz;
1593				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1594				    "m:w1", idletime) == EWOULDBLOCK) {
1595					G_MIRROR_DEBUG(5, "%s: I'm here 3.",
1596					    __func__);
1597					/*
1598					 * No I/O requests in 'idletime' seconds,
1599					 * so mark components as clean.
1600					 */
1601					g_mirror_idle(sc);
1602				}
1603				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1604			} else {
1605				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1606				    "m:w2", 0);
1607				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1608			}
1609			continue;
1610		}
1611		nreqs++;
1612		bioq_remove(&sc->sc_queue, bp);
1613		mtx_unlock(&sc->sc_queue_mtx);
1614
1615		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1616			g_mirror_regular_request(bp);
1617		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1618			u_int timeout, sps;
1619
1620			g_mirror_sync_request(bp);
1621sleep:
1622			sps = g_mirror_syncs_per_sec;
1623			if (sps == 0) {
1624				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1625				continue;
1626			}
1627			if (ep != NULL) {
1628				/*
1629				 * We have some pending events, don't sleep now.
1630				 */
1631				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1632				continue;
1633			}
1634			mtx_lock(&sc->sc_queue_mtx);
1635			if (bioq_first(&sc->sc_queue) != NULL) {
1636				mtx_unlock(&sc->sc_queue_mtx);
1637				G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1638				continue;
1639			}
1640			timeout = hz / sps;
1641			if (timeout == 0)
1642				timeout = 1;
1643			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1644			    timeout);
1645		} else {
1646			g_mirror_register_request(bp);
1647		}
1648		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1649	}
1650}
1651
1652/*
1653 * Open disk's consumer if needed.
1654 */
1655static void
1656g_mirror_update_access(struct g_mirror_disk *disk)
1657{
1658	struct g_provider *pp;
1659
1660	g_topology_assert();
1661
1662	pp = disk->d_softc->sc_provider;
1663	if (pp == NULL)
1664		return;
1665	if (pp->acw > 0) {
1666		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1667			G_MIRROR_DEBUG(1,
1668			    "Disk %s (device %s) marked as dirty.",
1669			    g_mirror_get_diskname(disk),
1670			    disk->d_softc->sc_name);
1671			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1672		}
1673	} else if (pp->acw == 0) {
1674		if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1675			G_MIRROR_DEBUG(1,
1676			    "Disk %s (device %s) marked as clean.",
1677			    g_mirror_get_diskname(disk),
1678			    disk->d_softc->sc_name);
1679			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1680		}
1681	}
1682}
1683
1684static void
1685g_mirror_sync_start(struct g_mirror_disk *disk)
1686{
1687	struct g_mirror_softc *sc;
1688	int error;
1689
1690	g_topology_assert();
1691
1692	sc = disk->d_softc;
1693	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1694	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1695	    sc->sc_state));
1696
1697	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1698	    g_mirror_get_diskname(disk));
1699	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1700	KASSERT(disk->d_sync.ds_consumer == NULL,
1701	    ("Sync consumer already exists (device=%s, disk=%s).",
1702	    sc->sc_name, g_mirror_get_diskname(disk)));
1703	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1704	disk->d_sync.ds_consumer->private = disk;
1705	disk->d_sync.ds_consumer->index = 0;
1706	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1707	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1708	    disk->d_softc->sc_name, error));
1709	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1710	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1711	    disk->d_softc->sc_name, error));
1712	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1713	sc->sc_sync.ds_ndisks++;
1714}
1715
1716/*
1717 * Stop synchronization process.
1718 * type: 0 - synchronization finished
1719 *       1 - synchronization stopped
1720 */
1721static void
1722g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1723{
1724
1725	g_topology_assert();
1726	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1727	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1728	    g_mirror_disk_state2str(disk->d_state)));
1729	if (disk->d_sync.ds_consumer == NULL)
1730		return;
1731
1732	if (type == 0) {
1733		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1734		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1735	} else /* if (type == 1) */ {
1736		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1737		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1738	}
1739	g_mirror_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
1740	free(disk->d_sync.ds_data, M_MIRROR);
1741	disk->d_sync.ds_consumer = NULL;
1742	disk->d_softc->sc_sync.ds_ndisks--;
1743	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1744}
1745
1746static void
1747g_mirror_launch_provider(struct g_mirror_softc *sc)
1748{
1749	struct g_mirror_disk *disk;
1750	struct g_provider *pp;
1751
1752	g_topology_assert();
1753
1754	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1755	pp->mediasize = sc->sc_mediasize;
1756	pp->sectorsize = sc->sc_sectorsize;
1757	sc->sc_provider = pp;
1758	g_error_provider(pp, 0);
1759	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1760	    pp->name);
1761	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1762		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1763			g_mirror_sync_start(disk);
1764	}
1765}
1766
1767static void
1768g_mirror_destroy_provider(struct g_mirror_softc *sc)
1769{
1770	struct g_mirror_disk *disk;
1771	struct bio *bp;
1772
1773	g_topology_assert();
1774	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1775	    sc->sc_name));
1776
1777	g_error_provider(sc->sc_provider, ENXIO);
1778	mtx_lock(&sc->sc_queue_mtx);
1779	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1780		bioq_remove(&sc->sc_queue, bp);
1781		g_io_deliver(bp, ENXIO);
1782	}
1783	mtx_unlock(&sc->sc_queue_mtx);
1784	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1785	    sc->sc_provider->name);
1786	sc->sc_provider->flags |= G_PF_WITHER;
1787	g_orphan_provider(sc->sc_provider, ENXIO);
1788	sc->sc_provider = NULL;
1789	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1790		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1791			g_mirror_sync_stop(disk, 1);
1792	}
1793}
1794
1795static void
1796g_mirror_go(void *arg)
1797{
1798	struct g_mirror_softc *sc;
1799
1800	sc = arg;
1801	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1802	g_mirror_event_send(sc, 0,
1803	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1804}
1805
1806static u_int
1807g_mirror_determine_state(struct g_mirror_disk *disk)
1808{
1809	struct g_mirror_softc *sc;
1810	u_int state;
1811
1812	sc = disk->d_softc;
1813	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1814		if ((disk->d_flags &
1815		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1816			/* Disk does not need synchronization. */
1817			state = G_MIRROR_DISK_STATE_ACTIVE;
1818		} else {
1819			if ((sc->sc_flags &
1820			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1821			    (disk->d_flags &
1822			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1823				/*
1824				 * We can start synchronization from
1825				 * the stored offset.
1826				 */
1827				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1828			} else {
1829				state = G_MIRROR_DISK_STATE_STALE;
1830			}
1831		}
1832	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1833		/*
1834		 * Reset all synchronization data for this disk,
1835		 * because if it even was synchronized, it was
1836		 * synchronized to disks with different syncid.
1837		 */
1838		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1839		disk->d_sync.ds_offset = 0;
1840		disk->d_sync.ds_offset_done = 0;
1841		disk->d_sync.ds_syncid = sc->sc_syncid;
1842		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1843		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1844			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1845		} else {
1846			state = G_MIRROR_DISK_STATE_STALE;
1847		}
1848	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1849		/*
1850		 * Not good, NOT GOOD!
1851		 * It means that mirror was started on stale disks
1852		 * and more fresh disk just arrive.
1853		 * If there were writes, mirror is fucked up, sorry.
1854		 * I think the best choice here is don't touch
1855		 * this disk and inform the user laudly.
1856		 */
1857		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1858		    "disk (%s) arrives!! It will not be connected to the "
1859		    "running device.", sc->sc_name,
1860		    g_mirror_get_diskname(disk));
1861		g_mirror_destroy_disk(disk);
1862		state = G_MIRROR_DISK_STATE_NONE;
1863		/* Return immediately, because disk was destroyed. */
1864		return (state);
1865	}
1866	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1867	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1868	return (state);
1869}
1870
1871/*
1872 * Update device state.
1873 */
1874static void
1875g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1876{
1877	struct g_mirror_disk *disk;
1878	u_int state;
1879
1880	g_topology_assert();
1881
1882	switch (sc->sc_state) {
1883	case G_MIRROR_DEVICE_STATE_STARTING:
1884	    {
1885		struct g_mirror_disk *pdisk, *tdisk;
1886		u_int dirty, ndisks, genid, syncid;
1887
1888		KASSERT(sc->sc_provider == NULL,
1889		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1890		/*
1891		 * Are we ready? We are, if all disks are connected or
1892		 * if we have any disks and 'force' is true.
1893		 */
1894		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1895		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1896			;
1897		} else if (g_mirror_ndisks(sc, -1) == 0) {
1898			/*
1899			 * Disks went down in starting phase, so destroy
1900			 * device.
1901			 */
1902			callout_drain(&sc->sc_callout);
1903			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1904			return;
1905		} else {
1906			return;
1907		}
1908
1909		/*
1910		 * Activate all disks with the biggest syncid.
1911		 */
1912		if (force) {
1913			/*
1914			 * If 'force' is true, we have been called due to
1915			 * timeout, so don't bother canceling timeout.
1916			 */
1917			ndisks = 0;
1918			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1919				if ((disk->d_flags &
1920				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1921					ndisks++;
1922				}
1923			}
1924			if (ndisks == 0) {
1925				/* No valid disks found, destroy device. */
1926				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1927				return;
1928			}
1929		} else {
1930			/* Cancel timeout. */
1931			callout_drain(&sc->sc_callout);
1932		}
1933
1934		/*
1935		 * Find the biggest genid.
1936		 */
1937		genid = 0;
1938		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1939			if (disk->d_genid > genid)
1940				genid = disk->d_genid;
1941		}
1942		sc->sc_genid = genid;
1943		/*
1944		 * Remove all disks without the biggest genid.
1945		 */
1946		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
1947			if (disk->d_genid < genid) {
1948				G_MIRROR_DEBUG(0,
1949				    "Component %s (device %s) broken, skipping.",
1950				    g_mirror_get_diskname(disk), sc->sc_name);
1951				g_mirror_destroy_disk(disk);
1952			}
1953		}
1954
1955		/*
1956		 * Find the biggest syncid.
1957		 */
1958		syncid = 0;
1959		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1960			if (disk->d_sync.ds_syncid > syncid)
1961				syncid = disk->d_sync.ds_syncid;
1962		}
1963
1964		/*
1965		 * Here we need to look for dirty disks and if all disks
1966		 * with the biggest syncid are dirty, we have to choose
1967		 * one with the biggest priority and rebuild the rest.
1968		 */
1969		/*
1970		 * Find the number of dirty disks with the biggest syncid.
1971		 * Find the number of disks with the biggest syncid.
1972		 * While here, find a disk with the biggest priority.
1973		 */
1974		dirty = ndisks = 0;
1975		pdisk = NULL;
1976		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1977			if (disk->d_sync.ds_syncid != syncid)
1978				continue;
1979			if ((disk->d_flags &
1980			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1981				continue;
1982			}
1983			ndisks++;
1984			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1985				dirty++;
1986				if (pdisk == NULL ||
1987				    pdisk->d_priority < disk->d_priority) {
1988					pdisk = disk;
1989				}
1990			}
1991		}
1992		if (dirty == 0) {
1993			/* No dirty disks at all, great. */
1994		} else if (dirty == ndisks) {
1995			/*
1996			 * Force synchronization for all dirty disks except one
1997			 * with the biggest priority.
1998			 */
1999			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2000			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2001			    "master disk for synchronization.",
2002			    g_mirror_get_diskname(pdisk), sc->sc_name);
2003			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2004				if (disk->d_sync.ds_syncid != syncid)
2005					continue;
2006				if ((disk->d_flags &
2007				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2008					continue;
2009				}
2010				KASSERT((disk->d_flags &
2011				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2012				    ("Disk %s isn't marked as dirty.",
2013				    g_mirror_get_diskname(disk)));
2014				/* Skip the disk with the biggest priority. */
2015				if (disk == pdisk)
2016					continue;
2017				disk->d_sync.ds_syncid = 0;
2018			}
2019		} else if (dirty < ndisks) {
2020			/*
2021			 * Force synchronization for all dirty disks.
2022			 * We have some non-dirty disks.
2023			 */
2024			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2025				if (disk->d_sync.ds_syncid != syncid)
2026					continue;
2027				if ((disk->d_flags &
2028				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2029					continue;
2030				}
2031				if ((disk->d_flags &
2032				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2033					continue;
2034				}
2035				disk->d_sync.ds_syncid = 0;
2036			}
2037		}
2038
2039		/* Reset hint. */
2040		sc->sc_hint = NULL;
2041		sc->sc_syncid = syncid;
2042		if (force) {
2043			/* Remember to bump syncid on first write. */
2044			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_OFW;
2045		}
2046		state = G_MIRROR_DEVICE_STATE_RUNNING;
2047		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2048		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2049		    g_mirror_device_state2str(state));
2050		sc->sc_state = state;
2051		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2052			state = g_mirror_determine_state(disk);
2053			g_mirror_event_send(disk, state,
2054			    G_MIRROR_EVENT_DONTWAIT);
2055			if (state == G_MIRROR_DISK_STATE_STALE)
2056				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_OFW;
2057		}
2058		wakeup(&g_mirror_class);
2059		break;
2060	    }
2061	case G_MIRROR_DEVICE_STATE_RUNNING:
2062		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2063		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2064			/*
2065			 * No active disks or no disks at all,
2066			 * so destroy device.
2067			 */
2068			if (sc->sc_provider != NULL)
2069				g_mirror_destroy_provider(sc);
2070			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2071			break;
2072		} else if (g_mirror_ndisks(sc,
2073		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2074		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2075			/*
2076			 * We have active disks, launch provider if it doesn't
2077			 * exist.
2078			 */
2079			if (sc->sc_provider == NULL)
2080				g_mirror_launch_provider(sc);
2081		}
2082		/*
2083		 * Bump syncid here, if we need to do it immediately.
2084		 */
2085		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_IMM) != 0) {
2086			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2087			g_mirror_bump_syncid(sc);
2088		}
2089		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID_IMM) != 0) {
2090			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2091			g_mirror_bump_genid(sc);
2092		}
2093		break;
2094	default:
2095		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2096		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2097		break;
2098	}
2099}
2100
2101/*
2102 * Update disk state and device state if needed.
2103 */
/*
 * Log (at debug level 1) that a disk is going from its current state to a
 * new one.  The macro takes no arguments: it expands to a G_MIRROR_DEBUG()
 * call which uses the variables 'disk', 'state' and 'sc', so all three have
 * to be in scope where the macro is used, and it has to be used before
 * disk->d_state is overwritten with the new state.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
2109static int
2110g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2111{
2112	struct g_mirror_softc *sc;
2113
2114	g_topology_assert();
2115
2116	sc = disk->d_softc;
2117again:
2118	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2119	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2120	    g_mirror_disk_state2str(state));
2121	switch (state) {
2122	case G_MIRROR_DISK_STATE_NEW:
2123		/*
2124		 * Possible scenarios:
2125		 * 1. New disk arrive.
2126		 */
2127		/* Previous state should be NONE. */
2128		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2129		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2130		    g_mirror_disk_state2str(disk->d_state)));
2131		DISK_STATE_CHANGED();
2132
2133		disk->d_state = state;
2134		if (LIST_EMPTY(&sc->sc_disks))
2135			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2136		else {
2137			struct g_mirror_disk *dp;
2138
2139			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2140				if (disk->d_priority >= dp->d_priority) {
2141					LIST_INSERT_BEFORE(dp, disk, d_next);
2142					dp = NULL;
2143					break;
2144				}
2145				if (LIST_NEXT(dp, d_next) == NULL)
2146					break;
2147			}
2148			if (dp != NULL)
2149				LIST_INSERT_AFTER(dp, disk, d_next);
2150		}
2151		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2152		    sc->sc_name, g_mirror_get_diskname(disk));
2153		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2154			break;
2155		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2156		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2157		    g_mirror_device_state2str(sc->sc_state),
2158		    g_mirror_get_diskname(disk),
2159		    g_mirror_disk_state2str(disk->d_state)));
2160		state = g_mirror_determine_state(disk);
2161		if (state != G_MIRROR_DISK_STATE_NONE)
2162			goto again;
2163		break;
2164	case G_MIRROR_DISK_STATE_ACTIVE:
2165		/*
2166		 * Possible scenarios:
2167		 * 1. New disk does not need synchronization.
2168		 * 2. Synchronization process finished successfully.
2169		 */
2170		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2171		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2172		    g_mirror_device_state2str(sc->sc_state),
2173		    g_mirror_get_diskname(disk),
2174		    g_mirror_disk_state2str(disk->d_state)));
2175		/* Previous state should be NEW or SYNCHRONIZING. */
2176		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2177		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2178		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2179		    g_mirror_disk_state2str(disk->d_state)));
2180		DISK_STATE_CHANGED();
2181
2182		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2183			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2184		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2185			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2186			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2187			g_mirror_sync_stop(disk, 0);
2188		}
2189		disk->d_state = state;
2190		disk->d_sync.ds_offset = 0;
2191		disk->d_sync.ds_offset_done = 0;
2192		g_mirror_update_access(disk);
2193		g_mirror_update_metadata(disk);
2194		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2195		    sc->sc_name, g_mirror_get_diskname(disk));
2196		break;
2197	case G_MIRROR_DISK_STATE_STALE:
2198		/*
2199		 * Possible scenarios:
2200		 * 1. Stale disk was connected.
2201		 */
2202		/* Previous state should be NEW. */
2203		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2204		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2205		    g_mirror_disk_state2str(disk->d_state)));
2206		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2207		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2208		    g_mirror_device_state2str(sc->sc_state),
2209		    g_mirror_get_diskname(disk),
2210		    g_mirror_disk_state2str(disk->d_state)));
2211		/*
2212		 * STALE state is only possible if device is marked
2213		 * NOAUTOSYNC.
2214		 */
2215		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2216		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2217		    g_mirror_device_state2str(sc->sc_state),
2218		    g_mirror_get_diskname(disk),
2219		    g_mirror_disk_state2str(disk->d_state)));
2220		DISK_STATE_CHANGED();
2221
2222		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2223		disk->d_state = state;
2224		g_mirror_update_metadata(disk);
2225		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2226		    sc->sc_name, g_mirror_get_diskname(disk));
2227		break;
2228	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2229		/*
2230		 * Possible scenarios:
2231		 * 1. Disk which needs synchronization was connected.
2232		 */
2233		/* Previous state should be NEW. */
2234		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2235		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2236		    g_mirror_disk_state2str(disk->d_state)));
2237		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2238		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2239		    g_mirror_device_state2str(sc->sc_state),
2240		    g_mirror_get_diskname(disk),
2241		    g_mirror_disk_state2str(disk->d_state)));
2242		DISK_STATE_CHANGED();
2243
2244		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2245			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2246		disk->d_state = state;
2247		if (sc->sc_provider != NULL) {
2248			g_mirror_sync_start(disk);
2249			g_mirror_update_metadata(disk);
2250		}
2251		break;
2252	case G_MIRROR_DISK_STATE_DISCONNECTED:
2253		/*
2254		 * Possible scenarios:
2255		 * 1. Device wasn't running yet, but disk disappear.
2256		 * 2. Disk was active and disapppear.
2257		 * 3. Disk disappear during synchronization process.
2258		 */
2259		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2260			/*
2261			 * Previous state should be ACTIVE, STALE or
2262			 * SYNCHRONIZING.
2263			 */
2264			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2265			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2266			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2267			    ("Wrong disk state (%s, %s).",
2268			    g_mirror_get_diskname(disk),
2269			    g_mirror_disk_state2str(disk->d_state)));
2270		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2271			/* Previous state should be NEW. */
2272			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2273			    ("Wrong disk state (%s, %s).",
2274			    g_mirror_get_diskname(disk),
2275			    g_mirror_disk_state2str(disk->d_state)));
2276			/*
2277			 * Reset bumping syncid if disk disappeared in STARTING
2278			 * state.
2279			 */
2280			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_OFW) != 0)
2281				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2282#ifdef	INVARIANTS
2283		} else {
2284			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2285			    sc->sc_name,
2286			    g_mirror_device_state2str(sc->sc_state),
2287			    g_mirror_get_diskname(disk),
2288			    g_mirror_disk_state2str(disk->d_state)));
2289#endif
2290		}
2291		DISK_STATE_CHANGED();
2292		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2293		    sc->sc_name, g_mirror_get_diskname(disk));
2294
2295		g_mirror_destroy_disk(disk);
2296		break;
2297	case G_MIRROR_DISK_STATE_DESTROY:
2298	    {
2299		int error;
2300
2301		error = g_mirror_clear_metadata(disk);
2302		if (error != 0)
2303			return (error);
2304		DISK_STATE_CHANGED();
2305		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2306		    sc->sc_name, g_mirror_get_diskname(disk));
2307
2308		g_mirror_destroy_disk(disk);
2309		sc->sc_ndisks--;
2310		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2311			g_mirror_update_metadata(disk);
2312		}
2313		break;
2314	    }
2315	default:
2316		KASSERT(1 == 0, ("Unknown state (%u).", state));
2317		break;
2318	}
2319	return (0);
2320}
2321#undef	DISK_STATE_CHANGED
2322
2323int
2324g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2325{
2326	struct g_provider *pp;
2327	u_char *buf;
2328	int error;
2329
2330	g_topology_assert();
2331
2332	error = g_access(cp, 1, 0, 0);
2333	if (error != 0)
2334		return (error);
2335	pp = cp->provider;
2336	g_topology_unlock();
2337	/* Metadata are stored on last sector. */
2338	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2339	    &error);
2340	g_topology_lock();
2341	g_access(cp, -1, 0, 0);
2342	if (error != 0) {
2343		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2344		    cp->provider->name, error);
2345		if (buf != NULL)
2346			g_free(buf);
2347		return (error);
2348	}
2349
2350	/* Decode metadata. */
2351	error = mirror_metadata_decode(buf, md);
2352	g_free(buf);
2353	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2354		return (EINVAL);
2355	if (md->md_version > G_MIRROR_VERSION) {
2356		G_MIRROR_DEBUG(0,
2357		    "Kernel module is too old to handle metadata from %s.",
2358		    cp->provider->name);
2359		return (EINVAL);
2360	}
2361	if (error != 0) {
2362		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2363		    cp->provider->name);
2364		return (error);
2365	}
2366
2367	return (0);
2368}
2369
2370static int
2371g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2372    struct g_mirror_metadata *md)
2373{
2374
2375	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2376		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2377		    pp->name, md->md_did);
2378		return (EEXIST);
2379	}
2380	if (md->md_all != sc->sc_ndisks) {
2381		G_MIRROR_DEBUG(1,
2382		    "Invalid '%s' field on disk %s (device %s), skipping.",
2383		    "md_all", pp->name, sc->sc_name);
2384		return (EINVAL);
2385	}
2386	if (md->md_slice != sc->sc_slice) {
2387		G_MIRROR_DEBUG(1,
2388		    "Invalid '%s' field on disk %s (device %s), skipping.",
2389		    "md_slice", pp->name, sc->sc_name);
2390		return (EINVAL);
2391	}
2392	if (md->md_balance != sc->sc_balance) {
2393		G_MIRROR_DEBUG(1,
2394		    "Invalid '%s' field on disk %s (device %s), skipping.",
2395		    "md_balance", pp->name, sc->sc_name);
2396		return (EINVAL);
2397	}
2398	if (md->md_mediasize != sc->sc_mediasize) {
2399		G_MIRROR_DEBUG(1,
2400		    "Invalid '%s' field on disk %s (device %s), skipping.",
2401		    "md_mediasize", pp->name, sc->sc_name);
2402		return (EINVAL);
2403	}
2404	if (sc->sc_mediasize > pp->mediasize) {
2405		G_MIRROR_DEBUG(1,
2406		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2407		    sc->sc_name);
2408		return (EINVAL);
2409	}
2410	if (md->md_sectorsize != sc->sc_sectorsize) {
2411		G_MIRROR_DEBUG(1,
2412		    "Invalid '%s' field on disk %s (device %s), skipping.",
2413		    "md_sectorsize", pp->name, sc->sc_name);
2414		return (EINVAL);
2415	}
2416	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2417		G_MIRROR_DEBUG(1,
2418		    "Invalid sector size of disk %s (device %s), skipping.",
2419		    pp->name, sc->sc_name);
2420		return (EINVAL);
2421	}
2422	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2423		G_MIRROR_DEBUG(1,
2424		    "Invalid device flags on disk %s (device %s), skipping.",
2425		    pp->name, sc->sc_name);
2426		return (EINVAL);
2427	}
2428	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2429		G_MIRROR_DEBUG(1,
2430		    "Invalid disk flags on disk %s (device %s), skipping.",
2431		    pp->name, sc->sc_name);
2432		return (EINVAL);
2433	}
2434	return (0);
2435}
2436
2437int
2438g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2439    struct g_mirror_metadata *md)
2440{
2441	struct g_mirror_disk *disk;
2442	int error;
2443
2444	g_topology_assert();
2445	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2446
2447	error = g_mirror_check_metadata(sc, pp, md);
2448	if (error != 0)
2449		return (error);
2450	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2451	    md->md_genid < sc->sc_genid) {
2452		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2453		    pp->name, sc->sc_name);
2454		return (EINVAL);
2455	}
2456	disk = g_mirror_init_disk(sc, pp, md, &error);
2457	if (disk == NULL)
2458		return (error);
2459	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2460	    G_MIRROR_EVENT_WAIT);
2461	if (error != 0)
2462		return (error);
2463	if (md->md_version < G_MIRROR_VERSION) {
2464		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2465		    pp->name, md->md_version, G_MIRROR_VERSION);
2466		g_mirror_update_metadata(disk);
2467	}
2468	return (0);
2469}
2470
2471static int
2472g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2473{
2474	struct g_mirror_softc *sc;
2475	struct g_mirror_disk *disk;
2476	int dcr, dcw, dce;
2477
2478	g_topology_assert();
2479	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2480	    acw, ace);
2481
2482	dcr = pp->acr + acr;
2483	dcw = pp->acw + acw;
2484	dce = pp->ace + ace;
2485
2486	sc = pp->geom->softc;
2487	if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
2488	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
2489		if (acr <= 0 && acw <= 0 && ace <= 0)
2490			return (0);
2491		else
2492			return (ENXIO);
2493	}
2494	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2495		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2496			continue;
2497		/*
2498		 * Mark disk as dirty on open and unmark on close.
2499		 */
2500		if (pp->acw == 0 && dcw > 0) {
2501			G_MIRROR_DEBUG(1,
2502			    "Disk %s (device %s) marked as dirty.",
2503			    g_mirror_get_diskname(disk), sc->sc_name);
2504			disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2505			g_mirror_update_metadata(disk);
2506		} else if (pp->acw > 0 && dcw == 0) {
2507			G_MIRROR_DEBUG(1,
2508			    "Disk %s (device %s) marked as clean.",
2509			    g_mirror_get_diskname(disk), sc->sc_name);
2510			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2511			g_mirror_update_metadata(disk);
2512		}
2513	}
2514	return (0);
2515}
2516
2517static struct g_geom *
2518g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2519{
2520	struct g_mirror_softc *sc;
2521	struct g_geom *gp;
2522	int error, timeout;
2523
2524	g_topology_assert();
2525	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2526	    md->md_mid);
2527
2528	/* One disk is minimum. */
2529	if (md->md_all < 1)
2530		return (NULL);
2531	/*
2532	 * Action geom.
2533	 */
2534	gp = g_new_geomf(mp, "%s", md->md_name);
2535	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2536	gp->start = g_mirror_start;
2537	gp->spoiled = g_mirror_spoiled;
2538	gp->orphan = g_mirror_orphan;
2539	gp->access = g_mirror_access;
2540	gp->dumpconf = g_mirror_dumpconf;
2541
2542	sc->sc_id = md->md_mid;
2543	sc->sc_slice = md->md_slice;
2544	sc->sc_balance = md->md_balance;
2545	sc->sc_mediasize = md->md_mediasize;
2546	sc->sc_sectorsize = md->md_sectorsize;
2547	sc->sc_ndisks = md->md_all;
2548	sc->sc_flags = md->md_mflags;
2549	sc->sc_bump_id = 0;
2550	sc->sc_idle = 0;
2551	bioq_init(&sc->sc_queue);
2552	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2553	LIST_INIT(&sc->sc_disks);
2554	TAILQ_INIT(&sc->sc_events);
2555	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2556	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2557	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2558	gp->softc = sc;
2559	sc->sc_geom = gp;
2560	sc->sc_provider = NULL;
2561	/*
2562	 * Synchronization geom.
2563	 */
2564	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2565	gp->softc = sc;
2566	gp->orphan = g_mirror_orphan;
2567	sc->sc_sync.ds_geom = gp;
2568	sc->sc_sync.ds_ndisks = 0;
2569	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2570	    "g_mirror %s", md->md_name);
2571	if (error != 0) {
2572		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2573		    sc->sc_name);
2574		g_destroy_geom(sc->sc_sync.ds_geom);
2575		mtx_destroy(&sc->sc_events_mtx);
2576		mtx_destroy(&sc->sc_queue_mtx);
2577		g_destroy_geom(sc->sc_geom);
2578		free(sc, M_MIRROR);
2579		return (NULL);
2580	}
2581
2582	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2583
2584	/*
2585	 * Run timeout.
2586	 */
2587	timeout = g_mirror_timeout * hz;
2588	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2589	return (sc->sc_geom);
2590}
2591
2592int
2593g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2594{
2595	struct g_provider *pp;
2596
2597	g_topology_assert();
2598
2599	if (sc == NULL)
2600		return (ENXIO);
2601	pp = sc->sc_provider;
2602	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2603		if (force) {
2604			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2605			    "can't be definitely removed.", pp->name);
2606		} else {
2607			G_MIRROR_DEBUG(1,
2608			    "Device %s is still open (r%dw%de%d).", pp->name,
2609			    pp->acr, pp->acw, pp->ace);
2610			return (EBUSY);
2611		}
2612	}
2613
2614	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2615	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2616	g_topology_unlock();
2617	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2618	mtx_lock(&sc->sc_queue_mtx);
2619	wakeup(sc);
2620	mtx_unlock(&sc->sc_queue_mtx);
2621	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2622	while (sc->sc_worker != NULL)
2623		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2624	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2625	g_topology_lock();
2626	g_mirror_destroy_device(sc);
2627	free(sc, M_MIRROR);
2628	return (0);
2629}
2630
2631static void
2632g_mirror_taste_orphan(struct g_consumer *cp)
2633{
2634
2635	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2636	    cp->provider->name));
2637}
2638
2639static struct g_geom *
2640g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2641{
2642	struct g_mirror_metadata md;
2643	struct g_mirror_softc *sc;
2644	struct g_consumer *cp;
2645	struct g_geom *gp;
2646	int error;
2647
2648	g_topology_assert();
2649	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2650	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2651
2652	gp = g_new_geomf(mp, "mirror:taste");
2653	/*
2654	 * This orphan function should be never called.
2655	 */
2656	gp->orphan = g_mirror_taste_orphan;
2657	cp = g_new_consumer(gp);
2658	g_attach(cp, pp);
2659	error = g_mirror_read_metadata(cp, &md);
2660	g_detach(cp);
2661	g_destroy_consumer(cp);
2662	g_destroy_geom(gp);
2663	if (error != 0)
2664		return (NULL);
2665	gp = NULL;
2666
2667	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2668		return (NULL);
2669	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2670		G_MIRROR_DEBUG(0,
2671		    "Device %s: provider %s marked as inactive, skipping.",
2672		    md.md_name, pp->name);
2673		return (NULL);
2674	}
2675	if (g_mirror_debug >= 2)
2676		mirror_metadata_dump(&md);
2677
2678	/*
2679	 * Let's check if device already exists.
2680	 */
2681	sc = NULL;
2682	LIST_FOREACH(gp, &mp->geom, geom) {
2683		sc = gp->softc;
2684		if (sc == NULL)
2685			continue;
2686		if (sc->sc_sync.ds_geom == gp)
2687			continue;
2688		if (strcmp(md.md_name, sc->sc_name) != 0)
2689			continue;
2690		if (md.md_mid != sc->sc_id) {
2691			G_MIRROR_DEBUG(0, "Device %s already configured.",
2692			    sc->sc_name);
2693			return (NULL);
2694		}
2695		break;
2696	}
2697	if (gp == NULL) {
2698		gp = g_mirror_create(mp, &md);
2699		if (gp == NULL) {
2700			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2701			    md.md_name);
2702			return (NULL);
2703		}
2704		sc = gp->softc;
2705	}
2706	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2707	error = g_mirror_add_disk(sc, pp, &md);
2708	if (error != 0) {
2709		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2710		    pp->name, gp->name, error);
2711		if (LIST_EMPTY(&sc->sc_disks))
2712			g_mirror_destroy(sc, 1);
2713		return (NULL);
2714	}
2715	return (gp);
2716}
2717
2718static int
2719g_mirror_destroy_geom(struct gctl_req *req __unused,
2720    struct g_class *mp __unused, struct g_geom *gp)
2721{
2722
2723	return (g_mirror_destroy(gp->softc, 0));
2724}
2725
2726static void
2727g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2728    struct g_consumer *cp, struct g_provider *pp)
2729{
2730	struct g_mirror_softc *sc;
2731
2732	g_topology_assert();
2733
2734	sc = gp->softc;
2735	if (sc == NULL)
2736		return;
2737	/* Skip synchronization geom. */
2738	if (gp == sc->sc_sync.ds_geom)
2739		return;
2740	if (pp != NULL) {
2741		/* Nothing here. */
2742	} else if (cp != NULL) {
2743		struct g_mirror_disk *disk;
2744
2745		disk = cp->private;
2746		if (disk == NULL)
2747			return;
2748		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2749		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2750			sbuf_printf(sb, "%s<Synchronized>", indent);
2751			if (disk->d_sync.ds_offset_done == 0)
2752				sbuf_printf(sb, "0%%");
2753			else {
2754				sbuf_printf(sb, "%u%%",
2755				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2756				    sc->sc_provider->mediasize));
2757			}
2758			sbuf_printf(sb, "</Synchronized>\n");
2759		}
2760		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2761		    disk->d_sync.ds_syncid);
2762		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
2763		    disk->d_genid);
2764		sbuf_printf(sb, "%s<Flags>", indent);
2765		if (disk->d_flags == 0)
2766			sbuf_printf(sb, "NONE");
2767		else {
2768			int first = 1;
2769
2770#define	ADD_FLAG(flag, name)	do {					\
2771	if ((disk->d_flags & (flag)) != 0) {				\
2772		if (!first)						\
2773			sbuf_printf(sb, ", ");				\
2774		else							\
2775			first = 0;					\
2776		sbuf_printf(sb, name);					\
2777	}								\
2778} while (0)
2779			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2780			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2781			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2782			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2783			    "SYNCHRONIZING");
2784			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2785#undef	ADD_FLAG
2786		}
2787		sbuf_printf(sb, "</Flags>\n");
2788		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2789		    disk->d_priority);
2790		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2791		    g_mirror_disk_state2str(disk->d_state));
2792	} else {
2793		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2794		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2795		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
2796		sbuf_printf(sb, "%s<Flags>", indent);
2797		if (sc->sc_flags == 0)
2798			sbuf_printf(sb, "NONE");
2799		else {
2800			int first = 1;
2801
2802#define	ADD_FLAG(flag, name)	do {					\
2803	if ((sc->sc_flags & (flag)) != 0) {				\
2804		if (!first)						\
2805			sbuf_printf(sb, ", ");				\
2806		else							\
2807			first = 0;					\
2808		sbuf_printf(sb, name);					\
2809	}								\
2810} while (0)
2811			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2812#undef	ADD_FLAG
2813		}
2814		sbuf_printf(sb, "</Flags>\n");
2815		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2816		    (u_int)sc->sc_slice);
2817		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2818		    balance_name(sc->sc_balance));
2819		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2820		    sc->sc_ndisks);
2821		sbuf_printf(sb, "%s<State>", indent);
2822		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2823			sbuf_printf(sb, "%s", "STARTING");
2824		else if (sc->sc_ndisks ==
2825		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2826			sbuf_printf(sb, "%s", "COMPLETE");
2827		else
2828			sbuf_printf(sb, "%s", "DEGRADED");
2829		sbuf_printf(sb, "</State>\n");
2830	}
2831}
2832
2833static void
2834g_mirror_shutdown(void *arg, int howto)
2835{
2836	struct g_class *mp;
2837	struct g_geom *gp, *gp2;
2838
2839	mp = arg;
2840	DROP_GIANT();
2841	g_topology_lock();
2842	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2843		if (gp->softc == NULL)
2844			continue;
2845		g_mirror_destroy(gp->softc, 1);
2846	}
2847	g_topology_unlock();
2848	PICKUP_GIANT();
2849#if 0
2850	tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
2851#endif
2852}
2853
2854static void
2855g_mirror_init(struct g_class *mp)
2856{
2857
2858	g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
2859	    g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
2860	if (g_mirror_ehtag == NULL)
2861		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
2862}
2863
2864static void
2865g_mirror_fini(struct g_class *mp)
2866{
2867
2868	if (g_mirror_ehtag == NULL)
2869		return;
2870	EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
2871}
2872
2873static int
2874g_mirror_can_go(void)
2875{
2876	struct g_mirror_softc *sc;
2877	struct g_geom *gp;
2878	struct g_provider *pp;
2879	int can_go;
2880
2881	DROP_GIANT();
2882	can_go = 1;
2883	g_topology_lock();
2884	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2885		sc = gp->softc;
2886		if (sc == NULL) {
2887			can_go = 0;
2888			break;
2889		}
2890		pp = sc->sc_provider;
2891		if (pp == NULL || pp->error != 0) {
2892			can_go = 0;
2893			break;
2894		}
2895	}
2896	g_topology_unlock();
2897	PICKUP_GIANT();
2898	return (can_go);
2899}
2900
2901static void
2902g_mirror_rootwait(void)
2903{
2904
2905	/*
2906	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called,
2907	 * HACK: before we get providers for tasting.
2908	 */
2909	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2910	/*
2911	 * Wait for mirrors in degraded state.
2912	 */
2913	for (;;) {
2914		if (g_mirror_can_go())
2915			break;
2916		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2917	}
2918}
2919
2920SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2921
2922DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2923