g_mirror.c revision 137253
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137253 2004-11-05 12:31:32Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <vm/uma.h>
41#include <geom/geom.h>
42#include <sys/proc.h>
43#include <sys/kthread.h>
44#include <geom/mirror/g_mirror.h>
45
46
47static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
48
49SYSCTL_DECL(_kern_geom);
50SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
51u_int g_mirror_debug = 0;
52TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
53SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
54    "Debug level");
55static u_int g_mirror_timeout = 4;
56TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
57SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
58    0, "Time to wait on all mirror components");
59static u_int g_mirror_idletime = 5;
60TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
61SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
62    &g_mirror_idletime, 0, "Mark components as clean when idling");
63static u_int g_mirror_reqs_per_sync = 5;
64SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
65    &g_mirror_reqs_per_sync, 0,
66    "Number of regular I/O requests per synchronization request");
67static u_int g_mirror_syncs_per_sec = 100;
68SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
69    &g_mirror_syncs_per_sec, 0,
70    "Number of synchronizations requests per second");
71
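/*
 * msleep(9) wrapper which logs going to sleep and waking up at debug
 * level 4.
 */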
72#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
73	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
74	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
75	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
76} while (0)
77
78
79static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
80    struct g_geom *gp);
81static g_taste_t g_mirror_taste;
82
83struct g_class g_mirror_class = {
84	.name = G_MIRROR_CLASS_NAME,
85	.version = G_VERSION,
86	.ctlreq = g_mirror_config,
87	.taste = g_mirror_taste,
88	.destroy_geom = g_mirror_destroy_geom
89};
90
91
92static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
93static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
94static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
95static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
96    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
97static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
98
99
100static const char *
101g_mirror_disk_state2str(int state)
102{
103
104	switch (state) {
105	case G_MIRROR_DISK_STATE_NONE:
106		return ("NONE");
107	case G_MIRROR_DISK_STATE_NEW:
108		return ("NEW");
109	case G_MIRROR_DISK_STATE_ACTIVE:
110		return ("ACTIVE");
111	case G_MIRROR_DISK_STATE_STALE:
112		return ("STALE");
113	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
114		return ("SYNCHRONIZING");
115	case G_MIRROR_DISK_STATE_DISCONNECTED:
116		return ("DISCONNECTED");
117	case G_MIRROR_DISK_STATE_DESTROY:
118		return ("DESTROY");
119	default:
120		return ("INVALID");
121	}
122}
123
124static const char *
125g_mirror_device_state2str(int state)
126{
127
128	switch (state) {
129	case G_MIRROR_DEVICE_STATE_STARTING:
130		return ("STARTING");
131	case G_MIRROR_DEVICE_STATE_RUNNING:
132		return ("RUNNING");
133	default:
134		return ("INVALID");
135	}
136}
137
138static const char *
139g_mirror_get_diskname(struct g_mirror_disk *disk)
140{
141
142	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
143		return ("[unknown]");
144	return (disk->d_name);
145}
146
147/*
148 * --- Event handling functions ---
149 * Events in geom_mirror are used to maintain disk and device status
150 * from a single thread, which simplifies locking.
151 */
152static void
153g_mirror_event_free(struct g_mirror_event *ep)
154{
155
156	free(ep, M_MIRROR);
157}
158
159int
160g_mirror_event_send(void *arg, int state, int flags)
161{
162	struct g_mirror_softc *sc;
163	struct g_mirror_disk *disk;
164	struct g_mirror_event *ep;
165	int error;
166
167	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
168	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
169	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
170		disk = NULL;
171		sc = arg;
172	} else {
173		disk = arg;
174		sc = disk->d_softc;
175	}
176	ep->e_disk = disk;
177	ep->e_state = state;
178	ep->e_flags = flags;
179	ep->e_error = 0;
180	mtx_lock(&sc->sc_events_mtx);
181	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
182	mtx_unlock(&sc->sc_events_mtx);
183	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
184	mtx_lock(&sc->sc_queue_mtx);
185	wakeup(sc);
186	mtx_unlock(&sc->sc_queue_mtx);
187	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
188		return (0);
189	g_topology_assert();
190	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
191	g_topology_unlock();
192	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
193		mtx_lock(&sc->sc_events_mtx);
194		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
195		    hz * 5);
196	}
197	/* Don't even try to use 'sc' here, because it could already be dead. */
198	g_topology_lock();
199	error = ep->e_error;
200	g_mirror_event_free(ep);
201	return (error);
202}
203
204static struct g_mirror_event *
205g_mirror_event_get(struct g_mirror_softc *sc)
206{
207	struct g_mirror_event *ep;
208
209	mtx_lock(&sc->sc_events_mtx);
210	ep = TAILQ_FIRST(&sc->sc_events);
211	if (ep != NULL)
212		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
213	mtx_unlock(&sc->sc_events_mtx);
214	return (ep);
215}
216
217static void
218g_mirror_event_cancel(struct g_mirror_disk *disk)
219{
220	struct g_mirror_softc *sc;
221	struct g_mirror_event *ep, *tmpep;
222
223	g_topology_assert();
224
225	sc = disk->d_softc;
226	mtx_lock(&sc->sc_events_mtx);
227	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
228		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
229			continue;
230		if (ep->e_disk != disk)
231			continue;
232		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
233		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
234			g_mirror_event_free(ep);
235		else {
236			ep->e_error = ECANCELED;
237			wakeup(ep);
238		}
239	}
240	mtx_unlock(&sc->sc_events_mtx);
241}
242
243/*
244 * Return the number of disks in the given state.
245 * If state is equal to -1, count all connected disks.
246 */
247u_int
248g_mirror_ndisks(struct g_mirror_softc *sc, int state)
249{
250	struct g_mirror_disk *disk;
251	u_int n = 0;
252
253	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
254		if (state == -1 || disk->d_state == state)
255			n++;
256	}
257	return (n);
258}
259
260/*
261 * Find a disk in the mirror by its disk ID.
262 */
263static struct g_mirror_disk *
264g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
265{
266	struct g_mirror_disk *disk;
267
268	g_topology_assert();
269
270	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
271		if (disk->d_id == id)
272			return (disk);
273	}
274	return (NULL);
275}
276
277static u_int
278g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
279{
280	struct bio *bp;
281	u_int nreqs = 0;
282
283	mtx_lock(&sc->sc_queue_mtx);
284	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
285		if (bp->bio_from == cp)
286			nreqs++;
287	}
288	mtx_unlock(&sc->sc_queue_mtx);
289	return (nreqs);
290}
291
292static int
293g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
294{
295
296	if (cp->index > 0) {
297		G_MIRROR_DEBUG(2,
298		    "I/O requests for %s exist, can't destroy it now.",
299		    cp->provider->name);
300		return (1);
301	}
302	if (g_mirror_nrequests(sc, cp) > 0) {
303		G_MIRROR_DEBUG(2,
304		    "I/O requests for %s in queue, can't destroy it now.",
305		    cp->provider->name);
306		return (1);
307	}
308	return (0);
309}
310
311static void
312g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
313{
314
315	g_topology_assert();
316
317	cp->private = NULL;
318	if (g_mirror_is_busy(sc, cp))
319		return;
320	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
321	g_detach(cp);
322	g_destroy_consumer(cp);
323}
324
325static int
326g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
327{
328	int error;
329
330	g_topology_assert();
331	KASSERT(disk->d_consumer == NULL,
332	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
333
334	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
335	disk->d_consumer->private = disk;
336	disk->d_consumer->index = 0;
337	error = g_attach(disk->d_consumer, pp);
338	if (error != 0)
339		return (error);
340	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
341	return (0);
342}
343
344static void
345g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
346{
347
348	g_topology_assert();
349
350	if (cp == NULL)
351		return;
352	if (cp->provider != NULL) {
353		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
354		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
355			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
356			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
357			    0);
358			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
359		}
360		g_mirror_kill_consumer(sc, cp);
361	} else {
362		g_destroy_consumer(cp);
363	}
364}
365
366/*
367 * Initialize a disk: allocate memory, create a consumer and attach it
368 * to the provider.  Access to the consumer is opened later, when needed.
369 */
370static struct g_mirror_disk *
371g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
372    struct g_mirror_metadata *md, int *errorp)
373{
374	struct g_mirror_disk *disk;
375	int error;
376
377	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
378	if (disk == NULL) {
379		error = ENOMEM;
380		goto fail;
381	}
382	disk->d_softc = sc;
383	error = g_mirror_connect_disk(disk, pp);
384	if (error != 0)
385		goto fail;
386	disk->d_id = md->md_did;
387	disk->d_state = G_MIRROR_DISK_STATE_NONE;
388	disk->d_priority = md->md_priority;
389	disk->d_delay.sec = 0;
390	disk->d_delay.frac = 0;
391	binuptime(&disk->d_last_used);
392	disk->d_flags = md->md_dflags;
393	if (md->md_provider[0] != '\0')
394		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
395	disk->d_sync.ds_consumer = NULL;
396	disk->d_sync.ds_offset = md->md_sync_offset;
397	disk->d_sync.ds_offset_done = md->md_sync_offset;
398	disk->d_sync.ds_resync = -1;
399	disk->d_sync.ds_syncid = md->md_syncid;
400	if (errorp != NULL)
401		*errorp = 0;
402	return (disk);
403fail:
404	if (errorp != NULL)
405		*errorp = error;
406	if (disk != NULL) {
407		g_mirror_disconnect_consumer(sc, disk->d_consumer);
408		free(disk, M_MIRROR);
409	}
410	return (NULL);
411}
412
413static void
414g_mirror_destroy_disk(struct g_mirror_disk *disk)
415{
416	struct g_mirror_softc *sc;
417
418	g_topology_assert();
419
420	LIST_REMOVE(disk, d_next);
421	g_mirror_event_cancel(disk);
422	sc = disk->d_softc;
423	if (sc->sc_hint == disk)
424		sc->sc_hint = NULL;
425	switch (disk->d_state) {
426	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
427		g_mirror_sync_stop(disk, 1);
428		/* FALLTHROUGH */
429	case G_MIRROR_DISK_STATE_NEW:
430	case G_MIRROR_DISK_STATE_STALE:
431	case G_MIRROR_DISK_STATE_ACTIVE:
432		g_mirror_disconnect_consumer(sc, disk->d_consumer);
433		free(disk, M_MIRROR);
434		break;
435	default:
436		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
437		    g_mirror_get_diskname(disk),
438		    g_mirror_disk_state2str(disk->d_state)));
439	}
440}
441
442static void
443g_mirror_destroy_device(struct g_mirror_softc *sc)
444{
445	struct g_mirror_disk *disk;
446	struct g_mirror_event *ep;
447	struct g_geom *gp;
448	struct g_consumer *cp, *tmpcp;
449
450	g_topology_assert();
451
452	gp = sc->sc_geom;
453	if (sc->sc_provider != NULL)
454		g_mirror_destroy_provider(sc);
455	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
456	    disk = LIST_FIRST(&sc->sc_disks)) {
457		g_mirror_destroy_disk(disk);
458	}
459	while ((ep = g_mirror_event_get(sc)) != NULL) {
460		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
461			g_mirror_event_free(ep);
462		else {
463			ep->e_error = ECANCELED;
464			ep->e_flags |= G_MIRROR_EVENT_DONE;
465			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
466			mtx_lock(&sc->sc_events_mtx);
467			wakeup(ep);
468			mtx_unlock(&sc->sc_events_mtx);
469		}
470	}
471	callout_drain(&sc->sc_callout);
472	gp->softc = NULL;
473
474	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
475		g_mirror_disconnect_consumer(sc, cp);
476	}
477	sc->sc_sync.ds_geom->softc = NULL;
478	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
479	mtx_destroy(&sc->sc_queue_mtx);
480	mtx_destroy(&sc->sc_events_mtx);
481	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
482	g_wither_geom(gp, ENXIO);
483}
484
485static void
486g_mirror_orphan(struct g_consumer *cp)
487{
488	struct g_mirror_disk *disk;
489
490	g_topology_assert();
491
492	disk = cp->private;
493	if (disk == NULL)
494		return;
495	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
496	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
497	    G_MIRROR_EVENT_DONTWAIT);
498}
499
500static void
501g_mirror_spoiled(struct g_consumer *cp)
502{
503	struct g_mirror_disk *disk;
504
505	g_topology_assert();
506
507	disk = cp->private;
508	if (disk == NULL)
509		return;
510	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
511	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
512	    G_MIRROR_EVENT_DONTWAIT);
513}
514
515/*
516 * Return the next active disk on the list.
517 * It is possible that it will be the same disk as the given one.
518 * If there are no active disks on the list, NULL is returned.
519 */
520static __inline struct g_mirror_disk *
521g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
522{
523	struct g_mirror_disk *dp;
524
525	for (dp = LIST_NEXT(disk, d_next); dp != disk;
526	    dp = LIST_NEXT(dp, d_next)) {
527		if (dp == NULL)
528			dp = LIST_FIRST(&sc->sc_disks);
529		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
530			break;
531	}
532	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
533		return (NULL);
534	return (dp);
535}
536
537static struct g_mirror_disk *
538g_mirror_get_disk(struct g_mirror_softc *sc)
539{
540	struct g_mirror_disk *disk;
541
542	if (sc->sc_hint == NULL) {
543		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
544		if (sc->sc_hint == NULL)
545			return (NULL);
546	}
547	disk = sc->sc_hint;
548	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
549		disk = g_mirror_find_next(sc, disk);
550		if (disk == NULL)
551			return (NULL);
552	}
553	sc->sc_hint = g_mirror_find_next(sc, disk);
554	return (disk);
555}
556
557static int
558g_mirror_write_metadata(struct g_mirror_disk *disk,
559    struct g_mirror_metadata *md)
560{
561	struct g_mirror_softc *sc;
562	struct g_consumer *cp;
563	off_t offset, length;
564	u_char *sector;
565	int close = 0, error = 0;
566
567	g_topology_assert();
568
569	sc = disk->d_softc;
570	cp = disk->d_consumer;
571	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
572	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
573	length = cp->provider->sectorsize;
574	offset = cp->provider->mediasize - length;
575	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
576	/*
577	 * Open the consumer if it isn't already open and remember to close it.
578	 */
579	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
580		error = g_access(cp, 0, 1, 1);
581		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
582		    cp->provider->name, 0, 1, 1, error);
583		if (error == 0)
584			close = 1;
585#ifdef	INVARIANTS
586	} else {
587		KASSERT(cp->acw > 0 && cp->ace > 0,
588		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
589		    cp->acr, cp->acw, cp->ace));
590#endif
591	}
592	if (error == 0) {
593		if (md != NULL)
594			mirror_metadata_encode(md, sector);
595		g_topology_unlock();
596		error = g_write_data(cp, offset, sector, length);
597		g_topology_lock();
598	}
599	free(sector, M_MIRROR);
600	if (close) {
601		g_access(cp, 0, -1, -1);
602		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
603		    cp->provider->name, 0, -1, -1, 0);
604	}
605	if (error != 0) {
606		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
607		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
608		    G_MIRROR_EVENT_DONTWAIT);
609	}
610	return (error);
611}
612
613static int
614g_mirror_clear_metadata(struct g_mirror_disk *disk)
615{
616	int error;
617
618	g_topology_assert();
619	error = g_mirror_write_metadata(disk, NULL);
620	if (error == 0) {
621		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
622		    g_mirror_get_diskname(disk));
623	} else {
624		G_MIRROR_DEBUG(0,
625		    "Cannot clear metadata on disk %s (error=%d).",
626		    g_mirror_get_diskname(disk), error);
627	}
628	return (error);
629}
630
631void
632g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
633    struct g_mirror_metadata *md)
634{
635
636	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
637	md->md_version = G_MIRROR_VERSION;
638	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
639	md->md_mid = sc->sc_id;
640	md->md_all = sc->sc_ndisks;
641	md->md_slice = sc->sc_slice;
642	md->md_balance = sc->sc_balance;
643	md->md_mediasize = sc->sc_mediasize;
644	md->md_sectorsize = sc->sc_sectorsize;
645	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
646	bzero(md->md_provider, sizeof(md->md_provider));
647	if (disk == NULL) {
648		md->md_did = arc4random();
649		md->md_priority = 0;
650		md->md_syncid = 0;
651		md->md_dflags = 0;
652		md->md_sync_offset = 0;
653	} else {
654		md->md_did = disk->d_id;
655		md->md_priority = disk->d_priority;
656		md->md_syncid = disk->d_sync.ds_syncid;
657		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
658		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
659			md->md_sync_offset = disk->d_sync.ds_offset_done;
660		else
661			md->md_sync_offset = 0;
662		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
663			strlcpy(md->md_provider,
664			    disk->d_consumer->provider->name,
665			    sizeof(md->md_provider));
666		}
667	}
668}
669
670void
671g_mirror_update_metadata(struct g_mirror_disk *disk)
672{
673	struct g_mirror_metadata md;
674	int error;
675
676	g_topology_assert();
677	g_mirror_fill_metadata(disk->d_softc, disk, &md);
678	error = g_mirror_write_metadata(disk, &md);
679	if (error == 0) {
680		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
681		    g_mirror_get_diskname(disk));
682	} else {
683		G_MIRROR_DEBUG(0,
684		    "Cannot update metadata on disk %s (error=%d).",
685		    g_mirror_get_diskname(disk), error);
686	}
687}
688
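/*
 * Increase the device syncid and store it in the metadata of every ACTIVE
 * and SYNCHRONIZING disk, so that components which miss later writes can
 * be recognized as stale on the next start.
 */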
689static void
690g_mirror_bump_syncid(struct g_mirror_softc *sc)
691{
692	struct g_mirror_disk *disk;
693
694	g_topology_assert();
695	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
696	    ("%s called with no active disks (device=%s).", __func__,
697	    sc->sc_name));
698
699	sc->sc_syncid++;
700	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
701	    sc->sc_syncid);
702	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
703		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
704		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
705			disk->d_sync.ds_syncid = sc->sc_syncid;
706			g_mirror_update_metadata(disk);
707		}
708	}
709}
710
711static void
712g_mirror_idle(struct g_mirror_softc *sc)
713{
714	struct g_mirror_disk *disk;
715
716	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
717		return;
718	sc->sc_idle = 1;
719	g_topology_lock();
720	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
721		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
722			continue;
723		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
724		    g_mirror_get_diskname(disk), sc->sc_name);
725		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
726		g_mirror_update_metadata(disk);
727	}
728	g_topology_unlock();
729}
730
731static void
732g_mirror_unidle(struct g_mirror_softc *sc)
733{
734	struct g_mirror_disk *disk;
735
736	sc->sc_idle = 0;
737	g_topology_lock();
738	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
739		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
740			continue;
741		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
742		    g_mirror_get_diskname(disk), sc->sc_name);
743		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
744		g_mirror_update_metadata(disk);
745	}
746	g_topology_unlock();
747}
748
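/*
 * Three-way comparison of two bintimes: returns -1, 0 or 1 when bt1 is
 * smaller than, equal to or greater than bt2, respectively.
 */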
749static __inline int
750bintime_cmp(struct bintime *bt1, struct bintime *bt2)
751{
752
753	if (bt1->sec < bt2->sec)
754		return (-1);
755	else if (bt1->sec > bt2->sec)
756		return (1);
757	if (bt1->frac < bt2->frac)
758		return (-1);
759	else if (bt1->frac > bt2->frac)
760		return (1);
761	return (0);
762}
763
764static void
765g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
766{
767
768	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
769		return;
770	binuptime(&disk->d_delay);
771	bintime_sub(&disk->d_delay, &bp->bio_t0);
772}
773
774static void
775g_mirror_done(struct bio *bp)
776{
777	struct g_mirror_softc *sc;
778
779	sc = bp->bio_from->geom->softc;
780	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
781	mtx_lock(&sc->sc_queue_mtx);
782	bioq_disksort(&sc->sc_queue, bp);
783	wakeup(sc);
784	mtx_unlock(&sc->sc_queue_mtx);
785}
786
787static void
788g_mirror_regular_request(struct bio *bp)
789{
790	struct g_mirror_softc *sc;
791	struct g_mirror_disk *disk;
792	struct bio *pbp;
793
794	g_topology_assert_not();
795
796	bp->bio_from->index--;
797	pbp = bp->bio_parent;
798	sc = pbp->bio_to->geom->softc;
799	disk = bp->bio_from->private;
800	if (disk == NULL) {
801		g_topology_lock();
802		g_mirror_kill_consumer(sc, bp->bio_from);
803		g_topology_unlock();
804	} else {
805		g_mirror_update_delay(disk, bp);
806	}
807
808	pbp->bio_inbed++;
809	KASSERT(pbp->bio_inbed <= pbp->bio_children,
810	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
811	    pbp->bio_children));
812	if (bp->bio_error == 0 && pbp->bio_error == 0) {
813		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
814		g_destroy_bio(bp);
815		if (pbp->bio_children == pbp->bio_inbed) {
816			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
817			pbp->bio_completed = pbp->bio_length;
818			g_io_deliver(pbp, pbp->bio_error);
819		}
820		return;
821	} else if (bp->bio_error != 0) {
822		if (pbp->bio_error == 0)
823			pbp->bio_error = bp->bio_error;
824		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
825		    bp->bio_error);
826		if (disk != NULL) {
827			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
828			g_mirror_event_send(disk,
829			    G_MIRROR_DISK_STATE_DISCONNECTED,
830			    G_MIRROR_EVENT_DONTWAIT);
831		}
832		switch (pbp->bio_cmd) {
833		case BIO_DELETE:
834		case BIO_WRITE:
835			pbp->bio_inbed--;
836			pbp->bio_children--;
837			break;
838		}
839	}
840	g_destroy_bio(bp);
841
842	switch (pbp->bio_cmd) {
843	case BIO_READ:
844		if (pbp->bio_children == pbp->bio_inbed) {
845			pbp->bio_error = 0;
846			mtx_lock(&sc->sc_queue_mtx);
847			bioq_disksort(&sc->sc_queue, pbp);
848			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
849			wakeup(sc);
850			mtx_unlock(&sc->sc_queue_mtx);
851		}
852		break;
853	case BIO_DELETE:
854	case BIO_WRITE:
855		if (pbp->bio_children == 0) {
856			/*
857			 * All requests failed.
858			 */
859		} else if (pbp->bio_inbed < pbp->bio_children) {
860			/* Do nothing. */
861			break;
862		} else if (pbp->bio_children == pbp->bio_inbed) {
863			/* Some requests succeeded. */
864			pbp->bio_error = 0;
865			pbp->bio_completed = pbp->bio_length;
866		}
867		g_io_deliver(pbp, pbp->bio_error);
868		break;
869	default:
870		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
871		break;
872	}
873}
874
875static void
876g_mirror_sync_done(struct bio *bp)
877{
878	struct g_mirror_softc *sc;
879
880	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
881	sc = bp->bio_from->geom->softc;
882	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
883	mtx_lock(&sc->sc_queue_mtx);
884	bioq_disksort(&sc->sc_queue, bp);
885	wakeup(sc);
886	mtx_unlock(&sc->sc_queue_mtx);
887}
888
889static void
890g_mirror_start(struct bio *bp)
891{
892	struct g_mirror_softc *sc;
893
894	sc = bp->bio_to->geom->softc;
895	/*
896	 * If sc == NULL or there are no valid disks, the provider's error
897	 * should be set and g_mirror_start() should not be called at all.
898	 */
899	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
900	    ("Provider's error should be set (error=%d)(mirror=%s).",
901	    bp->bio_to->error, bp->bio_to->name));
902	G_MIRROR_LOGREQ(3, bp, "Request received.");
903
904	switch (bp->bio_cmd) {
905	case BIO_READ:
906	case BIO_WRITE:
907	case BIO_DELETE:
908		break;
909	case BIO_GETATTR:
910	default:
911		g_io_deliver(bp, EOPNOTSUPP);
912		return;
913	}
914	mtx_lock(&sc->sc_queue_mtx);
915	bioq_disksort(&sc->sc_queue, bp);
916	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
917	wakeup(sc);
918	mtx_unlock(&sc->sc_queue_mtx);
919}
920
921/*
922 * Send one synchronization request.
923 */
924static void
925g_mirror_sync_one(struct g_mirror_disk *disk)
926{
927	struct g_mirror_softc *sc;
928	struct bio *bp;
929
930	sc = disk->d_softc;
931	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
932	    ("Disk %s is not marked for synchronization.",
933	    g_mirror_get_diskname(disk)));
934
935	bp = g_new_bio();
936	if (bp == NULL)
937		return;
938	bp->bio_parent = NULL;
939	bp->bio_cmd = BIO_READ;
940	bp->bio_offset = disk->d_sync.ds_offset;
941	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
942	bp->bio_cflags = 0;
943	bp->bio_done = g_mirror_sync_done;
944	bp->bio_data = disk->d_sync.ds_data;
945	if (bp->bio_data == NULL) {
946		g_destroy_bio(bp);
947		return;
948	}
949	disk->d_sync.ds_offset += bp->bio_length;
950	bp->bio_to = sc->sc_provider;
951	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
952	disk->d_sync.ds_consumer->index++;
953	g_io_request(bp, disk->d_sync.ds_consumer);
954}
955
956static void
957g_mirror_sync_request(struct bio *bp)
958{
959	struct g_mirror_softc *sc;
960	struct g_mirror_disk *disk;
961
962	bp->bio_from->index--;
963	sc = bp->bio_from->geom->softc;
964	disk = bp->bio_from->private;
965	if (disk == NULL) {
966		g_topology_lock();
967		g_mirror_kill_consumer(sc, bp->bio_from);
968		g_topology_unlock();
969		g_destroy_bio(bp);
970		return;
971	}
972
973	/*
974	 * Synchronization request.
975	 */
976	switch (bp->bio_cmd) {
977	case BIO_READ:
978	    {
979		struct g_consumer *cp;
980
981		if (bp->bio_error != 0) {
982			G_MIRROR_LOGREQ(0, bp,
983			    "Synchronization request failed (error=%d).",
984			    bp->bio_error);
985			g_destroy_bio(bp);
986			return;
987		}
988		G_MIRROR_LOGREQ(3, bp,
989		    "Synchronization request half-finished.");
990		bp->bio_cmd = BIO_WRITE;
991		bp->bio_cflags = 0;
992		cp = disk->d_consumer;
993		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
994		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
995		    cp->acr, cp->acw, cp->ace));
996		cp->index++;
997		g_io_request(bp, cp);
998		return;
999	    }
1000	case BIO_WRITE:
1001	    {
1002		struct g_mirror_disk_sync *sync;
1003
1004		if (bp->bio_error != 0) {
1005			G_MIRROR_LOGREQ(0, bp,
1006			    "Synchronization request failed (error=%d).",
1007			    bp->bio_error);
1008			g_destroy_bio(bp);
1009			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
1010			g_mirror_event_send(disk,
1011			    G_MIRROR_DISK_STATE_DISCONNECTED,
1012			    G_MIRROR_EVENT_DONTWAIT);
1013			return;
1014		}
1015		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1016		sync = &disk->d_sync;
1017		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1018		g_destroy_bio(bp);
1019		if (sync->ds_resync != -1)
1020			break;
1021		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1022			/*
1023			 * Disk up-to-date, activate it.
1024			 */
1025			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1026			    G_MIRROR_EVENT_DONTWAIT);
1027			return;
1028		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
1029			/*
1030			 * Store offset_done in the metadata every 100 MAXPHYS-sized blocks.
1031			 * XXX: This should be configurable.
1032			 */
1033			g_topology_lock();
1034			g_mirror_update_metadata(disk);
1035			g_topology_unlock();
1036		}
1037		return;
1038	    }
1039	default:
1040		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1041		    bp->bio_cmd, sc->sc_name));
1042		break;
1043	}
1044}
1045
1046static void
1047g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1048{
1049	struct g_mirror_disk *disk;
1050	struct g_consumer *cp;
1051	struct bio *cbp;
1052
1053	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1054		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1055			break;
1056	}
1057	if (disk == NULL) {
1058		if (bp->bio_error == 0)
1059			bp->bio_error = ENXIO;
1060		g_io_deliver(bp, bp->bio_error);
1061		return;
1062	}
1063	cbp = g_clone_bio(bp);
1064	if (cbp == NULL) {
1065		if (bp->bio_error == 0)
1066			bp->bio_error = ENOMEM;
1067		g_io_deliver(bp, bp->bio_error);
1068		return;
1069	}
1070	/*
1071	 * Fill in the component bio structure.
1072	 */
1073	cp = disk->d_consumer;
1074	cbp->bio_done = g_mirror_done;
1075	cbp->bio_to = cp->provider;
1076	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1077	KASSERT(cp->acr > 0 && cp->ace > 0,
1078	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1079	    cp->acw, cp->ace));
1080	cp->index++;
1081	g_io_request(cbp, cp);
1082}
1083
1084static void
1085g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1086{
1087	struct g_mirror_disk *disk;
1088	struct g_consumer *cp;
1089	struct bio *cbp;
1090
1091	disk = g_mirror_get_disk(sc);
1092	if (disk == NULL) {
1093		if (bp->bio_error == 0)
1094			bp->bio_error = ENXIO;
1095		g_io_deliver(bp, bp->bio_error);
1096		return;
1097	}
1098	cbp = g_clone_bio(bp);
1099	if (cbp == NULL) {
1100		if (bp->bio_error == 0)
1101			bp->bio_error = ENOMEM;
1102		g_io_deliver(bp, bp->bio_error);
1103		return;
1104	}
1105	/*
1106	 * Fill in the component bio structure.
1107	 */
1108	cp = disk->d_consumer;
1109	cbp->bio_done = g_mirror_done;
1110	cbp->bio_to = cp->provider;
1111	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1112	KASSERT(cp->acr > 0 && cp->ace > 0,
1113	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1114	    cp->acw, cp->ace));
1115	cp->index++;
1116	g_io_request(cbp, cp);
1117}
1118
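/*
 * LOAD balance: send the read to the active disk with the smallest recently
 * measured request delay; a disk that has been idle for two seconds or more
 * is used right away.
 */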
1119static void
1120g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1121{
1122	struct g_mirror_disk *disk, *dp;
1123	struct g_consumer *cp;
1124	struct bio *cbp;
1125	struct bintime curtime;
1126
1127	binuptime(&curtime);
1128	/*
1129	 * Find the disk with the smallest load.
1130	 */
1131	disk = NULL;
1132	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1133		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1134			continue;
1135		/* If the disk hasn't been used for more than 2 seconds, use it. */
1136		if (curtime.sec - dp->d_last_used.sec >= 2) {
1137			disk = dp;
1138			break;
1139		}
1140		if (disk == NULL ||
1141		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1142			disk = dp;
1143		}
1144	}
1145	cbp = g_clone_bio(bp);
1146	if (cbp == NULL) {
1147		if (bp->bio_error == 0)
1148			bp->bio_error = ENOMEM;
1149		g_io_deliver(bp, bp->bio_error);
1150		return;
1151	}
1152	/*
1153	 * Fill in the component bio structure.
1154	 */
1155	cp = disk->d_consumer;
1156	cbp->bio_done = g_mirror_done;
1157	cbp->bio_to = cp->provider;
1158	binuptime(&disk->d_last_used);
1159	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1160	KASSERT(cp->acr > 0 && cp->ace > 0,
1161	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1162	    cp->acw, cp->ace));
1163	cp->index++;
1164	g_io_request(cbp, cp);
1165}
1166
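/*
 * SPLIT balance: divide a large read into sector-aligned slices and send
 * each slice to a different active disk.  Requests not larger than sc_slice
 * are handed to the round-robin code instead.
 */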
1167static void
1168g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1169{
1170	struct bio_queue_head queue;
1171	struct g_mirror_disk *disk;
1172	struct g_consumer *cp;
1173	struct bio *cbp;
1174	off_t left, mod, offset, slice;
1175	u_char *data;
1176	u_int ndisks;
1177
1178	if (bp->bio_length <= sc->sc_slice) {
1179		g_mirror_request_round_robin(sc, bp);
1180		return;
1181	}
1182	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1183	slice = bp->bio_length / ndisks;
1184	mod = slice % sc->sc_provider->sectorsize;
1185	if (mod != 0)
1186		slice += sc->sc_provider->sectorsize - mod;
1187	/*
1188	 * Allocate all bios before sending any request, so we can
1189	 * return ENOMEM in a nice and clean way.
1190	 */
1191	left = bp->bio_length;
1192	offset = bp->bio_offset;
1193	data = bp->bio_data;
1194	bioq_init(&queue);
1195	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1196		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1197			continue;
1198		cbp = g_clone_bio(bp);
1199		if (cbp == NULL) {
1200			for (cbp = bioq_first(&queue); cbp != NULL;
1201			    cbp = bioq_first(&queue)) {
1202				bioq_remove(&queue, cbp);
1203				g_destroy_bio(cbp);
1204			}
1205			if (bp->bio_error == 0)
1206				bp->bio_error = ENOMEM;
1207			g_io_deliver(bp, bp->bio_error);
1208			return;
1209		}
1210		bioq_insert_tail(&queue, cbp);
1211		cbp->bio_done = g_mirror_done;
1212		cbp->bio_caller1 = disk;
1213		cbp->bio_to = disk->d_consumer->provider;
1214		cbp->bio_offset = offset;
1215		cbp->bio_data = data;
1216		cbp->bio_length = MIN(left, slice);
1217		left -= cbp->bio_length;
1218		if (left == 0)
1219			break;
1220		offset += cbp->bio_length;
1221		data += cbp->bio_length;
1222	}
1223	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1224		bioq_remove(&queue, cbp);
1225		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1226		disk = cbp->bio_caller1;
1227		cbp->bio_caller1 = NULL;
1228		cp = disk->d_consumer;
1229		KASSERT(cp->acr > 0 && cp->ace > 0,
1230		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1231		    cp->acr, cp->acw, cp->ace));
1232		disk->d_consumer->index++;
1233		g_io_request(cbp, disk->d_consumer);
1234	}
1235}
1236
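/*
 * Dispatch a regular request: reads go to one or more active disks according
 * to the configured balance algorithm; writes and deletes are cloned to every
 * ACTIVE disk and to SYNCHRONIZING disks within the already synchronized
 * range.
 */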
1237static void
1238g_mirror_register_request(struct bio *bp)
1239{
1240	struct g_mirror_softc *sc;
1241
1242	sc = bp->bio_to->geom->softc;
1243	switch (bp->bio_cmd) {
1244	case BIO_READ:
1245		switch (sc->sc_balance) {
1246		case G_MIRROR_BALANCE_LOAD:
1247			g_mirror_request_load(sc, bp);
1248			break;
1249		case G_MIRROR_BALANCE_PREFER:
1250			g_mirror_request_prefer(sc, bp);
1251			break;
1252		case G_MIRROR_BALANCE_ROUND_ROBIN:
1253			g_mirror_request_round_robin(sc, bp);
1254			break;
1255		case G_MIRROR_BALANCE_SPLIT:
1256			g_mirror_request_split(sc, bp);
1257			break;
1258		}
1259		return;
1260	case BIO_WRITE:
1261	case BIO_DELETE:
1262	    {
1263		struct g_mirror_disk *disk;
1264		struct g_mirror_disk_sync *sync;
1265		struct bio_queue_head queue;
1266		struct g_consumer *cp;
1267		struct bio *cbp;
1268
1269		if (sc->sc_idle)
1270			g_mirror_unidle(sc);
1271		/*
1272		 * Allocate all bios before sending any request, so we can
1273		 * return ENOMEM in a nice and clean way.
1274		 */
1275		bioq_init(&queue);
1276		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1277			sync = &disk->d_sync;
1278			switch (disk->d_state) {
1279			case G_MIRROR_DISK_STATE_ACTIVE:
1280				break;
1281			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1282				if (bp->bio_offset >= sync->ds_offset)
1283					continue;
1284				else if (bp->bio_offset + bp->bio_length >
1285				    sync->ds_offset_done &&
1286				    (bp->bio_offset < sync->ds_resync ||
1287				     sync->ds_resync == -1)) {
1288					sync->ds_resync = bp->bio_offset -
1289					    (bp->bio_offset % MAXPHYS);
1290				}
1291				break;
1292			default:
1293				continue;
1294			}
1295			cbp = g_clone_bio(bp);
1296			if (cbp == NULL) {
1297				for (cbp = bioq_first(&queue); cbp != NULL;
1298				    cbp = bioq_first(&queue)) {
1299					bioq_remove(&queue, cbp);
1300					g_destroy_bio(cbp);
1301				}
1302				if (bp->bio_error == 0)
1303					bp->bio_error = ENOMEM;
1304				g_io_deliver(bp, bp->bio_error);
1305				return;
1306			}
1307			bioq_insert_tail(&queue, cbp);
1308			cbp->bio_done = g_mirror_done;
1309			cp = disk->d_consumer;
1310			cbp->bio_caller1 = cp;
1311			cbp->bio_to = cp->provider;
1312			KASSERT(cp->acw > 0 && cp->ace > 0,
1313			    ("Consumer %s not opened (r%dw%de%d).",
1314			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1315		}
1316		for (cbp = bioq_first(&queue); cbp != NULL;
1317		    cbp = bioq_first(&queue)) {
1318			bioq_remove(&queue, cbp);
1319			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1320			cp = cbp->bio_caller1;
1321			cbp->bio_caller1 = NULL;
1322			cp->index++;
1323			g_io_request(cbp, cp);
1324		}
1325		/*
1326		 * Bump syncid on first write.
1327		 */
1328		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1329			sc->sc_bump_syncid = 0;
1330			g_topology_lock();
1331			g_mirror_bump_syncid(sc);
1332			g_topology_unlock();
1333		}
1334		return;
1335	    }
1336	default:
1337		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1338		    bp->bio_cmd, sc->sc_name));
1339		break;
1340	}
1341}
1342
1343static int
1344g_mirror_can_destroy(struct g_mirror_softc *sc)
1345{
1346	struct g_geom *gp;
1347	struct g_consumer *cp;
1348
1349	g_topology_assert();
1350	gp = sc->sc_geom;
1351	LIST_FOREACH(cp, &gp->consumer, consumer) {
1352		if (g_mirror_is_busy(sc, cp))
1353			return (0);
1354	}
1355	gp = sc->sc_sync.ds_geom;
1356	LIST_FOREACH(cp, &gp->consumer, consumer) {
1357		if (g_mirror_is_busy(sc, cp))
1358			return (0);
1359	}
1360	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1361	    sc->sc_name);
1362	return (1);
1363}
1364
1365static int
1366g_mirror_try_destroy(struct g_mirror_softc *sc)
1367{
1368
1369	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1370		g_topology_lock();
1371		if (!g_mirror_can_destroy(sc)) {
1372			g_topology_unlock();
1373			return (0);
1374		}
1375		g_topology_unlock();
1376		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1377		    &sc->sc_worker);
1378		wakeup(&sc->sc_worker);
1379		sc->sc_worker = NULL;
1380	} else {
1381		g_topology_lock();
1382		if (!g_mirror_can_destroy(sc)) {
1383			g_topology_unlock();
1384			return (0);
1385		}
1386		g_mirror_destroy_device(sc);
1387		g_topology_unlock();
1388		free(sc, M_MIRROR);
1389	}
1390	return (1);
1391}
1392
1393/*
1394 * Worker thread.
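 * It handles status events, dispatches incoming I/O requests, completes
 * finished ones and drives disk synchronization, all from a single context.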
1395 */
1396static void
1397g_mirror_worker(void *arg)
1398{
1399	struct g_mirror_softc *sc;
1400	struct g_mirror_disk *disk;
1401	struct g_mirror_disk_sync *sync;
1402	struct g_mirror_event *ep;
1403	struct bio *bp;
1404	u_int nreqs;
1405
1406	sc = arg;
1407	curthread->td_base_pri = PRIBIO;
1408
1409	nreqs = 0;
1410	for (;;) {
1411		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1412		/*
1413		 * First take a look at events.
1414		 * It is important to handle events before any I/O requests.
1415		 */
1416		ep = g_mirror_event_get(sc);
1417		if (ep != NULL) {
1418			g_topology_lock();
1419			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1420				/* Update only device status. */
1421				G_MIRROR_DEBUG(3,
1422				    "Running event for device %s.",
1423				    sc->sc_name);
1424				ep->e_error = 0;
1425				g_mirror_update_device(sc, 1);
1426			} else {
1427				/* Update disk status. */
1428				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1429				     g_mirror_get_diskname(ep->e_disk));
1430				ep->e_error = g_mirror_update_disk(ep->e_disk,
1431				    ep->e_state);
1432				if (ep->e_error == 0)
1433					g_mirror_update_device(sc, 0);
1434			}
1435			g_topology_unlock();
1436			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1437				KASSERT(ep->e_error == 0,
1438				    ("Error cannot be handled."));
1439				g_mirror_event_free(ep);
1440			} else {
1441				ep->e_flags |= G_MIRROR_EVENT_DONE;
1442				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1443				    ep);
1444				mtx_lock(&sc->sc_events_mtx);
1445				wakeup(ep);
1446				mtx_unlock(&sc->sc_events_mtx);
1447			}
1448			if ((sc->sc_flags &
1449			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1450				if (g_mirror_try_destroy(sc))
1451					kthread_exit(0);
1452			}
1453			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1454			continue;
1455		}
1456		/*
1457		 * Now I/O requests.
1458		 */
1459		/* Get first request from the queue. */
1460		mtx_lock(&sc->sc_queue_mtx);
1461		bp = bioq_first(&sc->sc_queue);
1462		if (bp == NULL) {
1463			if ((sc->sc_flags &
1464			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1465				mtx_unlock(&sc->sc_queue_mtx);
1466				if (g_mirror_try_destroy(sc))
1467					kthread_exit(0);
1468				mtx_lock(&sc->sc_queue_mtx);
1469			}
1470		}
1471		if (sc->sc_sync.ds_ndisks > 0 &&
1472		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1473			mtx_unlock(&sc->sc_queue_mtx);
1474			/*
1475			 * It is time for synchronization...
1476			 */
1477			nreqs = 0;
1478			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1479				if (disk->d_state !=
1480				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1481					continue;
1482				}
1483				sync = &disk->d_sync;
1484				if (sync->ds_offset >=
1485				    sc->sc_provider->mediasize) {
1486					continue;
1487				}
1488				if (sync->ds_offset > sync->ds_offset_done)
1489					continue;
1490				if (sync->ds_resync != -1) {
1491					sync->ds_offset = sync->ds_resync;
1492					sync->ds_offset_done = sync->ds_resync;
1493					sync->ds_resync = -1;
1494				}
1495				g_mirror_sync_one(disk);
1496			}
1497			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1498			goto sleep;
1499		}
1500		if (bp == NULL) {
1501#define	G_MIRROR_IS_IDLE(sc)	((sc)->sc_idle ||			\
1502				 ((sc)->sc_provider != NULL &&		\
1503				  (sc)->sc_provider->acw == 0))
1504			if (G_MIRROR_IS_IDLE(sc)) {
1505				/*
1506				 * If we're already in idle state, sleep without
1507				 * a timeout.
1508				 */
1509				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1510				    "m:w1", 0);
1511				G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1512			} else {
1513				u_int idletime;
1514
1515				idletime = g_mirror_idletime;
1516				if (idletime == 0)
1517					idletime = 1;
1518				idletime *= hz;
1519				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1520				    "m:w2", idletime) == EWOULDBLOCK) {
1521					G_MIRROR_DEBUG(5, "%s: I'm here 4.",
1522					    __func__);
1523					/*
1524					 * No I/O requests for 'idletime' seconds, so mark
1525					 * components as clean.
1526					 */
1527					g_mirror_idle(sc);
1528				}
1529				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1530			}
1531			continue;
1532		}
1533		nreqs++;
1534		bioq_remove(&sc->sc_queue, bp);
1535		mtx_unlock(&sc->sc_queue_mtx);
1536
1537		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1538			g_mirror_regular_request(bp);
1539		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1540			u_int timeout, sps;
1541
1542			g_mirror_sync_request(bp);
1543sleep:
1544			sps = g_mirror_syncs_per_sec;
1545			if (sps == 0) {
1546				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1547				continue;
1548			}
1549			mtx_lock(&sc->sc_queue_mtx);
1550			if (bioq_first(&sc->sc_queue) != NULL) {
1551				mtx_unlock(&sc->sc_queue_mtx);
1552				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1553				continue;
1554			}
1555			timeout = hz / sps;
1556			if (timeout == 0)
1557				timeout = 1;
1558			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1559			    timeout);
1560		} else {
1561			g_mirror_register_request(bp);
1562		}
1563		G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1564	}
1565}
1566
1567/*
1568 * Adjust access counts on the disk's consumer to follow the mirror provider.
1569 */
1570static void
1571g_mirror_update_access(struct g_mirror_disk *disk)
1572{
1573	struct g_provider *pp;
1574	struct g_consumer *cp;
1575	int acr, acw, ace, cpw, error;
1576
1577	g_topology_assert();
1578
1579	cp = disk->d_consumer;
1580	pp = disk->d_softc->sc_provider;
1581	if (pp == NULL) {
1582		acr = -cp->acr;
1583		acw = -cp->acw;
1584		ace = -cp->ace;
1585	} else {
1586		acr = pp->acr - cp->acr;
1587		acw = pp->acw - cp->acw;
1588		ace = pp->ace - cp->ace;
1589		/* Grab an extra "exclusive" bit. */
1590		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1591			ace++;
1592	}
1593	if (acr == 0 && acw == 0 && ace == 0)
1594		return;
1595	cpw = cp->acw;
1596	error = g_access(cp, acr, acw, ace);
1597	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1598	    acw, ace, error);
1599	if (error != 0) {
1600		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1601		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1602		    G_MIRROR_EVENT_DONTWAIT);
1603		return;
1604	}
1605	if (cpw == 0 && cp->acw > 0) {
1606		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1607		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1608		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1609	} else if (cpw > 0 && cp->acw == 0) {
1610		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1611		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1612		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1613	}
1614}
1615
1616static void
1617g_mirror_sync_start(struct g_mirror_disk *disk)
1618{
1619	struct g_mirror_softc *sc;
1620	struct g_consumer *cp;
1621	int error;
1622
1623	g_topology_assert();
1624
1625	sc = disk->d_softc;
1626	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1627	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1628	    sc->sc_state));
1629	cp = disk->d_consumer;
1630	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1631	    ("Consumer %s already opened.", cp->provider->name));
1632
1633	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1634	    g_mirror_get_diskname(disk));
1635	error = g_access(cp, 0, 1, 1);
1636	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1637	    1, error);
1638	if (error != 0) {
1639		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1640		    G_MIRROR_EVENT_DONTWAIT);
1641		return;
1642	}
1643	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1644	KASSERT(disk->d_sync.ds_consumer == NULL,
1645	    ("Sync consumer already exists (device=%s, disk=%s).",
1646	    sc->sc_name, g_mirror_get_diskname(disk)));
1647	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1648	disk->d_sync.ds_consumer->private = disk;
1649	disk->d_sync.ds_consumer->index = 0;
1650	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1651	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1652	    disk->d_softc->sc_name, error));
1653	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1654	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1655	    disk->d_softc->sc_name, error));
1656	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1657	sc->sc_sync.ds_ndisks++;
1658}
1659
1660/*
1661 * Stop synchronization process.
1662 * type: 0 - synchronization finished
1663 *       1 - synchronization stopped
1664 */
1665static void
1666g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1667{
1668	struct g_consumer *cp;
1669
1670	g_topology_assert();
1671	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1672	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1673	    g_mirror_disk_state2str(disk->d_state)));
1674	if (disk->d_sync.ds_consumer == NULL)
1675		return;
1676
1677	if (type == 0) {
1678		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1679		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1680	} else /* if (type == 1) */ {
1681		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1682		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1683	}
1684	cp = disk->d_sync.ds_consumer;
1685	g_access(cp, -1, 0, 0);
1686	g_mirror_kill_consumer(disk->d_softc, cp);
1687	free(disk->d_sync.ds_data, M_MIRROR);
1688	disk->d_sync.ds_consumer = NULL;
1689	disk->d_softc->sc_sync.ds_ndisks--;
1690	cp = disk->d_consumer;
1691	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1692	    ("Consumer %s not opened.", cp->provider->name));
1693	g_access(cp, 0, -1, -1);
1694	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1695	    -1, 0);
1696	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1697}
1698
1699static void
1700g_mirror_launch_provider(struct g_mirror_softc *sc)
1701{
1702	struct g_mirror_disk *disk;
1703	struct g_provider *pp;
1704
1705	g_topology_assert();
1706
1707	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1708	pp->mediasize = sc->sc_mediasize;
1709	pp->sectorsize = sc->sc_sectorsize;
1710	sc->sc_provider = pp;
1711	g_error_provider(pp, 0);
1712	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1713	    pp->name);
1714	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1715		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1716			g_mirror_sync_start(disk);
1717	}
1718}
1719
1720static void
1721g_mirror_destroy_provider(struct g_mirror_softc *sc)
1722{
1723	struct g_mirror_disk *disk;
1724	struct bio *bp;
1725
1726	g_topology_assert();
1727	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1728	    sc->sc_name));
1729
1730	g_error_provider(sc->sc_provider, ENXIO);
1731	mtx_lock(&sc->sc_queue_mtx);
1732	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1733		bioq_remove(&sc->sc_queue, bp);
1734		g_io_deliver(bp, ENXIO);
1735	}
1736	mtx_unlock(&sc->sc_queue_mtx);
1737	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1738	    sc->sc_provider->name);
1739	sc->sc_provider->flags |= G_PF_WITHER;
1740	g_orphan_provider(sc->sc_provider, ENXIO);
1741	sc->sc_provider = NULL;
1742	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1743		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1744			g_mirror_sync_stop(disk, 1);
1745	}
1746}
1747
1748static void
1749g_mirror_go(void *arg)
1750{
1751	struct g_mirror_softc *sc;
1752
1753	sc = arg;
1754	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1755	g_mirror_event_send(sc, 0,
1756	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1757}
1758
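/*
 * Choose the initial state of a freshly connected disk by comparing its
 * syncid with the device syncid: an equal syncid means ACTIVE (or resumed
 * synchronization), a smaller one means a full rebuild (or STALE when
 * autosynchronization is disabled) and a bigger one means the component is
 * fresher than the running mirror and is refused.
 */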
1759static u_int
1760g_mirror_determine_state(struct g_mirror_disk *disk)
1761{
1762	struct g_mirror_softc *sc;
1763	u_int state;
1764
1765	sc = disk->d_softc;
1766	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1767		if ((disk->d_flags &
1768		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1769			/* Disk does not need synchronization. */
1770			state = G_MIRROR_DISK_STATE_ACTIVE;
1771		} else {
1772			if ((sc->sc_flags &
1773			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1774			    (disk->d_flags &
1775			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1776				/*
1777				 * We can start synchronization from
1778				 * the stored offset.
1779				 */
1780				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1781			} else {
1782				state = G_MIRROR_DISK_STATE_STALE;
1783			}
1784		}
1785	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1786		/*
1787		 * Reset all synchronization data for this disk,
1788		 * because even if it was synchronized before, it was
1789		 * synchronized against disks with a different syncid.
1790		 */
1791		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1792		disk->d_sync.ds_offset = 0;
1793		disk->d_sync.ds_offset_done = 0;
1794		disk->d_sync.ds_syncid = sc->sc_syncid;
1795		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1796		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1797			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1798		} else {
1799			state = G_MIRROR_DISK_STATE_STALE;
1800		}
1801	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1802		/*
1803		 * Not good, NOT GOOD!
1804		 * It means that the mirror was started on stale disks
1805		 * and a fresher disk has just arrived.
1806		 * If there were writes, the mirror is fucked up, sorry.
1807		 * I think the best choice here is not to touch
1808		 * this disk and to inform the user loudly.
1809		 */
1810		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1811		    "disk (%s) arrived! It will not be connected to the "
1812		    "running device.", sc->sc_name,
1813		    g_mirror_get_diskname(disk));
1814		g_mirror_destroy_disk(disk);
1815		state = G_MIRROR_DISK_STATE_NONE;
1816		/* Return immediately, because disk was destroyed. */
1817		return (state);
1818	}
1819	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1820	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1821	return (state);
1822}
1823
1824/*
1825 * Update device state.
1826 */
1827static void
1828g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1829{
1830	struct g_mirror_disk *disk;
1831	u_int state;
1832
1833	g_topology_assert();
1834
1835	switch (sc->sc_state) {
1836	case G_MIRROR_DEVICE_STATE_STARTING:
1837	    {
1838		struct g_mirror_disk *pdisk;
1839		u_int dirty, ndisks, syncid;
1840
1841		KASSERT(sc->sc_provider == NULL,
1842		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1843		/*
1844		 * Are we ready? We are, if all disks are connected or
1845		 * if we have any disks and 'force' is true.
1846		 */
1847		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1848		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1849			;
1850		} else if (g_mirror_ndisks(sc, -1) == 0) {
1851			/*
1852			 * Disks went down in starting phase, so destroy
1853			 * device.
1854			 */
1855			callout_drain(&sc->sc_callout);
1856			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1857			return;
1858		} else {
1859			return;
1860		}
1861
1862		/*
1863		 * Activate all disks with the biggest syncid.
1864		 */
1865		if (force) {
1866			/*
1867			 * If 'force' is true, we have been called due to
1868			 * timeout, so don't bother canceling timeout.
1869			 */
1870			ndisks = 0;
1871			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1872				if ((disk->d_flags &
1873				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1874					ndisks++;
1875				}
1876			}
1877			if (ndisks == 0) {
1878				/* No valid disks found, destroy device. */
1879				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1880				return;
1881			}
1882		} else {
1883			/* Cancel timeout. */
1884			callout_drain(&sc->sc_callout);
1885		}
1886
1887		/*
1888		 * Find disk with the biggest syncid.
1889		 */
1890		syncid = 0;
1891		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1892			if (disk->d_sync.ds_syncid > syncid)
1893				syncid = disk->d_sync.ds_syncid;
1894		}
1895
1896		/*
1897		 * Here we need to look for dirty disks and if all disks
1898		 * with the biggest syncid are dirty, we have to choose
1899		 * one with the biggest priority and rebuild the rest.
1900		 */
1901		/*
1902		 * Find the number of dirty disks with the biggest syncid.
1903		 * Find the number of disks with the biggest syncid.
1904		 * While here, find a disk with the biggest priority.
1905		 */
1906		dirty = ndisks = 0;
1907		pdisk = NULL;
1908		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1909			if (disk->d_sync.ds_syncid != syncid)
1910				continue;
1911			if ((disk->d_flags &
1912			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1913				continue;
1914			}
1915			ndisks++;
1916			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1917				dirty++;
1918				if (pdisk == NULL ||
1919				    pdisk->d_priority < disk->d_priority) {
1920					pdisk = disk;
1921				}
1922			}
1923		}
1924		if (dirty == 0) {
1925			/* No dirty disks at all, great. */
1926		} else if (dirty == ndisks) {
1927			/*
1928			 * Force synchronization for all dirty disks except one
1929			 * with the biggest priority.
1930			 */
1931			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1932			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1933			    "master disk for synchronization.",
1934			    g_mirror_get_diskname(pdisk), sc->sc_name);
1935			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1936				if (disk->d_sync.ds_syncid != syncid)
1937					continue;
1938				if ((disk->d_flags &
1939				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1940					continue;
1941				}
1942				KASSERT((disk->d_flags &
1943				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1944				    ("Disk %s isn't marked as dirty.",
1945				    g_mirror_get_diskname(disk)));
1946				/* Skip the disk with the biggest priority. */
1947				if (disk == pdisk)
1948					continue;
1949				disk->d_sync.ds_syncid = 0;
1950			}
1951		} else if (dirty < ndisks) {
1952			/*
1953			 * Force synchronization for all dirty disks.
1954			 * We have some non-dirty disks.
1955			 */
1956			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1957				if (disk->d_sync.ds_syncid != syncid)
1958					continue;
1959				if ((disk->d_flags &
1960				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1961					continue;
1962				}
1963				if ((disk->d_flags &
1964				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1965					continue;
1966				}
1967				disk->d_sync.ds_syncid = 0;
1968			}
1969		}
1970
1971		/* Reset hint. */
1972		sc->sc_hint = NULL;
1973		sc->sc_syncid = syncid;
1974		if (force) {
1975			/* Remember to bump syncid on first write. */
1976			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1977		}
1978		state = G_MIRROR_DEVICE_STATE_RUNNING;
1979		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1980		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1981		    g_mirror_device_state2str(state));
1982		sc->sc_state = state;
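		/*
		 * Determine the initial state of every disk and post the
		 * corresponding events; a STALE disk means the syncid has to
		 * be bumped on the first write.
		 */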
1983		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1984			state = g_mirror_determine_state(disk);
1985			g_mirror_event_send(disk, state,
1986			    G_MIRROR_EVENT_DONTWAIT);
1987			if (state == G_MIRROR_DISK_STATE_STALE) {
1988				sc->sc_bump_syncid =
1989				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1990			}
1991		}
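		/* Wake up threads waiting for the device, e.g. g_mirror_rootwait(). */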
1992		wakeup(&g_mirror_class);
1993		break;
1994	    }
1995	case G_MIRROR_DEVICE_STATE_RUNNING:
1996		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1997		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1998			/*
1999			 * No active disks or no disks at all,
2000			 * so destroy device.
2001			 */
2002			if (sc->sc_provider != NULL)
2003				g_mirror_destroy_provider(sc);
2004			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2005			break;
2006		} else if (g_mirror_ndisks(sc,
2007		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2008		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2009			/*
2010			 * We have active disks, so launch the provider if it doesn't
2011			 * exist.
2012			 */
2013			if (sc->sc_provider == NULL)
2014				g_mirror_launch_provider(sc);
2015		}
2016		/*
2017		 * Bump syncid here, if we need to do it immediately.
2018		 */
2019		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
2020			sc->sc_bump_syncid = 0;
2021			g_mirror_bump_syncid(sc);
2022		}
2023		break;
2024	default:
2025		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2026		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2027		break;
2028	}
2029}
2030
2031/*
2032 * Update disk state and device state if needed.
2033 */
2034#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2035	"Disk %s state changed from %s to %s (device %s).",		\
2036	g_mirror_get_diskname(disk),					\
2037	g_mirror_disk_state2str(disk->d_state),				\
2038	g_mirror_disk_state2str(state), sc->sc_name)
2039static int
2040g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2041{
2042	struct g_mirror_softc *sc;
2043
2044	g_topology_assert();
2045
2046	sc = disk->d_softc;
2047again:
2048	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2049	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2050	    g_mirror_disk_state2str(state));
2051	switch (state) {
2052	case G_MIRROR_DISK_STATE_NEW:
2053		/*
2054		 * Possible scenarios:
2055		 * 1. A new disk arrives.
2056		 */
2057		/* Previous state should be NONE. */
2058		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2059		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2060		    g_mirror_disk_state2str(disk->d_state)));
2061		DISK_STATE_CHANGED();
2062
2063		disk->d_state = state;
2064		if (LIST_EMPTY(&sc->sc_disks))
2065			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2066		else {
2067			struct g_mirror_disk *dp;
2068
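			/*
			 * Keep the list sorted by descending priority: insert
			 * the new disk before the first one whose priority is
			 * not greater, or after the last disk otherwise.
			 */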
2069			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2070				if (disk->d_priority >= dp->d_priority) {
2071					LIST_INSERT_BEFORE(dp, disk, d_next);
2072					dp = NULL;
2073					break;
2074				}
2075				if (LIST_NEXT(dp, d_next) == NULL)
2076					break;
2077			}
2078			if (dp != NULL)
2079				LIST_INSERT_AFTER(dp, disk, d_next);
2080		}
2081		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2082		    sc->sc_name, g_mirror_get_diskname(disk));
2083		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2084			break;
2085		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2086		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2087		    g_mirror_device_state2str(sc->sc_state),
2088		    g_mirror_get_diskname(disk),
2089		    g_mirror_disk_state2str(disk->d_state)));
2090		state = g_mirror_determine_state(disk);
2091		if (state != G_MIRROR_DISK_STATE_NONE)
2092			goto again;
2093		break;
2094	case G_MIRROR_DISK_STATE_ACTIVE:
2095		/*
2096		 * Possible scenarios:
2097		 * 1. New disk does not need synchronization.
2098		 * 2. Synchronization process finished successfully.
2099		 */
2100		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2101		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2102		    g_mirror_device_state2str(sc->sc_state),
2103		    g_mirror_get_diskname(disk),
2104		    g_mirror_disk_state2str(disk->d_state)));
2105		/* Previous state should be NEW or SYNCHRONIZING. */
2106		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2107		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2108		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2109		    g_mirror_disk_state2str(disk->d_state)));
2110		DISK_STATE_CHANGED();
2111
2112		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2113			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2114		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2115			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2116			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2117			g_mirror_sync_stop(disk, 0);
2118		}
2119		disk->d_state = state;
2120		disk->d_sync.ds_offset = 0;
2121		disk->d_sync.ds_offset_done = 0;
2122		g_mirror_update_access(disk);
2123		g_mirror_update_metadata(disk);
2124		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2125		    sc->sc_name, g_mirror_get_diskname(disk));
2126		break;
2127	case G_MIRROR_DISK_STATE_STALE:
2128		/*
2129		 * Possible scenarios:
2130		 * 1. Stale disk was connected.
2131		 */
2132		/* Previous state should be NEW. */
2133		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2134		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2135		    g_mirror_disk_state2str(disk->d_state)));
2136		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2137		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2138		    g_mirror_device_state2str(sc->sc_state),
2139		    g_mirror_get_diskname(disk),
2140		    g_mirror_disk_state2str(disk->d_state)));
2141		/*
2142		 * STALE state is only possible if device is marked
2143		 * NOAUTOSYNC.
2144		 */
2145		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2146		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2147		    g_mirror_device_state2str(sc->sc_state),
2148		    g_mirror_get_diskname(disk),
2149		    g_mirror_disk_state2str(disk->d_state)));
2150		DISK_STATE_CHANGED();
2151
2152		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2153		disk->d_state = state;
2154		g_mirror_update_metadata(disk);
2155		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2156		    sc->sc_name, g_mirror_get_diskname(disk));
2157		break;
2158	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2159		/*
2160		 * Possible scenarios:
2161		 * 1. Disk which needs synchronization was connected.
2162		 */
2163		/* Previous state should be NEW. */
2164		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2165		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2166		    g_mirror_disk_state2str(disk->d_state)));
2167		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2168		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2169		    g_mirror_device_state2str(sc->sc_state),
2170		    g_mirror_get_diskname(disk),
2171		    g_mirror_disk_state2str(disk->d_state)));
2172		DISK_STATE_CHANGED();
2173
2174		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2175			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2176		disk->d_state = state;
2177		if (sc->sc_provider != NULL) {
2178			g_mirror_sync_start(disk);
2179			g_mirror_update_metadata(disk);
2180		}
2181		break;
2182	case G_MIRROR_DISK_STATE_DISCONNECTED:
2183		/*
2184		 * Possible scenarios:
2185		 * 1. Device wasn't running yet, but a disk disappeared.
2186		 * 2. Disk was active and disappeared.
2187		 * 3. Disk disappeared during the synchronization process.
2188		 */
2189		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2190			/*
2191			 * Previous state should be ACTIVE, STALE or
2192			 * SYNCHRONIZING.
2193			 */
2194			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2195			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2196			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2197			    ("Wrong disk state (%s, %s).",
2198			    g_mirror_get_diskname(disk),
2199			    g_mirror_disk_state2str(disk->d_state)));
2200		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2201			/* Previous state should be NEW. */
2202			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2203			    ("Wrong disk state (%s, %s).",
2204			    g_mirror_get_diskname(disk),
2205			    g_mirror_disk_state2str(disk->d_state)));
2206			/*
2207			 * Reset syncid bumping if the disk disappeared in the
2208			 * STARTING state.
2209			 */
2210			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2211				sc->sc_bump_syncid = 0;
2212#ifdef	INVARIANTS
2213		} else {
2214			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2215			    sc->sc_name,
2216			    g_mirror_device_state2str(sc->sc_state),
2217			    g_mirror_get_diskname(disk),
2218			    g_mirror_disk_state2str(disk->d_state)));
2219#endif
2220		}
2221		DISK_STATE_CHANGED();
2222		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2223		    sc->sc_name, g_mirror_get_diskname(disk));
2224
2225		g_mirror_destroy_disk(disk);
2226		break;
2227	case G_MIRROR_DISK_STATE_DESTROY:
2228	    {
2229		int error;
2230
2231		error = g_mirror_clear_metadata(disk);
2232		if (error != 0)
2233			return (error);
2234		DISK_STATE_CHANGED();
2235		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2236		    sc->sc_name, g_mirror_get_diskname(disk));
2237
2238		g_mirror_destroy_disk(disk);
2239		sc->sc_ndisks--;
2240		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2241			g_mirror_update_metadata(disk);
2242		}
2243		break;
2244	    }
2245	default:
2246		KASSERT(1 == 0, ("Unknown state (%u).", state));
2247		break;
2248	}
2249	return (0);
2250}
2251#undef	DISK_STATE_CHANGED
2252
2253static int
2254g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2255{
2256	struct g_provider *pp;
2257	u_char *buf;
2258	int error;
2259
2260	g_topology_assert();
2261
2262	error = g_access(cp, 1, 0, 0);
2263	if (error != 0)
2264		return (error);
2265	pp = cp->provider;
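	/*
	 * Drop the topology lock around the read; g_read_data() performs
	 * I/O and may sleep.
	 */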
2266	g_topology_unlock();
2267	/* Metadata are stored in the last sector. */
2268	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2269	    &error);
2270	g_topology_lock();
2271	if (buf == NULL) {
2272		g_access(cp, -1, 0, 0);
2273		return (error);
2274	}
2275	if (error != 0) {
2276		g_access(cp, -1, 0, 0);
2277		g_free(buf);
2278		return (error);
2279	}
2280	error = g_access(cp, -1, 0, 0);
2281	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2282
2283	/* Decode metadata. */
2284	error = mirror_metadata_decode(buf, md);
2285	g_free(buf);
2286	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2287		return (EINVAL);
2288	if (error != 0) {
2289		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2290		    cp->provider->name);
2291		return (error);
2292	}
2293
2294	return (0);
2295}
2296
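/*
 * Check that metadata read from a component are consistent with the
 * configuration of the existing device before the disk is added to it.
 */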
2297static int
2298g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2299    struct g_mirror_metadata *md)
2300{
2301
2302	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2303		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2304		    pp->name, md->md_did);
2305		return (EEXIST);
2306	}
2307	if (md->md_all != sc->sc_ndisks) {
2308		G_MIRROR_DEBUG(1,
2309		    "Invalid '%s' field on disk %s (device %s), skipping.",
2310		    "md_all", pp->name, sc->sc_name);
2311		return (EINVAL);
2312	}
2313	if (md->md_slice != sc->sc_slice) {
2314		G_MIRROR_DEBUG(1,
2315		    "Invalid '%s' field on disk %s (device %s), skipping.",
2316		    "md_slice", pp->name, sc->sc_name);
2317		return (EINVAL);
2318	}
2319	if (md->md_balance != sc->sc_balance) {
2320		G_MIRROR_DEBUG(1,
2321		    "Invalid '%s' field on disk %s (device %s), skipping.",
2322		    "md_balance", pp->name, sc->sc_name);
2323		return (EINVAL);
2324	}
2325	if (md->md_mediasize != sc->sc_mediasize) {
2326		G_MIRROR_DEBUG(1,
2327		    "Invalid '%s' field on disk %s (device %s), skipping.",
2328		    "md_mediasize", pp->name, sc->sc_name);
2329		return (EINVAL);
2330	}
2331	if (sc->sc_mediasize > pp->mediasize) {
2332		G_MIRROR_DEBUG(1,
2333		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2334		    sc->sc_name);
2335		return (EINVAL);
2336	}
2337	if (md->md_sectorsize != sc->sc_sectorsize) {
2338		G_MIRROR_DEBUG(1,
2339		    "Invalid '%s' field on disk %s (device %s), skipping.",
2340		    "md_sectorsize", pp->name, sc->sc_name);
2341		return (EINVAL);
2342	}
2343	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2344		G_MIRROR_DEBUG(1,
2345		    "Invalid sector size of disk %s (device %s), skipping.",
2346		    pp->name, sc->sc_name);
2347		return (EINVAL);
2348	}
2349	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2350		G_MIRROR_DEBUG(1,
2351		    "Invalid device flags on disk %s (device %s), skipping.",
2352		    pp->name, sc->sc_name);
2353		return (EINVAL);
2354	}
2355	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2356		G_MIRROR_DEBUG(1,
2357		    "Invalid disk flags on disk %s (device %s), skipping.",
2358		    pp->name, sc->sc_name);
2359		return (EINVAL);
2360	}
2361	return (0);
2362}
2363
2364static int
2365g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2366    struct g_mirror_metadata *md)
2367{
2368	struct g_mirror_disk *disk;
2369	int error;
2370
2371	g_topology_assert();
2372	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2373
2374	error = g_mirror_check_metadata(sc, pp, md);
2375	if (error != 0)
2376		return (error);
2377	disk = g_mirror_init_disk(sc, pp, md, &error);
2378	if (disk == NULL)
2379		return (error);
2380	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2381	    G_MIRROR_EVENT_WAIT);
2382	return (error);
2383}
2384
2385static int
2386g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2387{
2388	struct g_mirror_softc *sc;
2389	struct g_mirror_disk *disk;
2390	int dcr, dcw, dce, err, error;
2391
2392	g_topology_assert();
2393	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2394	    acw, ace);
2395
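	/* Compute the access counts the provider will have after this request. */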
2396	dcr = pp->acr + acr;
2397	dcw = pp->acw + acw;
2398	dce = pp->ace + ace;
2399
2400	/* On first open, grab an extra "exclusive" bit */
2401	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2402		ace++;
2403	/* ... and let go of it on last close */
2404	if (dcr == 0 && dcw == 0 && dce == 0)
2405		ace--;
2406
2407	sc = pp->geom->softc;
2408	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2409		if (acr <= 0 && acw <= 0 && ace <= 0)
2410			return (0);
2411		else
2412			return (ENXIO);
2413	}
2414	error = ENXIO;
2415	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2416		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2417			continue;
2418		err = g_access(disk->d_consumer, acr, acw, ace);
2419		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2420		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2421		if (err == 0) {
2422			/*
2423			 * Mark disk dirty on first write open and clean on last write close.
2424			 */
2425			if (pp->acw == 0 && dcw > 0) {
2426				G_MIRROR_DEBUG(1,
2427				    "Disk %s (device %s) marked as dirty.",
2428				    g_mirror_get_diskname(disk), sc->sc_name);
2429				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2430				g_mirror_update_metadata(disk);
2431			} else if (pp->acw > 0 && dcw == 0) {
2432				G_MIRROR_DEBUG(1,
2433				    "Disk %s (device %s) marked as clean.",
2434				    g_mirror_get_diskname(disk), sc->sc_name);
2435				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2436				g_mirror_update_metadata(disk);
2437			}
2438			error = 0;
2439		} else {
2440			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2441			g_mirror_event_send(disk,
2442			    G_MIRROR_DISK_STATE_DISCONNECTED,
2443			    G_MIRROR_EVENT_DONTWAIT);
2444		}
2445	}
2446	/*
2447	 * Be sure to return 0 for negative access requests.
2448	 * In case of some HW problems, it is possible that we don't have
2449	 * any active disk here, so the loop above will be a no-op and error
2450	 * will remain ENXIO.
2451	 */
2452	if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
2453		error = 0;
2454	return (error);
2455}
2456
2457static struct g_geom *
2458g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2459{
2460	struct g_mirror_softc *sc;
2461	struct g_geom *gp;
2462	int error, timeout;
2463
2464	g_topology_assert();
2465	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2466	    md->md_mid);
2467
2468	/* At least one disk is required. */
2469	if (md->md_all < 1)
2470		return (NULL);
2471	/*
2472	 * Action geom.
2473	 */
2474	gp = g_new_geomf(mp, "%s", md->md_name);
2475	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2476	gp->start = g_mirror_start;
2477	gp->spoiled = g_mirror_spoiled;
2478	gp->orphan = g_mirror_orphan;
2479	gp->access = g_mirror_access;
2480	gp->dumpconf = g_mirror_dumpconf;
2481
2482	sc->sc_id = md->md_mid;
2483	sc->sc_slice = md->md_slice;
2484	sc->sc_balance = md->md_balance;
2485	sc->sc_mediasize = md->md_mediasize;
2486	sc->sc_sectorsize = md->md_sectorsize;
2487	sc->sc_ndisks = md->md_all;
2488	sc->sc_flags = md->md_mflags;
2489	sc->sc_bump_syncid = 0;
2490	sc->sc_idle = 0;
2491	bioq_init(&sc->sc_queue);
2492	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2493	LIST_INIT(&sc->sc_disks);
2494	TAILQ_INIT(&sc->sc_events);
2495	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2496	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2497	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2498	gp->softc = sc;
2499	sc->sc_geom = gp;
2500	sc->sc_provider = NULL;
2501	/*
2502	 * Synchronization geom.
2503	 */
2504	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2505	gp->softc = sc;
2506	gp->orphan = g_mirror_orphan;
2507	sc->sc_sync.ds_geom = gp;
2508	sc->sc_sync.ds_ndisks = 0;
2509	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2510	    "g_mirror %s", md->md_name);
2511	if (error != 0) {
2512		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2513		    sc->sc_name);
2514		g_destroy_geom(sc->sc_sync.ds_geom);
2515		mtx_destroy(&sc->sc_events_mtx);
2516		mtx_destroy(&sc->sc_queue_mtx);
2517		g_destroy_geom(sc->sc_geom);
2518		free(sc, M_MIRROR);
2519		return (NULL);
2520	}
2521
2522	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2523
2524	/*
2525	 * Run the startup timeout (g_mirror_go() forces the start on expiry).
2526	 */
2527	timeout = g_mirror_timeout * hz;
2528	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2529	return (sc->sc_geom);
2530}
2531
2532int
2533g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2534{
2535	struct g_provider *pp;
2536
2537	g_topology_assert();
2538
2539	if (sc == NULL)
2540		return (ENXIO);
2541	pp = sc->sc_provider;
2542	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2543		if (force) {
2544			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2545			    "can't be removed definitively.", pp->name);
2546		} else {
2547			G_MIRROR_DEBUG(1,
2548			    "Device %s is still open (r%dw%de%d).", pp->name,
2549			    pp->acr, pp->acw, pp->ace);
2550			return (EBUSY);
2551		}
2552	}
2553
2554	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2555	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
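	/*
	 * Wake up the worker thread so it notices the DESTROY flag, then
	 * wait below until it exits.
	 */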
2556	g_topology_unlock();
2557	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2558	mtx_lock(&sc->sc_queue_mtx);
2559	wakeup(sc);
2560	mtx_unlock(&sc->sc_queue_mtx);
2561	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2562	while (sc->sc_worker != NULL)
2563		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2564	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2565	g_topology_lock();
2566	g_mirror_destroy_device(sc);
2567	free(sc, M_MIRROR);
2568	return (0);
2569}
2570
2571static void
2572g_mirror_taste_orphan(struct g_consumer *cp)
2573{
2574
2575	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2576	    cp->provider->name));
2577}
2578
2579static struct g_geom *
2580g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2581{
2582	struct g_mirror_metadata md;
2583	struct g_mirror_softc *sc;
2584	struct g_consumer *cp;
2585	struct g_geom *gp;
2586	int error;
2587
2588	g_topology_assert();
2589	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2590	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2591
2592	gp = g_new_geomf(mp, "mirror:taste");
2593	/*
2594	 * This orphan function should never be called.
2595	 */
2596	gp->orphan = g_mirror_taste_orphan;
2597	cp = g_new_consumer(gp);
2598	g_attach(cp, pp);
2599	error = g_mirror_read_metadata(cp, &md);
2600	g_detach(cp);
2601	g_destroy_consumer(cp);
2602	g_destroy_geom(gp);
2603	if (error != 0)
2604		return (NULL);
2605	gp = NULL;
2606
2607	if (md.md_version > G_MIRROR_VERSION) {
2608		printf("geom_mirror.ko module is too old to handle %s.\n",
2609		    pp->name);
2610		return (NULL);
2611	}
2612	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2613		return (NULL);
2614	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2615		G_MIRROR_DEBUG(0,
2616		    "Device %s: provider %s marked as inactive, skipping.",
2617		    md.md_name, pp->name);
2618		return (NULL);
2619	}
2620	if (g_mirror_debug >= 2)
2621		mirror_metadata_dump(&md);
2622
2623	/*
2624	 * Let's check if the device already exists.
2625	 */
2626	sc = NULL;
2627	LIST_FOREACH(gp, &mp->geom, geom) {
2628		sc = gp->softc;
2629		if (sc == NULL)
2630			continue;
2631		if (sc->sc_sync.ds_geom == gp)
2632			continue;
2633		if (strcmp(md.md_name, sc->sc_name) != 0)
2634			continue;
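		/* Same name but a different mirror ID means a conflicting device. */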
2635		if (md.md_mid != sc->sc_id) {
2636			G_MIRROR_DEBUG(0, "Device %s already configured.",
2637			    sc->sc_name);
2638			return (NULL);
2639		}
2640		break;
2641	}
2642	if (gp == NULL) {
2643		gp = g_mirror_create(mp, &md);
2644		if (gp == NULL) {
2645			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2646			    md.md_name);
2647			return (NULL);
2648		}
2649		sc = gp->softc;
2650	}
2651	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2652	error = g_mirror_add_disk(sc, pp, &md);
2653	if (error != 0) {
2654		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2655		    pp->name, gp->name, error);
2656		if (LIST_EMPTY(&sc->sc_disks))
2657			g_mirror_destroy(sc, 1);
2658		return (NULL);
2659	}
2660	return (gp);
2661}
2662
2663static int
2664g_mirror_destroy_geom(struct gctl_req *req __unused,
2665    struct g_class *mp __unused, struct g_geom *gp)
2666{
2667
2668	return (g_mirror_destroy(gp->softc, 0));
2669}
2670
2671static void
2672g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2673    struct g_consumer *cp, struct g_provider *pp)
2674{
2675	struct g_mirror_softc *sc;
2676
2677	g_topology_assert();
2678
2679	sc = gp->softc;
2680	if (sc == NULL)
2681		return;
2682	/* Skip synchronization geom. */
2683	if (gp == sc->sc_sync.ds_geom)
2684		return;
2685	if (pp != NULL) {
2686		/* Nothing here. */
2687	} else if (cp != NULL) {
2688		struct g_mirror_disk *disk;
2689
2690		disk = cp->private;
2691		if (disk == NULL)
2692			return;
2693		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2694		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
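			/* Synchronization progress as a percentage of the mirror size. */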
2695			sbuf_printf(sb, "%s<Synchronized>", indent);
2696			if (disk->d_sync.ds_offset_done == 0)
2697				sbuf_printf(sb, "0%%");
2698			else {
2699				sbuf_printf(sb, "%u%%",
2700				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2701				    sc->sc_provider->mediasize));
2702			}
2703			sbuf_printf(sb, "</Synchronized>\n");
2704		}
2705		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2706		    disk->d_sync.ds_syncid);
2707		sbuf_printf(sb, "%s<Flags>", indent);
2708		if (disk->d_flags == 0)
2709			sbuf_printf(sb, "NONE");
2710		else {
2711			int first = 1;
2712
2713#define	ADD_FLAG(flag, name)	do {					\
2714	if ((disk->d_flags & (flag)) != 0) {				\
2715		if (!first)						\
2716			sbuf_printf(sb, ", ");				\
2717		else							\
2718			first = 0;					\
2719		sbuf_printf(sb, name);					\
2720	}								\
2721} while (0)
2722			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2723			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2724			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2725			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2726			    "SYNCHRONIZING");
2727			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2728#undef	ADD_FLAG
2729		}
2730		sbuf_printf(sb, "</Flags>\n");
2731		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2732		    disk->d_priority);
2733		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2734		    g_mirror_disk_state2str(disk->d_state));
2735	} else {
2736		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2737		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2738		sbuf_printf(sb, "%s<Flags>", indent);
2739		if (sc->sc_flags == 0)
2740			sbuf_printf(sb, "NONE");
2741		else {
2742			int first = 1;
2743
2744#define	ADD_FLAG(flag, name)	do {					\
2745	if ((sc->sc_flags & (flag)) != 0) {				\
2746		if (!first)						\
2747			sbuf_printf(sb, ", ");				\
2748		else							\
2749			first = 0;					\
2750		sbuf_printf(sb, name);					\
2751	}								\
2752} while (0)
2753			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2754#undef	ADD_FLAG
2755		}
2756		sbuf_printf(sb, "</Flags>\n");
2757		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2758		    (u_int)sc->sc_slice);
2759		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2760		    balance_name(sc->sc_balance));
2761		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2762		    sc->sc_ndisks);
2763		sbuf_printf(sb, "%s<State>", indent);
2764		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2765			sbuf_printf(sb, "%s", "STARTING");
2766		else if (sc->sc_ndisks ==
2767		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2768			sbuf_printf(sb, "%s", "COMPLETE");
2769		else
2770			sbuf_printf(sb, "%s", "DEGRADED");
2771		sbuf_printf(sb, "</State>\n");
2772	}
2773}
2774
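/*
 * Return non-zero once every configured mirror has a healthy provider, so
 * that the wait for the root file system can finish.
 */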
2775static int
2776g_mirror_can_go(void)
2777{
2778	struct g_mirror_softc *sc;
2779	struct g_geom *gp;
2780	struct g_provider *pp;
2781	int can_go;
2782
2783	DROP_GIANT();
2784	can_go = 1;
2785	g_topology_lock();
2786	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2787		sc = gp->softc;
2788		if (sc == NULL) {
2789			can_go = 0;
2790			break;
2791		}
2792		pp = sc->sc_provider;
2793		if (pp == NULL || pp->error != 0) {
2794			can_go = 0;
2795			break;
2796		}
2797	}
2798	g_topology_unlock();
2799	PICKUP_GIANT();
2800	return (can_go);
2801}
2802
2803static void
2804g_mirror_rootwait(void)
2805{
2806
2807	/*
2808	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called
2809	 * HACK: before we get providers for tasting.
2810	 */
2811	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2812	/*
2813	 * Wait for mirrors in degraded state.
2814	 */
2815	for (;;) {
2816		if (g_mirror_can_go())
2817			break;
2818		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2819	}
2820}
2821
2822SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2823
2824DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2825