g_mirror.c revision 137251
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137251 2004-11-05 10:55:04Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/bitstring.h>
41#include <vm/uma.h>
42#include <geom/geom.h>
43#include <sys/proc.h>
44#include <sys/kthread.h>
45#include <geom/mirror/g_mirror.h>
46
47
48static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
49
50SYSCTL_DECL(_kern_geom);
51SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
52u_int g_mirror_debug = 0;
53TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
54SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
55    "Debug level");
56static u_int g_mirror_timeout = 4;
57TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
58SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
59    0, "Time to wait on all mirror components");
60static u_int g_mirror_idletime = 5;
61TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
62SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
63    &g_mirror_idletime, 0, "Seconds of idle time before marking components as clean");
64static u_int g_mirror_reqs_per_sync = 5;
65SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
66    &g_mirror_reqs_per_sync, 0,
67    "Number of regular I/O requests per synchronization request");
68static u_int g_mirror_syncs_per_sec = 100;
69SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
70    &g_mirror_syncs_per_sec, 0,
71    "Number of synchronization requests per second");
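/*
 * The debug, timeout and idletime knobs can also be set as loader
 * tunables (kern.geom.mirror.*).  All of the sysctls above are
 * read-write, so they can be changed at run time with sysctl(8),
 * e.g. sysctl kern.geom.mirror.syncs_per_sec=50.
 */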
72
73#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
74	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
75	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
76	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
77} while (0)
78
79
80static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
81    struct g_geom *gp);
82static g_taste_t g_mirror_taste;
83
84struct g_class g_mirror_class = {
85	.name = G_MIRROR_CLASS_NAME,
86	.version = G_VERSION,
87	.ctlreq = g_mirror_config,
88	.taste = g_mirror_taste,
89	.destroy_geom = g_mirror_destroy_geom
90};
91
92
93static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
94static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
95static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
96static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
97    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
98static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
99
100
101static const char *
102g_mirror_disk_state2str(int state)
103{
104
105	switch (state) {
106	case G_MIRROR_DISK_STATE_NONE:
107		return ("NONE");
108	case G_MIRROR_DISK_STATE_NEW:
109		return ("NEW");
110	case G_MIRROR_DISK_STATE_ACTIVE:
111		return ("ACTIVE");
112	case G_MIRROR_DISK_STATE_STALE:
113		return ("STALE");
114	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
115		return ("SYNCHRONIZING");
116	case G_MIRROR_DISK_STATE_DISCONNECTED:
117		return ("DISCONNECTED");
118	case G_MIRROR_DISK_STATE_DESTROY:
119		return ("DESTROY");
120	default:
121		return ("INVALID");
122	}
123}
124
125static const char *
126g_mirror_device_state2str(int state)
127{
128
129	switch (state) {
130	case G_MIRROR_DEVICE_STATE_STARTING:
131		return ("STARTING");
132	case G_MIRROR_DEVICE_STATE_RUNNING:
133		return ("RUNNING");
134	default:
135		return ("INVALID");
136	}
137}
138
139static const char *
140g_mirror_get_diskname(struct g_mirror_disk *disk)
141{
142
143	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
144		return ("[unknown]");
145	return (disk->d_name);
146}
147
148/*
149 * --- Event handling functions ---
150 * Events in geom_mirror are used to maintain disk and device status
151 * from a single thread, which simplifies locking.
152 */
153static void
154g_mirror_event_free(struct g_mirror_event *ep)
155{
156
157	free(ep, M_MIRROR);
158}
159
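/*
 * Queue an event for the worker thread and wake it up.  With
 * G_MIRROR_EVENT_DEVICE, 'arg' is the softc, otherwise it is the disk
 * the event refers to.  Unless G_MIRROR_EVENT_DONTWAIT is given, sleep
 * until the worker marks the event as done and return its error status.
 */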
160int
161g_mirror_event_send(void *arg, int state, int flags)
162{
163	struct g_mirror_softc *sc;
164	struct g_mirror_disk *disk;
165	struct g_mirror_event *ep;
166	int error;
167
168	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
169	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
170	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
171		disk = NULL;
172		sc = arg;
173	} else {
174		disk = arg;
175		sc = disk->d_softc;
176	}
177	ep->e_disk = disk;
178	ep->e_state = state;
179	ep->e_flags = flags;
180	ep->e_error = 0;
181	mtx_lock(&sc->sc_events_mtx);
182	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
183	mtx_unlock(&sc->sc_events_mtx);
184	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
185	mtx_lock(&sc->sc_queue_mtx);
186	wakeup(sc);
187	mtx_unlock(&sc->sc_queue_mtx);
188	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
189		return (0);
190	g_topology_assert();
191	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
192	g_topology_unlock();
193	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
194		mtx_lock(&sc->sc_events_mtx);
195		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
196		    hz * 5);
197	}
198	/* Don't even try to use 'sc' here, because it could already be dead. */
199	g_topology_lock();
200	error = ep->e_error;
201	g_mirror_event_free(ep);
202	return (error);
203}
204
205static struct g_mirror_event *
206g_mirror_event_get(struct g_mirror_softc *sc)
207{
208	struct g_mirror_event *ep;
209
210	mtx_lock(&sc->sc_events_mtx);
211	ep = TAILQ_FIRST(&sc->sc_events);
212	if (ep != NULL)
213		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
214	mtx_unlock(&sc->sc_events_mtx);
215	return (ep);
216}
217
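/*
 * Remove all pending events that refer to the given disk.  Waiting
 * senders are woken up with ECANCELED.
 */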
218static void
219g_mirror_event_cancel(struct g_mirror_disk *disk)
220{
221	struct g_mirror_softc *sc;
222	struct g_mirror_event *ep, *tmpep;
223
224	g_topology_assert();
225
226	sc = disk->d_softc;
227	mtx_lock(&sc->sc_events_mtx);
228	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
229		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
230			continue;
231		if (ep->e_disk != disk)
232			continue;
233		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
234		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
235			g_mirror_event_free(ep);
236		else {
237			ep->e_error = ECANCELED;
238			wakeup(ep);
239		}
240	}
241	mtx_unlock(&sc->sc_events_mtx);
242}
243
244/*
245 * Return the number of disks in a given state.
246 * If state is equal to -1, count all connected disks.
247 */
248u_int
249g_mirror_ndisks(struct g_mirror_softc *sc, int state)
250{
251	struct g_mirror_disk *disk;
252	u_int n = 0;
253
254	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
255		if (state == -1 || disk->d_state == state)
256			n++;
257	}
258	return (n);
259}
260
261/*
262 * Find a disk in the mirror by its disk ID.
263 */
264static struct g_mirror_disk *
265g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
266{
267	struct g_mirror_disk *disk;
268
269	g_topology_assert();
270
271	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
272		if (disk->d_id == id)
273			return (disk);
274	}
275	return (NULL);
276}
277
278static u_int
279g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
280{
281	struct bio *bp;
282	u_int nreqs = 0;
283
284	mtx_lock(&sc->sc_queue_mtx);
285	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
286		if (bp->bio_from == cp)
287			nreqs++;
288	}
289	mtx_unlock(&sc->sc_queue_mtx);
290	return (nreqs);
291}
292
293static int
294g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
295{
296
297	if (cp->index > 0) {
298		G_MIRROR_DEBUG(2,
299		    "I/O requests for %s exist, can't destroy it now.",
300		    cp->provider->name);
301		return (1);
302	}
303	if (g_mirror_nrequests(sc, cp) > 0) {
304		G_MIRROR_DEBUG(2,
305		    "I/O requests for %s in queue, can't destroy it now.",
306		    cp->provider->name);
307		return (1);
308	}
309	return (0);
310}
311
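/*
 * Detach and destroy the consumer, but only if it has no I/O in flight
 * and no requests left in the device queue.
 */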
312static void
313g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
314{
315
316	g_topology_assert();
317
318	cp->private = NULL;
319	if (g_mirror_is_busy(sc, cp))
320		return;
321	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
322	g_detach(cp);
323	g_destroy_consumer(cp);
324}
325
326static int
327g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
328{
329	int error;
330
331	g_topology_assert();
332	KASSERT(disk->d_consumer == NULL,
333	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
334
335	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
336	disk->d_consumer->private = disk;
337	disk->d_consumer->index = 0;
338	error = g_attach(disk->d_consumer, pp);
339	if (error != 0)
340		return (error);
341	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
342	return (0);
343}
344
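/*
 * Close and destroy a component consumer; consumers that never got
 * attached are simply destroyed.
 */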
345static void
346g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
347{
348
349	g_topology_assert();
350
351	if (cp == NULL)
352		return;
353	if (cp->provider != NULL) {
354		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
355		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
356			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
357			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
358			    0);
359			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
360		}
361		g_mirror_kill_consumer(sc, cp);
362	} else {
363		g_destroy_consumer(cp);
364	}
365}
366
367/*
368 * Initialize the disk: allocate memory, create a consumer and attach it
369 * to the given provider.
370 */
371static struct g_mirror_disk *
372g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
373    struct g_mirror_metadata *md, int *errorp)
374{
375	struct g_mirror_disk *disk;
376	int error;
377
378	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
379	if (disk == NULL) {
380		error = ENOMEM;
381		goto fail;
382	}
383	disk->d_softc = sc;
384	error = g_mirror_connect_disk(disk, pp);
385	if (error != 0)
386		goto fail;
387	disk->d_id = md->md_did;
388	disk->d_state = G_MIRROR_DISK_STATE_NONE;
389	disk->d_priority = md->md_priority;
390	disk->d_delay.sec = 0;
391	disk->d_delay.frac = 0;
392	binuptime(&disk->d_last_used);
393	disk->d_flags = md->md_dflags;
394	if (md->md_provider[0] != '\0')
395		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
396	disk->d_sync.ds_consumer = NULL;
397	disk->d_sync.ds_offset = md->md_sync_offset;
398	disk->d_sync.ds_offset_done = md->md_sync_offset;
399	disk->d_sync.ds_resync = -1;
400	disk->d_sync.ds_syncid = md->md_syncid;
401	if (errorp != NULL)
402		*errorp = 0;
403	return (disk);
404fail:
405	if (errorp != NULL)
406		*errorp = error;
407	if (disk != NULL) {
408		g_mirror_disconnect_consumer(sc, disk->d_consumer);
409		free(disk, M_MIRROR);
410	}
411	return (NULL);
412}
413
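/*
 * Remove the disk from the device: cancel its pending events, stop
 * synchronization if it was rebuilding and release its consumer.
 */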
414static void
415g_mirror_destroy_disk(struct g_mirror_disk *disk)
416{
417	struct g_mirror_softc *sc;
418
419	g_topology_assert();
420
421	LIST_REMOVE(disk, d_next);
422	g_mirror_event_cancel(disk);
423	sc = disk->d_softc;
424	if (sc->sc_hint == disk)
425		sc->sc_hint = NULL;
426	switch (disk->d_state) {
427	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
428		g_mirror_sync_stop(disk, 1);
429		/* FALLTHROUGH */
430	case G_MIRROR_DISK_STATE_NEW:
431	case G_MIRROR_DISK_STATE_STALE:
432	case G_MIRROR_DISK_STATE_ACTIVE:
433		g_mirror_disconnect_consumer(sc, disk->d_consumer);
434		free(disk, M_MIRROR);
435		break;
436	default:
437		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
438		    g_mirror_get_diskname(disk),
439		    g_mirror_disk_state2str(disk->d_state)));
440	}
441}
442
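/*
 * Tear the whole device down: the provider, all disks, pending events,
 * the synchronization geom and finally the mirror geom itself.
 */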
443static void
444g_mirror_destroy_device(struct g_mirror_softc *sc)
445{
446	struct g_mirror_disk *disk;
447	struct g_mirror_event *ep;
448	struct g_geom *gp;
449	struct g_consumer *cp, *tmpcp;
450
451	g_topology_assert();
452
453	gp = sc->sc_geom;
454	if (sc->sc_provider != NULL)
455		g_mirror_destroy_provider(sc);
456	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
457	    disk = LIST_FIRST(&sc->sc_disks)) {
458		g_mirror_destroy_disk(disk);
459	}
460	while ((ep = g_mirror_event_get(sc)) != NULL) {
461		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
462			g_mirror_event_free(ep);
463		else {
464			ep->e_error = ECANCELED;
465			ep->e_flags |= G_MIRROR_EVENT_DONE;
466			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
467			mtx_lock(&sc->sc_events_mtx);
468			wakeup(ep);
469			mtx_unlock(&sc->sc_events_mtx);
470		}
471	}
472	callout_drain(&sc->sc_callout);
473	gp->softc = NULL;
474
475	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
476		g_mirror_disconnect_consumer(sc, cp);
477	}
478	sc->sc_sync.ds_geom->softc = NULL;
479	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
480	mtx_destroy(&sc->sc_queue_mtx);
481	mtx_destroy(&sc->sc_events_mtx);
482	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
483	g_wither_geom(gp, ENXIO);
484}
485
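/*
 * A component provider has disappeared; schedule disconnection of the
 * disk and bump the syncid on the first write.
 */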
486static void
487g_mirror_orphan(struct g_consumer *cp)
488{
489	struct g_mirror_disk *disk;
490
491	g_topology_assert();
492
493	disk = cp->private;
494	if (disk == NULL)
495		return;
496	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
497	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
498	    G_MIRROR_EVENT_DONTWAIT);
499}
500
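/*
 * A component was opened for writing behind our back; disconnect it and
 * bump the syncid immediately.
 */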
501static void
502g_mirror_spoiled(struct g_consumer *cp)
503{
504	struct g_mirror_disk *disk;
505
506	g_topology_assert();
507
508	disk = cp->private;
509	if (disk == NULL)
510		return;
511	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
512	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
513	    G_MIRROR_EVENT_DONTWAIT);
514}
515
516/*
517 * Return the next active disk on the list.
518 * It may be the same disk as the given one.
519 * If there are no active disks on the list, NULL is returned.
520 */
521static __inline struct g_mirror_disk *
522g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
523{
524	struct g_mirror_disk *dp;
525
526	for (dp = LIST_NEXT(disk, d_next); dp != disk;
527	    dp = LIST_NEXT(dp, d_next)) {
528		if (dp == NULL)
529			dp = LIST_FIRST(&sc->sc_disks);
530		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
531			break;
532	}
533	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
534		return (NULL);
535	return (dp);
536}
537
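/*
 * Return an active disk for the next round-robin request, advancing the
 * sc_hint pointer.
 */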
538static struct g_mirror_disk *
539g_mirror_get_disk(struct g_mirror_softc *sc)
540{
541	struct g_mirror_disk *disk;
542
543	if (sc->sc_hint == NULL) {
544		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
545		if (sc->sc_hint == NULL)
546			return (NULL);
547	}
548	disk = sc->sc_hint;
549	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
550		disk = g_mirror_find_next(sc, disk);
551		if (disk == NULL)
552			return (NULL);
553	}
554	sc->sc_hint = g_mirror_find_next(sc, disk);
555	return (disk);
556}
557
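/*
 * Write the metadata into the last sector of the component (with
 * md == NULL the sector is simply zeroed).  On failure the disk is
 * scheduled for disconnection.
 */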
558static int
559g_mirror_write_metadata(struct g_mirror_disk *disk,
560    struct g_mirror_metadata *md)
561{
562	struct g_mirror_softc *sc;
563	struct g_consumer *cp;
564	off_t offset, length;
565	u_char *sector;
566	int close = 0, error = 0;
567
568	g_topology_assert();
569
570	sc = disk->d_softc;
571	cp = disk->d_consumer;
572	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
573	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
574	length = cp->provider->sectorsize;
575	offset = cp->provider->mediasize - length;
576	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
577	/*
578	 * Open consumer if it wasn't opened and remember to close it.
579	 */
580	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
581		error = g_access(cp, 0, 1, 1);
582		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
583		    cp->provider->name, 0, 1, 1, error);
584		if (error == 0)
585			close = 1;
586#ifdef	INVARIANTS
587	} else {
588		KASSERT(cp->acw > 0 && cp->ace > 0,
589		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
590		    cp->acr, cp->acw, cp->ace));
591#endif
592	}
593	if (error == 0) {
594		if (md != NULL)
595			mirror_metadata_encode(md, sector);
596		g_topology_unlock();
597		error = g_write_data(cp, offset, sector, length);
598		g_topology_lock();
599	}
600	free(sector, M_MIRROR);
601	if (close) {
602		g_access(cp, 0, -1, -1);
603		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
604		    cp->provider->name, 0, -1, -1, 0);
605	}
606	if (error != 0) {
607		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
608		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
609		    G_MIRROR_EVENT_DONTWAIT);
610	}
611	return (error);
612}
613
614static int
615g_mirror_clear_metadata(struct g_mirror_disk *disk)
616{
617	int error;
618
619	g_topology_assert();
620	error = g_mirror_write_metadata(disk, NULL);
621	if (error == 0) {
622		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
623		    g_mirror_get_diskname(disk));
624	} else {
625		G_MIRROR_DEBUG(0,
626		    "Cannot clear metadata on disk %s (error=%d).",
627		    g_mirror_get_diskname(disk), error);
628	}
629	return (error);
630}
631
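/*
 * Fill a metadata structure from the current device and disk state.
 * With disk == NULL, defaults for a brand new component are used.
 */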
632void
633g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
634    struct g_mirror_metadata *md)
635{
636
637	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
638	md->md_version = G_MIRROR_VERSION;
639	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
640	md->md_mid = sc->sc_id;
641	md->md_all = sc->sc_ndisks;
642	md->md_slice = sc->sc_slice;
643	md->md_balance = sc->sc_balance;
644	md->md_mediasize = sc->sc_mediasize;
645	md->md_sectorsize = sc->sc_sectorsize;
646	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
647	bzero(md->md_provider, sizeof(md->md_provider));
648	if (disk == NULL) {
649		md->md_did = arc4random();
650		md->md_priority = 0;
651		md->md_syncid = 0;
652		md->md_dflags = 0;
653		md->md_sync_offset = 0;
654	} else {
655		md->md_did = disk->d_id;
656		md->md_priority = disk->d_priority;
657		md->md_syncid = disk->d_sync.ds_syncid;
658		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
659		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
660			md->md_sync_offset = disk->d_sync.ds_offset_done;
661		else
662			md->md_sync_offset = 0;
663		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
664			strlcpy(md->md_provider,
665			    disk->d_consumer->provider->name,
666			    sizeof(md->md_provider));
667		}
668	}
669}
670
671void
672g_mirror_update_metadata(struct g_mirror_disk *disk)
673{
674	struct g_mirror_metadata md;
675	int error;
676
677	g_topology_assert();
678	g_mirror_fill_metadata(disk->d_softc, disk, &md);
679	error = g_mirror_write_metadata(disk, &md);
680	if (error == 0) {
681		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
682		    g_mirror_get_diskname(disk));
683	} else {
684		G_MIRROR_DEBUG(0,
685		    "Cannot update metadata on disk %s (error=%d).",
686		    g_mirror_get_diskname(disk), error);
687	}
688}
689
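/*
 * Increase the synchronization id and store it in the metadata of all
 * active and synchronizing disks.
 */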
690static void
691g_mirror_bump_syncid(struct g_mirror_softc *sc)
692{
693	struct g_mirror_disk *disk;
694
695	g_topology_assert();
696	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
697	    ("%s called with no active disks (device=%s).", __func__,
698	    sc->sc_name));
699
700	sc->sc_syncid++;
701	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
702	    sc->sc_syncid);
703	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
704		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
705		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
706			disk->d_sync.ds_syncid = sc->sc_syncid;
707			g_mirror_update_metadata(disk);
708		}
709	}
710}
711
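/*
 * The device has seen no I/O for a while; mark all active components
 * as clean so they will not need synchronization after a crash.
 */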
712static void
713g_mirror_idle(struct g_mirror_softc *sc)
714{
715	struct g_mirror_disk *disk;
716
717	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
718		return;
719	sc->sc_idle = 1;
720	g_topology_lock();
721	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
722		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
723			continue;
724		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
725		    g_mirror_get_diskname(disk), sc->sc_name);
726		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
727		g_mirror_update_metadata(disk);
728	}
729	g_topology_unlock();
730}
731
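/*
 * Mark all active components as dirty again before the first write
 * after an idle period.
 */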
732static void
733g_mirror_unidle(struct g_mirror_softc *sc)
734{
735	struct g_mirror_disk *disk;
736
737	sc->sc_idle = 0;
738	g_topology_lock();
739	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
740		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
741			continue;
742		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
743		    g_mirror_get_diskname(disk), sc->sc_name);
744		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
745		g_mirror_update_metadata(disk);
746	}
747	g_topology_unlock();
748}
749
750static __inline int
751bintime_cmp(struct bintime *bt1, struct bintime *bt2)
752{
753
754	if (bt1->sec < bt2->sec)
755		return (-1);
756	else if (bt1->sec > bt2->sec)
757		return (1);
758	if (bt1->frac < bt2->frac)
759		return (-1);
760	else if (bt1->frac > bt2->frac)
761		return (1);
762	return (0);
763}
764
765static void
766g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
767{
768
769	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
770		return;
771	binuptime(&disk->d_delay);
772	bintime_sub(&disk->d_delay, &bp->bio_t0);
773}
774
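/*
 * Regular I/O completion: mark the bio as a regular request and hand it
 * back to the worker thread.
 */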
775static void
776g_mirror_done(struct bio *bp)
777{
778	struct g_mirror_softc *sc;
779
780	sc = bp->bio_from->geom->softc;
781	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
782	mtx_lock(&sc->sc_queue_mtx);
783	bioq_disksort(&sc->sc_queue, bp);
784	wakeup(sc);
785	mtx_unlock(&sc->sc_queue_mtx);
786}
787
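/*
 * Process a completed regular request in the worker thread: account for
 * errors, disconnect failing disks and deliver the parent bio once all
 * cloned children have returned.
 */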
788static void
789g_mirror_regular_request(struct bio *bp)
790{
791	struct g_mirror_softc *sc;
792	struct g_mirror_disk *disk;
793	struct bio *pbp;
794
795	g_topology_assert_not();
796
797	bp->bio_from->index--;
798	pbp = bp->bio_parent;
799	sc = pbp->bio_to->geom->softc;
800	disk = bp->bio_from->private;
801	if (disk == NULL) {
802		g_topology_lock();
803		g_mirror_kill_consumer(sc, bp->bio_from);
804		g_topology_unlock();
805	} else {
806		g_mirror_update_delay(disk, bp);
807	}
808
809	pbp->bio_inbed++;
810	KASSERT(pbp->bio_inbed <= pbp->bio_children,
811	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
812	    pbp->bio_children));
813	if (bp->bio_error == 0 && pbp->bio_error == 0) {
814		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
815		g_destroy_bio(bp);
816		if (pbp->bio_children == pbp->bio_inbed) {
817			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
818			pbp->bio_completed = pbp->bio_length;
819			g_io_deliver(pbp, pbp->bio_error);
820		}
821		return;
822	} else if (bp->bio_error != 0) {
823		if (pbp->bio_error == 0)
824			pbp->bio_error = bp->bio_error;
825		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
826		    bp->bio_error);
827		if (disk != NULL) {
828			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
829			g_mirror_event_send(disk,
830			    G_MIRROR_DISK_STATE_DISCONNECTED,
831			    G_MIRROR_EVENT_DONTWAIT);
832		}
833		switch (pbp->bio_cmd) {
834		case BIO_DELETE:
835		case BIO_WRITE:
836			pbp->bio_inbed--;
837			pbp->bio_children--;
838			break;
839		}
840	}
841	g_destroy_bio(bp);
842
843	switch (pbp->bio_cmd) {
844	case BIO_READ:
845		if (pbp->bio_children == pbp->bio_inbed) {
846			pbp->bio_error = 0;
847			mtx_lock(&sc->sc_queue_mtx);
848			bioq_disksort(&sc->sc_queue, pbp);
849			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
850			wakeup(sc);
851			mtx_unlock(&sc->sc_queue_mtx);
852		}
853		break;
854	case BIO_DELETE:
855	case BIO_WRITE:
856		if (pbp->bio_children == 0) {
857			/*
858			 * All requests failed.
859			 */
860		} else if (pbp->bio_inbed < pbp->bio_children) {
861			/* Do nothing. */
862			break;
863		} else if (pbp->bio_children == pbp->bio_inbed) {
864			/* Some requests succeeded. */
865			pbp->bio_error = 0;
866			pbp->bio_completed = pbp->bio_length;
867		}
868		g_io_deliver(pbp, pbp->bio_error);
869		break;
870	default:
871		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
872		break;
873	}
874}
875
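/*
 * Synchronization I/O completion: mark the bio and hand it back to the
 * worker thread.
 */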
876static void
877g_mirror_sync_done(struct bio *bp)
878{
879	struct g_mirror_softc *sc;
880
881	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
882	sc = bp->bio_from->geom->softc;
883	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
884	mtx_lock(&sc->sc_queue_mtx);
885	bioq_disksort(&sc->sc_queue, bp);
886	wakeup(sc);
887	mtx_unlock(&sc->sc_queue_mtx);
888}
889
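/*
 * I/O entry point for the mirror provider: queue the request and wake
 * up the worker thread.
 */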
890static void
891g_mirror_start(struct bio *bp)
892{
893	struct g_mirror_softc *sc;
894
895	sc = bp->bio_to->geom->softc;
896	/*
897	 * If sc == NULL or there are no valid disks, the provider's error
898	 * should be set and g_mirror_start() should not be called at all.
899	 */
900	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
901	    ("Provider's error should be set (error=%d)(mirror=%s).",
902	    bp->bio_to->error, bp->bio_to->name));
903	G_MIRROR_LOGREQ(3, bp, "Request received.");
904
905	switch (bp->bio_cmd) {
906	case BIO_READ:
907	case BIO_WRITE:
908	case BIO_DELETE:
909		break;
910	case BIO_GETATTR:
911	default:
912		g_io_deliver(bp, EOPNOTSUPP);
913		return;
914	}
915	mtx_lock(&sc->sc_queue_mtx);
916	bioq_disksort(&sc->sc_queue, bp);
917	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
918	wakeup(sc);
919	mtx_unlock(&sc->sc_queue_mtx);
920}
921
922/*
923 * Send one synchronization request.
924 */
925static void
926g_mirror_sync_one(struct g_mirror_disk *disk)
927{
928	struct g_mirror_softc *sc;
929	struct bio *bp;
930
931	sc = disk->d_softc;
932	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
933	    ("Disk %s is not marked for synchronization.",
934	    g_mirror_get_diskname(disk)));
935
936	bp = g_new_bio();
937	if (bp == NULL)
938		return;
939	bp->bio_parent = NULL;
940	bp->bio_cmd = BIO_READ;
941	bp->bio_offset = disk->d_sync.ds_offset;
942	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
943	bp->bio_cflags = 0;
944	bp->bio_done = g_mirror_sync_done;
945	bp->bio_data = disk->d_sync.ds_data;
946	if (bp->bio_data == NULL) {
947		g_destroy_bio(bp);
948		return;
949	}
950	disk->d_sync.ds_offset += bp->bio_length;
951	bp->bio_to = sc->sc_provider;
952	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
953	disk->d_sync.ds_consumer->index++;
954	g_io_request(bp, disk->d_sync.ds_consumer);
955}
956
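/*
 * Handle a completed synchronization bio: a finished READ from the
 * mirror provider is turned into a WRITE to the disk being rebuilt; a
 * finished WRITE advances ds_offset_done and may activate the disk.
 */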
957static void
958g_mirror_sync_request(struct bio *bp)
959{
960	struct g_mirror_softc *sc;
961	struct g_mirror_disk *disk;
962
963	bp->bio_from->index--;
964	sc = bp->bio_from->geom->softc;
965	disk = bp->bio_from->private;
966	if (disk == NULL) {
967		g_topology_lock();
968		g_mirror_kill_consumer(sc, bp->bio_from);
969		g_topology_unlock();
970		g_destroy_bio(bp);
971		return;
972	}
973
974	/*
975	 * Synchronization request.
976	 */
977	switch (bp->bio_cmd) {
978	case BIO_READ:
979	    {
980		struct g_consumer *cp;
981
982		if (bp->bio_error != 0) {
983			G_MIRROR_LOGREQ(0, bp,
984			    "Synchronization request failed (error=%d).",
985			    bp->bio_error);
986			g_destroy_bio(bp);
987			return;
988		}
989		G_MIRROR_LOGREQ(3, bp,
990		    "Synchronization request half-finished.");
991		bp->bio_cmd = BIO_WRITE;
992		bp->bio_cflags = 0;
993		cp = disk->d_consumer;
994		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
995		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
996		    cp->acr, cp->acw, cp->ace));
997		cp->index++;
998		g_io_request(bp, cp);
999		return;
1000	    }
1001	case BIO_WRITE:
1002	    {
1003		struct g_mirror_disk_sync *sync;
1004
1005		if (bp->bio_error != 0) {
1006			G_MIRROR_LOGREQ(0, bp,
1007			    "Synchronization request failed (error=%d).",
1008			    bp->bio_error);
1009			g_destroy_bio(bp);
1010			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
1011			g_mirror_event_send(disk,
1012			    G_MIRROR_DISK_STATE_DISCONNECTED,
1013			    G_MIRROR_EVENT_DONTWAIT);
1014			return;
1015		}
1016		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1017		sync = &disk->d_sync;
1018		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1019		g_destroy_bio(bp);
1020		if (sync->ds_resync != -1)
1021			break;
1022		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1023			/*
1024			 * Disk up-to-date, activate it.
1025			 */
1026			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1027			    G_MIRROR_EVENT_DONTWAIT);
1028			return;
1029		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
1030			/*
1031			 * Store offset_done in the metadata every 100 MAXPHYS-sized blocks.
1032			 * XXX: This should be configurable.
1033			 */
1034			g_topology_lock();
1035			g_mirror_update_metadata(disk);
1036			g_topology_unlock();
1037		}
1038		return;
1039	    }
1040	default:
1041		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1042		    bp->bio_cmd, sc->sc_name));
1043		break;
1044	}
1045}
1046
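/*
 * 'prefer' balance algorithm: send the read to the first active disk on
 * the list.
 */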
1047static void
1048g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1049{
1050	struct g_mirror_disk *disk;
1051	struct g_consumer *cp;
1052	struct bio *cbp;
1053
1054	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1055		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1056			break;
1057	}
1058	if (disk == NULL) {
1059		if (bp->bio_error == 0)
1060			bp->bio_error = ENXIO;
1061		g_io_deliver(bp, bp->bio_error);
1062		return;
1063	}
1064	cbp = g_clone_bio(bp);
1065	if (cbp == NULL) {
1066		if (bp->bio_error == 0)
1067			bp->bio_error = ENOMEM;
1068		g_io_deliver(bp, bp->bio_error);
1069		return;
1070	}
1071	/*
1072	 * Fill in the component bio structure.
1073	 */
1074	cp = disk->d_consumer;
1075	cbp->bio_done = g_mirror_done;
1076	cbp->bio_to = cp->provider;
1077	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1078	KASSERT(cp->acr > 0 && cp->ace > 0,
1079	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1080	    cp->acw, cp->ace));
1081	cp->index++;
1082	g_io_request(cbp, cp);
1083}
1084
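/*
 * 'round-robin' balance algorithm: send the read to the next active
 * disk in turn.
 */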
1085static void
1086g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1087{
1088	struct g_mirror_disk *disk;
1089	struct g_consumer *cp;
1090	struct bio *cbp;
1091
1092	disk = g_mirror_get_disk(sc);
1093	if (disk == NULL) {
1094		if (bp->bio_error == 0)
1095			bp->bio_error = ENXIO;
1096		g_io_deliver(bp, bp->bio_error);
1097		return;
1098	}
1099	cbp = g_clone_bio(bp);
1100	if (cbp == NULL) {
1101		if (bp->bio_error == 0)
1102			bp->bio_error = ENOMEM;
1103		g_io_deliver(bp, bp->bio_error);
1104		return;
1105	}
1106	/*
1107	 * Fill in the component bio structure.
1108	 */
1109	cp = disk->d_consumer;
1110	cbp->bio_done = g_mirror_done;
1111	cbp->bio_to = cp->provider;
1112	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1113	KASSERT(cp->acr > 0 && cp->ace > 0,
1114	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1115	    cp->acw, cp->ace));
1116	cp->index++;
1117	g_io_request(cbp, cp);
1118}
1119
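/*
 * 'load' balance algorithm: send the read to the active disk with the
 * smallest recent request delay (or to one that has been unused for a
 * couple of seconds).
 */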
1120static void
1121g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1122{
1123	struct g_mirror_disk *disk, *dp;
1124	struct g_consumer *cp;
1125	struct bio *cbp;
1126	struct bintime curtime;
1127
1128	binuptime(&curtime);
1129	/*
1130	 * Find the disk with the smallest load.
1131	 */
1132	disk = NULL;
1133	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1134		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1135			continue;
1136		/* If the disk hasn't been used for more than 2 seconds, use it. */
1137		if (curtime.sec - dp->d_last_used.sec >= 2) {
1138			disk = dp;
1139			break;
1140		}
1141		if (disk == NULL ||
1142		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1143			disk = dp;
1144		}
1145	}
	/* No active disk found; fail the request like the other algorithms do. */
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
1146	cbp = g_clone_bio(bp);
1147	if (cbp == NULL) {
1148		if (bp->bio_error == 0)
1149			bp->bio_error = ENOMEM;
1150		g_io_deliver(bp, bp->bio_error);
1151		return;
1152	}
1153	/*
1154	 * Fill in the component bio structure.
1155	 */
1156	cp = disk->d_consumer;
1157	cbp->bio_done = g_mirror_done;
1158	cbp->bio_to = cp->provider;
1159	binuptime(&disk->d_last_used);
1160	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1161	KASSERT(cp->acr > 0 && cp->ace > 0,
1162	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1163	    cp->acw, cp->ace));
1164	cp->index++;
1165	g_io_request(cbp, cp);
1166}
1167
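/*
 * 'split' balance algorithm: divide a large read into slice-sized
 * pieces spread over all active disks; short requests fall back to
 * round-robin.
 */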
1168static void
1169g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1170{
1171	struct bio_queue_head queue;
1172	struct g_mirror_disk *disk;
1173	struct g_consumer *cp;
1174	struct bio *cbp;
1175	off_t left, mod, offset, slice;
1176	u_char *data;
1177	u_int ndisks;
1178
1179	if (bp->bio_length <= sc->sc_slice) {
1180		g_mirror_request_round_robin(sc, bp);
1181		return;
1182	}
1183	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1184	slice = bp->bio_length / ndisks;
1185	mod = slice % sc->sc_provider->sectorsize;
1186	if (mod != 0)
1187		slice += sc->sc_provider->sectorsize - mod;
1188	/*
1189	 * Allocate all bios before sending any request, so we can
1190	 * return ENOMEM in a nice and clean way.
1191	 */
1192	left = bp->bio_length;
1193	offset = bp->bio_offset;
1194	data = bp->bio_data;
1195	bioq_init(&queue);
1196	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1197		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1198			continue;
1199		cbp = g_clone_bio(bp);
1200		if (cbp == NULL) {
1201			for (cbp = bioq_first(&queue); cbp != NULL;
1202			    cbp = bioq_first(&queue)) {
1203				bioq_remove(&queue, cbp);
1204				g_destroy_bio(cbp);
1205			}
1206			if (bp->bio_error == 0)
1207				bp->bio_error = ENOMEM;
1208			g_io_deliver(bp, bp->bio_error);
1209			return;
1210		}
1211		bioq_insert_tail(&queue, cbp);
1212		cbp->bio_done = g_mirror_done;
1213		cbp->bio_caller1 = disk;
1214		cbp->bio_to = disk->d_consumer->provider;
1215		cbp->bio_offset = offset;
1216		cbp->bio_data = data;
1217		cbp->bio_length = MIN(left, slice);
1218		left -= cbp->bio_length;
1219		if (left == 0)
1220			break;
1221		offset += cbp->bio_length;
1222		data += cbp->bio_length;
1223	}
1224	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1225		bioq_remove(&queue, cbp);
1226		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1227		disk = cbp->bio_caller1;
1228		cbp->bio_caller1 = NULL;
1229		cp = disk->d_consumer;
1230		KASSERT(cp->acr > 0 && cp->ace > 0,
1231		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1232		    cp->acr, cp->acw, cp->ace));
1233		disk->d_consumer->index++;
1234		g_io_request(cbp, disk->d_consumer);
1235	}
1236}
1237
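/*
 * Dispatch a request taken off the queue by the worker thread: reads go
 * to one of the balance algorithms, writes and deletes are cloned to
 * every usable component.
 */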
1238static void
1239g_mirror_register_request(struct bio *bp)
1240{
1241	struct g_mirror_softc *sc;
1242
1243	sc = bp->bio_to->geom->softc;
1244	switch (bp->bio_cmd) {
1245	case BIO_READ:
1246		switch (sc->sc_balance) {
1247		case G_MIRROR_BALANCE_LOAD:
1248			g_mirror_request_load(sc, bp);
1249			break;
1250		case G_MIRROR_BALANCE_PREFER:
1251			g_mirror_request_prefer(sc, bp);
1252			break;
1253		case G_MIRROR_BALANCE_ROUND_ROBIN:
1254			g_mirror_request_round_robin(sc, bp);
1255			break;
1256		case G_MIRROR_BALANCE_SPLIT:
1257			g_mirror_request_split(sc, bp);
1258			break;
1259		}
1260		return;
1261	case BIO_WRITE:
1262	case BIO_DELETE:
1263	    {
1264		struct g_mirror_disk *disk;
1265		struct g_mirror_disk_sync *sync;
1266		struct bio_queue_head queue;
1267		struct g_consumer *cp;
1268		struct bio *cbp;
1269
1270		if (sc->sc_idle)
1271			g_mirror_unidle(sc);
1272		/*
1273		 * Allocate all bios before sending any request, so we can
1274		 * return ENOMEM in a nice and clean way.
1275		 */
1276		bioq_init(&queue);
1277		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1278			sync = &disk->d_sync;
1279			switch (disk->d_state) {
1280			case G_MIRROR_DISK_STATE_ACTIVE:
1281				break;
1282			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1283				if (bp->bio_offset >= sync->ds_offset)
1284					continue;
1285				else if (bp->bio_offset + bp->bio_length >
1286				    sync->ds_offset_done &&
1287				    (bp->bio_offset < sync->ds_resync ||
1288				     sync->ds_resync == -1)) {
1289					sync->ds_resync = bp->bio_offset -
1290					    (bp->bio_offset % MAXPHYS);
1291				}
1292				break;
1293			default:
1294				continue;
1295			}
1296			cbp = g_clone_bio(bp);
1297			if (cbp == NULL) {
1298				for (cbp = bioq_first(&queue); cbp != NULL;
1299				    cbp = bioq_first(&queue)) {
1300					bioq_remove(&queue, cbp);
1301					g_destroy_bio(cbp);
1302				}
1303				if (bp->bio_error == 0)
1304					bp->bio_error = ENOMEM;
1305				g_io_deliver(bp, bp->bio_error);
1306				return;
1307			}
1308			bioq_insert_tail(&queue, cbp);
1309			cbp->bio_done = g_mirror_done;
1310			cp = disk->d_consumer;
1311			cbp->bio_caller1 = cp;
1312			cbp->bio_to = cp->provider;
1313			KASSERT(cp->acw > 0 && cp->ace > 0,
1314			    ("Consumer %s not opened (r%dw%de%d).",
1315			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1316		}
1317		for (cbp = bioq_first(&queue); cbp != NULL;
1318		    cbp = bioq_first(&queue)) {
1319			bioq_remove(&queue, cbp);
1320			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1321			cp = cbp->bio_caller1;
1322			cbp->bio_caller1 = NULL;
1323			cp->index++;
1324			g_io_request(cbp, cp);
1325		}
1326		/*
1327		 * Bump syncid on first write.
1328		 */
1329		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1330			sc->sc_bump_syncid = 0;
1331			g_topology_lock();
1332			g_mirror_bump_syncid(sc);
1333			g_topology_unlock();
1334		}
1335		return;
1336	    }
1337	default:
1338		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1339		    bp->bio_cmd, sc->sc_name));
1340		break;
1341	}
1342}
1343
1344static int
1345g_mirror_can_destroy(struct g_mirror_softc *sc)
1346{
1347	struct g_geom *gp;
1348	struct g_consumer *cp;
1349
1350	g_topology_assert();
1351	gp = sc->sc_geom;
1352	LIST_FOREACH(cp, &gp->consumer, consumer) {
1353		if (g_mirror_is_busy(sc, cp))
1354			return (0);
1355	}
1356	gp = sc->sc_sync.ds_geom;
1357	LIST_FOREACH(cp, &gp->consumer, consumer) {
1358		if (g_mirror_is_busy(sc, cp))
1359			return (0);
1360	}
1361	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1362	    sc->sc_name);
1363	return (1);
1364}
1365
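/*
 * Try to destroy the device.  With the WAIT flag set only the thread
 * that requested destruction is woken up; otherwise the device is torn
 * down and the softc freed.  Returns 1 if the worker should exit.
 */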
1366static int
1367g_mirror_try_destroy(struct g_mirror_softc *sc)
1368{
1369
1370	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1371		g_topology_lock();
1372		if (!g_mirror_can_destroy(sc)) {
1373			g_topology_unlock();
1374			return (0);
1375		}
1376		g_topology_unlock();
1377		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1378		    &sc->sc_worker);
1379		wakeup(&sc->sc_worker);
1380		sc->sc_worker = NULL;
1381	} else {
1382		g_topology_lock();
1383		if (!g_mirror_can_destroy(sc)) {
1384			g_topology_unlock();
1385			return (0);
1386		}
1387		g_mirror_destroy_device(sc);
1388		g_topology_unlock();
1389		free(sc, M_MIRROR);
1390	}
1391	return (1);
1392}
1393
1394/*
1395 * Worker thread.
1396 */
1397static void
1398g_mirror_worker(void *arg)
1399{
1400	struct g_mirror_softc *sc;
1401	struct g_mirror_disk *disk;
1402	struct g_mirror_disk_sync *sync;
1403	struct g_mirror_event *ep;
1404	struct bio *bp;
1405	u_int nreqs;
1406
1407	sc = arg;
1408	curthread->td_base_pri = PRIBIO;
1409
1410	nreqs = 0;
1411	for (;;) {
1412		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1413		/*
1414		 * First take a look at events.
1415		 * It is important to handle events before any I/O requests.
1416		 */
1417		ep = g_mirror_event_get(sc);
1418		if (ep != NULL) {
1419			g_topology_lock();
1420			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1421				/* Update only device status. */
1422				G_MIRROR_DEBUG(3,
1423				    "Running event for device %s.",
1424				    sc->sc_name);
1425				ep->e_error = 0;
1426				g_mirror_update_device(sc, 1);
1427			} else {
1428				/* Update disk status. */
1429				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1430				     g_mirror_get_diskname(ep->e_disk));
1431				ep->e_error = g_mirror_update_disk(ep->e_disk,
1432				    ep->e_state);
1433				if (ep->e_error == 0)
1434					g_mirror_update_device(sc, 0);
1435			}
1436			g_topology_unlock();
1437			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1438				KASSERT(ep->e_error == 0,
1439				    ("Error cannot be handled."));
1440				g_mirror_event_free(ep);
1441			} else {
1442				ep->e_flags |= G_MIRROR_EVENT_DONE;
1443				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1444				    ep);
1445				mtx_lock(&sc->sc_events_mtx);
1446				wakeup(ep);
1447				mtx_unlock(&sc->sc_events_mtx);
1448			}
1449			if ((sc->sc_flags &
1450			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1451				if (g_mirror_try_destroy(sc))
1452					kthread_exit(0);
1453			}
1454			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1455			continue;
1456		}
1457		/*
1458		 * Now I/O requests.
1459		 */
1460		/* Get first request from the queue. */
1461		mtx_lock(&sc->sc_queue_mtx);
1462		bp = bioq_first(&sc->sc_queue);
1463		if (bp == NULL) {
1464			if ((sc->sc_flags &
1465			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1466				mtx_unlock(&sc->sc_queue_mtx);
1467				if (g_mirror_try_destroy(sc))
1468					kthread_exit(0);
1469				mtx_lock(&sc->sc_queue_mtx);
1470			}
1471		}
1472		if (sc->sc_sync.ds_ndisks > 0 &&
1473		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1474			mtx_unlock(&sc->sc_queue_mtx);
1475			/*
1476			 * It is time for synchronization...
1477			 */
1478			nreqs = 0;
1479			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1480				if (disk->d_state !=
1481				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1482					continue;
1483				}
1484				sync = &disk->d_sync;
1485				if (sync->ds_offset >=
1486				    sc->sc_provider->mediasize) {
1487					continue;
1488				}
1489				if (sync->ds_offset > sync->ds_offset_done)
1490					continue;
1491				if (sync->ds_resync != -1) {
1492					sync->ds_offset = sync->ds_resync;
1493					sync->ds_offset_done = sync->ds_resync;
1494					sync->ds_resync = -1;
1495				}
1496				g_mirror_sync_one(disk);
1497			}
1498			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1499			goto sleep;
1500		}
1501		if (bp == NULL) {
1502#define	G_MIRROR_IS_IDLE(sc)	((sc)->sc_idle ||			\
1503				 ((sc)->sc_provider != NULL &&		\
1504				  (sc)->sc_provider->acw == 0))
1505			if (G_MIRROR_IS_IDLE(sc)) {
1506				/*
1507				 * If we're already in idle state, sleep without
1508				 * a timeout.
1509				 */
1510				MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1511				    "m:w1", 0);
1512				G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1513			} else {
1514				u_int idletime;
1515
1516				idletime = g_mirror_idletime;
1517				if (idletime == 0)
1518					idletime = 1;
1519				idletime *= hz;
1520				if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1521				    "m:w2", idletime) == EWOULDBLOCK) {
1522					G_MIRROR_DEBUG(5, "%s: I'm here 4.",
1523					    __func__);
1524					/*
1525					 * No I/O requests for the whole idle
1526					 * period, so mark components as clean.
1527					 */
1528					g_mirror_idle(sc);
1529				}
1530				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1531			}
1532			continue;
1533		}
1534		nreqs++;
1535		bioq_remove(&sc->sc_queue, bp);
1536		mtx_unlock(&sc->sc_queue_mtx);
1537
1538		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1539			g_mirror_regular_request(bp);
1540		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1541			u_int timeout, sps;
1542
1543			g_mirror_sync_request(bp);
1544sleep:
1545			sps = g_mirror_syncs_per_sec;
1546			if (sps == 0) {
1547				G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1548				continue;
1549			}
1550			mtx_lock(&sc->sc_queue_mtx);
1551			if (bioq_first(&sc->sc_queue) != NULL) {
1552				mtx_unlock(&sc->sc_queue_mtx);
1553				G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1554				continue;
1555			}
1556			timeout = hz / sps;
1557			if (timeout == 0)
1558				timeout = 1;
1559			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1560			    timeout);
1561		} else {
1562			g_mirror_register_request(bp);
1563		}
1564		G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1565	}
1566}
1567
1568/*
1569 * Open or close the disk's consumer to match the access counts on the mirror provider.
1570 */
1571static void
1572g_mirror_update_access(struct g_mirror_disk *disk)
1573{
1574	struct g_provider *pp;
1575	struct g_consumer *cp;
1576	int acr, acw, ace, cpw, error;
1577
1578	g_topology_assert();
1579
1580	cp = disk->d_consumer;
1581	pp = disk->d_softc->sc_provider;
1582	if (pp == NULL) {
1583		acr = -cp->acr;
1584		acw = -cp->acw;
1585		ace = -cp->ace;
1586	} else {
1587		acr = pp->acr - cp->acr;
1588		acw = pp->acw - cp->acw;
1589		ace = pp->ace - cp->ace;
1590		/* Grab an extra "exclusive" bit. */
1591		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1592			ace++;
1593	}
1594	if (acr == 0 && acw == 0 && ace == 0)
1595		return;
1596	cpw = cp->acw;
1597	error = g_access(cp, acr, acw, ace);
1598	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1599	    acw, ace, error);
1600	if (error != 0) {
1601		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1602		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1603		    G_MIRROR_EVENT_DONTWAIT);
1604		return;
1605	}
1606	if (cpw == 0 && cp->acw > 0) {
1607		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1608		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1609		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1610	} else if (cpw > 0 && cp->acw == 0) {
1611		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1612		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1613		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1614	}
1615}
1616
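/*
 * Start rebuilding a component: open it for writing, create a
 * synchronization consumer attached to the mirror provider and allocate
 * the transfer buffer.
 */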
1617static void
1618g_mirror_sync_start(struct g_mirror_disk *disk)
1619{
1620	struct g_mirror_softc *sc;
1621	struct g_consumer *cp;
1622	int error;
1623
1624	g_topology_assert();
1625
1626	sc = disk->d_softc;
1627	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1628	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1629	    sc->sc_state));
1630	cp = disk->d_consumer;
1631	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1632	    ("Consumer %s already opened.", cp->provider->name));
1633
1634	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1635	    g_mirror_get_diskname(disk));
1636	error = g_access(cp, 0, 1, 1);
1637	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1638	    1, error);
1639	if (error != 0) {
1640		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1641		    G_MIRROR_EVENT_DONTWAIT);
1642		return;
1643	}
1644	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1645	KASSERT(disk->d_sync.ds_consumer == NULL,
1646	    ("Sync consumer already exists (device=%s, disk=%s).",
1647	    sc->sc_name, g_mirror_get_diskname(disk)));
1648	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1649	disk->d_sync.ds_consumer->private = disk;
1650	disk->d_sync.ds_consumer->index = 0;
1651	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1652	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1653	    disk->d_softc->sc_name, error));
1654	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1655	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1656	    disk->d_softc->sc_name, error));
1657	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1658	sc->sc_sync.ds_ndisks++;
1659}
1660
1661/*
1662 * Stop synchronization process.
1663 * type: 0 - synchronization finished
1664 *       1 - synchronization stopped
1665 */
1666static void
1667g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1668{
1669	struct g_consumer *cp;
1670
1671	g_topology_assert();
1672	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1673	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1674	    g_mirror_disk_state2str(disk->d_state)));
1675	if (disk->d_sync.ds_consumer == NULL)
1676		return;
1677
1678	if (type == 0) {
1679		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1680		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1681	} else /* if (type == 1) */ {
1682		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1683		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1684	}
1685	cp = disk->d_sync.ds_consumer;
1686	g_access(cp, -1, 0, 0);
1687	g_mirror_kill_consumer(disk->d_softc, cp);
1688	free(disk->d_sync.ds_data, M_MIRROR);
1689	disk->d_sync.ds_consumer = NULL;
1690	disk->d_softc->sc_sync.ds_ndisks--;
1691	cp = disk->d_consumer;
1692	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1693	    ("Consumer %s not opened.", cp->provider->name));
1694	g_access(cp, 0, -1, -1);
1695	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1696	    -1, 0);
1697	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1698}
1699
1700static void
1701g_mirror_launch_provider(struct g_mirror_softc *sc)
1702{
1703	struct g_mirror_disk *disk;
1704	struct g_provider *pp;
1705
1706	g_topology_assert();
1707
1708	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1709	pp->mediasize = sc->sc_mediasize;
1710	pp->sectorsize = sc->sc_sectorsize;
1711	sc->sc_provider = pp;
1712	g_error_provider(pp, 0);
1713	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1714	    pp->name);
1715	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1716		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1717			g_mirror_sync_start(disk);
1718	}
1719}
1720
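/*
 * Withdraw the mirror provider: fail all queued I/O with ENXIO, orphan
 * the provider and stop any synchronization that is in progress.
 */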
1721static void
1722g_mirror_destroy_provider(struct g_mirror_softc *sc)
1723{
1724	struct g_mirror_disk *disk;
1725	struct bio *bp;
1726
1727	g_topology_assert();
1728	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1729	    sc->sc_name));
1730
1731	g_error_provider(sc->sc_provider, ENXIO);
1732	mtx_lock(&sc->sc_queue_mtx);
1733	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1734		bioq_remove(&sc->sc_queue, bp);
1735		g_io_deliver(bp, ENXIO);
1736	}
1737	mtx_unlock(&sc->sc_queue_mtx);
1738	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1739	    sc->sc_provider->name);
1740	sc->sc_provider->flags |= G_PF_WITHER;
1741	g_orphan_provider(sc->sc_provider, ENXIO);
1742	sc->sc_provider = NULL;
1743	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1744		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1745			g_mirror_sync_stop(disk, 1);
1746	}
1747}
1748
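/*
 * Callout handler: force the device to start once the startup timeout
 * expires, even if not all components have shown up.
 */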
1749static void
1750g_mirror_go(void *arg)
1751{
1752	struct g_mirror_softc *sc;
1753
1754	sc = arg;
1755	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1756	g_mirror_event_send(sc, 0,
1757	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1758}
1759
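/*
 * Decide the initial state of a newly connected disk by comparing its
 * syncid with the device's.
 */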
1760static u_int
1761g_mirror_determine_state(struct g_mirror_disk *disk)
1762{
1763	struct g_mirror_softc *sc;
1764	u_int state;
1765
1766	sc = disk->d_softc;
1767	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1768		if ((disk->d_flags &
1769		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1770			/* Disk does not need synchronization. */
1771			state = G_MIRROR_DISK_STATE_ACTIVE;
1772		} else {
1773			if ((sc->sc_flags &
1774			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1775			    (disk->d_flags &
1776			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1777				/*
1778				 * We can start synchronization from
1779				 * the stored offset.
1780				 */
1781				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1782			} else {
1783				state = G_MIRROR_DISK_STATE_STALE;
1784			}
1785		}
1786	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1787		/*
1788		 * Reset all synchronization data for this disk,
1789		 * because even if it was synchronized, it was
1790		 * synchronized against disks with a different syncid.
1791		 */
1792		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1793		disk->d_sync.ds_offset = 0;
1794		disk->d_sync.ds_offset_done = 0;
1795		disk->d_sync.ds_syncid = sc->sc_syncid;
1796		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1797		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1798			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1799		} else {
1800			state = G_MIRROR_DISK_STATE_STALE;
1801		}
1802	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1803		/*
1804		 * Not good, NOT GOOD!
1805		 * It means that the mirror was started on stale disks
1806		 * and a fresher disk has just arrived.
1807		 * If there were writes, the mirror is fucked up, sorry.
1808		 * I think the best choice here is not to touch
1809		 * this disk and to inform the user loudly.
1810		 */
1811		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1812		    "disk (%s) arrived!! It will not be connected to the "
1813		    "running device.", sc->sc_name,
1814		    g_mirror_get_diskname(disk));
1815		g_mirror_destroy_disk(disk);
1816		state = G_MIRROR_DISK_STATE_NONE;
1817		/* Return immediately, because disk was destroyed. */
1818		return (state);
1819	}
1820	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1821	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1822	return (state);
1823}
1824
1825/*
1826 * Update device state.
1827 */
1828static void
1829g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1830{
1831	struct g_mirror_disk *disk;
1832	u_int state;
1833
1834	g_topology_assert();
1835
1836	switch (sc->sc_state) {
1837	case G_MIRROR_DEVICE_STATE_STARTING:
1838	    {
1839		struct g_mirror_disk *pdisk;
1840		u_int dirty, ndisks, syncid;
1841
1842		KASSERT(sc->sc_provider == NULL,
1843		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1844		/*
1845		 * Are we ready? We are, if all disks are connected or
1846		 * if we have any disks and 'force' is true.
1847		 */
1848		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1849		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1850			;
1851		} else if (g_mirror_ndisks(sc, -1) == 0) {
1852			/*
1853			 * Disks went down in starting phase, so destroy
1854			 * device.
1855			 */
1856			callout_drain(&sc->sc_callout);
1857			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1858			return;
1859		} else {
1860			return;
1861		}
1862
1863		/*
1864		 * Activate all disks with the biggest syncid.
1865		 */
1866		if (force) {
1867			/*
1868			 * If 'force' is true, we have been called due to
1869			 * timeout, so don't bother canceling timeout.
1870			 */
1871			ndisks = 0;
1872			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1873				if ((disk->d_flags &
1874				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1875					ndisks++;
1876				}
1877			}
1878			if (ndisks == 0) {
1879				/* No valid disks found, destroy device. */
1880				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1881				return;
1882			}
1883		} else {
1884			/* Cancel timeout. */
1885			callout_drain(&sc->sc_callout);
1886		}
1887
1888		/*
1889		 * Find disk with the biggest syncid.
1890		 */
1891		syncid = 0;
1892		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1893			if (disk->d_sync.ds_syncid > syncid)
1894				syncid = disk->d_sync.ds_syncid;
1895		}
1896
1897		/*
1898		 * Here we need to look for dirty disks and if all disks
1899		 * with the biggest syncid are dirty, we have to choose
1900		 * one with the biggest priority and rebuild the rest.
1901		 */
1902		/*
1903		 * Find the number of dirty disks with the biggest syncid.
1904		 * Find the number of disks with the biggest syncid.
1905		 * While here, find a disk with the biggest priority.
1906		 */
1907		dirty = ndisks = 0;
1908		pdisk = NULL;
1909		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1910			if (disk->d_sync.ds_syncid != syncid)
1911				continue;
1912			if ((disk->d_flags &
1913			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1914				continue;
1915			}
1916			ndisks++;
1917			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1918				dirty++;
1919				if (pdisk == NULL ||
1920				    pdisk->d_priority < disk->d_priority) {
1921					pdisk = disk;
1922				}
1923			}
1924		}
1925		if (dirty == 0) {
1926			/* No dirty disks at all, great. */
1927		} else if (dirty == ndisks) {
1928			/*
1929			 * Force synchronization for all dirty disks except one
1930			 * with the biggest priority.
1931			 */
1932			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1933			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1934			    "master disk for synchronization.",
1935			    g_mirror_get_diskname(pdisk), sc->sc_name);
1936			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1937				if (disk->d_sync.ds_syncid != syncid)
1938					continue;
1939				if ((disk->d_flags &
1940				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1941					continue;
1942				}
1943				KASSERT((disk->d_flags &
1944				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1945				    ("Disk %s isn't marked as dirty.",
1946				    g_mirror_get_diskname(disk)));
1947				/* Skip the disk with the biggest priority. */
1948				if (disk == pdisk)
1949					continue;
1950				disk->d_sync.ds_syncid = 0;
1951			}
1952		} else if (dirty < ndisks) {
1953			/*
1954			 * Force synchronization for all dirty disks.
1955			 * We have some non-dirty disks.
1956			 */
1957			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1958				if (disk->d_sync.ds_syncid != syncid)
1959					continue;
1960				if ((disk->d_flags &
1961				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1962					continue;
1963				}
1964				if ((disk->d_flags &
1965				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1966					continue;
1967				}
1968				disk->d_sync.ds_syncid = 0;
1969			}
1970		}
1971
1972		/* Reset hint. */
1973		sc->sc_hint = NULL;
1974		sc->sc_syncid = syncid;
1975		if (force) {
1976			/* Remember to bump syncid on first write. */
1977			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1978		}
1979		state = G_MIRROR_DEVICE_STATE_RUNNING;
1980		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1981		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1982		    g_mirror_device_state2str(state));
1983		sc->sc_state = state;
1984		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1985			state = g_mirror_determine_state(disk);
1986			g_mirror_event_send(disk, state,
1987			    G_MIRROR_EVENT_DONTWAIT);
1988			if (state == G_MIRROR_DISK_STATE_STALE) {
1989				sc->sc_bump_syncid =
1990				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1991			}
1992		}
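		/*
		 * Wake up any thread sleeping on the class, e.g. one waiting
		 * in g_mirror_rootwait() for the device to become usable.
		 */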
1993		wakeup(&g_mirror_class);
1994		break;
1995	    }
1996	case G_MIRROR_DEVICE_STATE_RUNNING:
1997		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1998		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1999			/*
2000			 * No active disks or no disks at all,
2001			 * so destroy device.
2002			 */
2003			if (sc->sc_provider != NULL)
2004				g_mirror_destroy_provider(sc);
2005			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2006			break;
2007		} else if (g_mirror_ndisks(sc,
2008		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2009		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2010			/*
2011			 * We have active disks, launch provider if it doesn't
2012			 * exist.
2013			 */
2014			if (sc->sc_provider == NULL)
2015				g_mirror_launch_provider(sc);
2016		}
2017		/*
2018		 * Bump syncid here, if we need to do it immediately.
2019		 */
2020		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
2021			sc->sc_bump_syncid = 0;
2022			g_mirror_bump_syncid(sc);
2023		}
2024		break;
2025	default:
2026		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2027		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2028		break;
2029	}
2030}
2031
2032/*
2033 * Update disk state and device state if needed.
2034 */
2035#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2036	"Disk %s state changed from %s to %s (device %s).",		\
2037	g_mirror_get_diskname(disk),					\
2038	g_mirror_disk_state2str(disk->d_state),				\
2039	g_mirror_disk_state2str(state), sc->sc_name)
2040static int
2041g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2042{
2043	struct g_mirror_softc *sc;
2044
2045	g_topology_assert();
2046
2047	sc = disk->d_softc;
2048again:
2049	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2050	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2051	    g_mirror_disk_state2str(state));
2052	switch (state) {
2053	case G_MIRROR_DISK_STATE_NEW:
2054		/*
2055		 * Possible scenarios:
2056		 * 1. A new disk arrives.
2057		 */
2058		/* Previous state should be NONE. */
2059		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2060		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2061		    g_mirror_disk_state2str(disk->d_state)));
2062		DISK_STATE_CHANGED();
2063
2064		disk->d_state = state;
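		/*
		 * Keep the disk list sorted by descending priority: insert the
		 * new disk before the first entry whose priority does not
		 * exceed its own, or at the tail if there is no such entry.
		 */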
2065		if (LIST_EMPTY(&sc->sc_disks))
2066			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2067		else {
2068			struct g_mirror_disk *dp;
2069
2070			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2071				if (disk->d_priority >= dp->d_priority) {
2072					LIST_INSERT_BEFORE(dp, disk, d_next);
2073					dp = NULL;
2074					break;
2075				}
2076				if (LIST_NEXT(dp, d_next) == NULL)
2077					break;
2078			}
2079			if (dp != NULL)
2080				LIST_INSERT_AFTER(dp, disk, d_next);
2081		}
2082		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2083		    sc->sc_name, g_mirror_get_diskname(disk));
2084		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2085			break;
2086		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2087		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2088		    g_mirror_device_state2str(sc->sc_state),
2089		    g_mirror_get_diskname(disk),
2090		    g_mirror_disk_state2str(disk->d_state)));
2091		state = g_mirror_determine_state(disk);
2092		if (state != G_MIRROR_DISK_STATE_NONE)
2093			goto again;
2094		break;
2095	case G_MIRROR_DISK_STATE_ACTIVE:
2096		/*
2097		 * Possible scenarios:
2098		 * 1. New disk does not need synchronization.
2099		 * 2. Synchronization process finished successfully.
2100		 */
2101		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2102		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2103		    g_mirror_device_state2str(sc->sc_state),
2104		    g_mirror_get_diskname(disk),
2105		    g_mirror_disk_state2str(disk->d_state)));
2106		/* Previous state should be NEW or SYNCHRONIZING. */
2107		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2108		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2109		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2110		    g_mirror_disk_state2str(disk->d_state)));
2111		DISK_STATE_CHANGED();
2112
2113		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2114			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2115		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2116			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2117			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2118			g_mirror_sync_stop(disk, 0);
2119		}
2120		disk->d_state = state;
2121		disk->d_sync.ds_offset = 0;
2122		disk->d_sync.ds_offset_done = 0;
2123		g_mirror_update_access(disk);
2124		g_mirror_update_metadata(disk);
2125		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2126		    sc->sc_name, g_mirror_get_diskname(disk));
2127		break;
2128	case G_MIRROR_DISK_STATE_STALE:
2129		/*
2130		 * Possible scenarios:
2131		 * 1. Stale disk was connected.
2132		 */
2133		/* Previous state should be NEW. */
2134		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2135		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2136		    g_mirror_disk_state2str(disk->d_state)));
2137		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2138		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2139		    g_mirror_device_state2str(sc->sc_state),
2140		    g_mirror_get_diskname(disk),
2141		    g_mirror_disk_state2str(disk->d_state)));
2142		/*
2143		 * STALE state is only possible if device is marked
2144		 * NOAUTOSYNC.
2145		 */
2146		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2147		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2148		    g_mirror_device_state2str(sc->sc_state),
2149		    g_mirror_get_diskname(disk),
2150		    g_mirror_disk_state2str(disk->d_state)));
2151		DISK_STATE_CHANGED();
2152
2153		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2154		disk->d_state = state;
2155		g_mirror_update_metadata(disk);
2156		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2157		    sc->sc_name, g_mirror_get_diskname(disk));
2158		break;
2159	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2160		/*
2161		 * Possible scenarios:
2162		 * 1. Disk which needs synchronization was connected.
2163		 */
2164		/* Previous state should be NEW. */
2165		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2166		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2167		    g_mirror_disk_state2str(disk->d_state)));
2168		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2169		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2170		    g_mirror_device_state2str(sc->sc_state),
2171		    g_mirror_get_diskname(disk),
2172		    g_mirror_disk_state2str(disk->d_state)));
2173		DISK_STATE_CHANGED();
2174
2175		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2176			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2177		disk->d_state = state;
2178		if (sc->sc_provider != NULL) {
2179			g_mirror_sync_start(disk);
2180			g_mirror_update_metadata(disk);
2181		}
2182		break;
2183	case G_MIRROR_DISK_STATE_DISCONNECTED:
2184		/*
2185		 * Possible scenarios:
2186		 * 1. Device wasn't running yet, but a disk disappeared.
2187		 * 2. Disk was active and disappeared.
2188		 * 3. Disk disappeared during the synchronization process.
2189		 */
2190		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2191			/*
2192			 * Previous state should be ACTIVE, STALE or
2193			 * SYNCHRONIZING.
2194			 */
2195			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2196			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2197			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2198			    ("Wrong disk state (%s, %s).",
2199			    g_mirror_get_diskname(disk),
2200			    g_mirror_disk_state2str(disk->d_state)));
2201		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2202			/* Previous state should be NEW. */
2203			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2204			    ("Wrong disk state (%s, %s).",
2205			    g_mirror_get_diskname(disk),
2206			    g_mirror_disk_state2str(disk->d_state)));
2207			/*
2208			 * Reset bumping syncid if disk disappeared in STARTING
2209			 * state.
2210			 */
2211			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2212				sc->sc_bump_syncid = 0;
2213#ifdef	INVARIANTS
2214		} else {
2215			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2216			    sc->sc_name,
2217			    g_mirror_device_state2str(sc->sc_state),
2218			    g_mirror_get_diskname(disk),
2219			    g_mirror_disk_state2str(disk->d_state)));
2220#endif
2221		}
2222		DISK_STATE_CHANGED();
2223		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2224		    sc->sc_name, g_mirror_get_diskname(disk));
2225
2226		g_mirror_destroy_disk(disk);
2227		break;
2228	case G_MIRROR_DISK_STATE_DESTROY:
2229	    {
2230		int error;
2231
2232		error = g_mirror_clear_metadata(disk);
2233		if (error != 0)
2234			return (error);
2235		DISK_STATE_CHANGED();
2236		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2237		    sc->sc_name, g_mirror_get_diskname(disk));
2238
2239		g_mirror_destroy_disk(disk);
2240		sc->sc_ndisks--;
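		/*
		 * The number of components changed, so refresh the metadata
		 * stored on the remaining disks.
		 */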
2241		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2242			g_mirror_update_metadata(disk);
2243		}
2244		break;
2245	    }
2246	default:
2247		KASSERT(1 == 0, ("Unknown state (%u).", state));
2248		break;
2249	}
2250	return (0);
2251}
2252#undef	DISK_STATE_CHANGED
2253
2254static int
2255g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2256{
2257	struct g_provider *pp;
2258	u_char *buf;
2259	int error;
2260
2261	g_topology_assert();
2262
2263	error = g_access(cp, 1, 0, 0);
2264	if (error != 0)
2265		return (error);
2266	pp = cp->provider;
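	/*
	 * Drop the topology lock around the actual I/O; g_read_data()
	 * waits for the read request to complete.
	 */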
2267	g_topology_unlock();
2268	/* Metadata are stored on the last sector. */
2269	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2270	    &error);
2271	g_topology_lock();
2272	if (buf == NULL) {
2273		g_access(cp, -1, 0, 0);
2274		return (error);
2275	}
2276	if (error != 0) {
2277		g_access(cp, -1, 0, 0);
2278		g_free(buf);
2279		return (error);
2280	}
2281	error = g_access(cp, -1, 0, 0);
2282	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2283
2284	/* Decode metadata. */
2285	error = mirror_metadata_decode(buf, md);
2286	g_free(buf);
2287	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2288		return (EINVAL);
2289	if (error != 0) {
2290		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2291		    cp->provider->name);
2292		return (error);
2293	}
2294
2295	return (0);
2296}
2297
2298static int
2299g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2300    struct g_mirror_metadata *md)
2301{
2302
2303	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2304		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2305		    pp->name, md->md_did);
2306		return (EEXIST);
2307	}
2308	if (md->md_all != sc->sc_ndisks) {
2309		G_MIRROR_DEBUG(1,
2310		    "Invalid '%s' field on disk %s (device %s), skipping.",
2311		    "md_all", pp->name, sc->sc_name);
2312		return (EINVAL);
2313	}
2314	if (md->md_slice != sc->sc_slice) {
2315		G_MIRROR_DEBUG(1,
2316		    "Invalid '%s' field on disk %s (device %s), skipping.",
2317		    "md_slice", pp->name, sc->sc_name);
2318		return (EINVAL);
2319	}
2320	if (md->md_balance != sc->sc_balance) {
2321		G_MIRROR_DEBUG(1,
2322		    "Invalid '%s' field on disk %s (device %s), skipping.",
2323		    "md_balance", pp->name, sc->sc_name);
2324		return (EINVAL);
2325	}
2326	if (md->md_mediasize != sc->sc_mediasize) {
2327		G_MIRROR_DEBUG(1,
2328		    "Invalid '%s' field on disk %s (device %s), skipping.",
2329		    "md_mediasize", pp->name, sc->sc_name);
2330		return (EINVAL);
2331	}
2332	if (sc->sc_mediasize > pp->mediasize) {
2333		G_MIRROR_DEBUG(1,
2334		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2335		    sc->sc_name);
2336		return (EINVAL);
2337	}
2338	if (md->md_sectorsize != sc->sc_sectorsize) {
2339		G_MIRROR_DEBUG(1,
2340		    "Invalid '%s' field on disk %s (device %s), skipping.",
2341		    "md_sectorsize", pp->name, sc->sc_name);
2342		return (EINVAL);
2343	}
2344	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2345		G_MIRROR_DEBUG(1,
2346		    "Invalid sector size of disk %s (device %s), skipping.",
2347		    pp->name, sc->sc_name);
2348		return (EINVAL);
2349	}
2350	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2351		G_MIRROR_DEBUG(1,
2352		    "Invalid device flags on disk %s (device %s), skipping.",
2353		    pp->name, sc->sc_name);
2354		return (EINVAL);
2355	}
2356	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2357		G_MIRROR_DEBUG(1,
2358		    "Invalid disk flags on disk %s (device %s), skipping.",
2359		    pp->name, sc->sc_name);
2360		return (EINVAL);
2361	}
2362	return (0);
2363}
2364
2365static int
2366g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2367    struct g_mirror_metadata *md)
2368{
2369	struct g_mirror_disk *disk;
2370	int error;
2371
2372	g_topology_assert();
2373	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2374
2375	error = g_mirror_check_metadata(sc, pp, md);
2376	if (error != 0)
2377		return (error);
2378	disk = g_mirror_init_disk(sc, pp, md, &error);
2379	if (disk == NULL)
2380		return (error);
2381	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2382	    G_MIRROR_EVENT_WAIT);
2383	return (error);
2384}
2385
2386static int
2387g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2388{
2389	struct g_mirror_softc *sc;
2390	struct g_mirror_disk *disk;
2391	int dcr, dcw, dce, err, error;
2392
2393	g_topology_assert();
2394	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2395	    acw, ace);
2396
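	/*
	 * dcr/dcw/dce are the provider's access counts as they will look
	 * once this request has been applied.
	 */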
2397	dcr = pp->acr + acr;
2398	dcw = pp->acw + acw;
2399	dce = pp->ace + ace;
2400
2401	/* On first open, grab an extra "exclusive" bit */
2402	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2403		ace++;
2404	/* ... and let go of it on last close */
2405	if (dcr == 0 && dcw == 0 && dce == 0)
2406		ace--;
2407
2408	sc = pp->geom->softc;
2409	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2410		if (acr <= 0 && acw <= 0 && ace <= 0)
2411			return (0);
2412		else
2413			return (ENXIO);
2414	}
2415	error = ENXIO;
2416	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2417		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2418			continue;
2419		err = g_access(disk->d_consumer, acr, acw, ace);
2420		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2421		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2422		if (err == 0) {
2423			/*
2424			 * Mark disk as dirty on open and unmark on close.
2425			 */
2426			if (pp->acw == 0 && dcw > 0) {
2427				G_MIRROR_DEBUG(1,
2428				    "Disk %s (device %s) marked as dirty.",
2429				    g_mirror_get_diskname(disk), sc->sc_name);
2430				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2431				g_mirror_update_metadata(disk);
2432			} else if (pp->acw > 0 && dcw == 0) {
2433				G_MIRROR_DEBUG(1,
2434				    "Disk %s (device %s) marked as clean.",
2435				    g_mirror_get_diskname(disk), sc->sc_name);
2436				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2437				g_mirror_update_metadata(disk);
2438			}
2439			error = 0;
2440		} else {
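			/*
			 * Opening this component failed; remember to bump the
			 * syncid on the first write and schedule the disk for
			 * disconnection.
			 */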
2441			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2442			g_mirror_event_send(disk,
2443			    G_MIRROR_DISK_STATE_DISCONNECTED,
2444			    G_MIRROR_EVENT_DONTWAIT);
2445		}
2446	}
2447	/*
2448	 * Be sure to return 0 for negative access requests.
2449	 * In case of some HW problems, it is possible that we don't have
2450	 * any active disk here, so the loop above will be a no-op and the
2451	 * error will still be ENXIO.
2452	 */
2453	if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
2454		error = 0;
2455	return (error);
2456}
2457
2458static struct g_geom *
2459g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2460{
2461	struct g_mirror_softc *sc;
2462	struct g_geom *gp;
2463	int error, timeout;
2464
2465	g_topology_assert();
2466	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2467	    md->md_mid);
2468
2469	/* One disk is minimum. */
2470	if (md->md_all < 1)
2471		return (NULL);
2472	/*
2473	 * Action geom.
2474	 */
2475	gp = g_new_geomf(mp, "%s", md->md_name);
2476	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2477	gp->start = g_mirror_start;
2478	gp->spoiled = g_mirror_spoiled;
2479	gp->orphan = g_mirror_orphan;
2480	gp->access = g_mirror_access;
2481	gp->dumpconf = g_mirror_dumpconf;
2482
2483	sc->sc_id = md->md_mid;
2484	sc->sc_slice = md->md_slice;
2485	sc->sc_balance = md->md_balance;
2486	sc->sc_mediasize = md->md_mediasize;
2487	sc->sc_sectorsize = md->md_sectorsize;
2488	sc->sc_ndisks = md->md_all;
2489	sc->sc_flags = md->md_mflags;
2490	sc->sc_bump_syncid = 0;
2491	sc->sc_idle = 0;
2492	bioq_init(&sc->sc_queue);
2493	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2494	LIST_INIT(&sc->sc_disks);
2495	TAILQ_INIT(&sc->sc_events);
2496	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2497	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2498	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2499	gp->softc = sc;
2500	sc->sc_geom = gp;
2501	sc->sc_provider = NULL;
2502	/*
2503	 * Synchronization geom.
2504	 */
2505	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2506	gp->softc = sc;
2507	gp->orphan = g_mirror_orphan;
2508	sc->sc_sync.ds_geom = gp;
2509	sc->sc_sync.ds_ndisks = 0;
2510	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2511	    "g_mirror %s", md->md_name);
2512	if (error != 0) {
2513		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2514		    sc->sc_name);
2515		g_destroy_geom(sc->sc_sync.ds_geom);
2516		mtx_destroy(&sc->sc_events_mtx);
2517		mtx_destroy(&sc->sc_queue_mtx);
2518		g_destroy_geom(sc->sc_geom);
2519		free(sc, M_MIRROR);
2520		return (NULL);
2521	}
2522
2523	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2524
2525	/*
2526	 * Arm the start-up timeout (kern.geom.mirror.timeout seconds).
2527	 */
2528	timeout = g_mirror_timeout * hz;
2529	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2530	return (sc->sc_geom);
2531}
2532
2533int
2534g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2535{
2536	struct g_provider *pp;
2537
2538	g_topology_assert();
2539
2540	if (sc == NULL)
2541		return (ENXIO);
2542	pp = sc->sc_provider;
2543	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2544		if (force) {
2545			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2546			    "cannot be removed definitively.", pp->name);
2547		} else {
2548			G_MIRROR_DEBUG(1,
2549			    "Device %s is still open (r%dw%de%d).", pp->name,
2550			    pp->acr, pp->acw, pp->ace);
2551			return (EBUSY);
2552		}
2553	}
2554
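	/*
	 * Ask the worker thread to tear the device down: set the DESTROY and
	 * WAIT flags, wake it up and then wait until it exits before freeing
	 * the softc.
	 */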
2555	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2556	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2557	g_topology_unlock();
2558	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2559	mtx_lock(&sc->sc_queue_mtx);
2560	wakeup(sc);
2561	mtx_unlock(&sc->sc_queue_mtx);
2562	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2563	while (sc->sc_worker != NULL)
2564		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2565	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2566	g_topology_lock();
2567	g_mirror_destroy_device(sc);
2568	free(sc, M_MIRROR);
2569	return (0);
2570}
2571
2572static void
2573g_mirror_taste_orphan(struct g_consumer *cp)
2574{
2575
2576	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2577	    cp->provider->name));
2578}
2579
2580static struct g_geom *
2581g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2582{
2583	struct g_mirror_metadata md;
2584	struct g_mirror_softc *sc;
2585	struct g_consumer *cp;
2586	struct g_geom *gp;
2587	int error;
2588
2589	g_topology_assert();
2590	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2591	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2592
2593	gp = g_new_geomf(mp, "mirror:taste");
2594	/*
2595	 * This orphan function should never be called.
2596	 */
2597	gp->orphan = g_mirror_taste_orphan;
2598	cp = g_new_consumer(gp);
2599	g_attach(cp, pp);
2600	error = g_mirror_read_metadata(cp, &md);
2601	g_detach(cp);
2602	g_destroy_consumer(cp);
2603	g_destroy_geom(gp);
2604	if (error != 0)
2605		return (NULL);
2606	gp = NULL;
2607
2608	if (md.md_version > G_MIRROR_VERSION) {
2609		printf("geom_mirror.ko module is too old to handle %s.\n",
2610		    pp->name);
2611		return (NULL);
2612	}
2613	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2614		return (NULL);
2615	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2616		G_MIRROR_DEBUG(0,
2617		    "Device %s: provider %s marked as inactive, skipping.",
2618		    md.md_name, pp->name);
2619		return (NULL);
2620	}
2621	if (g_mirror_debug >= 2)
2622		mirror_metadata_dump(&md);
2623
2624	/*
2625	 * Let's check if device already exists.
2626	 */
2627	sc = NULL;
2628	LIST_FOREACH(gp, &mp->geom, geom) {
2629		sc = gp->softc;
2630		if (sc == NULL)
2631			continue;
2632		if (sc->sc_sync.ds_geom == gp)
2633			continue;
2634		if (strcmp(md.md_name, sc->sc_name) != 0)
2635			continue;
2636		if (md.md_mid != sc->sc_id) {
2637			G_MIRROR_DEBUG(0, "Device %s already configured.",
2638			    sc->sc_name);
2639			return (NULL);
2640		}
2641		break;
2642	}
2643	if (gp == NULL) {
2644		gp = g_mirror_create(mp, &md);
2645		if (gp == NULL) {
2646			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2647			    md.md_name);
2648			return (NULL);
2649		}
2650		sc = gp->softc;
2651	}
2652	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2653	error = g_mirror_add_disk(sc, pp, &md);
2654	if (error != 0) {
2655		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2656		    pp->name, gp->name, error);
2657		if (LIST_EMPTY(&sc->sc_disks))
2658			g_mirror_destroy(sc, 1);
2659		return (NULL);
2660	}
2661	return (gp);
2662}
2663
2664static int
2665g_mirror_destroy_geom(struct gctl_req *req __unused,
2666    struct g_class *mp __unused, struct g_geom *gp)
2667{
2668
2669	return (g_mirror_destroy(gp->softc, 0));
2670}
2671
2672static void
2673g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2674    struct g_consumer *cp, struct g_provider *pp)
2675{
2676	struct g_mirror_softc *sc;
2677
2678	g_topology_assert();
2679
2680	sc = gp->softc;
2681	if (sc == NULL)
2682		return;
2683	/* Skip synchronization geom. */
2684	if (gp == sc->sc_sync.ds_geom)
2685		return;
2686	if (pp != NULL) {
2687		/* Nothing here. */
2688	} else if (cp != NULL) {
2689		struct g_mirror_disk *disk;
2690
2691		disk = cp->private;
2692		if (disk == NULL)
2693			return;
2694		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
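		/*
		 * For a synchronizing disk report progress as the percentage
		 * of the mirror's media size copied so far.
		 */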
2695		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2696			sbuf_printf(sb, "%s<Synchronized>", indent);
2697			if (disk->d_sync.ds_offset_done == 0)
2698				sbuf_printf(sb, "0%%");
2699			else {
2700				sbuf_printf(sb, "%u%%",
2701				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2702				    sc->sc_provider->mediasize));
2703			}
2704			sbuf_printf(sb, "</Synchronized>\n");
2705		}
2706		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2707		    disk->d_sync.ds_syncid);
2708		sbuf_printf(sb, "%s<Flags>", indent);
2709		if (disk->d_flags == 0)
2710			sbuf_printf(sb, "NONE");
2711		else {
2712			int first = 1;
2713
2714#define	ADD_FLAG(flag, name)	do {					\
2715	if ((disk->d_flags & (flag)) != 0) {				\
2716		if (!first)						\
2717			sbuf_printf(sb, ", ");				\
2718		else							\
2719			first = 0;					\
2720		sbuf_printf(sb, name);					\
2721	}								\
2722} while (0)
2723			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2724			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2725			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2726			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2727			    "SYNCHRONIZING");
2728			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2729#undef	ADD_FLAG
2730		}
2731		sbuf_printf(sb, "</Flags>\n");
2732		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2733		    disk->d_priority);
2734		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2735		    g_mirror_disk_state2str(disk->d_state));
2736	} else {
2737		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2738		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2739		sbuf_printf(sb, "%s<Flags>", indent);
2740		if (sc->sc_flags == 0)
2741			sbuf_printf(sb, "NONE");
2742		else {
2743			int first = 1;
2744
2745#define	ADD_FLAG(flag, name)	do {					\
2746	if ((sc->sc_flags & (flag)) != 0) {				\
2747		if (!first)						\
2748			sbuf_printf(sb, ", ");				\
2749		else							\
2750			first = 0;					\
2751		sbuf_printf(sb, name);					\
2752	}								\
2753} while (0)
2754			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2755#undef	ADD_FLAG
2756		}
2757		sbuf_printf(sb, "</Flags>\n");
2758		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2759		    (u_int)sc->sc_slice);
2760		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2761		    balance_name(sc->sc_balance));
2762		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2763		    sc->sc_ndisks);
2764		sbuf_printf(sb, "%s<State>", indent);
2765		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2766			sbuf_printf(sb, "%s", "STARTING");
2767		else if (sc->sc_ndisks ==
2768		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2769			sbuf_printf(sb, "%s", "COMPLETE");
2770		else
2771			sbuf_printf(sb, "%s", "DEGRADED");
2772		sbuf_printf(sb, "</State>\n");
2773	}
2774}
2775
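/*
 * Check whether every existing mirror already has a usable provider;
 * g_mirror_rootwait() below uses this to decide when to stop waiting.
 */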
2776static int
2777g_mirror_can_go(void)
2778{
2779	struct g_mirror_softc *sc;
2780	struct g_geom *gp;
2781	struct g_provider *pp;
2782	int can_go;
2783
2784	DROP_GIANT();
2785	can_go = 1;
2786	g_topology_lock();
2787	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2788		sc = gp->softc;
2789		if (sc == NULL) {
2790			can_go = 0;
2791			break;
2792		}
2793		pp = sc->sc_provider;
2794		if (pp == NULL || pp->error != 0) {
2795			can_go = 0;
2796			break;
2797		}
2798	}
2799	g_topology_unlock();
2800	PICKUP_GIANT();
2801	return (can_go);
2802}
2803
2804static void
2805g_mirror_rootwait(void)
2806{
2807
2808	/*
2809	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called
2810	 * HACK: before we get providers for tasting.
2811	 */
2812	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2813	/*
2814	 * Wait for mirrors in degraded state.
2815	 */
2816	for (;;) {
2817		if (g_mirror_can_go())
2818			break;
2819		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2820	}
2821}
2822
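/*
 * Register g_mirror_rootwait() to run at SI_SUB_RAID during boot, so that
 * mirrors get a chance to assemble before the root file system is mounted.
 */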
2823SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)
2824
2825DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2826