/* g_mirror.c revision 137248 */
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 137248 2004-11-05 09:05:15Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/bitstring.h>
41#include <vm/uma.h>
42#include <machine/atomic.h>
43#include <geom/geom.h>
44#include <sys/proc.h>
45#include <sys/kthread.h>
46#include <geom/mirror/g_mirror.h>
47
48
static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
/* Verbosity of G_MIRROR_DEBUG()/G_MIRROR_LOGREQ(); loader tunable + sysctl. */
u_int g_mirror_debug = 0;
TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* Seconds to wait for all components to appear (see sysctl description). */
static u_int g_mirror_timeout = 4;
TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Ratio of regular I/O requests served per one synchronization request. */
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
/* Cap on synchronization requests issued per second. */
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");

/*
 * msleep() wrapper which logs falling asleep and waking up at debug
 * level 4.  Multi-statement macro, hence the do/while(0) wrapper.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
75
76
/* Forward declarations needed by the class method table below. */
static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;

/* GEOM class descriptor for gmirror. */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom
};


/* Forward declarations for routines defined later in this file. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
96
97
98static const char *
99g_mirror_disk_state2str(int state)
100{
101
102	switch (state) {
103	case G_MIRROR_DISK_STATE_NONE:
104		return ("NONE");
105	case G_MIRROR_DISK_STATE_NEW:
106		return ("NEW");
107	case G_MIRROR_DISK_STATE_ACTIVE:
108		return ("ACTIVE");
109	case G_MIRROR_DISK_STATE_STALE:
110		return ("STALE");
111	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
112		return ("SYNCHRONIZING");
113	case G_MIRROR_DISK_STATE_DISCONNECTED:
114		return ("DISCONNECTED");
115	case G_MIRROR_DISK_STATE_DESTROY:
116		return ("DESTROY");
117	default:
118		return ("INVALID");
119	}
120}
121
122static const char *
123g_mirror_device_state2str(int state)
124{
125
126	switch (state) {
127	case G_MIRROR_DEVICE_STATE_STARTING:
128		return ("STARTING");
129	case G_MIRROR_DEVICE_STATE_RUNNING:
130		return ("RUNNING");
131	default:
132		return ("INVALID");
133	}
134}
135
136static const char *
137g_mirror_get_diskname(struct g_mirror_disk *disk)
138{
139
140	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
141		return ("[unknown]");
142	return (disk->d_name);
143}
144
/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain disks and device status
 * from one thread to simplify locking.
 */

/* Release an event structure allocated by g_mirror_event_send(). */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
156
/*
 * Queue a state-change event for the worker thread.  'arg' is the softc
 * when G_MIRROR_EVENT_DEVICE is set in 'flags', otherwise a disk.
 * With G_MIRROR_EVENT_DONTWAIT the call returns 0 immediately;
 * otherwise it drops the topology lock and sleeps until the event is
 * marked G_MIRROR_EVENT_DONE, then returns the event's error status.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	/* Enqueue the event, then poke the worker sleeping on 'sc'. */
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		/* PDROP: msleep() releases sc_events_mtx on return. */
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_mirror_event_free(ep);
	return (error);
}
201
202static struct g_mirror_event *
203g_mirror_event_get(struct g_mirror_softc *sc)
204{
205	struct g_mirror_event *ep;
206
207	mtx_lock(&sc->sc_events_mtx);
208	ep = TAILQ_FIRST(&sc->sc_events);
209	if (ep != NULL)
210		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
211	mtx_unlock(&sc->sc_events_mtx);
212	return (ep);
213}
214
215static void
216g_mirror_event_cancel(struct g_mirror_disk *disk)
217{
218	struct g_mirror_softc *sc;
219	struct g_mirror_event *ep, *tmpep;
220
221	g_topology_assert();
222
223	sc = disk->d_softc;
224	mtx_lock(&sc->sc_events_mtx);
225	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
226		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
227			continue;
228		if (ep->e_disk != disk)
229			continue;
230		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
231		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
232			g_mirror_event_free(ep);
233		else {
234			ep->e_error = ECANCELED;
235			wakeup(ep);
236		}
237	}
238	mtx_unlock(&sc->sc_events_mtx);
239}
240
241/*
242 * Return the number of disks in given state.
243 * If state is equal to -1, count all connected disks.
244 */
245u_int
246g_mirror_ndisks(struct g_mirror_softc *sc, int state)
247{
248	struct g_mirror_disk *disk;
249	u_int n = 0;
250
251	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
252		if (state == -1 || disk->d_state == state)
253			n++;
254	}
255	return (n);
256}
257
258/*
259 * Find a disk in mirror by its disk ID.
260 */
261static struct g_mirror_disk *
262g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
263{
264	struct g_mirror_disk *disk;
265
266	g_topology_assert();
267
268	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
269		if (disk->d_id == id)
270			return (disk);
271	}
272	return (NULL);
273}
274
275static u_int
276g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
277{
278	struct bio *bp;
279	u_int nreqs = 0;
280
281	mtx_lock(&sc->sc_queue_mtx);
282	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
283		if (bp->bio_from == cp)
284			nreqs++;
285	}
286	mtx_unlock(&sc->sc_queue_mtx);
287	return (nreqs);
288}
289
290static int
291g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
292{
293
294	if (cp->index > 0) {
295		G_MIRROR_DEBUG(2,
296		    "I/O requests for %s exist, can't destroy it now.",
297		    cp->provider->name);
298		return (1);
299	}
300	if (g_mirror_nrequests(sc, cp) > 0) {
301		G_MIRROR_DEBUG(2,
302		    "I/O requests for %s in queue, can't destroy it now.",
303		    cp->provider->name);
304		return (1);
305	}
306	return (0);
307}
308
/*
 * Detach and destroy the consumer, unless it is still busy, in which
 * case only its disk back-pointer is cleared; the completion paths
 * (g_mirror_regular_request()/g_mirror_sync_request()) retry the kill
 * when they see a consumer with a NULL private pointer.
 */
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}
322
/*
 * Create a consumer for 'disk' inside the mirror geom and attach it to
 * provider 'pp'.  On g_attach() failure the consumer is left allocated
 * (with no provider); the caller cleans it up through
 * g_mirror_disconnect_consumer() (see g_mirror_init_disk()'s fail path).
 */
static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	int error;

	g_topology_assert();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
	disk->d_consumer->private = disk;
	/* No I/O in flight yet; 'index' counts outstanding requests. */
	disk->d_consumer->index = 0;
	error = g_attach(disk->d_consumer, pp);
	if (error != 0)
		return (error);
	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}
341
/*
 * Close, detach and destroy the consumer.  Handles all partial states:
 * NULL consumer (no-op), attached consumer with open access counts
 * (access is dropped first), and a consumer that never got attached
 * (destroyed outright).
 */
static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL) {
		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
			    0);
			/* Drop whatever access counts are currently held. */
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
		}
		g_mirror_kill_consumer(sc, cp);
	} else {
		g_destroy_consumer(cp);
	}
}
363
/*
 * Initialize disk. This means allocate memory, create consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 *
 * Disk fields are populated from the on-disk metadata 'md'.  On failure
 * NULL is returned and, when 'errorp' is not NULL, *errorp holds the
 * error; on success *errorp is set to 0.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int error;

	/* M_NOWAIT: allocation may fail, handled via the fail path. */
	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	/* Zero delay and fresh timestamp for load-balance bookkeeping. */
	disk->d_delay.sec = 0;
	disk->d_delay.frac = 0;
	binuptime(&disk->d_last_used);
	disk->d_flags = md->md_dflags;
	/* A non-empty provider name in metadata marks a hardcoded disk. */
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_resync = -1;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL) {
		/* Handles NULL/unattached d_consumer as well. */
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
	}
	return (NULL);
}
410
/*
 * Remove the disk from the mirror and free it: unlink it from the disk
 * list, cancel pending events for it, drop its consumer and release the
 * structure.  A SYNCHRONIZING disk first has its sync stopped.
 */
static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	sc = disk->d_softc;
	/* Invalidate the round-robin hint if it points at this disk. */
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		free(disk, M_MIRROR);
		break;
	default:
		/* Any other state here indicates a state-machine bug. */
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}
439
/*
 * Tear down the whole mirror device: destroy the provider, all disks,
 * flush the event queue (waking up any sleepers with ECANCELED), and
 * wither both the mirror geom and its synchronization geom.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert();

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	/* g_mirror_destroy_disk() unlinks, so always take the list head. */
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		g_mirror_destroy_disk(disk);
	}
	/* Drain the event queue; complete waited-on events as cancelled. */
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);
	gp->softc = NULL;

	/* Drop any leftover consumers of the synchronization geom. */
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	sc->sc_sync.ds_geom->softc = NULL;
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
}
482
483static void
484g_mirror_orphan(struct g_consumer *cp)
485{
486	struct g_mirror_disk *disk;
487
488	g_topology_assert();
489
490	disk = cp->private;
491	if (disk == NULL)
492		return;
493	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
494	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
495	    G_MIRROR_EVENT_DONTWAIT);
496}
497
498static void
499g_mirror_spoiled(struct g_consumer *cp)
500{
501	struct g_mirror_disk *disk;
502
503	g_topology_assert();
504
505	disk = cp->private;
506	if (disk == NULL)
507		return;
508	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
509	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
510	    G_MIRROR_EVENT_DONTWAIT);
511}
512
513/*
514 * Function should return the next active disk on the list.
515 * It is possible that it will be the same disk as given.
516 * If there are no active disks on list, NULL is returned.
517 */
518static __inline struct g_mirror_disk *
519g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
520{
521	struct g_mirror_disk *dp;
522
523	for (dp = LIST_NEXT(disk, d_next); dp != disk;
524	    dp = LIST_NEXT(dp, d_next)) {
525		if (dp == NULL)
526			dp = LIST_FIRST(&sc->sc_disks);
527		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
528			break;
529	}
530	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
531		return (NULL);
532	return (dp);
533}
534
/*
 * Round-robin disk selector: return an active disk and advance the
 * rotating hint (sc_hint) to the next active one.  Returns NULL when
 * there are no disks or none is ACTIVE.
 */
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		/* Hint is stale; look for the next active disk. */
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}
554
/*
 * Write 'md' to the disk's metadata sector (the last sector of the
 * provider); a NULL 'md' writes a zeroed sector, i.e. clears metadata.
 * Temporarily opens the consumer w1e1 when the disk is not DIRTY (a
 * dirty disk is expected to be open already).  On write failure the
 * disk is scheduled for disconnection with an immediate syncid bump.
 */
static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int close = 0, error = 0;

	g_topology_assert();

	sc = disk->d_softc;
	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	/* Metadata live in the provider's last sector. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	/*
	 * Open consumer if it wasn't opened and remember to close it.
	 */
	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
		error = g_access(cp, 0, 1, 1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, 1, 1, error);
		if (error == 0)
			close = 1;
#ifdef	INVARIANTS
	} else {
		/* Dirty disks must already be open for write. */
		KASSERT(cp->acw > 0 && cp->ace > 0,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
#endif
	}
	if (error == 0) {
		/* NULL md leaves the sector zeroed - metadata cleared. */
		if (md != NULL)
			mirror_metadata_encode(md, sector);
		/* g_write_data() may sleep; drop the topology lock. */
		g_topology_unlock();
		error = g_write_data(cp, offset, sector, length);
		g_topology_lock();
	}
	free(sector, M_MIRROR);
	if (close) {
		g_access(cp, 0, -1, -1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, -1, -1, 0);
	}
	if (error != 0) {
		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
	return (error);
}
610
611static int
612g_mirror_clear_metadata(struct g_mirror_disk *disk)
613{
614	int error;
615
616	g_topology_assert();
617	error = g_mirror_write_metadata(disk, NULL);
618	if (error == 0) {
619		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
620		    g_mirror_get_diskname(disk));
621	} else {
622		G_MIRROR_DEBUG(0,
623		    "Cannot clear metadata on disk %s (error=%d).",
624		    g_mirror_get_diskname(disk), error);
625	}
626	return (error);
627}
628
/*
 * Fill 'md' from the current device state plus, when 'disk' is not
 * NULL, that disk's per-disk state.  A NULL 'disk' produces metadata
 * for a brand new disk (fresh random ID, zeroed per-disk fields).
 */
void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	/* Device-wide fields. */
	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		/* New disk: random ID, everything else starts at zero. */
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		/* Sync offset is only meaningful while synchronizing. */
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		/* Hardcoded disks record their provider name on disk. */
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
	}
}
667
668void
669g_mirror_update_metadata(struct g_mirror_disk *disk)
670{
671	struct g_mirror_metadata md;
672	int error;
673
674	g_topology_assert();
675	g_mirror_fill_metadata(disk->d_softc, disk, &md);
676	error = g_mirror_write_metadata(disk, &md);
677	if (error == 0) {
678		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
679		    g_mirror_get_diskname(disk));
680	} else {
681		G_MIRROR_DEBUG(0,
682		    "Cannot update metadata on disk %s (error=%d).",
683		    g_mirror_get_diskname(disk), error);
684	}
685}
686
/*
 * Increase the device synchronization ID and push the new value to the
 * metadata of every ACTIVE and SYNCHRONIZING disk.  Must only be
 * called while at least one disk is ACTIVE.
 */
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert();
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}
708
709static void
710g_mirror_idle(struct g_mirror_softc *sc)
711{
712	struct g_mirror_disk *disk;
713
714	if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
715		return;
716	sc->sc_idle = 1;
717	g_topology_lock();
718	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
719		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
720			continue;
721		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
722		    g_mirror_get_diskname(disk), sc->sc_name);
723		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
724		g_mirror_update_metadata(disk);
725	}
726	g_topology_unlock();
727}
728
729static void
730g_mirror_unidle(struct g_mirror_softc *sc)
731{
732	struct g_mirror_disk *disk;
733
734	sc->sc_idle = 0;
735	g_topology_lock();
736	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
737		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
738			continue;
739		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
740		    g_mirror_get_diskname(disk), sc->sc_name);
741		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
742		g_mirror_update_metadata(disk);
743	}
744	g_topology_unlock();
745}
746
747static __inline int
748bintime_cmp(struct bintime *bt1, struct bintime *bt2)
749{
750
751	if (bt1->sec < bt2->sec)
752		return (-1);
753	else if (bt1->sec > bt2->sec)
754		return (1);
755	if (bt1->frac < bt2->frac)
756		return (-1);
757	else if (bt1->frac > bt2->frac)
758		return (1);
759	return (0);
760}
761
762static void
763g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
764{
765
766	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
767		return;
768	binuptime(&disk->d_delay);
769	bintime_sub(&disk->d_delay, &bp->bio_t0);
770}
771
772static void
773g_mirror_done(struct bio *bp)
774{
775	struct g_mirror_softc *sc;
776
777	sc = bp->bio_from->geom->softc;
778	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
779	mtx_lock(&sc->sc_queue_mtx);
780	bioq_disksort(&sc->sc_queue, bp);
781	wakeup(sc);
782	mtx_unlock(&sc->sc_queue_mtx);
783}
784
/*
 * Finish one component bio of a regular request and, when all clones of
 * the parent have come back, deliver the parent.  Reads that failed on
 * one component are re-queued so another disk can be tried; failed
 * writes/deletes are dropped from the parent's accounting so the
 * request still succeeds if at least one component succeeded.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	/* One less outstanding request on this consumer. */
	bp->bio_from->index--;
	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned; finish destroying it now. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			/* The failing disk gets disconnected. */
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/*
			 * Forget the failed clone entirely so the
			 * "all children inbed" checks below only count
			 * components that could still succeed.
			 */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/*
			 * Re-queue the failed read; the worker will retry
			 * it on another component.
			 */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
872
873static void
874g_mirror_sync_done(struct bio *bp)
875{
876	struct g_mirror_softc *sc;
877
878	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
879	sc = bp->bio_from->geom->softc;
880	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
881	mtx_lock(&sc->sc_queue_mtx);
882	bioq_disksort(&sc->sc_queue, bp);
883	wakeup(sc);
884	mtx_unlock(&sc->sc_queue_mtx);
885}
886
/*
 * Provider start method: accept READ/WRITE/DELETE, reject everything
 * else, and hand accepted bios to the worker thread via the queue.
 */
static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	/*
	 * If sc == NULL or there are no valid disks, provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_disksort(&sc->sc_queue, bp);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
}
918
/*
 * Send one synchronization request: read the next chunk (at most
 * MAXPHYS bytes) of the mirror provider into the disk's sync buffer.
 * Completion is handled by g_mirror_sync_done(), which re-queues the
 * bio for the worker thread.
 */
static void
g_mirror_sync_one(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct bio *bp;

	sc = disk->d_softc;
	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));

	/* g_new_bio() may fail; simply skip this round then. */
	bp = g_new_bio();
	if (bp == NULL)
		return;
	bp->bio_parent = NULL;
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = disk->d_sync.ds_offset;
	/* Clamp the last chunk to the end of the media. */
	bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
	bp->bio_cflags = 0;
	bp->bio_done = g_mirror_sync_done;
	bp->bio_data = disk->d_sync.ds_data;
	if (bp->bio_data == NULL) {
		g_destroy_bio(bp);
		return;
	}
	disk->d_sync.ds_offset += bp->bio_length;
	bp->bio_to = sc->sc_provider;
	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
	disk->d_sync.ds_consumer->index++;
	g_io_request(bp, disk->d_sync.ds_consumer);
}
953
/*
 * Handle completion of a synchronization bio.  Each sync cycle is a
 * BIO_READ from the mirror provider followed by a BIO_WRITE of the same
 * bio to the synchronizing disk; this function flips the READ into the
 * WRITE and finishes the WRITE.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/* Consumer was orphaned; finish destroying it now. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_destroy_bio(bp);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		/* Reuse the bio to write the data to the syncing disk. */
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			/* Write to the syncing disk failed; drop it. */
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		sync->ds_offset_done = bp->bio_offset + bp->bio_length;
		g_destroy_bio(bp);
		/* A pending resync takes precedence over completion. */
		if (sync->ds_resync != -1)
			break;
		if (sync->ds_offset_done == sc->sc_provider->mediasize) {
			/*
			 * Disk up-to-date, activate it.
			 */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		} else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
			/*
			 * Update offset_done on every 100 blocks.
			 * XXX: This should be configurable.
			 */
			g_topology_lock();
			g_mirror_update_metadata(disk);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1043
/*
 * "Prefer" balance algorithm: always route the request to the first
 * ACTIVE disk on the list.  Fails the bio with ENXIO when no disk is
 * active, or ENOMEM when a clone cannot be allocated.
 */
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr > 0 && cp->ace > 0,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}
1081
1082static void
1083g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1084{
1085	struct g_mirror_disk *disk;
1086	struct g_consumer *cp;
1087	struct bio *cbp;
1088
1089	disk = g_mirror_get_disk(sc);
1090	if (disk == NULL) {
1091		if (bp->bio_error == 0)
1092			bp->bio_error = ENXIO;
1093		g_io_deliver(bp, bp->bio_error);
1094		return;
1095	}
1096	cbp = g_clone_bio(bp);
1097	if (cbp == NULL) {
1098		if (bp->bio_error == 0)
1099			bp->bio_error = ENOMEM;
1100		g_io_deliver(bp, bp->bio_error);
1101		return;
1102	}
1103	/*
1104	 * Fill in the component buf structure.
1105	 */
1106	cp = disk->d_consumer;
1107	cbp->bio_done = g_mirror_done;
1108	cbp->bio_to = cp->provider;
1109	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1110	KASSERT(cp->acr > 0 && cp->ace > 0,
1111	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1112	    cp->acw, cp->ace));
1113	cp->index++;
1114	g_io_request(cbp, cp);
1115}
1116
1117static void
1118g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1119{
1120	struct g_mirror_disk *disk, *dp;
1121	struct g_consumer *cp;
1122	struct bio *cbp;
1123	struct bintime curtime;
1124
1125	binuptime(&curtime);
1126	/*
1127	 * Find a disk which the smallest load.
1128	 */
1129	disk = NULL;
1130	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1131		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1132			continue;
1133		/* If disk wasn't used for more than 2 sec, use it. */
1134		if (curtime.sec - dp->d_last_used.sec >= 2) {
1135			disk = dp;
1136			break;
1137		}
1138		if (disk == NULL ||
1139		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1140			disk = dp;
1141		}
1142	}
1143	cbp = g_clone_bio(bp);
1144	if (cbp == NULL) {
1145		if (bp->bio_error == 0)
1146			bp->bio_error = ENOMEM;
1147		g_io_deliver(bp, bp->bio_error);
1148		return;
1149	}
1150	/*
1151	 * Fill in the component buf structure.
1152	 */
1153	cp = disk->d_consumer;
1154	cbp->bio_done = g_mirror_done;
1155	cbp->bio_to = cp->provider;
1156	binuptime(&disk->d_last_used);
1157	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1158	KASSERT(cp->acr > 0 && cp->ace > 0,
1159	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1160	    cp->acw, cp->ace));
1161	cp->index++;
1162	g_io_request(cbp, cp);
1163}
1164
1165static void
1166g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1167{
1168	struct bio_queue_head queue;
1169	struct g_mirror_disk *disk;
1170	struct g_consumer *cp;
1171	struct bio *cbp;
1172	off_t left, mod, offset, slice;
1173	u_char *data;
1174	u_int ndisks;
1175
1176	if (bp->bio_length <= sc->sc_slice) {
1177		g_mirror_request_round_robin(sc, bp);
1178		return;
1179	}
1180	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1181	slice = bp->bio_length / ndisks;
1182	mod = slice % sc->sc_provider->sectorsize;
1183	if (mod != 0)
1184		slice += sc->sc_provider->sectorsize - mod;
1185	/*
1186	 * Allocate all bios before sending any request, so we can
1187	 * return ENOMEM in nice and clean way.
1188	 */
1189	left = bp->bio_length;
1190	offset = bp->bio_offset;
1191	data = bp->bio_data;
1192	bioq_init(&queue);
1193	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1194		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1195			continue;
1196		cbp = g_clone_bio(bp);
1197		if (cbp == NULL) {
1198			for (cbp = bioq_first(&queue); cbp != NULL;
1199			    cbp = bioq_first(&queue)) {
1200				bioq_remove(&queue, cbp);
1201				g_destroy_bio(cbp);
1202			}
1203			if (bp->bio_error == 0)
1204				bp->bio_error = ENOMEM;
1205			g_io_deliver(bp, bp->bio_error);
1206			return;
1207		}
1208		bioq_insert_tail(&queue, cbp);
1209		cbp->bio_done = g_mirror_done;
1210		cbp->bio_caller1 = disk;
1211		cbp->bio_to = disk->d_consumer->provider;
1212		cbp->bio_offset = offset;
1213		cbp->bio_data = data;
1214		cbp->bio_length = MIN(left, slice);
1215		left -= cbp->bio_length;
1216		if (left == 0)
1217			break;
1218		offset += cbp->bio_length;
1219		data += cbp->bio_length;
1220	}
1221	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1222		bioq_remove(&queue, cbp);
1223		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1224		disk = cbp->bio_caller1;
1225		cbp->bio_caller1 = NULL;
1226		cp = disk->d_consumer;
1227		KASSERT(cp->acr > 0 && cp->ace > 0,
1228		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1229		    cp->acr, cp->acw, cp->ace));
1230		disk->d_consumer->index++;
1231		g_io_request(cbp, disk->d_consumer);
1232	}
1233}
1234
/*
 * Dispatch a regular (non-synchronization) I/O request.  Reads go to a
 * single disk chosen by the configured balance algorithm; writes and
 * deletes are replicated to every usable disk.
 */
static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->geom->softc;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/* A write means the device is no longer idle/clean. */
		if (sc->sc_idle)
			g_mirror_unidle(sc);
		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				/*
				 * Writes beyond the synchronization point
				 * (ds_offset) are skipped here; presumably
				 * the sync thread will copy that area later.
				 * Writes overlapping the already-synced area
				 * schedule a resync (ds_resync) of the
				 * MAXPHYS-aligned region being overwritten.
				 */
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				else if (bp->bio_offset + bp->bio_length >
				    sync->ds_offset_done &&
				    (bp->bio_offset < sync->ds_resync ||
				     sync->ds_resync == -1)) {
					sync->ds_resync = bp->bio_offset -
					    (bp->bio_offset % MAXPHYS);
				}
				break;
			default:
				/* NEW, STALE, etc. don't receive writes. */
				continue;
			}
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				/* Free the clones queued so far and fail. */
				for (cbp = bioq_first(&queue); cbp != NULL;
				    cbp = bioq_first(&queue)) {
					bioq_remove(&queue, cbp);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			/* Stash the consumer for the dispatch loop below. */
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acw > 0 && cp->ace > 0,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		/* All clones allocated successfully - dispatch them. */
		for (cbp = bioq_first(&queue); cbp != NULL;
		    cbp = bioq_first(&queue)) {
			bioq_remove(&queue, cbp);
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			g_io_request(cbp, cp);
		}
		/*
		 * Bump syncid on first write.
		 */
		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
			sc->sc_bump_syncid = 0;
			g_topology_lock();
			g_mirror_bump_syncid(sc);
			g_topology_unlock();
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}
1340
1341static int
1342g_mirror_can_destroy(struct g_mirror_softc *sc)
1343{
1344	struct g_geom *gp;
1345	struct g_consumer *cp;
1346
1347	g_topology_assert();
1348	gp = sc->sc_geom;
1349	LIST_FOREACH(cp, &gp->consumer, consumer) {
1350		if (g_mirror_is_busy(sc, cp))
1351			return (0);
1352	}
1353	gp = sc->sc_sync.ds_geom;
1354	LIST_FOREACH(cp, &gp->consumer, consumer) {
1355		if (g_mirror_is_busy(sc, cp))
1356			return (0);
1357	}
1358	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1359	    sc->sc_name);
1360	return (1);
1361}
1362
1363static int
1364g_mirror_try_destroy(struct g_mirror_softc *sc)
1365{
1366
1367	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1368		g_topology_lock();
1369		if (!g_mirror_can_destroy(sc)) {
1370			g_topology_unlock();
1371			return (0);
1372		}
1373		g_topology_unlock();
1374		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1375		    &sc->sc_worker);
1376		wakeup(&sc->sc_worker);
1377		sc->sc_worker = NULL;
1378	} else {
1379		g_topology_lock();
1380		if (!g_mirror_can_destroy(sc)) {
1381			g_topology_unlock();
1382			return (0);
1383		}
1384		g_mirror_destroy_device(sc);
1385		g_topology_unlock();
1386		free(sc, M_MIRROR);
1387	}
1388	return (1);
1389}
1390
1391/*
1392 * Worker thread.
1393 */
1394static void
1395g_mirror_worker(void *arg)
1396{
1397	struct g_mirror_softc *sc;
1398	struct g_mirror_disk *disk;
1399	struct g_mirror_disk_sync *sync;
1400	struct g_mirror_event *ep;
1401	struct bio *bp;
1402	u_int nreqs;
1403
1404	sc = arg;
1405	curthread->td_base_pri = PRIBIO;
1406
1407	nreqs = 0;
1408	for (;;) {
1409		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1410		/*
1411		 * First take a look at events.
1412		 * This is important to handle events before any I/O requests.
1413		 */
1414		ep = g_mirror_event_get(sc);
1415		if (ep != NULL) {
1416			g_topology_lock();
1417			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1418				/* Update only device status. */
1419				G_MIRROR_DEBUG(3,
1420				    "Running event for device %s.",
1421				    sc->sc_name);
1422				ep->e_error = 0;
1423				g_mirror_update_device(sc, 1);
1424			} else {
1425				/* Update disk status. */
1426				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1427				     g_mirror_get_diskname(ep->e_disk));
1428				ep->e_error = g_mirror_update_disk(ep->e_disk,
1429				    ep->e_state);
1430				if (ep->e_error == 0)
1431					g_mirror_update_device(sc, 0);
1432			}
1433			g_topology_unlock();
1434			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1435				KASSERT(ep->e_error == 0,
1436				    ("Error cannot be handled."));
1437				g_mirror_event_free(ep);
1438			} else {
1439				ep->e_flags |= G_MIRROR_EVENT_DONE;
1440				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1441				    ep);
1442				mtx_lock(&sc->sc_events_mtx);
1443				wakeup(ep);
1444				mtx_unlock(&sc->sc_events_mtx);
1445			}
1446			if ((sc->sc_flags &
1447			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1448				if (g_mirror_try_destroy(sc))
1449					kthread_exit(0);
1450			}
1451			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1452			continue;
1453		}
1454		/*
1455		 * Now I/O requests.
1456		 */
1457		/* Get first request from the queue. */
1458		mtx_lock(&sc->sc_queue_mtx);
1459		bp = bioq_first(&sc->sc_queue);
1460		if (bp == NULL) {
1461			if ((sc->sc_flags &
1462			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1463				mtx_unlock(&sc->sc_queue_mtx);
1464				if (g_mirror_try_destroy(sc))
1465					kthread_exit(0);
1466				mtx_lock(&sc->sc_queue_mtx);
1467			}
1468		}
1469		if (sc->sc_sync.ds_ndisks > 0 &&
1470		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1471			mtx_unlock(&sc->sc_queue_mtx);
1472			/*
1473			 * It is time for synchronization...
1474			 */
1475			nreqs = 0;
1476			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1477				if (disk->d_state !=
1478				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1479					continue;
1480				}
1481				sync = &disk->d_sync;
1482				if (sync->ds_offset >=
1483				    sc->sc_provider->mediasize) {
1484					continue;
1485				}
1486				if (sync->ds_offset > sync->ds_offset_done)
1487					continue;
1488				if (sync->ds_resync != -1) {
1489					sync->ds_offset = sync->ds_resync;
1490					sync->ds_offset_done = sync->ds_resync;
1491					sync->ds_resync = -1;
1492				}
1493				g_mirror_sync_one(disk);
1494			}
1495			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1496			goto sleep;
1497		}
1498		if (bp == NULL) {
1499			if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1500			    "m:w1", hz * 5) == EWOULDBLOCK) {
1501				/*
1502				 * No I/O requests in 5 seconds, so mark
1503				 * components as clean.
1504				 */
1505				if (!sc->sc_idle)
1506					g_mirror_idle(sc);
1507			}
1508			continue;
1509		}
1510		nreqs++;
1511		bioq_remove(&sc->sc_queue, bp);
1512		mtx_unlock(&sc->sc_queue_mtx);
1513
1514		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1515			g_mirror_regular_request(bp);
1516		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1517			u_int timeout, sps;
1518
1519			g_mirror_sync_request(bp);
1520sleep:
1521			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
1522			if (sps == 0) {
1523				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1524				continue;
1525			}
1526			mtx_lock(&sc->sc_queue_mtx);
1527			if (bioq_first(&sc->sc_queue) != NULL) {
1528				mtx_unlock(&sc->sc_queue_mtx);
1529				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1530				continue;
1531			}
1532			timeout = hz / sps;
1533			if (timeout == 0)
1534				timeout = 1;
1535			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
1536			    timeout);
1537		} else {
1538			g_mirror_register_request(bp);
1539		}
1540		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1541	}
1542}
1543
1544/*
1545 * Open disk's consumer if needed.
1546 */
1547static void
1548g_mirror_update_access(struct g_mirror_disk *disk)
1549{
1550	struct g_provider *pp;
1551	struct g_consumer *cp;
1552	int acr, acw, ace, cpw, error;
1553
1554	g_topology_assert();
1555
1556	cp = disk->d_consumer;
1557	pp = disk->d_softc->sc_provider;
1558	if (pp == NULL) {
1559		acr = -cp->acr;
1560		acw = -cp->acw;
1561		ace = -cp->ace;
1562	} else {
1563		acr = pp->acr - cp->acr;
1564		acw = pp->acw - cp->acw;
1565		ace = pp->ace - cp->ace;
1566		/* Grab an extra "exclusive" bit. */
1567		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1568			ace++;
1569	}
1570	if (acr == 0 && acw == 0 && ace == 0)
1571		return;
1572	cpw = cp->acw;
1573	error = g_access(cp, acr, acw, ace);
1574	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1575	    acw, ace, error);
1576	if (error != 0) {
1577		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1578		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1579		    G_MIRROR_EVENT_DONTWAIT);
1580		return;
1581	}
1582	if (cpw == 0 && cp->acw > 0) {
1583		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1584		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1585		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1586	} else if (cpw > 0 && cp->acw == 0) {
1587		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1588		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1589		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1590	}
1591}
1592
/*
 * Begin synchronization (rebuild) of the given disk: open its consumer
 * for write, create a dedicated read consumer on the sync geom attached
 * to the mirror provider, and allocate the MAXPHYS copy buffer.
 */
static void
g_mirror_sync_start(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	sc = disk->d_softc;
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));
	cp = disk->d_consumer;
	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
	    ("Consumer %s already opened.", cp->provider->name));

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	/* Open the component for writing (w1e1). */
	error = g_access(cp, 0, 1, 1);
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
	    1, error);
	if (error != 0) {
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
		return;
	}
	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(disk->d_sync.ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));
	/* Read side: a consumer on the sync geom, attached to the mirror. */
	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
	disk->d_sync.ds_consumer->private = disk;
	disk->d_sync.ds_consumer->index = 0;
	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
	    disk->d_softc->sc_name, error));
	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).",
	    disk->d_softc->sc_name, error));
	disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
	sc->sc_sync.ds_ndisks++;
}
1636
1637/*
1638 * Stop synchronization process.
1639 * type: 0 - synchronization finished
1640 *       1 - synchronization stopped
1641 */
1642static void
1643g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1644{
1645	struct g_consumer *cp;
1646
1647	g_topology_assert();
1648	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1649	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1650	    g_mirror_disk_state2str(disk->d_state)));
1651	if (disk->d_sync.ds_consumer == NULL)
1652		return;
1653
1654	if (type == 0) {
1655		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1656		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1657	} else /* if (type == 1) */ {
1658		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1659		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1660	}
1661	cp = disk->d_sync.ds_consumer;
1662	g_access(cp, -1, 0, 0);
1663	g_mirror_kill_consumer(disk->d_softc, cp);
1664	free(disk->d_sync.ds_data, M_MIRROR);
1665	disk->d_sync.ds_consumer = NULL;
1666	disk->d_softc->sc_sync.ds_ndisks--;
1667	cp = disk->d_consumer;
1668	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1669	    ("Consumer %s not opened.", cp->provider->name));
1670	g_access(cp, 0, -1, -1);
1671	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1672	    -1, 0);
1673	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1674}
1675
1676static void
1677g_mirror_launch_provider(struct g_mirror_softc *sc)
1678{
1679	struct g_mirror_disk *disk;
1680	struct g_provider *pp;
1681
1682	g_topology_assert();
1683
1684	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1685	pp->mediasize = sc->sc_mediasize;
1686	pp->sectorsize = sc->sc_sectorsize;
1687	sc->sc_provider = pp;
1688	g_error_provider(pp, 0);
1689	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1690	    pp->name);
1691	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1692		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1693			g_mirror_sync_start(disk);
1694	}
1695}
1696
/*
 * Retire the mirror's provider: fail all queued I/O with ENXIO, wither
 * and orphan the provider, and stop any in-progress synchronization.
 */
static void
g_mirror_destroy_provider(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct bio *bp;

	g_topology_assert();
	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
	    sc->sc_name));

	/* Refuse new I/O first, then drain what is already queued. */
	g_error_provider(sc->sc_provider, ENXIO);
	mtx_lock(&sc->sc_queue_mtx);
	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
		bioq_remove(&sc->sc_queue, bp);
		g_io_deliver(bp, ENXIO);
	}
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
	    sc->sc_provider->name);
	sc->sc_provider->flags |= G_PF_WITHER;
	g_orphan_provider(sc->sc_provider, ENXIO);
	sc->sc_provider = NULL;
	/* type 1: synchronization stopped, not finished. */
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			g_mirror_sync_stop(disk, 1);
	}
}
1724
1725static void
1726g_mirror_go(void *arg)
1727{
1728	struct g_mirror_softc *sc;
1729
1730	sc = arg;
1731	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1732	g_mirror_event_send(sc, 0,
1733	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1734}
1735
/*
 * Decide which state a newly arrived disk should enter, based on how
 * its stored syncid compares with the device's syncid.  May destroy
 * the disk (returning STATE_NONE) when it is fresher than the running
 * device.
 */
static u_int
g_mirror_determine_state(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	u_int state;

	sc = disk->d_softc;
	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
		if ((disk->d_flags &
		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
			/* Disk does not need synchronization. */
			state = G_MIRROR_DISK_STATE_ACTIVE;
		} else {
			if ((sc->sc_flags &
			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
			    (disk->d_flags &
			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
				/*
				 * We can start synchronization from
				 * the stored offset.
				 */
				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
			} else {
				state = G_MIRROR_DISK_STATE_STALE;
			}
		}
	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
		/*
		 * Reset all synchronization data for this disk,
		 * because if it even was synchronized, it was
		 * synchronized to disks with different syncid.
		 */
		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		disk->d_sync.ds_syncid = sc->sc_syncid;
		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
		} else {
			state = G_MIRROR_DISK_STATE_STALE;
		}
	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
		/*
		 * Not good, NOT GOOD!
		 * It means that the mirror was started on stale disks
		 * and a fresher disk has just arrived.
		 * If there were any writes, the mirror is now
		 * inconsistent.  The best choice here is not to touch
		 * this disk and to inform the user loudly.
		 */
		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
		    "disk (%s) arrives!! It will not be connected to the "
		    "running device.", sc->sc_name,
		    g_mirror_get_diskname(disk));
		g_mirror_destroy_disk(disk);
		state = G_MIRROR_DISK_STATE_NONE;
		/* Return immediately, because disk was destroyed. */
		return (state);
	}
	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
	return (state);
}
1800
1801/*
1802 * Update device state.
1803 */
1804static void
1805g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1806{
1807	struct g_mirror_disk *disk;
1808	u_int state;
1809
1810	g_topology_assert();
1811
1812	switch (sc->sc_state) {
1813	case G_MIRROR_DEVICE_STATE_STARTING:
1814	    {
1815		struct g_mirror_disk *pdisk;
1816		u_int dirty, ndisks, syncid;
1817
1818		KASSERT(sc->sc_provider == NULL,
1819		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1820		/*
1821		 * Are we ready? We are, if all disks are connected or
1822		 * if we have any disks and 'force' is true.
1823		 */
1824		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1825		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1826			;
1827		} else if (g_mirror_ndisks(sc, -1) == 0) {
1828			/*
1829			 * Disks went down in starting phase, so destroy
1830			 * device.
1831			 */
1832			callout_drain(&sc->sc_callout);
1833			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1834			return;
1835		} else {
1836			return;
1837		}
1838
1839		/*
1840		 * Activate all disks with the biggest syncid.
1841		 */
1842		if (force) {
1843			/*
1844			 * If 'force' is true, we have been called due to
1845			 * timeout, so don't bother canceling timeout.
1846			 */
1847			ndisks = 0;
1848			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1849				if ((disk->d_flags &
1850				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1851					ndisks++;
1852				}
1853			}
1854			if (ndisks == 0) {
1855				/* No valid disks found, destroy device. */
1856				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1857				return;
1858			}
1859		} else {
1860			/* Cancel timeout. */
1861			callout_drain(&sc->sc_callout);
1862		}
1863
1864		/*
1865		 * Find disk with the biggest syncid.
1866		 */
1867		syncid = 0;
1868		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1869			if (disk->d_sync.ds_syncid > syncid)
1870				syncid = disk->d_sync.ds_syncid;
1871		}
1872
1873		/*
1874		 * Here we need to look for dirty disks and if all disks
1875		 * with the biggest syncid are dirty, we have to choose
1876		 * one with the biggest priority and rebuild the rest.
1877		 */
1878		/*
1879		 * Find the number of dirty disks with the biggest syncid.
1880		 * Find the number of disks with the biggest syncid.
1881		 * While here, find a disk with the biggest priority.
1882		 */
1883		dirty = ndisks = 0;
1884		pdisk = NULL;
1885		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1886			if (disk->d_sync.ds_syncid != syncid)
1887				continue;
1888			if ((disk->d_flags &
1889			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1890				continue;
1891			}
1892			ndisks++;
1893			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1894				dirty++;
1895				if (pdisk == NULL ||
1896				    pdisk->d_priority < disk->d_priority) {
1897					pdisk = disk;
1898				}
1899			}
1900		}
1901		if (dirty == 0) {
1902			/* No dirty disks at all, great. */
1903		} else if (dirty == ndisks) {
1904			/*
1905			 * Force synchronization for all dirty disks except one
1906			 * with the biggest priority.
1907			 */
1908			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1909			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1910			    "master disk for synchronization.",
1911			    g_mirror_get_diskname(pdisk), sc->sc_name);
1912			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1913				if (disk->d_sync.ds_syncid != syncid)
1914					continue;
1915				if ((disk->d_flags &
1916				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1917					continue;
1918				}
1919				KASSERT((disk->d_flags &
1920				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1921				    ("Disk %s isn't marked as dirty.",
1922				    g_mirror_get_diskname(disk)));
1923				/* Skip the disk with the biggest priority. */
1924				if (disk == pdisk)
1925					continue;
1926				disk->d_sync.ds_syncid = 0;
1927			}
1928		} else if (dirty < ndisks) {
1929			/*
1930			 * Force synchronization for all dirty disks.
1931			 * We have some non-dirty disks.
1932			 */
1933			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1934				if (disk->d_sync.ds_syncid != syncid)
1935					continue;
1936				if ((disk->d_flags &
1937				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1938					continue;
1939				}
1940				if ((disk->d_flags &
1941				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1942					continue;
1943				}
1944				disk->d_sync.ds_syncid = 0;
1945			}
1946		}
1947
1948		/* Reset hint. */
1949		sc->sc_hint = NULL;
1950		sc->sc_syncid = syncid;
1951		if (force) {
1952			/* Remember to bump syncid on first write. */
1953			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1954		}
1955		state = G_MIRROR_DEVICE_STATE_RUNNING;
1956		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1957		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1958		    g_mirror_device_state2str(state));
1959		sc->sc_state = state;
1960		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1961			state = g_mirror_determine_state(disk);
1962			g_mirror_event_send(disk, state,
1963			    G_MIRROR_EVENT_DONTWAIT);
1964			if (state == G_MIRROR_DISK_STATE_STALE) {
1965				sc->sc_bump_syncid =
1966				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1967			}
1968		}
1969		wakeup(&g_mirror_class);
1970		break;
1971	    }
1972	case G_MIRROR_DEVICE_STATE_RUNNING:
1973		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1974		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1975			/*
1976			 * No active disks or no disks at all,
1977			 * so destroy device.
1978			 */
1979			if (sc->sc_provider != NULL)
1980				g_mirror_destroy_provider(sc);
1981			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1982			break;
1983		} else if (g_mirror_ndisks(sc,
1984		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
1985		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1986			/*
1987			 * We have active disks, launch provider if it doesn't
1988			 * exist.
1989			 */
1990			if (sc->sc_provider == NULL)
1991				g_mirror_launch_provider(sc);
1992		}
1993		/*
1994		 * Bump syncid here, if we need to do it immediately.
1995		 */
1996		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
1997			sc->sc_bump_syncid = 0;
1998			g_mirror_bump_syncid(sc);
1999		}
2000		break;
2001	default:
2002		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2003		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2004		break;
2005	}
2006}
2007
2008/*
2009 * Update disk state and device state if needed.
2010 */
/*
 * Log a disk state transition at debug level 1.  Relies on the local
 * variables 'disk', 'state' and 'sc' of the function it is used in.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
2016static int
2017g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2018{
2019	struct g_mirror_softc *sc;
2020
2021	g_topology_assert();
2022
2023	sc = disk->d_softc;
2024again:
2025	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2026	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2027	    g_mirror_disk_state2str(state));
2028	switch (state) {
2029	case G_MIRROR_DISK_STATE_NEW:
2030		/*
2031		 * Possible scenarios:
2032		 * 1. New disk arrive.
2033		 */
2034		/* Previous state should be NONE. */
2035		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2036		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2037		    g_mirror_disk_state2str(disk->d_state)));
2038		DISK_STATE_CHANGED();
2039
2040		disk->d_state = state;
2041		if (LIST_EMPTY(&sc->sc_disks))
2042			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2043		else {
2044			struct g_mirror_disk *dp;
2045
2046			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2047				if (disk->d_priority >= dp->d_priority) {
2048					LIST_INSERT_BEFORE(dp, disk, d_next);
2049					dp = NULL;
2050					break;
2051				}
2052				if (LIST_NEXT(dp, d_next) == NULL)
2053					break;
2054			}
2055			if (dp != NULL)
2056				LIST_INSERT_AFTER(dp, disk, d_next);
2057		}
2058		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2059		    sc->sc_name, g_mirror_get_diskname(disk));
2060		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2061			break;
2062		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2063		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2064		    g_mirror_device_state2str(sc->sc_state),
2065		    g_mirror_get_diskname(disk),
2066		    g_mirror_disk_state2str(disk->d_state)));
2067		state = g_mirror_determine_state(disk);
2068		if (state != G_MIRROR_DISK_STATE_NONE)
2069			goto again;
2070		break;
2071	case G_MIRROR_DISK_STATE_ACTIVE:
2072		/*
2073		 * Possible scenarios:
2074		 * 1. New disk does not need synchronization.
2075		 * 2. Synchronization process finished successfully.
2076		 */
2077		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2078		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2079		    g_mirror_device_state2str(sc->sc_state),
2080		    g_mirror_get_diskname(disk),
2081		    g_mirror_disk_state2str(disk->d_state)));
2082		/* Previous state should be NEW or SYNCHRONIZING. */
2083		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2084		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2085		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2086		    g_mirror_disk_state2str(disk->d_state)));
2087		DISK_STATE_CHANGED();
2088
2089		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2090			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2091		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2092			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2093			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2094			g_mirror_sync_stop(disk, 0);
2095		}
2096		disk->d_state = state;
2097		disk->d_sync.ds_offset = 0;
2098		disk->d_sync.ds_offset_done = 0;
2099		g_mirror_update_access(disk);
2100		g_mirror_update_metadata(disk);
2101		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2102		    sc->sc_name, g_mirror_get_diskname(disk));
2103		break;
2104	case G_MIRROR_DISK_STATE_STALE:
2105		/*
2106		 * Possible scenarios:
2107		 * 1. Stale disk was connected.
2108		 */
2109		/* Previous state should be NEW. */
2110		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2111		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2112		    g_mirror_disk_state2str(disk->d_state)));
2113		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2114		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2115		    g_mirror_device_state2str(sc->sc_state),
2116		    g_mirror_get_diskname(disk),
2117		    g_mirror_disk_state2str(disk->d_state)));
2118		/*
2119		 * STALE state is only possible if device is marked
2120		 * NOAUTOSYNC.
2121		 */
2122		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2123		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2124		    g_mirror_device_state2str(sc->sc_state),
2125		    g_mirror_get_diskname(disk),
2126		    g_mirror_disk_state2str(disk->d_state)));
2127		DISK_STATE_CHANGED();
2128
2129		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2130		disk->d_state = state;
2131		g_mirror_update_metadata(disk);
2132		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2133		    sc->sc_name, g_mirror_get_diskname(disk));
2134		break;
2135	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2136		/*
2137		 * Possible scenarios:
2138		 * 1. Disk which needs synchronization was connected.
2139		 */
2140		/* Previous state should be NEW. */
2141		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2142		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2143		    g_mirror_disk_state2str(disk->d_state)));
2144		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2145		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2146		    g_mirror_device_state2str(sc->sc_state),
2147		    g_mirror_get_diskname(disk),
2148		    g_mirror_disk_state2str(disk->d_state)));
2149		DISK_STATE_CHANGED();
2150
2151		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2152			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2153		disk->d_state = state;
2154		if (sc->sc_provider != NULL) {
2155			g_mirror_sync_start(disk);
2156			g_mirror_update_metadata(disk);
2157		}
2158		break;
2159	case G_MIRROR_DISK_STATE_DISCONNECTED:
2160		/*
2161		 * Possible scenarios:
2162		 * 1. Device wasn't running yet, but disk disappear.
2163		 * 2. Disk was active and disapppear.
2164		 * 3. Disk disappear during synchronization process.
2165		 */
2166		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2167			/*
2168			 * Previous state should be ACTIVE, STALE or
2169			 * SYNCHRONIZING.
2170			 */
2171			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2172			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2173			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2174			    ("Wrong disk state (%s, %s).",
2175			    g_mirror_get_diskname(disk),
2176			    g_mirror_disk_state2str(disk->d_state)));
2177		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2178			/* Previous state should be NEW. */
2179			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2180			    ("Wrong disk state (%s, %s).",
2181			    g_mirror_get_diskname(disk),
2182			    g_mirror_disk_state2str(disk->d_state)));
2183			/*
2184			 * Reset bumping syncid if disk disappeared in STARTING
2185			 * state.
2186			 */
2187			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2188				sc->sc_bump_syncid = 0;
2189#ifdef	INVARIANTS
2190		} else {
2191			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2192			    sc->sc_name,
2193			    g_mirror_device_state2str(sc->sc_state),
2194			    g_mirror_get_diskname(disk),
2195			    g_mirror_disk_state2str(disk->d_state)));
2196#endif
2197		}
2198		DISK_STATE_CHANGED();
2199		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2200		    sc->sc_name, g_mirror_get_diskname(disk));
2201
2202		g_mirror_destroy_disk(disk);
2203		break;
2204	case G_MIRROR_DISK_STATE_DESTROY:
2205	    {
2206		int error;
2207
2208		error = g_mirror_clear_metadata(disk);
2209		if (error != 0)
2210			return (error);
2211		DISK_STATE_CHANGED();
2212		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2213		    sc->sc_name, g_mirror_get_diskname(disk));
2214
2215		g_mirror_destroy_disk(disk);
2216		sc->sc_ndisks--;
2217		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2218			g_mirror_update_metadata(disk);
2219		}
2220		break;
2221	    }
2222	default:
2223		KASSERT(1 == 0, ("Unknown state (%u).", state));
2224		break;
2225	}
2226	return (0);
2227}
2228#undef	DISK_STATE_CHANGED
2229
2230static int
2231g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2232{
2233	struct g_provider *pp;
2234	u_char *buf;
2235	int error;
2236
2237	g_topology_assert();
2238
2239	error = g_access(cp, 1, 0, 0);
2240	if (error != 0)
2241		return (error);
2242	pp = cp->provider;
2243	g_topology_unlock();
2244	/* Metadata are stored on last sector. */
2245	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2246	    &error);
2247	g_topology_lock();
2248	if (buf == NULL) {
2249		g_access(cp, -1, 0, 0);
2250		return (error);
2251	}
2252	if (error != 0) {
2253		g_access(cp, -1, 0, 0);
2254		g_free(buf);
2255		return (error);
2256	}
2257	error = g_access(cp, -1, 0, 0);
2258	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2259
2260	/* Decode metadata. */
2261	error = mirror_metadata_decode(buf, md);
2262	g_free(buf);
2263	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2264		return (EINVAL);
2265	if (error != 0) {
2266		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2267		    cp->provider->name);
2268		return (error);
2269	}
2270
2271	return (0);
2272}
2273
2274static int
2275g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2276    struct g_mirror_metadata *md)
2277{
2278
2279	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2280		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2281		    pp->name, md->md_did);
2282		return (EEXIST);
2283	}
2284	if (md->md_all != sc->sc_ndisks) {
2285		G_MIRROR_DEBUG(1,
2286		    "Invalid '%s' field on disk %s (device %s), skipping.",
2287		    "md_all", pp->name, sc->sc_name);
2288		return (EINVAL);
2289	}
2290	if (md->md_slice != sc->sc_slice) {
2291		G_MIRROR_DEBUG(1,
2292		    "Invalid '%s' field on disk %s (device %s), skipping.",
2293		    "md_slice", pp->name, sc->sc_name);
2294		return (EINVAL);
2295	}
2296	if (md->md_balance != sc->sc_balance) {
2297		G_MIRROR_DEBUG(1,
2298		    "Invalid '%s' field on disk %s (device %s), skipping.",
2299		    "md_balance", pp->name, sc->sc_name);
2300		return (EINVAL);
2301	}
2302	if (md->md_mediasize != sc->sc_mediasize) {
2303		G_MIRROR_DEBUG(1,
2304		    "Invalid '%s' field on disk %s (device %s), skipping.",
2305		    "md_mediasize", pp->name, sc->sc_name);
2306		return (EINVAL);
2307	}
2308	if (sc->sc_mediasize > pp->mediasize) {
2309		G_MIRROR_DEBUG(1,
2310		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2311		    sc->sc_name);
2312		return (EINVAL);
2313	}
2314	if (md->md_sectorsize != sc->sc_sectorsize) {
2315		G_MIRROR_DEBUG(1,
2316		    "Invalid '%s' field on disk %s (device %s), skipping.",
2317		    "md_sectorsize", pp->name, sc->sc_name);
2318		return (EINVAL);
2319	}
2320	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2321		G_MIRROR_DEBUG(1,
2322		    "Invalid sector size of disk %s (device %s), skipping.",
2323		    pp->name, sc->sc_name);
2324		return (EINVAL);
2325	}
2326	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2327		G_MIRROR_DEBUG(1,
2328		    "Invalid device flags on disk %s (device %s), skipping.",
2329		    pp->name, sc->sc_name);
2330		return (EINVAL);
2331	}
2332	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2333		G_MIRROR_DEBUG(1,
2334		    "Invalid disk flags on disk %s (device %s), skipping.",
2335		    pp->name, sc->sc_name);
2336		return (EINVAL);
2337	}
2338	return (0);
2339}
2340
2341static int
2342g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2343    struct g_mirror_metadata *md)
2344{
2345	struct g_mirror_disk *disk;
2346	int error;
2347
2348	g_topology_assert();
2349	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2350
2351	error = g_mirror_check_metadata(sc, pp, md);
2352	if (error != 0)
2353		return (error);
2354	disk = g_mirror_init_disk(sc, pp, md, &error);
2355	if (disk == NULL)
2356		return (error);
2357	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2358	    G_MIRROR_EVENT_WAIT);
2359	return (error);
2360}
2361
/*
 * GEOM access method for the mirror provider.  Propagates the access
 * count delta (acr/acw/ace) to every ACTIVE component and maintains
 * each component's DIRTY flag in the metadata around open/close for
 * writing.
 */
static int
g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int dcr, dcw, dce, err, error;

	g_topology_assert();
	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
	    acw, ace);

	/* Access counts on the provider after this request is applied. */
	dcr = pp->acr + acr;
	dcw = pp->acw + acw;
	dce = pp->ace + ace;

	/* On first open, grab an extra "exclusive" bit */
	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
		ace++;
	/* ... and let go of it on last close */
	if (dcr == 0 && dcw == 0 && dce == 0)
		ace--;

	sc = pp->geom->softc;
	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
		/* No components at all: only releasing access may succeed. */
		if (acr <= 0 && acw <= 0 && ace <= 0)
			return (0);
		else
			return (ENXIO);
	}
	error = ENXIO;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		err = g_access(disk->d_consumer, acr, acw, ace);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    g_mirror_get_diskname(disk), acr, acw, ace, err);
		if (err == 0) {
			/*
			 * Mark disk as dirty on open and unmark on close.
			 */
			if (pp->acw == 0 && dcw > 0) {
				G_MIRROR_DEBUG(1,
				    "Disk %s (device %s) marked as dirty.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
				g_mirror_update_metadata(disk);
			} else if (pp->acw > 0 && dcw == 0) {
				G_MIRROR_DEBUG(1,
				    "Disk %s (device %s) marked as clean.",
				    g_mirror_get_diskname(disk), sc->sc_name);
				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
				g_mirror_update_metadata(disk);
			}
			error = 0;
		} else {
			/*
			 * This component failed; request its disconnection
			 * and make sure the syncid is bumped on first write.
			 */
			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	/*
	 * Be sure to return 0 for negative access requests.
	 * In case of some HW problems, it is possible that we don't have
	 * any active disk here, so loop above will be no-op and error will
	 * be ENXIO.
	 */
	if (error != 0 && acr <= 0 && acw <= 0 && ace <= 0)
		error = 0;
	return (error);
}
2433
/*
 * Create a new mirror device from the given metadata: allocate the
 * softc, the action geom (regular I/O), the synchronization geom and
 * the worker thread.  Returns the action geom, or NULL on failure.
 */
static struct g_geom *
g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_geom *gp;
	int error, timeout;

	g_topology_assert();
	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_mid);

	/* One disk is minimum. */
	if (md->md_all < 1)
		return (NULL);
	/*
	 * Action geom.
	 */
	gp = g_new_geomf(mp, "%s", md->md_name);
	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
	gp->start = g_mirror_start;
	gp->spoiled = g_mirror_spoiled;
	gp->orphan = g_mirror_orphan;
	gp->access = g_mirror_access;
	gp->dumpconf = g_mirror_dumpconf;

	/* Copy device-wide parameters out of the on-disk metadata. */
	sc->sc_id = md->md_mid;
	sc->sc_slice = md->md_slice;
	sc->sc_balance = md->md_balance;
	sc->sc_mediasize = md->md_mediasize;
	sc->sc_sectorsize = md->md_sectorsize;
	sc->sc_ndisks = md->md_all;
	sc->sc_flags = md->md_mflags;
	sc->sc_bump_syncid = 0;
	sc->sc_idle = 0;
	bioq_init(&sc->sc_queue);
	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
	LIST_INIT(&sc->sc_disks);
	TAILQ_INIT(&sc->sc_events);
	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;
	/*
	 * Synchronization geom.
	 */
	gp = g_new_geomf(mp, "%s.sync", md->md_name);
	gp->softc = sc;
	gp->orphan = g_mirror_orphan;
	sc->sc_sync.ds_geom = gp;
	sc->sc_sync.ds_ndisks = 0;
	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
	    "g_mirror %s", md->md_name);
	if (error != 0) {
		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
		    sc->sc_name);
		/* Roll back everything created above. */
		g_destroy_geom(sc->sc_sync.ds_geom);
		mtx_destroy(&sc->sc_events_mtx);
		mtx_destroy(&sc->sc_queue_mtx);
		g_destroy_geom(sc->sc_geom);
		free(sc, M_MIRROR);
		return (NULL);
	}

	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);

	/*
	 * Run timeout.  After g_mirror_timeout seconds g_mirror_go fires;
	 * presumably it forces the device out of STARTING state even if
	 * not all components showed up — confirm against g_mirror_go().
	 */
	timeout = atomic_load_acq_int(&g_mirror_timeout);
	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
	return (sc->sc_geom);
}
2508
/*
 * Destroy the mirror device: ask the worker thread to tear it down and
 * exit, wait for that, then free the softc.  With force == 0 an open
 * provider makes this fail with EBUSY; with force != 0 destruction
 * proceeds regardless (with a warning).
 */
int
g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_provider *pp;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);
	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (force) {
			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
			    "can't be definitely removed.", pp->name);
		} else {
			G_MIRROR_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
	}

	/* Signal the worker thread to destroy the device and exit. */
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
	g_topology_unlock();
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
	/*
	 * Poll until sc_worker becomes NULL — presumably cleared by the
	 * worker thread just before it exits (worker not visible here;
	 * confirm against g_mirror_worker()).
	 */
	while (sc->sc_worker != NULL)
		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
	g_topology_lock();
	g_mirror_destroy_device(sc);
	free(sc, M_MIRROR);
	return (0);
}
2547
/*
 * Orphan method for the short-lived geom used while tasting.  It is
 * never expected to run — the taste geom only exists for the duration
 * of g_mirror_read_metadata() — so it panics under INVARIANTS.
 */
static void
g_mirror_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
2555
/*
 * Taste method: examine provider pp for gmirror metadata and, when
 * valid, attach it as a component — creating the mirror device first
 * if this is the first component seen for that device name.
 */
static struct g_geom *
g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_mirror_metadata md;
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int error;

	g_topology_assert();
	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);

	/* Use a throwaway geom/consumer pair just to read the metadata. */
	gp = g_new_geomf(mp, "mirror:taste");
	/*
	 * This orphan function should be never called.
	 */
	gp->orphan = g_mirror_taste_orphan;
	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_mirror_read_metadata(cp, &md);
	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (md.md_version > G_MIRROR_VERSION) {
		printf("geom_mirror.ko module is too old to handle %s.\n",
		    pp->name);
		return (NULL);
	}
	/* A hardcoded provider name must match the tasted provider. */
	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
		return (NULL);
	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
		G_MIRROR_DEBUG(0,
		    "Device %s: provider %s marked as inactive, skipping.",
		    md.md_name, pp->name);
		return (NULL);
	}
	if (g_mirror_debug >= 2)
		mirror_metadata_dump(&md);

	/*
	 * Let's check if device already exists.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		/* Skip synchronization geoms; they share the softc. */
		if (sc->sc_sync.ds_geom == gp)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		/* Same name but a different device id is a conflict. */
		if (md.md_mid != sc->sc_id) {
			G_MIRROR_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
		break;
	}
	if (gp == NULL) {
		/* First component seen for this name; create the device. */
		gp = g_mirror_create(mp, &md);
		if (gp == NULL) {
			G_MIRROR_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
	}
	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	error = g_mirror_add_disk(sc, pp, &md);
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
		    pp->name, gp->name, error);
		/* Tear down a device that ended up with no components. */
		if (LIST_EMPTY(&sc->sc_disks))
			g_mirror_destroy(sc, 1);
		return (NULL);
	}
	return (gp);
}
2639
/*
 * GEOM class destroy-geom method: non-forced wrapper around
 * g_mirror_destroy(), so an open provider makes it fail with EBUSY.
 */
static int
g_mirror_destroy_geom(struct gctl_req *req __unused,
    struct g_class *mp __unused, struct g_geom *gp)
{

	return (g_mirror_destroy(gp->softc, 0));
}
2647
/*
 * dumpconf method: append gmirror-specific XML describing the geom,
 * one of its consumers (a component disk), or its provider to sb.
 * Exactly one of pp/cp is non-NULL per call; both NULL means the geom
 * (device) itself is being described.
 */
static void
g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct g_mirror_softc *sc;

	g_topology_assert();

	sc = gp->softc;
	if (sc == NULL)
		return;
	/* Skip synchronization geom. */
	if (gp == sc->sc_sync.ds_geom)
		return;
	if (pp != NULL) {
		/* Nothing here. */
	} else if (cp != NULL) {
		/* Per-component information. */
		struct g_mirror_disk *disk;

		disk = cp->private;
		if (disk == NULL)
			return;
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			/* Synchronization progress as a percentage. */
			sbuf_printf(sb, "%s<Synchronized>", indent);
			if (disk->d_sync.ds_offset_done == 0)
				sbuf_printf(sb, "0%%");
			else {
				sbuf_printf(sb, "%u%%",
				    (u_int)((disk->d_sync.ds_offset_done * 100) /
				    sc->sc_provider->mediasize));
			}
			sbuf_printf(sb, "</Synchronized>\n");
		}
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
		    disk->d_sync.ds_syncid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (disk->d_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Emit a comma-separated flag name when the flag is set. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((disk->d_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
			    "SYNCHRONIZING");
			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
		    disk->d_priority);
		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
		    g_mirror_disk_state2str(disk->d_state));
	} else {
		/* Device-wide information. */
		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
		sbuf_printf(sb, "%s<Flags>", indent);
		if (sc->sc_flags == 0)
			sbuf_printf(sb, "NONE");
		else {
			int first = 1;

/* Same pattern as above, but for the device flags. */
#define	ADD_FLAG(flag, name)	do {					\
	if ((sc->sc_flags & (flag)) != 0) {				\
		if (!first)						\
			sbuf_printf(sb, ", ");				\
		else							\
			first = 0;					\
		sbuf_printf(sb, name);					\
	}								\
} while (0)
			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
#undef	ADD_FLAG
		}
		sbuf_printf(sb, "</Flags>\n");
		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
		    (u_int)sc->sc_slice);
		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
		    balance_name(sc->sc_balance));
		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
		    sc->sc_ndisks);
		/* COMPLETE only when every component is ACTIVE. */
		sbuf_printf(sb, "%s<State>", indent);
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			sbuf_printf(sb, "%s", "STARTING");
		else if (sc->sc_ndisks ==
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
			sbuf_printf(sb, "%s", "COMPLETE");
		else
			sbuf_printf(sb, "%s", "DEGRADED");
		sbuf_printf(sb, "</State>\n");
	}
}
2751
2752static int
2753g_mirror_can_go(void)
2754{
2755	struct g_mirror_softc *sc;
2756	struct g_geom *gp;
2757	struct g_provider *pp;
2758	int can_go;
2759
2760	DROP_GIANT();
2761	can_go = 1;
2762	g_topology_lock();
2763	LIST_FOREACH(gp, &g_mirror_class.geom, geom) {
2764		sc = gp->softc;
2765		if (sc == NULL) {
2766			can_go = 0;
2767			break;
2768		}
2769		pp = sc->sc_provider;
2770		if (pp == NULL || pp->error != 0) {
2771			can_go = 0;
2772			break;
2773		}
2774	}
2775	g_topology_unlock();
2776	PICKUP_GIANT();
2777	return (can_go);
2778}
2779
2780static void
2781g_mirror_rootwait(void)
2782{
2783
2784	/*
2785	 * HACK: Wait for GEOM, because g_mirror_rootwait() can be called,
2786	 * HACK: before we get providers for tasting.
2787	 */
2788	tsleep(&g_mirror_class, PRIBIO, "mroot", hz * 3);
2789	/*
2790	 * Wait for mirrors in degraded state.
2791	 */
2792	for (;;) {
2793		if (g_mirror_can_go())
2794			break;
2795		tsleep(&g_mirror_class, PRIBIO, "mroot", hz);
2796	}
2797}
2798
/*
 * Run g_mirror_rootwait() during boot at the SI_SUB_RAID stage so that
 * boot blocks until mirrors are usable — presumably before the root
 * file system is mounted; confirm against sys/kernel.h ordering.
 */
SYSINIT(g_mirror_root, SI_SUB_RAID, SI_ORDER_FIRST, g_mirror_rootwait, NULL)

/* Register the gmirror GEOM class with the GEOM framework. */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2802