g_stripe.c revision 197898
1/*-
2 * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/stripe/g_stripe.c 197898 2009-10-09 09:42:22Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/bio.h>
37#include <sys/sysctl.h>
38#include <sys/malloc.h>
39#include <vm/uma.h>
40#include <geom/geom.h>
41#include <geom/stripe/g_stripe.h>
42
43
44static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");
45
46static uma_zone_t g_stripe_zone;
47
48static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
49static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
50    struct g_geom *gp);
51
52static g_taste_t g_stripe_taste;
53static g_ctl_req_t g_stripe_config;
54static g_dumpconf_t g_stripe_dumpconf;
55static g_init_t g_stripe_init;
56static g_fini_t g_stripe_fini;
57
58struct g_class g_stripe_class = {
59	.name = G_STRIPE_CLASS_NAME,
60	.version = G_VERSION,
61	.ctlreq = g_stripe_config,
62	.taste = g_stripe_taste,
63	.destroy_geom = g_stripe_destroy_geom,
64	.init = g_stripe_init,
65	.fini = g_stripe_fini
66};
67
68SYSCTL_DECL(_kern_geom);
69SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff");
70static u_int g_stripe_debug = 0;
71TUNABLE_INT("kern.geom.stripe.debug", &g_stripe_debug);
72SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0,
73    "Debug level");
74static int g_stripe_fast = 0;
75TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast);
76static int
77g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS)
78{
79	int error, fast;
80
81	fast = g_stripe_fast;
82	error = sysctl_handle_int(oidp, &fast, 0, req);
83	if (error == 0 && req->newptr != NULL)
84		g_stripe_fast = fast;
85	return (error);
86}
87SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW,
88    NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode");
89static u_int g_stripe_maxmem = MAXPHYS * 100;
90TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem);
91SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem,
92    0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
93static u_int g_stripe_fast_failed = 0;
94SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
95    &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
96
/*
 * Greatest Common Divisor (Euclid's algorithm).
 *
 * gcd(a, 0) returns a, so gcd(0, 0) is 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int rem;

	while (b != 0) {
		rem = a % b;
		a = b;
		b = rem;
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * The division is done before the multiplication so the intermediate value
 * never exceeds the result; computing (a * b) first wraps around in u_int
 * for operands whose product does not fit, even when the LCM itself does.
 * Returns 0 if either argument is 0 instead of dividing by gcd(0, 0) == 0.
 */
static u_int
lcm(u_int a, u_int b)
{

	if (a == 0 || b == 0)
		return (0);
	return ((a / gcd(a, b)) * b);
}
122
123static void
124g_stripe_init(struct g_class *mp __unused)
125{
126
127	g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL,
128	    NULL, NULL, 0, 0);
129	g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS;
130	uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS);
131}
132
133static void
134g_stripe_fini(struct g_class *mp __unused)
135{
136
137	uma_zdestroy(g_stripe_zone);
138}
139
140/*
141 * Return the number of valid disks.
142 */
143static u_int
144g_stripe_nvalid(struct g_stripe_softc *sc)
145{
146	u_int i, no;
147
148	no = 0;
149	for (i = 0; i < sc->sc_ndisks; i++) {
150		if (sc->sc_disks[i] != NULL)
151			no++;
152	}
153
154	return (no);
155}
156
157static void
158g_stripe_remove_disk(struct g_consumer *cp)
159{
160	struct g_stripe_softc *sc;
161	u_int no;
162
163	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
164	sc = (struct g_stripe_softc *)cp->private;
165	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
166	no = cp->index;
167
168	G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
169	    sc->sc_name);
170
171	sc->sc_disks[no] = NULL;
172	if (sc->sc_provider != NULL) {
173		sc->sc_provider->flags |= G_PF_WITHER;
174		g_orphan_provider(sc->sc_provider, ENXIO);
175		sc->sc_provider = NULL;
176		G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_name);
177	}
178
179	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
180		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
181	g_detach(cp);
182	g_destroy_consumer(cp);
183}
184
185static void
186g_stripe_orphan(struct g_consumer *cp)
187{
188	struct g_stripe_softc *sc;
189	struct g_geom *gp;
190
191	g_topology_assert();
192	gp = cp->geom;
193	sc = gp->softc;
194	if (sc == NULL)
195		return;
196
197	g_stripe_remove_disk(cp);
198	/* If there are no valid disks anymore, remove device. */
199	if (g_stripe_nvalid(sc) == 0)
200		g_stripe_destroy(sc, 1);
201}
202
203static int
204g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
205{
206	struct g_consumer *cp1, *cp2;
207	struct g_stripe_softc *sc;
208	struct g_geom *gp;
209	int error;
210
211	gp = pp->geom;
212	sc = gp->softc;
213
214	if (sc == NULL) {
215		/*
216		 * It looks like geom is being withered.
217		 * In that case we allow only negative requests.
218		 */
219		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
220		    ("Positive access request (device=%s).", pp->name));
221		if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 &&
222		    (pp->ace + de) == 0) {
223			G_STRIPE_DEBUG(0, "Device %s definitely destroyed.",
224			    gp->name);
225		}
226		return (0);
227	}
228
229	/* On first open, grab an extra "exclusive" bit */
230	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
231		de++;
232	/* ... and let go of it on last close */
233	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
234		de--;
235
236	error = ENXIO;
237	LIST_FOREACH(cp1, &gp->consumer, consumer) {
238		error = g_access(cp1, dr, dw, de);
239		if (error == 0)
240			continue;
241		/*
242		 * If we fail here, backout all previous changes.
243		 */
244		LIST_FOREACH(cp2, &gp->consumer, consumer) {
245			if (cp1 == cp2)
246				return (error);
247			g_access(cp2, -dr, -dw, -de);
248		}
249		/* NOTREACHED */
250	}
251
252	return (error);
253}
254
255static void
256g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
257    off_t length, int mode)
258{
259	u_int stripesize;
260	size_t len;
261
262	stripesize = sc->sc_stripesize;
263	len = (size_t)(stripesize - (offset & (stripesize - 1)));
264	do {
265		bcopy(src, dst, len);
266		if (mode) {
267			dst += len + stripesize * (sc->sc_ndisks - 1);
268			src += len;
269		} else {
270			dst += len;
271			src += len + stripesize * (sc->sc_ndisks - 1);
272		}
273		length -= len;
274		KASSERT(length >= 0,
275		    ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).",
276		    (size_t)stripesize, (intmax_t)offset, (intmax_t)length));
277		if (length > stripesize)
278			len = stripesize;
279		else
280			len = length;
281	} while (length > 0);
282}
283
284static void
285g_stripe_done(struct bio *bp)
286{
287	struct g_stripe_softc *sc;
288	struct bio *pbp;
289
290	pbp = bp->bio_parent;
291	sc = pbp->bio_to->geom->softc;
292	if (pbp->bio_error == 0)
293		pbp->bio_error = bp->bio_error;
294	pbp->bio_completed += bp->bio_completed;
295	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
296		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
297		    bp->bio_length, 1);
298		bp->bio_data = bp->bio_caller1;
299		bp->bio_caller1 = NULL;
300	}
301	g_destroy_bio(bp);
302	pbp->bio_inbed++;
303	if (pbp->bio_children == pbp->bio_inbed) {
304		if (pbp->bio_driver1 != NULL)
305			uma_zfree(g_stripe_zone, pbp->bio_driver1);
306		g_io_deliver(pbp, pbp->bio_error);
307	}
308}
309
310static int
311g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
312{
313	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
314	u_int nparts = 0, stripesize;
315	struct g_stripe_softc *sc;
316	char *addr, *data = NULL;
317	struct bio *cbp;
318	int error;
319
320	sc = bp->bio_to->geom->softc;
321
322	addr = bp->bio_data;
323	stripesize = sc->sc_stripesize;
324
325	cbp = g_clone_bio(bp);
326	if (cbp == NULL) {
327		error = ENOMEM;
328		goto failure;
329	}
330	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
331	nparts++;
332	/*
333	 * Fill in the component buf structure.
334	 */
335	cbp->bio_done = g_stripe_done;
336	cbp->bio_offset = offset;
337	cbp->bio_data = addr;
338	cbp->bio_caller1 = NULL;
339	cbp->bio_length = length;
340	cbp->bio_caller2 = sc->sc_disks[no];
341
342	/* offset -= offset % stripesize; */
343	offset -= offset & (stripesize - 1);
344	addr += length;
345	length = bp->bio_length - length;
346	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
347		if (no > sc->sc_ndisks - 1) {
348			no = 0;
349			offset += stripesize;
350		}
351		if (nparts >= sc->sc_ndisks) {
352			cbp = TAILQ_NEXT(cbp, bio_queue);
353			if (cbp == NULL)
354				cbp = TAILQ_FIRST(&queue);
355			nparts++;
356			/*
357			 * Update bio structure.
358			 */
359			/*
360			 * MIN() is in case when
361			 * (bp->bio_length % sc->sc_stripesize) != 0.
362			 */
363			cbp->bio_length += MIN(stripesize, length);
364			if (cbp->bio_caller1 == NULL) {
365				cbp->bio_caller1 = cbp->bio_data;
366				cbp->bio_data = NULL;
367				if (data == NULL) {
368					data = uma_zalloc(g_stripe_zone,
369					    M_NOWAIT);
370					if (data == NULL) {
371						error = ENOMEM;
372						goto failure;
373					}
374				}
375			}
376		} else {
377			cbp = g_clone_bio(bp);
378			if (cbp == NULL) {
379				error = ENOMEM;
380				goto failure;
381			}
382			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
383			nparts++;
384			/*
385			 * Fill in the component buf structure.
386			 */
387			cbp->bio_done = g_stripe_done;
388			cbp->bio_offset = offset;
389			cbp->bio_data = addr;
390			cbp->bio_caller1 = NULL;
391			/*
392			 * MIN() is in case when
393			 * (bp->bio_length % sc->sc_stripesize) != 0.
394			 */
395			cbp->bio_length = MIN(stripesize, length);
396			cbp->bio_caller2 = sc->sc_disks[no];
397		}
398	}
399	if (data != NULL)
400		bp->bio_driver1 = data;
401	/*
402	 * Fire off all allocated requests!
403	 */
404	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
405		struct g_consumer *cp;
406
407		TAILQ_REMOVE(&queue, cbp, bio_queue);
408		cp = cbp->bio_caller2;
409		cbp->bio_caller2 = NULL;
410		cbp->bio_to = cp->provider;
411		if (cbp->bio_caller1 != NULL) {
412			cbp->bio_data = data;
413			if (bp->bio_cmd == BIO_WRITE) {
414				g_stripe_copy(sc, cbp->bio_caller1, data,
415				    cbp->bio_offset, cbp->bio_length, 0);
416			}
417			data += cbp->bio_length;
418		}
419		G_STRIPE_LOGREQ(cbp, "Sending request.");
420		g_io_request(cbp, cp);
421	}
422	return (0);
423failure:
424	if (data != NULL)
425		uma_zfree(g_stripe_zone, data);
426	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
427		TAILQ_REMOVE(&queue, cbp, bio_queue);
428		if (cbp->bio_caller1 != NULL) {
429			cbp->bio_data = cbp->bio_caller1;
430			cbp->bio_caller1 = NULL;
431		}
432		bp->bio_children--;
433		g_destroy_bio(cbp);
434	}
435	return (error);
436}
437
438static int
439g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
440{
441	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
442	struct g_stripe_softc *sc;
443	uint32_t stripesize;
444	struct bio *cbp;
445	char *addr;
446	int error;
447
448	sc = bp->bio_to->geom->softc;
449
450	addr = bp->bio_data;
451	stripesize = sc->sc_stripesize;
452
453	cbp = g_clone_bio(bp);
454	if (cbp == NULL) {
455		error = ENOMEM;
456		goto failure;
457	}
458	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
459	/*
460	 * Fill in the component buf structure.
461	 */
462	cbp->bio_done = g_std_done;
463	cbp->bio_offset = offset;
464	cbp->bio_data = addr;
465	cbp->bio_length = length;
466	cbp->bio_caller2 = sc->sc_disks[no];
467
468	/* offset -= offset % stripesize; */
469	offset -= offset & (stripesize - 1);
470	addr += length;
471	length = bp->bio_length - length;
472	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
473		if (no > sc->sc_ndisks - 1) {
474			no = 0;
475			offset += stripesize;
476		}
477		cbp = g_clone_bio(bp);
478		if (cbp == NULL) {
479			error = ENOMEM;
480			goto failure;
481		}
482		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
483
484		/*
485		 * Fill in the component buf structure.
486		 */
487		cbp->bio_done = g_std_done;
488		cbp->bio_offset = offset;
489		cbp->bio_data = addr;
490		/*
491		 * MIN() is in case when
492		 * (bp->bio_length % sc->sc_stripesize) != 0.
493		 */
494		cbp->bio_length = MIN(stripesize, length);
495
496		cbp->bio_caller2 = sc->sc_disks[no];
497	}
498	/*
499	 * Fire off all allocated requests!
500	 */
501	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
502		struct g_consumer *cp;
503
504		TAILQ_REMOVE(&queue, cbp, bio_queue);
505		cp = cbp->bio_caller2;
506		cbp->bio_caller2 = NULL;
507		cbp->bio_to = cp->provider;
508		G_STRIPE_LOGREQ(cbp, "Sending request.");
509		g_io_request(cbp, cp);
510	}
511	return (0);
512failure:
513	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
514		TAILQ_REMOVE(&queue, cbp, bio_queue);
515		bp->bio_children--;
516		g_destroy_bio(cbp);
517	}
518	return (error);
519}
520
521static void
522g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
523{
524	struct bio_queue_head queue;
525	struct g_consumer *cp;
526	struct bio *cbp;
527	u_int no;
528
529	bioq_init(&queue);
530	for (no = 0; no < sc->sc_ndisks; no++) {
531		cbp = g_clone_bio(bp);
532		if (cbp == NULL) {
533			for (cbp = bioq_first(&queue); cbp != NULL;
534			    cbp = bioq_first(&queue)) {
535				bioq_remove(&queue, cbp);
536				g_destroy_bio(cbp);
537			}
538			if (bp->bio_error == 0)
539				bp->bio_error = ENOMEM;
540			g_io_deliver(bp, bp->bio_error);
541			return;
542		}
543		bioq_insert_tail(&queue, cbp);
544		cbp->bio_done = g_std_done;
545		cbp->bio_caller1 = sc->sc_disks[no];
546		cbp->bio_to = sc->sc_disks[no]->provider;
547	}
548	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
549		bioq_remove(&queue, cbp);
550		G_STRIPE_LOGREQ(cbp, "Sending request.");
551		cp = cbp->bio_caller1;
552		cbp->bio_caller1 = NULL;
553		g_io_request(cbp, cp);
554	}
555}
556
557static void
558g_stripe_start(struct bio *bp)
559{
560	off_t offset, start, length, nstripe;
561	struct g_stripe_softc *sc;
562	u_int no, stripesize;
563	int error, fast = 0;
564
565	sc = bp->bio_to->geom->softc;
566	/*
567	 * If sc == NULL, provider's error should be set and g_stripe_start()
568	 * should not be called at all.
569	 */
570	KASSERT(sc != NULL,
571	    ("Provider's error should be set (error=%d)(device=%s).",
572	    bp->bio_to->error, bp->bio_to->name));
573
574	G_STRIPE_LOGREQ(bp, "Request received.");
575
576	switch (bp->bio_cmd) {
577	case BIO_READ:
578	case BIO_WRITE:
579	case BIO_DELETE:
580		break;
581	case BIO_FLUSH:
582		g_stripe_flush(sc, bp);
583		return;
584	case BIO_GETATTR:
585		/* To which provider it should be delivered? */
586	default:
587		g_io_deliver(bp, EOPNOTSUPP);
588		return;
589	}
590
591	stripesize = sc->sc_stripesize;
592
593	/*
594	 * Calculations are quite messy, but fast I hope.
595	 */
596
597	/* Stripe number. */
598	/* nstripe = bp->bio_offset / stripesize; */
599	nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
600	/* Disk number. */
601	no = nstripe % sc->sc_ndisks;
602	/* Start position in stripe. */
603	/* start = bp->bio_offset % stripesize; */
604	start = bp->bio_offset & (stripesize - 1);
605	/* Start position in disk. */
606	/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
607	offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
608	/* Length of data to operate. */
609	length = MIN(bp->bio_length, stripesize - start);
610
611	/*
612	 * Do use "fast" mode when:
613	 * 1. "Fast" mode is ON.
614	 * and
615	 * 2. Request size is less than or equal to MAXPHYS,
616	 *    which should always be true.
617	 * and
618	 * 3. Request size is bigger than stripesize * ndisks. If it isn't,
619	 *    there will be no need to send more than one I/O request to
620	 *    a provider, so there is nothing to optmize.
621	 */
622	if (g_stripe_fast && bp->bio_length <= MAXPHYS &&
623	    bp->bio_length >= stripesize * sc->sc_ndisks) {
624		fast = 1;
625	}
626	error = 0;
627	if (fast) {
628		error = g_stripe_start_fast(bp, no, offset, length);
629		if (error != 0)
630			g_stripe_fast_failed++;
631	}
632	/*
633	 * Do use "economic" when:
634	 * 1. "Economic" mode is ON.
635	 * or
636	 * 2. "Fast" mode failed. It can only failed if there is no memory.
637	 */
638	if (!fast || error != 0)
639		error = g_stripe_start_economic(bp, no, offset, length);
640	if (error != 0) {
641		if (bp->bio_error == 0)
642			bp->bio_error = error;
643		g_io_deliver(bp, bp->bio_error);
644	}
645}
646
647static void
648g_stripe_check_and_run(struct g_stripe_softc *sc)
649{
650	off_t mediasize, ms;
651	u_int no, sectorsize = 0;
652
653	if (g_stripe_nvalid(sc) != sc->sc_ndisks)
654		return;
655
656	sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
657	    sc->sc_name);
658	/*
659	 * Find the smallest disk.
660	 */
661	mediasize = sc->sc_disks[0]->provider->mediasize;
662	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
663		mediasize -= sc->sc_disks[0]->provider->sectorsize;
664	mediasize -= mediasize % sc->sc_stripesize;
665	sectorsize = sc->sc_disks[0]->provider->sectorsize;
666	for (no = 1; no < sc->sc_ndisks; no++) {
667		ms = sc->sc_disks[no]->provider->mediasize;
668		if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
669			ms -= sc->sc_disks[no]->provider->sectorsize;
670		ms -= ms % sc->sc_stripesize;
671		if (ms < mediasize)
672			mediasize = ms;
673		sectorsize = lcm(sectorsize,
674		    sc->sc_disks[no]->provider->sectorsize);
675	}
676	sc->sc_provider->sectorsize = sectorsize;
677	sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
678	g_error_provider(sc->sc_provider, 0);
679
680	G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_name);
681}
682
683static int
684g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
685{
686	struct g_provider *pp;
687	u_char *buf;
688	int error;
689
690	g_topology_assert();
691
692	error = g_access(cp, 1, 0, 0);
693	if (error != 0)
694		return (error);
695	pp = cp->provider;
696	g_topology_unlock();
697	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
698	    &error);
699	g_topology_lock();
700	g_access(cp, -1, 0, 0);
701	if (buf == NULL)
702		return (error);
703
704	/* Decode metadata. */
705	stripe_metadata_decode(buf, md);
706	g_free(buf);
707
708	return (0);
709}
710
711/*
712 * Add disk to given device.
713 */
714static int
715g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
716{
717	struct g_consumer *cp, *fcp;
718	struct g_geom *gp;
719	int error;
720
721	/* Metadata corrupted? */
722	if (no >= sc->sc_ndisks)
723		return (EINVAL);
724
725	/* Check if disk is not already attached. */
726	if (sc->sc_disks[no] != NULL)
727		return (EEXIST);
728
729	gp = sc->sc_geom;
730	fcp = LIST_FIRST(&gp->consumer);
731
732	cp = g_new_consumer(gp);
733	error = g_attach(cp, pp);
734	if (error != 0) {
735		g_destroy_consumer(cp);
736		return (error);
737	}
738
739	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
740		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
741		if (error != 0) {
742			g_detach(cp);
743			g_destroy_consumer(cp);
744			return (error);
745		}
746	}
747	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
748		struct g_stripe_metadata md;
749
750		/* Reread metadata. */
751		error = g_stripe_read_metadata(cp, &md);
752		if (error != 0)
753			goto fail;
754
755		if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
756		    strcmp(md.md_name, sc->sc_name) != 0 ||
757		    md.md_id != sc->sc_id) {
758			G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
759			goto fail;
760		}
761	}
762
763	cp->private = sc;
764	cp->index = no;
765	sc->sc_disks[no] = cp;
766
767	G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
768
769	g_stripe_check_and_run(sc);
770
771	return (0);
772fail:
773	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
774		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
775	g_detach(cp);
776	g_destroy_consumer(cp);
777	return (error);
778}
779
780static struct g_geom *
781g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
782    u_int type)
783{
784	struct g_stripe_softc *sc;
785	struct g_geom *gp;
786	u_int no;
787
788	G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
789	    md->md_id);
790
791	/* Two disks is minimum. */
792	if (md->md_all < 2) {
793		G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
794		return (NULL);
795	}
796#if 0
797	/* Stripe size have to be grater than or equal to sector size. */
798	if (md->md_stripesize < sectorsize) {
799		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
800		return (NULL);
801	}
802#endif
803	/* Stripe size have to be power of 2. */
804	if (!powerof2(md->md_stripesize)) {
805		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
806		return (NULL);
807	}
808
809	/* Check for duplicate unit */
810	LIST_FOREACH(gp, &mp->geom, geom) {
811		sc = gp->softc;
812		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
813			G_STRIPE_DEBUG(0, "Device %s already configured.",
814			    sc->sc_name);
815			return (NULL);
816		}
817	}
818	gp = g_new_geomf(mp, "%s", md->md_name);
819	gp->softc = NULL;	/* for a moment */
820
821	sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
822	gp->start = g_stripe_start;
823	gp->spoiled = g_stripe_orphan;
824	gp->orphan = g_stripe_orphan;
825	gp->access = g_stripe_access;
826	gp->dumpconf = g_stripe_dumpconf;
827
828	sc->sc_id = md->md_id;
829	sc->sc_stripesize = md->md_stripesize;
830	sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
831	sc->sc_ndisks = md->md_all;
832	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
833	    M_STRIPE, M_WAITOK | M_ZERO);
834	for (no = 0; no < sc->sc_ndisks; no++)
835		sc->sc_disks[no] = NULL;
836	sc->sc_type = type;
837
838	gp->softc = sc;
839	sc->sc_geom = gp;
840	sc->sc_provider = NULL;
841
842	G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
843
844	return (gp);
845}
846
847static int
848g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
849{
850	struct g_provider *pp;
851	struct g_geom *gp;
852	u_int no;
853
854	g_topology_assert();
855
856	if (sc == NULL)
857		return (ENXIO);
858
859	pp = sc->sc_provider;
860	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
861		if (force) {
862			G_STRIPE_DEBUG(0, "Device %s is still open, so it "
863			    "can't be definitely removed.", pp->name);
864		} else {
865			G_STRIPE_DEBUG(1,
866			    "Device %s is still open (r%dw%de%d).", pp->name,
867			    pp->acr, pp->acw, pp->ace);
868			return (EBUSY);
869		}
870	}
871
872	for (no = 0; no < sc->sc_ndisks; no++) {
873		if (sc->sc_disks[no] != NULL)
874			g_stripe_remove_disk(sc->sc_disks[no]);
875	}
876
877	gp = sc->sc_geom;
878	gp->softc = NULL;
879	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
880	    gp->name));
881	free(sc->sc_disks, M_STRIPE);
882	free(sc, M_STRIPE);
883
884	pp = LIST_FIRST(&gp->provider);
885	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
886		G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
887
888	g_wither_geom(gp, ENXIO);
889
890	return (0);
891}
892
893static int
894g_stripe_destroy_geom(struct gctl_req *req __unused,
895    struct g_class *mp __unused, struct g_geom *gp)
896{
897	struct g_stripe_softc *sc;
898
899	sc = gp->softc;
900	return (g_stripe_destroy(sc, 0));
901}
902
903static struct g_geom *
904g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
905{
906	struct g_stripe_metadata md;
907	struct g_stripe_softc *sc;
908	struct g_consumer *cp;
909	struct g_geom *gp;
910	int error;
911
912	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
913	g_topology_assert();
914
915	/* Skip providers that are already open for writing. */
916	if (pp->acw > 0)
917		return (NULL);
918
919	G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
920
921	gp = g_new_geomf(mp, "stripe:taste");
922	gp->start = g_stripe_start;
923	gp->access = g_stripe_access;
924	gp->orphan = g_stripe_orphan;
925	cp = g_new_consumer(gp);
926	g_attach(cp, pp);
927	error = g_stripe_read_metadata(cp, &md);
928	g_detach(cp);
929	g_destroy_consumer(cp);
930	g_destroy_geom(gp);
931	if (error != 0)
932		return (NULL);
933	gp = NULL;
934
935	if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
936		return (NULL);
937	if (md.md_version > G_STRIPE_VERSION) {
938		printf("geom_stripe.ko module is too old to handle %s.\n",
939		    pp->name);
940		return (NULL);
941	}
942	/*
943	 * Backward compatibility:
944	 */
945	/* There was no md_provider field in earlier versions of metadata. */
946	if (md.md_version < 2)
947		bzero(md.md_provider, sizeof(md.md_provider));
948	/* There was no md_provsize field in earlier versions of metadata. */
949	if (md.md_version < 3)
950		md.md_provsize = pp->mediasize;
951
952	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
953		return (NULL);
954	if (md.md_provsize != pp->mediasize)
955		return (NULL);
956
957	/*
958	 * Let's check if device already exists.
959	 */
960	sc = NULL;
961	LIST_FOREACH(gp, &mp->geom, geom) {
962		sc = gp->softc;
963		if (sc == NULL)
964			continue;
965		if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
966			continue;
967		if (strcmp(md.md_name, sc->sc_name) != 0)
968			continue;
969		if (md.md_id != sc->sc_id)
970			continue;
971		break;
972	}
973	if (gp != NULL) {
974		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
975		error = g_stripe_add_disk(sc, pp, md.md_no);
976		if (error != 0) {
977			G_STRIPE_DEBUG(0,
978			    "Cannot add disk %s to %s (error=%d).", pp->name,
979			    gp->name, error);
980			return (NULL);
981		}
982	} else {
983		gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
984		if (gp == NULL) {
985			G_STRIPE_DEBUG(0, "Cannot create device %s.",
986			    md.md_name);
987			return (NULL);
988		}
989		sc = gp->softc;
990		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
991		error = g_stripe_add_disk(sc, pp, md.md_no);
992		if (error != 0) {
993			G_STRIPE_DEBUG(0,
994			    "Cannot add disk %s to %s (error=%d).", pp->name,
995			    gp->name, error);
996			g_stripe_destroy(sc, 1);
997			return (NULL);
998		}
999	}
1000
1001	return (gp);
1002}
1003
1004static void
1005g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
1006{
1007	u_int attached, no;
1008	struct g_stripe_metadata md;
1009	struct g_provider *pp;
1010	struct g_stripe_softc *sc;
1011	struct g_geom *gp;
1012	struct sbuf *sb;
1013	intmax_t *stripesize;
1014	const char *name;
1015	char param[16];
1016	int *nargs;
1017
1018	g_topology_assert();
1019	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1020	if (nargs == NULL) {
1021		gctl_error(req, "No '%s' argument.", "nargs");
1022		return;
1023	}
1024	if (*nargs <= 2) {
1025		gctl_error(req, "Too few arguments.");
1026		return;
1027	}
1028
1029	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
1030	md.md_version = G_STRIPE_VERSION;
1031	name = gctl_get_asciiparam(req, "arg0");
1032	if (name == NULL) {
1033		gctl_error(req, "No 'arg%u' argument.", 0);
1034		return;
1035	}
1036	strlcpy(md.md_name, name, sizeof(md.md_name));
1037	md.md_id = arc4random();
1038	md.md_no = 0;
1039	md.md_all = *nargs - 1;
1040	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
1041	if (stripesize == NULL) {
1042		gctl_error(req, "No '%s' argument.", "stripesize");
1043		return;
1044	}
1045	md.md_stripesize = *stripesize;
1046	bzero(md.md_provider, sizeof(md.md_provider));
1047	/* This field is not important here. */
1048	md.md_provsize = 0;
1049
1050	/* Check all providers are valid */
1051	for (no = 1; no < *nargs; no++) {
1052		snprintf(param, sizeof(param), "arg%u", no);
1053		name = gctl_get_asciiparam(req, param);
1054		if (name == NULL) {
1055			gctl_error(req, "No 'arg%u' argument.", no);
1056			return;
1057		}
1058		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
1059			name += strlen("/dev/");
1060		pp = g_provider_by_name(name);
1061		if (pp == NULL) {
1062			G_STRIPE_DEBUG(1, "Disk %s is invalid.", name);
1063			gctl_error(req, "Disk %s is invalid.", name);
1064			return;
1065		}
1066	}
1067
1068	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
1069	if (gp == NULL) {
1070		gctl_error(req, "Can't configure %s.", md.md_name);
1071		return;
1072	}
1073
1074	sc = gp->softc;
1075	sb = sbuf_new_auto();
1076	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
1077	for (attached = 0, no = 1; no < *nargs; no++) {
1078		snprintf(param, sizeof(param), "arg%u", no);
1079		name = gctl_get_asciiparam(req, param);
1080		if (name == NULL) {
1081			gctl_error(req, "No 'arg%u' argument.", no);
1082			continue;
1083		}
1084		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
1085			name += strlen("/dev/");
1086		pp = g_provider_by_name(name);
1087		KASSERT(pp != NULL, ("Provider %s disappear?!", name));
1088		if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
1089			G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
1090			    no, pp->name, gp->name);
1091			sbuf_printf(sb, " %s", pp->name);
1092			continue;
1093		}
1094		attached++;
1095	}
1096	sbuf_finish(sb);
1097	if (md.md_all != attached) {
1098		g_stripe_destroy(gp->softc, 1);
1099		gctl_error(req, "%s", sbuf_data(sb));
1100	}
1101	sbuf_delete(sb);
1102}
1103
1104static struct g_stripe_softc *
1105g_stripe_find_device(struct g_class *mp, const char *name)
1106{
1107	struct g_stripe_softc *sc;
1108	struct g_geom *gp;
1109
1110	LIST_FOREACH(gp, &mp->geom, geom) {
1111		sc = gp->softc;
1112		if (sc == NULL)
1113			continue;
1114		if (strcmp(sc->sc_name, name) == 0)
1115			return (sc);
1116	}
1117	return (NULL);
1118}
1119
1120static void
1121g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
1122{
1123	struct g_stripe_softc *sc;
1124	int *force, *nargs, error;
1125	const char *name;
1126	char param[16];
1127	u_int i;
1128
1129	g_topology_assert();
1130
1131	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1132	if (nargs == NULL) {
1133		gctl_error(req, "No '%s' argument.", "nargs");
1134		return;
1135	}
1136	if (*nargs <= 0) {
1137		gctl_error(req, "Missing device(s).");
1138		return;
1139	}
1140	force = gctl_get_paraml(req, "force", sizeof(*force));
1141	if (force == NULL) {
1142		gctl_error(req, "No '%s' argument.", "force");
1143		return;
1144	}
1145
1146	for (i = 0; i < (u_int)*nargs; i++) {
1147		snprintf(param, sizeof(param), "arg%u", i);
1148		name = gctl_get_asciiparam(req, param);
1149		if (name == NULL) {
1150			gctl_error(req, "No 'arg%u' argument.", i);
1151			return;
1152		}
1153		sc = g_stripe_find_device(mp, name);
1154		if (sc == NULL) {
1155			gctl_error(req, "No such device: %s.", name);
1156			return;
1157		}
1158		error = g_stripe_destroy(sc, *force);
1159		if (error != 0) {
1160			gctl_error(req, "Cannot destroy device %s (error=%d).",
1161			    sc->sc_name, error);
1162			return;
1163		}
1164	}
1165}
1166
1167static void
1168g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
1169{
1170	uint32_t *version;
1171
1172	g_topology_assert();
1173
1174	version = gctl_get_paraml(req, "version", sizeof(*version));
1175	if (version == NULL) {
1176		gctl_error(req, "No '%s' argument.", "version");
1177		return;
1178	}
1179	if (*version != G_STRIPE_VERSION) {
1180		gctl_error(req, "Userland and kernel parts are out of sync.");
1181		return;
1182	}
1183
1184	if (strcmp(verb, "create") == 0) {
1185		g_stripe_ctl_create(req, mp);
1186		return;
1187	} else if (strcmp(verb, "destroy") == 0 ||
1188	    strcmp(verb, "stop") == 0) {
1189		g_stripe_ctl_destroy(req, mp);
1190		return;
1191	}
1192
1193	gctl_error(req, "Unknown verb.");
1194}
1195
1196static void
1197g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1198    struct g_consumer *cp, struct g_provider *pp)
1199{
1200	struct g_stripe_softc *sc;
1201
1202	sc = gp->softc;
1203	if (sc == NULL)
1204		return;
1205	if (pp != NULL) {
1206		/* Nothing here. */
1207	} else if (cp != NULL) {
1208		sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
1209		    (u_int)cp->index);
1210	} else {
1211		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
1212		sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent,
1213		    (u_int)sc->sc_stripesize);
1214		sbuf_printf(sb, "%s<Type>", indent);
1215		switch (sc->sc_type) {
1216		case G_STRIPE_TYPE_AUTOMATIC:
1217			sbuf_printf(sb, "AUTOMATIC");
1218			break;
1219		case G_STRIPE_TYPE_MANUAL:
1220			sbuf_printf(sb, "MANUAL");
1221			break;
1222		default:
1223			sbuf_printf(sb, "UNKNOWN");
1224			break;
1225		}
1226		sbuf_printf(sb, "</Type>\n");
1227		sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
1228		    indent, sc->sc_ndisks, g_stripe_nvalid(sc));
1229		sbuf_printf(sb, "%s<State>", indent);
1230		if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
1231			sbuf_printf(sb, "UP");
1232		else
1233			sbuf_printf(sb, "DOWN");
1234		sbuf_printf(sb, "</State>\n");
1235	}
1236}
1237
/* Declare g_stripe_class as a GEOM class under the name g_stripe. */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
1239