g_stripe.c revision 133204
1/*-
2 * Copyright (c) 2003 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/stripe/g_stripe.c 133204 2004-08-06 10:07:03Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/bio.h>
37#include <sys/sysctl.h>
38#include <sys/malloc.h>
39#include <vm/uma.h>
40#include <geom/geom.h>
41#include <geom/stripe/g_stripe.h>
42
43
/*
 * Size of every item in g_stripe_zone (see g_stripe_init()) and the largest
 * request g_stripe_start() will hand to the "fast" path.
 */
#define	MAX_IO_SIZE	(DFLTPHYS * 2)
/* malloc(9) type used for the softc and its array of consumers. */
static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data");

/* Zone of MAX_IO_SIZE buffers used by g_stripe_start_fast(). */
static uma_zone_t g_stripe_zone;

/* Forward declarations: both are referenced before their definitions. */
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);

/* Class methods, typed through the GEOM method typedefs. */
static g_taste_t g_stripe_taste;
static g_ctl_req_t g_stripe_config;
static g_dumpconf_t g_stripe_dumpconf;
static g_init_t g_stripe_init;
static g_fini_t g_stripe_fini;

/* Class descriptor; registered at the bottom of the file. */
struct g_class g_stripe_class = {
	.name = G_STRIPE_CLASS_NAME,
	.ctlreq = g_stripe_config,
	.taste = g_stripe_taste,
	.destroy_geom = g_stripe_destroy_geom,
	.init = g_stripe_init,
	.fini = g_stripe_fini
};

/* kern.geom.stripe sysctl subtree. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff");
/*
 * kern.geom.stripe.debug.
 * NOTE(review): presumably consulted by the G_STRIPE_DEBUG() macro from
 * g_stripe.h; it is not read directly in this file -- confirm.
 */
static u_int g_stripe_debug = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0,
    "Debug level");
/*
 * Non-zero lets g_stripe_start() try the "fast" path first.  Settable as a
 * loader tunable and through the sysctl handler that follows.
 */
static int g_stripe_fast = 1;
TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast);
75static int
76g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS)
77{
78	int error, fast;
79
80	fast = g_stripe_fast;
81	error = sysctl_handle_int(oidp, &fast, sizeof(fast), req);
82	if (error == 0 && req->newptr != NULL)
83		g_stripe_fast = fast;
84	return (error);
85}
/* kern.geom.stripe.fast: read/write, routed through g_sysctl_stripe_fast(). */
SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode");
/*
 * Memory budget for g_stripe_zone, in bytes.  Settable only as a loader
 * tunable (the sysctl is read-only); g_stripe_init() rounds it down to a
 * multiple of MAX_IO_SIZE and uses it to cap the zone.
 */
static u_int g_stripe_maxmem = MAX_IO_SIZE * 10;
TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem);
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem,
    0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
92
/*
 * Greatest Common Divisor (Euclid's algorithm).
 *
 * gcd(a, 0) is a, hence gcd(0, 0) is 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int c;

	while (b != 0) {
		c = a;
		a = b;
		b = (c % b);
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * Returns 0 when either argument is 0 (this also avoids dividing by
 * gcd(0, 0) == 0).  The division is done before the multiplication so the
 * intermediate value never exceeds the result itself; computing a * b
 * first wraps around for large arguments even when the LCM fits.
 */
static u_int
lcm(u_int a, u_int b)
{

	if (a == 0 || b == 0)
		return (0);
	return ((a / gcd(a, b)) * b);
}
118
119static void
120g_stripe_init(struct g_class *mp __unused)
121{
122
123	g_stripe_zone = uma_zcreate("g_stripe_zone", MAX_IO_SIZE, NULL, NULL,
124	    NULL, NULL, 0, 0);
125	g_stripe_maxmem -= g_stripe_maxmem % MAX_IO_SIZE;
126	uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAX_IO_SIZE);
127}
128
129static void
130g_stripe_fini(struct g_class *mp __unused)
131{
132
133	uma_zdestroy(g_stripe_zone);
134}
135
136/*
137 * Return the number of valid disks.
138 */
139static u_int
140g_stripe_nvalid(struct g_stripe_softc *sc)
141{
142	u_int i, no;
143
144	no = 0;
145	for (i = 0; i < sc->sc_ndisks; i++) {
146		if (sc->sc_disks[i] != NULL)
147			no++;
148	}
149
150	return (no);
151}
152
153static void
154g_stripe_remove_disk(struct g_consumer *cp)
155{
156	struct g_stripe_softc *sc;
157	u_int no;
158
159	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
160	sc = (struct g_stripe_softc *)cp->private;
161	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
162	no = cp->index;
163
164	G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
165	    sc->sc_name);
166
167	sc->sc_disks[no] = NULL;
168	if (sc->sc_provider != NULL) {
169		g_orphan_provider(sc->sc_provider, ENXIO);
170		sc->sc_provider = NULL;
171		G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_name);
172	}
173
174	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
175		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
176	g_detach(cp);
177	g_destroy_consumer(cp);
178}
179
180static void
181g_stripe_orphan(struct g_consumer *cp)
182{
183	struct g_stripe_softc *sc;
184	struct g_geom *gp;
185
186	g_topology_assert();
187	gp = cp->geom;
188	sc = gp->softc;
189	if (sc == NULL)
190		return;
191
192	g_stripe_remove_disk(cp);
193	/* If there are no valid disks anymore, remove device. */
194	if (g_stripe_nvalid(sc) == 0)
195		g_stripe_destroy(sc, 1);
196}
197
198static int
199g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
200{
201	struct g_consumer *cp1, *cp2;
202	struct g_stripe_softc *sc;
203	struct g_geom *gp;
204	int error;
205
206	gp = pp->geom;
207	sc = gp->softc;
208
209	if (sc == NULL) {
210		/*
211		 * It looks like geom is being withered.
212		 * In that case we allow only negative requests.
213		 */
214		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
215		    ("Positive access request (device=%s).", pp->name));
216		if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 &&
217		    (pp->ace + de) == 0) {
218			G_STRIPE_DEBUG(0, "Device %s definitely destroyed.",
219			    gp->name);
220		}
221		return (0);
222	}
223
224	/* On first open, grab an extra "exclusive" bit */
225	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
226		de++;
227	/* ... and let go of it on last close */
228	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
229		de--;
230
231	error = ENXIO;
232	LIST_FOREACH(cp1, &gp->consumer, consumer) {
233		error = g_access(cp1, dr, dw, de);
234		if (error == 0)
235			continue;
236		/*
237		 * If we fail here, backout all previous changes.
238		 */
239		LIST_FOREACH(cp2, &gp->consumer, consumer) {
240			if (cp1 == cp2)
241				return (error);
242			g_access(cp2, -dr, -dw, -de);
243		}
244		/* NOTREACHED */
245	}
246
247	return (error);
248}
249
250static void
251g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
252    off_t length, int mode)
253{
254	u_int stripesize;
255	size_t len;
256
257	stripesize = sc->sc_stripesize;
258	len = (size_t)(stripesize - (offset & (stripesize - 1)));
259	do {
260		bcopy(src, dst, len);
261		if (mode) {
262			dst += len + stripesize * (sc->sc_ndisks - 1);
263			src += len;
264		} else {
265			dst += len;
266			src += len + stripesize * (sc->sc_ndisks - 1);
267		}
268		length -= len;
269		KASSERT(length >= 0,
270		    ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).",
271		    (size_t)stripesize, (intmax_t)offset, (intmax_t)length));
272		if (length > stripesize)
273			len = stripesize;
274		else
275			len = length;
276	} while (length > 0);
277}
278
279static void
280g_stripe_done(struct bio *bp)
281{
282	struct g_stripe_softc *sc;
283	struct bio *pbp;
284
285	pbp = bp->bio_parent;
286	sc = pbp->bio_to->geom->softc;
287	if (pbp->bio_error == 0)
288		pbp->bio_error = bp->bio_error;
289	pbp->bio_completed += bp->bio_completed;
290	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
291		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
292		    bp->bio_length, 1);
293		bp->bio_data = bp->bio_caller1;
294		bp->bio_caller1 = NULL;
295	}
296	g_destroy_bio(bp);
297	pbp->bio_inbed++;
298	if (pbp->bio_children == pbp->bio_inbed) {
299		if (pbp->bio_driver1 != NULL)
300			uma_zfree(g_stripe_zone, pbp->bio_driver1);
301		g_io_deliver(pbp, pbp->bio_error);
302	}
303}
304
305static int
306g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
307{
308	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
309	u_int nparts = 0, stripesize;
310	struct g_stripe_softc *sc;
311	char *addr, *data = NULL;
312	struct bio *cbp;
313	int error;
314
315	sc = bp->bio_to->geom->softc;
316
317	addr = bp->bio_data;
318	stripesize = sc->sc_stripesize;
319
320	cbp = g_clone_bio(bp);
321	if (cbp == NULL) {
322		error = ENOMEM;
323		goto failure;
324	}
325	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
326	nparts++;
327	/*
328	 * Fill in the component buf structure.
329	 */
330	cbp->bio_done = g_stripe_done;
331	cbp->bio_offset = offset;
332	cbp->bio_data = addr;
333	cbp->bio_caller1 = NULL;
334	cbp->bio_length = length;
335	cbp->bio_caller2 = sc->sc_disks[no];
336
337	/* offset -= offset % stripesize; */
338	offset -= offset & (stripesize - 1);
339	addr += length;
340	length = bp->bio_length - length;
341	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
342		if (no > sc->sc_ndisks - 1) {
343			no = 0;
344			offset += stripesize;
345		}
346		if (nparts >= sc->sc_ndisks) {
347			cbp = TAILQ_NEXT(cbp, bio_queue);
348			if (cbp == NULL)
349				cbp = TAILQ_FIRST(&queue);
350			nparts++;
351			/*
352			 * Update bio structure.
353			 */
354			/*
355			 * MIN() is in case when
356			 * (bp->bio_length % sc->sc_stripesize) != 0.
357			 */
358			cbp->bio_length += MIN(stripesize, length);
359			if (cbp->bio_caller1 == NULL) {
360				cbp->bio_caller1 = cbp->bio_data;
361				cbp->bio_data = NULL;
362				if (data == NULL) {
363					data = uma_zalloc(g_stripe_zone,
364					    M_NOWAIT);
365					if (data == NULL) {
366						error = ENOMEM;
367						goto failure;
368					}
369				}
370			}
371		} else {
372			cbp = g_clone_bio(bp);
373			if (cbp == NULL) {
374				error = ENOMEM;
375				goto failure;
376			}
377			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
378			nparts++;
379			/*
380			 * Fill in the component buf structure.
381			 */
382			cbp->bio_done = g_stripe_done;
383			cbp->bio_offset = offset;
384			cbp->bio_data = addr;
385			cbp->bio_caller1 = NULL;
386			/*
387			 * MIN() is in case when
388			 * (bp->bio_length % sc->sc_stripesize) != 0.
389			 */
390			cbp->bio_length = MIN(stripesize, length);
391			cbp->bio_caller2 = sc->sc_disks[no];
392		}
393	}
394	if (data != NULL)
395		bp->bio_caller1 = data;
396	/*
397	 * Fire off all allocated requests!
398	 */
399	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
400		struct g_consumer *cp;
401
402		TAILQ_REMOVE(&queue, cbp, bio_queue);
403		cp = cbp->bio_caller2;
404		cbp->bio_caller2 = NULL;
405		cbp->bio_to = cp->provider;
406		if (cbp->bio_caller1 != NULL) {
407			cbp->bio_data = data;
408			if (bp->bio_cmd == BIO_WRITE) {
409				g_stripe_copy(sc, cbp->bio_caller1, data,
410				    cbp->bio_offset, cbp->bio_length, 0);
411			}
412			data += cbp->bio_length;
413		}
414		G_STRIPE_LOGREQ(cbp, "Sending request.");
415		g_io_request(cbp, cp);
416	}
417	return (0);
418failure:
419	if (data != NULL)
420		uma_zfree(g_stripe_zone, data);
421	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
422		TAILQ_REMOVE(&queue, cbp, bio_queue);
423		if (cbp->bio_caller1 != NULL) {
424			cbp->bio_data = cbp->bio_caller1;
425			cbp->bio_caller1 = NULL;
426		}
427		bp->bio_children--;
428		g_destroy_bio(cbp);
429	}
430	return (error);
431}
432
433static int
434g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
435{
436	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
437	struct g_stripe_softc *sc;
438	uint32_t stripesize;
439	struct bio *cbp;
440	char *addr;
441	int error;
442
443	sc = bp->bio_to->geom->softc;
444
445	addr = bp->bio_data;
446	stripesize = sc->sc_stripesize;
447
448	cbp = g_clone_bio(bp);
449	if (cbp == NULL) {
450		error = ENOMEM;
451		goto failure;
452	}
453	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
454	/*
455	 * Fill in the component buf structure.
456	 */
457	cbp->bio_done = g_std_done;
458	cbp->bio_offset = offset;
459	cbp->bio_data = addr;
460	cbp->bio_length = length;
461	cbp->bio_caller2 = sc->sc_disks[no];
462
463	/* offset -= offset % stripesize; */
464	offset -= offset & (stripesize - 1);
465	addr += length;
466	length = bp->bio_length - length;
467	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
468		if (no > sc->sc_ndisks - 1) {
469			no = 0;
470			offset += stripesize;
471		}
472		cbp = g_clone_bio(bp);
473		if (cbp == NULL) {
474			error = ENOMEM;
475			goto failure;
476		}
477		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
478
479		/*
480		 * Fill in the component buf structure.
481		 */
482		cbp->bio_done = g_std_done;
483		cbp->bio_offset = offset;
484		cbp->bio_data = addr;
485		/*
486		 * MIN() is in case when
487		 * (bp->bio_length % sc->sc_stripesize) != 0.
488		 */
489		cbp->bio_length = MIN(stripesize, length);
490
491		cbp->bio_caller2 = sc->sc_disks[no];
492	}
493	/*
494	 * Fire off all allocated requests!
495	 */
496	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
497		struct g_consumer *cp;
498
499		TAILQ_REMOVE(&queue, cbp, bio_queue);
500		cp = cbp->bio_caller2;
501		cbp->bio_caller2 = NULL;
502		cbp->bio_to = cp->provider;
503		G_STRIPE_LOGREQ(cbp, "Sending request.");
504		g_io_request(cbp, cp);
505	}
506	return (0);
507failure:
508	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
509		TAILQ_REMOVE(&queue, cbp, bio_queue);
510		bp->bio_children--;
511		g_destroy_bio(cbp);
512	}
513	return (error);
514}
515
516static void
517g_stripe_start(struct bio *bp)
518{
519	off_t offset, start, length, nstripe;
520	struct g_stripe_softc *sc;
521	u_int no, stripesize;
522	int error, fast = 0;
523
524	sc = bp->bio_to->geom->softc;
525	/*
526	 * If sc == NULL, provider's error should be set and g_stripe_start()
527	 * should not be called at all.
528	 */
529	KASSERT(sc != NULL,
530	    ("Provider's error should be set (error=%d)(device=%s).",
531	    bp->bio_to->error, bp->bio_to->name));
532
533	G_STRIPE_LOGREQ(bp, "Request received.");
534
535	switch (bp->bio_cmd) {
536	case BIO_READ:
537	case BIO_WRITE:
538	case BIO_DELETE:
539		/*
540		 * Only those requests are supported.
541		 */
542		break;
543	case BIO_GETATTR:
544		/* To which provider it should be delivered? */
545	default:
546		g_io_deliver(bp, EOPNOTSUPP);
547		return;
548	}
549
550	stripesize = sc->sc_stripesize;
551
552	/*
553	 * Calculations are quite messy, but fast I hope.
554	 */
555
556	/* Stripe number. */
557	/* nstripe = bp->bio_offset / stripesize; */
558	nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
559	/* Disk number. */
560	no = nstripe % sc->sc_ndisks;
561	/* Start position in stripe. */
562	/* start = bp->bio_offset % stripesize; */
563	start = bp->bio_offset & (stripesize - 1);
564	/* Start position in disk. */
565	/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
566	offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
567	/* Length of data to operate. */
568	length = MIN(bp->bio_length, stripesize - start);
569
570	/*
571	 * Do use "fast" mode when:
572	 * 1. "Fast" mode is ON.
573	 * and
574	 * 2. Request size is less than or equal to MAX_IO_SIZE (128kB),
575	 *    which should always be true.
576	 * and
577	 * 3. Request size is bigger than stripesize * ndisks. If it isn't,
578	 *    there will be no need to send more than one I/O request to
579	 *    a provider, so there is nothing to optmize.
580	 */
581	if (g_stripe_fast && bp->bio_length <= MAX_IO_SIZE &&
582	    bp->bio_length >= stripesize * sc->sc_ndisks) {
583		fast = 1;
584	}
585	error = 0;
586	if (fast)
587		error = g_stripe_start_fast(bp, no, offset, length);
588	/*
589	 * Do use "economic" when:
590	 * 1. "Economic" mode is ON.
591	 * or
592	 * 2. "Fast" mode failed. It can only failed if there is no memory.
593	 */
594	if (!fast || error != 0)
595		error = g_stripe_start_economic(bp, no, offset, length);
596	if (error != 0) {
597		if (bp->bio_error == 0)
598			bp->bio_error = error;
599		g_io_deliver(bp, bp->bio_error);
600	}
601}
602
603static void
604g_stripe_check_and_run(struct g_stripe_softc *sc)
605{
606	off_t mediasize, ms;
607	u_int no, sectorsize = 0;
608
609	if (g_stripe_nvalid(sc) != sc->sc_ndisks)
610		return;
611
612	sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
613	    sc->sc_name);
614	/*
615	 * Find the smallest disk.
616	 */
617	mediasize = sc->sc_disks[0]->provider->mediasize;
618	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
619		mediasize -= sc->sc_disks[0]->provider->sectorsize;
620	mediasize -= mediasize % sc->sc_stripesize;
621	sectorsize = sc->sc_disks[0]->provider->sectorsize;
622	for (no = 1; no < sc->sc_ndisks; no++) {
623		ms = sc->sc_disks[no]->provider->mediasize;
624		if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
625			ms -= sc->sc_disks[no]->provider->sectorsize;
626		ms -= ms % sc->sc_stripesize;
627		if (ms < mediasize)
628			mediasize = ms;
629		sectorsize = lcm(sectorsize,
630		    sc->sc_disks[no]->provider->sectorsize);
631	}
632	sc->sc_provider->sectorsize = sectorsize;
633	sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
634	g_error_provider(sc->sc_provider, 0);
635
636	G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_name);
637}
638
639static int
640g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
641{
642	struct g_provider *pp;
643	u_char *buf;
644	int error;
645
646	g_topology_assert();
647
648	error = g_access(cp, 1, 0, 0);
649	if (error != 0)
650		return (error);
651	pp = cp->provider;
652	g_topology_unlock();
653	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
654	    &error);
655	g_topology_lock();
656	g_access(cp, -1, 0, 0);
657	if (buf == NULL)
658		return (error);
659
660	/* Decode metadata. */
661	stripe_metadata_decode(buf, md);
662	g_free(buf);
663
664	return (0);
665}
666
667/*
668 * Add disk to given device.
669 */
670static int
671g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
672{
673	struct g_consumer *cp, *fcp;
674	struct g_geom *gp;
675	int error;
676
677	/* Metadata corrupted? */
678	if (no >= sc->sc_ndisks)
679		return (EINVAL);
680
681	/* Check if disk is not already attached. */
682	if (sc->sc_disks[no] != NULL)
683		return (EEXIST);
684
685	gp = sc->sc_geom;
686	fcp = LIST_FIRST(&gp->consumer);
687
688	cp = g_new_consumer(gp);
689	error = g_attach(cp, pp);
690	if (error != 0) {
691		g_destroy_consumer(cp);
692		return (error);
693	}
694
695	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
696		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
697		if (error != 0) {
698			g_detach(cp);
699			g_destroy_consumer(cp);
700			return (error);
701		}
702	}
703	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
704		struct g_stripe_metadata md;
705
706		/* Reread metadata. */
707		error = g_stripe_read_metadata(cp, &md);
708		if (error != 0)
709			goto fail;
710
711		if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
712		    strcmp(md.md_name, sc->sc_name) != 0 ||
713		    md.md_id != sc->sc_id) {
714			G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
715			goto fail;
716		}
717	}
718
719	cp->private = sc;
720	cp->index = no;
721	sc->sc_disks[no] = cp;
722
723	G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
724
725	g_stripe_check_and_run(sc);
726
727	return (0);
728fail:
729	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
730		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
731	g_detach(cp);
732	g_destroy_consumer(cp);
733	return (error);
734}
735
736static struct g_geom *
737g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
738    u_int type)
739{
740	struct g_stripe_softc *sc;
741	struct g_geom *gp;
742	u_int no;
743
744	G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
745	    md->md_id);
746
747	/* Two disks is minimum. */
748	if (md->md_all < 2) {
749		G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
750		return (NULL);
751	}
752#if 0
753	/* Stripe size have to be grater than or equal to sector size. */
754	if (md->md_stripesize < sectorsize) {
755		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
756		return (NULL);
757	}
758#endif
759	/* Stripe size have to be power of 2. */
760	if (!powerof2(md->md_stripesize)) {
761		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
762		return (NULL);
763	}
764
765	/* Check for duplicate unit */
766	LIST_FOREACH(gp, &mp->geom, geom) {
767		sc = gp->softc;
768		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
769			G_STRIPE_DEBUG(0, "Device %s already configured.",
770			    sc->sc_name);
771			return (NULL);
772		}
773	}
774	gp = g_new_geomf(mp, "%s", md->md_name);
775	gp->softc = NULL;	/* for a moment */
776
777	sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
778	gp->start = g_stripe_start;
779	gp->spoiled = g_stripe_orphan;
780	gp->orphan = g_stripe_orphan;
781	gp->access = g_stripe_access;
782	gp->dumpconf = g_stripe_dumpconf;
783
784	sc->sc_id = md->md_id;
785	sc->sc_stripesize = md->md_stripesize;
786	sc->sc_stripebits = BITCOUNT(sc->sc_stripesize - 1);
787	sc->sc_ndisks = md->md_all;
788	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
789	    M_STRIPE, M_WAITOK | M_ZERO);
790	for (no = 0; no < sc->sc_ndisks; no++)
791		sc->sc_disks[no] = NULL;
792	sc->sc_type = type;
793
794	gp->softc = sc;
795	sc->sc_geom = gp;
796	sc->sc_provider = NULL;
797
798	G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
799
800	return (gp);
801}
802
803static int
804g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
805{
806	struct g_provider *pp;
807	struct g_geom *gp;
808	u_int no;
809
810	g_topology_assert();
811
812	if (sc == NULL)
813		return (ENXIO);
814
815	pp = sc->sc_provider;
816	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
817		if (force) {
818			G_STRIPE_DEBUG(0, "Device %s is still open, so it "
819			    "can't be definitely removed.", pp->name);
820		} else {
821			G_STRIPE_DEBUG(1,
822			    "Device %s is still open (r%dw%de%d).", pp->name,
823			    pp->acr, pp->acw, pp->ace);
824			return (EBUSY);
825		}
826	}
827
828	for (no = 0; no < sc->sc_ndisks; no++) {
829		if (sc->sc_disks[no] != NULL)
830			g_stripe_remove_disk(sc->sc_disks[no]);
831	}
832
833	gp = sc->sc_geom;
834	gp->softc = NULL;
835	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
836	    gp->name));
837	free(sc->sc_disks, M_STRIPE);
838	free(sc, M_STRIPE);
839
840	pp = LIST_FIRST(&gp->provider);
841	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
842		G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
843
844	g_wither_geom(gp, ENXIO);
845
846	return (0);
847}
848
849static int
850g_stripe_destroy_geom(struct gctl_req *req __unused,
851    struct g_class *mp __unused, struct g_geom *gp)
852{
853	struct g_stripe_softc *sc;
854
855	sc = gp->softc;
856	return (g_stripe_destroy(sc, 0));
857}
858
859static struct g_geom *
860g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
861{
862	struct g_stripe_metadata md;
863	struct g_stripe_softc *sc;
864	struct g_consumer *cp;
865	struct g_geom *gp;
866	int error;
867
868	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
869	g_topology_assert();
870
871	G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
872
873	gp = g_new_geomf(mp, "stripe:taste");
874	gp->start = g_stripe_start;
875	gp->access = g_stripe_access;
876	gp->orphan = g_stripe_orphan;
877	cp = g_new_consumer(gp);
878	g_attach(cp, pp);
879
880	error = g_stripe_read_metadata(cp, &md);
881	g_wither_geom(gp, ENXIO);
882	if (error != 0)
883		return (NULL);
884	gp = NULL;
885
886	if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
887		return (NULL);
888	if (md.md_version > G_STRIPE_VERSION) {
889		printf("geom_stripe.ko module is too old to handle %s.\n",
890		    pp->name);
891		return (NULL);
892	}
893
894	/*
895	 * Let's check if device already exists.
896	 */
897	sc = NULL;
898	LIST_FOREACH(gp, &mp->geom, geom) {
899		sc = gp->softc;
900		if (sc == NULL)
901			continue;
902		if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
903			continue;
904		if (strcmp(md.md_name, sc->sc_name) != 0)
905			continue;
906		if (md.md_id != sc->sc_id)
907			continue;
908		break;
909	}
910	if (gp != NULL) {
911		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
912		error = g_stripe_add_disk(sc, pp, md.md_no);
913		if (error != 0) {
914			G_STRIPE_DEBUG(0,
915			    "Cannot add disk %s to %s (error=%d).", pp->name,
916			    gp->name, error);
917			return (NULL);
918		}
919	} else {
920		gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
921		if (gp == NULL) {
922			G_STRIPE_DEBUG(0, "Cannot create device %s.",
923			    md.md_name);
924			return (NULL);
925		}
926		sc = gp->softc;
927		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
928		error = g_stripe_add_disk(sc, pp, md.md_no);
929		if (error != 0) {
930			G_STRIPE_DEBUG(0,
931			    "Cannot add disk %s to %s (error=%d).", pp->name,
932			    gp->name, error);
933			g_stripe_destroy(sc, 1);
934			return (NULL);
935		}
936	}
937
938	return (gp);
939}
940
941static void
942g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
943{
944	u_int attached, no;
945	struct g_stripe_metadata md;
946	struct g_provider *pp;
947	struct g_stripe_softc *sc;
948	struct g_geom *gp;
949	struct sbuf *sb;
950	intmax_t *stripesize;
951	const char *name;
952	char param[16];
953	int *nargs;
954
955	g_topology_assert();
956	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
957	if (nargs == NULL) {
958		gctl_error(req, "No '%s' argument.", "nargs");
959		return;
960	}
961	if (*nargs <= 2) {
962		gctl_error(req, "Too few arguments.");
963		return;
964	}
965
966	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
967	md.md_version = G_STRIPE_VERSION;
968	name = gctl_get_asciiparam(req, "arg0");
969	if (name == NULL) {
970		gctl_error(req, "No 'arg%u' argument.", 0);
971		return;
972	}
973	strlcpy(md.md_name, name, sizeof(md.md_name));
974	md.md_id = arc4random();
975	md.md_no = 0;
976	md.md_all = *nargs - 1;
977	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
978	if (stripesize == NULL) {
979		gctl_error(req, "No '%s' argument.", "stripesize");
980		return;
981	}
982	md.md_stripesize = *stripesize;
983
984	/* Check all providers are valid */
985	for (no = 1; no < *nargs; no++) {
986		snprintf(param, sizeof(param), "arg%u", no);
987		name = gctl_get_asciiparam(req, param);
988		if (name == NULL) {
989			gctl_error(req, "No 'arg%u' argument.", no);
990			return;
991		}
992		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
993			name += strlen("/dev/");
994		pp = g_provider_by_name(name);
995		if (pp == NULL) {
996			G_STRIPE_DEBUG(1, "Disk %s is invalid.", name);
997			gctl_error(req, "Disk %s is invalid.", name);
998			return;
999		}
1000	}
1001
1002	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
1003	if (gp == NULL) {
1004		gctl_error(req, "Can't configure %s.", md.md_name);
1005		return;
1006	}
1007
1008	sc = gp->softc;
1009	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1010	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
1011	for (attached = 0, no = 1; no < *nargs; no++) {
1012		snprintf(param, sizeof(param), "arg%u", no);
1013		name = gctl_get_asciiparam(req, param);
1014		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
1015			name += strlen("/dev/");
1016		pp = g_provider_by_name(name);
1017		KASSERT(pp != NULL, ("Provider %s disappear?!", name));
1018		if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
1019			G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
1020			    no, pp->name, gp->name);
1021			sbuf_printf(sb, " %s", pp->name);
1022			continue;
1023		}
1024		attached++;
1025	}
1026	sbuf_finish(sb);
1027	if (md.md_all != attached) {
1028		g_stripe_destroy(gp->softc, 1);
1029		gctl_error(req, "%s", sbuf_data(sb));
1030	}
1031	sbuf_delete(sb);
1032}
1033
1034static struct g_stripe_softc *
1035g_stripe_find_device(struct g_class *mp, const char *name)
1036{
1037	struct g_stripe_softc *sc;
1038	struct g_geom *gp;
1039
1040	LIST_FOREACH(gp, &mp->geom, geom) {
1041		sc = gp->softc;
1042		if (sc == NULL)
1043			continue;
1044		if (strcmp(sc->sc_name, name) == 0)
1045			return (sc);
1046	}
1047	return (NULL);
1048}
1049
1050static void
1051g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
1052{
1053	struct g_stripe_softc *sc;
1054	int *force, *nargs, error;
1055	const char *name;
1056	char param[16];
1057	u_int i;
1058
1059	g_topology_assert();
1060
1061	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1062	if (nargs == NULL) {
1063		gctl_error(req, "No '%s' argument.", "nargs");
1064		return;
1065	}
1066	if (*nargs <= 0) {
1067		gctl_error(req, "Missing device(s).");
1068		return;
1069	}
1070	force = gctl_get_paraml(req, "force", sizeof(*force));
1071	if (force == NULL) {
1072		gctl_error(req, "No '%s' argument.", "force");
1073		return;
1074	}
1075
1076	for (i = 0; i < (u_int)*nargs; i++) {
1077		snprintf(param, sizeof(param), "arg%u", i);
1078		name = gctl_get_asciiparam(req, param);
1079		if (name == NULL) {
1080			gctl_error(req, "No 'arg%u' argument.", i);
1081			return;
1082		}
1083		sc = g_stripe_find_device(mp, name);
1084		if (sc == NULL) {
1085			gctl_error(req, "No such device: %s.", name);
1086			return;
1087		}
1088		error = g_stripe_destroy(sc, *force);
1089		if (error != 0) {
1090			gctl_error(req, "Cannot destroy device %s (error=%d).",
1091			    sc->sc_name, error);
1092			return;
1093		}
1094	}
1095}
1096
1097static void
1098g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
1099{
1100	uint32_t *version;
1101
1102	g_topology_assert();
1103
1104	version = gctl_get_paraml(req, "version", sizeof(*version));
1105	if (version == NULL) {
1106		gctl_error(req, "No '%s' argument.", "version");
1107		return;
1108	}
1109	if (*version != G_STRIPE_VERSION) {
1110		gctl_error(req, "Userland and kernel parts are out of sync.");
1111		return;
1112	}
1113
1114	if (strcmp(verb, "create") == 0) {
1115		g_stripe_ctl_create(req, mp);
1116		return;
1117	} else if (strcmp(verb, "destroy") == 0 ||
1118	    strcmp(verb, "stop") == 0) {
1119		g_stripe_ctl_destroy(req, mp);
1120		return;
1121	}
1122
1123	gctl_error(req, "Unknown verb.");
1124}
1125
1126static void
1127g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1128    struct g_consumer *cp, struct g_provider *pp)
1129{
1130	struct g_stripe_softc *sc;
1131
1132	sc = gp->softc;
1133	if (sc == NULL)
1134		return;
1135	if (pp != NULL) {
1136		/* Nothing here. */
1137	} else if (cp != NULL) {
1138		/* Nothing here. */
1139	} else {
1140		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
1141		sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent,
1142		    (u_int)sc->sc_stripesize);
1143		sbuf_printf(sb, "%s<Type>", indent);
1144		switch (sc->sc_type) {
1145		case G_STRIPE_TYPE_AUTOMATIC:
1146			sbuf_printf(sb, "AUTOMATIC");
1147			break;
1148		case G_STRIPE_TYPE_MANUAL:
1149			sbuf_printf(sb, "MANUAL");
1150			break;
1151		default:
1152			sbuf_printf(sb, "UNKNOWN");
1153			break;
1154		}
1155		sbuf_printf(sb, "</Type>\n");
1156		sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
1157		    indent, sc->sc_ndisks, g_stripe_nvalid(sc));
1158		sbuf_printf(sb, "%s<State>", indent);
1159		if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
1160			sbuf_printf(sb, "UP");
1161		else
1162			sbuf_printf(sb, "DOWN");
1163		sbuf_printf(sb, "</State>\n");
1164	}
1165}
1166
/* Register g_stripe_class with GEOM under the module name g_stripe. */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
1168