g_stripe.c revision 131878
/*-
 * Copyright (c) 2003 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

27165743Sdas#include <sys/cdefs.h>
28165743Sdas__FBSDID("$FreeBSD: head/sys/geom/stripe/g_stripe.c 131878 2004-07-09 14:30:09Z pjd $");
29165743Sdas
30165743Sdas#include <sys/param.h>
31165743Sdas#include <sys/systm.h>
32165743Sdas#include <sys/kernel.h>
33165743Sdas#include <sys/module.h>
34165743Sdas#include <sys/lock.h>
35165743Sdas#include <sys/mutex.h>
36165743Sdas#include <sys/bio.h>
37165743Sdas#include <sys/sysctl.h>
38165743Sdas#include <sys/malloc.h>
39165743Sdas#include <vm/uma.h>
40165743Sdas#include <geom/geom.h>
41165743Sdas#include <geom/stripe/g_stripe.h>
42165743Sdas
43165743Sdas
/* Size in bytes of one buffer in g_stripe_zone (see g_stripe_init()). */
#define	MAX_IO_SIZE	(DFLTPHYS * 2)
/* Malloc type used for the softc and its array of component consumers. */
static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data");

/* UMA zone of MAX_IO_SIZE-byte buffers used by g_stripe_start_fast(). */
static uma_zone_t g_stripe_zone;

/* Forward declarations: both are called before their definitions. */
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);

/* Class methods referenced by g_stripe_class below. */
static g_taste_t g_stripe_taste;
static g_ctl_req_t g_stripe_config;
static g_dumpconf_t g_stripe_dumpconf;
static g_init_t g_stripe_init;
static g_fini_t g_stripe_fini;

/* Class descriptor handed to DECLARE_GEOM_CLASS() at the end of the file. */
struct g_class g_stripe_class = {
	.name = G_STRIPE_CLASS_NAME,
	.ctlreq = g_stripe_config,
	.taste = g_stripe_taste,
	.destroy_geom = g_stripe_destroy_geom,
	.init = g_stripe_init,
	.fini = g_stripe_fini
};

/* The kern.geom.stripe sysctl subtree. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff");
/* Debug level, exported read-write as kern.geom.stripe.debug. */
static u_int g_stripe_debug = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0,
    "Debug level");
/*
 * Non-zero lets g_stripe_start() try the "fast" path first.  Settable as
 * the kern.geom.stripe.fast tunable.
 */
static int g_stripe_fast = 1;
TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast);
75165743Sdasstatic int
76165743Sdasg_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS)
77165743Sdas{
78165743Sdas	int error, fast;
79165743Sdas
80165743Sdas	fast = g_stripe_fast;
81165743Sdas	error = sysctl_handle_int(oidp, &fast, sizeof(fast), req);
82165743Sdas	if (error == 0 && req->newptr != NULL)
83165743Sdas		g_stripe_fast = fast;
84165743Sdas	return (error);
85165743Sdas}
/* Run-time access to g_stripe_fast goes through g_sysctl_stripe_fast(). */
SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming mode");
/*
 * Memory budget for g_stripe_zone.  g_stripe_init() rounds it down to a
 * multiple of MAX_IO_SIZE.  Settable as a tunable; the sysctl is
 * read-only (CTLFLAG_RD).
 */
static u_int g_stripe_maxmem = MAX_IO_SIZE * 10;
TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem);
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem,
    0, "Maximum memory that could be allocated in \"fast\" mode (in bytes)");
92165743Sdas
/*
 * Greatest Common Divisor (Euclid's algorithm).
 * gcd(a, 0) is a, so gcd(0, 0) is 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int c;

	while (b != 0) {
		c = a;
		a = b;
		b = (c % b);
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * Divides before multiplying so that the intermediate value never exceeds
 * the result; computing a * b first would wrap in u_int for large inputs.
 * Returns 0 when both arguments are 0 instead of dividing by zero.
 */
static u_int
lcm(u_int a, u_int b)
{
	u_int g;

	g = gcd(a, b);
	if (g == 0)
		return (0);
	return ((a / g) * b);
}
118
119static void
120g_stripe_init(struct g_class *mp __unused)
121{
122
123	g_stripe_zone = uma_zcreate("g_stripe_zone", MAX_IO_SIZE, NULL, NULL,
124	    NULL, NULL, 0, 0);
125	g_stripe_maxmem -= g_stripe_maxmem % MAX_IO_SIZE;
126	uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAX_IO_SIZE);
127}
128
/*
 * Class fini method: destroy the buffer zone created by g_stripe_init().
 */
static void
g_stripe_fini(struct g_class *mp __unused)
{

	uma_zdestroy(g_stripe_zone);
}
135
136/*
137 * Return the number of valid disks.
138 */
139static u_int
140g_stripe_nvalid(struct g_stripe_softc *sc)
141{
142	u_int i, no;
143
144	no = 0;
145	for (i = 0; i < sc->sc_ndisks; i++) {
146		if (sc->sc_disks[i] != NULL)
147			no++;
148	}
149
150	return (no);
151}
152
153static void
154g_stripe_remove_disk(struct g_consumer *cp)
155{
156	struct g_stripe_softc *sc;
157	u_int no;
158
159	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
160	sc = (struct g_stripe_softc *)cp->private;
161	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
162	no = cp->index;
163
164	G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
165	    sc->sc_geom->name);
166
167	sc->sc_disks[no] = NULL;
168	if (sc->sc_provider != NULL) {
169		g_orphan_provider(sc->sc_provider, ENXIO);
170		sc->sc_provider = NULL;
171		G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_geom->name);
172	}
173
174	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
175		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
176	g_detach(cp);
177	g_destroy_consumer(cp);
178}
179
180static void
181g_stripe_orphan(struct g_consumer *cp)
182{
183	struct g_stripe_softc *sc;
184	struct g_geom *gp;
185
186	g_topology_assert();
187	gp = cp->geom;
188	sc = gp->softc;
189	if (sc == NULL)
190		return;
191
192	g_stripe_remove_disk(cp);
193	/* If there are no valid disks anymore, remove device. */
194	if (g_stripe_nvalid(sc) == 0)
195		g_stripe_destroy(sc, 1);
196}
197
198static int
199g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
200{
201	struct g_consumer *cp1, *cp2;
202	struct g_stripe_softc *sc;
203	struct g_geom *gp;
204	int error;
205
206	gp = pp->geom;
207	sc = gp->softc;
208
209	if (sc == NULL) {
210		/*
211		 * It looks like geom is being withered.
212		 * In that case we allow only negative requests.
213		 */
214		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
215		    ("Positive access request (device=%s).", pp->name));
216		if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 &&
217		    (pp->ace + de) == 0) {
218			G_STRIPE_DEBUG(0, "Device %s definitely destroyed.",
219			    gp->name);
220		}
221		return (0);
222	}
223
224	/* On first open, grab an extra "exclusive" bit */
225	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
226		de++;
227	/* ... and let go of it on last close */
228	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
229		de--;
230
231	error = ENXIO;
232	LIST_FOREACH(cp1, &gp->consumer, consumer) {
233		error = g_access(cp1, dr, dw, de);
234		if (error == 0)
235			continue;
236		/*
237		 * If we fail here, backout all previous changes.
238		 */
239		LIST_FOREACH(cp2, &gp->consumer, consumer) {
240			if (cp1 == cp2)
241				return (error);
242			g_access(cp2, -dr, -dw, -de);
243		}
244		/* NOTREACHED */
245	}
246
247	return (error);
248}
249
250static void
251g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
252    off_t length, int mode)
253{
254	u_int stripesize;
255	size_t len;
256
257	stripesize = sc->sc_stripesize;
258	len = (size_t)(stripesize - (offset & (stripesize - 1)));
259	do {
260		bcopy(src, dst, len);
261		if (mode) {
262			dst += len + stripesize * (sc->sc_ndisks - 1);
263			src += len;
264		} else {
265			dst += len;
266			src += len + stripesize * (sc->sc_ndisks - 1);
267		}
268		length -= len;
269		KASSERT(length >= 0,
270		    ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).",
271		    (size_t)stripesize, (intmax_t)offset, (intmax_t)length));
272		if (length > stripesize)
273			len = stripesize;
274		else
275			len = length;
276	} while (length > 0);
277}
278
279static void
280g_stripe_done(struct bio *bp)
281{
282	struct g_stripe_softc *sc;
283	struct bio *pbp;
284
285	pbp = bp->bio_parent;
286	sc = pbp->bio_to->geom->softc;
287	if (pbp->bio_error == 0)
288		pbp->bio_error = bp->bio_error;
289	pbp->bio_completed += bp->bio_completed;
290	if (bp->bio_cmd == BIO_READ && bp->bio_driver1 != NULL) {
291		g_stripe_copy(sc, bp->bio_data, bp->bio_driver1, bp->bio_offset,
292		    bp->bio_length, 1);
293		bp->bio_data = bp->bio_driver1;
294		bp->bio_driver1 = NULL;
295	}
296	g_destroy_bio(bp);
297	pbp->bio_inbed++;
298	if (pbp->bio_children == pbp->bio_inbed) {
299		if (pbp->bio_caller1 != NULL)
300			uma_zfree(g_stripe_zone, pbp->bio_caller1);
301		g_io_deliver(pbp, pbp->bio_error);
302	}
303}
304
305static int
306g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
307{
308	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
309	u_int nparts = 0, stripesize;
310	struct g_stripe_softc *sc;
311	char *addr, *data = NULL;
312	struct bio *cbp;
313	int error;
314
315	sc = bp->bio_to->geom->softc;
316
317	addr = bp->bio_data;
318	stripesize = sc->sc_stripesize;
319
320	cbp = g_clone_bio(bp);
321	if (cbp == NULL) {
322		error = ENOMEM;
323		goto failure;
324	}
325	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
326	nparts++;
327	/*
328	 * Fill in the component buf structure.
329	 */
330	cbp->bio_done = g_stripe_done;
331	cbp->bio_offset = offset;
332	cbp->bio_data = addr;
333	cbp->bio_driver1 = NULL;
334	cbp->bio_length = length;
335	cbp->bio_driver2 = sc->sc_disks[no];
336
337	/* offset -= offset % stripesize; */
338	offset -= offset & (stripesize - 1);
339	addr += length;
340	length = bp->bio_length - length;
341	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
342		if (no > sc->sc_ndisks - 1) {
343			no = 0;
344			offset += stripesize;
345		}
346		if (nparts >= sc->sc_ndisks) {
347			cbp = TAILQ_NEXT(cbp, bio_queue);
348			if (cbp == NULL)
349				cbp = TAILQ_FIRST(&queue);
350			nparts++;
351			/*
352			 * Update bio structure.
353			 */
354			/*
355			 * MIN() is in case when
356			 * (bp->bio_length % sc->sc_stripesize) != 0.
357			 */
358			cbp->bio_length += MIN(stripesize, length);
359			if (cbp->bio_driver1 == NULL) {
360				cbp->bio_driver1 = cbp->bio_data;
361				cbp->bio_data = NULL;
362				if (data == NULL) {
363					data = uma_zalloc(g_stripe_zone,
364					    M_NOWAIT);
365					if (data == NULL) {
366						error = ENOMEM;
367						goto failure;
368					}
369				}
370			}
371		} else {
372			cbp = g_clone_bio(bp);
373			if (cbp == NULL) {
374				error = ENOMEM;
375				goto failure;
376			}
377			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
378			nparts++;
379			/*
380			 * Fill in the component buf structure.
381			 */
382			cbp->bio_done = g_stripe_done;
383			cbp->bio_offset = offset;
384			cbp->bio_data = addr;
385			cbp->bio_driver1 = NULL;
386			/*
387			 * MIN() is in case when
388			 * (bp->bio_length % sc->sc_stripesize) != 0.
389			 */
390			cbp->bio_length = MIN(stripesize, length);
391			cbp->bio_driver2 = sc->sc_disks[no];
392		}
393	}
394	if (data != NULL)
395		bp->bio_caller1 = data;
396	/*
397	 * Fire off all allocated requests!
398	 */
399	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
400		struct g_consumer *cp;
401
402		TAILQ_REMOVE(&queue, cbp, bio_queue);
403		cp = cbp->bio_driver2;
404		cbp->bio_driver2 = NULL;
405		cbp->bio_to = cp->provider;
406		if (cbp->bio_driver1 != NULL) {
407			cbp->bio_data = data;
408			if (bp->bio_cmd == BIO_WRITE) {
409				g_stripe_copy(sc, cbp->bio_driver1, data,
410				    cbp->bio_offset, cbp->bio_length, 0);
411			}
412			data += cbp->bio_length;
413		}
414		G_STRIPE_LOGREQ(cbp, "Sending request.");
415		g_io_request(cbp, cp);
416	}
417	return (0);
418failure:
419	if (data != NULL)
420		uma_zfree(g_stripe_zone, data);
421	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
422		TAILQ_REMOVE(&queue, cbp, bio_queue);
423		if (cbp->bio_driver1 != NULL) {
424			cbp->bio_data = cbp->bio_driver1;
425			cbp->bio_driver1 = NULL;
426		}
427		g_destroy_bio(cbp);
428	}
429	return (error);
430}
431
432static int
433g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
434{
435	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
436	struct g_stripe_softc *sc;
437	uint32_t stripesize;
438	struct bio *cbp;
439	char *addr;
440	int error;
441
442	sc = bp->bio_to->geom->softc;
443
444	addr = bp->bio_data;
445	stripesize = sc->sc_stripesize;
446
447	cbp = g_clone_bio(bp);
448	if (cbp == NULL) {
449		error = ENOMEM;
450		goto failure;
451	}
452	TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
453	/*
454	 * Fill in the component buf structure.
455	 */
456	cbp->bio_done = g_std_done;
457	cbp->bio_offset = offset;
458	cbp->bio_data = addr;
459	cbp->bio_length = length;
460	cbp->bio_driver2 = sc->sc_disks[no];
461
462	/* offset -= offset % stripesize; */
463	offset -= offset & (stripesize - 1);
464	addr += length;
465	length = bp->bio_length - length;
466	for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
467		if (no > sc->sc_ndisks - 1) {
468			no = 0;
469			offset += stripesize;
470		}
471		cbp = g_clone_bio(bp);
472		if (cbp == NULL) {
473			error = ENOMEM;
474			goto failure;
475		}
476		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
477
478		/*
479		 * Fill in the component buf structure.
480		 */
481		cbp->bio_done = g_std_done;
482		cbp->bio_offset = offset;
483		cbp->bio_data = addr;
484		/*
485		 * MIN() is in case when
486		 * (bp->bio_length % sc->sc_stripesize) != 0.
487		 */
488		cbp->bio_length = MIN(stripesize, length);
489
490		cbp->bio_driver2 = sc->sc_disks[no];
491	}
492	/*
493	 * Fire off all allocated requests!
494	 */
495	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
496		struct g_consumer *cp;
497
498		TAILQ_REMOVE(&queue, cbp, bio_queue);
499		cp = cbp->bio_driver2;
500		cbp->bio_driver2 = NULL;
501		cbp->bio_to = cp->provider;
502		G_STRIPE_LOGREQ(cbp, "Sending request.");
503		g_io_request(cbp, cp);
504	}
505	return (0);
506failure:
507	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
508		TAILQ_REMOVE(&queue, cbp, bio_queue);
509		g_destroy_bio(cbp);
510	}
511	return (error);
512}
513
514static void
515g_stripe_start(struct bio *bp)
516{
517	off_t offset, start, length, nstripe;
518	struct g_stripe_softc *sc;
519	u_int no, stripesize;
520	int error, fast = 0;
521
522	sc = bp->bio_to->geom->softc;
523	/*
524	 * If sc == NULL, provider's error should be set and g_stripe_start()
525	 * should not be called at all.
526	 */
527	KASSERT(sc != NULL,
528	    ("Provider's error should be set (error=%d)(device=%s).",
529	    bp->bio_to->error, bp->bio_to->name));
530
531	G_STRIPE_LOGREQ(bp, "Request received.");
532
533	switch (bp->bio_cmd) {
534	case BIO_READ:
535	case BIO_WRITE:
536	case BIO_DELETE:
537		/*
538		 * Only those requests are supported.
539		 */
540		break;
541	case BIO_GETATTR:
542		/* To which provider it should be delivered? */
543	default:
544		g_io_deliver(bp, EOPNOTSUPP);
545		return;
546	}
547
548	stripesize = sc->sc_stripesize;
549
550	/*
551	 * Calculations are quite messy, but fast I hope.
552	 */
553
554	/* Stripe number. */
555	/* nstripe = bp->bio_offset / stripesize; */
556	nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
557	/* Disk number. */
558	no = nstripe % sc->sc_ndisks;
559	/* Start position in stripe. */
560	/* start = bp->bio_offset % stripesize; */
561	start = bp->bio_offset & (stripesize - 1);
562	/* Start position in disk. */
563	/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
564	offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
565	/* Length of data to operate. */
566	length = MIN(bp->bio_length, stripesize - start);
567
568	/*
569	 * Do use "fast" mode when:
570	 * 1. "Fast" mode is ON.
571	 * and
572	 * 2. Request size is less than or equal to MAX_IO_SIZE (128kB),
573	 *    which should always be true.
574	 * and
575	 * 3. Request size is bigger than stripesize * ndisks. If it isn't,
576	 *    there will be no need to send more than one I/O request to
577	 *    a provider, so there is nothing to optmize.
578	 */
579	if (g_stripe_fast && bp->bio_length <= MAX_IO_SIZE &&
580	    bp->bio_length >= stripesize * sc->sc_ndisks) {
581		fast = 1;
582	}
583	error = 0;
584	if (fast)
585		error = g_stripe_start_fast(bp, no, offset, length);
586	/*
587	 * Do use "economic" when:
588	 * 1. "Economic" mode is ON.
589	 * or
590	 * 2. "Fast" mode failed. It can only failed if there is no memory.
591	 */
592	if (!fast || error != 0)
593		error = g_stripe_start_economic(bp, no, offset, length);
594	if (error != 0) {
595		if (bp->bio_error == 0)
596			bp->bio_error = error;
597		g_io_deliver(bp, bp->bio_error);
598	}
599}
600
601static void
602g_stripe_check_and_run(struct g_stripe_softc *sc)
603{
604	off_t mediasize, ms;
605	u_int no, sectorsize = 0;
606
607	if (g_stripe_nvalid(sc) != sc->sc_ndisks)
608		return;
609
610	sc->sc_provider = g_new_providerf(sc->sc_geom, "%s", sc->sc_geom->name);
611	/*
612	 * Find the smallest disk.
613	 */
614	mediasize = sc->sc_disks[0]->provider->mediasize;
615	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
616		mediasize -= sc->sc_disks[0]->provider->sectorsize;
617	mediasize -= mediasize % sc->sc_stripesize;
618	sectorsize = sc->sc_disks[0]->provider->sectorsize;
619	for (no = 1; no < sc->sc_ndisks; no++) {
620		ms = sc->sc_disks[no]->provider->mediasize;
621		if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
622			ms -= sc->sc_disks[no]->provider->sectorsize;
623		ms -= ms % sc->sc_stripesize;
624		if (ms < mediasize)
625			mediasize = ms;
626		sectorsize = lcm(sectorsize,
627		    sc->sc_disks[no]->provider->sectorsize);
628	}
629	sc->sc_provider->sectorsize = sectorsize;
630	sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
631	g_error_provider(sc->sc_provider, 0);
632
633	G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_geom->name);
634}
635
636static int
637g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
638{
639	struct g_provider *pp;
640	u_char *buf;
641	int error;
642
643	g_topology_assert();
644
645	error = g_access(cp, 1, 0, 0);
646	if (error != 0)
647		return (error);
648	pp = cp->provider;
649	g_topology_unlock();
650	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
651	    &error);
652	g_topology_lock();
653	g_access(cp, -1, 0, 0);
654	if (buf == NULL)
655		return (error);
656
657	/* Decode metadata. */
658	stripe_metadata_decode(buf, md);
659	g_free(buf);
660
661	return (0);
662}
663
664/*
665 * Add disk to given device.
666 */
667static int
668g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
669{
670	struct g_consumer *cp, *fcp;
671	struct g_geom *gp;
672	int error;
673
674	/* Metadata corrupted? */
675	if (no >= sc->sc_ndisks)
676		return (EINVAL);
677
678	/* Check if disk is not already attached. */
679	if (sc->sc_disks[no] != NULL)
680		return (EEXIST);
681
682	gp = sc->sc_geom;
683	fcp = LIST_FIRST(&gp->consumer);
684
685	cp = g_new_consumer(gp);
686	error = g_attach(cp, pp);
687	if (error != 0) {
688		g_destroy_consumer(cp);
689		return (error);
690	}
691
692	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
693		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
694		if (error != 0) {
695			g_detach(cp);
696			g_destroy_consumer(cp);
697			return (error);
698		}
699	}
700	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
701		struct g_stripe_metadata md;
702
703		/* Reread metadata. */
704		error = g_stripe_read_metadata(cp, &md);
705		if (error != 0)
706			goto fail;
707
708		if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
709		    strcmp(md.md_name, sc->sc_name) != 0 ||
710		    md.md_id != sc->sc_id) {
711			G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
712			goto fail;
713		}
714	}
715
716	cp->private = sc;
717	cp->index = no;
718	sc->sc_disks[no] = cp;
719
720	G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, gp->name);
721
722	g_stripe_check_and_run(sc);
723
724	return (0);
725fail:
726	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
727		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
728	g_detach(cp);
729	g_destroy_consumer(cp);
730	return (error);
731}
732
733static struct g_geom *
734g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
735    u_int type)
736{
737	struct g_stripe_softc *sc;
738	struct g_geom *gp;
739	u_int no;
740
741	G_STRIPE_DEBUG(1, "Creating device %s.stripe (id=%u).", md->md_name,
742	    md->md_id);
743
744	/* Two disks is minimum. */
745	if (md->md_all <= 1) {
746		G_STRIPE_DEBUG(0, "Too few disks defined for %s.stripe.",
747		    md->md_name);
748		return (NULL);
749	}
750#if 0
751	/* Stripe size have to be grater than or equal to sector size. */
752	if (md->md_stripesize < sectorsize) {
753		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.stripe.",
754		    md->md_name);
755		return (NULL);
756	}
757#endif
758	/* Stripe size have to be power of 2. */
759	if (!powerof2(md->md_stripesize)) {
760		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.stripe.",
761		    md->md_name);
762		return (NULL);
763	}
764
765	/* Check for duplicate unit */
766	LIST_FOREACH(gp, &mp->geom, geom) {
767		sc = gp->softc;
768		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
769			G_STRIPE_DEBUG(0, "Device %s already configured.",
770			    gp->name);
771			return (NULL);
772		}
773	}
774	gp = g_new_geomf(mp, "%s.stripe", md->md_name);
775	gp->softc = NULL;	/* for a moment */
776
777	sc = malloc(sizeof(*sc), M_STRIPE, M_NOWAIT | M_ZERO);
778	if (sc == NULL) {
779		G_STRIPE_DEBUG(0, "Can't allocate memory for device %s.",
780		    gp->name);
781		g_destroy_geom(gp);
782		return (NULL);
783	}
784
785	gp->start = g_stripe_start;
786	gp->spoiled = g_stripe_orphan;
787	gp->orphan = g_stripe_orphan;
788	gp->access = g_stripe_access;
789	gp->dumpconf = g_stripe_dumpconf;
790
791	strlcpy(sc->sc_name, md->md_name, sizeof(sc->sc_name));
792	sc->sc_id = md->md_id;
793	sc->sc_stripesize = md->md_stripesize;
794	sc->sc_stripebits = BITCOUNT(sc->sc_stripesize - 1);
795	sc->sc_ndisks = md->md_all;
796	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
797	    M_STRIPE, M_WAITOK | M_ZERO);
798	for (no = 0; no < sc->sc_ndisks; no++)
799		sc->sc_disks[no] = NULL;
800	sc->sc_type = type;
801
802	gp->softc = sc;
803	sc->sc_geom = gp;
804	sc->sc_provider = NULL;
805
806	G_STRIPE_DEBUG(0, "Device %s created (id=%u).", gp->name, sc->sc_id);
807
808	return (gp);
809}
810
811static int
812g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
813{
814	struct g_provider *pp;
815	struct g_geom *gp;
816	u_int no;
817
818	g_topology_assert();
819
820	if (sc == NULL)
821		return (ENXIO);
822
823	pp = sc->sc_provider;
824	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
825		if (force) {
826			G_STRIPE_DEBUG(0, "Device %s is still open, so it "
827			    "can't be definitely removed.", pp->name);
828		} else {
829			G_STRIPE_DEBUG(1,
830			    "Device %s is still open (r%dw%de%d).", pp->name,
831			    pp->acr, pp->acw, pp->ace);
832			return (EBUSY);
833		}
834	}
835
836	for (no = 0; no < sc->sc_ndisks; no++) {
837		if (sc->sc_disks[no] != NULL)
838			g_stripe_remove_disk(sc->sc_disks[no]);
839	}
840
841	gp = sc->sc_geom;
842	gp->softc = NULL;
843	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
844	    gp->name));
845	free(sc->sc_disks, M_STRIPE);
846	free(sc, M_STRIPE);
847
848	pp = LIST_FIRST(&gp->provider);
849	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
850		G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
851
852	g_wither_geom(gp, ENXIO);
853
854	return (0);
855}
856
857static int
858g_stripe_destroy_geom(struct gctl_req *req __unused,
859    struct g_class *mp __unused, struct g_geom *gp)
860{
861	struct g_stripe_softc *sc;
862
863	sc = gp->softc;
864	return (g_stripe_destroy(sc, 0));
865}
866
867static struct g_geom *
868g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
869{
870	struct g_stripe_metadata md;
871	struct g_stripe_softc *sc;
872	struct g_consumer *cp;
873	struct g_geom *gp;
874	int error;
875
876	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
877	g_topology_assert();
878
879	G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
880
881	gp = g_new_geomf(mp, "stripe:taste");
882	gp->start = g_stripe_start;
883	gp->access = g_stripe_access;
884	gp->orphan = g_stripe_orphan;
885	cp = g_new_consumer(gp);
886	g_attach(cp, pp);
887
888	error = g_stripe_read_metadata(cp, &md);
889	g_wither_geom(gp, ENXIO);
890	if (error != 0)
891		return (NULL);
892	gp = NULL;
893
894	if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
895		return (NULL);
896	if (md.md_version > G_STRIPE_VERSION) {
897		printf("geom_stripe.ko module is too old to handle %s.\n",
898		    pp->name);
899		return (NULL);
900	}
901
902	/*
903	 * Let's check if device already exists.
904	 */
905	sc = NULL;
906	LIST_FOREACH(gp, &mp->geom, geom) {
907		sc = gp->softc;
908		if (sc == NULL)
909			continue;
910		if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
911			continue;
912		if (strcmp(md.md_name, sc->sc_name) != 0)
913			continue;
914		if (md.md_id != sc->sc_id)
915			continue;
916		break;
917	}
918	if (gp != NULL) {
919		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
920		error = g_stripe_add_disk(sc, pp, md.md_no);
921		if (error != 0) {
922			G_STRIPE_DEBUG(0,
923			    "Cannot add disk %s to %s (error=%d).", pp->name,
924			    gp->name, error);
925			return (NULL);
926		}
927	} else {
928		gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
929		if (gp == NULL) {
930			G_STRIPE_DEBUG(0, "Cannot create device %s.stripe.",
931			    md.md_name);
932			return (NULL);
933		}
934		sc = gp->softc;
935		G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
936		error = g_stripe_add_disk(sc, pp, md.md_no);
937		if (error != 0) {
938			G_STRIPE_DEBUG(0,
939			    "Cannot add disk %s to %s (error=%d).", pp->name,
940			    gp->name, error);
941			g_stripe_destroy(sc, 1);
942			return (NULL);
943		}
944	}
945
946	return (gp);
947}
948
949static void
950g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
951{
952	u_int attached, no;
953	struct g_stripe_metadata md;
954	struct g_provider *pp;
955	struct g_stripe_softc *sc;
956	struct g_geom *gp;
957	struct sbuf *sb;
958	intmax_t *stripesize;
959	const char *name;
960	char param[16];
961	int *nargs;
962
963	g_topology_assert();
964	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
965	if (nargs == NULL) {
966		gctl_error(req, "No '%s' argument.", "nargs");
967		return;
968	}
969	if (*nargs <= 2) {
970		gctl_error(req, "Too few arguments.");
971		return;
972	}
973
974	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
975	md.md_version = G_STRIPE_VERSION;
976	name = gctl_get_asciiparam(req, "arg0");
977	if (name == NULL) {
978		gctl_error(req, "No 'arg%u' argument.", 0);
979		return;
980	}
981	strlcpy(md.md_name, name, sizeof(md.md_name));
982	md.md_id = arc4random();
983	md.md_no = 0;
984	md.md_all = *nargs - 1;
985	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
986	if (stripesize == NULL) {
987		gctl_error(req, "No '%s' argument.", "stripesize");
988		return;
989	}
990	md.md_stripesize = *stripesize;
991
992	/* Check all providers are valid */
993	for (no = 1; no < *nargs; no++) {
994		snprintf(param, sizeof(param), "arg%u", no);
995		name = gctl_get_asciiparam(req, param);
996		if (name == NULL) {
997			gctl_error(req, "No 'arg%u' argument.", no);
998			return;
999		}
1000		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
1001			name += strlen("/dev/");
1002		pp = g_provider_by_name(name);
1003		if (pp == NULL) {
1004			G_STRIPE_DEBUG(1, "Disk %s is invalid.", name);
1005			gctl_error(req, "Disk %s is invalid.", name);
1006			return;
1007		}
1008	}
1009
1010	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
1011	if (gp == NULL) {
1012		gctl_error(req, "Can't configure %s.stripe.", md.md_name);
1013		return;
1014	}
1015
1016	sc = gp->softc;
1017	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
1018	sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
1019	for (attached = 0, no = 1; no < *nargs; no++) {
1020		snprintf(param, sizeof(param), "arg%u", no);
1021		name = gctl_get_asciiparam(req, param);
1022		if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
1023			name += strlen("/dev/");
1024		pp = g_provider_by_name(name);
1025		KASSERT(pp != NULL, ("Provider %s disappear?!", name));
1026		if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
1027			G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
1028			    no, pp->name, gp->name);
1029			sbuf_printf(sb, " %s", pp->name);
1030			continue;
1031		}
1032		attached++;
1033	}
1034	sbuf_finish(sb);
1035	if (md.md_all != attached) {
1036		g_stripe_destroy(gp->softc, 1);
1037		gctl_error(req, "%s", sbuf_data(sb));
1038	}
1039	sbuf_delete(sb);
1040}
1041
1042static struct g_stripe_softc *
1043g_stripe_find_device(struct g_class *mp, const char *name)
1044{
1045	struct g_stripe_softc *sc;
1046	struct g_geom *gp;
1047
1048	LIST_FOREACH(gp, &mp->geom, geom) {
1049		sc = gp->softc;
1050		if (sc == NULL)
1051			continue;
1052		if (strcmp(gp->name, name) == 0 ||
1053		    strcmp(sc->sc_name, name) == 0) {
1054			return (sc);
1055		}
1056	}
1057	return (NULL);
1058}
1059
1060static void
1061g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
1062{
1063	struct g_stripe_softc *sc;
1064	int *force, *nargs, error;
1065	const char *name;
1066	char param[16];
1067	u_int i;
1068
1069	g_topology_assert();
1070
1071	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
1072	if (nargs == NULL) {
1073		gctl_error(req, "No '%s' argument.", "nargs");
1074		return;
1075	}
1076	if (*nargs <= 0) {
1077		gctl_error(req, "Missing device(s).");
1078		return;
1079	}
1080	force = gctl_get_paraml(req, "force", sizeof(*force));
1081	if (force == NULL) {
1082		gctl_error(req, "No '%s' argument.", "force");
1083		return;
1084	}
1085
1086	for (i = 0; i < (u_int)*nargs; i++) {
1087		snprintf(param, sizeof(param), "arg%u", i);
1088		name = gctl_get_asciiparam(req, param);
1089		if (name == NULL) {
1090			gctl_error(req, "No 'arg%u' argument.", i);
1091			return;
1092		}
1093		sc = g_stripe_find_device(mp, name);
1094		if (sc == NULL) {
1095			gctl_error(req, "No such device: %s.", name);
1096			return;
1097		}
1098		error = g_stripe_destroy(sc, *force);
1099		if (error != 0) {
1100			gctl_error(req, "Cannot destroy device %s (error=%d).",
1101			    sc->sc_geom->name, error);
1102			return;
1103		}
1104	}
1105}
1106
1107static void
1108g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
1109{
1110	uint32_t *version;
1111
1112	g_topology_assert();
1113
1114	version = gctl_get_paraml(req, "version", sizeof(*version));
1115	if (version == NULL) {
1116		gctl_error(req, "No '%s' argument.", "version");
1117		return;
1118	}
1119	if (*version != G_STRIPE_VERSION) {
1120		gctl_error(req, "Userland and kernel parts are out of sync.");
1121		return;
1122	}
1123
1124	if (strcmp(verb, "create") == 0) {
1125		g_stripe_ctl_create(req, mp);
1126		return;
1127	} else if (strcmp(verb, "destroy") == 0 ||
1128	    strcmp(verb, "stop") == 0) {
1129		g_stripe_ctl_destroy(req, mp);
1130		return;
1131	}
1132
1133	gctl_error(req, "Unknown verb.");
1134}
1135
1136static void
1137g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1138    struct g_consumer *cp, struct g_provider *pp)
1139{
1140	struct g_stripe_softc *sc;
1141
1142	sc = gp->softc;
1143	if (sc == NULL || pp == NULL)
1144		return;
1145	sbuf_printf(sb, "%s<id>%u</id>\n", indent, (u_int)sc->sc_id);
1146	sbuf_printf(sb, "%s<stripesize>%u</stripesize>\n", indent,
1147	    (u_int)sc->sc_stripesize);
1148	switch (sc->sc_type) {
1149	case G_STRIPE_TYPE_AUTOMATIC:
1150		sbuf_printf(sb, "%s<type>%s</type>\n", indent, "automatic");
1151		break;
1152	case G_STRIPE_TYPE_MANUAL:
1153		sbuf_printf(sb, "%s<type>%s</type>\n", indent, "manual");
1154		break;
1155	default:
1156		sbuf_printf(sb, "%s<type>%s</type>\n", indent, "unknown");
1157		break;
1158	}
1159	sbuf_printf(sb, "%s<providers>", indent);
1160	LIST_FOREACH(cp, &gp->consumer, consumer) {
1161		if (cp->provider == NULL)
1162			continue;
1163		sbuf_printf(sb, "%s", cp->provider->name);
1164		if (LIST_NEXT(cp, consumer) != NULL)
1165			sbuf_printf(sb, " ");
1166	}
1167	sbuf_printf(sb, "</providers>\n");
1168	sbuf_printf(sb, "%s<status>total=%u, online=%u</status>\n", indent,
1169	    sc->sc_ndisks, g_stripe_nvalid(sc));
1170	if (pp->error == 0)
1171		sbuf_printf(sb, "%s<state>UP</state>\n", indent);
1172	else
1173		sbuf_printf(sb, "%s<state>DOWN</state>\n", indent);
1174}
1175
/* Declare g_stripe_class as a GEOM class under the module name g_stripe. */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
1177