geom_vinum.c revision 131107
1/*
2 *  Copyright (c) 2004 Lukas Ertl
3 *  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum.c 131107 2004-06-25 18:04:33Z le $");
30
31#include <sys/param.h>
32#include <sys/bio.h>
33#include <sys/kernel.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/mutex.h>
38#include <sys/systm.h>
39
40#include <geom/geom.h>
41#include <geom/vinum/geom_vinum_var.h>
42#include <geom/vinum/geom_vinum.h>
43#include <geom/vinum/geom_vinum_share.h>
44
45#if 0
46SYSCTL_DECL(_kern_geom);
47SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff");
48SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RW, &gv_debug, 0,
49    "Debug level");
50#endif
51
/* Entry points defined later in this file. */
int	gv_create(struct g_geom *gp, struct gctl_req *req);
void	config_new_drive(struct gv_drive *d);
54
55static void
56gv_orphan(struct g_consumer *cp)
57{
58	struct g_geom *gp;
59	struct gv_softc *sc;
60	int error;
61
62	g_topology_assert();
63
64	KASSERT(cp != NULL, ("gv_orphan: null cp"));
65	gp = cp->geom;
66	KASSERT(gp != NULL, ("gv_orphan: null gp"));
67	sc = gp->softc;
68
69	g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name);
70
71	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
72		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
73	error = cp->provider->error;
74	if (error == 0)
75		error = ENXIO;
76	g_detach(cp);
77	g_destroy_consumer(cp);
78	if (!LIST_EMPTY(&gp->consumer))
79		return;
80	g_free(sc);
81	g_wither_geom(gp, error);
82}
83
84static void
85gv_start(struct bio *bp)
86{
87	struct bio *bp2;
88	struct g_geom *gp;
89
90	gp = bp->bio_to->geom;
91	switch(bp->bio_cmd) {
92	case BIO_READ:
93	case BIO_WRITE:
94	case BIO_DELETE:
95		bp2 = g_clone_bio(bp);
96		bp2->bio_done = g_std_done;
97		g_io_request(bp2, LIST_FIRST(&gp->consumer));
98		return;
99	default:
100		g_io_deliver(bp, EOPNOTSUPP);
101		return;
102	}
103}
104
105static int
106gv_access(struct g_provider *pp, int dr, int dw, int de)
107{
108	struct g_geom *gp;
109	struct g_consumer *cp;
110	int error;
111
112	gp = pp->geom;
113	error = ENXIO;
114	cp = LIST_FIRST(&gp->consumer);
115	error = g_access(cp, dr, dw, de);
116	return (error);
117}
118
119static struct g_geom *
120gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
121{
122	struct g_geom *gp;
123	struct g_consumer *cp;
124	struct gv_softc *sc;
125	struct gv_hdr *vhdr;
126	int error, first;
127	char *buf;
128
129	vhdr = NULL;
130	buf = NULL;
131	first = 0;
132
133	g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name);
134	g_topology_assert();
135
136	if (pp->sectorsize == 0)
137		return (NULL);
138
139	/* Check if we already have a VINUM geom, or create a new one. */
140	if (LIST_EMPTY(&mp->geom)) {
141		gp = g_new_geomf(mp, "VINUM");
142		gp->spoiled = gv_orphan;
143		gp->orphan = gv_orphan;
144		gp->access = gv_access;
145		gp->start = gv_start;
146		gp->softc = g_malloc(sizeof(struct gv_softc),
147		    M_WAITOK | M_ZERO);
148		sc = gp->softc;
149		sc->geom = gp;
150		LIST_INIT(&sc->drives);
151		LIST_INIT(&sc->subdisks);
152		LIST_INIT(&sc->plexes);
153		LIST_INIT(&sc->volumes);
154		first++;
155	} else {
156		gp = LIST_FIRST(&mp->geom);
157		sc = gp->softc;
158	}
159
160
161	/* We need a temporary consumer to read the config from. */
162	cp = g_new_consumer(gp);
163	error = g_attach(cp, pp);
164	if (error) {
165		g_destroy_consumer(cp);
166		if (first) {
167			g_free(sc);
168			g_destroy_geom(gp);
169		}
170		return (NULL);
171	}
172	error = g_access(cp, 1, 0, 0);
173	if (error) {
174		g_detach(cp);
175		g_destroy_consumer(cp);
176		if (first) {
177			g_free(gp->softc);
178			g_destroy_geom(gp);
179		}
180		return (NULL);
181	}
182
183	g_topology_unlock();
184
185	/* Check if the provided slice is a valid vinum drive. */
186	vhdr = g_read_data(cp, GV_HDR_OFFSET, GV_HDR_LEN, &error);
187	if (vhdr == NULL || error != 0) {
188		g_topology_lock();
189		g_access(cp, -1, 0, 0);
190		g_detach(cp);
191		g_destroy_consumer(cp);
192		if (first) {
193			g_free(sc);
194			g_destroy_geom(gp);
195		}
196		return (NULL);
197	}
198
199	/* This provider has no vinum magic on board. */
200	if (vhdr->magic != GV_MAGIC) {
201		/* Release the temporary consumer, we don't need it anymore. */
202		g_topology_lock();
203		g_access(cp, -1, 0, 0);
204		g_detach(cp);
205		g_destroy_consumer(cp);
206
207		g_free(vhdr);
208
209		/*
210		 * If there is no other VINUM geom yet just take this one; the
211		 * configuration is still empty, but it can be filled by other
212		 * valid vinum drives later.
213		 */
214		if (first)
215			return (gp);
216		else
217			return (NULL);
218
219	/*
220	 * We have found a valid vinum drive, now read the on-disk
221	 * configuration.
222	 */
223	} else {
224		g_free(vhdr);
225
226		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN,
227		    &error);
228		if (buf == NULL || error != 0) {
229			g_topology_lock();
230			g_access(cp, -1, 0, 0);
231			g_detach(cp);
232			g_destroy_consumer(cp);
233			if (first) {
234				g_free(sc);
235				g_destroy_geom(gp);
236			}
237			return (NULL);
238		}
239
240		/* Release the temporary consumer, we don't need it anymore. */
241		g_topology_lock();
242		g_access(cp, -1, 0, 0);
243		g_detach(cp);
244		g_destroy_consumer(cp);
245
246		/* We are the first VINUM geom. */
247		if (first) {
248			gv_parse_config(sc, buf, 0);
249			g_free(buf);
250			return (gp);
251
252		/* Just merge the configs. */
253		} else {
254			gv_parse_config(sc, buf, 1);
255			g_free(buf);
256			return (NULL);
257		}
258	}
259}
260
261/* XXX this really belongs somewhere else */
262void
263config_new_drive(struct gv_drive *d)
264{
265	struct gv_hdr *vhdr;
266	struct gv_freelist *fl;
267
268	KASSERT(d != NULL, ("config_new_drive: NULL d"));
269
270	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
271	vhdr->magic = GV_MAGIC;
272	vhdr->config_length = GV_CFG_LEN;
273
274	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
275	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
276	microtime(&vhdr->label.date_of_birth);
277
278	d->hdr = vhdr;
279
280	LIST_INIT(&d->subdisks);
281	LIST_INIT(&d->freelist);
282
283	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
284	fl->offset = GV_DATA_START;
285	fl->size = d->avail;
286	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
287	d->freelist_entries = 1;
288
289}
290
291/* Handle userland requests for creating new objects. */
292int
293gv_create(struct g_geom *gp, struct gctl_req *req)
294{
295	struct gv_softc *sc;
296	struct gv_drive *d, *d2;
297	struct gv_plex *p, *p2;
298	struct gv_sd *s, *s2;
299	struct gv_volume *v, *v2;
300	struct g_consumer *cp;
301	struct g_provider *pp;
302	int error, i, *drives, *plexes, *subdisks, *volumes;
303	char buf[20], errstr[ERRBUFSIZ];
304
305	g_topology_assert();
306
307	sc = gp->softc;
308
309	/* Find out how many of each object have been passed in. */
310	volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes));
311	plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes));
312	subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks));
313	drives = gctl_get_paraml(req, "drives", sizeof(*drives));
314
315	/* First, handle drive definitions ... */
316	for (i = 0; i < *drives; i++) {
317		snprintf(buf, sizeof(buf), "drive%d", i);
318		d2 = gctl_get_paraml(req, buf, sizeof(*d2));
319		d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
320		bcopy(d2, d, sizeof(*d));
321
322		/*
323		 * Make sure that the provider specified in the drive
324		 * specification is an active GEOM provider.
325		 */
326		pp = g_provider_by_name(d->device);
327		if (pp == NULL) {
328			gctl_error(req, "%s: drive not found", d->device);
329			g_free(d);
330			return (-1);
331		}
332		d->size = pp->mediasize - GV_DATA_START;
333		d->avail = d->size;
334
335		config_new_drive(d);
336
337		LIST_INSERT_HEAD(&sc->drives, d, drive);
338	}
339
340	/* ... then volume definitions ... */
341	for (i = 0; i < *volumes; i++) {
342		error = 0;
343		snprintf(buf, sizeof(buf), "volume%d", i);
344		v2 = gctl_get_paraml(req, buf, sizeof(*v2));
345
346		v = gv_find_vol(sc, v2->name);
347		if (v != NULL) {
348			gctl_error(req, "volume '%s' is already known",
349			    v->name);
350			return (-1);
351		}
352
353		v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO);
354		bcopy(v2, v, sizeof(*v));
355
356		v->vinumconf = sc;
357		LIST_INIT(&v->plexes);
358		LIST_INSERT_HEAD(&sc->volumes, v, volume);
359	}
360
361	/* ... then plex definitions ... */
362	for (i = 0; i < *plexes; i++) {
363		error = 0;
364		snprintf(buf, sizeof(buf), "plex%d", i);
365		p2 = gctl_get_paraml(req, buf, sizeof(*p2));
366
367		p = gv_find_plex(sc, p2->name);
368		if (p != NULL) {
369			gctl_error(req, "plex '%s' is already known", p->name);
370			return (-1);
371		}
372
373		p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO);
374		bcopy(p2, p, sizeof(*p));
375
376		/* Find the volume this plex should be attached to. */
377		v = gv_find_vol(sc, p->volume);
378		if (v != NULL) {
379			if (v->plexcount)
380				p->flags |= GV_PLEX_ADDED;
381			p->vol_sc = v;
382			v->plexcount++;
383			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
384		}
385
386		p->vinumconf = sc;
387		p->flags |= GV_PLEX_NEWBORN;
388		LIST_INIT(&p->subdisks);
389		LIST_INSERT_HEAD(&sc->plexes, p, plex);
390	}
391
392	/* ... and finally, subdisk definitions. */
393	for (i = 0; i < *subdisks; i++) {
394		error = 0;
395		snprintf(buf, sizeof(buf), "sd%d", i);
396		s2 = gctl_get_paraml(req, buf, sizeof(*s2));
397
398		s = gv_find_sd(sc, s2->name);
399		if (s != NULL) {
400			gctl_error(req, "subdisk '%s' is already known",
401			    s->name);
402			return (-1);
403		}
404
405		s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO);
406		bcopy(s2, s, sizeof(*s));
407
408		/* Find the drive where this subdisk should be put on. */
409		d = gv_find_drive(sc, s->drive);
410
411		/* drive not found - XXX */
412		if (d == NULL) {
413			printf("FOO: drive '%s' not found\n", s->drive);
414			g_free(s);
415			continue;
416		}
417
418		/* Find the plex where this subdisk belongs to. */
419		p = gv_find_plex(sc, s->plex);
420
421		/* plex not found - XXX */
422		if (p == NULL) {
423			printf("FOO: plex '%s' not found\n", s->plex);
424			g_free(s);
425			continue;
426		}
427
428		/*
429		 * First we give the subdisk to the drive, to handle autosized
430		 * values ...
431		 */
432		error = gv_sd_to_drive(sc, d, s, errstr, sizeof(errstr));
433		if (error) {
434			gctl_error(req, errstr);
435			g_free(s);
436			continue;
437		}
438
439		/*
440		 * Then, we give the subdisk to the plex; we check if the
441		 * given values are correct and maybe adjust them.
442		 */
443		error = gv_sd_to_plex(p, s, 1);
444		if (error) {
445			printf("FOO: couldn't give sd '%s' to plex '%s'\n",
446			    s->name, p->name);
447		}
448		s->flags |= GV_SD_NEWBORN;
449
450		s->vinumconf = sc;
451		LIST_INSERT_HEAD(&sc->subdisks, s, sd);
452	}
453
454	LIST_FOREACH(s, &sc->subdisks, sd)
455		gv_update_sd_state(s);
456	LIST_FOREACH(p, &sc->plexes, plex)
457		gv_update_plex_config(p);
458	LIST_FOREACH(v, &sc->volumes, volume)
459		gv_update_vol_state(v);
460
461	/*
462	 * Write out the configuration to each drive.  If the drive doesn't
463	 * have a valid geom_slice geom yet, attach it temporarily to our VINUM
464	 * geom.
465	 */
466	LIST_FOREACH(d, &sc->drives, drive) {
467		if (d->geom == NULL) {
468			/*
469			 * XXX if the provider disapears before we get a chance
470			 * to write the config out to the drive, should this
471			 * be handled any differently?
472			 */
473			pp = g_provider_by_name(d->device);
474			if (pp == NULL) {
475				printf("geom_vinum: %s: drive disapeared?\n",
476				    d->device);
477				continue;
478			}
479			cp = g_new_consumer(gp);
480			g_attach(cp, pp);
481			gv_save_config(cp, d, sc);
482			g_detach(cp);
483			g_destroy_consumer(cp);
484		} else
485			gv_save_config(NULL, d, sc);
486	}
487
488	return (0);
489}
490
491static void
492gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
493{
494	struct g_geom *gp;
495	struct gv_softc *sc;
496	struct sbuf *sb;
497	char *comment;
498
499	g_topology_assert();
500
501	gp = LIST_FIRST(&mp->geom);
502	sc = gp->softc;
503
504	if (!strcmp(verb, "list")) {
505		gv_list(gp, req);
506
507	/* Save our configuration back to disk. */
508	} else if (!strcmp(verb, "saveconfig")) {
509
510		gv_save_config_all(sc);
511
512	/* Return configuration in string form. */
513	} else if (!strcmp(verb, "getconfig")) {
514		comment = gctl_get_param(req, "comment", NULL);
515
516		sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
517		gv_format_config(sc, sb, 0, comment);
518		sbuf_finish(sb);
519		gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1);
520		sbuf_delete(sb);
521
522	} else if (!strcmp(verb, "create")) {
523		gv_create(gp, req);
524
525	} else if (!strcmp(verb, "remove")) {
526		gv_remove(gp, req);
527
528	} else if (!strcmp(verb, "start")) {
529		gv_start_obj(gp, req);
530
531	} else
532		gctl_error(req, "Unknown verb parameter");
533}
534
535static int
536gv_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
537{
538	struct g_geom *gp2;
539	struct gv_softc *sc;
540	struct gv_drive *d, *d2;
541	struct gv_plex *p, *p2;
542	struct gv_sd *s, *s2;
543	struct gv_volume *v, *v2;
544	struct gv_freelist *fl, *fl2;
545
546	g_trace(G_T_TOPOLOGY, "gv_destroy_geom: %s", gp->name);
547	g_topology_assert();
548
549	KASSERT(gp != NULL, ("gv_destroy_geom: null gp"));
550	KASSERT(gp->softc != NULL, ("gv_destroy_geom: null sc"));
551
552	sc = gp->softc;
553
554	/*
555	 * Check if any of our drives is still open; if so, refuse destruction.
556	 */
557	LIST_FOREACH(d, &sc->drives, drive) {
558		gp2 = d->geom;
559		if (gv_is_open(gp2))
560			return (EBUSY);
561	}
562
563	/* Clean up and deallocate what we allocated. */
564	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
565		LIST_REMOVE(d, drive);
566		g_free(d->hdr);
567		d->hdr = NULL;
568		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
569			d->freelist_entries--;
570			LIST_REMOVE(fl, freelist);
571			g_free(fl);
572			fl = NULL;
573		}
574		d->geom->softc = NULL;
575		g_free(d);
576	}
577
578	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
579		LIST_REMOVE(s, sd);
580		s->drive_sc = NULL;
581		s->plex_sc = NULL;
582		s->provider = NULL;
583		s->consumer = NULL;
584		g_free(s);
585	}
586
587	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
588		LIST_REMOVE(p, plex);
589		gv_kill_thread(p);
590		p->vol_sc = NULL;
591		p->geom->softc = NULL;
592		p->provider = NULL;
593		p->consumer = NULL;
594		if (p->org == GV_PLEX_RAID5) {
595			mtx_destroy(&p->worklist_mtx);
596		}
597		g_free(p);
598	}
599
600	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
601		LIST_REMOVE(v, volume);
602		v->geom->softc = NULL;
603		g_free(v);
604	}
605
606	gp->softc = NULL;
607	g_free(sc);
608	g_wither_geom(gp, ENXIO);
609	return (0);
610}
611
612#define	VINUM_CLASS_NAME "VINUM"
613
614static struct g_class g_vinum_class	= {
615	.name = VINUM_CLASS_NAME,
616	.taste = gv_taste,
617	.destroy_geom = gv_destroy_geom,
618	.ctlreq = gv_config,
619};
620
621DECLARE_GEOM_CLASS(g_vinum_class, g_vinum);
622