geom_vinum_drive.c revision 149094
1/*-
2 * Copyright (c) 2004, 2005 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 149094 2005-08-15 17:07:47Z le $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/errno.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/kthread.h>
36#include <sys/libkern.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sbuf.h>
42#include <sys/systm.h>
43#include <sys/time.h>
44
45#include <geom/geom.h>
46#include <geom/vinum/geom_vinum_var.h>
47#include <geom/vinum/geom_vinum.h>
48#include <geom/vinum/geom_vinum_share.h>
49
/* Forward declarations for routines referenced before their definition. */
static void	gv_drive_dead(void *arg, int flag);	/* GEOM event handler */
static void	gv_drive_worker(void *arg);		/* per-drive kthread */
void	gv_drive_modify(struct gv_drive *d);
53
54void
55gv_config_new_drive(struct gv_drive *d)
56{
57	struct gv_hdr *vhdr;
58	struct gv_freelist *fl;
59
60	KASSERT(d != NULL, ("config_new_drive: NULL d"));
61
62	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
63	vhdr->magic = GV_MAGIC;
64	vhdr->config_length = GV_CFG_LEN;
65
66	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
67	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
68	microtime(&vhdr->label.date_of_birth);
69
70	d->hdr = vhdr;
71
72	LIST_INIT(&d->subdisks);
73	LIST_INIT(&d->freelist);
74
75	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
76	fl->offset = GV_DATA_START;
77	fl->size = d->avail;
78	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
79	d->freelist_entries = 1;
80
81	TAILQ_INIT(&d->bqueue);
82	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
83	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
84	d->flags |= GV_DRIVE_THREAD_ACTIVE;
85}
86
87void
88gv_save_config_all(struct gv_softc *sc)
89{
90	struct gv_drive *d;
91
92	g_topology_assert();
93
94	LIST_FOREACH(d, &sc->drives, drive) {
95		if (d->geom == NULL)
96			continue;
97		gv_save_config(NULL, d, sc);
98	}
99}
100
101/* Save the vinum configuration back to disk. */
102void
103gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
104{
105	struct g_geom *gp;
106	struct g_consumer *cp2;
107	struct gv_hdr *vhdr, *hdr;
108	struct sbuf *sb;
109	int error;
110
111	g_topology_assert();
112
113	KASSERT(d != NULL, ("gv_save_config: null d"));
114	KASSERT(sc != NULL, ("gv_save_config: null sc"));
115
116	/*
117	 * We can't save the config on a drive that isn't up, but drives that
118	 * were just created aren't officially up yet, so we check a special
119	 * flag.
120	 */
121	if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN))
122		return;
123
124	if (cp == NULL) {
125		gp = d->geom;
126		KASSERT(gp != NULL, ("gv_save_config: null gp"));
127		cp2 = LIST_FIRST(&gp->consumer);
128		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
129	} else
130		cp2 = cp;
131
132	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
133	vhdr->magic = GV_MAGIC;
134	vhdr->config_length = GV_CFG_LEN;
135
136	hdr = d->hdr;
137	if (hdr == NULL) {
138		printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
139		g_free(vhdr);
140		return;
141	}
142	microtime(&hdr->label.last_update);
143	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
144
145	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
146	gv_format_config(sc, sb, 1, NULL);
147	sbuf_finish(sb);
148
149	error = g_access(cp2, 0, 1, 0);
150	if (error) {
151		printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
152		    d->name, error);
153		sbuf_delete(sb);
154		g_free(vhdr);
155		return;
156	}
157	g_topology_unlock();
158
159	do {
160		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
161		if (error) {
162			printf("GEOM_VINUM: writing vhdr failed on drive %s, "
163			    "errno %d", d->name, error);
164			break;
165		}
166
167		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
168		    GV_CFG_LEN);
169		if (error) {
170			printf("GEOM_VINUM: writing first config copy failed "
171			    "on drive %s, errno %d", d->name, error);
172			break;
173		}
174
175		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
176		    sbuf_data(sb), GV_CFG_LEN);
177		if (error)
178			printf("GEOM_VINUM: writing second config copy failed "
179			    "on drive %s, errno %d", d->name, error);
180	} while (0);
181
182	g_topology_lock();
183	g_access(cp2, 0, -1, 0);
184	sbuf_delete(sb);
185	g_free(vhdr);
186
187	if (d->geom != NULL)
188		gv_drive_modify(d);
189}
190
191/* This resembles g_slice_access(). */
192static int
193gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
194{
195	struct g_geom *gp;
196	struct g_consumer *cp;
197	struct g_provider *pp2;
198	struct gv_drive *d;
199	struct gv_sd *s, *s2;
200	int error;
201
202	gp = pp->geom;
203	cp = LIST_FIRST(&gp->consumer);
204	if (cp == NULL)
205		return (0);
206
207	d = gp->softc;
208	if (d == NULL)
209		return (0);
210
211	s = pp->private;
212	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
213
214	LIST_FOREACH(s2, &d->subdisks, from_drive) {
215		if (s == s2)
216			continue;
217		if (s->drive_offset + s->size <= s2->drive_offset)
218			continue;
219		if (s2->drive_offset + s2->size <= s->drive_offset)
220			continue;
221
222		/* Overlap. */
223		pp2 = s2->provider;
224		KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
225		if ((pp->acw + dw) > 0 && pp2->ace > 0)
226			return (EPERM);
227		if ((pp->ace + de) > 0 && pp2->acw > 0)
228			return (EPERM);
229	}
230
231	error = g_access(cp, dr, dw, de);
232	return (error);
233}
234
235static void
236gv_drive_done(struct bio *bp)
237{
238	struct gv_drive *d;
239	struct gv_bioq *bq;
240
241	/* Put the BIO on the worker queue again. */
242	d = bp->bio_from->geom->softc;
243	bp->bio_cflags |= GV_BIO_DONE;
244	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
245	bq->bp = bp;
246	mtx_lock(&d->bqueue_mtx);
247	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
248	wakeup(d);
249	mtx_unlock(&d->bqueue_mtx);
250}
251
252
253static void
254gv_drive_start(struct bio *bp)
255{
256	struct gv_drive *d;
257	struct gv_sd *s;
258	struct gv_bioq *bq;
259
260	switch (bp->bio_cmd) {
261	case BIO_READ:
262	case BIO_WRITE:
263	case BIO_DELETE:
264		break;
265	case BIO_GETATTR:
266	default:
267		g_io_deliver(bp, EOPNOTSUPP);
268		return;
269	}
270
271	s = bp->bio_to->private;
272	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
273		g_io_deliver(bp, ENXIO);
274		return;
275	}
276
277	d = bp->bio_to->geom->softc;
278
279	/*
280	 * Put the BIO on the worker queue, where the worker thread will pick
281	 * it up.
282	 */
283	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
284	bq->bp = bp;
285	mtx_lock(&d->bqueue_mtx);
286	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
287	wakeup(d);
288	mtx_unlock(&d->bqueue_mtx);
289
290}
291
292static void
293gv_drive_worker(void *arg)
294{
295	struct bio *bp, *cbp;
296	struct g_geom *gp;
297	struct g_provider *pp;
298	struct gv_drive *d;
299	struct gv_sd *s;
300	struct gv_bioq *bq, *bq2;
301	int error;
302
303	d = arg;
304
305	mtx_lock(&d->bqueue_mtx);
306	for (;;) {
307		/* We were signaled to exit. */
308		if (d->flags & GV_DRIVE_THREAD_DIE)
309			break;
310
311		/* Take the first BIO from out queue. */
312		bq = TAILQ_FIRST(&d->bqueue);
313		if (bq == NULL) {
314			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
315			continue;
316 		}
317		TAILQ_REMOVE(&d->bqueue, bq, queue);
318		mtx_unlock(&d->bqueue_mtx);
319
320		bp = bq->bp;
321		g_free(bq);
322		pp = bp->bio_to;
323		gp = pp->geom;
324
325		/* Completed request. */
326		if (bp->bio_cflags & GV_BIO_DONE) {
327			error = bp->bio_error;
328
329			/* Deliver the original request. */
330			g_std_done(bp);
331
332			/* The request had an error, we need to clean up. */
333			if (error != 0) {
334				g_topology_lock();
335				gv_set_drive_state(d, GV_DRIVE_DOWN,
336				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
337				g_topology_unlock();
338				g_post_event(gv_drive_dead, d, M_WAITOK, d,
339				    NULL);
340			}
341
342		/* New request, needs to be sent downwards. */
343		} else {
344			s = pp->private;
345
346			if ((s->state == GV_SD_DOWN) ||
347			    (s->state == GV_SD_STALE)) {
348				g_io_deliver(bp, ENXIO);
349				mtx_lock(&d->bqueue_mtx);
350				continue;
351			}
352			if (bp->bio_offset > s->size) {
353				g_io_deliver(bp, EINVAL);
354				mtx_lock(&d->bqueue_mtx);
355				continue;
356			}
357
358			cbp = g_clone_bio(bp);
359			if (cbp == NULL) {
360				g_io_deliver(bp, ENOMEM);
361				mtx_lock(&d->bqueue_mtx);
362				continue;
363			}
364			if (cbp->bio_offset + cbp->bio_length > s->size)
365				cbp->bio_length = s->size -
366				    cbp->bio_offset;
367			cbp->bio_done = gv_drive_done;
368			cbp->bio_offset += s->drive_offset;
369			g_io_request(cbp, LIST_FIRST(&gp->consumer));
370		}
371
372		mtx_lock(&d->bqueue_mtx);
373	}
374
375	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
376		TAILQ_REMOVE(&d->bqueue, bq, queue);
377		mtx_unlock(&d->bqueue_mtx);
378		bp = bq->bp;
379		g_free(bq);
380		if (bp->bio_cflags & GV_BIO_DONE)
381			g_std_done(bp);
382		else
383			g_io_deliver(bp, ENXIO);
384		mtx_lock(&d->bqueue_mtx);
385	}
386	mtx_unlock(&d->bqueue_mtx);
387	d->flags |= GV_DRIVE_THREAD_DEAD;
388
389	kthread_exit(ENXIO);
390}
391
392
393static void
394gv_drive_orphan(struct g_consumer *cp)
395{
396	struct g_geom *gp;
397	struct gv_drive *d;
398
399	g_topology_assert();
400	gp = cp->geom;
401	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
402	d = gp->softc;
403	if (d != NULL) {
404		gv_set_drive_state(d, GV_DRIVE_DOWN,
405		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
406		g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
407	} else
408		g_wither_geom(gp, ENXIO);
409}
410
411static struct g_geom *
412gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
413{
414	struct g_geom *gp, *gp2;
415	struct g_consumer *cp;
416	struct gv_drive *d;
417	struct gv_sd *s;
418	struct gv_softc *sc;
419	struct gv_freelist *fl;
420	struct gv_hdr *vhdr;
421	int error;
422	char *buf, errstr[ERRBUFSIZ];
423
424	vhdr = NULL;
425	d = NULL;
426
427	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
428	g_topology_assert();
429
430	/* Find the VINUM class and its associated geom. */
431	gp2 = find_vinum_geom();
432	if (gp2 == NULL)
433		return (NULL);
434	sc = gp2->softc;
435
436	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
437	gp->start = gv_drive_start;
438	gp->orphan = gv_drive_orphan;
439	gp->access = gv_drive_access;
440	gp->start = gv_drive_start;
441
442	cp = g_new_consumer(gp);
443	g_attach(cp, pp);
444	error = g_access(cp, 1, 0, 0);
445	if (error) {
446		g_detach(cp);
447		g_destroy_consumer(cp);
448		g_destroy_geom(gp);
449		return (NULL);
450	}
451
452	g_topology_unlock();
453
454	/* Now check if the provided slice is a valid vinum drive. */
455	do {
456		vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error);
457		if (vhdr == NULL || error != 0)
458			break;
459		if (vhdr->magic != GV_MAGIC) {
460			g_free(vhdr);
461			break;
462		}
463
464		/*
465		 * We have found a valid vinum drive.  Let's see if it is
466		 * already known in the configuration.  There's a chance that
467		 * the VINUMDRIVE class tastes before the VINUM class could
468		 * taste, so parse the configuration here too, just to be on
469		 * the safe side.
470		 */
471		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error);
472		if (buf == NULL || error != 0) {
473			g_free(vhdr);
474			break;
475		}
476		g_topology_lock();
477		gv_parse_config(sc, buf, 1);
478		g_free(buf);
479
480		d = gv_find_drive(sc, vhdr->label.name);
481
482		/* We already know about this drive. */
483		if (d != NULL) {
484			/* Check if this drive already has a geom. */
485			if (d->geom != NULL) {
486				g_topology_unlock();
487				break;
488			}
489			bcopy(vhdr, d->hdr, sizeof(*vhdr));
490
491		/* This is a new drive. */
492		} else {
493			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
494
495			/* Initialize all needed variables. */
496			d->size = pp->mediasize - GV_DATA_START;
497			d->avail = d->size;
498			d->hdr = vhdr;
499			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
500			LIST_INIT(&d->subdisks);
501			LIST_INIT(&d->freelist);
502
503			/* We also need a freelist entry. */
504			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
505			fl->offset = GV_DATA_START;
506			fl->size = d->avail;
507			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
508			d->freelist_entries = 1;
509
510			TAILQ_INIT(&d->bqueue);
511
512			/* Save it into the main configuration. */
513			LIST_INSERT_HEAD(&sc->drives, d, drive);
514		}
515
516		/*
517		 * Create a bio queue mutex and a worker thread, if necessary.
518		 */
519		if (mtx_initialized(&d->bqueue_mtx) == 0)
520			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
521
522		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
523			kthread_create(gv_drive_worker, d, NULL, 0, 0,
524			    "gv_d %s", d->name);
525			d->flags |= GV_DRIVE_THREAD_ACTIVE;
526		}
527
528		g_access(cp, -1, 0, 0);
529
530		gp->softc = d;
531		d->geom = gp;
532		d->vinumconf = sc;
533		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
534
535		/*
536		 * Find out which subdisks belong to this drive and crosslink
537		 * them.
538		 */
539		LIST_FOREACH(s, &sc->subdisks, sd) {
540			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
541				/* XXX: errors ignored */
542				gv_sd_to_drive(sc, d, s, errstr,
543				    sizeof(errstr));
544		}
545
546		/* This drive is now up for sure. */
547		gv_set_drive_state(d, GV_DRIVE_UP, 0);
548
549		/*
550		 * If there are subdisks on this drive, we need to create
551		 * providers for them.
552		 */
553		if (d->sdcount)
554			gv_drive_modify(d);
555
556		return (gp);
557
558	} while (0);
559
560	g_topology_lock();
561	g_access(cp, -1, 0, 0);
562
563	g_detach(cp);
564	g_destroy_consumer(cp);
565	g_destroy_geom(gp);
566	return (NULL);
567}
568
569/*
570 * Modify the providers for the given drive 'd'.  It is assumed that the
571 * subdisk list of 'd' is already correctly set up.
572 */
573void
574gv_drive_modify(struct gv_drive *d)
575{
576	struct g_geom *gp;
577	struct g_consumer *cp;
578	struct g_provider *pp, *pp2;
579	struct gv_sd *s;
580
581	KASSERT(d != NULL, ("gv_drive_modify: null d"));
582	gp = d->geom;
583	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
584	cp = LIST_FIRST(&gp->consumer);
585	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
586	pp = cp->provider;
587	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
588
589	g_topology_assert();
590
591	LIST_FOREACH(s, &d->subdisks, from_drive) {
592		/* This subdisk already has a provider. */
593		if (s->provider != NULL)
594			continue;
595		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
596		pp2->mediasize = s->size;
597		pp2->sectorsize = pp->sectorsize;
598		g_error_provider(pp2, 0);
599		s->provider = pp2;
600		pp2->private = s;
601	}
602}
603
604static void
605gv_drive_dead(void *arg, int flag)
606{
607	struct g_geom *gp;
608	struct g_consumer *cp;
609	struct gv_drive *d;
610	struct gv_sd *s;
611
612	g_topology_assert();
613	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
614
615	if (flag == EV_CANCEL)
616		return;
617
618	d = arg;
619	if (d->state != GV_DRIVE_DOWN)
620		return;
621
622	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
623
624	gp = d->geom;
625	if (gp == NULL)
626		return;
627
628	LIST_FOREACH(cp, &gp->consumer, consumer) {
629		if (cp->nstart != cp->nend) {
630			printf("GEOM_VINUM: dead drive '%s' has still "
631			    "active requests, can't detach consumer\n",
632			    d->name);
633			g_post_event(gv_drive_dead, d, M_WAITOK, d,
634			    NULL);
635			return;
636		}
637		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
638			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
639	}
640
641	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
642	d->geom = NULL;
643	LIST_FOREACH(s, &d->subdisks, from_drive) {
644		s->provider = NULL;
645		s->consumer = NULL;
646	}
647	gv_kill_drive_thread(d);
648	gp->softc = NULL;
649	g_wither_geom(gp, ENXIO);
650}
651
652static int
653gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
654    struct g_geom *gp)
655{
656	struct gv_drive *d;
657
658	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
659	g_topology_assert();
660
661	d = gp->softc;
662	gv_kill_drive_thread(d);
663
664	g_wither_geom(gp, ENXIO);
665	return (0);
666}
667
668#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
669
670static struct g_class g_vinum_drive_class = {
671	.name = VINUMDRIVE_CLASS_NAME,
672	.version = G_VERSION,
673	.taste = gv_drive_taste,
674	.destroy_geom = gv_drive_destroy_geom
675};
676
677DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
678