geom_vinum_drive.c revision 146325
1/*-
2 * Copyright (c) 2004, 2005 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 146325 2005-05-17 16:38:30Z le $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/errno.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/kthread.h>
36#include <sys/libkern.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/sbuf.h>
42#include <sys/systm.h>
43#include <sys/time.h>
44
45#include <geom/geom.h>
46#include <geom/vinum/geom_vinum_var.h>
47#include <geom/vinum/geom_vinum.h>
48#include <geom/vinum/geom_vinum_share.h>
49
/*
 * Forward declarations for functions in this file that are referenced
 * before their definitions.
 */
static void	gv_drive_dead(void *, int);
static void	gv_drive_worker(void *);
/* Unlike the two above, gv_drive_modify() is declared with external linkage. */
void	gv_drive_modify(struct gv_drive *);
53
54void
55gv_config_new_drive(struct gv_drive *d)
56{
57	struct gv_hdr *vhdr;
58	struct gv_freelist *fl;
59
60	KASSERT(d != NULL, ("config_new_drive: NULL d"));
61
62	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
63	vhdr->magic = GV_MAGIC;
64	vhdr->config_length = GV_CFG_LEN;
65
66	bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
67	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
68	microtime(&vhdr->label.date_of_birth);
69
70	d->hdr = vhdr;
71
72	LIST_INIT(&d->subdisks);
73	LIST_INIT(&d->freelist);
74
75	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
76	fl->offset = GV_DATA_START;
77	fl->size = d->avail;
78	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
79	d->freelist_entries = 1;
80
81	TAILQ_INIT(&d->bqueue);
82	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
83	kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
84	d->flags |= GV_DRIVE_THREAD_ACTIVE;
85}
86
87void
88gv_save_config_all(struct gv_softc *sc)
89{
90	struct gv_drive *d;
91
92	g_topology_assert();
93
94	LIST_FOREACH(d, &sc->drives, drive) {
95		if (d->geom == NULL)
96			continue;
97		gv_save_config(NULL, d, sc);
98	}
99}
100
101/* Save the vinum configuration back to disk. */
102void
103gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
104{
105	struct g_geom *gp;
106	struct g_consumer *cp2;
107	struct gv_hdr *vhdr, *hdr;
108	struct sbuf *sb;
109	int error;
110
111	g_topology_assert();
112
113	KASSERT(d != NULL, ("gv_save_config: null d"));
114	KASSERT(sc != NULL, ("gv_save_config: null sc"));
115
116	if (d->state != GV_DRIVE_UP)
117		return;
118
119	if (cp == NULL) {
120		gp = d->geom;
121		KASSERT(gp != NULL, ("gv_save_config: null gp"));
122		cp2 = LIST_FIRST(&gp->consumer);
123		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
124	} else
125		cp2 = cp;
126
127	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
128	vhdr->magic = GV_MAGIC;
129	vhdr->config_length = GV_CFG_LEN;
130
131	hdr = d->hdr;
132	if (hdr == NULL) {
133		printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
134		g_free(vhdr);
135		return;
136	}
137	microtime(&hdr->label.last_update);
138	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
139
140	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
141	gv_format_config(sc, sb, 1, NULL);
142	sbuf_finish(sb);
143
144	error = g_access(cp2, 0, 1, 0);
145	if (error) {
146		printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
147		    d->name, error);
148		sbuf_delete(sb);
149		g_free(vhdr);
150		return;
151	}
152	g_topology_unlock();
153
154	do {
155		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
156		if (error) {
157			printf("GEOM_VINUM: writing vhdr failed on drive %s, "
158			    "errno %d", d->name, error);
159			break;
160		}
161
162		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
163		    GV_CFG_LEN);
164		if (error) {
165			printf("GEOM_VINUM: writing first config copy failed "
166			    "on drive %s, errno %d", d->name, error);
167			break;
168		}
169
170		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
171		    sbuf_data(sb), GV_CFG_LEN);
172		if (error)
173			printf("GEOM_VINUM: writing second config copy failed "
174			    "on drive %s, errno %d", d->name, error);
175	} while (0);
176
177	g_topology_lock();
178	g_access(cp2, 0, -1, 0);
179	sbuf_delete(sb);
180	g_free(vhdr);
181
182	if (d->geom != NULL)
183		gv_drive_modify(d);
184}
185
186/* This resembles g_slice_access(). */
187static int
188gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
189{
190	struct g_geom *gp;
191	struct g_consumer *cp;
192	struct g_provider *pp2;
193	struct gv_drive *d;
194	struct gv_sd *s, *s2;
195	int error;
196
197	gp = pp->geom;
198	cp = LIST_FIRST(&gp->consumer);
199	if (cp == NULL)
200		return (0);
201
202	d = gp->softc;
203	if (d == NULL)
204		return (0);
205
206	s = pp->private;
207	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
208
209	LIST_FOREACH(s2, &d->subdisks, from_drive) {
210		if (s == s2)
211			continue;
212		if (s->drive_offset + s->size <= s2->drive_offset)
213			continue;
214		if (s2->drive_offset + s2->size <= s->drive_offset)
215			continue;
216
217		/* Overlap. */
218		pp2 = s2->provider;
219		KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
220		if ((pp->acw + dw) > 0 && pp2->ace > 0)
221			return (EPERM);
222		if ((pp->ace + de) > 0 && pp2->acw > 0)
223			return (EPERM);
224	}
225
226	error = g_access(cp, dr, dw, de);
227	return (error);
228}
229
230static void
231gv_drive_done(struct bio *bp)
232{
233	struct gv_drive *d;
234	struct gv_bioq *bq;
235
236	/* Put the BIO on the worker queue again. */
237	d = bp->bio_from->geom->softc;
238	bp->bio_cflags |= GV_BIO_DONE;
239	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
240	bq->bp = bp;
241	mtx_lock(&d->bqueue_mtx);
242	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
243	wakeup(d);
244	mtx_unlock(&d->bqueue_mtx);
245}
246
247
248static void
249gv_drive_start(struct bio *bp)
250{
251	struct gv_drive *d;
252	struct gv_sd *s;
253	struct gv_bioq *bq;
254
255	switch (bp->bio_cmd) {
256	case BIO_READ:
257	case BIO_WRITE:
258	case BIO_DELETE:
259		break;
260	case BIO_GETATTR:
261	default:
262		g_io_deliver(bp, EOPNOTSUPP);
263		return;
264	}
265
266	s = bp->bio_to->private;
267	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
268		g_io_deliver(bp, ENXIO);
269		return;
270	}
271
272	d = bp->bio_to->geom->softc;
273
274	/*
275	 * Put the BIO on the worker queue, where the worker thread will pick
276	 * it up.
277	 */
278	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
279	bq->bp = bp;
280	mtx_lock(&d->bqueue_mtx);
281	TAILQ_INSERT_TAIL(&d->bqueue, bq, queue);
282	wakeup(d);
283	mtx_unlock(&d->bqueue_mtx);
284
285}
286
/*
 * Per-drive worker thread; 'arg' is the struct gv_drive it serves.
 *
 * The thread takes entries off d->bqueue one at a time.  An entry whose BIO
 * is flagged GV_BIO_DONE is a finished request put there by gv_drive_done();
 * any other entry is a new request queued by gv_drive_start().
 *
 * Locking: d->bqueue_mtx is held at the top of every loop iteration and
 * while the queue itself is manipulated.  It is dropped while a BIO is being
 * processed, which is why every path back to the top of the loop re-acquires
 * it first.
 *
 * The loop ends once GV_DRIVE_THREAD_DIE is set in d->flags.  The remaining
 * queue entries are then drained, GV_DRIVE_THREAD_DEAD is set and the thread
 * exits.
 */
static void
gv_drive_worker(void *arg)
{
	struct bio *bp, *cbp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct gv_drive *d;
	struct gv_sd *s;
	struct gv_bioq *bq, *bq2;
	int error;

	d = arg;

	mtx_lock(&d->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (d->flags & GV_DRIVE_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bq = TAILQ_FIRST(&d->bqueue);
		if (bq == NULL) {
			/*
			 * Nothing to do: sleep until woken up via wakeup(d)
			 * or until the hz/10 timeout expires, then re-check
			 * the exit flag and the queue.
			 */
			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		TAILQ_REMOVE(&d->bqueue, bq, queue);
		mtx_unlock(&d->bqueue_mtx);

		/* The queue entry is only a carrier for the BIO. */
		bp = bq->bp;
		g_free(bq);
		pp = bp->bio_to;
		gp = pp->geom;

		/* Completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			/* Remember the error before bp is handed off. */
			error = bp->bio_error;

			/* Deliver the original request. */
			g_std_done(bp);

			/* The request had an error, we need to clean up. */
			if (error != 0) {
				g_topology_lock();
				gv_set_drive_state(d, GV_DRIVE_DOWN,
				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
				g_topology_unlock();
				g_post_event(gv_drive_dead, d, M_WAITOK, d,
				    NULL);
			}

		/* New request, needs to be sent downwards. */
		} else {
			s = pp->private;

			if ((s->state == GV_SD_DOWN) ||
			    (s->state == GV_SD_STALE)) {
				g_io_deliver(bp, ENXIO);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			if (bp->bio_offset > s->size) {
				g_io_deliver(bp, EINVAL);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}

			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				g_io_deliver(bp, ENOMEM);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			/* Clamp the request to the end of the subdisk. */
			if (cbp->bio_offset + cbp->bio_length > s->size)
				cbp->bio_length = s->size -
				    cbp->bio_offset;
			cbp->bio_done = gv_drive_done;
			/* Translate the subdisk offset into a drive offset. */
			cbp->bio_offset += s->drive_offset;
			g_io_request(cbp, LIST_FIRST(&gp->consumer));
		}

		mtx_lock(&d->bqueue_mtx);
	}

	/*
	 * We are exiting: drain the queue.  Finished requests are completed
	 * normally, requests that were never started fail with ENXIO.
	 */
	TAILQ_FOREACH_SAFE(bq, &d->bqueue, queue, bq2) {
		TAILQ_REMOVE(&d->bqueue, bq, queue);
		mtx_unlock(&d->bqueue_mtx);
		bp = bq->bp;
		g_free(bq);
		if (bp->bio_cflags & GV_BIO_DONE)
			g_std_done(bp);
		else
			g_io_deliver(bp, ENXIO);
		mtx_lock(&d->bqueue_mtx);
	}
	mtx_unlock(&d->bqueue_mtx);
	/* NOTE(review): the flag is set after the queue mutex was dropped. */
	d->flags |= GV_DRIVE_THREAD_DEAD;

	kthread_exit(ENXIO);
}
386
387
388static void
389gv_drive_orphan(struct g_consumer *cp)
390{
391	struct g_geom *gp;
392	struct gv_drive *d;
393
394	g_topology_assert();
395	gp = cp->geom;
396	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
397	d = gp->softc;
398	if (d != NULL) {
399		gv_set_drive_state(d, GV_DRIVE_DOWN,
400		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
401		g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
402	} else
403		g_wither_geom(gp, ENXIO);
404}
405
406static struct g_geom *
407gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
408{
409	struct g_geom *gp, *gp2;
410	struct g_consumer *cp;
411	struct gv_drive *d;
412	struct gv_sd *s;
413	struct gv_softc *sc;
414	struct gv_freelist *fl;
415	struct gv_hdr *vhdr;
416	int error;
417	char *buf, errstr[ERRBUFSIZ];
418
419	vhdr = NULL;
420	d = NULL;
421
422	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
423	g_topology_assert();
424
425	/* Find the VINUM class and its associated geom. */
426	gp2 = find_vinum_geom();
427	if (gp2 == NULL)
428		return (NULL);
429	sc = gp2->softc;
430
431	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
432	gp->start = gv_drive_start;
433	gp->orphan = gv_drive_orphan;
434	gp->access = gv_drive_access;
435	gp->start = gv_drive_start;
436
437	cp = g_new_consumer(gp);
438	g_attach(cp, pp);
439	error = g_access(cp, 1, 0, 0);
440	if (error) {
441		g_detach(cp);
442		g_destroy_consumer(cp);
443		g_destroy_geom(gp);
444		return (NULL);
445	}
446
447	g_topology_unlock();
448
449	/* Now check if the provided slice is a valid vinum drive. */
450	do {
451		vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, &error);
452		if (vhdr == NULL || error != 0)
453			break;
454		if (vhdr->magic != GV_MAGIC) {
455			g_free(vhdr);
456			break;
457		}
458
459		/*
460		 * We have found a valid vinum drive.  Let's see if it is
461		 * already known in the configuration.  There's a chance that
462		 * the VINUMDRIVE class tastes before the VINUM class could
463		 * taste, so parse the configuration here too, just to be on
464		 * the safe side.
465		 */
466		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, &error);
467		if (buf == NULL || error != 0) {
468			g_free(vhdr);
469			break;
470		}
471		g_topology_lock();
472		gv_parse_config(sc, buf, 1);
473		g_free(buf);
474
475		d = gv_find_drive(sc, vhdr->label.name);
476
477		/* We already know about this drive. */
478		if (d != NULL) {
479			/* Check if this drive already has a geom. */
480			if (d->geom != NULL) {
481				g_topology_unlock();
482				break;
483			}
484			bcopy(vhdr, d->hdr, sizeof(*vhdr));
485
486		/* This is a new drive. */
487		} else {
488			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
489
490			/* Initialize all needed variables. */
491			d->size = pp->mediasize - GV_DATA_START;
492			d->avail = d->size;
493			d->hdr = vhdr;
494			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
495			LIST_INIT(&d->subdisks);
496			LIST_INIT(&d->freelist);
497
498			/* We also need a freelist entry. */
499			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
500			fl->offset = GV_DATA_START;
501			fl->size = d->avail;
502			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
503			d->freelist_entries = 1;
504
505			TAILQ_INIT(&d->bqueue);
506
507			/* Save it into the main configuration. */
508			LIST_INSERT_HEAD(&sc->drives, d, drive);
509		}
510
511		/*
512		 * Create a bio queue mutex and a worker thread, if necessary.
513		 */
514		if (mtx_initialized(&d->bqueue_mtx) == 0)
515			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
516
517		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
518			kthread_create(gv_drive_worker, d, NULL, 0, 0,
519			    "gv_d %s", d->name);
520			d->flags |= GV_DRIVE_THREAD_ACTIVE;
521		}
522
523		g_access(cp, -1, 0, 0);
524
525		gp->softc = d;
526		d->geom = gp;
527		d->vinumconf = sc;
528		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
529
530		/*
531		 * Find out which subdisks belong to this drive and crosslink
532		 * them.
533		 */
534		LIST_FOREACH(s, &sc->subdisks, sd) {
535			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
536				/* XXX: errors ignored */
537				gv_sd_to_drive(sc, d, s, errstr,
538				    sizeof(errstr));
539		}
540
541		/* This drive is now up for sure. */
542		gv_set_drive_state(d, GV_DRIVE_UP, 0);
543
544		/*
545		 * If there are subdisks on this drive, we need to create
546		 * providers for them.
547		 */
548		if (d->sdcount)
549			gv_drive_modify(d);
550
551		return (gp);
552
553	} while (0);
554
555	g_topology_lock();
556	g_access(cp, -1, 0, 0);
557
558	g_detach(cp);
559	g_destroy_consumer(cp);
560	g_destroy_geom(gp);
561	return (NULL);
562}
563
564/*
565 * Modify the providers for the given drive 'd'.  It is assumed that the
566 * subdisk list of 'd' is already correctly set up.
567 */
568void
569gv_drive_modify(struct gv_drive *d)
570{
571	struct g_geom *gp;
572	struct g_consumer *cp;
573	struct g_provider *pp, *pp2;
574	struct gv_sd *s;
575
576	KASSERT(d != NULL, ("gv_drive_modify: null d"));
577	gp = d->geom;
578	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
579	cp = LIST_FIRST(&gp->consumer);
580	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
581	pp = cp->provider;
582	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
583
584	g_topology_assert();
585
586	LIST_FOREACH(s, &d->subdisks, from_drive) {
587		/* This subdisk already has a provider. */
588		if (s->provider != NULL)
589			continue;
590		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
591		pp2->mediasize = s->size;
592		pp2->sectorsize = pp->sectorsize;
593		g_error_provider(pp2, 0);
594		s->provider = pp2;
595		pp2->private = s;
596	}
597}
598
599static void
600gv_drive_dead(void *arg, int flag)
601{
602	struct g_geom *gp;
603	struct g_consumer *cp;
604	struct gv_drive *d;
605	struct gv_sd *s;
606
607	g_topology_assert();
608	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
609
610	if (flag == EV_CANCEL)
611		return;
612
613	d = arg;
614	if (d->state != GV_DRIVE_DOWN)
615		return;
616
617	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
618
619	gp = d->geom;
620	if (gp == NULL)
621		return;
622
623	LIST_FOREACH(cp, &gp->consumer, consumer) {
624		if (cp->nstart != cp->nend) {
625			printf("GEOM_VINUM: dead drive '%s' has still "
626			    "active requests, can't detach consumer\n",
627			    d->name);
628			g_post_event(gv_drive_dead, d, M_WAITOK, d,
629			    NULL);
630			return;
631		}
632		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
633			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
634	}
635
636	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
637	d->geom = NULL;
638	LIST_FOREACH(s, &d->subdisks, from_drive) {
639		s->provider = NULL;
640		s->consumer = NULL;
641	}
642	gv_kill_drive_thread(d);
643	gp->softc = NULL;
644	g_wither_geom(gp, ENXIO);
645}
646
647static int
648gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
649    struct g_geom *gp)
650{
651	struct gv_drive *d;
652
653	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
654	g_topology_assert();
655
656	d = gp->softc;
657	gv_kill_drive_thread(d);
658
659	g_wither_geom(gp, ENXIO);
660	return (0);
661}
662
/* Name under which the drive class is registered. */
#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"

/*
 * Class descriptor: hooks up the taste and destroy_geom methods defined in
 * this file.  The start, orphan and access methods are set per geom in
 * gv_drive_taste().
 */
static struct g_class g_vinum_drive_class = {
	.name = VINUMDRIVE_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_drive_taste,
	.destroy_geom = gv_drive_destroy_geom
};

DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
673