geom_vinum_drive.c revision 130597
1130389Sle/*-
2130389Sle * Copyright (c) 2004 Lukas Ertl
3130389Sle * All rights reserved.
4130389Sle *
5130389Sle * Redistribution and use in source and binary forms, with or without
6130389Sle * modification, are permitted provided that the following conditions
7130389Sle * are met:
8130389Sle * 1. Redistributions of source code must retain the above copyright
9130389Sle *    notice, this list of conditions and the following disclaimer.
10130389Sle * 2. Redistributions in binary form must reproduce the above copyright
11130389Sle *    notice, this list of conditions and the following disclaimer in the
12130389Sle *    documentation and/or other materials provided with the distribution.
13130389Sle *
14130389Sle * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15130389Sle * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16130389Sle * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17130389Sle * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18130389Sle * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19130389Sle * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20130389Sle * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21130389Sle * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22130389Sle * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23130389Sle * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24130389Sle * SUCH DAMAGE.
25130389Sle */
26130389Sle
27130389Sle#include <sys/cdefs.h>
28130389Sle__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 130597 2004-06-16 14:41:04Z le $");
29130389Sle
30130389Sle#include <sys/param.h>
31130389Sle#include <sys/bio.h>
32130389Sle#include <sys/errno.h>
33130389Sle#include <sys/conf.h>
34130389Sle#include <sys/kernel.h>
35130389Sle#include <sys/kthread.h>
36130389Sle#include <sys/libkern.h>
37130389Sle#include <sys/lock.h>
38130389Sle#include <sys/malloc.h>
39130389Sle#include <sys/module.h>
40130389Sle#include <sys/mutex.h>
41130389Sle#include <sys/sbuf.h>
42130389Sle#include <sys/systm.h>
43130389Sle#include <sys/time.h>
44130389Sle
45130389Sle#include <geom/geom.h>
46130389Sle#include <geom/vinum/geom_vinum_var.h>
47130389Sle#include <geom/vinum/geom_vinum.h>
48130389Sle#include <geom/vinum/geom_vinum_share.h>
49130389Sle
/* Defined further down; gives every provider-less subdisk of 'd' a provider. */
void	gv_drive_modify(struct gv_drive *d);
51130389Sle
52130389Slevoid
53130389Slegv_save_config_all(struct gv_softc *sc)
54130389Sle{
55130389Sle	struct gv_drive *d;
56130389Sle
57130389Sle	g_topology_assert();
58130389Sle
59130389Sle	LIST_FOREACH(d, &sc->drives, drive) {
60130389Sle		if (d->geom == NULL)
61130389Sle			continue;
62130389Sle		gv_save_config(NULL, d, sc);
63130389Sle	}
64130389Sle}
65130389Sle
66130389Sle/* Save the vinum configuration back to disk. */
67130389Slevoid
68130389Slegv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
69130389Sle{
70130389Sle	struct g_geom *gp;
71130389Sle	struct g_consumer *cp2;
72130389Sle	struct gv_hdr *vhdr, *hdr;
73130389Sle	struct sbuf *sb;
74130389Sle	int error;
75130389Sle
76130389Sle	g_topology_assert();
77130389Sle
78130389Sle	KASSERT(d != NULL, ("gv_save_config: null d"));
79130389Sle	KASSERT(sc != NULL, ("gv_save_config: null sc"));
80130389Sle
81130389Sle	if (cp == NULL) {
82130389Sle		gp = d->geom;
83130389Sle		KASSERT(gp != NULL, ("gv_save_config: null gp"));
84130389Sle		cp2 = LIST_FIRST(&gp->consumer);
85130389Sle		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
86130389Sle	} else
87130389Sle		cp2 = cp;
88130389Sle
89130389Sle	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
90130389Sle	vhdr->magic = GV_MAGIC;
91130389Sle	vhdr->config_length = GV_CFG_LEN;
92130389Sle
93130389Sle	hdr = d->hdr;
94130389Sle	if (hdr == NULL) {
95130389Sle		printf("NULL hdr!!!\n");
96130389Sle		g_free(vhdr);
97130389Sle		return;
98130389Sle	}
99130389Sle	microtime(&hdr->label.last_update);
100130389Sle	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
101130389Sle
102130389Sle	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
103130389Sle	gv_format_config(sc, sb, 1, NULL);
104130389Sle	sbuf_finish(sb);
105130389Sle
106130389Sle	error = g_access(cp2, 0, 1, 0);
107130389Sle	if (error) {
108130389Sle		printf("g_access failed: %d\n", error);
109130389Sle		sbuf_delete(sb);
110130389Sle		return;
111130389Sle	}
112130389Sle	g_topology_unlock();
113130389Sle
114130389Sle	do {
115130389Sle		error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
116130389Sle		if (error) {
117130389Sle			printf("writing vhdr failed: %d", error);
118130389Sle			break;
119130389Sle		}
120130389Sle
121130389Sle		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
122130389Sle		    GV_CFG_LEN);
123130389Sle		if (error) {
124130389Sle			printf("writing first config copy failed: %d", error);
125130389Sle			break;
126130389Sle		}
127130389Sle
128130389Sle		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
129130389Sle		    sbuf_data(sb), GV_CFG_LEN);
130130389Sle		if (error)
131130389Sle			printf("writing second config copy failed: %d", error);
132130389Sle	} while (0);
133130389Sle
134130389Sle	g_topology_lock();
135130389Sle	g_access(cp2, 0, -1, 0);
136130389Sle	sbuf_delete(sb);
137130389Sle	g_free(vhdr);
138130389Sle
139130389Sle	if (d->geom != NULL)
140130389Sle		gv_drive_modify(d);
141130389Sle}
142130389Sle
143130389Sle/* This resembles g_slice_access(). */
144130389Slestatic int
145130389Slegv_drive_access(struct g_provider *pp, int dr, int dw, int de)
146130389Sle{
147130389Sle	struct g_geom *gp;
148130389Sle	struct g_consumer *cp;
149130389Sle	struct g_provider *pp2;
150130389Sle	struct gv_drive *d;
151130389Sle	struct gv_sd *s, *s2;
152130389Sle	int error;
153130389Sle
154130389Sle	gp = pp->geom;
155130389Sle	cp = LIST_FIRST(&gp->consumer);
156130389Sle	KASSERT(cp != NULL, ("gv_drive_access: NULL cp"));
157130389Sle
158130389Sle	d = gp->softc;
159130389Sle
160130389Sle	s = pp->private;
161130389Sle	KASSERT(s != NULL, ("gv_drive_access: NULL s"));
162130389Sle
163130389Sle	LIST_FOREACH(s2, &d->subdisks, from_drive) {
164130389Sle		if (s == s2)
165130389Sle			continue;
166130389Sle		if (s->drive_offset + s->size <= s2->drive_offset)
167130389Sle			continue;
168130389Sle		if (s2->drive_offset + s2->size <= s->drive_offset)
169130389Sle			continue;
170130389Sle
171130389Sle		/* Overlap. */
172130389Sle		pp2 = s2->provider;
173130389Sle		KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
174130389Sle		if ((pp->acw + dw) > 0 && pp2->ace > 0) {
175130389Sle			printf("FOOO: permission denied - e\n");
176130389Sle			return (EPERM);
177130389Sle		}
178130389Sle		if ((pp->ace + de) > 0 && pp2->acw > 0) {
179130389Sle			printf("FOOO: permission denied - w\n");
180130389Sle			return (EPERM);
181130389Sle		}
182130389Sle	}
183130389Sle
184130389Sle	/* On first open, grab an extra "exclusive" bit */
185130389Sle	if (cp->acr == 0 && cp->acw == 0 && cp->ace == 0)
186130389Sle		de++;
187130389Sle	/* ... and let go of it on last close */
188130389Sle	if ((cp->acr + dr) == 0 && (cp->acw + dw) == 0 && (cp->ace + de) == 1)
189130389Sle		de--;
190130389Sle	error = g_access(cp, dr, dw, de);
191130389Sle	if (error) {
192130389Sle		printf("FOOO: g_access failed: %d\n", error);
193130389Sle	}
194130389Sle	return (error);
195130389Sle}
196130389Sle
197130389Slestatic void
198130389Slegv_drive_start(struct bio *bp)
199130389Sle{
200130389Sle	struct bio *bp2;
201130389Sle	struct g_geom *gp;
202130389Sle	struct g_consumer *cp;
203130389Sle	struct g_provider *pp;
204130389Sle	struct gv_drive *d;
205130389Sle	struct gv_sd *s;
206130389Sle
207130389Sle	pp = bp->bio_to;
208130389Sle	gp = pp->geom;
209130389Sle	cp = LIST_FIRST(&gp->consumer);
210130389Sle	d = gp->softc;
211130389Sle	s = pp->private;
212130389Sle
213130389Sle	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
214130389Sle		g_io_deliver(bp, ENXIO);
215130389Sle		return;
216130389Sle	}
217130389Sle
218130389Sle	switch(bp->bio_cmd) {
219130389Sle	case BIO_READ:
220130389Sle	case BIO_WRITE:
221130389Sle	case BIO_DELETE:
222130389Sle		if (bp->bio_offset > s->size) {
223130389Sle			g_io_deliver(bp, EINVAL); /* XXX: EWHAT ? */
224130389Sle			return;
225130389Sle		}
226130389Sle		bp2 = g_clone_bio(bp);
227130389Sle		if (bp2 == NULL) {
228130389Sle			g_io_deliver(bp, ENOMEM);
229130389Sle			return;
230130389Sle		}
231130389Sle		if (bp2->bio_offset + bp2->bio_length > s->size)
232130389Sle			bp2->bio_length = s->size - bp2->bio_offset;
233130389Sle		bp2->bio_done = g_std_done;
234130389Sle		bp2->bio_offset += s->drive_offset;
235130389Sle		g_io_request(bp2, cp);
236130389Sle		return;
237130389Sle
238130389Sle	case BIO_GETATTR:
239130389Sle		if (!strcmp("GEOM::kerneldump", bp->bio_attribute)) {
240130389Sle			struct g_kerneldump *gkd;
241130389Sle
242130389Sle			gkd = (struct g_kerneldump *)bp->bio_data;
243130389Sle			gkd->offset += s->drive_offset;
244130389Sle			if (gkd->length > s->size)
245130389Sle				gkd->length = s->size;
246130389Sle			/* now, pass it on downwards... */
247130389Sle		}
248130389Sle		bp2 = g_clone_bio(bp);
249130389Sle		if (bp2 == NULL) {
250130389Sle			g_io_deliver(bp, ENOMEM);
251130389Sle			return;
252130389Sle		}
253130389Sle		bp2->bio_done = g_std_done;
254130389Sle		g_io_request(bp2, cp);
255130389Sle		return;
256130389Sle
257130389Sle	default:
258130389Sle		g_io_deliver(bp, EOPNOTSUPP);
259130389Sle		return;
260130389Sle	}
261130389Sle}
262130389Sle
263130389Slestatic void
264130389Slegv_drive_orphan(struct g_consumer *cp)
265130389Sle{
266130389Sle	struct g_geom *gp;
267130597Sle	struct gv_drive *d;
268130597Sle	struct gv_sd *s;
269130389Sle	int error;
270130389Sle
271130389Sle	g_topology_assert();
272130389Sle	gp = cp->geom;
273130389Sle	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
274130389Sle	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
275130389Sle		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
276130389Sle	error = cp->provider->error;
277130389Sle	if (error == 0)
278130389Sle		error = ENXIO;
279130389Sle	g_detach(cp);
280130389Sle	g_destroy_consumer(cp);
281130389Sle	if (!LIST_EMPTY(&gp->consumer))
282130389Sle		return;
283130597Sle	d = gp->softc;
284130597Sle	printf("gvinum: lost drive '%s'\n", d->name);
285130597Sle	d->geom = NULL;
286130597Sle	LIST_FOREACH(s, &d->subdisks, from_drive) {
287130597Sle		s->provider = NULL;
288130597Sle		s->consumer = NULL;
289130597Sle	}
290130597Sle	gv_set_drive_state(d, GV_DRIVE_DOWN, GV_SETSTATE_FORCE);
291130597Sle	gp->softc = NULL;
292130389Sle	g_wither_geom(gp, error);
293130389Sle}
294130389Sle
295130389Slestatic struct g_geom *
296130389Slegv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
297130389Sle{
298130389Sle	struct g_geom *gp, *gp2;
299130389Sle	struct g_consumer *cp;
300130389Sle	struct gv_drive *d;
301130389Sle	struct gv_sd *s;
302130389Sle	struct gv_softc *sc;
303130389Sle	struct gv_freelist *fl;
304130389Sle	struct gv_hdr *vhdr;
305130389Sle	int error;
306130389Sle	char errstr[ERRBUFSIZ];
307130389Sle
308130389Sle	vhdr = NULL;
309130389Sle	d = NULL;
310130389Sle
311130389Sle	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
312130389Sle	g_topology_assert();
313130389Sle
314130389Sle	if (pp->sectorsize == 0)
315130389Sle		return(NULL);
316130389Sle
317130389Sle	/* Find the VINUM class and its associated geom. */
318130389Sle	gp2 = find_vinum_geom();
319130389Sle	if (gp2 == NULL)
320130389Sle		return (NULL);
321130389Sle	sc = gp2->softc;
322130389Sle
323130389Sle	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
324130389Sle	gp->start = gv_drive_start;
325130389Sle	gp->spoiled = gv_drive_orphan;
326130389Sle	gp->orphan = gv_drive_orphan;
327130389Sle	gp->access = gv_drive_access;
328130389Sle	gp->start = gv_drive_start;
329130389Sle
330130389Sle	cp = g_new_consumer(gp);
331130389Sle	g_attach(cp, pp);
332130389Sle	error = g_access(cp, 1, 0, 0);
333130389Sle	if (error) {
334130389Sle		g_detach(cp);
335130389Sle		g_destroy_consumer(cp);
336130389Sle		g_destroy_geom(gp);
337130389Sle		return (NULL);
338130389Sle	}
339130389Sle
340130389Sle	g_topology_unlock();
341130389Sle
342130389Sle	/* Now check if the provided slice is a valid vinum drive. */
343130389Sle	do {
344130389Sle		vhdr = g_read_data(cp, GV_HDR_OFFSET, GV_HDR_LEN, &error);
345130389Sle		if (vhdr == NULL || error != 0)
346130389Sle			break;
347130389Sle		if (vhdr->magic != GV_MAGIC) {
348130389Sle			g_free(vhdr);
349130389Sle			break;
350130389Sle		}
351130389Sle
352130389Sle		/*
353130389Sle		 * We have found a valid vinum drive.  Let's see if it is
354130389Sle		 * already known in the configuration.
355130389Sle		 */
356130389Sle		g_topology_lock();
357130389Sle		g_access(cp, -1, 0, 0);
358130389Sle
359130389Sle		d = gv_find_drive(sc, vhdr->label.name);
360130389Sle
361130389Sle		/* We already know about this drive. */
362130389Sle		if (d != NULL) {
363130389Sle			bcopy(vhdr, d->hdr, sizeof(*vhdr));
364130389Sle
365130389Sle		/* This is a new drive. */
366130389Sle		} else {
367130389Sle			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
368130389Sle
369130389Sle			/* Initialize all needed variables. */
370130389Sle			d->size = pp->mediasize - GV_DATA_START;
371130389Sle			d->avail = d->size;
372130389Sle			d->hdr = vhdr;
373130389Sle			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
374130389Sle			LIST_INIT(&d->subdisks);
375130389Sle			LIST_INIT(&d->freelist);
376130389Sle
377130389Sle			/* We also need a freelist entry. */
378130389Sle			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
379130389Sle			fl->offset = GV_DATA_START;
380130389Sle			fl->size = d->avail;
381130389Sle			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
382130389Sle			d->freelist_entries = 1;
383130389Sle
384130389Sle			/* Save it into the main configuration. */
385130389Sle			LIST_INSERT_HEAD(&sc->drives, d, drive);
386130389Sle		}
387130389Sle
388130389Sle		gp->softc = d;
389130389Sle		d->geom = gp;
390130389Sle		strncpy(d->device, pp->name, GV_MAXDRIVENAME);
391130389Sle
392130389Sle		/*
393130389Sle		 * Find out which subdisks belong to this drive and crosslink
394130389Sle		 * them.
395130389Sle		 */
396130389Sle		LIST_FOREACH(s, &sc->subdisks, sd) {
397130389Sle			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
398130389Sle				/* XXX: errors ignored */
399130389Sle				gv_sd_to_drive(sc, d, s, errstr,
400130389Sle				    sizeof(errstr));
401130389Sle		}
402130389Sle
403130389Sle		/* This drive is now up for sure. */
404130389Sle		gv_set_drive_state(d, GV_DRIVE_UP, 0);
405130389Sle
406130389Sle		/*
407130389Sle		 * If there are subdisks on this drive, we need to create
408130389Sle		 * providers for them.
409130389Sle		 */
410130389Sle		if (d->sdcount)
411130389Sle			gv_drive_modify(d);
412130389Sle
413130389Sle		return (gp);
414130389Sle
415130389Sle	} while (0);
416130389Sle
417130389Sle	g_topology_lock();
418130389Sle	g_access(cp, -1, 0, 0);
419130389Sle
420130389Sle	g_detach(cp);
421130389Sle	g_destroy_consumer(cp);
422130389Sle	g_free(gp->softc);
423130389Sle	g_destroy_geom(gp);
424130389Sle	return (NULL);
425130389Sle}
426130389Sle
427130389Sle/*
428130389Sle * Modify the providers for the given drive 'd'.  It is assumed that the
429130389Sle * subdisk list of 'd' is already correctly set up.
430130389Sle */
431130389Slevoid
432130389Slegv_drive_modify(struct gv_drive *d)
433130389Sle{
434130389Sle	struct g_geom *gp;
435130389Sle	struct g_consumer *cp;
436130389Sle	struct g_provider *pp, *pp2;
437130389Sle	struct gv_sd *s;
438130389Sle	int nsd;
439130389Sle
440130389Sle	KASSERT(d != NULL, ("gv_drive_modify: null d"));
441130389Sle	gp = d->geom;
442130389Sle	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
443130389Sle	cp = LIST_FIRST(&gp->consumer);
444130389Sle	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
445130389Sle	pp = cp->provider;
446130389Sle	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
447130389Sle
448130389Sle	g_topology_assert();
449130389Sle
450130389Sle	nsd = 0;
451130389Sle	LIST_FOREACH(s, &d->subdisks, from_drive) {
452130389Sle		/* This subdisk already has a provider. */
453130389Sle		if (s->provider != NULL)
454130389Sle			continue;
455130389Sle		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
456130389Sle		pp2->mediasize = s->size;
457130389Sle		pp2->sectorsize = pp->sectorsize;
458130389Sle		g_error_provider(pp2, 0);
459130389Sle		s->provider = pp2;
460130389Sle		pp2->private = s;
461130389Sle	}
462130389Sle}
463130389Sle
464130389Slestatic int
465130389Slegv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
466130389Sle    struct g_geom *gp)
467130389Sle{
468130389Sle	/*struct gv_drive *d;*/
469130389Sle
470130389Sle	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
471130389Sle	g_topology_assert();
472130389Sle
473130389Sle	/* g_free(sc); */
474130389Sle	g_wither_geom(gp, ENXIO);
475130389Sle	return (0);
476130389Sle}
477130389Sle
478130389Sle#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
479130389Sle
480130389Slestatic struct g_class g_vinum_drive_class = {
481130389Sle	.name = VINUMDRIVE_CLASS_NAME,
482130389Sle	.taste = gv_drive_taste,
483130389Sle	.destroy_geom = gv_drive_destroy_geom
484130389Sle};
485130389Sle
486130389SleDECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
487