geom_vinum_drive.c revision 183514
/*-
 * Copyright (c) 2004, 2005 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 183514 2008-10-01 14:50:36Z lulf $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/errno.h>
#include <sys/endian.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vimage.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>

#define GV_LEGACY_I386	0
#define GV_LEGACY_AMD64 1
#define GV_LEGACY_SPARC64 2
#define GV_LEGACY_POWERPC 3

static void	gv_drive_dead(void *, int);
static void	gv_drive_worker(void *);
static int	gv_legacy_header_type(uint8_t *, int);

/*
 * Here are the "offset (size)" for the various struct gv_hdr fields,
 * for the legacy i386 (or 32-bit powerpc), legacy amd64 (or sparc64), and
 * current (cpu & endian agnostic) versions of the on-disk format of the vinum
 * header structure:
 *
 *       i386    amd64   current   field
 *     -------- -------- --------  -----
 *       0 ( 8)   0 ( 8)   0 ( 8)  magic
 *       8 ( 4)   8 ( 8)   8 ( 8)  config_length
 *      12 (32)  16 (32)  16 (32)  label.sysname
 *      44 (32)  48 (32)  48 (32)  label.name
 *      76 ( 4)  80 ( 8)  80 ( 8)  label.date_of_birth.tv_sec
 *      80 ( 4)  88 ( 8)  88 ( 8)  label.date_of_birth.tv_usec
 *      84 ( 4)  96 ( 8)  96 ( 8)  label.last_update.tv_sec
 *      88 ( 4) 104 ( 8) 104 ( 8)  label.last_update.tv_usec
 *      92 ( 8) 112 ( 8) 112 ( 8)  label.drive_size
 *     ======== ======== ========
 *     100      120      120       total size
 *
 * NOTE: i386 and amd64 formats are stored as little-endian; the current
 * format uses big-endian (network order).
 */

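/*
 * Illustrative sketch (not compiled into the driver): the "current" column
 * above written out as a flat on-disk record.  The struct below is a
 * hypothetical mirror of struct gv_hdr's serialized form, assuming
 * GV_HOSTNAME_LEN and GV_MAXDRIVENAME are both 32 as the table implies;
 * gv_read_header() and gv_write_header() below (de)serialize exactly these
 * offsets with their GV_GET64/GV_SET64BE macros.
 */
#if 0
struct gv_hdr_ondisk {
	uint64_t magic;			/* offset   0, big-endian */
	uint64_t config_length;		/* offset   8, big-endian */
	char	 sysname[32];		/* offset  16 */
	char	 name[32];		/* offset  48 */
	uint64_t date_of_birth_sec;	/* offset  80, big-endian */
	uint64_t date_of_birth_usec;	/* offset  88, big-endian */
	uint64_t last_update_sec;	/* offset  96, big-endian */
	uint64_t last_update_usec;	/* offset 104, big-endian */
	uint64_t drive_size;		/* offset 112, big-endian */
};					/* 120 bytes total */
#endif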

/* Checks for legacy format depending on platform. */
static int
gv_legacy_header_type(uint8_t *hdr, int bigendian)
{
	uint32_t *i32;
	int arch_32, arch_64, i;

	/* Set arch according to endianness. */
	if (bigendian) {
		arch_32 = GV_LEGACY_POWERPC;
		arch_64 = GV_LEGACY_SPARC64;
	} else {
		arch_32 = GV_LEGACY_I386;
		arch_64 = GV_LEGACY_AMD64;
	}

	/* if non-empty hostname overlaps 64-bit config_length */
	i32 = (uint32_t *)(hdr + 12);
	if (*i32 != 0)
		return (arch_32);
	/* check for non-empty hostname */
	if (hdr[16] != 0)
		return (arch_64);
	/* check bytes past 32-bit structure */
	for (i = 100; i < 120; i++)
		if (hdr[i] != 0)
			return (arch_32);
	/* check for overlapping timestamp */
	i32 = (uint32_t *)(hdr + 84);

	if (*i32 == 0)
		return (arch_64);
	return (arch_32);
}
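
/*
 * Worked example of the heuristic above: with a non-empty sysname and a
 * config_length that fits in 32 bits, a little-endian 64-bit (amd64) header
 * has zeroes in bytes 12-15 (the high half of config_length) and the first
 * sysname character at byte 16, so it classifies as arch_64; a 32-bit (i386)
 * header already stores that character at byte 12, so the first test returns
 * arch_32.
 */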

/*
 * Read the vinum header from disk, use the magic number to detect which
 * on-disk format it is in, and convert it into *m_hdr.
 */
int
gv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr)
{
	struct g_provider *pp;
	uint64_t magic_machdep;
	uint8_t *d_hdr;
	int be, off;

#define GV_GET32(endian)					\
		endian##32toh(*((uint32_t *)&d_hdr[off]));	\
		off += 4
#define GV_GET64(endian)					\
		endian##64toh(*((uint64_t *)&d_hdr[off]));	\
		off += 8
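
	/*
	 * Note: each getter expands to two statements, so an assignment such
	 * as "m_hdr->magic = GV_GET64(be);" consumes only the byte-swapped
	 * load; the trailing "off += 4"/"off += 8" then executes as its own
	 * statement.
	 */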

	KASSERT(m_hdr != NULL, ("gv_read_header: null m_hdr"));
	KASSERT(cp != NULL, ("gv_read_header: null cp"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("gv_read_header: null pp"));

	d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL);
	if (d_hdr == NULL)
		return (-1);
	off = 0;
	m_hdr->magic = GV_GET64(be);
	magic_machdep = *((uint64_t *)&d_hdr[0]);
	/*
	 * The big endian machines will have a reverse of GV_OLD_MAGIC, so we
	 * need to decide if we are running on a big endian machine as well as
	 * checking the magic against the reverse of GV_OLD_MAGIC.
	 */
	be = (m_hdr->magic == magic_machdep);
	if (m_hdr->magic == GV_MAGIC) {
		m_hdr->config_length = GV_GET64(be);
		off = 16;
		bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
		off += GV_HOSTNAME_LEN;
		bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
		off += GV_MAXDRIVENAME;
		m_hdr->label.date_of_birth.tv_sec = GV_GET64(be);
		m_hdr->label.date_of_birth.tv_usec = GV_GET64(be);
		m_hdr->label.last_update.tv_sec = GV_GET64(be);
		m_hdr->label.last_update.tv_usec = GV_GET64(be);
		m_hdr->label.drive_size = GV_GET64(be);
	} else if (m_hdr->magic != GV_OLD_MAGIC &&
	    m_hdr->magic != le64toh(GV_OLD_MAGIC)) {
		/* Not a gvinum drive. */
		g_free(d_hdr);
		return (-1);
	} else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_SPARC64) {
		printf("VINUM: detected legacy sparc64 header\n");
		m_hdr->magic = GV_MAGIC;
		/* Legacy sparc64 on-disk header */
		m_hdr->config_length = GV_GET64(be);
		bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN);
		off += GV_HOSTNAME_LEN;
		bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME);
		off += GV_MAXDRIVENAME;
		m_hdr->label.date_of_birth.tv_sec = GV_GET64(be);
		m_hdr->label.date_of_birth.tv_usec = GV_GET64(be);
		m_hdr->label.last_update.tv_sec = GV_GET64(be);
		m_hdr->label.last_update.tv_usec = GV_GET64(be);
		m_hdr->label.drive_size = GV_GET64(be);
	} else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_POWERPC) {
		printf("VINUM: detected legacy PowerPC header\n");
		m_hdr->magic = GV_MAGIC;
		/* legacy 32-bit big endian on-disk header */
		m_hdr->config_length = GV_GET32(be);
		bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
		off += GV_HOSTNAME_LEN;
		bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
		off += GV_MAXDRIVENAME;
		m_hdr->label.date_of_birth.tv_sec = GV_GET32(be);
		m_hdr->label.date_of_birth.tv_usec = GV_GET32(be);
		m_hdr->label.last_update.tv_sec = GV_GET32(be);
		m_hdr->label.last_update.tv_usec = GV_GET32(be);
		m_hdr->label.drive_size = GV_GET64(be);
	} else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_I386) {
		printf("VINUM: detected legacy i386 header\n");
		m_hdr->magic = GV_MAGIC;
		/* legacy i386 on-disk header */
		m_hdr->config_length = GV_GET32(le);
		bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN);
		off += GV_HOSTNAME_LEN;
		bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME);
		off += GV_MAXDRIVENAME;
		m_hdr->label.date_of_birth.tv_sec = GV_GET32(le);
		m_hdr->label.date_of_birth.tv_usec = GV_GET32(le);
		m_hdr->label.last_update.tv_sec = GV_GET32(le);
		m_hdr->label.last_update.tv_usec = GV_GET32(le);
		m_hdr->label.drive_size = GV_GET64(le);
	} else {
		printf("VINUM: detected legacy amd64 header\n");
		m_hdr->magic = GV_MAGIC;
		/* legacy amd64 on-disk header */
		m_hdr->config_length = GV_GET64(le);
		bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN);
		off += GV_HOSTNAME_LEN;
		bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME);
		off += GV_MAXDRIVENAME;
		m_hdr->label.date_of_birth.tv_sec = GV_GET64(le);
		m_hdr->label.date_of_birth.tv_usec = GV_GET64(le);
		m_hdr->label.last_update.tv_sec = GV_GET64(le);
		m_hdr->label.last_update.tv_usec = GV_GET64(le);
		m_hdr->label.drive_size = GV_GET64(le);
	}

	g_free(d_hdr);
	return (0);
}

/* Write out the gvinum header. */
int
gv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr)
{
	uint8_t d_hdr[GV_HDR_LEN];
	int off, ret;

#define GV_SET32BE(field)					\
	do {							\
		*((uint32_t *)&d_hdr[off]) = htobe32(field);	\
		off += 4;					\
	} while (0)
#define GV_SET64BE(field)					\
	do {							\
		*((uint64_t *)&d_hdr[off]) = htobe64(field);	\
		off += 8;					\
	} while (0)

	KASSERT(m_hdr != NULL, ("gv_write_header: null m_hdr"));

	off = 0;
	memset(d_hdr, 0, GV_HDR_LEN);
	GV_SET64BE(m_hdr->magic);
	GV_SET64BE(m_hdr->config_length);
	off = 16;
	bcopy(m_hdr->label.sysname, d_hdr + off, GV_HOSTNAME_LEN);
	off += GV_HOSTNAME_LEN;
	bcopy(m_hdr->label.name, d_hdr + off, GV_MAXDRIVENAME);
	off += GV_MAXDRIVENAME;
	GV_SET64BE(m_hdr->label.date_of_birth.tv_sec);
	GV_SET64BE(m_hdr->label.date_of_birth.tv_usec);
	GV_SET64BE(m_hdr->label.last_update.tv_sec);
	GV_SET64BE(m_hdr->label.last_update.tv_usec);
	GV_SET64BE(m_hdr->label.drive_size);

	ret = g_write_data(cp, GV_HDR_OFFSET, d_hdr, GV_HDR_LEN);
	return (ret);
}
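
/*
 * Illustrative sketch (not compiled): since gv_write_header() always emits
 * the current big-endian layout and gv_read_header() then takes the GV_MAGIC
 * branch, a freshly written header should read back with identical field
 * values.  The helper name is made up for the example; it assumes the caller
 * already holds read and write access on 'cp' and does not hold the topology
 * lock.
 */
#if 0
static int
gv_hdr_roundtrip_check(struct g_consumer *cp, struct gv_hdr *hdr)
{
	struct gv_hdr chk;
	int error;

	error = gv_write_header(cp, hdr);
	if (error != 0)
		return (error);
	bzero(&chk, sizeof(chk));
	if (gv_read_header(cp, &chk) != 0)
		return (EIO);
	if (chk.magic != hdr->magic ||
	    chk.config_length != hdr->config_length ||
	    chk.label.drive_size != hdr->label.drive_size)
		return (EINVAL);
	return (0);
}
#endif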

void
gv_config_new_drive(struct gv_drive *d)
{
	struct gv_hdr *vhdr;
	struct gv_freelist *fl;

	KASSERT(d != NULL, ("config_new_drive: NULL d"));

	vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
	vhdr->magic = GV_MAGIC;
	vhdr->config_length = GV_CFG_LEN;

	mtx_lock(&hostname_mtx);
	bcopy(G_hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
	mtx_unlock(&hostname_mtx);
	strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
	microtime(&vhdr->label.date_of_birth);

	d->hdr = vhdr;

	LIST_INIT(&d->subdisks);
	LIST_INIT(&d->freelist);

	fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
	fl->offset = GV_DATA_START;
	fl->size = d->avail;
	LIST_INSERT_HEAD(&d->freelist, fl, freelist);
	d->freelist_entries = 1;

	d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
	bioq_init(d->bqueue);
	mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
	kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
	d->flags |= GV_DRIVE_THREAD_ACTIVE;
}

void
gv_save_config_all(struct gv_softc *sc)
{
	struct gv_drive *d;

	g_topology_assert();

	LIST_FOREACH(d, &sc->drives, drive) {
		if (d->geom == NULL)
			continue;
		gv_save_config(NULL, d, sc);
	}
}

/* Save the vinum configuration back to disk. */
void
gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp2;
	struct gv_hdr *vhdr, *hdr;
	struct sbuf *sb;
	int error;

	g_topology_assert();

	KASSERT(d != NULL, ("gv_save_config: null d"));
	KASSERT(sc != NULL, ("gv_save_config: null sc"));

	/*
	 * We can't save the config on a drive that isn't up, but drives that
	 * were just created aren't officially up yet, so we check a special
	 * flag.
	 */
	if ((d->state != GV_DRIVE_UP) && !(d->flags & GV_DRIVE_NEWBORN))
		return;

	if (cp == NULL) {
		gp = d->geom;
		KASSERT(gp != NULL, ("gv_save_config: null gp"));
		cp2 = LIST_FIRST(&gp->consumer);
		KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
	} else
		cp2 = cp;

	vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
	vhdr->magic = GV_MAGIC;
	vhdr->config_length = GV_CFG_LEN;

	hdr = d->hdr;
	if (hdr == NULL) {
		printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
		g_free(vhdr);
		return;
	}
	microtime(&hdr->label.last_update);
	bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));

	sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
	gv_format_config(sc, sb, 1, NULL);
	sbuf_finish(sb);

	error = g_access(cp2, 0, 1, 0);
	if (error) {
		printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
		    d->name, error);
		sbuf_delete(sb);
		g_free(vhdr);
		return;
	}
	g_topology_unlock();

	do {
		error = gv_write_header(cp2, vhdr);
		if (error) {
			printf("GEOM_VINUM: writing vhdr failed on drive %s, "
			    "errno %d\n", d->name, error);
			break;
		}

		error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
		    GV_CFG_LEN);
		if (error) {
			printf("GEOM_VINUM: writing first config copy failed "
			    "on drive %s, errno %d\n", d->name, error);
			break;
		}

		error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
		    sbuf_data(sb), GV_CFG_LEN);
		if (error)
			printf("GEOM_VINUM: writing second config copy failed "
			    "on drive %s, errno %d\n", d->name, error);
	} while (0);

	g_topology_lock();
	g_access(cp2, 0, -1, 0);
	sbuf_delete(sb);
	g_free(vhdr);

	if (d->geom != NULL)
		gv_drive_modify(d);
}
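
/*
 * Resulting on-disk layout of a gvinum drive (offsets are the GV_* constants
 * from geom_vinum_var.h): the header written by gv_write_header() lives at
 * GV_HDR_OFFSET, two identical copies of the text configuration follow at
 * GV_CFG_OFFSET and GV_CFG_OFFSET + GV_CFG_LEN, and user data starts at
 * GV_DATA_START (the offset used for the drive's initial freelist entry).
 */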

/* This resembles g_slice_access(). */
static int
gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_provider *pp2;
	struct gv_drive *d;
	struct gv_sd *s, *s2;
	int error;

	gp = pp->geom;
	cp = LIST_FIRST(&gp->consumer);
	if (cp == NULL)
		return (0);

	d = gp->softc;
	if (d == NULL)
		return (0);

	s = pp->private;
	KASSERT(s != NULL, ("gv_drive_access: NULL s"));

	LIST_FOREACH(s2, &d->subdisks, from_drive) {
		if (s == s2)
			continue;
		if (s->drive_offset + s->size <= s2->drive_offset)
			continue;
		if (s2->drive_offset + s2->size <= s->drive_offset)
			continue;

		/* Overlap. */
		pp2 = s2->provider;
		KASSERT(pp2 != NULL, ("gv_drive_access: NULL pp2"));
		if ((pp->acw + dw) > 0 && pp2->ace > 0)
			return (EPERM);
		if ((pp->ace + de) > 0 && pp2->acw > 0)
			return (EPERM);
	}

	error = g_access(cp, dr, dw, de);
	return (error);
}
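
/*
 * Example of the exclusion rule enforced above: if two subdisks overlap on
 * the drive, a request to open one of them for writing is refused (EPERM)
 * while the other is held exclusively, and a request for exclusive access is
 * refused while the other is open for writing.
 */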

static void
gv_drive_done(struct bio *bp)
{
	struct gv_drive *d;

	/* Put the BIO on the worker queue again. */
	d = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&d->bqueue_mtx);
	bioq_insert_tail(d->bqueue, bp);
	wakeup(d);
	mtx_unlock(&d->bqueue_mtx);
}

static void
gv_drive_start(struct bio *bp)
{
	struct gv_drive *d;
	struct gv_sd *s;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	s = bp->bio_to->private;
	if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	d = bp->bio_to->geom->softc;

	/*
	 * Put the BIO on the worker queue, where the worker thread will pick
	 * it up.
	 */
	mtx_lock(&d->bqueue_mtx);
	bioq_disksort(d->bqueue, bp);
	wakeup(d);
	mtx_unlock(&d->bqueue_mtx);
}

static void
gv_drive_worker(void *arg)
{
	struct bio *bp, *cbp;
	struct g_geom *gp;
	struct g_provider *pp;
	struct gv_drive *d;
	struct gv_sd *s;
	int error;

	d = arg;

	mtx_lock(&d->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (d->flags & GV_DRIVE_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(d->bqueue);
		if (bp == NULL) {
			msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&d->bqueue_mtx);

		pp = bp->bio_to;
		gp = pp->geom;

		/* Completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			error = bp->bio_error;

			/* Deliver the original request. */
			g_std_done(bp);

			/* The request had an error, we need to clean up. */
			if (error != 0) {
				g_topology_lock();
				gv_set_drive_state(d, GV_DRIVE_DOWN,
				    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
				g_topology_unlock();
				g_post_event(gv_drive_dead, d, M_WAITOK, d,
				    NULL);
			}

		/* New request, needs to be sent downwards. */
		} else {
			s = pp->private;

			if ((s->state == GV_SD_DOWN) ||
			    (s->state == GV_SD_STALE)) {
				g_io_deliver(bp, ENXIO);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			if (bp->bio_offset > s->size) {
				g_io_deliver(bp, EINVAL);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}

			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				g_io_deliver(bp, ENOMEM);
				mtx_lock(&d->bqueue_mtx);
				continue;
			}
			if (cbp->bio_offset + cbp->bio_length > s->size)
				cbp->bio_length = s->size -
				    cbp->bio_offset;
			cbp->bio_done = gv_drive_done;
			cbp->bio_offset += s->drive_offset;
			g_io_request(cbp, LIST_FIRST(&gp->consumer));
		}

		mtx_lock(&d->bqueue_mtx);
	}

	while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
		mtx_unlock(&d->bqueue_mtx);
		if (bp->bio_cflags & GV_BIO_DONE)
			g_std_done(bp);
		else
			g_io_deliver(bp, ENXIO);
		mtx_lock(&d->bqueue_mtx);
	}
	mtx_unlock(&d->bqueue_mtx);
	d->flags |= GV_DRIVE_THREAD_DEAD;

	kproc_exit(ENXIO);
}
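
/*
 * Worker thread life cycle: gv_drive_worker() is started from
 * gv_config_new_drive() or gv_drive_taste() with GV_DRIVE_THREAD_ACTIVE set,
 * loops on the drive's bio queue, and shuts down once someone (normally
 * gv_kill_drive_thread()) sets GV_DRIVE_THREAD_DIE: remaining queued requests
 * are completed or failed with ENXIO, GV_DRIVE_THREAD_DEAD is flagged, and
 * the kernel process exits.
 */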

static void
gv_drive_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_drive *d;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
	d = gp->softc;
	if (d != NULL) {
		gv_set_drive_state(d, GV_DRIVE_DOWN,
		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
		g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
	} else
		g_wither_geom(gp, ENXIO);
}

static struct g_geom *
gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp, *gp2;
	struct g_consumer *cp;
	struct gv_drive *d;
	struct gv_sd *s;
	struct gv_softc *sc;
	struct gv_freelist *fl;
	struct gv_hdr *vhdr;
	int error;
	char *buf, errstr[ERRBUFSIZ];

	vhdr = NULL;
	d = NULL;

	g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* Find the VINUM class and its associated geom. */
	gp2 = find_vinum_geom();
	if (gp2 == NULL)
		return (NULL);
	sc = gp2->softc;

	gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
	gp->start = gv_drive_start;
	gp->orphan = gv_drive_orphan;
	gp->access = gv_drive_access;

	cp = g_new_consumer(gp);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		return (NULL);
	}

	g_topology_unlock();

	/* Now check if the provided slice is a valid vinum drive. */
	do {
		vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
		error = gv_read_header(cp, vhdr);
		if (error) {
			g_free(vhdr);
			break;
		}

		/* A valid vinum drive, let's parse the on-disk information. */
		buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL);
		if (buf == NULL) {
			g_free(vhdr);
			break;
		}
		g_topology_lock();
		gv_parse_config(sc, buf, 1);
		g_free(buf);

		/*
		 * Let's see if this drive is already known in the
		 * configuration.
		 */
		d = gv_find_drive(sc, vhdr->label.name);

		/* We already know about this drive. */
		if (d != NULL) {
			/* Check if this drive already has a geom. */
			if (d->geom != NULL) {
				g_topology_unlock();
				g_free(vhdr);
				break;
			}
			bcopy(vhdr, d->hdr, sizeof(*vhdr));
			g_free(vhdr);

		/* This is a new drive. */
		} else {
			d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);

			/* Initialize all needed variables. */
			d->size = pp->mediasize - GV_DATA_START;
			d->avail = d->size;
			d->hdr = vhdr;
			strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
			LIST_INIT(&d->subdisks);
			LIST_INIT(&d->freelist);

			/* We also need a freelist entry. */
			fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
			fl->offset = GV_DATA_START;
			fl->size = d->avail;
			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
			d->freelist_entries = 1;

			/* Save it into the main configuration. */
			LIST_INSERT_HEAD(&sc->drives, d, drive);
		}

		/*
		 * Create bio queue, queue mutex and a worker thread, if
		 * necessary.
		 */
		if (d->bqueue == NULL) {
			d->bqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(d->bqueue);
		}
		if (mtx_initialized(&d->bqueue_mtx) == 0)
			mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);

		if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
			kproc_create(gv_drive_worker, d, NULL, 0, 0,
			    "gv_d %s", d->name);
			d->flags |= GV_DRIVE_THREAD_ACTIVE;
		}

		g_access(cp, -1, 0, 0);

		gp->softc = d;
		d->geom = gp;
		d->vinumconf = sc;
		strncpy(d->device, pp->name, GV_MAXDRIVENAME);

		/*
		 * Find out which subdisks belong to this drive and crosslink
		 * them.
		 */
		LIST_FOREACH(s, &sc->subdisks, sd) {
			if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
				/* XXX: errors ignored */
				gv_sd_to_drive(sc, d, s, errstr,
				    sizeof(errstr));
		}

		/* This drive is now up for sure. */
		gv_set_drive_state(d, GV_DRIVE_UP, 0);

		/*
		 * If there are subdisks on this drive, we need to create
		 * providers for them.
		 */
		if (d->sdcount)
			gv_drive_modify(d);

		return (gp);

	} while (0);

	g_topology_lock();
	g_access(cp, -1, 0, 0);

	g_detach(cp);
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	return (NULL);
}

/*
 * Modify the providers for the given drive 'd'.  It is assumed that the
 * subdisk list of 'd' is already correctly set up.
 */
void
gv_drive_modify(struct gv_drive *d)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_provider *pp, *pp2;
	struct gv_sd *s;

	KASSERT(d != NULL, ("gv_drive_modify: null d"));
	gp = d->geom;
	KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
	cp = LIST_FIRST(&gp->consumer);
	KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("gv_drive_modify: null pp"));

	g_topology_assert();

	LIST_FOREACH(s, &d->subdisks, from_drive) {
		/* This subdisk already has a provider. */
		if (s->provider != NULL)
			continue;
		pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
		pp2->mediasize = s->size;
		pp2->sectorsize = pp->sectorsize;
		g_error_provider(pp2, 0);
		s->provider = pp2;
		pp2->private = s;
	}
}

static void
gv_drive_dead(void *arg, int flag)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct gv_drive *d;
	struct gv_sd *s;

	g_topology_assert();
	KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));

	if (flag == EV_CANCEL)
		return;

	d = arg;
	if (d->state != GV_DRIVE_DOWN)
		return;

	g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);

	gp = d->geom;
	if (gp == NULL)
		return;

	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (cp->nstart != cp->nend) {
			printf("GEOM_VINUM: dead drive '%s' still has "
			    "active requests, can't detach consumer\n",
			    d->name);
			g_post_event(gv_drive_dead, d, M_WAITOK, d,
			    NULL);
			return;
		}
		if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	}

	printf("GEOM_VINUM: lost drive '%s'\n", d->name);
	d->geom = NULL;
	LIST_FOREACH(s, &d->subdisks, from_drive) {
		s->provider = NULL;
		s->consumer = NULL;
	}
	gv_kill_drive_thread(d);
	gp->softc = NULL;
	g_wither_geom(gp, ENXIO);
}

static int
gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_drive *d;

	g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
	g_topology_assert();

	d = gp->softc;
	gv_kill_drive_thread(d);

	g_wither_geom(gp, ENXIO);
	return (0);
}

#define	VINUMDRIVE_CLASS_NAME "VINUMDRIVE"

static struct g_class g_vinum_drive_class = {
	.name = VINUMDRIVE_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_drive_taste,
	.destroy_geom = gv_drive_destroy_geom
};

DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);