11556Srgrimes/*-
21556Srgrimes * SPDX-License-Identifier: BSD-2-Clause
31556Srgrimes *
41556Srgrimes *  Copyright (c) 2004, 2007 Lukas Ertl
51556Srgrimes *  Copyright (c) 2007, 2009 Ulf Lilleengen
61556Srgrimes *  All rights reserved.
71556Srgrimes *
81556Srgrimes * Redistribution and use in source and binary forms, with or without
91556Srgrimes * modification, are permitted provided that the following conditions
101556Srgrimes * are met:
111556Srgrimes * 1. Redistributions of source code must retain the above copyright
121556Srgrimes *    notice, this list of conditions and the following disclaimer.
131556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141556Srgrimes *    notice, this list of conditions and the following disclaimer in the
151556Srgrimes *    documentation and/or other materials provided with the distribution.
161556Srgrimes *
171556Srgrimes * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
181556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
211556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251556Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271556Srgrimes * SUCH DAMAGE.
281556Srgrimes *
291556Srgrimes */
301556Srgrimes
311556Srgrimes#include <sys/param.h>
321556Srgrimes#include <sys/bio.h>
331556Srgrimes#include <sys/kernel.h>
34114433Sobrien#include <sys/kthread.h>
351556Srgrimes#include <sys/lock.h>
3620420Ssteve#include <sys/malloc.h>
371556Srgrimes#include <sys/module.h>
381556Srgrimes#include <sys/mutex.h>
391556Srgrimes#include <sys/sbuf.h>
401556Srgrimes#include <sys/sysctl.h>
411556Srgrimes#include <sys/systm.h>
4236007Scharnier
43114433Sobrien#include <geom/geom.h>
4435773Scharnier#include <geom/geom_dbg.h>
4599109Sobrien#include <geom/vinum/geom_vinum_var.h>
4699109Sobrien#include <geom/vinum/geom_vinum.h>
471556Srgrimes#include <geom/vinum/geom_vinum_raid5.h>
481556Srgrimes
/*
 * Sysctl glue: create the "vinum" node below the existing _kern_geom node
 * and export the debug level beneath it as "debug".
 */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_VINUM stuff");
/*
 * Debug level, initially 0.  Registered with CTLFLAG_RWTUN.
 * NOTE(review): presumably consulted by the G_VINUM_DEBUG() macro -- confirm
 * against geom_vinum.h.
 */
u_int g_vinum_debug = 0;
SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RWTUN, &g_vinum_debug, 0,
    "Debug level");
55266488Sthomas
/*
 * Forward declarations of the file-local request handlers that gv_config()
 * dispatches to for the "create", "attach", "detach" and parity verbs.
 */
static int	gv_create(struct g_geom *, struct gctl_req *);
static void	gv_attach(struct gv_softc *, struct gctl_req *);
static void	gv_detach(struct gv_softc *, struct gctl_req *);
static void	gv_parityop(struct gv_softc *, struct gctl_req *);
60111629Smarkm
6119720Sphkstatic void
621556Srgrimesgv_orphan(struct g_consumer *cp)
631556Srgrimes{
641556Srgrimes	struct g_geom *gp;
651556Srgrimes	struct gv_softc *sc;
661556Srgrimes	struct gv_drive *d;
671556Srgrimes
681556Srgrimes	g_topology_assert();
691556Srgrimes
7090108Simp	KASSERT(cp != NULL, ("gv_orphan: null cp"));
7190108Simp	gp = cp->geom;
7290108Simp	KASSERT(gp != NULL, ("gv_orphan: null gp"));
7390108Simp	sc = gp->softc;
741556Srgrimes	KASSERT(sc != NULL, ("gv_orphan: null sc"));
751556Srgrimes	d = cp->private;
761556Srgrimes	KASSERT(d != NULL, ("gv_orphan: null d"));
7790108Simp
78111629Smarkm	g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name);
7991079Smarkm
80266488Sthomas	gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0);
81111629Smarkm}
8251208Sgreen
83111629Smarkmvoid
8451249Sgreengv_start(struct bio *bp)
85133762Srwatson{
86250469Seadler	struct g_geom *gp;
871556Srgrimes	struct gv_softc *sc;
881556Srgrimes
8990108Simp	gp = bp->bio_to->geom;
901556Srgrimes	sc = gp->softc;
9119720Sphk
921556Srgrimes	switch (bp->bio_cmd) {
931556Srgrimes	case BIO_READ:
941556Srgrimes	case BIO_WRITE:
95250469Seadler	case BIO_DELETE:
961556Srgrimes		break;
971556Srgrimes	case BIO_GETATTR:
981556Srgrimes	default:
991556Srgrimes		g_io_deliver(bp, EOPNOTSUPP);
1001556Srgrimes		return;
1011556Srgrimes	}
1021556Srgrimes	mtx_lock(&sc->bqueue_mtx);
1031556Srgrimes	bioq_disksort(sc->bqueue_down, bp);
104249063Sbrooks	wakeup(sc);
105249063Sbrooks	mtx_unlock(&sc->bqueue_mtx);
106249063Sbrooks}
107249063Sbrooks
108249063Sbrooksvoid
109249063Sbrooksgv_done(struct bio *bp)
110249063Sbrooks{
1111556Srgrimes	struct g_geom *gp;
1121556Srgrimes	struct gv_softc *sc;
1131556Srgrimes
114126667Sphk	KASSERT(bp != NULL, ("NULL bp"));
115126667Sphk
116126667Sphk	gp = bp->bio_from->geom;
117126667Sphk	sc = gp->softc;
118126667Sphk
119126667Sphk	mtx_lock(&sc->bqueue_mtx);
120126667Sphk	bioq_disksort(sc->bqueue_up, bp);
121126667Sphk	wakeup(sc);
122126667Sphk	mtx_unlock(&sc->bqueue_mtx);
123126667Sphk}
1241556Srgrimes
12590108Simpint
1261556Srgrimesgv_access(struct g_provider *pp, int dr, int dw, int de)
1271556Srgrimes{
12819720Sphk	struct g_geom *gp;
1291556Srgrimes	struct gv_softc *sc;
1301556Srgrimes	struct gv_drive *d, *d2;
1311556Srgrimes	int error;
1321556Srgrimes
1331556Srgrimes	gp = pp->geom;
13448051Sgreen	sc = gp->softc;
13548026Sgreen	/*
1361556Srgrimes	 * We want to modify the read count with the write count in case we have
1371556Srgrimes	 * plexes in a RAID-5 organization.
1381556Srgrimes	 */
1391556Srgrimes	dr += dw;
1401556Srgrimes
1411556Srgrimes	LIST_FOREACH(d, &sc->drives, drive) {
1421556Srgrimes		if (d->consumer == NULL)
1431556Srgrimes			continue;
1441556Srgrimes		error = g_access(d->consumer, dr, dw, de);
1451556Srgrimes		if (error) {
1461556Srgrimes			LIST_FOREACH(d2, &sc->drives, drive) {
1471556Srgrimes				if (d == d2)
1481556Srgrimes					break;
1491556Srgrimes				g_access(d2->consumer, -dr, -dw, -de);
1501556Srgrimes			}
1511556Srgrimes			G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name,
1521556Srgrimes			    error);
1531556Srgrimes			return (error);
1541556Srgrimes		}
1551556Srgrimes	}
1561556Srgrimes	return (0);
15748026Sgreen}
1581556Srgrimes
1591556Srgrimesstatic void
1601556Srgrimesgv_init(struct g_class *mp)
16148026Sgreen{
1621556Srgrimes	struct g_geom *gp;
1631556Srgrimes	struct gv_softc *sc;
1641556Srgrimes
1651556Srgrimes	g_trace(G_T_TOPOLOGY, "gv_init(%p)", mp);
1661556Srgrimes
1671556Srgrimes	gp = g_new_geomf(mp, "VINUM");
1681556Srgrimes	gp->spoiled = gv_orphan;
1691556Srgrimes	gp->orphan = gv_orphan;
1701556Srgrimes	gp->access = gv_access;
17151249Sgreen	gp->start = gv_start;
1721556Srgrimes	gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO);
17351208Sgreen	sc = gp->softc;
1741556Srgrimes	sc->geom = gp;
17551208Sgreen	sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head),
17651208Sgreen	    M_WAITOK | M_ZERO);
17751208Sgreen	sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head),
178266488Sthomas	    M_WAITOK | M_ZERO);
179266488Sthomas	bioq_init(sc->bqueue_down);
1801556Srgrimes	bioq_init(sc->bqueue_up);
1811556Srgrimes	LIST_INIT(&sc->drives);
1821556Srgrimes	LIST_INIT(&sc->subdisks);
1831556Srgrimes	LIST_INIT(&sc->plexes);
1841556Srgrimes	LIST_INIT(&sc->volumes);
1851556Srgrimes	TAILQ_INIT(&sc->equeue);
1861556Srgrimes	mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF);
1871556Srgrimes	mtx_init(&sc->equeue_mtx, "gv_equeue", NULL, MTX_DEF);
1881556Srgrimes	mtx_init(&sc->bqueue_mtx, "gv_bqueue", NULL, MTX_DEF);
1891556Srgrimes	kproc_create(gv_worker, sc, &sc->worker, 0, 0, "gv_worker");
19063688Sgreen}
19163688Sgreen
1921556Srgrimesstatic int
19362311Sgreengv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
19462311Sgreen{
19562311Sgreen	struct gv_softc *sc;
19662311Sgreen
1971556Srgrimes	g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp);
198126667Sphk
199126667Sphk	g_topology_assert();
200126667Sphk	sc = gp->softc;
201126667Sphk
20246073Simp	if (sc != NULL) {
203126667Sphk		gv_worker_exit(sc);
204126667Sphk		gp->softc = NULL;
2055701Sache		g_wither_geom(gp, ENXIO);
206126667Sphk	}
207126667Sphk
208126667Sphk	return (0);
209126667Sphk}
210126667Sphk
211126667Sphk/* Handle userland request of attaching object. */
212126667Sphkstatic void
213126667Sphkgv_attach(struct gv_softc *sc, struct gctl_req *req)
214126667Sphk{
215126667Sphk	struct gv_volume *v;
216126667Sphk	struct gv_plex *p;
217126667Sphk	struct gv_sd *s;
218126667Sphk	off_t *offset;
219126667Sphk	int *rename, type_child, type_parent;
220126667Sphk	char *child, *parent;
221126667Sphk
222126667Sphk	child = gctl_get_param(req, "child", NULL);
223126667Sphk	if (child == NULL) {
224126667Sphk		gctl_error(req, "no child given");
225126667Sphk		return;
226126667Sphk	}
227126667Sphk	parent = gctl_get_param(req, "parent", NULL);
228126667Sphk	if (parent == NULL) {
229126667Sphk		gctl_error(req, "no parent given");
230126667Sphk		return;
231126667Sphk	}
232126667Sphk	offset = gctl_get_paraml(req, "offset", sizeof(*offset));
233126667Sphk	if (offset == NULL) {
234126667Sphk		gctl_error(req, "no offset given");
235126667Sphk		return;
236126667Sphk	}
237126667Sphk	rename = gctl_get_paraml(req, "rename", sizeof(*rename));
238126667Sphk	if (rename == NULL) {
239126667Sphk		gctl_error(req, "no rename flag given");
240126667Sphk		return;
241126667Sphk	}
242126667Sphk
243126667Sphk	type_child = gv_object_type(sc, child);
24451208Sgreen	type_parent = gv_object_type(sc, parent);
24546073Simp
24651208Sgreen	switch (type_child) {
247239991Sed	case GV_TYPE_PLEX:
248239991Sed		if (type_parent != GV_TYPE_VOL) {
2491556Srgrimes			gctl_error(req, "no such volume to attach to");
2501556Srgrimes			return;
2511556Srgrimes		}
25290108Simp		v = gv_find_vol(sc, parent);
2531556Srgrimes		p = gv_find_plex(sc, child);
2541556Srgrimes		gv_post_event(sc, GV_EVENT_ATTACH_PLEX, p, v, *offset, *rename);
25550460Sgreen		break;
2561556Srgrimes	case GV_TYPE_SD:
25751208Sgreen		if (type_parent != GV_TYPE_PLEX) {
2581556Srgrimes			gctl_error(req, "no such plex to attach to");
25962311Sgreen			return;
26062311Sgreen		}
26150460Sgreen		p = gv_find_plex(sc, parent);
26250487Sgreen		s = gv_find_sd(sc, child);
26351913Sgreen		gv_post_event(sc, GV_EVENT_ATTACH_SD, s, p, *offset, *rename);
26450487Sgreen		break;
26550487Sgreen	default:
26650487Sgreen		gctl_error(req, "invalid child type");
267109873Sphk		break;
26851212Sgreen	}
26950487Sgreen}
27050487Sgreen
27150487Sgreen/* Handle userland request of detaching object. */
27251335Sgreenstatic void
27351335Sgreengv_detach(struct gv_softc *sc, struct gctl_req *req)
27451335Sgreen{
27551335Sgreen	struct gv_plex *p;
27651335Sgreen	struct gv_sd *s;
27751335Sgreen	int *flags, type;
27851249Sgreen	char *object;
2791556Srgrimes
2801556Srgrimes	object = gctl_get_param(req, "object", NULL);
2811556Srgrimes	if (object == NULL) {
28290108Simp		gctl_error(req, "no argument given");
2831556Srgrimes		return;
28448026Sgreen	}
2851556Srgrimes
28628430Sjlemon	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
28751335Sgreen	type = gv_object_type(sc, object);
28851335Sgreen	switch (type) {
2891556Srgrimes	case GV_TYPE_PLEX:
29051335Sgreen		p = gv_find_plex(sc, object);
29151335Sgreen		gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0);
29251335Sgreen		break;
293111629Smarkm	case GV_TYPE_SD:
29451335Sgreen		s = gv_find_sd(sc, object);
29551335Sgreen		gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0);
29651335Sgreen		break;
2971556Srgrimes	default:
2981556Srgrimes		gctl_error(req, "invalid object type");
29951208Sgreen		break;
3001556Srgrimes	}
3011556Srgrimes}
30248026Sgreen
303133762Srwatson/* Handle userland requests for creating new objects. */
304133762Srwatsonstatic int
305133762Srwatsongv_create(struct g_geom *gp, struct gctl_req *req)
3061556Srgrimes{
3071556Srgrimes	struct gv_softc *sc;
3081556Srgrimes	struct gv_drive *d, *d2;
30948026Sgreen	struct gv_plex *p, *p2;
3101556Srgrimes	struct gv_sd *s, *s2;
3111556Srgrimes	struct gv_volume *v, *v2;
3121556Srgrimes	struct g_provider *pp;
3131556Srgrimes	int i, *drives, *flags, *plexes, *subdisks, *volumes;
3141556Srgrimes	char buf[20];
3151556Srgrimes
3161556Srgrimes	g_topology_assert();
3171556Srgrimes
31848026Sgreen	sc = gp->softc;
3191556Srgrimes
3201556Srgrimes	/* Find out how many of each object have been passed in. */
3211556Srgrimes	volumes = gctl_get_paraml(req, "volumes", sizeof(*volumes));
3221556Srgrimes	plexes = gctl_get_paraml(req, "plexes", sizeof(*plexes));
32328430Sjlemon	subdisks = gctl_get_paraml(req, "subdisks", sizeof(*subdisks));
3241556Srgrimes	drives = gctl_get_paraml(req, "drives", sizeof(*drives));
3251556Srgrimes	if (volumes == NULL || plexes == NULL || subdisks == NULL ||
3261556Srgrimes	    drives == NULL) {
3271556Srgrimes		gctl_error(req, "number of objects not given");
3281556Srgrimes		return (-1);
32951249Sgreen	}
3301556Srgrimes	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
3311556Srgrimes	if (flags == NULL) {
3321556Srgrimes		gctl_error(req, "flags not given");
3331556Srgrimes		return (-1);
33451249Sgreen	}
3351556Srgrimes
3361556Srgrimes	/* First, handle drive definitions ... */
3371556Srgrimes	for (i = 0; i < *drives; i++) {
3381556Srgrimes		snprintf(buf, sizeof(buf), "drive%d", i);
3391556Srgrimes		d2 = gctl_get_paraml(req, buf, sizeof(*d2));
3401556Srgrimes		if (d2 == NULL) {
3411556Srgrimes			gctl_error(req, "no drive definition given");
3421556Srgrimes			return (-1);
3431556Srgrimes		}
3441556Srgrimes		/*
3451556Srgrimes		 * Make sure that the device specified in the drive config is
3461556Srgrimes		 * an active GEOM provider.
34762311Sgreen		 */
3481556Srgrimes		pp = g_provider_by_name(d2->device);
3491556Srgrimes		if (pp == NULL) {
3501556Srgrimes			gctl_error(req, "%s: device not found", d2->device);
3511556Srgrimes			goto error;
3521556Srgrimes		}
3531556Srgrimes		if (gv_find_drive(sc, d2->name) != NULL) {
3541556Srgrimes			/* Ignore error. */
3551556Srgrimes			if (*flags & GV_FLAG_F)
3561556Srgrimes				continue;
3571556Srgrimes			gctl_error(req, "drive '%s' already exists", d2->name);
3581556Srgrimes			goto error;
3591556Srgrimes		}
3601556Srgrimes		if (gv_find_drive_device(sc, d2->device) != NULL) {
3611556Srgrimes			gctl_error(req, "device '%s' already configured in "
3621556Srgrimes			    "gvinum", d2->device);
3631556Srgrimes			goto error;
3641556Srgrimes		}
3651556Srgrimes
366249811Skib		d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
3671556Srgrimes		bcopy(d2, d, sizeof(*d));
3681556Srgrimes
3691556Srgrimes		gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0);
3701556Srgrimes	}
3711556Srgrimes
3721556Srgrimes	/* ... then volume definitions ... */
3731556Srgrimes	for (i = 0; i < *volumes; i++) {
37432324Sjoerg		snprintf(buf, sizeof(buf), "volume%d", i);
3751556Srgrimes		v2 = gctl_get_paraml(req, buf, sizeof(*v2));
3761556Srgrimes		if (v2 == NULL) {
3771556Srgrimes			gctl_error(req, "no volume definition given");
37862311Sgreen			return (-1);
3791556Srgrimes		}
3801556Srgrimes		if (gv_find_vol(sc, v2->name) != NULL) {
3811556Srgrimes			/* Ignore error. */
3821556Srgrimes			if (*flags & GV_FLAG_F)
383250469Seadler				continue;
384250469Seadler			gctl_error(req, "volume '%s' already exists", v2->name);
385250469Seadler			goto error;
3861556Srgrimes		}
3871556Srgrimes
3881556Srgrimes		v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO);
3891556Srgrimes		bcopy(v2, v, sizeof(*v));
39051249Sgreen
3911556Srgrimes		gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0);
3921556Srgrimes	}
3931556Srgrimes
39490108Simp	/* ... then plex definitions ... */
3951556Srgrimes	for (i = 0; i < *plexes; i++) {
3961556Srgrimes		snprintf(buf, sizeof(buf), "plex%d", i);
3971556Srgrimes		p2 = gctl_get_paraml(req, buf, sizeof(*p2));
3981556Srgrimes		if (p2 == NULL) {
3991556Srgrimes			gctl_error(req, "no plex definition given");
4001556Srgrimes			return (-1);
4011556Srgrimes		}
40228430Sjlemon		if (gv_find_plex(sc, p2->name) != NULL) {
403133762Srwatson			/* Ignore error. */
404133762Srwatson			if (*flags & GV_FLAG_F)
405133762Srwatson				continue;
40628430Sjlemon			gctl_error(req, "plex '%s' already exists", p2->name);
40728430Sjlemon			goto error;
40828430Sjlemon		}
4091556Srgrimes
4101556Srgrimes		p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO);
41130312Sjoerg		bcopy(p2, p, sizeof(*p));
4121556Srgrimes
4131556Srgrimes		gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0);
4141556Srgrimes	}
4151556Srgrimes
41690108Simp	/* ... and, finally, subdisk definitions. */
4171556Srgrimes	for (i = 0; i < *subdisks; i++) {
41851208Sgreen		snprintf(buf, sizeof(buf), "sd%d", i);
41951208Sgreen		s2 = gctl_get_paraml(req, buf, sizeof(*s2));
42051208Sgreen		if (s2 == NULL) {
4211556Srgrimes			gctl_error(req, "no subdisk definition given");
42251208Sgreen			return (-1);
4231556Srgrimes		}
4241556Srgrimes		if (gv_find_sd(sc, s2->name) != NULL) {
4251556Srgrimes			/* Ignore error. */
4261556Srgrimes			if (*flags & GV_FLAG_F)
4271556Srgrimes				continue;
4281556Srgrimes			gctl_error(req, "sd '%s' already exists", s2->name);
4291556Srgrimes			goto error;
4301556Srgrimes		}
4311556Srgrimes
4321556Srgrimes		s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO);
4331556Srgrimes		bcopy(s2, s, sizeof(*s));
4341556Srgrimes
4351556Srgrimes		gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0);
4361556Srgrimes	}
4371556Srgrimes
4381556Srgrimeserror:
4391556Srgrimes	gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0);
4401556Srgrimes	gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
441266488Sthomas
442266488Sthomas	return (0);
443266488Sthomas}
444266488Sthomas
445266488Sthomasstatic void
446266488Sthomasgv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
4471556Srgrimes{
448266488Sthomas	struct g_geom *gp;
449266488Sthomas	struct gv_softc *sc;
45030312Sjoerg	struct sbuf *sb;
45130312Sjoerg	char *comment;
45230312Sjoerg
45330312Sjoerg	g_topology_assert();
45430312Sjoerg
45530312Sjoerg	gp = LIST_FIRST(&mp->geom);
45630312Sjoerg	sc = gp->softc;
45730312Sjoerg
45830312Sjoerg	if (!strcmp(verb, "attach")) {
45930312Sjoerg		gv_attach(sc, req);
46030312Sjoerg
461266488Sthomas	} else if (!strcmp(verb, "concat")) {
46230312Sjoerg		gv_concat(gp, req);
46330312Sjoerg
46430312Sjoerg	} else if (!strcmp(verb, "detach")) {
465266488Sthomas		gv_detach(sc, req);
466266488Sthomas
467266488Sthomas	} else if (!strcmp(verb, "list")) {
468266488Sthomas		gv_list(gp, req);
469266488Sthomas
470266488Sthomas	/* Save our configuration back to disk. */
471266488Sthomas	} else if (!strcmp(verb, "saveconfig")) {
472266488Sthomas		gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
473266488Sthomas
47448051Sgreen	/* Return configuration in string form. */
47548051Sgreen	} else if (!strcmp(verb, "getconfig")) {
47648051Sgreen		comment = gctl_get_param(req, "comment", NULL);
47748051Sgreen		if (comment == NULL) {
478266488Sthomas			gctl_error(req, "no comment parameter given");
47930312Sjoerg			return;
48030312Sjoerg		}
48130312Sjoerg		sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
48230312Sjoerg		gv_format_config(sc, sb, 0, comment);
48330312Sjoerg		sbuf_finish(sb);
48430312Sjoerg		gctl_set_param(req, "config", sbuf_data(sb), sbuf_len(sb) + 1);
48530312Sjoerg		sbuf_delete(sb);
4861556Srgrimes
4871556Srgrimes	} else if (!strcmp(verb, "create")) {
4881556Srgrimes		gv_create(gp, req);
4891556Srgrimes
4901556Srgrimes	} else if (!strcmp(verb, "mirror")) {
4911556Srgrimes		gv_mirror(gp, req);
4921556Srgrimes
493266488Sthomas	} else if (!strcmp(verb, "move")) {
4941556Srgrimes		gv_move(gp, req);
4951556Srgrimes
496266488Sthomas	} else if (!strcmp(verb, "raid5")) {
497266488Sthomas		gv_raid5(gp, req);
498266488Sthomas
499266488Sthomas	} else if (!strcmp(verb, "rebuildparity") ||
500266488Sthomas	    !strcmp(verb, "checkparity")) {
501266488Sthomas		gv_parityop(sc, req);
502266488Sthomas
503266488Sthomas	} else if (!strcmp(verb, "remove")) {
504266488Sthomas		gv_remove(gp, req);
505266488Sthomas
506266488Sthomas	} else if (!strcmp(verb, "rename")) {
507266488Sthomas		gv_rename(gp, req);
508266488Sthomas
509266488Sthomas	} else if (!strcmp(verb, "resetconfig")) {
510266488Sthomas		gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0);
5111556Srgrimes
512266488Sthomas	} else if (!strcmp(verb, "start")) {
513266488Sthomas		gv_start_obj(gp, req);
514266488Sthomas
515266488Sthomas	} else if (!strcmp(verb, "stripe")) {
5161556Srgrimes		gv_stripe(gp, req);
5171556Srgrimes
5181556Srgrimes	} else if (!strcmp(verb, "setstate")) {
5191556Srgrimes		gv_setstate(gp, req);
5201556Srgrimes	} else
5211556Srgrimes		gctl_error(req, "Unknown verb parameter");
52251208Sgreen}
5231556Srgrimes
5241556Srgrimesstatic void
525gv_parityop(struct gv_softc *sc, struct gctl_req *req)
526{
527	struct gv_plex *p;
528	int *flags, *rebuild, type;
529	char *plex;
530
531	plex = gctl_get_param(req, "plex", NULL);
532	if (plex == NULL) {
533		gctl_error(req, "no plex given");
534		return;
535	}
536
537	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
538	if (flags == NULL) {
539		gctl_error(req, "no flags given");
540		return;
541	}
542
543	rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
544	if (rebuild == NULL) {
545		gctl_error(req, "no operation given");
546		return;
547	}
548
549	type = gv_object_type(sc, plex);
550	if (type != GV_TYPE_PLEX) {
551		gctl_error(req, "'%s' is not a plex", plex);
552		return;
553	}
554	p = gv_find_plex(sc, plex);
555
556	if (p->state != GV_PLEX_UP) {
557		gctl_error(req, "plex %s is not completely accessible",
558		    p->name);
559		return;
560	}
561
562	if (p->org != GV_PLEX_RAID5) {
563		gctl_error(req, "plex %s is not a RAID5 plex", p->name);
564		return;
565	}
566
567	/* Put it in the event queue. */
568	/* XXX: The state of the plex might have changed when this event is
569	 * picked up ... We should perhaps check this afterwards. */
570	if (*rebuild)
571		gv_post_event(sc, GV_EVENT_PARITY_REBUILD, p, NULL, 0, 0);
572	else
573		gv_post_event(sc, GV_EVENT_PARITY_CHECK, p, NULL, 0, 0);
574}
575
576static struct g_geom *
577gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
578{
579	struct g_geom *gp;
580	struct g_consumer *cp;
581	struct gv_softc *sc;
582	struct gv_hdr vhdr;
583	int error;
584
585 	g_topology_assert();
586	g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name);
587
588	gp = LIST_FIRST(&mp->geom);
589	if (gp == NULL) {
590		G_VINUM_DEBUG(0, "error: tasting, but not initialized?");
591		return (NULL);
592	}
593	sc = gp->softc;
594
595	cp = g_new_consumer(gp);
596	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
597	if (g_attach(cp, pp) != 0) {
598		g_destroy_consumer(cp);
599		return (NULL);
600	}
601	if (g_access(cp, 1, 0, 0) != 0) {
602		g_detach(cp);
603		g_destroy_consumer(cp);
604		return (NULL);
605	}
606	g_topology_unlock();
607
608	error = gv_read_header(cp, &vhdr);
609
610	g_topology_lock();
611	g_access(cp, -1, 0, 0);
612	g_detach(cp);
613	g_destroy_consumer(cp);
614
615	/* Check if what we've been given is a valid vinum drive. */
616	if (!error)
617		gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0);
618
619	return (NULL);
620}
621
622void
623gv_worker(void *arg)
624{
625	struct g_provider *pp;
626	struct gv_softc *sc;
627	struct gv_event *ev;
628	struct gv_volume *v;
629	struct gv_plex *p;
630	struct gv_sd *s;
631	struct gv_drive *d;
632	struct bio *bp;
633	int newstate, flags, err, rename;
634	char *newname;
635	off_t offset;
636
637	sc = arg;
638	KASSERT(sc != NULL, ("NULL sc"));
639	for (;;) {
640		/* Look at the events first... */
641		ev = gv_get_event(sc);
642		if (ev != NULL) {
643			gv_remove_event(sc, ev);
644
645			switch (ev->type) {
646			case GV_EVENT_DRIVE_TASTED:
647				G_VINUM_DEBUG(2, "event 'drive tasted'");
648				pp = ev->arg1;
649				gv_drive_tasted(sc, pp);
650				break;
651
652			case GV_EVENT_DRIVE_LOST:
653				G_VINUM_DEBUG(2, "event 'drive lost'");
654				d = ev->arg1;
655				gv_drive_lost(sc, d);
656				break;
657
658			case GV_EVENT_CREATE_DRIVE:
659				G_VINUM_DEBUG(2, "event 'create drive'");
660				d = ev->arg1;
661				gv_create_drive(sc, d);
662				break;
663
664			case GV_EVENT_CREATE_VOLUME:
665				G_VINUM_DEBUG(2, "event 'create volume'");
666				v = ev->arg1;
667				gv_create_volume(sc, v);
668				break;
669
670			case GV_EVENT_CREATE_PLEX:
671				G_VINUM_DEBUG(2, "event 'create plex'");
672				p = ev->arg1;
673				gv_create_plex(sc, p);
674				break;
675
676			case GV_EVENT_CREATE_SD:
677				G_VINUM_DEBUG(2, "event 'create sd'");
678				s = ev->arg1;
679				gv_create_sd(sc, s);
680				break;
681
682			case GV_EVENT_RM_DRIVE:
683				G_VINUM_DEBUG(2, "event 'remove drive'");
684				d = ev->arg1;
685				flags = ev->arg3;
686				gv_rm_drive(sc, d, flags);
687				/*gv_setup_objects(sc);*/
688				break;
689
690			case GV_EVENT_RM_VOLUME:
691				G_VINUM_DEBUG(2, "event 'remove volume'");
692				v = ev->arg1;
693				gv_rm_vol(sc, v);
694				/*gv_setup_objects(sc);*/
695				break;
696
697			case GV_EVENT_RM_PLEX:
698				G_VINUM_DEBUG(2, "event 'remove plex'");
699				p = ev->arg1;
700				gv_rm_plex(sc, p);
701				/*gv_setup_objects(sc);*/
702				break;
703
704			case GV_EVENT_RM_SD:
705				G_VINUM_DEBUG(2, "event 'remove sd'");
706				s = ev->arg1;
707				gv_rm_sd(sc, s);
708				/*gv_setup_objects(sc);*/
709				break;
710
711			case GV_EVENT_SAVE_CONFIG:
712				G_VINUM_DEBUG(2, "event 'save config'");
713				gv_save_config(sc);
714				break;
715
716			case GV_EVENT_SET_SD_STATE:
717				G_VINUM_DEBUG(2, "event 'setstate sd'");
718				s = ev->arg1;
719				newstate = ev->arg3;
720				flags = ev->arg4;
721				err = gv_set_sd_state(s, newstate, flags);
722				if (err)
723					G_VINUM_DEBUG(0, "error setting subdisk"
724					    " state: error code %d", err);
725				break;
726
727			case GV_EVENT_SET_DRIVE_STATE:
728				G_VINUM_DEBUG(2, "event 'setstate drive'");
729				d = ev->arg1;
730				newstate = ev->arg3;
731				flags = ev->arg4;
732				err = gv_set_drive_state(d, newstate, flags);
733				if (err)
734					G_VINUM_DEBUG(0, "error setting drive "
735					    "state: error code %d", err);
736				break;
737
738			case GV_EVENT_SET_VOL_STATE:
739				G_VINUM_DEBUG(2, "event 'setstate volume'");
740				v = ev->arg1;
741				newstate = ev->arg3;
742				flags = ev->arg4;
743				err = gv_set_vol_state(v, newstate, flags);
744				if (err)
745					G_VINUM_DEBUG(0, "error setting volume "
746					    "state: error code %d", err);
747				break;
748
749			case GV_EVENT_SET_PLEX_STATE:
750				G_VINUM_DEBUG(2, "event 'setstate plex'");
751				p = ev->arg1;
752				newstate = ev->arg3;
753				flags = ev->arg4;
754				err = gv_set_plex_state(p, newstate, flags);
755				if (err)
756					G_VINUM_DEBUG(0, "error setting plex "
757					    "state: error code %d", err);
758				break;
759
760			case GV_EVENT_SETUP_OBJECTS:
761				G_VINUM_DEBUG(2, "event 'setup objects'");
762				gv_setup_objects(sc);
763				break;
764
765			case GV_EVENT_RESET_CONFIG:
766				G_VINUM_DEBUG(2, "event 'resetconfig'");
767				err = gv_resetconfig(sc);
768				if (err)
769					G_VINUM_DEBUG(0, "error resetting "
770					    "config: error code %d", err);
771				break;
772
773			case GV_EVENT_PARITY_REBUILD:
774				/*
775				 * Start the rebuild. The gv_plex_done will
776				 * handle issuing of the remaining rebuild bio's
777				 * until it's finished.
778				 */
779				G_VINUM_DEBUG(2, "event 'rebuild'");
780				p = ev->arg1;
781				if (p->state != GV_PLEX_UP) {
782					G_VINUM_DEBUG(0, "plex %s is not "
783					    "completely accessible", p->name);
784					break;
785				}
786				if (p->flags & GV_PLEX_SYNCING ||
787				    p->flags & GV_PLEX_REBUILDING ||
788				    p->flags & GV_PLEX_GROWING) {
789					G_VINUM_DEBUG(0, "plex %s is busy with "
790					    "syncing or parity build", p->name);
791					break;
792				}
793				p->synced = 0;
794				p->flags |= GV_PLEX_REBUILDING;
795				g_topology_assert_not();
796				g_topology_lock();
797				err = gv_access(p->vol_sc->provider, 1, 1, 0);
798				if (err) {
799					G_VINUM_DEBUG(0, "unable to access "
800					    "provider");
801					break;
802				}
803				g_topology_unlock();
804				gv_parity_request(p, GV_BIO_CHECK |
805				    GV_BIO_PARITY, 0);
806				break;
807
808			case GV_EVENT_PARITY_CHECK:
809				/* Start parity check. */
810				G_VINUM_DEBUG(2, "event 'check'");
811				p = ev->arg1;
812				if (p->state != GV_PLEX_UP) {
813					G_VINUM_DEBUG(0, "plex %s is not "
814					    "completely accessible", p->name);
815					break;
816				}
817				if (p->flags & GV_PLEX_SYNCING ||
818				    p->flags & GV_PLEX_REBUILDING ||
819				    p->flags & GV_PLEX_GROWING) {
820					G_VINUM_DEBUG(0, "plex %s is busy with "
821					    "syncing or parity build", p->name);
822					break;
823				}
824				p->synced = 0;
825				g_topology_assert_not();
826				g_topology_lock();
827				err = gv_access(p->vol_sc->provider, 1, 1, 0);
828				if (err) {
829					G_VINUM_DEBUG(0, "unable to access "
830					    "provider");
831					break;
832				}
833				g_topology_unlock();
834				gv_parity_request(p, GV_BIO_CHECK, 0);
835				break;
836
837			case GV_EVENT_START_PLEX:
838				G_VINUM_DEBUG(2, "event 'start' plex");
839				p = ev->arg1;
840				gv_start_plex(p);
841				break;
842
843			case GV_EVENT_START_VOLUME:
844				G_VINUM_DEBUG(2, "event 'start' volume");
845				v = ev->arg1;
846				gv_start_vol(v);
847				break;
848
849			case GV_EVENT_ATTACH_PLEX:
850				G_VINUM_DEBUG(2, "event 'attach' plex");
851				p = ev->arg1;
852				v = ev->arg2;
853				rename = ev->arg4;
854				err = gv_attach_plex(p, v, rename);
855				if (err)
856					G_VINUM_DEBUG(0, "error attaching %s to"
857					    " %s: error code %d", p->name,
858					    v->name, err);
859				break;
860
861			case GV_EVENT_ATTACH_SD:
862				G_VINUM_DEBUG(2, "event 'attach' sd");
863				s = ev->arg1;
864				p = ev->arg2;
865				offset = ev->arg3;
866				rename = ev->arg4;
867				err = gv_attach_sd(s, p, offset, rename);
868				if (err)
869					G_VINUM_DEBUG(0, "error attaching %s to"
870					    " %s: error code %d", s->name,
871					    p->name, err);
872				break;
873
874			case GV_EVENT_DETACH_PLEX:
875				G_VINUM_DEBUG(2, "event 'detach' plex");
876				p = ev->arg1;
877				flags = ev->arg3;
878				err = gv_detach_plex(p, flags);
879				if (err)
880					G_VINUM_DEBUG(0, "error detaching %s: "
881					    "error code %d", p->name, err);
882				break;
883
884			case GV_EVENT_DETACH_SD:
885				G_VINUM_DEBUG(2, "event 'detach' sd");
886				s = ev->arg1;
887				flags = ev->arg3;
888				err = gv_detach_sd(s, flags);
889				if (err)
890					G_VINUM_DEBUG(0, "error detaching %s: "
891					    "error code %d", s->name, err);
892				break;
893
894			case GV_EVENT_RENAME_VOL:
895				G_VINUM_DEBUG(2, "event 'rename' volume");
896				v = ev->arg1;
897				newname = ev->arg2;
898				flags = ev->arg3;
899				err = gv_rename_vol(sc, v, newname, flags);
900				if (err)
901					G_VINUM_DEBUG(0, "error renaming %s to "
902					    "%s: error code %d", v->name,
903					    newname, err);
904				g_free(newname);
905				/* Destroy and recreate the provider if we can. */
906				if (gv_provider_is_open(v->provider)) {
907					G_VINUM_DEBUG(0, "unable to rename "
908					    "provider to %s: provider in use",
909					    v->name);
910					break;
911				}
912				g_topology_lock();
913				g_wither_provider(v->provider, ENOENT);
914				g_topology_unlock();
915				v->provider = NULL;
916				gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc,
917				    NULL, 0, 0);
918				break;
919
920			case GV_EVENT_RENAME_PLEX:
921				G_VINUM_DEBUG(2, "event 'rename' plex");
922				p = ev->arg1;
923				newname = ev->arg2;
924				flags = ev->arg3;
925				err = gv_rename_plex(sc, p, newname, flags);
926				if (err)
927					G_VINUM_DEBUG(0, "error renaming %s to "
928					    "%s: error code %d", p->name,
929					    newname, err);
930				g_free(newname);
931				break;
932
933			case GV_EVENT_RENAME_SD:
934				G_VINUM_DEBUG(2, "event 'rename' sd");
935				s = ev->arg1;
936				newname = ev->arg2;
937				flags = ev->arg3;
938				err = gv_rename_sd(sc, s, newname, flags);
939				if (err)
940					G_VINUM_DEBUG(0, "error renaming %s to "
941					    "%s: error code %d", s->name,
942					    newname, err);
943				g_free(newname);
944				break;
945
946			case GV_EVENT_RENAME_DRIVE:
947				G_VINUM_DEBUG(2, "event 'rename' drive");
948				d = ev->arg1;
949				newname = ev->arg2;
950				flags = ev->arg3;
951				err = gv_rename_drive(sc, d, newname, flags);
952				if (err)
953					G_VINUM_DEBUG(0, "error renaming %s to "
954					    "%s: error code %d", d->name,
955					    newname, err);
956				g_free(newname);
957				break;
958
959			case GV_EVENT_MOVE_SD:
960				G_VINUM_DEBUG(2, "event 'move' sd");
961				s = ev->arg1;
962				d = ev->arg2;
963				flags = ev->arg3;
964				err = gv_move_sd(sc, s, d, flags);
965				if (err)
966					G_VINUM_DEBUG(0, "error moving %s to "
967					    "%s: error code %d", s->name,
968					    d->name, err);
969				break;
970
971			case GV_EVENT_THREAD_EXIT:
972				G_VINUM_DEBUG(2, "event 'thread exit'");
973				g_free(ev);
974				mtx_lock(&sc->equeue_mtx);
975				mtx_lock(&sc->bqueue_mtx);
976				gv_cleanup(sc);
977				mtx_destroy(&sc->bqueue_mtx);
978				mtx_destroy(&sc->equeue_mtx);
979				g_free(sc->bqueue_down);
980				g_free(sc->bqueue_up);
981				g_free(sc);
982				kproc_exit(0);
983				/* NOTREACHED */
984
985			default:
986				G_VINUM_DEBUG(1, "unknown event %d", ev->type);
987			}
988
989			g_free(ev);
990			continue;
991		}
992
993		/* ... then do I/O processing. */
994		mtx_lock(&sc->bqueue_mtx);
995		/* First do new requests. */
996		bp = bioq_takefirst(sc->bqueue_down);
997		if (bp != NULL) {
998			mtx_unlock(&sc->bqueue_mtx);
999			/* A bio that interfered with another bio. */
1000			if (bp->bio_pflags & GV_BIO_ONHOLD) {
1001				s = bp->bio_caller1;
1002				p = s->plex_sc;
1003				/* Is it still locked out? */
1004				if (gv_stripe_active(p, bp)) {
1005					/* Park the bio on the waiting queue. */
1006					bioq_disksort(p->wqueue, bp);
1007				} else {
1008					bp->bio_pflags &= ~GV_BIO_ONHOLD;
1009					g_io_request(bp, s->drive_sc->consumer);
1010				}
1011			/* A special request requireing special handling. */
1012			} else if (bp->bio_pflags & GV_BIO_INTERNAL) {
1013				p = bp->bio_caller1;
1014				gv_plex_start(p, bp);
1015			} else {
1016				gv_volume_start(sc, bp);
1017			}
1018			mtx_lock(&sc->bqueue_mtx);
1019		}
1020		/* Then do completed requests. */
1021		bp = bioq_takefirst(sc->bqueue_up);
1022		if (bp == NULL) {
1023			msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10);
1024			mtx_unlock(&sc->bqueue_mtx);
1025			continue;
1026		}
1027		mtx_unlock(&sc->bqueue_mtx);
1028		gv_bio_done(sc, bp);
1029	}
1030}
1031
/* Name under which the class is registered with GEOM. */
#define	VINUM_CLASS_NAME "VINUM"

/*
 * GEOM class descriptor: hooks up the init, taste, control-request and
 * destroy_geom callbacks defined in this file.
 */
static struct g_class g_vinum_class	= {
	.name = VINUM_CLASS_NAME,
	.version = G_VERSION,
	.init = gv_init,
	.taste = gv_taste,
	.ctlreq = gv_config,
	.destroy_geom = gv_unload,
};

/* Register the class as module g_vinum and declare module version 0. */
DECLARE_GEOM_CLASS(g_vinum_class, g_vinum);
MODULE_VERSION(geom_vinum, 0);
1045