1/*-
2 * Copyright (c) 2004, 2007 Lukas Ertl
3 * Copyright (c) 2007, 2009 Ulf Lilleengen
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/libkern.h>
33#include <sys/malloc.h>
34
35#include <geom/geom.h>
36#include <geom/vinum/geom_vinum_var.h>
37#include <geom/vinum/geom_vinum.h>
38
/* Prototypes for the file-local helpers defined further down. */
static struct gv_plex	*gv_find_good_plex(struct gv_volume *);
static int		 gv_grow_plex(struct gv_plex *);
static int		 gv_init_plex(struct gv_plex *);
static int		 gv_rebuild_plex(struct gv_plex *);
static int		 gv_sync(struct gv_volume *);
static int		 gv_sync_plex(struct gv_plex *, struct gv_plex *);
45
46void
47gv_start_obj(struct g_geom *gp, struct gctl_req *req)
48{
49	struct gv_softc *sc;
50	struct gv_volume *v;
51	struct gv_plex *p;
52	int *argc, *initsize;
53	char *argv, buf[20];
54	int i, type;
55
56	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
57	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
58
59	if (argc == NULL || *argc == 0) {
60		gctl_error(req, "no arguments given");
61		return;
62	}
63
64	sc = gp->softc;
65
66	for (i = 0; i < *argc; i++) {
67		snprintf(buf, sizeof(buf), "argv%d", i);
68		argv = gctl_get_param(req, buf, NULL);
69		if (argv == NULL)
70			continue;
71		type = gv_object_type(sc, argv);
72		switch (type) {
73		case GV_TYPE_VOL:
74			v = gv_find_vol(sc, argv);
75			if (v != NULL)
76				gv_post_event(sc, GV_EVENT_START_VOLUME, v,
77				    NULL, *initsize, 0);
78			break;
79
80		case GV_TYPE_PLEX:
81			p = gv_find_plex(sc, argv);
82			if (p != NULL)
83				gv_post_event(sc, GV_EVENT_START_PLEX, p, NULL,
84				    *initsize, 0);
85			break;
86
87		case GV_TYPE_SD:
88		case GV_TYPE_DRIVE:
89			/* XXX Not implemented, but what is the use? */
90			gctl_error(req, "unable to start '%s' - not yet supported",
91			    argv);
92			return;
93		default:
94			gctl_error(req, "unknown object '%s'", argv);
95			return;
96		}
97	}
98}
99
100int
101gv_start_plex(struct gv_plex *p)
102{
103	struct gv_volume *v;
104	struct gv_plex *up;
105	struct gv_sd *s;
106	int error;
107
108	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
109
110	error = 0;
111	v = p->vol_sc;
112
113	/* RAID5 plexes can either be init, rebuilt or grown. */
114	if (p->org == GV_PLEX_RAID5) {
115		if (p->state > GV_PLEX_DEGRADED) {
116			LIST_FOREACH(s, &p->subdisks, in_plex) {
117				if (s->flags & GV_SD_GROW) {
118					error = gv_grow_plex(p);
119					return (error);
120				}
121			}
122		} else if (p->state == GV_PLEX_DEGRADED) {
123			error = gv_rebuild_plex(p);
124		} else
125			error = gv_init_plex(p);
126	} else {
127		/* We want to sync from the other plex if we're down. */
128		if (p->state == GV_PLEX_DOWN && v->plexcount > 1) {
129			up = gv_find_good_plex(v);
130			if (up == NULL) {
131				G_VINUM_DEBUG(1, "unable to find a good plex");
132				return (ENXIO);
133			}
134			g_topology_lock();
135			error = gv_access(v->provider, 1, 1, 0);
136			if (error) {
137				g_topology_unlock();
138				G_VINUM_DEBUG(0, "sync from '%s' failed to "
139				    "access volume: %d", up->name, error);
140				return (error);
141			}
142			g_topology_unlock();
143			error = gv_sync_plex(p, up);
144			if (error)
145				return (error);
146		/*
147		 * In case we have a stripe that is up, check whether it can be
148		 * grown.
149		 */
150		} else if (p->org == GV_PLEX_STRIPED &&
151		    p->state != GV_PLEX_DOWN) {
152			LIST_FOREACH(s, &p->subdisks, in_plex) {
153				if (s->flags & GV_SD_GROW) {
154					error = gv_grow_plex(p);
155					break;
156				}
157			}
158		}
159	}
160	return (error);
161}
162
163int
164gv_start_vol(struct gv_volume *v)
165{
166	struct gv_plex *p;
167	int error;
168
169	KASSERT(v != NULL, ("gv_start_vol: NULL v"));
170
171	error = 0;
172
173	if (v->plexcount == 0)
174		return (ENXIO);
175
176	else if (v->plexcount == 1) {
177		p = LIST_FIRST(&v->plexes);
178		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
179		error = gv_start_plex(p);
180	} else
181		error = gv_sync(v);
182
183	return (error);
184}
185
186/* Sync a plex p from the plex up.  */
187static int
188gv_sync_plex(struct gv_plex *p, struct gv_plex *up)
189{
190	int error;
191
192	KASSERT(p != NULL, ("%s: NULL p", __func__));
193	KASSERT(up != NULL, ("%s: NULL up", __func__));
194	if ((p == up) || (p->state == GV_PLEX_UP))
195		return (0);
196	if (p->flags & GV_PLEX_SYNCING ||
197	    p->flags & GV_PLEX_REBUILDING ||
198	    p->flags & GV_PLEX_GROWING) {
199		return (EINPROGRESS);
200	}
201	p->synced = 0;
202	p->flags |= GV_PLEX_SYNCING;
203	G_VINUM_DEBUG(1, "starting sync of plex %s", p->name);
204	error = gv_sync_request(up, p, p->synced,
205	    MIN(GV_DFLT_SYNCSIZE, up->size - p->synced),
206	    BIO_READ, NULL);
207	if (error) {
208		G_VINUM_DEBUG(0, "error syncing plex %s", p->name);
209		return (error);
210	}
211	return (0);
212}
213
214/* Return a good plex from volume v. */
215static struct gv_plex *
216gv_find_good_plex(struct gv_volume *v)
217{
218	struct gv_plex *up;
219
220	/* Find the plex that's up. */
221	up = NULL;
222	LIST_FOREACH(up, &v->plexes, in_volume) {
223		if (up->state == GV_PLEX_UP)
224			break;
225	}
226	/* Didn't find a good plex. */
227	return (up);
228}
229
230static int
231gv_sync(struct gv_volume *v)
232{
233	struct gv_softc *sc;
234	struct gv_plex *p, *up;
235	int error;
236
237	KASSERT(v != NULL, ("gv_sync: NULL v"));
238	sc = v->vinumconf;
239	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
240
241
242	up = gv_find_good_plex(v);
243	if (up == NULL)
244		return (ENXIO);
245	g_topology_lock();
246	error = gv_access(v->provider, 1, 1, 0);
247	if (error) {
248		g_topology_unlock();
249		G_VINUM_DEBUG(0, "sync from '%s' failed to access volume: %d",
250		    up->name, error);
251		return (error);
252	}
253	g_topology_unlock();
254
255	/* Go through the good plex, and issue BIO's to all other plexes. */
256	LIST_FOREACH(p, &v->plexes, in_volume) {
257		error = gv_sync_plex(p, up);
258		if (error)
259			break;
260	}
261	return (0);
262}
263
264static int
265gv_rebuild_plex(struct gv_plex *p)
266{
267	struct gv_drive *d;
268	struct gv_sd *s;
269	int error;
270
271	if (p->flags & GV_PLEX_SYNCING ||
272	    p->flags & GV_PLEX_REBUILDING ||
273	    p->flags & GV_PLEX_GROWING)
274		return (EINPROGRESS);
275	/*
276	 * Make sure that all subdisks have consumers. We won't allow a rebuild
277	 * unless every subdisk have one.
278	 */
279	LIST_FOREACH(s, &p->subdisks, in_plex) {
280		d = s->drive_sc;
281		if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) {
282			G_VINUM_DEBUG(0, "unable to rebuild %s, subdisk(s) have"
283			    " no drives", p->name);
284			return (ENXIO);
285		}
286	}
287	p->flags |= GV_PLEX_REBUILDING;
288	p->synced = 0;
289
290	g_topology_assert_not();
291	g_topology_lock();
292	error = gv_access(p->vol_sc->provider, 1, 1, 0);
293	if (error) {
294		G_VINUM_DEBUG(0, "unable to access provider");
295		return (0);
296	}
297	g_topology_unlock();
298
299	gv_parity_request(p, GV_BIO_REBUILD, 0);
300	return (0);
301}
302
303static int
304gv_grow_plex(struct gv_plex *p)
305{
306	struct gv_volume *v;
307	struct gv_sd *s;
308	off_t origsize, origlength;
309	int error, sdcount;
310
311	KASSERT(p != NULL, ("gv_grow_plex: NULL p"));
312	v = p->vol_sc;
313	KASSERT(v != NULL, ("gv_grow_plex: NULL v"));
314
315	if (p->flags & GV_PLEX_GROWING ||
316	    p->flags & GV_PLEX_SYNCING ||
317	    p->flags & GV_PLEX_REBUILDING)
318		return (EINPROGRESS);
319	g_topology_lock();
320	error = gv_access(v->provider, 1, 1, 0);
321	g_topology_unlock();
322	if (error) {
323		G_VINUM_DEBUG(0, "unable to access provider");
324		return (error);
325	}
326
327	/* XXX: This routine with finding origsize is used two other places as
328	 * well, so we should create a function for it. */
329	sdcount = p->sdcount;
330	LIST_FOREACH(s, &p->subdisks, in_plex) {
331		if (s->flags & GV_SD_GROW)
332			sdcount--;
333	}
334	s = LIST_FIRST(&p->subdisks);
335	if (s == NULL) {
336		G_VINUM_DEBUG(0, "error growing plex without subdisks");
337		return (GV_ERR_NOTFOUND);
338	}
339	p->flags |= GV_PLEX_GROWING;
340	origsize = (sdcount - 1) * s->size;
341	origlength = (sdcount - 1) * p->stripesize;
342	p->synced = 0;
343	G_VINUM_DEBUG(1, "starting growing of plex %s", p->name);
344	gv_grow_request(p, 0, MIN(origlength, origsize), BIO_READ, NULL);
345
346	return (0);
347}
348
349static int
350gv_init_plex(struct gv_plex *p)
351{
352	struct gv_drive *d;
353	struct gv_sd *s;
354	int error;
355	off_t start;
356	caddr_t data;
357
358	KASSERT(p != NULL, ("gv_init_plex: NULL p"));
359
360	LIST_FOREACH(s, &p->subdisks, in_plex) {
361		if (s->state == GV_SD_INITIALIZING)
362			return (EINPROGRESS);
363		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
364		s->init_size = GV_DFLT_SYNCSIZE;
365		start = s->drive_offset + s->initialized;
366		d = s->drive_sc;
367		if (d == NULL) {
368			G_VINUM_DEBUG(0, "subdisk %s has no drive yet", s->name);
369			break;
370		}
371		/*
372		 * Take the lock here since we need to avoid a race in
373		 * gv_init_request if the BIO is completed before the lock is
374		 * released.
375		 */
376		g_topology_lock();
377		error = g_access(d->consumer, 0, 1, 0);
378		g_topology_unlock();
379		if (error) {
380			G_VINUM_DEBUG(0, "error accessing consumer when "
381			    "initializing %s", s->name);
382			break;
383		}
384		data = g_malloc(s->init_size, M_WAITOK | M_ZERO);
385		gv_init_request(s, start, data, s->init_size);
386	}
387	return (0);
388}
389