1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2004, 2007 Lukas Ertl
5 * Copyright (c) 2007, 2009 Ulf Lilleengen
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32#include <sys/param.h>
33#include <sys/bio.h>
34#include <sys/libkern.h>
35#include <sys/malloc.h>
36
37#include <geom/geom.h>
38#include <geom/geom_dbg.h>
39#include <geom/vinum/geom_vinum_var.h>
40#include <geom/vinum/geom_vinum.h>
41
/*
 * File-local helpers.  Each one kicks off a single kind of operation and
 * returns 0 or an error code.
 */
/* Sync every other plex of a volume from a plex that is up. */
static int		 gv_sync(struct gv_volume *);
/* Issue the first rebuild (parity) request for a plex. */
static int		 gv_rebuild_plex(struct gv_plex *);
/* Issue initialization requests for the subdisks of a plex. */
static int		 gv_init_plex(struct gv_plex *);
/* Issue the first grow request for a plex. */
static int		 gv_grow_plex(struct gv_plex *);
/* Sync the first plex from the second one. */
static int		 gv_sync_plex(struct gv_plex *, struct gv_plex *);
/* Return a plex of the volume that is in the up state, or NULL. */
static struct gv_plex	*gv_find_good_plex(struct gv_volume *);
48
49void
50gv_start_obj(struct g_geom *gp, struct gctl_req *req)
51{
52	struct gv_softc *sc;
53	struct gv_volume *v;
54	struct gv_plex *p;
55	int *argc, *initsize;
56	char *argv, buf[20];
57	int i, type;
58
59	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
60	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
61
62	if (argc == NULL || *argc == 0) {
63		gctl_error(req, "no arguments given");
64		return;
65	}
66
67	sc = gp->softc;
68
69	for (i = 0; i < *argc; i++) {
70		snprintf(buf, sizeof(buf), "argv%d", i);
71		argv = gctl_get_param(req, buf, NULL);
72		if (argv == NULL)
73			continue;
74		type = gv_object_type(sc, argv);
75		switch (type) {
76		case GV_TYPE_VOL:
77			v = gv_find_vol(sc, argv);
78			if (v != NULL)
79				gv_post_event(sc, GV_EVENT_START_VOLUME, v,
80				    NULL, *initsize, 0);
81			break;
82
83		case GV_TYPE_PLEX:
84			p = gv_find_plex(sc, argv);
85			if (p != NULL)
86				gv_post_event(sc, GV_EVENT_START_PLEX, p, NULL,
87				    *initsize, 0);
88			break;
89
90		case GV_TYPE_SD:
91		case GV_TYPE_DRIVE:
92			/* XXX Not implemented, but what is the use? */
93			gctl_error(req, "unable to start '%s' - not yet supported",
94			    argv);
95			return;
96		default:
97			gctl_error(req, "unknown object '%s'", argv);
98			return;
99		}
100	}
101}
102
103int
104gv_start_plex(struct gv_plex *p)
105{
106	struct gv_volume *v;
107	struct gv_plex *up;
108	struct gv_sd *s;
109	int error;
110
111	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
112
113	error = 0;
114	v = p->vol_sc;
115
116	/* RAID5 plexes can either be init, rebuilt or grown. */
117	if (p->org == GV_PLEX_RAID5) {
118		if (p->state > GV_PLEX_DEGRADED) {
119			LIST_FOREACH(s, &p->subdisks, in_plex) {
120				if (s->flags & GV_SD_GROW) {
121					error = gv_grow_plex(p);
122					return (error);
123				}
124			}
125		} else if (p->state == GV_PLEX_DEGRADED) {
126			error = gv_rebuild_plex(p);
127		} else
128			error = gv_init_plex(p);
129	} else {
130		/* We want to sync from the other plex if we're down. */
131		if (p->state == GV_PLEX_DOWN && v->plexcount > 1) {
132			up = gv_find_good_plex(v);
133			if (up == NULL) {
134				G_VINUM_DEBUG(1, "unable to find a good plex");
135				return (ENXIO);
136			}
137			g_topology_lock();
138			error = gv_access(v->provider, 1, 1, 0);
139			if (error) {
140				g_topology_unlock();
141				G_VINUM_DEBUG(0, "sync from '%s' failed to "
142				    "access volume: %d", up->name, error);
143				return (error);
144			}
145			g_topology_unlock();
146			error = gv_sync_plex(p, up);
147			if (error)
148				return (error);
149		/*
150		 * In case we have a stripe that is up, check whether it can be
151		 * grown.
152		 */
153		} else if (p->org == GV_PLEX_STRIPED &&
154		    p->state != GV_PLEX_DOWN) {
155			LIST_FOREACH(s, &p->subdisks, in_plex) {
156				if (s->flags & GV_SD_GROW) {
157					error = gv_grow_plex(p);
158					break;
159				}
160			}
161		}
162	}
163	return (error);
164}
165
166int
167gv_start_vol(struct gv_volume *v)
168{
169	struct gv_plex *p;
170	int error;
171
172	KASSERT(v != NULL, ("gv_start_vol: NULL v"));
173
174	error = 0;
175
176	if (v->plexcount == 0)
177		return (ENXIO);
178
179	else if (v->plexcount == 1) {
180		p = LIST_FIRST(&v->plexes);
181		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
182		error = gv_start_plex(p);
183	} else
184		error = gv_sync(v);
185
186	return (error);
187}
188
189/* Sync a plex p from the plex up.  */
190static int
191gv_sync_plex(struct gv_plex *p, struct gv_plex *up)
192{
193	int error;
194
195	KASSERT(p != NULL, ("%s: NULL p", __func__));
196	KASSERT(up != NULL, ("%s: NULL up", __func__));
197	if ((p == up) || (p->state == GV_PLEX_UP))
198		return (0);
199	if (p->flags & GV_PLEX_SYNCING ||
200	    p->flags & GV_PLEX_REBUILDING ||
201	    p->flags & GV_PLEX_GROWING) {
202		return (EINPROGRESS);
203	}
204	p->synced = 0;
205	p->flags |= GV_PLEX_SYNCING;
206	G_VINUM_DEBUG(1, "starting sync of plex %s", p->name);
207	error = gv_sync_request(up, p, p->synced,
208	    MIN(GV_DFLT_SYNCSIZE, up->size - p->synced),
209	    BIO_READ, NULL);
210	if (error) {
211		G_VINUM_DEBUG(0, "error syncing plex %s", p->name);
212		return (error);
213	}
214	return (0);
215}
216
217/* Return a good plex from volume v. */
218static struct gv_plex *
219gv_find_good_plex(struct gv_volume *v)
220{
221	struct gv_plex *up;
222
223	/* Find the plex that's up. */
224	up = NULL;
225	LIST_FOREACH(up, &v->plexes, in_volume) {
226		if (up->state == GV_PLEX_UP)
227			break;
228	}
229	/* Didn't find a good plex. */
230	return (up);
231}
232
233static int
234gv_sync(struct gv_volume *v)
235{
236	struct gv_softc *sc;
237	struct gv_plex *p, *up;
238	int error;
239
240	KASSERT(v != NULL, ("gv_sync: NULL v"));
241	sc = v->vinumconf;
242	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
243
244	up = gv_find_good_plex(v);
245	if (up == NULL)
246		return (ENXIO);
247	g_topology_lock();
248	error = gv_access(v->provider, 1, 1, 0);
249	if (error) {
250		g_topology_unlock();
251		G_VINUM_DEBUG(0, "sync from '%s' failed to access volume: %d",
252		    up->name, error);
253		return (error);
254	}
255	g_topology_unlock();
256
257	/* Go through the good plex, and issue BIO's to all other plexes. */
258	LIST_FOREACH(p, &v->plexes, in_volume) {
259		error = gv_sync_plex(p, up);
260		if (error)
261			break;
262	}
263	return (0);
264}
265
266static int
267gv_rebuild_plex(struct gv_plex *p)
268{
269	struct gv_drive *d;
270	struct gv_sd *s;
271	int error;
272
273	if (p->flags & GV_PLEX_SYNCING ||
274	    p->flags & GV_PLEX_REBUILDING ||
275	    p->flags & GV_PLEX_GROWING)
276		return (EINPROGRESS);
277	/*
278	 * Make sure that all subdisks have consumers. We won't allow a rebuild
279	 * unless every subdisk have one.
280	 */
281	LIST_FOREACH(s, &p->subdisks, in_plex) {
282		d = s->drive_sc;
283		if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) {
284			G_VINUM_DEBUG(0, "unable to rebuild %s, subdisk(s) have"
285			    " no drives", p->name);
286			return (ENXIO);
287		}
288	}
289	p->flags |= GV_PLEX_REBUILDING;
290	p->synced = 0;
291
292	g_topology_assert_not();
293	g_topology_lock();
294	error = gv_access(p->vol_sc->provider, 1, 1, 0);
295	if (error) {
296		G_VINUM_DEBUG(0, "unable to access provider");
297		return (0);
298	}
299	g_topology_unlock();
300
301	gv_parity_request(p, GV_BIO_REBUILD, 0);
302	return (0);
303}
304
305static int
306gv_grow_plex(struct gv_plex *p)
307{
308	struct gv_volume *v;
309	struct gv_sd *s;
310	off_t origsize, origlength;
311	int error, sdcount;
312
313	KASSERT(p != NULL, ("gv_grow_plex: NULL p"));
314	v = p->vol_sc;
315	KASSERT(v != NULL, ("gv_grow_plex: NULL v"));
316
317	if (p->flags & GV_PLEX_GROWING ||
318	    p->flags & GV_PLEX_SYNCING ||
319	    p->flags & GV_PLEX_REBUILDING)
320		return (EINPROGRESS);
321	g_topology_lock();
322	error = gv_access(v->provider, 1, 1, 0);
323	g_topology_unlock();
324	if (error) {
325		G_VINUM_DEBUG(0, "unable to access provider");
326		return (error);
327	}
328
329	/* XXX: This routine with finding origsize is used two other places as
330	 * well, so we should create a function for it. */
331	sdcount = p->sdcount;
332	LIST_FOREACH(s, &p->subdisks, in_plex) {
333		if (s->flags & GV_SD_GROW)
334			sdcount--;
335	}
336	s = LIST_FIRST(&p->subdisks);
337	if (s == NULL) {
338		G_VINUM_DEBUG(0, "error growing plex without subdisks");
339		return (GV_ERR_NOTFOUND);
340	}
341	p->flags |= GV_PLEX_GROWING;
342	origsize = (sdcount - 1) * s->size;
343	origlength = (sdcount - 1) * p->stripesize;
344	p->synced = 0;
345	G_VINUM_DEBUG(1, "starting growing of plex %s", p->name);
346	gv_grow_request(p, 0, MIN(origlength, origsize), BIO_READ, NULL);
347
348	return (0);
349}
350
351static int
352gv_init_plex(struct gv_plex *p)
353{
354	struct gv_drive *d;
355	struct gv_sd *s;
356	int error;
357	off_t start;
358	caddr_t data;
359
360	KASSERT(p != NULL, ("gv_init_plex: NULL p"));
361
362	LIST_FOREACH(s, &p->subdisks, in_plex) {
363		if (s->state == GV_SD_INITIALIZING)
364			return (EINPROGRESS);
365		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
366		s->init_size = GV_DFLT_SYNCSIZE;
367		start = s->drive_offset + s->initialized;
368		d = s->drive_sc;
369		if (d == NULL) {
370			G_VINUM_DEBUG(0, "subdisk %s has no drive yet", s->name);
371			break;
372		}
373		/*
374		 * Take the lock here since we need to avoid a race in
375		 * gv_init_request if the BIO is completed before the lock is
376		 * released.
377		 */
378		g_topology_lock();
379		error = g_access(d->consumer, 0, 1, 0);
380		g_topology_unlock();
381		if (error) {
382			G_VINUM_DEBUG(0, "error accessing consumer when "
383			    "initializing %s", s->name);
384			break;
385		}
386		data = g_malloc(s->init_size, M_WAITOK | M_ZERO);
387		gv_init_request(s, start, data, s->init_size);
388	}
389	return (0);
390}
391