geom_vinum_init.c revision 135966
/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_init.c 135966 2004-09-30 12:57:35Z le $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>

int     gv_init_plex(struct gv_plex *);
int     gv_init_sd(struct gv_sd *);
void    gv_init_td(void *);
void    gv_rebuild_plex(struct gv_plex *);
void    gv_rebuild_td(void *);
void    gv_start_plex(struct gv_plex *);
void    gv_start_vol(struct gv_volume *);
void    gv_sync(struct gv_volume *);
void    gv_sync_td(void *);

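/* Argument package handed to the sync and rebuild worker threads. */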
struct gv_sync_args {
        struct gv_volume *v;
        struct gv_plex *from;
        struct gv_plex *to;
        off_t syncsize;
};

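/*
 * Handle a 'start' request from userland.  Each object named in the gctl
 * request is looked up and started; volumes and plexes are supported,
 * subdisks and drives cannot be started directly yet.
 */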
void
gv_start_obj(struct g_geom *gp, struct gctl_req *req)
{
        struct gv_softc *sc;
        struct gv_volume *v;
        struct gv_plex *p;
        int *argc, *initsize;
        char *argv, buf[20];
        int i, type;

        argc = gctl_get_paraml(req, "argc", sizeof(*argc));
        initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));

        if (argc == NULL || *argc == 0) {
                gctl_error(req, "no arguments given");
                return;
        }

        sc = gp->softc;

        for (i = 0; i < *argc; i++) {
                snprintf(buf, sizeof(buf), "argv%d", i);
                argv = gctl_get_param(req, buf, NULL);
                if (argv == NULL)
                        continue;
                type = gv_object_type(sc, argv);
                switch (type) {
                case GV_TYPE_VOL:
                        v = gv_find_vol(sc, argv);
                        gv_start_vol(v);
                        break;

                case GV_TYPE_PLEX:
                        p = gv_find_plex(sc, argv);
                        gv_start_plex(p);
                        break;

                case GV_TYPE_SD:
                case GV_TYPE_DRIVE:
                        /* XXX not yet */
                        gctl_error(req, "cannot start '%s'", argv);
                        return;
                default:
                        gctl_error(req, "unknown object '%s'", argv);
                        return;
                }
        }
}

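/*
 * Start a single plex.  A plex that belongs to a multi-plex volume is
 * brought up by syncing the volume from a plex that is already up; a
 * stand-alone RAID5 plex is rebuilt if degraded, otherwise initialized.
 */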
void
gv_start_plex(struct gv_plex *p)
{
        struct gv_volume *v;

        KASSERT(p != NULL, ("gv_start_plex: NULL p"));

        if (p->state == GV_PLEX_UP)
                return;

        v = p->vol_sc;
        if ((v != NULL) && (v->plexcount > 1))
                gv_sync(v);
        else if (p->org == GV_PLEX_RAID5) {
                if (p->state == GV_PLEX_DEGRADED)
                        gv_rebuild_plex(p);
                else
                        gv_init_plex(p);
        }

        return;
}

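/*
 * Start a volume.  A single-plex volume gets its plex initialized or
 * rebuilt (RAID5) or its subdisks set up directly; a multi-plex volume
 * is synced from a plex that is up.
 */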
void
gv_start_vol(struct gv_volume *v)
{
        struct gv_plex *p;
        struct gv_sd *s;

        KASSERT(v != NULL, ("gv_start_vol: NULL v"));

        if (v->plexcount == 0)
                return;

        else if (v->plexcount == 1) {
                p = LIST_FIRST(&v->plexes);
                KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
                if (p->org == GV_PLEX_RAID5) {
                        switch (p->state) {
                        case GV_PLEX_DOWN:
                                gv_init_plex(p);
                                break;
                        case GV_PLEX_DEGRADED:
                                gv_rebuild_plex(p);
                                break;
                        default:
                                return;
                        }
                } else {
                        LIST_FOREACH(s, &p->subdisks, in_plex) {
                                gv_set_sd_state(s, GV_SD_UP,
                                    GV_SETSTATE_CONFIG);
                        }
                }
        } else
                gv_sync(v);
}

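/*
 * Sync a multi-plex volume.  The first plex found in the 'up' state is used
 * as the source, and a sync thread is spawned for every other plex that is
 * not up yet.
 */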
void
gv_sync(struct gv_volume *v)
{
        struct gv_softc *sc;
        struct gv_plex *p, *up;
        struct gv_sync_args *sync;

        KASSERT(v != NULL, ("gv_sync: NULL v"));
        sc = v->vinumconf;
        KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));

        /* Find the plex that's up. */
        up = NULL;
        LIST_FOREACH(up, &v->plexes, in_volume) {
                if (up->state == GV_PLEX_UP)
                        break;
        }

        /* Didn't find a good plex. */
        if (up == NULL)
                return;

        LIST_FOREACH(p, &v->plexes, in_volume) {
                if ((p == up) || (p->state == GV_PLEX_UP))
                        continue;
                sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
                sync->v = v;
                sync->from = up;
                sync->to = p;
                sync->syncsize = GV_DFLT_SYNCSIZE;
                kthread_create(gv_sync_td, sync, NULL, 0, 0, "sync_p '%s'",
                    p->name);
        }
}

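/*
 * Kick off the rebuild of a degraded RAID5 plex by spawning a worker
 * thread, unless the plex is already syncing or is currently open.
 */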
void
gv_rebuild_plex(struct gv_plex *p)
{
        struct gv_sync_args *sync;

        if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom))
                return;

        sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
        sync->to = p;
        sync->syncsize = GV_DFLT_SYNCSIZE;

        kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
            p->name);
}

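/* Initialize a plex by initializing each of its subdisks. */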
int
gv_init_plex(struct gv_plex *p)
{
        struct gv_sd *s;
        int err;

        KASSERT(p != NULL, ("gv_init_plex: NULL p"));

        LIST_FOREACH(s, &p->subdisks, in_plex) {
                err = gv_init_sd(s);
                if (err)
                        return (err);
        }

        return (0);
}

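/*
 * Initialize a subdisk: force it into the initializing state and spawn
 * the worker thread that writes out the data.
 */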
int
gv_init_sd(struct gv_sd *s)
{
        KASSERT(s != NULL, ("gv_init_sd: NULL s"));

        if (gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE))
                return (-1);

        s->init_size = GV_DFLT_SYNCSIZE;
        s->flags &= ~GV_SD_INITCANCEL;

        /* Spawn the thread that does the work for us. */
        kthread_create(gv_init_td, s, NULL, 0, 0, "init_sd %s", s->name);

        return (0);
}

/* This thread is responsible for rebuilding a degraded RAID5 plex. */
void
gv_rebuild_td(void *arg)
{
        struct bio *bp;
        struct gv_plex *p;
        struct g_consumer *cp;
        struct gv_sync_args *sync;
        u_char *buf;
        off_t i;
        int error;

        buf = NULL;
        bp = NULL;

        sync = arg;
        p = sync->to;
        p->synced = 0;
        p->flags |= GV_PLEX_SYNCING;
        cp = p->consumer;

        g_topology_lock();
        error = g_access(cp, 1, 1, 0);
        if (error) {
                g_topology_unlock();
                printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
                    "%d\n", p->name, error);
                kthread_exit(error);
        }
        g_topology_unlock();

        buf = g_malloc(sync->syncsize, M_WAITOK);

        printf("GEOM_VINUM: rebuild of %s started\n", p->name);
        for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
/*
                if (i + sync->syncsize > p->size)
                        sync->syncsize = p->size - i;
*/
                bp = g_new_bio();
                if (bp == NULL) {
                        printf("GEOM_VINUM: rebuild of %s failed creating bio: "
                            "out of memory\n", p->name);
                        error = ENOMEM;
                        break;
                }
                bp->bio_cmd = BIO_WRITE;
                bp->bio_done = NULL;
                bp->bio_data = buf;
                bp->bio_cflags |= GV_BIO_REBUILD;
                bp->bio_offset = i;
                bp->bio_length = p->stripesize;

                /* Schedule it down ... */
                g_io_request(bp, cp);

                /* ... and wait for the result. */
                error = biowait(bp, "gwrite");
                if (error) {
                        printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
                            "errno: %d\n", p->name, (intmax_t)i, error);
                        break;
                }
                g_destroy_bio(bp);
                bp = NULL;
        }

        if (bp != NULL)
                g_destroy_bio(bp);
        if (buf != NULL)
                g_free(buf);

        g_topology_lock();
        g_access(cp, -1, -1, 0);
        gv_save_config_all(p->vinumconf);
        g_topology_unlock();

        p->flags &= ~GV_PLEX_SYNCING;
        p->synced = 0;

        /* Successful rebuild. */
        if (!error)
                printf("GEOM_VINUM: rebuild of %s finished\n", p->name);

        g_free(sync);
        kthread_exit(error);
}

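/*
 * Worker thread that copies the contents of a good plex to a plex that
 * needs syncing, one chunk of 'syncsize' bytes at a time.
 */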
void
gv_sync_td(void *arg)
{
        struct bio *bp;
        struct gv_plex *p;
        struct g_consumer *from, *to;
        struct gv_sync_args *sync;
        u_char *buf;
        off_t i;
        int error;

        sync = arg;

        from = sync->from->consumer;
        to = sync->to->consumer;

        p = sync->to;
        p->synced = 0;
        p->flags |= GV_PLEX_SYNCING;

        error = 0;

        g_topology_lock();
        error = g_access(from, 1, 0, 0);
        if (error) {
                g_topology_unlock();
                printf("gvinum: sync from '%s' failed to access consumer: %d\n",
                    sync->from->name, error);
                kthread_exit(error);
        }
        error = g_access(to, 0, 1, 0);
        if (error) {
                g_access(from, -1, 0, 0);
                g_topology_unlock();
                printf("gvinum: sync to '%s' failed to access consumer: %d\n",
                    p->name, error);
                kthread_exit(error);
        }
        g_topology_unlock();

        printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
            sync->to->name);
        for (i = 0; i < p->size; i += sync->syncsize) {
                /* Read some bits from the good plex. */
                buf = g_read_data(from, i, sync->syncsize, &error);
                if (buf == NULL) {
                        printf("gvinum: sync read from '%s' failed at offset "
                            "%jd, errno: %d\n", sync->from->name, (intmax_t)i,
                            error);
                        break;
                }

                /*
                 * Create a bio and schedule it down on the 'bad' plex.  We
                 * cannot simply use g_write_data() because we have to let the
                 * lower parts know that we are an initialization process and
                 * not a 'normal' request.
                 */
                bp = g_new_bio();
                if (bp == NULL) {
                        printf("gvinum: sync write to '%s' failed at offset "
                            "%jd, out of memory\n", p->name, (intmax_t)i);
                        g_free(buf);
                        error = ENOMEM;
                        break;
                }
                bp->bio_cmd = BIO_WRITE;
                bp->bio_offset = i;
                bp->bio_length = sync->syncsize;
                bp->bio_data = buf;
                bp->bio_done = NULL;

                /*
                 * This hack declares this bio as part of an initialization
                 * process, so that the lower levels allow it to get through.
                 */
                bp->bio_cflags |= GV_BIO_SYNCREQ;

                /* Schedule it down ... */
                g_io_request(bp, to);

                /* ... and wait for the result. */
                error = biowait(bp, "gwrite");
                g_destroy_bio(bp);
                g_free(buf);
                if (error) {
                        printf("gvinum: sync write to '%s' failed at offset "
                            "%jd, errno: %d\n", p->name, (intmax_t)i, error);
                        break;
                }

                /* Note that we have synced a little bit more. */
                p->synced += sync->syncsize;
        }

        g_topology_lock();
        g_access(from, -1, 0, 0);
        g_access(to, 0, -1, 0);
        gv_save_config_all(p->vinumconf);
        g_topology_unlock();

        /* Successful sync. */
        if (!error) {
                p->flags &= ~GV_PLEX_SYNCING;
                printf("GEOM_VINUM: plex sync %s -> %s finished\n",
                    sync->from->name, sync->to->name);
        }

        g_free(sync);
        kthread_exit(error);
}

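/*
 * Worker thread that initializes a subdisk by writing zeroes over its
 * whole data area on the underlying drive.
 */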
void
gv_init_td(void *arg)
{
        struct gv_sd *s;
        struct gv_drive *d;
        struct g_geom *gp;
        struct g_consumer *cp;
        int error;
        off_t i, init_size, start, offset, length;
        u_char *buf;

        s = arg;
        KASSERT(s != NULL, ("gv_init_td: NULL s"));
        d = s->drive_sc;
        KASSERT(d != NULL, ("gv_init_td: NULL d"));
        gp = d->geom;
        KASSERT(gp != NULL, ("gv_init_td: NULL gp"));

        cp = LIST_FIRST(&gp->consumer);
        KASSERT(cp != NULL, ("gv_init_td: NULL cp"));

        s->init_error = 0;
        init_size = s->init_size;
        start = s->drive_offset + s->initialized;
        offset = s->drive_offset;
        length = s->size;

        buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);

        g_topology_lock();
        error = g_access(cp, 0, 1, 0);
        if (error) {
                s->init_error = error;
                g_topology_unlock();
                g_free(buf);
                printf("geom_vinum: init '%s' failed to access consumer: %d\n",
                    s->name, error);
                kthread_exit(error);
        }
        g_topology_unlock();

        for (i = start; i < offset + length; i += init_size) {
                if (s->flags & GV_SD_INITCANCEL) {
                        printf("geom_vinum: subdisk '%s' init: cancelled at"
                            " offset %jd (drive offset %jd)\n", s->name,
                            (intmax_t)s->initialized, (intmax_t)i);
                        error = EAGAIN;
                        break;
                }
                error = g_write_data(cp, i, buf, init_size);
                if (error) {
                        printf("geom_vinum: subdisk '%s' init: write failed"
                            " at offset %jd (drive offset %jd)\n", s->name,
                            (intmax_t)s->initialized, (intmax_t)i);
                        break;
                }
                s->initialized += init_size;
        }

        g_free(buf);

        g_topology_lock();
        g_access(cp, 0, -1, 0);
        g_topology_unlock();
        if (error) {
                s->init_error = error;
                g_topology_lock();
                gv_set_sd_state(s, GV_SD_STALE,
                    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
                g_topology_unlock();
        } else {
                g_topology_lock();
                gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
                g_topology_unlock();
                s->initialized = 0;
                printf("geom_vinum: init '%s' finished\n", s->name);
        }
        kthread_exit(error);
}