geom_vinum_init.c revision 143130
1/*-
2 * Copyright (c) 2004 Lukas Ertl
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_init.c 143130 2005-03-04 16:43:40Z le $");
29
30#include <sys/param.h>
31#include <sys/bio.h>
32#include <sys/kernel.h>
33#include <sys/kthread.h>
34#include <sys/libkern.h>
35#include <sys/malloc.h>
36#include <sys/queue.h>
37
38#include <geom/geom.h>
39#include <geom/vinum/geom_vinum_var.h>
40#include <geom/vinum/geom_vinum.h>
41#include <geom/vinum/geom_vinum_share.h>
42
/* Starting volumes and plexes. */
void	gv_start_vol(struct gv_volume *);
void	gv_start_plex(struct gv_plex *);

/* Kicking off initialization, rebuild and synchronization. */
int	gv_init_plex(struct gv_plex *);
int	gv_init_sd(struct gv_sd *);
void	gv_rebuild_plex(struct gv_plex *);
void	gv_sync(struct gv_volume *);

/* Worker thread bodies handed to kthread_create(). */
void	gv_init_td(void *);
void	gv_rebuild_td(void *);
void	gv_sync_td(void *);
52
/*
 * Argument block handed to the sync and rebuild worker threads.  It is
 * allocated zeroed by whoever starts the thread.
 */
struct gv_sync_args {
	struct gv_volume *v;	/* Volume being synced; set by gv_sync(). */
	struct gv_plex *from;	/* Source plex; set by gv_sync(). */
	struct gv_plex *to;	/* Plex that is written to / rebuilt. */
	off_t syncsize;		/* Transfer size; set to GV_DFLT_SYNCSIZE. */
};
59
60void
61gv_parityop(struct g_geom *gp, struct gctl_req *req)
62{
63	struct gv_softc *sc;
64	struct gv_plex *p;
65	struct bio *bp;
66	struct g_consumer *cp;
67	int error, *flags, type, *rebuild, rv;
68	char *plex;
69
70	rv = -1;
71
72	plex = gctl_get_param(req, "plex", NULL);
73	if (plex == NULL) {
74		gctl_error(req, "no plex given");
75		goto out;
76	}
77
78	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
79	if (flags == NULL) {
80		gctl_error(req, "no flags given");
81		goto out;
82	}
83
84	rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
85	if (rebuild == NULL) {
86		gctl_error(req, "no rebuild op given");
87		goto out;
88	}
89
90	sc = gp->softc;
91	type = gv_object_type(sc, plex);
92	switch (type) {
93	case GV_TYPE_PLEX:
94		break;
95	case GV_TYPE_VOL:
96	case GV_TYPE_SD:
97	case GV_TYPE_DRIVE:
98	default:
99		gctl_error(req, "'%s' is not a plex", plex);
100		goto out;
101	}
102
103	p = gv_find_plex(sc, plex);
104	if (p->state != GV_PLEX_UP) {
105		gctl_error(req, "plex %s is not completely accessible",
106		    p->name);
107		goto out;
108	}
109
110	cp = p->consumer;
111	error = g_access(cp, 1, 1, 0);
112	if (error) {
113		gctl_error(req, "cannot access consumer");
114		goto out;
115	}
116	g_topology_unlock();
117
118	/* Reset the check pointer when using -f. */
119	if (*flags & GV_FLAG_F)
120		p->synced = 0;
121
122	bp = g_new_bio();
123	if (bp == NULL) {
124		gctl_error(req, "cannot create BIO - out of memory");
125		g_topology_lock();
126		error = g_access(cp, -1, -1, 0);
127		goto out;
128	}
129	bp->bio_cmd = BIO_WRITE;
130	bp->bio_done = NULL;
131	bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
132	bp->bio_cflags |= GV_BIO_CHECK;
133	if (*rebuild)
134		bp->bio_cflags |= GV_BIO_PARITY;
135	bp->bio_offset = p->synced;
136	bp->bio_length = p->stripesize;
137
138	/* Schedule it down ... */
139	g_io_request(bp, cp);
140
141	/* ... and wait for the result. */
142	error = biowait(bp, "gwrite");
143	g_free(bp->bio_data);
144	g_destroy_bio(bp);
145
146	if (error) {
147		/* Incorrect parity. */
148		if (error == EAGAIN)
149			rv = 1;
150
151		/* Some other error happened. */
152		else
153			gctl_error(req, "Parity check failed at offset 0x%jx, "
154			    "errno %d", (intmax_t)p->synced, error);
155
156	/* Correct parity. */
157	} else
158		rv = 0;
159
160	gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));
161
162	/* Advance the checkpointer if there was no error. */
163	if (rv == 0)
164		p->synced += p->stripesize;
165
166	/* End of plex; reset the check pointer and signal it to the caller. */
167	if (p->synced >= p->size) {
168		p->synced = 0;
169		rv = -2;
170	}
171
172	g_topology_lock();
173	error = g_access(cp, -1, -1, 0);
174
175out:
176	gctl_set_param(req, "rv", &rv, sizeof(rv));
177}
178
179void
180gv_start_obj(struct g_geom *gp, struct gctl_req *req)
181{
182	struct gv_softc *sc;
183	struct gv_volume *v;
184	struct gv_plex *p;
185	int *argc, *initsize;
186	char *argv, buf[20];
187	int i, type;
188
189	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
190	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
191
192	if (argc == NULL || *argc == 0) {
193		gctl_error(req, "no arguments given");
194		return;
195	}
196
197	sc = gp->softc;
198
199	for (i = 0; i < *argc; i++) {
200		snprintf(buf, sizeof(buf), "argv%d", i);
201		argv = gctl_get_param(req, buf, NULL);
202		if (argv == NULL)
203			continue;
204		type = gv_object_type(sc, argv);
205		switch (type) {
206		case GV_TYPE_VOL:
207			v = gv_find_vol(sc, argv);
208			gv_start_vol(v);
209			break;
210
211		case GV_TYPE_PLEX:
212			p = gv_find_plex(sc, argv);
213			gv_start_plex(p);
214			break;
215
216		case GV_TYPE_SD:
217		case GV_TYPE_DRIVE:
218			/* XXX not yet */
219			gctl_error(req, "cannot start '%s'", argv);
220			return;
221		default:
222			gctl_error(req, "unknown object '%s'", argv);
223			return;
224		}
225	}
226}
227
228void
229gv_start_plex(struct gv_plex *p)
230{
231	struct gv_volume *v;
232
233	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
234
235	if (p->state == GV_PLEX_UP)
236		return;
237
238	v = p->vol_sc;
239	if ((v != NULL) && (v->plexcount > 1))
240		gv_sync(v);
241	else if (p->org == GV_PLEX_RAID5) {
242		if (p->state == GV_PLEX_DEGRADED)
243			gv_rebuild_plex(p);
244		else
245			gv_init_plex(p);
246	}
247
248	return;
249}
250
251void
252gv_start_vol(struct gv_volume *v)
253{
254	struct gv_plex *p;
255	struct gv_sd *s;
256
257	KASSERT(v != NULL, ("gv_start_vol: NULL v"));
258
259	if (v->plexcount == 0)
260		return;
261
262	else if (v->plexcount == 1) {
263		p = LIST_FIRST(&v->plexes);
264		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
265		if (p->org == GV_PLEX_RAID5) {
266			switch (p->state) {
267			case GV_PLEX_DOWN:
268				gv_init_plex(p);
269				break;
270			case GV_PLEX_DEGRADED:
271				gv_rebuild_plex(p);
272				break;
273			default:
274				return;
275			}
276		} else {
277			LIST_FOREACH(s, &p->subdisks, in_plex) {
278				gv_set_sd_state(s, GV_SD_UP,
279				    GV_SETSTATE_CONFIG);
280			}
281		}
282	} else
283		gv_sync(v);
284}
285
286void
287gv_sync(struct gv_volume *v)
288{
289	struct gv_softc *sc;
290	struct gv_plex *p, *up;
291	struct gv_sync_args *sync;
292
293	KASSERT(v != NULL, ("gv_sync: NULL v"));
294	sc = v->vinumconf;
295	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
296
297	/* Find the plex that's up. */
298	up = NULL;
299	LIST_FOREACH(up, &v->plexes, in_volume) {
300		if (up->state == GV_PLEX_UP)
301			break;
302	}
303
304	/* Didn't find a good plex. */
305	if (up == NULL)
306		return;
307
308	LIST_FOREACH(p, &v->plexes, in_volume) {
309		if ((p == up) || (p->state == GV_PLEX_UP))
310			continue;
311		sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
312		sync->v = v;
313		sync->from = up;
314		sync->to = p;
315		sync->syncsize = GV_DFLT_SYNCSIZE;
316		kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'",
317		    p->name);
318	}
319}
320
321void
322gv_rebuild_plex(struct gv_plex *p)
323{
324	struct gv_sync_args *sync;
325
326	if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom))
327		return;
328
329	sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
330	sync->to = p;
331	sync->syncsize = GV_DFLT_SYNCSIZE;
332
333	kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
334	    p->name);
335}
336
337int
338gv_init_plex(struct gv_plex *p)
339{
340	struct gv_sd *s;
341	int err;
342
343	KASSERT(p != NULL, ("gv_init_plex: NULL p"));
344
345	LIST_FOREACH(s, &p->subdisks, in_plex) {
346		err = gv_init_sd(s);
347		if (err)
348			return (err);
349	}
350
351	return (0);
352}
353
354int
355gv_init_sd(struct gv_sd *s)
356{
357	KASSERT(s != NULL, ("gv_init_sd: NULL s"));
358
359	if (gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE))
360		return (-1);
361
362	s->init_size = GV_DFLT_SYNCSIZE;
363	s->flags &= ~GV_SD_INITCANCEL;
364
365	/* Spawn the thread that does the work for us. */
366	kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s", s->name);
367
368	return (0);
369}
370
371/* This thread is responsible for rebuilding a degraded RAID5 plex. */
372void
373gv_rebuild_td(void *arg)
374{
375	struct bio *bp;
376	struct gv_plex *p;
377	struct g_consumer *cp;
378	struct gv_sync_args *sync;
379	u_char *buf;
380	off_t i;
381	int error;
382
383	buf = NULL;
384	bp = NULL;
385
386	sync = arg;
387	p = sync->to;
388	p->synced = 0;
389	p->flags |= GV_PLEX_SYNCING;
390	cp = p->consumer;
391
392	g_topology_lock();
393	error = g_access(cp, 1, 1, 0);
394	if (error) {
395		g_topology_unlock();
396		printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
397		    "%d\n", p->name, error);
398		kthread_exit(error);
399	}
400	g_topology_unlock();
401
402	buf = g_malloc(sync->syncsize, M_WAITOK);
403
404	printf("GEOM_VINUM: rebuild of %s started\n", p->name);
405	i = 0;
406	for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
407/*
408		if (i + sync->syncsize > p->size)
409			sync->syncsize = p->size - i;
410*/
411		bp = g_new_bio();
412		if (bp == NULL) {
413			printf("GEOM_VINUM: rebuild of %s failed creating bio: "
414			    "out of memory\n", p->name);
415			break;
416		}
417		bp->bio_cmd = BIO_WRITE;
418		bp->bio_done = NULL;
419		bp->bio_data = buf;
420		bp->bio_cflags |= GV_BIO_REBUILD;
421		bp->bio_offset = i;
422		bp->bio_length = p->stripesize;
423
424		/* Schedule it down ... */
425		g_io_request(bp, cp);
426
427		/* ... and wait for the result. */
428		error = biowait(bp, "gwrite");
429		if (error) {
430			printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
431			    "errno: %d\n", p->name, i, error);
432			break;
433		}
434		g_destroy_bio(bp);
435		bp = NULL;
436	}
437
438	if (bp != NULL)
439		g_destroy_bio(bp);
440	if (buf != NULL)
441		g_free(buf);
442
443	g_topology_lock();
444	g_access(cp, -1, -1, 0);
445	gv_save_config_all(p->vinumconf);
446	g_topology_unlock();
447
448	p->flags &= ~GV_PLEX_SYNCING;
449	p->synced = 0;
450
451	/* Successful initialization. */
452	if (!error)
453		printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
454
455	g_free(sync);
456	kthread_exit(error);
457}
458
459void
460gv_sync_td(void *arg)
461{
462	struct bio *bp;
463	struct gv_plex *p;
464	struct g_consumer *from, *to;
465	struct gv_sync_args *sync;
466	u_char *buf;
467	off_t i;
468	int error;
469
470	sync = arg;
471
472	from = sync->from->consumer;
473	to = sync->to->consumer;
474
475	p = sync->to;
476
477	if (p->flags & GV_PLEX_SYNCING) {
478		printf("GEOM_VINUM: plex '%s' is already syncing.\n", p->name);
479		g_free(sync);
480		kthread_exit(0);
481	}
482
483	p->synced = 0;
484	p->flags |= GV_PLEX_SYNCING;
485
486	error = 0;
487
488	g_topology_lock();
489	error = g_access(from, 1, 0, 0);
490	if (error) {
491		g_topology_unlock();
492		printf("GEOM_VINUM: sync from '%s' failed to access "
493		    "consumer: %d\n", sync->from->name, error);
494		g_free(sync);
495		kthread_exit(error);
496	}
497	error = g_access(to, 0, 1, 0);
498	if (error) {
499		g_access(from, -1, 0, 0);
500		g_topology_unlock();
501		printf("GEOM_VINUM: sync to '%s' failed to access "
502		    "consumer: %d\n", p->name, error);
503		g_free(sync);
504		kthread_exit(error);
505	}
506	g_topology_unlock();
507
508	printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
509	    sync->to->name);
510	for (i = 0; i < p->size; i+= sync->syncsize) {
511		/* Read some bits from the good plex. */
512		buf = g_read_data(from, i, sync->syncsize, &error);
513		if (buf == NULL) {
514			printf("GEOM_VINUM: sync read from '%s' failed at "
515			    "offset %jd; errno: %d\n", sync->from->name, i,
516			    error);
517			break;
518		}
519
520		/*
521		 * Create a bio and schedule it down on the 'bad' plex.  We
522		 * cannot simply use g_write_data() because we have to let the
523		 * lower parts know that we are an initialization process and
524		 * not a 'normal' request.
525		 */
526		bp = g_new_bio();
527		if (bp == NULL) {
528			printf("GEOM_VINUM: sync write to '%s' failed at "
529			    "offset %jd; out of memory\n", p->name, i);
530			g_free(buf);
531			break;
532		}
533		bp->bio_cmd = BIO_WRITE;
534		bp->bio_offset = i;
535		bp->bio_length = sync->syncsize;
536		bp->bio_data = buf;
537		bp->bio_done = NULL;
538
539		/*
540		 * This hack declare this bio as part of an initialization
541		 * process, so that the lower levels allow it to get through.
542		 */
543		bp->bio_cflags |= GV_BIO_SYNCREQ;
544
545		/* Schedule it down ... */
546		g_io_request(bp, to);
547
548		/* ... and wait for the result. */
549		error = biowait(bp, "gwrite");
550		g_destroy_bio(bp);
551		g_free(buf);
552		if (error) {
553			printf("GEOM_VINUM: sync write to '%s' failed at "
554			    "offset %jd; errno: %d\n", p->name, i, error);
555			break;
556		}
557
558		/* Note that we have synced a little bit more. */
559		p->synced += sync->syncsize;
560	}
561
562	g_topology_lock();
563	g_access(from, -1, 0, 0);
564	g_access(to, 0, -1, 0);
565	gv_save_config_all(p->vinumconf);
566	g_topology_unlock();
567
568	/* Successful initialization. */
569	if (!error)
570		printf("GEOM_VINUM: plex sync %s -> %s finished\n",
571		    sync->from->name, sync->to->name);
572
573	p->flags &= ~GV_PLEX_SYNCING;
574	p->synced = 0;
575
576	g_free(sync);
577	kthread_exit(error);
578}
579
580void
581gv_init_td(void *arg)
582{
583	struct gv_sd *s;
584	struct gv_drive *d;
585	struct g_geom *gp;
586	struct g_consumer *cp;
587	int error;
588	off_t i, init_size, start, offset, length;
589	u_char *buf;
590
591	s = arg;
592	KASSERT(s != NULL, ("gv_init_td: NULL s"));
593	d = s->drive_sc;
594	KASSERT(d != NULL, ("gv_init_td: NULL d"));
595	gp = d->geom;
596	KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
597
598	cp = LIST_FIRST(&gp->consumer);
599	KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
600
601	s->init_error = 0;
602	init_size = s->init_size;
603	start = s->drive_offset + s->initialized;
604	offset = s->drive_offset;
605	length = s->size;
606
607	buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
608
609	g_topology_lock();
610	error = g_access(cp, 0, 1, 0);
611	if (error) {
612		s->init_error = error;
613		g_topology_unlock();
614		printf("geom_vinum: init '%s' failed to access consumer: %d\n",
615		    s->name, error);
616		kthread_exit(error);
617	}
618	g_topology_unlock();
619
620	for (i = start; i < offset + length; i += init_size) {
621		if (s->flags & GV_SD_INITCANCEL) {
622			printf("geom_vinum: subdisk '%s' init: cancelled at"
623			    " offset %jd (drive offset %jd)\n", s->name,
624			    (intmax_t)s->initialized, (intmax_t)i);
625			error = EAGAIN;
626			break;
627		}
628		error = g_write_data(cp, i, buf, init_size);
629		if (error) {
630			printf("geom_vinum: subdisk '%s' init: write failed"
631			    " at offset %jd (drive offset %jd)\n", s->name,
632			    (intmax_t)s->initialized, (intmax_t)i);
633			break;
634		}
635		s->initialized += init_size;
636	}
637
638	g_free(buf);
639
640	g_topology_lock();
641	g_access(cp, 0, -1, 0);
642	g_topology_unlock();
643	if (error) {
644		s->init_error = error;
645		g_topology_lock();
646		gv_set_sd_state(s, GV_SD_STALE,
647		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
648		g_topology_unlock();
649	} else {
650		g_topology_lock();
651		gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
652		g_topology_unlock();
653		s->initialized = 0;
654		printf("geom_vinum: init '%s' finished\n", s->name);
655	}
656	kthread_exit(error);
657}
658