1/*-
2 * Copyright (c) 2004, 2007 Lukas Ertl
3 * Copyright (c) 2007, 2009 Ulf Lilleengen
4 * Copyright (c) 1997, 1998, 1999
5 *      Nan Yang Computer Services Limited.  All rights reserved.
6 *
7 *  Parts written by Greg Lehey
8 *
9 *  This software is distributed under the so-called ``Berkeley
10 *  License'':
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *      This product includes software developed by Nan Yang Computer
23 *      Services Limited.
24 * 4. Neither the name of the Company nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * This software is provided ``as is'', and any express or implied
29 * warranties, including, but not limited to, the implied warranties of
30 * merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall the company or contributors be liable for any
32 * direct, indirect, incidental, special, exemplary, or consequential
33 * damages (including, but not limited to, procurement of substitute
34 * goods or services; loss of use, data, or profits; or business
35 * interruption) however caused and on any theory of liability, whether
36 * in contract, strict liability, or tort (including negligence or
37 * otherwise) arising in any way out of the use of this software, even if
38 * advised of the possibility of such damage.
39 *
40 */
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: releng/10.3/sys/geom/vinum/geom_vinum_subr.c 223921 2011-07-11 05:22:31Z ae $");
44
45#include <sys/param.h>
46#include <sys/malloc.h>
47#include <sys/sbuf.h>
48#include <sys/systm.h>
49
50#include <geom/geom.h>
51#include <geom/vinum/geom_vinum_var.h>
52#include <geom/vinum/geom_vinum.h>
53#include <geom/vinum/geom_vinum_share.h>
54
/* Forward declarations for helpers that are defined further down in this file. */
int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
static off_t gv_plex_smallest_sd(struct gv_plex *);
57
58void
59gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
60{
61	char *aptr, *bptr, *cptr;
62	struct gv_volume *v, *v2;
63	struct gv_plex *p, *p2;
64	struct gv_sd *s, *s2;
65	int error, is_newer, tokens;
66	char *token[GV_MAXARGS];
67
68	is_newer = gv_drive_is_newer(sc, d);
69
70	/* Until the end of the string *buf. */
71	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
72		bptr = aptr;
73		cptr = aptr;
74
75		/* Seperate input lines. */
76		while (*bptr != '\n')
77			bptr++;
78		*bptr = '\0';
79		bptr++;
80
81		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
82
83		if (tokens <= 0)
84			continue;
85
86		if (!strcmp(token[0], "volume")) {
87			v = gv_new_volume(tokens, token);
88			if (v == NULL) {
89				G_VINUM_DEBUG(0, "config parse failed volume");
90				break;
91			}
92
93			v2 = gv_find_vol(sc, v->name);
94			if (v2 != NULL) {
95				if (is_newer) {
96					v2->state = v->state;
97					G_VINUM_DEBUG(2, "newer volume found!");
98				}
99				g_free(v);
100				continue;
101			}
102
103			gv_create_volume(sc, v);
104
105		} else if (!strcmp(token[0], "plex")) {
106			p = gv_new_plex(tokens, token);
107			if (p == NULL) {
108				G_VINUM_DEBUG(0, "config parse failed plex");
109				break;
110			}
111
112			p2 = gv_find_plex(sc, p->name);
113			if (p2 != NULL) {
114				/* XXX */
115				if (is_newer) {
116					p2->state = p->state;
117					G_VINUM_DEBUG(2, "newer plex found!");
118				}
119				g_free(p);
120				continue;
121			}
122
123			error = gv_create_plex(sc, p);
124			if (error)
125				continue;
126			/*
127			 * These flags were set in gv_create_plex() and are not
128			 * needed here (on-disk config parsing).
129			 */
130			p->flags &= ~GV_PLEX_ADDED;
131
132		} else if (!strcmp(token[0], "sd")) {
133			s = gv_new_sd(tokens, token);
134
135			if (s == NULL) {
136				G_VINUM_DEBUG(0, "config parse failed subdisk");
137				break;
138			}
139
140			s2 = gv_find_sd(sc, s->name);
141			if (s2 != NULL) {
142				/* XXX */
143				if (is_newer) {
144					s2->state = s->state;
145					G_VINUM_DEBUG(2, "newer subdisk found!");
146				}
147				g_free(s);
148				continue;
149			}
150
151			/*
152			 * Signal that this subdisk was tasted, and could
153			 * possibly reference a drive that isn't in our config
154			 * yet.
155			 */
156			s->flags |= GV_SD_TASTED;
157
158			if (s->state == GV_SD_UP)
159				s->flags |= GV_SD_CANGOUP;
160
161			error = gv_create_sd(sc, s);
162			if (error)
163				continue;
164
165			/*
166			 * This flag was set in gv_create_sd() and is not
167			 * needed here (on-disk config parsing).
168			 */
169			s->flags &= ~GV_SD_NEWBORN;
170			s->flags &= ~GV_SD_GROW;
171		}
172	}
173}
174
175/*
176 * Format the vinum configuration properly.  If ondisk is non-zero then the
177 * configuration is intended to be written to disk later.
178 */
179void
180gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
181{
182	struct gv_drive *d;
183	struct gv_sd *s;
184	struct gv_plex *p;
185	struct gv_volume *v;
186
187	/*
188	 * We don't need the drive configuration if we're not writing the
189	 * config to disk.
190	 */
191	if (!ondisk) {
192		LIST_FOREACH(d, &sc->drives, drive) {
193			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
194			    d->name, d->device);
195		}
196	}
197
198	LIST_FOREACH(v, &sc->volumes, volume) {
199		if (!ondisk)
200			sbuf_printf(sb, "%s", prefix);
201		sbuf_printf(sb, "volume %s", v->name);
202		if (ondisk)
203			sbuf_printf(sb, " state %s", gv_volstate(v->state));
204		sbuf_printf(sb, "\n");
205	}
206
207	LIST_FOREACH(p, &sc->plexes, plex) {
208		if (!ondisk)
209			sbuf_printf(sb, "%s", prefix);
210		sbuf_printf(sb, "plex name %s org %s ", p->name,
211		    gv_plexorg(p->org));
212		if (gv_is_striped(p))
213			sbuf_printf(sb, "%ds ", p->stripesize / 512);
214		if (p->vol_sc != NULL)
215			sbuf_printf(sb, "vol %s", p->volume);
216		if (ondisk)
217			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
218		sbuf_printf(sb, "\n");
219	}
220
221	LIST_FOREACH(s, &sc->subdisks, sd) {
222		if (!ondisk)
223			sbuf_printf(sb, "%s", prefix);
224		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
225		    "%jds", s->name, s->drive, s->size / 512,
226		    s->drive_offset / 512);
227		if (s->plex_sc != NULL) {
228			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
229			    s->plex_offset / 512);
230		}
231		if (ondisk)
232			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
233		sbuf_printf(sb, "\n");
234	}
235}
236
237static off_t
238gv_plex_smallest_sd(struct gv_plex *p)
239{
240	struct gv_sd *s;
241	off_t smallest;
242
243	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
244
245	s = LIST_FIRST(&p->subdisks);
246	if (s == NULL)
247		return (-1);
248	smallest = s->size;
249	LIST_FOREACH(s, &p->subdisks, in_plex) {
250		if (s->size < smallest)
251			smallest = s->size;
252	}
253	return (smallest);
254}
255
256/* Walk over plexes in a volume and count how many are down. */
257int
258gv_plexdown(struct gv_volume *v)
259{
260	int plexdown;
261	struct gv_plex *p;
262
263	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
264
265	plexdown = 0;
266
267	LIST_FOREACH(p, &v->plexes, plex) {
268		if (p->state == GV_PLEX_DOWN)
269			plexdown++;
270	}
271	return (plexdown);
272}
273
274int
275gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
276{
277	struct gv_sd *s2;
278	off_t psizeorig, remainder, smallest;
279
280	/* If this subdisk was already given to this plex, do nothing. */
281	if (s->plex_sc == p)
282		return (0);
283
284	/* Check correct size of this subdisk. */
285	s2 = LIST_FIRST(&p->subdisks);
286	/* Adjust the subdisk-size if necessary. */
287	if (s2 != NULL && gv_is_striped(p)) {
288		/* First adjust to the stripesize. */
289		remainder = s->size % p->stripesize;
290
291		if (remainder) {
292			G_VINUM_DEBUG(1, "size of sd %s is not a "
293			    "multiple of plex stripesize, taking off "
294			    "%jd bytes", s->name,
295			    (intmax_t)remainder);
296			gv_adjust_freespace(s, remainder);
297		}
298
299		smallest = gv_plex_smallest_sd(p);
300		/* Then take off extra if other subdisks are smaller. */
301		remainder = s->size - smallest;
302
303		/*
304		 * Don't allow a remainder below zero for running plexes, it's too
305		 * painful, and if someone were to accidentally do this, the
306		 * resulting array might be smaller than the original... not god
307		 */
308		if (remainder < 0) {
309			if (!(p->flags & GV_PLEX_NEWBORN)) {
310				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
311				    s->name, p->name);
312				return (GV_ERR_BADSIZE);
313			}
314			/* Adjust other subdisks. */
315			LIST_FOREACH(s2, &p->subdisks, in_plex) {
316				G_VINUM_DEBUG(1, "size of sd %s is to big, "
317				    "taking off %jd bytes", s->name,
318				    (intmax_t)remainder);
319				gv_adjust_freespace(s2, (remainder * -1));
320			}
321		} else if (remainder > 0) {
322			G_VINUM_DEBUG(1, "size of sd %s is to big, "
323			    "taking off %jd bytes", s->name,
324			    (intmax_t)remainder);
325			gv_adjust_freespace(s, remainder);
326		}
327	}
328
329	/* Find the correct plex offset for this subdisk, if needed. */
330	if (s->plex_offset == -1) {
331		/*
332		 * First set it to 0 to catch the case where we had a detached
333		 * subdisk that didn't get any good offset.
334		 */
335		s->plex_offset = 0;
336		if (p->sdcount) {
337			LIST_FOREACH(s2, &p->subdisks, in_plex) {
338				if (gv_is_striped(p))
339					s->plex_offset = p->sdcount *
340					    p->stripesize;
341				else
342					s->plex_offset = s2->plex_offset +
343					    s2->size;
344			}
345		}
346	}
347
348	/* There are no subdisks for this plex yet, just insert it. */
349	if (LIST_EMPTY(&p->subdisks)) {
350		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
351
352	/* Insert in correct order, depending on plex_offset. */
353	} else {
354		LIST_FOREACH(s2, &p->subdisks, in_plex) {
355			if (s->plex_offset < s2->plex_offset) {
356				LIST_INSERT_BEFORE(s2, s, in_plex);
357				break;
358			} else if (LIST_NEXT(s2, in_plex) == NULL) {
359				LIST_INSERT_AFTER(s2, s, in_plex);
360				break;
361			}
362		}
363	}
364
365	s->plex_sc = p;
366        /* Adjust the size of our plex. We check if the plex misses a subdisk,
367	 * so we don't make the plex smaller than it actually should be.
368	 */
369	psizeorig = p->size;
370	p->size = gv_plex_size(p);
371	/* Make sure the size is not changed. */
372	if (p->sddetached > 0) {
373		if (p->size < psizeorig) {
374			p->size = psizeorig;
375			/* We make sure wee need another subdisk. */
376			if (p->sddetached == 1)
377				p->sddetached++;
378		}
379		p->sddetached--;
380	} else {
381		if ((p->org == GV_PLEX_RAID5 ||
382		    p->org == GV_PLEX_STRIPED) &&
383		    !(p->flags & GV_PLEX_NEWBORN) &&
384		    p->state == GV_PLEX_UP) {
385			s->flags |= GV_SD_GROW;
386		}
387		p->sdcount++;
388	}
389
390	return (0);
391}
392
393void
394gv_update_vol_size(struct gv_volume *v, off_t size)
395{
396	if (v == NULL)
397		return;
398	if (v->provider != NULL) {
399		g_topology_lock();
400		v->provider->mediasize = size;
401		g_topology_unlock();
402	}
403	v->size = size;
404}
405
406/* Return how many subdisks that constitute the original plex. */
407int
408gv_sdcount(struct gv_plex *p, int growing)
409{
410	struct gv_sd *s;
411	int sdcount;
412
413	sdcount = p->sdcount;
414	if (growing) {
415		LIST_FOREACH(s, &p->subdisks, in_plex) {
416			if (s->flags & GV_SD_GROW)
417				sdcount--;
418		}
419	}
420
421	return (sdcount);
422}
423
424/* Calculates the plex size. */
425off_t
426gv_plex_size(struct gv_plex *p)
427{
428	struct gv_sd *s;
429	off_t size;
430	int sdcount;
431
432	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
433
434	/* Adjust the size of our plex. */
435	size = 0;
436	sdcount = gv_sdcount(p, 1);
437	switch (p->org) {
438	case GV_PLEX_CONCAT:
439		LIST_FOREACH(s, &p->subdisks, in_plex)
440			size += s->size;
441		break;
442	case GV_PLEX_STRIPED:
443		s = LIST_FIRST(&p->subdisks);
444		size = ((s != NULL) ? (sdcount * s->size) : 0);
445		break;
446	case GV_PLEX_RAID5:
447		s = LIST_FIRST(&p->subdisks);
448		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
449		break;
450	}
451
452	return (size);
453}
454
455/* Returns the size of a volume. */
456off_t
457gv_vol_size(struct gv_volume *v)
458{
459	struct gv_plex *p;
460	off_t minplexsize;
461
462	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
463
464	p = LIST_FIRST(&v->plexes);
465	if (p == NULL)
466		return (0);
467
468	minplexsize = p->size;
469	LIST_FOREACH(p, &v->plexes, in_volume) {
470		if (p->size < minplexsize) {
471			minplexsize = p->size;
472		}
473	}
474	return (minplexsize);
475}
476
477void
478gv_update_plex_config(struct gv_plex *p)
479{
480	struct gv_sd *s, *s2;
481	off_t remainder;
482	int required_sds, state;
483
484	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
485
486	/* The plex was added to an already running volume. */
487	if (p->flags & GV_PLEX_ADDED)
488		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
489
490	switch (p->org) {
491	case GV_PLEX_STRIPED:
492		required_sds = 2;
493		break;
494	case GV_PLEX_RAID5:
495		required_sds = 3;
496		break;
497	case GV_PLEX_CONCAT:
498	default:
499		required_sds = 0;
500		break;
501	}
502
503	if (required_sds) {
504		if (p->sdcount < required_sds) {
505			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
506		}
507
508		/*
509		 * The subdisks in striped plexes must all have the same size.
510		 */
511		s = LIST_FIRST(&p->subdisks);
512		LIST_FOREACH(s2, &p->subdisks, in_plex) {
513			if (s->size != s2->size) {
514				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
515				    "(%jd) <> %s (%jd)", s->name, s->size,
516				    s2->name, s2->size);
517				gv_set_plex_state(p, GV_PLEX_DOWN,
518				    GV_SETSTATE_FORCE);
519			}
520		}
521
522		LIST_FOREACH(s, &p->subdisks, in_plex) {
523			/* Trim subdisk sizes to match the stripe size. */
524			remainder = s->size % p->stripesize;
525			if (remainder) {
526				G_VINUM_DEBUG(1, "size of sd %s is not a "
527				    "multiple of plex stripesize, taking off "
528				    "%jd bytes", s->name, (intmax_t)remainder);
529				gv_adjust_freespace(s, remainder);
530			}
531		}
532	}
533
534	p->size = gv_plex_size(p);
535	if (p->sdcount == 0)
536		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
537	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
538		LIST_FOREACH(s, &p->subdisks, in_plex)
539			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
540		/* If added to a volume, we want the plex to be down. */
541		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
542		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
543		p->flags &= ~GV_PLEX_ADDED;
544	} else if (p->flags & GV_PLEX_ADDED) {
545		LIST_FOREACH(s, &p->subdisks, in_plex)
546			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
547		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
548		p->flags &= ~GV_PLEX_ADDED;
549	} else if (p->state == GV_PLEX_UP) {
550		LIST_FOREACH(s, &p->subdisks, in_plex) {
551			if (s->flags & GV_SD_GROW) {
552				gv_set_plex_state(p, GV_PLEX_GROWABLE,
553				    GV_SETSTATE_FORCE);
554				break;
555			}
556		}
557	}
558	/* Our plex is grown up now. */
559	p->flags &= ~GV_PLEX_NEWBORN;
560}
561
562/*
563 * Give a subdisk to a drive, check and adjust several parameters, adjust
564 * freelist.
565 */
566int
567gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
568{
569	struct gv_sd *s2;
570	struct gv_freelist *fl, *fl2;
571	off_t tmp;
572	int i;
573
574	fl2 = NULL;
575
576	/* Shortcut for "referenced" drives. */
577	if (d->flags & GV_DRIVE_REFERENCED) {
578		s->drive_sc = d;
579		return (0);
580	}
581
582	/* Check if this subdisk was already given to this drive. */
583	if (s->drive_sc != NULL) {
584		if (s->drive_sc == d) {
585			if (!(s->flags & GV_SD_TASTED)) {
586				return (0);
587			}
588		} else {
589			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
590			    "(already on '%s')", s->name, d->name,
591			    s->drive_sc->name);
592			return (GV_ERR_ISATTACHED);
593		}
594	}
595
596	/* Preliminary checks. */
597	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
598		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
599		    s->name);
600		return (GV_ERR_NOSPACE);
601	}
602
603	/* If no size was given for this subdisk, try to auto-size it... */
604	if (s->size == -1) {
605		/* Find the largest available slot. */
606		LIST_FOREACH(fl, &d->freelist, freelist) {
607			if (fl->size < s->size)
608				continue;
609			s->size = fl->size;
610			s->drive_offset = fl->offset;
611			fl2 = fl;
612		}
613
614		/* No good slot found? */
615		if (s->size == -1) {
616			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
617			    s->name, d->name);
618			return (GV_ERR_BADSIZE);
619		}
620
621	/*
622	 * ... or check if we have a free slot that's large enough for the
623	 * given size.
624	 */
625	} else {
626		i = 0;
627		LIST_FOREACH(fl, &d->freelist, freelist) {
628			if (fl->size < s->size)
629				continue;
630			/* Assign drive offset, if not given. */
631			if (s->drive_offset == -1)
632				s->drive_offset = fl->offset;
633			fl2 = fl;
634			i++;
635			break;
636		}
637
638		/* Couldn't find a good free slot. */
639		if (i == 0) {
640			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
641			    s->name, d->name);
642			return (GV_ERR_NOSPACE);
643		}
644	}
645
646	/* No drive offset given, try to calculate it. */
647	if (s->drive_offset == -1) {
648
649		/* Add offsets and sizes from other subdisks on this drive. */
650		LIST_FOREACH(s2, &d->subdisks, from_drive) {
651			s->drive_offset = s2->drive_offset + s2->size;
652		}
653
654		/*
655		 * If there are no other subdisks yet, then set the default
656		 * offset to GV_DATA_START.
657		 */
658		if (s->drive_offset == -1)
659			s->drive_offset = GV_DATA_START;
660
661	/* Check if we have a free slot at the given drive offset. */
662	} else {
663		i = 0;
664		LIST_FOREACH(fl, &d->freelist, freelist) {
665			/* Yes, this subdisk fits. */
666			if ((fl->offset <= s->drive_offset) &&
667			    (fl->offset + fl->size >=
668			    s->drive_offset + s->size)) {
669				i++;
670				fl2 = fl;
671				break;
672			}
673		}
674
675		/* Couldn't find a good free slot. */
676		if (i == 0) {
677			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
678			    "on '%s'", s->name, d->name);
679			return (GV_ERR_NOSPACE);
680		}
681	}
682
683	/*
684	 * Now that all parameters are checked and set up, we can give the
685	 * subdisk to the drive and adjust the freelist.
686	 */
687
688	/* First, adjust the freelist. */
689	LIST_FOREACH(fl, &d->freelist, freelist) {
690		/* Look for the free slot that we have found before. */
691		if (fl != fl2)
692			continue;
693
694		/* The subdisk starts at the beginning of the free slot. */
695		if (fl->offset == s->drive_offset) {
696			fl->offset += s->size;
697			fl->size -= s->size;
698
699			/* The subdisk uses the whole slot, so remove it. */
700			if (fl->size == 0) {
701				d->freelist_entries--;
702				LIST_REMOVE(fl, freelist);
703			}
704		/*
705		 * The subdisk does not start at the beginning of the free
706		 * slot.
707		 */
708		} else {
709			tmp = fl->offset + fl->size;
710			fl->size = s->drive_offset - fl->offset;
711
712			/*
713			 * The subdisk didn't use the complete rest of the free
714			 * slot, so we need to split it.
715			 */
716			if (s->drive_offset + s->size != tmp) {
717				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
718				fl2->offset = s->drive_offset + s->size;
719				fl2->size = tmp - fl2->offset;
720				LIST_INSERT_AFTER(fl, fl2, freelist);
721				d->freelist_entries++;
722			}
723		}
724		break;
725	}
726
727	/*
728	 * This is the first subdisk on this drive, just insert it into the
729	 * list.
730	 */
731	if (LIST_EMPTY(&d->subdisks)) {
732		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
733
734	/* There are other subdisks, so insert this one in correct order. */
735	} else {
736		LIST_FOREACH(s2, &d->subdisks, from_drive) {
737			if (s->drive_offset < s2->drive_offset) {
738				LIST_INSERT_BEFORE(s2, s, from_drive);
739				break;
740			} else if (LIST_NEXT(s2, from_drive) == NULL) {
741				LIST_INSERT_AFTER(s2, s, from_drive);
742				break;
743			}
744		}
745	}
746
747	d->sdcount++;
748	d->avail -= s->size;
749
750	s->flags &= ~GV_SD_TASTED;
751
752	/* Link back from the subdisk to this drive. */
753	s->drive_sc = d;
754
755	return (0);
756}
757
758void
759gv_free_sd(struct gv_sd *s)
760{
761	struct gv_drive *d;
762	struct gv_freelist *fl, *fl2;
763
764	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
765
766	d = s->drive_sc;
767	if (d == NULL)
768		return;
769
770	/*
771	 * First, find the free slot that's immediately before or after this
772	 * subdisk.
773	 */
774	fl = NULL;
775	LIST_FOREACH(fl, &d->freelist, freelist) {
776		if (fl->offset == s->drive_offset + s->size)
777			break;
778		if (fl->offset + fl->size == s->drive_offset)
779			break;
780	}
781
782	/* If there is no free slot behind this subdisk, so create one. */
783	if (fl == NULL) {
784
785		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
786		fl->size = s->size;
787		fl->offset = s->drive_offset;
788
789		if (d->freelist_entries == 0) {
790			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
791		} else {
792			LIST_FOREACH(fl2, &d->freelist, freelist) {
793				if (fl->offset < fl2->offset) {
794					LIST_INSERT_BEFORE(fl2, fl, freelist);
795					break;
796				} else if (LIST_NEXT(fl2, freelist) == NULL) {
797					LIST_INSERT_AFTER(fl2, fl, freelist);
798					break;
799				}
800			}
801		}
802
803		d->freelist_entries++;
804
805	/* Expand the free slot we just found. */
806	} else {
807		fl->size += s->size;
808		if (fl->offset > s->drive_offset)
809			fl->offset = s->drive_offset;
810	}
811
812	d->avail += s->size;
813	d->sdcount--;
814}
815
816void
817gv_adjust_freespace(struct gv_sd *s, off_t remainder)
818{
819	struct gv_drive *d;
820	struct gv_freelist *fl, *fl2;
821
822	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
823	d = s->drive_sc;
824	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
825
826	/* First, find the free slot that's immediately after this subdisk. */
827	fl = NULL;
828	LIST_FOREACH(fl, &d->freelist, freelist) {
829		if (fl->offset == s->drive_offset + s->size)
830			break;
831	}
832
833	/* If there is no free slot behind this subdisk, so create one. */
834	if (fl == NULL) {
835
836		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
837		fl->size = remainder;
838		fl->offset = s->drive_offset + s->size - remainder;
839
840		if (d->freelist_entries == 0) {
841			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
842		} else {
843			LIST_FOREACH(fl2, &d->freelist, freelist) {
844				if (fl->offset < fl2->offset) {
845					LIST_INSERT_BEFORE(fl2, fl, freelist);
846					break;
847				} else if (LIST_NEXT(fl2, freelist) == NULL) {
848					LIST_INSERT_AFTER(fl2, fl, freelist);
849					break;
850				}
851			}
852		}
853
854		d->freelist_entries++;
855
856	/* Expand the free slot we just found. */
857	} else {
858		fl->offset -= remainder;
859		fl->size += remainder;
860	}
861
862	s->size -= remainder;
863	d->avail += remainder;
864}
865
866/* Check if the given plex is a striped one. */
867int
868gv_is_striped(struct gv_plex *p)
869{
870	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
871	switch(p->org) {
872	case GV_PLEX_STRIPED:
873	case GV_PLEX_RAID5:
874		return (1);
875	default:
876		return (0);
877	}
878}
879
880/* Find a volume by name. */
881struct gv_volume *
882gv_find_vol(struct gv_softc *sc, char *name)
883{
884	struct gv_volume *v;
885
886	LIST_FOREACH(v, &sc->volumes, volume) {
887		if (!strncmp(v->name, name, GV_MAXVOLNAME))
888			return (v);
889	}
890
891	return (NULL);
892}
893
894/* Find a plex by name. */
895struct gv_plex *
896gv_find_plex(struct gv_softc *sc, char *name)
897{
898	struct gv_plex *p;
899
900	LIST_FOREACH(p, &sc->plexes, plex) {
901		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
902			return (p);
903	}
904
905	return (NULL);
906}
907
908/* Find a subdisk by name. */
909struct gv_sd *
910gv_find_sd(struct gv_softc *sc, char *name)
911{
912	struct gv_sd *s;
913
914	LIST_FOREACH(s, &sc->subdisks, sd) {
915		if (!strncmp(s->name, name, GV_MAXSDNAME))
916			return (s);
917	}
918
919	return (NULL);
920}
921
922/* Find a drive by name. */
923struct gv_drive *
924gv_find_drive(struct gv_softc *sc, char *name)
925{
926	struct gv_drive *d;
927
928	LIST_FOREACH(d, &sc->drives, drive) {
929		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
930			return (d);
931	}
932
933	return (NULL);
934}
935
936/* Find a drive given a device. */
937struct gv_drive *
938gv_find_drive_device(struct gv_softc *sc, char *device)
939{
940	struct gv_drive *d;
941
942	LIST_FOREACH(d, &sc->drives, drive) {
943		if(!strcmp(d->device, device))
944			return (d);
945	}
946
947	return (NULL);
948}
949
950/* Check if any consumer of the given geom is open. */
951int
952gv_consumer_is_open(struct g_consumer *cp)
953{
954	if (cp == NULL)
955		return (0);
956
957	if (cp->acr || cp->acw || cp->ace)
958		return (1);
959
960	return (0);
961}
962
963int
964gv_provider_is_open(struct g_provider *pp)
965{
966	if (pp == NULL)
967		return (0);
968
969	if (pp->acr || pp->acw || pp->ace)
970		return (1);
971
972	return (0);
973}
974
975/*
976 * Compare the modification dates of the drives.
977 * Return 1 if a > b, 0 otherwise.
978 */
979int
980gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
981{
982	struct gv_drive *d2;
983	struct timeval *a, *b;
984
985	KASSERT(!LIST_EMPTY(&sc->drives),
986	    ("gv_is_drive_newer: empty drive list"));
987
988	a = &d->hdr->label.last_update;
989	LIST_FOREACH(d2, &sc->drives, drive) {
990		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
991		    (d2->hdr == NULL))
992			continue;
993		b = &d2->hdr->label.last_update;
994		if (timevalcmp(a, b, >))
995			return (1);
996	}
997
998	return (0);
999}
1000
1001/* Return the type of object identified by string 'name'. */
1002int
1003gv_object_type(struct gv_softc *sc, char *name)
1004{
1005	struct gv_drive *d;
1006	struct gv_plex *p;
1007	struct gv_sd *s;
1008	struct gv_volume *v;
1009
1010	LIST_FOREACH(v, &sc->volumes, volume) {
1011		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1012			return (GV_TYPE_VOL);
1013	}
1014
1015	LIST_FOREACH(p, &sc->plexes, plex) {
1016		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1017			return (GV_TYPE_PLEX);
1018	}
1019
1020	LIST_FOREACH(s, &sc->subdisks, sd) {
1021		if (!strncmp(s->name, name, GV_MAXSDNAME))
1022			return (GV_TYPE_SD);
1023	}
1024
1025	LIST_FOREACH(d, &sc->drives, drive) {
1026		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1027			return (GV_TYPE_DRIVE);
1028	}
1029
1030	return (GV_ERR_NOTFOUND);
1031}
1032
1033void
1034gv_setup_objects(struct gv_softc *sc)
1035{
1036	struct g_provider *pp;
1037	struct gv_volume *v;
1038	struct gv_plex *p;
1039	struct gv_sd *s;
1040	struct gv_drive *d;
1041
1042	LIST_FOREACH(s, &sc->subdisks, sd) {
1043		d = gv_find_drive(sc, s->drive);
1044		if (d != NULL)
1045			gv_sd_to_drive(s, d);
1046		p = gv_find_plex(sc, s->plex);
1047		if (p != NULL)
1048			gv_sd_to_plex(s, p);
1049		gv_update_sd_state(s);
1050	}
1051
1052	LIST_FOREACH(p, &sc->plexes, plex) {
1053		gv_update_plex_config(p);
1054		v = gv_find_vol(sc, p->volume);
1055		if (v != NULL && p->vol_sc != v) {
1056			p->vol_sc = v;
1057			v->plexcount++;
1058			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1059		}
1060		gv_update_plex_config(p);
1061	}
1062
1063	LIST_FOREACH(v, &sc->volumes, volume) {
1064		v->size = gv_vol_size(v);
1065		if (v->provider == NULL) {
1066			g_topology_lock();
1067			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1068			pp->mediasize = v->size;
1069			pp->sectorsize = 512;    /* XXX */
1070			g_error_provider(pp, 0);
1071			v->provider = pp;
1072			pp->private = v;
1073			g_topology_unlock();
1074		} else if (v->provider->mediasize != v->size) {
1075			g_topology_lock();
1076			v->provider->mediasize = v->size;
1077			g_topology_unlock();
1078		}
1079		v->flags &= ~GV_VOL_NEWBORN;
1080		gv_update_vol_state(v);
1081	}
1082}
1083
1084void
1085gv_cleanup(struct gv_softc *sc)
1086{
1087	struct gv_volume *v, *v2;
1088	struct gv_plex *p, *p2;
1089	struct gv_sd *s, *s2;
1090	struct gv_drive *d, *d2;
1091	struct gv_freelist *fl, *fl2;
1092
1093	mtx_lock(&sc->config_mtx);
1094	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1095		LIST_REMOVE(v, volume);
1096		g_free(v->wqueue);
1097		g_free(v);
1098	}
1099	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1100		LIST_REMOVE(p, plex);
1101		g_free(p->bqueue);
1102		g_free(p->rqueue);
1103		g_free(p->wqueue);
1104		g_free(p);
1105	}
1106	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1107		LIST_REMOVE(s, sd);
1108		g_free(s);
1109	}
1110	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1111		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1112			LIST_REMOVE(fl, freelist);
1113			g_free(fl);
1114		}
1115		LIST_REMOVE(d, drive);
1116		g_free(d->hdr);
1117		g_free(d);
1118	}
1119	mtx_destroy(&sc->config_mtx);
1120}
1121
1122/* General 'attach' routine. */
1123int
1124gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1125{
1126	struct gv_sd *s;
1127	struct gv_softc *sc;
1128
1129	g_topology_assert();
1130
1131	sc = p->vinumconf;
1132	KASSERT(sc != NULL, ("NULL sc"));
1133
1134	if (p->vol_sc != NULL) {
1135		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1136		    p->name, p->volume);
1137		return (GV_ERR_ISATTACHED);
1138	}
1139
1140	/* Stale all subdisks of this plex. */
1141	LIST_FOREACH(s, &p->subdisks, in_plex) {
1142		if (s->state != GV_SD_STALE)
1143			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1144	}
1145	/* Attach to volume. Make sure volume is not up and running. */
1146	if (gv_provider_is_open(v->provider)) {
1147		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1148		    p->name, v->name);
1149		return (GV_ERR_ISBUSY);
1150	}
1151	p->vol_sc = v;
1152	strlcpy(p->volume, v->name, sizeof(p->volume));
1153	v->plexcount++;
1154	if (rename) {
1155		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1156		    v->plexcount);
1157	}
1158	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1159
1160	/* Get plex up again. */
1161	gv_update_vol_size(v, gv_vol_size(v));
1162	gv_set_plex_state(p, GV_PLEX_UP, 0);
1163	gv_save_config(p->vinumconf);
1164	return (0);
1165}
1166
1167int
1168gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1169{
1170	struct gv_sd *s2;
1171	int error, sdcount;
1172
1173	g_topology_assert();
1174
1175	/* If subdisk is attached, don't do it. */
1176	if (s->plex_sc != NULL) {
1177		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1178		    s->name, s->plex);
1179		return (GV_ERR_ISATTACHED);
1180	}
1181
1182	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1183	/* First check that this subdisk has a correct offset. If none other
1184	 * starts at the same, and it's correct module stripesize, it is */
1185	if (offset != -1 && offset % p->stripesize != 0)
1186		return (GV_ERR_BADOFFSET);
1187	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1188		if (s2->plex_offset == offset)
1189			return (GV_ERR_BADOFFSET);
1190	}
1191
1192	/* Attach the subdisk to the plex at given offset. */
1193	s->plex_offset = offset;
1194	strlcpy(s->plex, p->name, sizeof(s->plex));
1195
1196	sdcount = p->sdcount;
1197	error = gv_sd_to_plex(s, p);
1198	if (error)
1199		return (error);
1200	gv_update_plex_config(p);
1201
1202	if (rename) {
1203		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1204		    p->sdcount);
1205	}
1206	if (p->vol_sc != NULL)
1207		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1208	gv_save_config(p->vinumconf);
1209	/* We don't update the subdisk state since the user might have to
1210	 * initiate a rebuild/sync first. */
1211	return (0);
1212}
1213
1214/* Detach a plex from a volume. */
1215int
1216gv_detach_plex(struct gv_plex *p, int flags)
1217{
1218	struct gv_volume *v;
1219
1220	g_topology_assert();
1221	v = p->vol_sc;
1222
1223	if (v == NULL) {
1224		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1225		    p->name);
1226		return (0); /* Not an error. */
1227	}
1228
1229	/*
1230	 * Only proceed if forced or volume inactive.
1231	 */
1232	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1233	    p->state == GV_PLEX_UP)) {
1234		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1235		    p->name, p->volume);
1236		return (GV_ERR_ISBUSY);
1237	}
1238	v->plexcount--;
1239	/* Make sure someone don't read us when gone. */
1240	v->last_read_plex = NULL;
1241	LIST_REMOVE(p, in_volume);
1242	p->vol_sc = NULL;
1243	memset(p->volume, 0, GV_MAXVOLNAME);
1244	gv_update_vol_size(v, gv_vol_size(v));
1245	gv_save_config(p->vinumconf);
1246	return (0);
1247}
1248
1249/* Detach a subdisk from a plex. */
1250int
1251gv_detach_sd(struct gv_sd *s, int flags)
1252{
1253	struct gv_plex *p;
1254
1255	g_topology_assert();
1256	p = s->plex_sc;
1257
1258	if (p == NULL) {
1259		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1260		    s->name);
1261		return (0); /* Not an error. */
1262	}
1263
1264	/*
1265	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1266	 * with this subdisk up.
1267	 */
1268	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1269	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1270	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1271		    s->name, s->plex);
1272		return (GV_ERR_ISBUSY);
1273	}
1274
1275	LIST_REMOVE(s, in_plex);
1276	s->plex_sc = NULL;
1277	memset(s->plex, 0, GV_MAXPLEXNAME);
1278	p->sddetached++;
1279	gv_save_config(s->vinumconf);
1280	return (0);
1281}
1282