1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 2004, 2007 Lukas Ertl
5 * Copyright (c) 2007, 2009 Ulf Lilleengen
6 * Copyright (c) 1997, 1998, 1999
7 *      Nan Yang Computer Services Limited.  All rights reserved.
8 *
9 *  Parts written by Greg Lehey
10 *
11 *  This software is distributed under the so-called ``Berkeley
12 *  License'':
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 *    must display the following acknowledgement:
24 *      This product includes software developed by Nan Yang Computer
25 *      Services Limited.
26 * 4. Neither the name of the Company nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * This software is provided ``as is'', and any express or implied
31 * warranties, including, but not limited to, the implied warranties of
32 * merchantability and fitness for a particular purpose are disclaimed.
33 * In no event shall the company or contributors be liable for any
34 * direct, indirect, incidental, special, exemplary, or consequential
35 * damages (including, but not limited to, procurement of substitute
36 * goods or services; loss of use, data, or profits; or business
37 * interruption) however caused and on any theory of liability, whether
38 * in contract, strict liability, or tort (including negligence or
39 * otherwise) arising in any way out of the use of this software, even if
40 * advised of the possibility of such damage.
41 *
42 */
43
44#include <sys/param.h>
45#include <sys/malloc.h>
46#include <sys/sbuf.h>
47#include <sys/systm.h>
48
49#include <geom/geom.h>
50#include <geom/geom_dbg.h>
51#include <geom/vinum/geom_vinum_var.h>
52#include <geom/vinum/geom_vinum.h>
53#include <geom/vinum/geom_vinum_share.h>
54
/* Forward declarations for routines that are defined further down. */
int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
static off_t gv_plex_smallest_sd(struct gv_plex *);
57
58void
59gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
60{
61	char *aptr, *bptr, *cptr;
62	struct gv_volume *v, *v2;
63	struct gv_plex *p, *p2;
64	struct gv_sd *s, *s2;
65	int error, is_newer, tokens;
66	char *token[GV_MAXARGS];
67
68	is_newer = gv_drive_is_newer(sc, d);
69
70	/* Until the end of the string *buf. */
71	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
72		bptr = aptr;
73		cptr = aptr;
74
75		/* Separate input lines. */
76		while (*bptr != '\n')
77			bptr++;
78		*bptr = '\0';
79		bptr++;
80
81		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
82
83		if (tokens <= 0)
84			continue;
85
86		if (!strcmp(token[0], "volume")) {
87			v = gv_new_volume(tokens, token);
88			if (v == NULL) {
89				G_VINUM_DEBUG(0, "config parse failed volume");
90				break;
91			}
92
93			v2 = gv_find_vol(sc, v->name);
94			if (v2 != NULL) {
95				if (is_newer) {
96					v2->state = v->state;
97					G_VINUM_DEBUG(2, "newer volume found!");
98				}
99				g_free(v);
100				continue;
101			}
102
103			gv_create_volume(sc, v);
104
105		} else if (!strcmp(token[0], "plex")) {
106			p = gv_new_plex(tokens, token);
107			if (p == NULL) {
108				G_VINUM_DEBUG(0, "config parse failed plex");
109				break;
110			}
111
112			p2 = gv_find_plex(sc, p->name);
113			if (p2 != NULL) {
114				/* XXX */
115				if (is_newer) {
116					p2->state = p->state;
117					G_VINUM_DEBUG(2, "newer plex found!");
118				}
119				g_free(p);
120				continue;
121			}
122
123			error = gv_create_plex(sc, p);
124			if (error)
125				continue;
126			/*
127			 * These flags were set in gv_create_plex() and are not
128			 * needed here (on-disk config parsing).
129			 */
130			p->flags &= ~GV_PLEX_ADDED;
131
132		} else if (!strcmp(token[0], "sd")) {
133			s = gv_new_sd(tokens, token);
134
135			if (s == NULL) {
136				G_VINUM_DEBUG(0, "config parse failed subdisk");
137				break;
138			}
139
140			s2 = gv_find_sd(sc, s->name);
141			if (s2 != NULL) {
142				/* XXX */
143				if (is_newer) {
144					s2->state = s->state;
145					G_VINUM_DEBUG(2, "newer subdisk found!");
146				}
147				g_free(s);
148				continue;
149			}
150
151			/*
152			 * Signal that this subdisk was tasted, and could
153			 * possibly reference a drive that isn't in our config
154			 * yet.
155			 */
156			s->flags |= GV_SD_TASTED;
157
158			if (s->state == GV_SD_UP)
159				s->flags |= GV_SD_CANGOUP;
160
161			error = gv_create_sd(sc, s);
162			if (error)
163				continue;
164
165			/*
166			 * This flag was set in gv_create_sd() and is not
167			 * needed here (on-disk config parsing).
168			 */
169			s->flags &= ~GV_SD_NEWBORN;
170			s->flags &= ~GV_SD_GROW;
171		}
172	}
173}
174
175/*
176 * Format the vinum configuration properly.  If ondisk is non-zero then the
177 * configuration is intended to be written to disk later.
178 */
179void
180gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
181{
182	struct gv_drive *d;
183	struct gv_sd *s;
184	struct gv_plex *p;
185	struct gv_volume *v;
186
187	/*
188	 * We don't need the drive configuration if we're not writing the
189	 * config to disk.
190	 */
191	if (!ondisk) {
192		LIST_FOREACH(d, &sc->drives, drive) {
193			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
194			    d->name, d->device);
195		}
196	}
197
198	LIST_FOREACH(v, &sc->volumes, volume) {
199		if (!ondisk)
200			sbuf_printf(sb, "%s", prefix);
201		sbuf_printf(sb, "volume %s", v->name);
202		if (ondisk)
203			sbuf_printf(sb, " state %s", gv_volstate(v->state));
204		sbuf_printf(sb, "\n");
205	}
206
207	LIST_FOREACH(p, &sc->plexes, plex) {
208		if (!ondisk)
209			sbuf_printf(sb, "%s", prefix);
210		sbuf_printf(sb, "plex name %s org %s ", p->name,
211		    gv_plexorg(p->org));
212		if (gv_is_striped(p))
213			sbuf_printf(sb, "%ds ", p->stripesize / 512);
214		if (p->vol_sc != NULL)
215			sbuf_printf(sb, "vol %s", p->volume);
216		if (ondisk)
217			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
218		sbuf_printf(sb, "\n");
219	}
220
221	LIST_FOREACH(s, &sc->subdisks, sd) {
222		if (!ondisk)
223			sbuf_printf(sb, "%s", prefix);
224		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
225		    "%jds", s->name, s->drive, s->size / 512,
226		    s->drive_offset / 512);
227		if (s->plex_sc != NULL) {
228			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
229			    s->plex_offset / 512);
230		}
231		if (ondisk)
232			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
233		sbuf_printf(sb, "\n");
234	}
235}
236
237static off_t
238gv_plex_smallest_sd(struct gv_plex *p)
239{
240	struct gv_sd *s;
241	off_t smallest;
242
243	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
244
245	s = LIST_FIRST(&p->subdisks);
246	if (s == NULL)
247		return (-1);
248	smallest = s->size;
249	LIST_FOREACH(s, &p->subdisks, in_plex) {
250		if (s->size < smallest)
251			smallest = s->size;
252	}
253	return (smallest);
254}
255
256/* Walk over plexes in a volume and count how many are down. */
257int
258gv_plexdown(struct gv_volume *v)
259{
260	int plexdown;
261	struct gv_plex *p;
262
263	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
264
265	plexdown = 0;
266
267	LIST_FOREACH(p, &v->plexes, plex) {
268		if (p->state == GV_PLEX_DOWN)
269			plexdown++;
270	}
271	return (plexdown);
272}
273
274int
275gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
276{
277	struct gv_sd *s2;
278	off_t psizeorig, remainder, smallest;
279
280	/* If this subdisk was already given to this plex, do nothing. */
281	if (s->plex_sc == p)
282		return (0);
283
284	/* Check correct size of this subdisk. */
285	s2 = LIST_FIRST(&p->subdisks);
286	/* Adjust the subdisk-size if necessary. */
287	if (s2 != NULL && gv_is_striped(p)) {
288		/* First adjust to the stripesize. */
289		remainder = s->size % p->stripesize;
290
291		if (remainder) {
292			G_VINUM_DEBUG(1, "size of sd %s is not a "
293			    "multiple of plex stripesize, taking off "
294			    "%jd bytes", s->name,
295			    (intmax_t)remainder);
296			gv_adjust_freespace(s, remainder);
297		}
298
299		smallest = gv_plex_smallest_sd(p);
300		/* Then take off extra if other subdisks are smaller. */
301		remainder = s->size - smallest;
302
303		/*
304		 * Don't allow a remainder below zero for running plexes, it's too
305		 * painful, and if someone were to accidentally do this, the
306		 * resulting array might be smaller than the original... not god
307		 */
308		if (remainder < 0) {
309			if (!(p->flags & GV_PLEX_NEWBORN)) {
310				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
311				    s->name, p->name);
312				return (GV_ERR_BADSIZE);
313			}
314			/* Adjust other subdisks. */
315			LIST_FOREACH(s2, &p->subdisks, in_plex) {
316				G_VINUM_DEBUG(1, "size of sd %s is to big, "
317				    "taking off %jd bytes", s->name,
318				    (intmax_t)remainder);
319				gv_adjust_freespace(s2, (remainder * -1));
320			}
321		} else if (remainder > 0) {
322			G_VINUM_DEBUG(1, "size of sd %s is to big, "
323			    "taking off %jd bytes", s->name,
324			    (intmax_t)remainder);
325			gv_adjust_freespace(s, remainder);
326		}
327	}
328
329	/* Find the correct plex offset for this subdisk, if needed. */
330	if (s->plex_offset == -1) {
331		/*
332		 * First set it to 0 to catch the case where we had a detached
333		 * subdisk that didn't get any good offset.
334		 */
335		s->plex_offset = 0;
336		if (p->sdcount) {
337			LIST_FOREACH(s2, &p->subdisks, in_plex) {
338				if (gv_is_striped(p))
339					s->plex_offset = p->sdcount *
340					    p->stripesize;
341				else
342					s->plex_offset = s2->plex_offset +
343					    s2->size;
344			}
345		}
346	}
347
348	/* There are no subdisks for this plex yet, just insert it. */
349	if (LIST_EMPTY(&p->subdisks)) {
350		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
351
352	/* Insert in correct order, depending on plex_offset. */
353	} else {
354		LIST_FOREACH(s2, &p->subdisks, in_plex) {
355			if (s->plex_offset < s2->plex_offset) {
356				LIST_INSERT_BEFORE(s2, s, in_plex);
357				break;
358			} else if (LIST_NEXT(s2, in_plex) == NULL) {
359				LIST_INSERT_AFTER(s2, s, in_plex);
360				break;
361			}
362		}
363	}
364
365	s->plex_sc = p;
366        /* Adjust the size of our plex. We check if the plex misses a subdisk,
367	 * so we don't make the plex smaller than it actually should be.
368	 */
369	psizeorig = p->size;
370	p->size = gv_plex_size(p);
371	/* Make sure the size is not changed. */
372	if (p->sddetached > 0) {
373		if (p->size < psizeorig) {
374			p->size = psizeorig;
375			/* We make sure wee need another subdisk. */
376			if (p->sddetached == 1)
377				p->sddetached++;
378		}
379		p->sddetached--;
380	} else {
381		if ((p->org == GV_PLEX_RAID5 ||
382		    p->org == GV_PLEX_STRIPED) &&
383		    !(p->flags & GV_PLEX_NEWBORN) &&
384		    p->state == GV_PLEX_UP) {
385			s->flags |= GV_SD_GROW;
386		}
387		p->sdcount++;
388	}
389
390	return (0);
391}
392
393void
394gv_update_vol_size(struct gv_volume *v, off_t size)
395{
396	if (v == NULL)
397		return;
398	if (v->provider != NULL) {
399		g_topology_lock();
400		v->provider->mediasize = size;
401		g_topology_unlock();
402	}
403	v->size = size;
404}
405
406/* Return how many subdisks that constitute the original plex. */
407int
408gv_sdcount(struct gv_plex *p, int growing)
409{
410	struct gv_sd *s;
411	int sdcount;
412
413	sdcount = p->sdcount;
414	if (growing) {
415		LIST_FOREACH(s, &p->subdisks, in_plex) {
416			if (s->flags & GV_SD_GROW)
417				sdcount--;
418		}
419	}
420
421	return (sdcount);
422}
423
424/* Calculates the plex size. */
425off_t
426gv_plex_size(struct gv_plex *p)
427{
428	struct gv_sd *s;
429	off_t size;
430	int sdcount;
431
432	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
433
434	/* Adjust the size of our plex. */
435	size = 0;
436	sdcount = gv_sdcount(p, 1);
437	switch (p->org) {
438	case GV_PLEX_CONCAT:
439		LIST_FOREACH(s, &p->subdisks, in_plex)
440			size += s->size;
441		break;
442	case GV_PLEX_STRIPED:
443		s = LIST_FIRST(&p->subdisks);
444		size = ((s != NULL) ? (sdcount * s->size) : 0);
445		break;
446	case GV_PLEX_RAID5:
447		s = LIST_FIRST(&p->subdisks);
448		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
449		break;
450	}
451
452	return (size);
453}
454
455/* Returns the size of a volume. */
456off_t
457gv_vol_size(struct gv_volume *v)
458{
459	struct gv_plex *p;
460	off_t minplexsize;
461
462	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
463
464	p = LIST_FIRST(&v->plexes);
465	if (p == NULL)
466		return (0);
467
468	minplexsize = p->size;
469	LIST_FOREACH(p, &v->plexes, in_volume) {
470		if (p->size < minplexsize) {
471			minplexsize = p->size;
472		}
473	}
474	return (minplexsize);
475}
476
477void
478gv_update_plex_config(struct gv_plex *p)
479{
480	struct gv_sd *s, *s2;
481	off_t remainder;
482	int required_sds, state;
483
484	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
485
486	/* The plex was added to an already running volume. */
487	if (p->flags & GV_PLEX_ADDED)
488		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
489
490	switch (p->org) {
491	case GV_PLEX_STRIPED:
492		required_sds = 2;
493		break;
494	case GV_PLEX_RAID5:
495		required_sds = 3;
496		break;
497	case GV_PLEX_CONCAT:
498	default:
499		required_sds = 0;
500		break;
501	}
502
503	if (required_sds) {
504		if (p->sdcount < required_sds) {
505			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
506		}
507
508		/*
509		 * The subdisks in striped plexes must all have the same size.
510		 */
511		s = LIST_FIRST(&p->subdisks);
512		LIST_FOREACH(s2, &p->subdisks, in_plex) {
513			if (s->size != s2->size) {
514				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
515				    "(%jd) <> %s (%jd)", s->name, s->size,
516				    s2->name, s2->size);
517				gv_set_plex_state(p, GV_PLEX_DOWN,
518				    GV_SETSTATE_FORCE);
519			}
520		}
521
522		LIST_FOREACH(s, &p->subdisks, in_plex) {
523			/* Trim subdisk sizes to match the stripe size. */
524			remainder = s->size % p->stripesize;
525			if (remainder) {
526				G_VINUM_DEBUG(1, "size of sd %s is not a "
527				    "multiple of plex stripesize, taking off "
528				    "%jd bytes", s->name, (intmax_t)remainder);
529				gv_adjust_freespace(s, remainder);
530			}
531		}
532	}
533
534	p->size = gv_plex_size(p);
535	if (p->sdcount == 0)
536		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
537	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
538		LIST_FOREACH(s, &p->subdisks, in_plex)
539			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
540		/* If added to a volume, we want the plex to be down. */
541		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
542		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
543		p->flags &= ~GV_PLEX_ADDED;
544	} else if (p->flags & GV_PLEX_ADDED) {
545		LIST_FOREACH(s, &p->subdisks, in_plex)
546			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
547		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
548		p->flags &= ~GV_PLEX_ADDED;
549	} else if (p->state == GV_PLEX_UP) {
550		LIST_FOREACH(s, &p->subdisks, in_plex) {
551			if (s->flags & GV_SD_GROW) {
552				gv_set_plex_state(p, GV_PLEX_GROWABLE,
553				    GV_SETSTATE_FORCE);
554				break;
555			}
556		}
557	}
558	/* Our plex is grown up now. */
559	p->flags &= ~GV_PLEX_NEWBORN;
560}
561
562/*
563 * Give a subdisk to a drive, check and adjust several parameters, adjust
564 * freelist.
565 */
566int
567gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
568{
569	struct gv_sd *s2;
570	struct gv_freelist *fl, *fl2;
571	off_t tmp;
572	int i;
573
574	fl2 = NULL;
575
576	/* Shortcut for "referenced" drives. */
577	if (d->flags & GV_DRIVE_REFERENCED) {
578		s->drive_sc = d;
579		return (0);
580	}
581
582	/* Check if this subdisk was already given to this drive. */
583	if (s->drive_sc != NULL) {
584		if (s->drive_sc == d) {
585			if (!(s->flags & GV_SD_TASTED)) {
586				return (0);
587			}
588		} else {
589			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
590			    "(already on '%s')", s->name, d->name,
591			    s->drive_sc->name);
592			return (GV_ERR_ISATTACHED);
593		}
594	}
595
596	/* Preliminary checks. */
597	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
598		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
599		    s->name);
600		return (GV_ERR_NOSPACE);
601	}
602
603	/* If no size was given for this subdisk, try to auto-size it... */
604	if (s->size == -1) {
605		/* Find the largest available slot. */
606		LIST_FOREACH(fl, &d->freelist, freelist) {
607			if (fl->size < s->size)
608				continue;
609			s->size = fl->size;
610			s->drive_offset = fl->offset;
611			fl2 = fl;
612		}
613
614		/* No good slot found? */
615		if (s->size == -1) {
616			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
617			    s->name, d->name);
618			return (GV_ERR_BADSIZE);
619		}
620
621	/*
622	 * ... or check if we have a free slot that's large enough for the
623	 * given size.
624	 */
625	} else {
626		i = 0;
627		LIST_FOREACH(fl, &d->freelist, freelist) {
628			if (fl->size < s->size)
629				continue;
630			/* Assign drive offset, if not given. */
631			if (s->drive_offset == -1)
632				s->drive_offset = fl->offset;
633			fl2 = fl;
634			i++;
635			break;
636		}
637
638		/* Couldn't find a good free slot. */
639		if (i == 0) {
640			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
641			    s->name, d->name);
642			return (GV_ERR_NOSPACE);
643		}
644	}
645
646	/* No drive offset given, try to calculate it. */
647	if (s->drive_offset == -1) {
648		/* Add offsets and sizes from other subdisks on this drive. */
649		LIST_FOREACH(s2, &d->subdisks, from_drive) {
650			s->drive_offset = s2->drive_offset + s2->size;
651		}
652
653		/*
654		 * If there are no other subdisks yet, then set the default
655		 * offset to GV_DATA_START.
656		 */
657		if (s->drive_offset == -1)
658			s->drive_offset = GV_DATA_START;
659
660	/* Check if we have a free slot at the given drive offset. */
661	} else {
662		i = 0;
663		LIST_FOREACH(fl, &d->freelist, freelist) {
664			/* Yes, this subdisk fits. */
665			if ((fl->offset <= s->drive_offset) &&
666			    (fl->offset + fl->size >=
667			    s->drive_offset + s->size)) {
668				i++;
669				fl2 = fl;
670				break;
671			}
672		}
673
674		/* Couldn't find a good free slot. */
675		if (i == 0) {
676			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677			    "on '%s'", s->name, d->name);
678			return (GV_ERR_NOSPACE);
679		}
680	}
681
682	/*
683	 * Now that all parameters are checked and set up, we can give the
684	 * subdisk to the drive and adjust the freelist.
685	 */
686
687	/* First, adjust the freelist. */
688	LIST_FOREACH(fl, &d->freelist, freelist) {
689		/* Look for the free slot that we have found before. */
690		if (fl != fl2)
691			continue;
692
693		/* The subdisk starts at the beginning of the free slot. */
694		if (fl->offset == s->drive_offset) {
695			fl->offset += s->size;
696			fl->size -= s->size;
697
698			/* The subdisk uses the whole slot, so remove it. */
699			if (fl->size == 0) {
700				d->freelist_entries--;
701				LIST_REMOVE(fl, freelist);
702			}
703		/*
704		 * The subdisk does not start at the beginning of the free
705		 * slot.
706		 */
707		} else {
708			tmp = fl->offset + fl->size;
709			fl->size = s->drive_offset - fl->offset;
710
711			/*
712			 * The subdisk didn't use the complete rest of the free
713			 * slot, so we need to split it.
714			 */
715			if (s->drive_offset + s->size != tmp) {
716				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717				fl2->offset = s->drive_offset + s->size;
718				fl2->size = tmp - fl2->offset;
719				LIST_INSERT_AFTER(fl, fl2, freelist);
720				d->freelist_entries++;
721			}
722		}
723		break;
724	}
725
726	/*
727	 * This is the first subdisk on this drive, just insert it into the
728	 * list.
729	 */
730	if (LIST_EMPTY(&d->subdisks)) {
731		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732
733	/* There are other subdisks, so insert this one in correct order. */
734	} else {
735		LIST_FOREACH(s2, &d->subdisks, from_drive) {
736			if (s->drive_offset < s2->drive_offset) {
737				LIST_INSERT_BEFORE(s2, s, from_drive);
738				break;
739			} else if (LIST_NEXT(s2, from_drive) == NULL) {
740				LIST_INSERT_AFTER(s2, s, from_drive);
741				break;
742			}
743		}
744	}
745
746	d->sdcount++;
747	d->avail -= s->size;
748
749	s->flags &= ~GV_SD_TASTED;
750
751	/* Link back from the subdisk to this drive. */
752	s->drive_sc = d;
753
754	return (0);
755}
756
757void
758gv_free_sd(struct gv_sd *s)
759{
760	struct gv_drive *d;
761	struct gv_freelist *fl, *fl2;
762
763	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764
765	d = s->drive_sc;
766	if (d == NULL)
767		return;
768
769	/*
770	 * First, find the free slot that's immediately before or after this
771	 * subdisk.
772	 */
773	fl = NULL;
774	LIST_FOREACH(fl, &d->freelist, freelist) {
775		if (fl->offset == s->drive_offset + s->size)
776			break;
777		if (fl->offset + fl->size == s->drive_offset)
778			break;
779	}
780
781	/* If there is no free slot behind this subdisk, so create one. */
782	if (fl == NULL) {
783		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
784		fl->size = s->size;
785		fl->offset = s->drive_offset;
786
787		if (d->freelist_entries == 0) {
788			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
789		} else {
790			LIST_FOREACH(fl2, &d->freelist, freelist) {
791				if (fl->offset < fl2->offset) {
792					LIST_INSERT_BEFORE(fl2, fl, freelist);
793					break;
794				} else if (LIST_NEXT(fl2, freelist) == NULL) {
795					LIST_INSERT_AFTER(fl2, fl, freelist);
796					break;
797				}
798			}
799		}
800
801		d->freelist_entries++;
802
803	/* Expand the free slot we just found. */
804	} else {
805		fl->size += s->size;
806		if (fl->offset > s->drive_offset)
807			fl->offset = s->drive_offset;
808	}
809
810	d->avail += s->size;
811	d->sdcount--;
812}
813
814void
815gv_adjust_freespace(struct gv_sd *s, off_t remainder)
816{
817	struct gv_drive *d;
818	struct gv_freelist *fl, *fl2;
819
820	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
821	d = s->drive_sc;
822	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
823
824	/* First, find the free slot that's immediately after this subdisk. */
825	fl = NULL;
826	LIST_FOREACH(fl, &d->freelist, freelist) {
827		if (fl->offset == s->drive_offset + s->size)
828			break;
829	}
830
831	/* If there is no free slot behind this subdisk, so create one. */
832	if (fl == NULL) {
833		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
834		fl->size = remainder;
835		fl->offset = s->drive_offset + s->size - remainder;
836
837		if (d->freelist_entries == 0) {
838			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
839		} else {
840			LIST_FOREACH(fl2, &d->freelist, freelist) {
841				if (fl->offset < fl2->offset) {
842					LIST_INSERT_BEFORE(fl2, fl, freelist);
843					break;
844				} else if (LIST_NEXT(fl2, freelist) == NULL) {
845					LIST_INSERT_AFTER(fl2, fl, freelist);
846					break;
847				}
848			}
849		}
850
851		d->freelist_entries++;
852
853	/* Expand the free slot we just found. */
854	} else {
855		fl->offset -= remainder;
856		fl->size += remainder;
857	}
858
859	s->size -= remainder;
860	d->avail += remainder;
861}
862
863/* Check if the given plex is a striped one. */
864int
865gv_is_striped(struct gv_plex *p)
866{
867	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
868	switch(p->org) {
869	case GV_PLEX_STRIPED:
870	case GV_PLEX_RAID5:
871		return (1);
872	default:
873		return (0);
874	}
875}
876
877/* Find a volume by name. */
878struct gv_volume *
879gv_find_vol(struct gv_softc *sc, char *name)
880{
881	struct gv_volume *v;
882
883	LIST_FOREACH(v, &sc->volumes, volume) {
884		if (!strncmp(v->name, name, GV_MAXVOLNAME))
885			return (v);
886	}
887
888	return (NULL);
889}
890
891/* Find a plex by name. */
892struct gv_plex *
893gv_find_plex(struct gv_softc *sc, char *name)
894{
895	struct gv_plex *p;
896
897	LIST_FOREACH(p, &sc->plexes, plex) {
898		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
899			return (p);
900	}
901
902	return (NULL);
903}
904
905/* Find a subdisk by name. */
906struct gv_sd *
907gv_find_sd(struct gv_softc *sc, char *name)
908{
909	struct gv_sd *s;
910
911	LIST_FOREACH(s, &sc->subdisks, sd) {
912		if (!strncmp(s->name, name, GV_MAXSDNAME))
913			return (s);
914	}
915
916	return (NULL);
917}
918
919/* Find a drive by name. */
920struct gv_drive *
921gv_find_drive(struct gv_softc *sc, char *name)
922{
923	struct gv_drive *d;
924
925	LIST_FOREACH(d, &sc->drives, drive) {
926		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
927			return (d);
928	}
929
930	return (NULL);
931}
932
933/* Find a drive given a device. */
934struct gv_drive *
935gv_find_drive_device(struct gv_softc *sc, char *device)
936{
937	struct gv_drive *d;
938
939	LIST_FOREACH(d, &sc->drives, drive) {
940		if(!strcmp(d->device, device))
941			return (d);
942	}
943
944	return (NULL);
945}
946
947/* Check if any consumer of the given geom is open. */
948int
949gv_consumer_is_open(struct g_consumer *cp)
950{
951	if (cp == NULL)
952		return (0);
953
954	if (cp->acr || cp->acw || cp->ace)
955		return (1);
956
957	return (0);
958}
959
960int
961gv_provider_is_open(struct g_provider *pp)
962{
963	if (pp == NULL)
964		return (0);
965
966	if (pp->acr || pp->acw || pp->ace)
967		return (1);
968
969	return (0);
970}
971
972/*
973 * Compare the modification dates of the drives.
974 * Return 1 if a > b, 0 otherwise.
975 */
976int
977gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
978{
979	struct gv_drive *d2;
980	struct timeval *a, *b;
981
982	KASSERT(!LIST_EMPTY(&sc->drives),
983	    ("gv_is_drive_newer: empty drive list"));
984
985	a = &d->hdr->label.last_update;
986	LIST_FOREACH(d2, &sc->drives, drive) {
987		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
988		    (d2->hdr == NULL))
989			continue;
990		b = &d2->hdr->label.last_update;
991		if (timevalcmp(a, b, >))
992			return (1);
993	}
994
995	return (0);
996}
997
998/* Return the type of object identified by string 'name'. */
999int
1000gv_object_type(struct gv_softc *sc, char *name)
1001{
1002	struct gv_drive *d;
1003	struct gv_plex *p;
1004	struct gv_sd *s;
1005	struct gv_volume *v;
1006
1007	LIST_FOREACH(v, &sc->volumes, volume) {
1008		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1009			return (GV_TYPE_VOL);
1010	}
1011
1012	LIST_FOREACH(p, &sc->plexes, plex) {
1013		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1014			return (GV_TYPE_PLEX);
1015	}
1016
1017	LIST_FOREACH(s, &sc->subdisks, sd) {
1018		if (!strncmp(s->name, name, GV_MAXSDNAME))
1019			return (GV_TYPE_SD);
1020	}
1021
1022	LIST_FOREACH(d, &sc->drives, drive) {
1023		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1024			return (GV_TYPE_DRIVE);
1025	}
1026
1027	return (GV_ERR_NOTFOUND);
1028}
1029
/*
 * Wire up the objects in 'sc' by name and bring the volume providers in
 * line with the result.
 *
 * Three passes are made: subdisks are given to the drive and plex they
 * name, plexes are linked to the volume they name, and finally every volume
 * gets its size computed and a provider created or resized.
 */
void
gv_setup_objects(struct gv_softc *sc)
{
	struct g_provider *pp;
	struct gv_volume *v;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_drive *d;

	/*
	 * Pass 1: attach each subdisk to the drive and plex named in it, if
	 * they exist.  The return values of the attach routines are not
	 * checked here.
	 */
	LIST_FOREACH(s, &sc->subdisks, sd) {
		d = gv_find_drive(sc, s->drive);
		if (d != NULL)
			gv_sd_to_drive(s, d);
		p = gv_find_plex(sc, s->plex);
		if (p != NULL)
			gv_sd_to_plex(s, p);
		gv_update_sd_state(s);
	}

	/*
	 * Pass 2: link each plex to the volume named in it unless it is
	 * already linked to that volume.  The plex configuration is updated
	 * both before and after the linking.
	 */
	LIST_FOREACH(p, &sc->plexes, plex) {
		gv_update_plex_config(p);
		v = gv_find_vol(sc, p->volume);
		if (v != NULL && p->vol_sc != v) {
			p->vol_sc = v;
			v->plexcount++;
			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
		}
		gv_update_plex_config(p);
	}

	/*
	 * Pass 3: compute the size of each volume, then create a provider
	 * named "gvinum/<volume>" for volumes that have none, or adjust the
	 * mediasize of an existing provider that disagrees with the volume
	 * size.  Provider changes are made under the topology lock.
	 */
	LIST_FOREACH(v, &sc->volumes, volume) {
		v->size = gv_vol_size(v);
		if (v->provider == NULL) {
			g_topology_lock();
			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
			pp->mediasize = v->size;
			pp->sectorsize = 512;    /* XXX */
			g_error_provider(pp, 0);
			v->provider = pp;
			pp->private = v;
			g_topology_unlock();
		} else if (v->provider->mediasize != v->size) {
			g_topology_lock();
			v->provider->mediasize = v->size;
			g_topology_unlock();
		}
		v->flags &= ~GV_VOL_NEWBORN;
		gv_update_vol_state(v);
	}
}
1080
1081void
1082gv_cleanup(struct gv_softc *sc)
1083{
1084	struct gv_volume *v, *v2;
1085	struct gv_plex *p, *p2;
1086	struct gv_sd *s, *s2;
1087	struct gv_drive *d, *d2;
1088	struct gv_freelist *fl, *fl2;
1089
1090	mtx_lock(&sc->config_mtx);
1091	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1092		LIST_REMOVE(v, volume);
1093		g_free(v->wqueue);
1094		g_free(v);
1095	}
1096	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1097		LIST_REMOVE(p, plex);
1098		g_free(p->bqueue);
1099		g_free(p->rqueue);
1100		g_free(p->wqueue);
1101		g_free(p);
1102	}
1103	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1104		LIST_REMOVE(s, sd);
1105		g_free(s);
1106	}
1107	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1108		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1109			LIST_REMOVE(fl, freelist);
1110			g_free(fl);
1111		}
1112		LIST_REMOVE(d, drive);
1113		g_free(d->hdr);
1114		g_free(d);
1115	}
1116	mtx_destroy(&sc->config_mtx);
1117}
1118
1119/* General 'attach' routine. */
1120int
1121gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1122{
1123	struct gv_sd *s;
1124	struct gv_softc *sc __diagused;
1125
1126	g_topology_assert();
1127
1128	sc = p->vinumconf;
1129	KASSERT(sc != NULL, ("NULL sc"));
1130
1131	if (p->vol_sc != NULL) {
1132		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1133		    p->name, p->volume);
1134		return (GV_ERR_ISATTACHED);
1135	}
1136
1137	/* Stale all subdisks of this plex. */
1138	LIST_FOREACH(s, &p->subdisks, in_plex) {
1139		if (s->state != GV_SD_STALE)
1140			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1141	}
1142	/* Attach to volume. Make sure volume is not up and running. */
1143	if (gv_provider_is_open(v->provider)) {
1144		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1145		    p->name, v->name);
1146		return (GV_ERR_ISBUSY);
1147	}
1148	p->vol_sc = v;
1149	strlcpy(p->volume, v->name, sizeof(p->volume));
1150	v->plexcount++;
1151	if (rename) {
1152		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1153		    v->plexcount);
1154	}
1155	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1156
1157	/* Get plex up again. */
1158	gv_update_vol_size(v, gv_vol_size(v));
1159	gv_set_plex_state(p, GV_PLEX_UP, 0);
1160	gv_save_config(p->vinumconf);
1161	return (0);
1162}
1163
1164int
1165gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1166{
1167	struct gv_sd *s2;
1168	int error;
1169
1170	g_topology_assert();
1171
1172	/* If subdisk is attached, don't do it. */
1173	if (s->plex_sc != NULL) {
1174		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1175		    s->name, s->plex);
1176		return (GV_ERR_ISATTACHED);
1177	}
1178
1179	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1180	/* First check that this subdisk has a correct offset. If none other
1181	 * starts at the same, and it's correct module stripesize, it is */
1182	if (offset != -1 && offset % p->stripesize != 0)
1183		return (GV_ERR_BADOFFSET);
1184	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1185		if (s2->plex_offset == offset)
1186			return (GV_ERR_BADOFFSET);
1187	}
1188
1189	/* Attach the subdisk to the plex at given offset. */
1190	s->plex_offset = offset;
1191	strlcpy(s->plex, p->name, sizeof(s->plex));
1192
1193	error = gv_sd_to_plex(s, p);
1194	if (error)
1195		return (error);
1196	gv_update_plex_config(p);
1197
1198	if (rename) {
1199		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1200		    p->sdcount);
1201	}
1202	if (p->vol_sc != NULL)
1203		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1204	gv_save_config(p->vinumconf);
1205	/* We don't update the subdisk state since the user might have to
1206	 * initiate a rebuild/sync first. */
1207	return (0);
1208}
1209
1210/* Detach a plex from a volume. */
1211int
1212gv_detach_plex(struct gv_plex *p, int flags)
1213{
1214	struct gv_volume *v;
1215
1216	g_topology_assert();
1217	v = p->vol_sc;
1218
1219	if (v == NULL) {
1220		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1221		    p->name);
1222		return (0); /* Not an error. */
1223	}
1224
1225	/*
1226	 * Only proceed if forced or volume inactive.
1227	 */
1228	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1229	    p->state == GV_PLEX_UP)) {
1230		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1231		    p->name, p->volume);
1232		return (GV_ERR_ISBUSY);
1233	}
1234	v->plexcount--;
1235	/* Make sure someone don't read us when gone. */
1236	v->last_read_plex = NULL;
1237	LIST_REMOVE(p, in_volume);
1238	p->vol_sc = NULL;
1239	memset(p->volume, 0, GV_MAXVOLNAME);
1240	gv_update_vol_size(v, gv_vol_size(v));
1241	gv_save_config(p->vinumconf);
1242	return (0);
1243}
1244
1245/* Detach a subdisk from a plex. */
1246int
1247gv_detach_sd(struct gv_sd *s, int flags)
1248{
1249	struct gv_plex *p;
1250
1251	g_topology_assert();
1252	p = s->plex_sc;
1253
1254	if (p == NULL) {
1255		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1256		    s->name);
1257		return (0); /* Not an error. */
1258	}
1259
1260	/*
1261	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1262	 * with this subdisk up.
1263	 */
1264	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1265	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1266	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1267		    s->name, s->plex);
1268		return (GV_ERR_ISBUSY);
1269	}
1270
1271	LIST_REMOVE(s, in_plex);
1272	s->plex_sc = NULL;
1273	memset(s->plex, 0, GV_MAXPLEXNAME);
1274	p->sddetached++;
1275	gv_save_config(s->vinumconf);
1276	return (0);
1277}
1278