1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 2004, 2007 Lukas Ertl
5 * Copyright (c) 2007, 2009 Ulf Lilleengen
6 * Copyright (c) 1997, 1998, 1999
7 *      Nan Yang Computer Services Limited.  All rights reserved.
8 *
9 *  Parts written by Greg Lehey
10 *
11 *  This software is distributed under the so-called ``Berkeley
12 *  License'':
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 *    must display the following acknowledgement:
24 *      This product includes software developed by Nan Yang Computer
25 *      Services Limited.
26 * 4. Neither the name of the Company nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * This software is provided ``as is'', and any express or implied
31 * warranties, including, but not limited to, the implied warranties of
32 * merchantability and fitness for a particular purpose are disclaimed.
33 * In no event shall the company or contributors be liable for any
34 * direct, indirect, incidental, special, exemplary, or consequential
35 * damages (including, but not limited to, procurement of substitute
36 * goods or services; loss of use, data, or profits; or business
37 * interruption) however caused and on any theory of liability, whether
38 * in contract, strict liability, or tort (including negligence or
39 * otherwise) arising in any way out of the use of this software, even if
40 * advised of the possibility of such damage.
41 *
42 */
43
44#include <sys/cdefs.h>
45__FBSDID("$FreeBSD$");
46
47#include <sys/param.h>
48#include <sys/malloc.h>
49#include <sys/sbuf.h>
50#include <sys/systm.h>
51
52#include <geom/geom.h>
53#include <geom/geom_dbg.h>
54#include <geom/vinum/geom_vinum_var.h>
55#include <geom/vinum/geom_vinum.h>
56#include <geom/vinum/geom_vinum_share.h>
57
/*
 * Forward declarations for routines defined later in this file.
 * gv_drive_is_newer() is called by gv_parse_config(); gv_plex_smallest_sd()
 * is a file-local helper called by gv_sd_to_plex().
 */
int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
static off_t gv_plex_smallest_sd(struct gv_plex *);
60
61void
62gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
63{
64	char *aptr, *bptr, *cptr;
65	struct gv_volume *v, *v2;
66	struct gv_plex *p, *p2;
67	struct gv_sd *s, *s2;
68	int error, is_newer, tokens;
69	char *token[GV_MAXARGS];
70
71	is_newer = gv_drive_is_newer(sc, d);
72
73	/* Until the end of the string *buf. */
74	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
75		bptr = aptr;
76		cptr = aptr;
77
78		/* Separate input lines. */
79		while (*bptr != '\n')
80			bptr++;
81		*bptr = '\0';
82		bptr++;
83
84		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
85
86		if (tokens <= 0)
87			continue;
88
89		if (!strcmp(token[0], "volume")) {
90			v = gv_new_volume(tokens, token);
91			if (v == NULL) {
92				G_VINUM_DEBUG(0, "config parse failed volume");
93				break;
94			}
95
96			v2 = gv_find_vol(sc, v->name);
97			if (v2 != NULL) {
98				if (is_newer) {
99					v2->state = v->state;
100					G_VINUM_DEBUG(2, "newer volume found!");
101				}
102				g_free(v);
103				continue;
104			}
105
106			gv_create_volume(sc, v);
107
108		} else if (!strcmp(token[0], "plex")) {
109			p = gv_new_plex(tokens, token);
110			if (p == NULL) {
111				G_VINUM_DEBUG(0, "config parse failed plex");
112				break;
113			}
114
115			p2 = gv_find_plex(sc, p->name);
116			if (p2 != NULL) {
117				/* XXX */
118				if (is_newer) {
119					p2->state = p->state;
120					G_VINUM_DEBUG(2, "newer plex found!");
121				}
122				g_free(p);
123				continue;
124			}
125
126			error = gv_create_plex(sc, p);
127			if (error)
128				continue;
129			/*
130			 * These flags were set in gv_create_plex() and are not
131			 * needed here (on-disk config parsing).
132			 */
133			p->flags &= ~GV_PLEX_ADDED;
134
135		} else if (!strcmp(token[0], "sd")) {
136			s = gv_new_sd(tokens, token);
137
138			if (s == NULL) {
139				G_VINUM_DEBUG(0, "config parse failed subdisk");
140				break;
141			}
142
143			s2 = gv_find_sd(sc, s->name);
144			if (s2 != NULL) {
145				/* XXX */
146				if (is_newer) {
147					s2->state = s->state;
148					G_VINUM_DEBUG(2, "newer subdisk found!");
149				}
150				g_free(s);
151				continue;
152			}
153
154			/*
155			 * Signal that this subdisk was tasted, and could
156			 * possibly reference a drive that isn't in our config
157			 * yet.
158			 */
159			s->flags |= GV_SD_TASTED;
160
161			if (s->state == GV_SD_UP)
162				s->flags |= GV_SD_CANGOUP;
163
164			error = gv_create_sd(sc, s);
165			if (error)
166				continue;
167
168			/*
169			 * This flag was set in gv_create_sd() and is not
170			 * needed here (on-disk config parsing).
171			 */
172			s->flags &= ~GV_SD_NEWBORN;
173			s->flags &= ~GV_SD_GROW;
174		}
175	}
176}
177
178/*
179 * Format the vinum configuration properly.  If ondisk is non-zero then the
180 * configuration is intended to be written to disk later.
181 */
182void
183gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
184{
185	struct gv_drive *d;
186	struct gv_sd *s;
187	struct gv_plex *p;
188	struct gv_volume *v;
189
190	/*
191	 * We don't need the drive configuration if we're not writing the
192	 * config to disk.
193	 */
194	if (!ondisk) {
195		LIST_FOREACH(d, &sc->drives, drive) {
196			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
197			    d->name, d->device);
198		}
199	}
200
201	LIST_FOREACH(v, &sc->volumes, volume) {
202		if (!ondisk)
203			sbuf_printf(sb, "%s", prefix);
204		sbuf_printf(sb, "volume %s", v->name);
205		if (ondisk)
206			sbuf_printf(sb, " state %s", gv_volstate(v->state));
207		sbuf_printf(sb, "\n");
208	}
209
210	LIST_FOREACH(p, &sc->plexes, plex) {
211		if (!ondisk)
212			sbuf_printf(sb, "%s", prefix);
213		sbuf_printf(sb, "plex name %s org %s ", p->name,
214		    gv_plexorg(p->org));
215		if (gv_is_striped(p))
216			sbuf_printf(sb, "%ds ", p->stripesize / 512);
217		if (p->vol_sc != NULL)
218			sbuf_printf(sb, "vol %s", p->volume);
219		if (ondisk)
220			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
221		sbuf_printf(sb, "\n");
222	}
223
224	LIST_FOREACH(s, &sc->subdisks, sd) {
225		if (!ondisk)
226			sbuf_printf(sb, "%s", prefix);
227		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
228		    "%jds", s->name, s->drive, s->size / 512,
229		    s->drive_offset / 512);
230		if (s->plex_sc != NULL) {
231			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
232			    s->plex_offset / 512);
233		}
234		if (ondisk)
235			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
236		sbuf_printf(sb, "\n");
237	}
238}
239
240static off_t
241gv_plex_smallest_sd(struct gv_plex *p)
242{
243	struct gv_sd *s;
244	off_t smallest;
245
246	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
247
248	s = LIST_FIRST(&p->subdisks);
249	if (s == NULL)
250		return (-1);
251	smallest = s->size;
252	LIST_FOREACH(s, &p->subdisks, in_plex) {
253		if (s->size < smallest)
254			smallest = s->size;
255	}
256	return (smallest);
257}
258
259/* Walk over plexes in a volume and count how many are down. */
260int
261gv_plexdown(struct gv_volume *v)
262{
263	int plexdown;
264	struct gv_plex *p;
265
266	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
267
268	plexdown = 0;
269
270	LIST_FOREACH(p, &v->plexes, plex) {
271		if (p->state == GV_PLEX_DOWN)
272			plexdown++;
273	}
274	return (plexdown);
275}
276
277int
278gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
279{
280	struct gv_sd *s2;
281	off_t psizeorig, remainder, smallest;
282
283	/* If this subdisk was already given to this plex, do nothing. */
284	if (s->plex_sc == p)
285		return (0);
286
287	/* Check correct size of this subdisk. */
288	s2 = LIST_FIRST(&p->subdisks);
289	/* Adjust the subdisk-size if necessary. */
290	if (s2 != NULL && gv_is_striped(p)) {
291		/* First adjust to the stripesize. */
292		remainder = s->size % p->stripesize;
293
294		if (remainder) {
295			G_VINUM_DEBUG(1, "size of sd %s is not a "
296			    "multiple of plex stripesize, taking off "
297			    "%jd bytes", s->name,
298			    (intmax_t)remainder);
299			gv_adjust_freespace(s, remainder);
300		}
301
302		smallest = gv_plex_smallest_sd(p);
303		/* Then take off extra if other subdisks are smaller. */
304		remainder = s->size - smallest;
305
306		/*
307		 * Don't allow a remainder below zero for running plexes, it's too
308		 * painful, and if someone were to accidentally do this, the
309		 * resulting array might be smaller than the original... not god
310		 */
311		if (remainder < 0) {
312			if (!(p->flags & GV_PLEX_NEWBORN)) {
313				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
314				    s->name, p->name);
315				return (GV_ERR_BADSIZE);
316			}
317			/* Adjust other subdisks. */
318			LIST_FOREACH(s2, &p->subdisks, in_plex) {
319				G_VINUM_DEBUG(1, "size of sd %s is to big, "
320				    "taking off %jd bytes", s->name,
321				    (intmax_t)remainder);
322				gv_adjust_freespace(s2, (remainder * -1));
323			}
324		} else if (remainder > 0) {
325			G_VINUM_DEBUG(1, "size of sd %s is to big, "
326			    "taking off %jd bytes", s->name,
327			    (intmax_t)remainder);
328			gv_adjust_freespace(s, remainder);
329		}
330	}
331
332	/* Find the correct plex offset for this subdisk, if needed. */
333	if (s->plex_offset == -1) {
334		/*
335		 * First set it to 0 to catch the case where we had a detached
336		 * subdisk that didn't get any good offset.
337		 */
338		s->plex_offset = 0;
339		if (p->sdcount) {
340			LIST_FOREACH(s2, &p->subdisks, in_plex) {
341				if (gv_is_striped(p))
342					s->plex_offset = p->sdcount *
343					    p->stripesize;
344				else
345					s->plex_offset = s2->plex_offset +
346					    s2->size;
347			}
348		}
349	}
350
351	/* There are no subdisks for this plex yet, just insert it. */
352	if (LIST_EMPTY(&p->subdisks)) {
353		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
354
355	/* Insert in correct order, depending on plex_offset. */
356	} else {
357		LIST_FOREACH(s2, &p->subdisks, in_plex) {
358			if (s->plex_offset < s2->plex_offset) {
359				LIST_INSERT_BEFORE(s2, s, in_plex);
360				break;
361			} else if (LIST_NEXT(s2, in_plex) == NULL) {
362				LIST_INSERT_AFTER(s2, s, in_plex);
363				break;
364			}
365		}
366	}
367
368	s->plex_sc = p;
369        /* Adjust the size of our plex. We check if the plex misses a subdisk,
370	 * so we don't make the plex smaller than it actually should be.
371	 */
372	psizeorig = p->size;
373	p->size = gv_plex_size(p);
374	/* Make sure the size is not changed. */
375	if (p->sddetached > 0) {
376		if (p->size < psizeorig) {
377			p->size = psizeorig;
378			/* We make sure wee need another subdisk. */
379			if (p->sddetached == 1)
380				p->sddetached++;
381		}
382		p->sddetached--;
383	} else {
384		if ((p->org == GV_PLEX_RAID5 ||
385		    p->org == GV_PLEX_STRIPED) &&
386		    !(p->flags & GV_PLEX_NEWBORN) &&
387		    p->state == GV_PLEX_UP) {
388			s->flags |= GV_SD_GROW;
389		}
390		p->sdcount++;
391	}
392
393	return (0);
394}
395
396void
397gv_update_vol_size(struct gv_volume *v, off_t size)
398{
399	if (v == NULL)
400		return;
401	if (v->provider != NULL) {
402		g_topology_lock();
403		v->provider->mediasize = size;
404		g_topology_unlock();
405	}
406	v->size = size;
407}
408
409/* Return how many subdisks that constitute the original plex. */
410int
411gv_sdcount(struct gv_plex *p, int growing)
412{
413	struct gv_sd *s;
414	int sdcount;
415
416	sdcount = p->sdcount;
417	if (growing) {
418		LIST_FOREACH(s, &p->subdisks, in_plex) {
419			if (s->flags & GV_SD_GROW)
420				sdcount--;
421		}
422	}
423
424	return (sdcount);
425}
426
427/* Calculates the plex size. */
428off_t
429gv_plex_size(struct gv_plex *p)
430{
431	struct gv_sd *s;
432	off_t size;
433	int sdcount;
434
435	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
436
437	/* Adjust the size of our plex. */
438	size = 0;
439	sdcount = gv_sdcount(p, 1);
440	switch (p->org) {
441	case GV_PLEX_CONCAT:
442		LIST_FOREACH(s, &p->subdisks, in_plex)
443			size += s->size;
444		break;
445	case GV_PLEX_STRIPED:
446		s = LIST_FIRST(&p->subdisks);
447		size = ((s != NULL) ? (sdcount * s->size) : 0);
448		break;
449	case GV_PLEX_RAID5:
450		s = LIST_FIRST(&p->subdisks);
451		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
452		break;
453	}
454
455	return (size);
456}
457
458/* Returns the size of a volume. */
459off_t
460gv_vol_size(struct gv_volume *v)
461{
462	struct gv_plex *p;
463	off_t minplexsize;
464
465	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
466
467	p = LIST_FIRST(&v->plexes);
468	if (p == NULL)
469		return (0);
470
471	minplexsize = p->size;
472	LIST_FOREACH(p, &v->plexes, in_volume) {
473		if (p->size < minplexsize) {
474			minplexsize = p->size;
475		}
476	}
477	return (minplexsize);
478}
479
480void
481gv_update_plex_config(struct gv_plex *p)
482{
483	struct gv_sd *s, *s2;
484	off_t remainder;
485	int required_sds, state;
486
487	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
488
489	/* The plex was added to an already running volume. */
490	if (p->flags & GV_PLEX_ADDED)
491		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
492
493	switch (p->org) {
494	case GV_PLEX_STRIPED:
495		required_sds = 2;
496		break;
497	case GV_PLEX_RAID5:
498		required_sds = 3;
499		break;
500	case GV_PLEX_CONCAT:
501	default:
502		required_sds = 0;
503		break;
504	}
505
506	if (required_sds) {
507		if (p->sdcount < required_sds) {
508			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
509		}
510
511		/*
512		 * The subdisks in striped plexes must all have the same size.
513		 */
514		s = LIST_FIRST(&p->subdisks);
515		LIST_FOREACH(s2, &p->subdisks, in_plex) {
516			if (s->size != s2->size) {
517				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
518				    "(%jd) <> %s (%jd)", s->name, s->size,
519				    s2->name, s2->size);
520				gv_set_plex_state(p, GV_PLEX_DOWN,
521				    GV_SETSTATE_FORCE);
522			}
523		}
524
525		LIST_FOREACH(s, &p->subdisks, in_plex) {
526			/* Trim subdisk sizes to match the stripe size. */
527			remainder = s->size % p->stripesize;
528			if (remainder) {
529				G_VINUM_DEBUG(1, "size of sd %s is not a "
530				    "multiple of plex stripesize, taking off "
531				    "%jd bytes", s->name, (intmax_t)remainder);
532				gv_adjust_freespace(s, remainder);
533			}
534		}
535	}
536
537	p->size = gv_plex_size(p);
538	if (p->sdcount == 0)
539		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
540	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
541		LIST_FOREACH(s, &p->subdisks, in_plex)
542			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
543		/* If added to a volume, we want the plex to be down. */
544		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
545		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
546		p->flags &= ~GV_PLEX_ADDED;
547	} else if (p->flags & GV_PLEX_ADDED) {
548		LIST_FOREACH(s, &p->subdisks, in_plex)
549			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
550		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
551		p->flags &= ~GV_PLEX_ADDED;
552	} else if (p->state == GV_PLEX_UP) {
553		LIST_FOREACH(s, &p->subdisks, in_plex) {
554			if (s->flags & GV_SD_GROW) {
555				gv_set_plex_state(p, GV_PLEX_GROWABLE,
556				    GV_SETSTATE_FORCE);
557				break;
558			}
559		}
560	}
561	/* Our plex is grown up now. */
562	p->flags &= ~GV_PLEX_NEWBORN;
563}
564
565/*
566 * Give a subdisk to a drive, check and adjust several parameters, adjust
567 * freelist.
568 */
569int
570gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
571{
572	struct gv_sd *s2;
573	struct gv_freelist *fl, *fl2;
574	off_t tmp;
575	int i;
576
577	fl2 = NULL;
578
579	/* Shortcut for "referenced" drives. */
580	if (d->flags & GV_DRIVE_REFERENCED) {
581		s->drive_sc = d;
582		return (0);
583	}
584
585	/* Check if this subdisk was already given to this drive. */
586	if (s->drive_sc != NULL) {
587		if (s->drive_sc == d) {
588			if (!(s->flags & GV_SD_TASTED)) {
589				return (0);
590			}
591		} else {
592			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
593			    "(already on '%s')", s->name, d->name,
594			    s->drive_sc->name);
595			return (GV_ERR_ISATTACHED);
596		}
597	}
598
599	/* Preliminary checks. */
600	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
601		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
602		    s->name);
603		return (GV_ERR_NOSPACE);
604	}
605
606	/* If no size was given for this subdisk, try to auto-size it... */
607	if (s->size == -1) {
608		/* Find the largest available slot. */
609		LIST_FOREACH(fl, &d->freelist, freelist) {
610			if (fl->size < s->size)
611				continue;
612			s->size = fl->size;
613			s->drive_offset = fl->offset;
614			fl2 = fl;
615		}
616
617		/* No good slot found? */
618		if (s->size == -1) {
619			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
620			    s->name, d->name);
621			return (GV_ERR_BADSIZE);
622		}
623
624	/*
625	 * ... or check if we have a free slot that's large enough for the
626	 * given size.
627	 */
628	} else {
629		i = 0;
630		LIST_FOREACH(fl, &d->freelist, freelist) {
631			if (fl->size < s->size)
632				continue;
633			/* Assign drive offset, if not given. */
634			if (s->drive_offset == -1)
635				s->drive_offset = fl->offset;
636			fl2 = fl;
637			i++;
638			break;
639		}
640
641		/* Couldn't find a good free slot. */
642		if (i == 0) {
643			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
644			    s->name, d->name);
645			return (GV_ERR_NOSPACE);
646		}
647	}
648
649	/* No drive offset given, try to calculate it. */
650	if (s->drive_offset == -1) {
651		/* Add offsets and sizes from other subdisks on this drive. */
652		LIST_FOREACH(s2, &d->subdisks, from_drive) {
653			s->drive_offset = s2->drive_offset + s2->size;
654		}
655
656		/*
657		 * If there are no other subdisks yet, then set the default
658		 * offset to GV_DATA_START.
659		 */
660		if (s->drive_offset == -1)
661			s->drive_offset = GV_DATA_START;
662
663	/* Check if we have a free slot at the given drive offset. */
664	} else {
665		i = 0;
666		LIST_FOREACH(fl, &d->freelist, freelist) {
667			/* Yes, this subdisk fits. */
668			if ((fl->offset <= s->drive_offset) &&
669			    (fl->offset + fl->size >=
670			    s->drive_offset + s->size)) {
671				i++;
672				fl2 = fl;
673				break;
674			}
675		}
676
677		/* Couldn't find a good free slot. */
678		if (i == 0) {
679			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
680			    "on '%s'", s->name, d->name);
681			return (GV_ERR_NOSPACE);
682		}
683	}
684
685	/*
686	 * Now that all parameters are checked and set up, we can give the
687	 * subdisk to the drive and adjust the freelist.
688	 */
689
690	/* First, adjust the freelist. */
691	LIST_FOREACH(fl, &d->freelist, freelist) {
692		/* Look for the free slot that we have found before. */
693		if (fl != fl2)
694			continue;
695
696		/* The subdisk starts at the beginning of the free slot. */
697		if (fl->offset == s->drive_offset) {
698			fl->offset += s->size;
699			fl->size -= s->size;
700
701			/* The subdisk uses the whole slot, so remove it. */
702			if (fl->size == 0) {
703				d->freelist_entries--;
704				LIST_REMOVE(fl, freelist);
705			}
706		/*
707		 * The subdisk does not start at the beginning of the free
708		 * slot.
709		 */
710		} else {
711			tmp = fl->offset + fl->size;
712			fl->size = s->drive_offset - fl->offset;
713
714			/*
715			 * The subdisk didn't use the complete rest of the free
716			 * slot, so we need to split it.
717			 */
718			if (s->drive_offset + s->size != tmp) {
719				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
720				fl2->offset = s->drive_offset + s->size;
721				fl2->size = tmp - fl2->offset;
722				LIST_INSERT_AFTER(fl, fl2, freelist);
723				d->freelist_entries++;
724			}
725		}
726		break;
727	}
728
729	/*
730	 * This is the first subdisk on this drive, just insert it into the
731	 * list.
732	 */
733	if (LIST_EMPTY(&d->subdisks)) {
734		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
735
736	/* There are other subdisks, so insert this one in correct order. */
737	} else {
738		LIST_FOREACH(s2, &d->subdisks, from_drive) {
739			if (s->drive_offset < s2->drive_offset) {
740				LIST_INSERT_BEFORE(s2, s, from_drive);
741				break;
742			} else if (LIST_NEXT(s2, from_drive) == NULL) {
743				LIST_INSERT_AFTER(s2, s, from_drive);
744				break;
745			}
746		}
747	}
748
749	d->sdcount++;
750	d->avail -= s->size;
751
752	s->flags &= ~GV_SD_TASTED;
753
754	/* Link back from the subdisk to this drive. */
755	s->drive_sc = d;
756
757	return (0);
758}
759
760void
761gv_free_sd(struct gv_sd *s)
762{
763	struct gv_drive *d;
764	struct gv_freelist *fl, *fl2;
765
766	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
767
768	d = s->drive_sc;
769	if (d == NULL)
770		return;
771
772	/*
773	 * First, find the free slot that's immediately before or after this
774	 * subdisk.
775	 */
776	fl = NULL;
777	LIST_FOREACH(fl, &d->freelist, freelist) {
778		if (fl->offset == s->drive_offset + s->size)
779			break;
780		if (fl->offset + fl->size == s->drive_offset)
781			break;
782	}
783
784	/* If there is no free slot behind this subdisk, so create one. */
785	if (fl == NULL) {
786		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
787		fl->size = s->size;
788		fl->offset = s->drive_offset;
789
790		if (d->freelist_entries == 0) {
791			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
792		} else {
793			LIST_FOREACH(fl2, &d->freelist, freelist) {
794				if (fl->offset < fl2->offset) {
795					LIST_INSERT_BEFORE(fl2, fl, freelist);
796					break;
797				} else if (LIST_NEXT(fl2, freelist) == NULL) {
798					LIST_INSERT_AFTER(fl2, fl, freelist);
799					break;
800				}
801			}
802		}
803
804		d->freelist_entries++;
805
806	/* Expand the free slot we just found. */
807	} else {
808		fl->size += s->size;
809		if (fl->offset > s->drive_offset)
810			fl->offset = s->drive_offset;
811	}
812
813	d->avail += s->size;
814	d->sdcount--;
815}
816
817void
818gv_adjust_freespace(struct gv_sd *s, off_t remainder)
819{
820	struct gv_drive *d;
821	struct gv_freelist *fl, *fl2;
822
823	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
824	d = s->drive_sc;
825	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
826
827	/* First, find the free slot that's immediately after this subdisk. */
828	fl = NULL;
829	LIST_FOREACH(fl, &d->freelist, freelist) {
830		if (fl->offset == s->drive_offset + s->size)
831			break;
832	}
833
834	/* If there is no free slot behind this subdisk, so create one. */
835	if (fl == NULL) {
836		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
837		fl->size = remainder;
838		fl->offset = s->drive_offset + s->size - remainder;
839
840		if (d->freelist_entries == 0) {
841			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
842		} else {
843			LIST_FOREACH(fl2, &d->freelist, freelist) {
844				if (fl->offset < fl2->offset) {
845					LIST_INSERT_BEFORE(fl2, fl, freelist);
846					break;
847				} else if (LIST_NEXT(fl2, freelist) == NULL) {
848					LIST_INSERT_AFTER(fl2, fl, freelist);
849					break;
850				}
851			}
852		}
853
854		d->freelist_entries++;
855
856	/* Expand the free slot we just found. */
857	} else {
858		fl->offset -= remainder;
859		fl->size += remainder;
860	}
861
862	s->size -= remainder;
863	d->avail += remainder;
864}
865
866/* Check if the given plex is a striped one. */
867int
868gv_is_striped(struct gv_plex *p)
869{
870	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
871	switch(p->org) {
872	case GV_PLEX_STRIPED:
873	case GV_PLEX_RAID5:
874		return (1);
875	default:
876		return (0);
877	}
878}
879
880/* Find a volume by name. */
881struct gv_volume *
882gv_find_vol(struct gv_softc *sc, char *name)
883{
884	struct gv_volume *v;
885
886	LIST_FOREACH(v, &sc->volumes, volume) {
887		if (!strncmp(v->name, name, GV_MAXVOLNAME))
888			return (v);
889	}
890
891	return (NULL);
892}
893
894/* Find a plex by name. */
895struct gv_plex *
896gv_find_plex(struct gv_softc *sc, char *name)
897{
898	struct gv_plex *p;
899
900	LIST_FOREACH(p, &sc->plexes, plex) {
901		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
902			return (p);
903	}
904
905	return (NULL);
906}
907
908/* Find a subdisk by name. */
909struct gv_sd *
910gv_find_sd(struct gv_softc *sc, char *name)
911{
912	struct gv_sd *s;
913
914	LIST_FOREACH(s, &sc->subdisks, sd) {
915		if (!strncmp(s->name, name, GV_MAXSDNAME))
916			return (s);
917	}
918
919	return (NULL);
920}
921
922/* Find a drive by name. */
923struct gv_drive *
924gv_find_drive(struct gv_softc *sc, char *name)
925{
926	struct gv_drive *d;
927
928	LIST_FOREACH(d, &sc->drives, drive) {
929		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
930			return (d);
931	}
932
933	return (NULL);
934}
935
936/* Find a drive given a device. */
937struct gv_drive *
938gv_find_drive_device(struct gv_softc *sc, char *device)
939{
940	struct gv_drive *d;
941
942	LIST_FOREACH(d, &sc->drives, drive) {
943		if(!strcmp(d->device, device))
944			return (d);
945	}
946
947	return (NULL);
948}
949
950/* Check if any consumer of the given geom is open. */
951int
952gv_consumer_is_open(struct g_consumer *cp)
953{
954	if (cp == NULL)
955		return (0);
956
957	if (cp->acr || cp->acw || cp->ace)
958		return (1);
959
960	return (0);
961}
962
963int
964gv_provider_is_open(struct g_provider *pp)
965{
966	if (pp == NULL)
967		return (0);
968
969	if (pp->acr || pp->acw || pp->ace)
970		return (1);
971
972	return (0);
973}
974
975/*
976 * Compare the modification dates of the drives.
977 * Return 1 if a > b, 0 otherwise.
978 */
979int
980gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
981{
982	struct gv_drive *d2;
983	struct timeval *a, *b;
984
985	KASSERT(!LIST_EMPTY(&sc->drives),
986	    ("gv_is_drive_newer: empty drive list"));
987
988	a = &d->hdr->label.last_update;
989	LIST_FOREACH(d2, &sc->drives, drive) {
990		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
991		    (d2->hdr == NULL))
992			continue;
993		b = &d2->hdr->label.last_update;
994		if (timevalcmp(a, b, >))
995			return (1);
996	}
997
998	return (0);
999}
1000
1001/* Return the type of object identified by string 'name'. */
1002int
1003gv_object_type(struct gv_softc *sc, char *name)
1004{
1005	struct gv_drive *d;
1006	struct gv_plex *p;
1007	struct gv_sd *s;
1008	struct gv_volume *v;
1009
1010	LIST_FOREACH(v, &sc->volumes, volume) {
1011		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1012			return (GV_TYPE_VOL);
1013	}
1014
1015	LIST_FOREACH(p, &sc->plexes, plex) {
1016		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1017			return (GV_TYPE_PLEX);
1018	}
1019
1020	LIST_FOREACH(s, &sc->subdisks, sd) {
1021		if (!strncmp(s->name, name, GV_MAXSDNAME))
1022			return (GV_TYPE_SD);
1023	}
1024
1025	LIST_FOREACH(d, &sc->drives, drive) {
1026		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1027			return (GV_TYPE_DRIVE);
1028	}
1029
1030	return (GV_ERR_NOTFOUND);
1031}
1032
1033void
1034gv_setup_objects(struct gv_softc *sc)
1035{
1036	struct g_provider *pp;
1037	struct gv_volume *v;
1038	struct gv_plex *p;
1039	struct gv_sd *s;
1040	struct gv_drive *d;
1041
1042	LIST_FOREACH(s, &sc->subdisks, sd) {
1043		d = gv_find_drive(sc, s->drive);
1044		if (d != NULL)
1045			gv_sd_to_drive(s, d);
1046		p = gv_find_plex(sc, s->plex);
1047		if (p != NULL)
1048			gv_sd_to_plex(s, p);
1049		gv_update_sd_state(s);
1050	}
1051
1052	LIST_FOREACH(p, &sc->plexes, plex) {
1053		gv_update_plex_config(p);
1054		v = gv_find_vol(sc, p->volume);
1055		if (v != NULL && p->vol_sc != v) {
1056			p->vol_sc = v;
1057			v->plexcount++;
1058			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1059		}
1060		gv_update_plex_config(p);
1061	}
1062
1063	LIST_FOREACH(v, &sc->volumes, volume) {
1064		v->size = gv_vol_size(v);
1065		if (v->provider == NULL) {
1066			g_topology_lock();
1067			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1068			pp->mediasize = v->size;
1069			pp->sectorsize = 512;    /* XXX */
1070			g_error_provider(pp, 0);
1071			v->provider = pp;
1072			pp->private = v;
1073			g_topology_unlock();
1074		} else if (v->provider->mediasize != v->size) {
1075			g_topology_lock();
1076			v->provider->mediasize = v->size;
1077			g_topology_unlock();
1078		}
1079		v->flags &= ~GV_VOL_NEWBORN;
1080		gv_update_vol_state(v);
1081	}
1082}
1083
1084void
1085gv_cleanup(struct gv_softc *sc)
1086{
1087	struct gv_volume *v, *v2;
1088	struct gv_plex *p, *p2;
1089	struct gv_sd *s, *s2;
1090	struct gv_drive *d, *d2;
1091	struct gv_freelist *fl, *fl2;
1092
1093	mtx_lock(&sc->config_mtx);
1094	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1095		LIST_REMOVE(v, volume);
1096		g_free(v->wqueue);
1097		g_free(v);
1098	}
1099	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1100		LIST_REMOVE(p, plex);
1101		g_free(p->bqueue);
1102		g_free(p->rqueue);
1103		g_free(p->wqueue);
1104		g_free(p);
1105	}
1106	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1107		LIST_REMOVE(s, sd);
1108		g_free(s);
1109	}
1110	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1111		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1112			LIST_REMOVE(fl, freelist);
1113			g_free(fl);
1114		}
1115		LIST_REMOVE(d, drive);
1116		g_free(d->hdr);
1117		g_free(d);
1118	}
1119	mtx_destroy(&sc->config_mtx);
1120}
1121
1122/* General 'attach' routine. */
1123int
1124gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1125{
1126	struct gv_sd *s;
1127	struct gv_softc *sc;
1128
1129	g_topology_assert();
1130
1131	sc = p->vinumconf;
1132	KASSERT(sc != NULL, ("NULL sc"));
1133
1134	if (p->vol_sc != NULL) {
1135		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1136		    p->name, p->volume);
1137		return (GV_ERR_ISATTACHED);
1138	}
1139
1140	/* Stale all subdisks of this plex. */
1141	LIST_FOREACH(s, &p->subdisks, in_plex) {
1142		if (s->state != GV_SD_STALE)
1143			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1144	}
1145	/* Attach to volume. Make sure volume is not up and running. */
1146	if (gv_provider_is_open(v->provider)) {
1147		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1148		    p->name, v->name);
1149		return (GV_ERR_ISBUSY);
1150	}
1151	p->vol_sc = v;
1152	strlcpy(p->volume, v->name, sizeof(p->volume));
1153	v->plexcount++;
1154	if (rename) {
1155		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1156		    v->plexcount);
1157	}
1158	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1159
1160	/* Get plex up again. */
1161	gv_update_vol_size(v, gv_vol_size(v));
1162	gv_set_plex_state(p, GV_PLEX_UP, 0);
1163	gv_save_config(p->vinumconf);
1164	return (0);
1165}
1166
1167int
1168gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1169{
1170	struct gv_sd *s2;
1171	int error, sdcount;
1172
1173	g_topology_assert();
1174
1175	/* If subdisk is attached, don't do it. */
1176	if (s->plex_sc != NULL) {
1177		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1178		    s->name, s->plex);
1179		return (GV_ERR_ISATTACHED);
1180	}
1181
1182	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1183	/* First check that this subdisk has a correct offset. If none other
1184	 * starts at the same, and it's correct module stripesize, it is */
1185	if (offset != -1 && offset % p->stripesize != 0)
1186		return (GV_ERR_BADOFFSET);
1187	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1188		if (s2->plex_offset == offset)
1189			return (GV_ERR_BADOFFSET);
1190	}
1191
1192	/* Attach the subdisk to the plex at given offset. */
1193	s->plex_offset = offset;
1194	strlcpy(s->plex, p->name, sizeof(s->plex));
1195
1196	sdcount = p->sdcount;
1197	error = gv_sd_to_plex(s, p);
1198	if (error)
1199		return (error);
1200	gv_update_plex_config(p);
1201
1202	if (rename) {
1203		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1204		    p->sdcount);
1205	}
1206	if (p->vol_sc != NULL)
1207		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1208	gv_save_config(p->vinumconf);
1209	/* We don't update the subdisk state since the user might have to
1210	 * initiate a rebuild/sync first. */
1211	return (0);
1212}
1213
1214/* Detach a plex from a volume. */
1215int
1216gv_detach_plex(struct gv_plex *p, int flags)
1217{
1218	struct gv_volume *v;
1219
1220	g_topology_assert();
1221	v = p->vol_sc;
1222
1223	if (v == NULL) {
1224		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1225		    p->name);
1226		return (0); /* Not an error. */
1227	}
1228
1229	/*
1230	 * Only proceed if forced or volume inactive.
1231	 */
1232	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1233	    p->state == GV_PLEX_UP)) {
1234		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1235		    p->name, p->volume);
1236		return (GV_ERR_ISBUSY);
1237	}
1238	v->plexcount--;
1239	/* Make sure someone don't read us when gone. */
1240	v->last_read_plex = NULL;
1241	LIST_REMOVE(p, in_volume);
1242	p->vol_sc = NULL;
1243	memset(p->volume, 0, GV_MAXVOLNAME);
1244	gv_update_vol_size(v, gv_vol_size(v));
1245	gv_save_config(p->vinumconf);
1246	return (0);
1247}
1248
1249/* Detach a subdisk from a plex. */
1250int
1251gv_detach_sd(struct gv_sd *s, int flags)
1252{
1253	struct gv_plex *p;
1254
1255	g_topology_assert();
1256	p = s->plex_sc;
1257
1258	if (p == NULL) {
1259		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1260		    s->name);
1261		return (0); /* Not an error. */
1262	}
1263
1264	/*
1265	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1266	 * with this subdisk up.
1267	 */
1268	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1269	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1270	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1271		    s->name, s->plex);
1272		return (GV_ERR_ISBUSY);
1273	}
1274
1275	LIST_REMOVE(s, in_plex);
1276	s->plex_sc = NULL;
1277	memset(s->plex, 0, GV_MAXPLEXNAME);
1278	p->sddetached++;
1279	gv_save_config(s->vinumconf);
1280	return (0);
1281}
1282