geom_subr.c revision 223089
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_subr.c 223089 2011-06-14 17:10:32Z gibbs $");
38
39#include "opt_ddb.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/devicestat.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/bio.h>
47#include <sys/sysctl.h>
48#include <sys/proc.h>
49#include <sys/kthread.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/errno.h>
53#include <sys/sbuf.h>
54#include <geom/geom.h>
55#include <geom/geom_int.h>
56#include <machine/stdarg.h>
57
58#ifdef DDB
59#include <ddb/ddb.h>
60#endif
61
62#ifdef KDB
63#include <sys/kdb.h>
64#endif
65
/* List of registered GEOM classes; g_load_class() inserts at the head. */
struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
/* Queue of all geoms; g_new_geomf() inserts here and redo_rank() reorders it. */
static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
/* NOTE(review): not referenced in this part of the file; presumably wait-message strings -- confirm. */
char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
69
/*
 * Argument block passed to the g_load_class() and g_retaste_event()
 * event handlers.
 */
struct g_hh00 {
	/* Class the event operates on. */
	struct g_class *mp;
	/* Result read back by a caller that waited for the event. */
	int error;
	/*
	 * Non-zero when the event was posted without waiting; the
	 * handler then frees this structure itself.
	 */
	int post;
};
75
76/*
77 * This event offers a new class a chance to taste all preexisting providers.
78 */
79static void
80g_load_class(void *arg, int flag)
81{
82	struct g_hh00 *hh;
83	struct g_class *mp2, *mp;
84	struct g_geom *gp;
85	struct g_provider *pp;
86
87	g_topology_assert();
88	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
89		return;
90	if (g_shutdown)
91		return;
92
93	hh = arg;
94	mp = hh->mp;
95	hh->error = 0;
96	if (hh->post) {
97		g_free(hh);
98		hh = NULL;
99	}
100	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
101	KASSERT(mp->name != NULL && *mp->name != '\0',
102	    ("GEOM class has no name"));
103	LIST_FOREACH(mp2, &g_classes, class) {
104		if (mp2 == mp) {
105			printf("The GEOM class %s is already loaded.\n",
106			    mp2->name);
107			if (hh != NULL)
108				hh->error = EEXIST;
109			return;
110		} else if (strcmp(mp2->name, mp->name) == 0) {
111			printf("A GEOM class %s is already loaded.\n",
112			    mp2->name);
113			if (hh != NULL)
114				hh->error = EEXIST;
115			return;
116		}
117	}
118
119	LIST_INIT(&mp->geom);
120	LIST_INSERT_HEAD(&g_classes, mp, class);
121	if (mp->init != NULL)
122		mp->init(mp);
123	if (mp->taste == NULL)
124		return;
125	LIST_FOREACH(mp2, &g_classes, class) {
126		if (mp == mp2)
127			continue;
128		LIST_FOREACH(gp, &mp2->geom, geom) {
129			LIST_FOREACH(pp, &gp->provider, provider) {
130				mp->taste(mp, pp, 0);
131				g_topology_assert();
132			}
133		}
134	}
135}
136
137static int
138g_unload_class(struct g_class *mp)
139{
140	struct g_geom *gp;
141	struct g_provider *pp;
142	struct g_consumer *cp;
143	int error;
144
145	g_topology_lock();
146	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
147retry:
148	G_VALID_CLASS(mp);
149	LIST_FOREACH(gp, &mp->geom, geom) {
150		/* We refuse to unload if anything is open */
151		LIST_FOREACH(pp, &gp->provider, provider)
152			if (pp->acr || pp->acw || pp->ace) {
153				g_topology_unlock();
154				return (EBUSY);
155			}
156		LIST_FOREACH(cp, &gp->consumer, consumer)
157			if (cp->acr || cp->acw || cp->ace) {
158				g_topology_unlock();
159				return (EBUSY);
160			}
161		/* If the geom is withering, wait for it to finish. */
162		if (gp->flags & G_GEOM_WITHER) {
163			g_topology_sleep(mp, 1);
164			goto retry;
165		}
166	}
167
168	/*
169	 * We allow unloading if we have no geoms, or a class
170	 * method we can use to get rid of them.
171	 */
172	if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
173		g_topology_unlock();
174		return (EOPNOTSUPP);
175	}
176
177	/* Bar new entries */
178	mp->taste = NULL;
179	mp->config = NULL;
180
181	LIST_FOREACH(gp, &mp->geom, geom) {
182		error = mp->destroy_geom(NULL, mp, gp);
183		if (error != 0) {
184			g_topology_unlock();
185			return (error);
186		}
187	}
188	/* Wait for withering to finish. */
189	for (;;) {
190		gp = LIST_FIRST(&mp->geom);
191		if (gp == NULL)
192			break;
193		KASSERT(gp->flags & G_GEOM_WITHER,
194		   ("Non-withering geom in class %s", mp->name));
195		g_topology_sleep(mp, 1);
196	}
197	G_VALID_CLASS(mp);
198	if (mp->fini != NULL)
199		mp->fini(mp);
200	LIST_REMOVE(mp, class);
201	g_topology_unlock();
202
203	return (0);
204}
205
206int
207g_modevent(module_t mod, int type, void *data)
208{
209	struct g_hh00 *hh;
210	int error;
211	static int g_ignition;
212	struct g_class *mp;
213
214	mp = data;
215	if (mp->version != G_VERSION) {
216		printf("GEOM class %s has Wrong version %x\n",
217		    mp->name, mp->version);
218		return (EINVAL);
219	}
220	if (!g_ignition) {
221		g_ignition++;
222		g_init();
223	}
224	error = EOPNOTSUPP;
225	switch (type) {
226	case MOD_LOAD:
227		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name);
228		hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
229		hh->mp = mp;
230		/*
231		 * Once the system is not cold, MOD_LOAD calls will be
232		 * from the userland and the g_event thread will be able
233		 * to acknowledge their completion.
234		 */
235		if (cold) {
236			hh->post = 1;
237			error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
238		} else {
239			error = g_waitfor_event(g_load_class, hh, M_WAITOK,
240			    NULL);
241			if (error == 0)
242				error = hh->error;
243			g_free(hh);
244		}
245		break;
246	case MOD_UNLOAD:
247		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name);
248		DROP_GIANT();
249		error = g_unload_class(mp);
250		PICKUP_GIANT();
251		if (error == 0) {
252			KASSERT(LIST_EMPTY(&mp->geom),
253			    ("Unloaded class (%s) still has geom", mp->name));
254		}
255		break;
256	}
257	return (error);
258}
259
260static void
261g_retaste_event(void *arg, int flag)
262{
263	struct g_class *cp, *mp;
264	struct g_geom *gp, *gp2;
265	struct g_hh00 *hh;
266	struct g_provider *pp;
267
268	g_topology_assert();
269	if (flag == EV_CANCEL)  /* XXX: can't happen ? */
270		return;
271	if (g_shutdown)
272		return;
273
274	hh = arg;
275	mp = hh->mp;
276	hh->error = 0;
277	if (hh->post) {
278		g_free(hh);
279		hh = NULL;
280	}
281	g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name);
282
283	LIST_FOREACH(cp, &g_classes, class) {
284		LIST_FOREACH(gp, &cp->geom, geom) {
285			LIST_FOREACH(pp, &gp->provider, provider) {
286				if (pp->acr || pp->acw || pp->ace)
287					continue;
288				LIST_FOREACH(gp2, &mp->geom, geom) {
289					if (!strcmp(pp->name, gp2->name))
290						break;
291				}
292				if (gp2 != NULL)
293					g_wither_geom(gp2, ENXIO);
294				mp->taste(mp, pp, 0);
295				g_topology_assert();
296			}
297		}
298	}
299}
300
301int
302g_retaste(struct g_class *mp)
303{
304	struct g_hh00 *hh;
305	int error;
306
307	if (mp->taste == NULL)
308		return (EINVAL);
309
310	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
311	hh->mp = mp;
312
313	if (cold) {
314		hh->post = 1;
315		error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL);
316	} else {
317		error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL);
318		if (error == 0)
319			error = hh->error;
320		g_free(hh);
321	}
322
323	return (error);
324}
325
326struct g_geom *
327g_new_geomf(struct g_class *mp, const char *fmt, ...)
328{
329	struct g_geom *gp;
330	va_list ap;
331	struct sbuf *sb;
332
333	g_topology_assert();
334	G_VALID_CLASS(mp);
335	sb = sbuf_new_auto();
336	va_start(ap, fmt);
337	sbuf_vprintf(sb, fmt, ap);
338	va_end(ap);
339	sbuf_finish(sb);
340	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
341	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
342	gp->class = mp;
343	gp->rank = 1;
344	LIST_INIT(&gp->consumer);
345	LIST_INIT(&gp->provider);
346	LIST_INSERT_HEAD(&mp->geom, gp, geom);
347	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
348	strcpy(gp->name, sbuf_data(sb));
349	sbuf_delete(sb);
350	/* Fill in defaults from class */
351	gp->start = mp->start;
352	gp->spoiled = mp->spoiled;
353	gp->attrchanged = mp->attrchanged;
354	gp->dumpconf = mp->dumpconf;
355	gp->access = mp->access;
356	gp->orphan = mp->orphan;
357	gp->ioctl = mp->ioctl;
358	return (gp);
359}
360
361void
362g_destroy_geom(struct g_geom *gp)
363{
364
365	g_topology_assert();
366	G_VALID_GEOM(gp);
367	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
368	KASSERT(LIST_EMPTY(&gp->consumer),
369	    ("g_destroy_geom(%s) with consumer(s) [%p]",
370	    gp->name, LIST_FIRST(&gp->consumer)));
371	KASSERT(LIST_EMPTY(&gp->provider),
372	    ("g_destroy_geom(%s) with provider(s) [%p]",
373	    gp->name, LIST_FIRST(&gp->provider)));
374	g_cancel_event(gp);
375	LIST_REMOVE(gp, geom);
376	TAILQ_REMOVE(&geoms, gp, geoms);
377	g_free(gp->name);
378	g_free(gp);
379}
380
381/*
382 * This function is called (repeatedly) until the geom has withered away.
383 */
384void
385g_wither_geom(struct g_geom *gp, int error)
386{
387	struct g_provider *pp;
388
389	g_topology_assert();
390	G_VALID_GEOM(gp);
391	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
392	if (!(gp->flags & G_GEOM_WITHER)) {
393		gp->flags |= G_GEOM_WITHER;
394		LIST_FOREACH(pp, &gp->provider, provider)
395			if (!(pp->flags & G_PF_ORPHAN))
396				g_orphan_provider(pp, error);
397	}
398	g_do_wither();
399}
400
401/*
402 * Convenience function to destroy a particular provider.
403 */
404void
405g_wither_provider(struct g_provider *pp, int error)
406{
407
408	pp->flags |= G_PF_WITHER;
409	if (!(pp->flags & G_PF_ORPHAN))
410		g_orphan_provider(pp, error);
411}
412
413/*
414 * This function is called (repeatedly) until the has withered away.
415 */
416void
417g_wither_geom_close(struct g_geom *gp, int error)
418{
419	struct g_consumer *cp;
420
421	g_topology_assert();
422	G_VALID_GEOM(gp);
423	g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
424	LIST_FOREACH(cp, &gp->consumer, consumer)
425		if (cp->acr || cp->acw || cp->ace)
426			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
427	g_wither_geom(gp, error);
428}
429
430/*
431 * This function is called (repeatedly) until we cant wash away more
432 * withered bits at present.  Return value contains two bits.  Bit 0
433 * set means "withering stuff we can't wash now", bit 1 means "call
434 * me again, there may be stuff I didn't get the first time around.
435 */
436int
437g_wither_washer()
438{
439	struct g_class *mp;
440	struct g_geom *gp, *gp2;
441	struct g_provider *pp, *pp2;
442	struct g_consumer *cp, *cp2;
443	int result;
444
445	result = 0;
446	g_topology_assert();
447	LIST_FOREACH(mp, &g_classes, class) {
448		LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
449			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
450				if (!(pp->flags & G_PF_WITHER))
451					continue;
452				if (LIST_EMPTY(&pp->consumers))
453					g_destroy_provider(pp);
454				else
455					result |= 1;
456			}
457			if (!(gp->flags & G_GEOM_WITHER))
458				continue;
459			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
460				if (LIST_EMPTY(&pp->consumers))
461					g_destroy_provider(pp);
462				else
463					result |= 1;
464			}
465			LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
466				if (cp->acr || cp->acw || cp->ace) {
467					result |= 1;
468					continue;
469				}
470				if (cp->provider != NULL)
471					g_detach(cp);
472				g_destroy_consumer(cp);
473				result |= 2;
474			}
475			if (LIST_EMPTY(&gp->provider) &&
476			    LIST_EMPTY(&gp->consumer))
477				g_destroy_geom(gp);
478			else
479				result |= 1;
480		}
481	}
482	return (result);
483}
484
485struct g_consumer *
486g_new_consumer(struct g_geom *gp)
487{
488	struct g_consumer *cp;
489
490	g_topology_assert();
491	G_VALID_GEOM(gp);
492	KASSERT(!(gp->flags & G_GEOM_WITHER),
493	    ("g_new_consumer on WITHERing geom(%s) (class %s)",
494	    gp->name, gp->class->name));
495	KASSERT(gp->orphan != NULL,
496	    ("g_new_consumer on geom(%s) (class %s) without orphan",
497	    gp->name, gp->class->name));
498
499	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
500	cp->geom = gp;
501	cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
502	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
503	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
504	return(cp);
505}
506
507void
508g_destroy_consumer(struct g_consumer *cp)
509{
510	struct g_geom *gp;
511
512	g_topology_assert();
513	G_VALID_CONSUMER(cp);
514	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
515	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
516	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
517	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
518	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
519	g_cancel_event(cp);
520	gp = cp->geom;
521	LIST_REMOVE(cp, consumer);
522	devstat_remove_entry(cp->stat);
523	g_free(cp);
524	if (gp->flags & G_GEOM_WITHER)
525		g_do_wither();
526}
527
528static void
529g_new_provider_event(void *arg, int flag)
530{
531	struct g_class *mp;
532	struct g_provider *pp;
533	struct g_consumer *cp;
534
535	g_topology_assert();
536	if (flag == EV_CANCEL)
537		return;
538	if (g_shutdown)
539		return;
540	pp = arg;
541	G_VALID_PROVIDER(pp);
542	KASSERT(!(pp->flags & G_PF_WITHER),
543	    ("g_new_provider_event but withered"));
544	LIST_FOREACH(mp, &g_classes, class) {
545		if (mp->taste == NULL)
546			continue;
547		LIST_FOREACH(cp, &pp->consumers, consumers)
548			if (cp->geom->class == mp)
549				break;
550		if (cp != NULL)
551			continue;
552		mp->taste(mp, pp, 0);
553		g_topology_assert();
554	}
555}
556
557
558struct g_provider *
559g_new_providerf(struct g_geom *gp, const char *fmt, ...)
560{
561	struct g_provider *pp;
562	struct sbuf *sb;
563	va_list ap;
564
565	g_topology_assert();
566	G_VALID_GEOM(gp);
567	KASSERT(gp->access != NULL,
568	    ("new provider on geom(%s) without ->access (class %s)",
569	    gp->name, gp->class->name));
570	KASSERT(gp->start != NULL,
571	    ("new provider on geom(%s) without ->start (class %s)",
572	    gp->name, gp->class->name));
573	KASSERT(!(gp->flags & G_GEOM_WITHER),
574	    ("new provider on WITHERing geom(%s) (class %s)",
575	    gp->name, gp->class->name));
576	sb = sbuf_new_auto();
577	va_start(ap, fmt);
578	sbuf_vprintf(sb, fmt, ap);
579	va_end(ap);
580	sbuf_finish(sb);
581	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
582	pp->name = (char *)(pp + 1);
583	strcpy(pp->name, sbuf_data(sb));
584	sbuf_delete(sb);
585	LIST_INIT(&pp->consumers);
586	pp->error = ENXIO;
587	pp->geom = gp;
588	pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
589	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
590	LIST_INSERT_HEAD(&gp->provider, pp, provider);
591	g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL);
592	return (pp);
593}
594
595void
596g_error_provider(struct g_provider *pp, int error)
597{
598
599	/* G_VALID_PROVIDER(pp);  We may not have g_topology */
600	pp->error = error;
601}
602
603struct g_provider *
604g_provider_by_name(char const *arg)
605{
606	struct g_class *cp;
607	struct g_geom *gp;
608	struct g_provider *pp;
609
610	LIST_FOREACH(cp, &g_classes, class) {
611		LIST_FOREACH(gp, &cp->geom, geom) {
612			LIST_FOREACH(pp, &gp->provider, provider) {
613				if (!strcmp(arg, pp->name))
614					return (pp);
615			}
616		}
617	}
618	return (NULL);
619}
620
621void
622g_destroy_provider(struct g_provider *pp)
623{
624	struct g_geom *gp;
625
626	g_topology_assert();
627	G_VALID_PROVIDER(pp);
628	KASSERT(LIST_EMPTY(&pp->consumers),
629	    ("g_destroy_provider but attached"));
630	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
631	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
632	KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
633	g_cancel_event(pp);
634	LIST_REMOVE(pp, provider);
635	gp = pp->geom;
636	devstat_remove_entry(pp->stat);
637	g_free(pp);
638	if ((gp->flags & G_GEOM_WITHER))
639		g_do_wither();
640}
641
/*
 * We keep the "geoms" list sorted by topological order (== increasing
 * numerical rank) at all times.
 * When an attach is done, the attaching geom's rank is invalidated
 * and it is moved to the tail of the list.
 * All geoms later in the sequence have their ranks reevaluated in
 * sequence.  If we cannot assign rank to a geom because its
 * prerequisites do not have rank, we move that element to the tail
 * of the sequence with invalid rank as well.
 * At some point we encounter our original geom and if we still fail
 * to assign it a rank, there must be a loop and we fail back to
 * g_attach() which detaches again and calls redo_rank again
 * to fix up the damage.
 * It would be much simpler code wise to do it recursively, but we
 * can't risk that on the kernel stack.
 */
658
659static int
660redo_rank(struct g_geom *gp)
661{
662	struct g_consumer *cp;
663	struct g_geom *gp1, *gp2;
664	int n, m;
665
666	g_topology_assert();
667	G_VALID_GEOM(gp);
668
669	/* Invalidate this geoms rank and move it to the tail */
670	gp1 = TAILQ_NEXT(gp, geoms);
671	if (gp1 != NULL) {
672		gp->rank = 0;
673		TAILQ_REMOVE(&geoms, gp, geoms);
674		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
675	} else {
676		gp1 = gp;
677	}
678
679	/* re-rank the rest of the sequence */
680	for (; gp1 != NULL; gp1 = gp2) {
681		gp1->rank = 0;
682		m = 1;
683		LIST_FOREACH(cp, &gp1->consumer, consumer) {
684			if (cp->provider == NULL)
685				continue;
686			n = cp->provider->geom->rank;
687			if (n == 0) {
688				m = 0;
689				break;
690			} else if (n >= m)
691				m = n + 1;
692		}
693		gp1->rank = m;
694		gp2 = TAILQ_NEXT(gp1, geoms);
695
696		/* got a rank, moving on */
697		if (m != 0)
698			continue;
699
700		/* no rank to original geom means loop */
701		if (gp == gp1)
702			return (ELOOP);
703
704		/* no rank, put it at the end move on */
705		TAILQ_REMOVE(&geoms, gp1, geoms);
706		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
707	}
708	return (0);
709}
710
711int
712g_attach(struct g_consumer *cp, struct g_provider *pp)
713{
714	int error;
715
716	g_topology_assert();
717	G_VALID_CONSUMER(cp);
718	G_VALID_PROVIDER(pp);
719	g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp);
720	KASSERT(cp->provider == NULL, ("attach but attached"));
721	cp->provider = pp;
722	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
723	error = redo_rank(cp->geom);
724	if (error) {
725		LIST_REMOVE(cp, consumers);
726		cp->provider = NULL;
727		redo_rank(cp->geom);
728	}
729	return (error);
730}
731
732void
733g_detach(struct g_consumer *cp)
734{
735	struct g_provider *pp;
736
737	g_topology_assert();
738	G_VALID_CONSUMER(cp);
739	g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
740	KASSERT(cp->provider != NULL, ("detach but not attached"));
741	KASSERT(cp->acr == 0, ("detach but nonzero acr"));
742	KASSERT(cp->acw == 0, ("detach but nonzero acw"));
743	KASSERT(cp->ace == 0, ("detach but nonzero ace"));
744	KASSERT(cp->nstart == cp->nend,
745	    ("detach with active requests"));
746	pp = cp->provider;
747	LIST_REMOVE(cp, consumers);
748	cp->provider = NULL;
749	if (pp->geom->flags & G_GEOM_WITHER)
750		g_do_wither();
751	else if (pp->flags & G_PF_WITHER)
752		g_do_wither();
753	redo_rank(cp->geom);
754}
755
756/*
757 * g_access()
758 *
759 * Access-check with delta values.  The question asked is "can provider
760 * "cp" change the access counters by the relative amounts dc[rwe] ?"
761 */
762
763int
764g_access(struct g_consumer *cp, int dcr, int dcw, int dce)
765{
766	struct g_provider *pp;
767	int pr,pw,pe;
768	int error;
769
770	g_topology_assert();
771	G_VALID_CONSUMER(cp);
772	pp = cp->provider;
773	KASSERT(pp != NULL, ("access but not attached"));
774	G_VALID_PROVIDER(pp);
775
776	g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)",
777	    cp, pp->name, dcr, dcw, dce);
778
779	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
780	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
781	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
782	KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request"));
783	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
784
785	/*
786	 * If our class cares about being spoiled, and we have been, we
787	 * are probably just ahead of the event telling us that.  Fail
788	 * now rather than having to unravel this later.
789	 */
790	if (cp->geom->spoiled != NULL && cp->spoiled &&
791	    (dcr > 0 || dcw > 0 || dce > 0))
792		return (ENXIO);
793
794	/*
795	 * Figure out what counts the provider would have had, if this
796	 * consumer had (r0w0e0) at this time.
797	 */
798	pr = pp->acr - cp->acr;
799	pw = pp->acw - cp->acw;
800	pe = pp->ace - cp->ace;
801
802	g_trace(G_T_ACCESS,
803    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
804	    dcr, dcw, dce,
805	    cp->acr, cp->acw, cp->ace,
806	    pp->acr, pp->acw, pp->ace,
807	    pp, pp->name);
808
809	/* If foot-shooting is enabled, any open on rank#1 is OK */
810	if ((g_debugflags & 16) && pp->geom->rank == 1)
811		;
812	/* If we try exclusive but already write: fail */
813	else if (dce > 0 && pw > 0)
814		return (EPERM);
815	/* If we try write but already exclusive: fail */
816	else if (dcw > 0 && pe > 0)
817		return (EPERM);
818	/* If we try to open more but provider is error'ed: fail */
819	else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
820		return (pp->error);
821
822	/* Ok then... */
823
824	error = pp->geom->access(pp, dcr, dcw, dce);
825	KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0,
826	    ("Geom provider %s::%s failed closing ->access()",
827	    pp->geom->class->name, pp->name));
828	if (!error) {
829		/*
830		 * If we open first write, spoil any partner consumers.
831		 * If we close last write and provider is not errored,
832		 * trigger re-taste.
833		 */
834		if (pp->acw == 0 && dcw != 0)
835			g_spoil(pp, cp);
836		else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 &&
837		    !(pp->geom->flags & G_GEOM_WITHER))
838			g_post_event(g_new_provider_event, pp, M_WAITOK,
839			    pp, NULL);
840
841		pp->acr += dcr;
842		pp->acw += dcw;
843		pp->ace += dce;
844		cp->acr += dcr;
845		cp->acw += dcw;
846		cp->ace += dce;
847		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)
848			KASSERT(pp->sectorsize > 0,
849			    ("Provider %s lacks sectorsize", pp->name));
850	}
851	return (error);
852}
853
/*
 * Answer a get-attribute request with an int value.
 * Thin wrapper around g_handleattr(); returns what it returns.
 */
int
g_handleattr_int(struct bio *bp, const char *attribute, int val)
{

	return (g_handleattr(bp, attribute, &val, sizeof(val)));
}
860
/*
 * Answer a get-attribute request with an off_t value.
 * Thin wrapper around g_handleattr(); returns what it returns.
 */
int
g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
{

	return (g_handleattr(bp, attribute, &val, sizeof(val)));
}
867
/*
 * Answer a get-attribute request with a NUL-terminated string.
 * A length of 0 tells g_handleattr() to treat the value as a string.
 */
int
g_handleattr_str(struct bio *bp, const char *attribute, const char *str)
{
	const int as_string = 0;

	return (g_handleattr(bp, attribute, str, as_string));
}
874
875int
876g_handleattr(struct bio *bp, const char *attribute, const void *val, int len)
877{
878	int error = 0;
879
880	if (strcmp(bp->bio_attribute, attribute))
881		return (0);
882	if (len == 0) {
883		bzero(bp->bio_data, bp->bio_length);
884		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
885		    bp->bio_length) {
886			printf("%s: %s bio_length %jd len %zu -> EFAULT\n",
887			    __func__, bp->bio_to->name,
888			    (intmax_t)bp->bio_length, strlen(val));
889			error = EFAULT;
890		}
891	} else if (bp->bio_length == len) {
892		bcopy(val, bp->bio_data, len);
893	} else {
894		printf("%s: %s bio_length %jd len %d -> EFAULT\n", __func__,
895		    bp->bio_to->name, (intmax_t)bp->bio_length, len);
896		error = EFAULT;
897	}
898	if (error == 0)
899		bp->bio_completed = bp->bio_length;
900	g_io_deliver(bp, error);
901	return (1);
902}
903
904int
905g_std_access(struct g_provider *pp,
906	int dr __unused, int dw __unused, int de __unused)
907{
908
909	g_topology_assert();
910	G_VALID_PROVIDER(pp);
911        return (0);
912}
913
914void
915g_std_done(struct bio *bp)
916{
917	struct bio *bp2;
918
919	bp2 = bp->bio_parent;
920	if (bp2->bio_error == 0)
921		bp2->bio_error = bp->bio_error;
922	bp2->bio_completed += bp->bio_completed;
923	g_destroy_bio(bp);
924	bp2->bio_inbed++;
925	if (bp2->bio_children == bp2->bio_inbed)
926		g_io_deliver(bp2, bp2->bio_error);
927}
928
929/* XXX: maybe this is only g_slice_spoiled */
930
931void
932g_std_spoiled(struct g_consumer *cp)
933{
934	struct g_geom *gp;
935	struct g_provider *pp;
936
937	g_topology_assert();
938	G_VALID_CONSUMER(cp);
939	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
940	g_detach(cp);
941	gp = cp->geom;
942	LIST_FOREACH(pp, &gp->provider, provider)
943		g_orphan_provider(pp, ENXIO);
944	g_destroy_consumer(cp);
945	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
946		g_destroy_geom(gp);
947	else
948		gp->flags |= G_GEOM_WITHER;
949}
950
951/*
952 * Spoiling happens when a provider is opened for writing, but consumers
953 * which are configured by in-band data are attached (slicers for instance).
954 * Since the write might potentially change the in-band data, such consumers
955 * need to re-evaluate their existence after the writing session closes.
956 * We do this by (offering to) tear them down when the open for write happens
957 * in return for a re-taste when it closes again.
958 * Together with the fact that such consumers grab an 'e' bit whenever they
959 * are open, regardless of mode, this ends up DTRT.
960 */
961
962static void
963g_spoil_event(void *arg, int flag)
964{
965	struct g_provider *pp;
966	struct g_consumer *cp, *cp2;
967
968	g_topology_assert();
969	if (flag == EV_CANCEL)
970		return;
971	pp = arg;
972	G_VALID_PROVIDER(pp);
973	for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
974		cp2 = LIST_NEXT(cp, consumers);
975		if (!cp->spoiled)
976			continue;
977		cp->spoiled = 0;
978		if (cp->geom->spoiled == NULL)
979			continue;
980		cp->geom->spoiled(cp);
981		g_topology_assert();
982	}
983}
984
985void
986g_spoil(struct g_provider *pp, struct g_consumer *cp)
987{
988	struct g_consumer *cp2;
989
990	g_topology_assert();
991	G_VALID_PROVIDER(pp);
992	G_VALID_CONSUMER(cp);
993
994	LIST_FOREACH(cp2, &pp->consumers, consumers) {
995		if (cp2 == cp)
996			continue;
997/*
998		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
999		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
1000*/
1001		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
1002		cp2->spoiled++;
1003	}
1004	g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
1005}
1006
/*
 * Fetch attribute "attr" through consumer "cp" into "var", which holds
 * exactly "len" bytes.  Returns the error from g_io_getattr(), EINVAL
 * if the length it reports back differs from "len", or 0.
 */
int
g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
{
	int error, got;

	got = len;
	error = g_io_getattr(attr, cp, &got, var);
	if (error != 0)
		return (error);
	return (got == len ? 0 : EINVAL);
}
1020
/*
 * Return the length of a leading "ada<digits>" or "ad<digits>" device
 * prefix in "name", or 0 if the name does not start with either prefix
 * followed by at least one decimal digit.
 */
static int
g_get_device_prefix_len(const char *name)
{
	const char *p;

	/* "ada" is tried first because "ad" is a prefix of it. */
	if (strncmp(name, "ada", 3) == 0)
		p = name + 3;
	else if (strncmp(name, "ad", 2) == 0)
		p = name + 2;
	else
		return (0);

	/* At least one unit digit is required. */
	if (*p < '0' || *p > '9')
		return (0);
	while (*p >= '0' && *p <= '9')
		p++;
	return ((int)(p - name));
}
1039
/*
 * Compare two names.  Returns 1 if they are identical, or if both start
 * with a device prefix recognized by g_get_device_prefix_len() and the
 * text after the prefixes is identical; returns 0 otherwise.
 */
int
g_compare_names(const char *namea, const char *nameb)
{
	int lena, lenb;

	if (strcmp(namea, nameb) == 0)
		return (1);

	/* Both names need a recognized prefix for the relaxed match. */
	lena = g_get_device_prefix_len(namea);
	if (lena == 0)
		return (0);
	lenb = g_get_device_prefix_len(nameb);
	if (lenb == 0)
		return (0);

	return (strcmp(namea + lena, nameb + lenb) == 0 ? 1 : 0);
}
1057
1058#if defined(DIAGNOSTIC) || defined(DDB)
1059/*
1060 * This function walks the mesh and returns a non-zero integer if it
1061 * finds the argument pointer is an object. The return value indicates
1062 * which type of object it is believed to be. If topology is not locked,
1063 * this function is potentially dangerous, but we don't assert that the
1064 * topology lock is held when called from debugger.
1065 */
1066int
1067g_valid_obj(void const *ptr)
1068{
1069	struct g_class *mp;
1070	struct g_geom *gp;
1071	struct g_consumer *cp;
1072	struct g_provider *pp;
1073
1074#ifdef KDB
1075	if (kdb_active == 0)
1076#endif
1077		g_topology_assert();
1078
1079	LIST_FOREACH(mp, &g_classes, class) {
1080		if (ptr == mp)
1081			return (1);
1082		LIST_FOREACH(gp, &mp->geom, geom) {
1083			if (ptr == gp)
1084				return (2);
1085			LIST_FOREACH(cp, &gp->consumer, consumer)
1086				if (ptr == cp)
1087					return (3);
1088			LIST_FOREACH(pp, &gp->provider, provider)
1089				if (ptr == pp)
1090					return (4);
1091		}
1092	}
1093	return(0);
1094}
1095#endif
1096
1097#ifdef DDB
1098
/*
 * Print through db_printf(), preceded by "indent" spaces.  Expects an
 * int named "indent" to be in scope at the point of use.
 */
#define	gprintf(...)	do {						\
	db_printf("%*s", indent, "");					\
	db_printf(__VA_ARGS__);						\
} while (0)
/* Same as gprintf(), followed by a newline. */
#define	gprintln(...)	do {						\
	gprintf(__VA_ARGS__);						\
	db_printf("\n");						\
} while (0)

/*
 * Append the name "sflag" to the string being built if "flag" is set in
 * (obj)->flags, separating entries with a comma.  Expects "str", "size"
 * and "comma" to be in scope at the point of use.
 */
#define	ADDFLAG(obj, flag, sflag)	do {				\
	if ((obj)->flags & (flag)) {					\
		if (comma)						\
			strlcat(str, ",", size);			\
		strlcat(str, (sflag), size);				\
		comma = 1;						\
	}								\
} while (0)
1116
1117static char *
1118provider_flags_to_string(struct g_provider *pp, char *str, size_t size)
1119{
1120	int comma = 0;
1121
1122	bzero(str, size);
1123	if (pp->flags == 0) {
1124		strlcpy(str, "NONE", size);
1125		return (str);
1126	}
1127	ADDFLAG(pp, G_PF_CANDELETE, "G_PF_CANDELETE");
1128	ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER");
1129	ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN");
1130	return (str);
1131}
1132
1133static char *
1134geom_flags_to_string(struct g_geom *gp, char *str, size_t size)
1135{
1136	int comma = 0;
1137
1138	bzero(str, size);
1139	if (gp->flags == 0) {
1140		strlcpy(str, "NONE", size);
1141		return (str);
1142	}
1143	ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER");
1144	return (str);
1145}
1146static void
1147db_show_geom_consumer(int indent, struct g_consumer *cp)
1148{
1149
1150	if (indent == 0) {
1151		gprintln("consumer: %p", cp);
1152		gprintln("  class:    %s (%p)", cp->geom->class->name,
1153		    cp->geom->class);
1154		gprintln("  geom:     %s (%p)", cp->geom->name, cp->geom);
1155		if (cp->provider == NULL)
1156			gprintln("  provider: none");
1157		else {
1158			gprintln("  provider: %s (%p)", cp->provider->name,
1159			    cp->provider);
1160		}
1161		gprintln("  access:   r%dw%de%d", cp->acr, cp->acw, cp->ace);
1162		gprintln("  spoiled:  %d", cp->spoiled);
1163		gprintln("  nstart:   %u", cp->nstart);
1164		gprintln("  nend:     %u", cp->nend);
1165	} else {
1166		gprintf("consumer: %p (%s), access=r%dw%de%d", cp,
1167		    cp->provider != NULL ? cp->provider->name : "none",
1168		    cp->acr, cp->acw, cp->ace);
1169		if (cp->spoiled)
1170			db_printf(", spoiled=%d", cp->spoiled);
1171		db_printf("\n");
1172	}
1173}
1174
1175static void
1176db_show_geom_provider(int indent, struct g_provider *pp)
1177{
1178	struct g_consumer *cp;
1179	char flags[64];
1180
1181	if (indent == 0) {
1182		gprintln("provider: %s (%p)", pp->name, pp);
1183		gprintln("  class:        %s (%p)", pp->geom->class->name,
1184		    pp->geom->class);
1185		gprintln("  geom:         %s (%p)", pp->geom->name, pp->geom);
1186		gprintln("  mediasize:    %jd", (intmax_t)pp->mediasize);
1187		gprintln("  sectorsize:   %u", pp->sectorsize);
1188		gprintln("  stripesize:   %u", pp->stripesize);
1189		gprintln("  stripeoffset: %u", pp->stripeoffset);
1190		gprintln("  access:       r%dw%de%d", pp->acr, pp->acw,
1191		    pp->ace);
1192		gprintln("  flags:        %s (0x%04x)",
1193		    provider_flags_to_string(pp, flags, sizeof(flags)),
1194		    pp->flags);
1195		gprintln("  error:        %d", pp->error);
1196		gprintln("  nstart:       %u", pp->nstart);
1197		gprintln("  nend:         %u", pp->nend);
1198		if (LIST_EMPTY(&pp->consumers))
1199			gprintln("  consumers:    none");
1200	} else {
1201		gprintf("provider: %s (%p), access=r%dw%de%d",
1202		    pp->name, pp, pp->acr, pp->acw, pp->ace);
1203		if (pp->flags != 0) {
1204			db_printf(", flags=%s (0x%04x)",
1205			    provider_flags_to_string(pp, flags, sizeof(flags)),
1206			    pp->flags);
1207		}
1208		db_printf("\n");
1209	}
1210	if (!LIST_EMPTY(&pp->consumers)) {
1211		LIST_FOREACH(cp, &pp->consumers, consumers) {
1212			db_show_geom_consumer(indent + 2, cp);
1213			if (db_pager_quit)
1214				break;
1215		}
1216	}
1217}
1218
1219static void
1220db_show_geom_geom(int indent, struct g_geom *gp)
1221{
1222	struct g_provider *pp;
1223	struct g_consumer *cp;
1224	char flags[64];
1225
1226	if (indent == 0) {
1227		gprintln("geom: %s (%p)", gp->name, gp);
1228		gprintln("  class:     %s (%p)", gp->class->name, gp->class);
1229		gprintln("  flags:     %s (0x%04x)",
1230		    geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags);
1231		gprintln("  rank:      %d", gp->rank);
1232		if (LIST_EMPTY(&gp->provider))
1233			gprintln("  providers: none");
1234		if (LIST_EMPTY(&gp->consumer))
1235			gprintln("  consumers: none");
1236	} else {
1237		gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank);
1238		if (gp->flags != 0) {
1239			db_printf(", flags=%s (0x%04x)",
1240			    geom_flags_to_string(gp, flags, sizeof(flags)),
1241			    gp->flags);
1242		}
1243		db_printf("\n");
1244	}
1245	if (!LIST_EMPTY(&gp->provider)) {
1246		LIST_FOREACH(pp, &gp->provider, provider) {
1247			db_show_geom_provider(indent + 2, pp);
1248			if (db_pager_quit)
1249				break;
1250		}
1251	}
1252	if (!LIST_EMPTY(&gp->consumer)) {
1253		LIST_FOREACH(cp, &gp->consumer, consumer) {
1254			db_show_geom_consumer(indent + 2, cp);
1255			if (db_pager_quit)
1256				break;
1257		}
1258	}
1259}
1260
1261static void
1262db_show_geom_class(struct g_class *mp)
1263{
1264	struct g_geom *gp;
1265
1266	db_printf("class: %s (%p)\n", mp->name, mp);
1267	LIST_FOREACH(gp, &mp->geom, geom) {
1268		db_show_geom_geom(2, gp);
1269		if (db_pager_quit)
1270			break;
1271	}
1272}
1273
1274/*
1275 * Print the GEOM topology or the given object.
1276 */
1277DB_SHOW_COMMAND(geom, db_show_geom)
1278{
1279	struct g_class *mp;
1280
1281	if (!have_addr) {
1282		/* No address given, print the entire topology. */
1283		LIST_FOREACH(mp, &g_classes, class) {
1284			db_show_geom_class(mp);
1285			db_printf("\n");
1286			if (db_pager_quit)
1287				break;
1288		}
1289	} else {
1290		switch (g_valid_obj((void *)addr)) {
1291		case 1:
1292			db_show_geom_class((struct g_class *)addr);
1293			break;
1294		case 2:
1295			db_show_geom_geom(0, (struct g_geom *)addr);
1296			break;
1297		case 3:
1298			db_show_geom_consumer(0, (struct g_consumer *)addr);
1299			break;
1300		case 4:
1301			db_show_geom_provider(0, (struct g_provider *)addr);
1302			break;
1303		default:
1304			db_printf("Not a GEOM object.\n");
1305			break;
1306		}
1307	}
1308}
1309
1310static void
1311db_print_bio_cmd(struct bio *bp)
1312{
1313	db_printf("  cmd: ");
1314	switch (bp->bio_cmd) {
1315	case BIO_READ: db_printf("BIO_READ"); break;
1316	case BIO_WRITE: db_printf("BIO_WRITE"); break;
1317	case BIO_DELETE: db_printf("BIO_DELETE"); break;
1318	case BIO_GETATTR: db_printf("BIO_GETATTR"); break;
1319	case BIO_FLUSH: db_printf("BIO_FLUSH"); break;
1320	case BIO_CMD0: db_printf("BIO_CMD0"); break;
1321	case BIO_CMD1: db_printf("BIO_CMD1"); break;
1322	case BIO_CMD2: db_printf("BIO_CMD2"); break;
1323	default: db_printf("UNKNOWN"); break;
1324	}
1325	db_printf("\n");
1326}
1327
1328static void
1329db_print_bio_flags(struct bio *bp)
1330{
1331	int comma;
1332
1333	comma = 0;
1334	db_printf("  flags: ");
1335	if (bp->bio_flags & BIO_ERROR) {
1336		db_printf("BIO_ERROR");
1337		comma = 1;
1338	}
1339	if (bp->bio_flags & BIO_DONE) {
1340		db_printf("%sBIO_DONE", (comma ? ", " : ""));
1341		comma = 1;
1342	}
1343	if (bp->bio_flags & BIO_ONQUEUE)
1344		db_printf("%sBIO_ONQUEUE", (comma ? ", " : ""));
1345	db_printf("\n");
1346}
1347
1348/*
1349 * Print useful information in a BIO
1350 */
1351DB_SHOW_COMMAND(bio, db_show_bio)
1352{
1353	struct bio *bp;
1354
1355	if (have_addr) {
1356		bp = (struct bio *)addr;
1357		db_printf("BIO %p\n", bp);
1358		db_print_bio_cmd(bp);
1359		db_print_bio_flags(bp);
1360		db_printf("  cflags: 0x%hhx\n", bp->bio_cflags);
1361		db_printf("  pflags: 0x%hhx\n", bp->bio_pflags);
1362		db_printf("  offset: %jd\n", (intmax_t)bp->bio_offset);
1363		db_printf("  length: %jd\n", (intmax_t)bp->bio_length);
1364		db_printf("  bcount: %ld\n", bp->bio_bcount);
1365		db_printf("  resid: %ld\n", bp->bio_resid);
1366		db_printf("  completed: %jd\n", (intmax_t)bp->bio_completed);
1367		db_printf("  children: %u\n", bp->bio_children);
1368		db_printf("  inbed: %u\n", bp->bio_inbed);
1369		db_printf("  error: %d\n", bp->bio_error);
1370		db_printf("  parent: %p\n", bp->bio_parent);
1371		db_printf("  driver1: %p\n", bp->bio_driver1);
1372		db_printf("  driver2: %p\n", bp->bio_driver2);
1373		db_printf("  caller1: %p\n", bp->bio_caller1);
1374		db_printf("  caller2: %p\n", bp->bio_caller2);
1375		db_printf("  bio_from: %p\n", bp->bio_from);
1376		db_printf("  bio_to: %p\n", bp->bio_to);
1377	}
1378}
1379
1380#undef	gprintf
1381#undef	gprintln
1382#undef	ADDFLAG
1383
1384#endif	/* DDB */
1385