geom_subr.c revision 181463
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_subr.c 181463 2008-08-09 11:14:05Z des $");
38
39#include "opt_ddb.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/devicestat.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/bio.h>
47#include <sys/sysctl.h>
48#include <sys/proc.h>
49#include <sys/kthread.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/errno.h>
53#include <sys/sbuf.h>
54#include <geom/geom.h>
55#include <geom/geom_int.h>
56#include <machine/stdarg.h>
57
58#ifdef DDB
59#include <ddb/ddb.h>
60#endif
61
62struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
63static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
64char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
65
/*
 * Argument block passed to the class load, unload and retaste event
 * handlers.
 */
struct g_hh00 {
	/* Class the event operates on. */
	struct g_class	*mp;
	/* Result left by the handler for a caller that waits for it. */
	int		error;
	/* Non-zero: nobody waits; the handler frees this structure. */
	int		post;
};
71
72/*
73 * This event offers a new class a chance to taste all preexisting providers.
74 */
75static void
76g_load_class(void *arg, int flag)
77{
78	struct g_hh00 *hh;
79	struct g_class *mp2, *mp;
80	struct g_geom *gp;
81	struct g_provider *pp;
82
83	g_topology_assert();
84	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
85		return;
86	if (g_shutdown)
87		return;
88
89	hh = arg;
90	mp = hh->mp;
91	hh->error = 0;
92	if (hh->post) {
93		g_free(hh);
94		hh = NULL;
95	}
96	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
97	KASSERT(mp->name != NULL && *mp->name != '\0',
98	    ("GEOM class has no name"));
99	LIST_FOREACH(mp2, &g_classes, class) {
100		if (mp2 == mp) {
101			printf("The GEOM class %s is already loaded.\n",
102			    mp2->name);
103			if (hh != NULL)
104				hh->error = EEXIST;
105			return;
106		} else if (strcmp(mp2->name, mp->name) == 0) {
107			printf("A GEOM class %s is already loaded.\n",
108			    mp2->name);
109			if (hh != NULL)
110				hh->error = EEXIST;
111			return;
112		}
113	}
114
115	LIST_INIT(&mp->geom);
116	LIST_INSERT_HEAD(&g_classes, mp, class);
117	if (mp->init != NULL)
118		mp->init(mp);
119	if (mp->taste == NULL)
120		return;
121	LIST_FOREACH(mp2, &g_classes, class) {
122		if (mp == mp2)
123			continue;
124		LIST_FOREACH(gp, &mp2->geom, geom) {
125			LIST_FOREACH(pp, &gp->provider, provider) {
126				mp->taste(mp, pp, 0);
127				g_topology_assert();
128			}
129		}
130	}
131}
132
133static void
134g_unload_class(void *arg, int flag)
135{
136	struct g_hh00 *hh;
137	struct g_class *mp;
138	struct g_geom *gp;
139	struct g_provider *pp;
140	struct g_consumer *cp;
141	int error;
142
143	g_topology_assert();
144	hh = arg;
145	mp = hh->mp;
146	G_VALID_CLASS(mp);
147	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
148
149	/*
150	 * We allow unloading if we have no geoms, or a class
151	 * method we can use to get rid of them.
152	 */
153	if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
154		hh->error = EOPNOTSUPP;
155		return;
156	}
157
158	/* We refuse to unload if anything is open */
159	LIST_FOREACH(gp, &mp->geom, geom) {
160		LIST_FOREACH(pp, &gp->provider, provider)
161			if (pp->acr || pp->acw || pp->ace) {
162				hh->error = EBUSY;
163				return;
164			}
165		LIST_FOREACH(cp, &gp->consumer, consumer)
166			if (cp->acr || cp->acw || cp->ace) {
167				hh->error = EBUSY;
168				return;
169			}
170	}
171
172	/* Bar new entries */
173	mp->taste = NULL;
174	mp->config = NULL;
175
176	error = 0;
177	for (;;) {
178		gp = LIST_FIRST(&mp->geom);
179		if (gp == NULL)
180			break;
181		error = mp->destroy_geom(NULL, mp, gp);
182		if (error != 0)
183			break;
184	}
185	if (error == 0) {
186		if (mp->fini != NULL)
187			mp->fini(mp);
188		LIST_REMOVE(mp, class);
189	}
190	hh->error = error;
191	return;
192}
193
194int
195g_modevent(module_t mod, int type, void *data)
196{
197	struct g_hh00 *hh;
198	int error;
199	static int g_ignition;
200	struct g_class *mp;
201
202	mp = data;
203	if (mp->version != G_VERSION) {
204		printf("GEOM class %s has Wrong version %x\n",
205		    mp->name, mp->version);
206		return (EINVAL);
207	}
208	if (!g_ignition) {
209		g_ignition++;
210		g_init();
211	}
212	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
213	hh->mp = data;
214	error = EOPNOTSUPP;
215	switch (type) {
216	case MOD_LOAD:
217		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", hh->mp->name);
218		/*
219		 * Once the system is not cold, MOD_LOAD calls will be
220		 * from the userland and the g_event thread will be able
221		 * to acknowledge their completion.
222		 */
223		if (cold) {
224			hh->post = 1;
225			error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
226		} else {
227			error = g_waitfor_event(g_load_class, hh, M_WAITOK,
228			    NULL);
229			if (error == 0)
230				error = hh->error;
231			g_free(hh);
232		}
233		break;
234	case MOD_UNLOAD:
235		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", hh->mp->name);
236		error = g_waitfor_event(g_unload_class, hh, M_WAITOK, NULL);
237		if (error == 0)
238			error = hh->error;
239		if (error == 0) {
240			KASSERT(LIST_EMPTY(&hh->mp->geom),
241			    ("Unloaded class (%s) still has geom", hh->mp->name));
242		}
243		g_free(hh);
244		break;
245	default:
246		g_free(hh);
247		break;
248	}
249	return (error);
250}
251
252static void
253g_retaste_event(void *arg, int flag)
254{
255	struct g_class *cp, *mp;
256	struct g_geom *gp, *gp2;
257	struct g_hh00 *hh;
258	struct g_provider *pp;
259
260	g_topology_assert();
261	if (flag == EV_CANCEL)  /* XXX: can't happen ? */
262		return;
263	if (g_shutdown)
264		return;
265
266	hh = arg;
267	mp = hh->mp;
268	hh->error = 0;
269	if (hh->post) {
270		g_free(hh);
271		hh = NULL;
272	}
273	g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name);
274
275	LIST_FOREACH(cp, &g_classes, class) {
276		LIST_FOREACH(gp, &cp->geom, geom) {
277			LIST_FOREACH(pp, &gp->provider, provider) {
278				if (pp->acr || pp->acw || pp->ace)
279					continue;
280				LIST_FOREACH(gp2, &mp->geom, geom) {
281					if (!strcmp(pp->name, gp2->name))
282						break;
283				}
284				if (gp2 != NULL)
285					g_wither_geom(gp2, ENXIO);
286				mp->taste(mp, pp, 0);
287				g_topology_assert();
288			}
289		}
290	}
291}
292
293int
294g_retaste(struct g_class *mp)
295{
296	struct g_hh00 *hh;
297	int error;
298
299	if (mp->taste == NULL)
300		return (EINVAL);
301
302	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
303	hh->mp = mp;
304
305	if (cold) {
306		hh->post = 1;
307		error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL);
308	} else {
309		error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL);
310		if (error == 0)
311			error = hh->error;
312		g_free(hh);
313	}
314
315	return (error);
316}
317
318struct g_geom *
319g_new_geomf(struct g_class *mp, const char *fmt, ...)
320{
321	struct g_geom *gp;
322	va_list ap;
323	struct sbuf *sb;
324
325	g_topology_assert();
326	G_VALID_CLASS(mp);
327	sb = sbuf_new_auto();
328	va_start(ap, fmt);
329	sbuf_vprintf(sb, fmt, ap);
330	va_end(ap);
331	sbuf_finish(sb);
332	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
333	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
334	gp->class = mp;
335	gp->rank = 1;
336	LIST_INIT(&gp->consumer);
337	LIST_INIT(&gp->provider);
338	LIST_INSERT_HEAD(&mp->geom, gp, geom);
339	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
340	strcpy(gp->name, sbuf_data(sb));
341	sbuf_delete(sb);
342	/* Fill in defaults from class */
343	gp->start = mp->start;
344	gp->spoiled = mp->spoiled;
345	gp->dumpconf = mp->dumpconf;
346	gp->access = mp->access;
347	gp->orphan = mp->orphan;
348	gp->ioctl = mp->ioctl;
349	return (gp);
350}
351
352void
353g_destroy_geom(struct g_geom *gp)
354{
355
356	g_topology_assert();
357	G_VALID_GEOM(gp);
358	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
359	KASSERT(LIST_EMPTY(&gp->consumer),
360	    ("g_destroy_geom(%s) with consumer(s) [%p]",
361	    gp->name, LIST_FIRST(&gp->consumer)));
362	KASSERT(LIST_EMPTY(&gp->provider),
363	    ("g_destroy_geom(%s) with provider(s) [%p]",
364	    gp->name, LIST_FIRST(&gp->provider)));
365	g_cancel_event(gp);
366	LIST_REMOVE(gp, geom);
367	TAILQ_REMOVE(&geoms, gp, geoms);
368	g_free(gp->name);
369	g_free(gp);
370}
371
372/*
373 * This function is called (repeatedly) until the has withered away.
374 */
375void
376g_wither_geom(struct g_geom *gp, int error)
377{
378	struct g_provider *pp;
379
380	g_topology_assert();
381	G_VALID_GEOM(gp);
382	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
383	if (!(gp->flags & G_GEOM_WITHER)) {
384		gp->flags |= G_GEOM_WITHER;
385		LIST_FOREACH(pp, &gp->provider, provider)
386			if (!(pp->flags & G_PF_ORPHAN))
387				g_orphan_provider(pp, error);
388	}
389	g_do_wither();
390}
391
392/*
393 * Convenience function to destroy a particular provider.
394 */
395void
396g_wither_provider(struct g_provider *pp, int error)
397{
398
399	pp->flags |= G_PF_WITHER;
400	if (!(pp->flags & G_PF_ORPHAN))
401		g_orphan_provider(pp, error);
402}
403
404/*
405 * This function is called (repeatedly) until the has withered away.
406 */
407void
408g_wither_geom_close(struct g_geom *gp, int error)
409{
410	struct g_consumer *cp;
411
412	g_topology_assert();
413	G_VALID_GEOM(gp);
414	g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
415	LIST_FOREACH(cp, &gp->consumer, consumer)
416		if (cp->acr || cp->acw || cp->ace)
417			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
418	g_wither_geom(gp, error);
419}
420
421/*
422 * This function is called (repeatedly) until we cant wash away more
423 * withered bits at present.  Return value contains two bits.  Bit 0
424 * set means "withering stuff we can't wash now", bit 1 means "call
425 * me again, there may be stuff I didn't get the first time around.
426 */
427int
428g_wither_washer()
429{
430	struct g_class *mp;
431	struct g_geom *gp, *gp2;
432	struct g_provider *pp, *pp2;
433	struct g_consumer *cp, *cp2;
434	int result;
435
436	result = 0;
437	g_topology_assert();
438	LIST_FOREACH(mp, &g_classes, class) {
439		LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
440			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
441				if (!(pp->flags & G_PF_WITHER))
442					continue;
443				if (LIST_EMPTY(&pp->consumers))
444					g_destroy_provider(pp);
445				else
446					result |= 1;
447			}
448			if (!(gp->flags & G_GEOM_WITHER))
449				continue;
450			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
451				if (LIST_EMPTY(&pp->consumers))
452					g_destroy_provider(pp);
453				else
454					result |= 1;
455			}
456			LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
457				if (cp->acr || cp->acw || cp->ace) {
458					result |= 1;
459					continue;
460				}
461				if (cp->provider != NULL)
462					g_detach(cp);
463				g_destroy_consumer(cp);
464				result |= 2;
465			}
466			if (LIST_EMPTY(&gp->provider) &&
467			    LIST_EMPTY(&gp->consumer))
468				g_destroy_geom(gp);
469			else
470				result |= 1;
471		}
472	}
473	return (result);
474}
475
476struct g_consumer *
477g_new_consumer(struct g_geom *gp)
478{
479	struct g_consumer *cp;
480
481	g_topology_assert();
482	G_VALID_GEOM(gp);
483	KASSERT(!(gp->flags & G_GEOM_WITHER),
484	    ("g_new_consumer on WITHERing geom(%s) (class %s)",
485	    gp->name, gp->class->name));
486	KASSERT(gp->orphan != NULL,
487	    ("g_new_consumer on geom(%s) (class %s) without orphan",
488	    gp->name, gp->class->name));
489
490	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
491	cp->geom = gp;
492	cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
493	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
494	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
495	return(cp);
496}
497
498void
499g_destroy_consumer(struct g_consumer *cp)
500{
501	struct g_geom *gp;
502
503	g_topology_assert();
504	G_VALID_CONSUMER(cp);
505	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
506	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
507	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
508	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
509	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
510	g_cancel_event(cp);
511	gp = cp->geom;
512	LIST_REMOVE(cp, consumer);
513	devstat_remove_entry(cp->stat);
514	g_free(cp);
515	if (gp->flags & G_GEOM_WITHER)
516		g_do_wither();
517}
518
519static void
520g_new_provider_event(void *arg, int flag)
521{
522	struct g_class *mp;
523	struct g_provider *pp;
524	struct g_consumer *cp;
525	int i;
526
527	g_topology_assert();
528	if (flag == EV_CANCEL)
529		return;
530	if (g_shutdown)
531		return;
532	pp = arg;
533	G_VALID_PROVIDER(pp);
534	KASSERT(!(pp->flags & G_PF_WITHER),
535	    ("g_new_provider_event but withered"));
536	LIST_FOREACH(mp, &g_classes, class) {
537		if (mp->taste == NULL)
538			continue;
539		i = 1;
540		LIST_FOREACH(cp, &pp->consumers, consumers)
541			if (cp->geom->class == mp)
542				i = 0;
543		if (!i)
544			continue;
545		mp->taste(mp, pp, 0);
546		g_topology_assert();
547	}
548}
549
550
551struct g_provider *
552g_new_providerf(struct g_geom *gp, const char *fmt, ...)
553{
554	struct g_provider *pp;
555	struct sbuf *sb;
556	va_list ap;
557
558	g_topology_assert();
559	G_VALID_GEOM(gp);
560	KASSERT(gp->access != NULL,
561	    ("new provider on geom(%s) without ->access (class %s)",
562	    gp->name, gp->class->name));
563	KASSERT(gp->start != NULL,
564	    ("new provider on geom(%s) without ->start (class %s)",
565	    gp->name, gp->class->name));
566	KASSERT(!(gp->flags & G_GEOM_WITHER),
567	    ("new provider on WITHERing geom(%s) (class %s)",
568	    gp->name, gp->class->name));
569	sb = sbuf_new_auto();
570	va_start(ap, fmt);
571	sbuf_vprintf(sb, fmt, ap);
572	va_end(ap);
573	sbuf_finish(sb);
574	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
575	pp->name = (char *)(pp + 1);
576	strcpy(pp->name, sbuf_data(sb));
577	sbuf_delete(sb);
578	LIST_INIT(&pp->consumers);
579	pp->error = ENXIO;
580	pp->geom = gp;
581	pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
582	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
583	LIST_INSERT_HEAD(&gp->provider, pp, provider);
584	g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL);
585	return (pp);
586}
587
588void
589g_error_provider(struct g_provider *pp, int error)
590{
591
592	/* G_VALID_PROVIDER(pp);  We may not have g_topology */
593	pp->error = error;
594}
595
596struct g_provider *
597g_provider_by_name(char const *arg)
598{
599	struct g_class *cp;
600	struct g_geom *gp;
601	struct g_provider *pp;
602
603	LIST_FOREACH(cp, &g_classes, class) {
604		LIST_FOREACH(gp, &cp->geom, geom) {
605			LIST_FOREACH(pp, &gp->provider, provider) {
606				if (!strcmp(arg, pp->name))
607					return (pp);
608			}
609		}
610	}
611	return (NULL);
612}
613
614void
615g_destroy_provider(struct g_provider *pp)
616{
617	struct g_geom *gp;
618
619	g_topology_assert();
620	G_VALID_PROVIDER(pp);
621	KASSERT(LIST_EMPTY(&pp->consumers),
622	    ("g_destroy_provider but attached"));
623	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
624	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
625	KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
626	g_cancel_event(pp);
627	LIST_REMOVE(pp, provider);
628	gp = pp->geom;
629	devstat_remove_entry(pp->stat);
630	g_free(pp);
631	if ((gp->flags & G_GEOM_WITHER))
632		g_do_wither();
633}
634
635/*
636 * We keep the "geoms" list sorted by topological order (== increasing
637 * numerical rank) at all times.
638 * When an attach is done, the attaching geoms rank is invalidated
639 * and it is moved to the tail of the list.
640 * All geoms later in the sequence has their ranks reevaluated in
641 * sequence.  If we cannot assign rank to a geom because it's
642 * prerequisites do not have rank, we move that element to the tail
643 * of the sequence with invalid rank as well.
644 * At some point we encounter our original geom and if we stil fail
645 * to assign it a rank, there must be a loop and we fail back to
646 * g_attach() which detach again and calls redo_rank again
647 * to fix up the damage.
648 * It would be much simpler code wise to do it recursively, but we
649 * can't risk that on the kernel stack.
650 */
651
652static int
653redo_rank(struct g_geom *gp)
654{
655	struct g_consumer *cp;
656	struct g_geom *gp1, *gp2;
657	int n, m;
658
659	g_topology_assert();
660	G_VALID_GEOM(gp);
661
662	/* Invalidate this geoms rank and move it to the tail */
663	gp1 = TAILQ_NEXT(gp, geoms);
664	if (gp1 != NULL) {
665		gp->rank = 0;
666		TAILQ_REMOVE(&geoms, gp, geoms);
667		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
668	} else {
669		gp1 = gp;
670	}
671
672	/* re-rank the rest of the sequence */
673	for (; gp1 != NULL; gp1 = gp2) {
674		gp1->rank = 0;
675		m = 1;
676		LIST_FOREACH(cp, &gp1->consumer, consumer) {
677			if (cp->provider == NULL)
678				continue;
679			n = cp->provider->geom->rank;
680			if (n == 0) {
681				m = 0;
682				break;
683			} else if (n >= m)
684				m = n + 1;
685		}
686		gp1->rank = m;
687		gp2 = TAILQ_NEXT(gp1, geoms);
688
689		/* got a rank, moving on */
690		if (m != 0)
691			continue;
692
693		/* no rank to original geom means loop */
694		if (gp == gp1)
695			return (ELOOP);
696
697		/* no rank, put it at the end move on */
698		TAILQ_REMOVE(&geoms, gp1, geoms);
699		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
700	}
701	return (0);
702}
703
704int
705g_attach(struct g_consumer *cp, struct g_provider *pp)
706{
707	int error;
708
709	g_topology_assert();
710	G_VALID_CONSUMER(cp);
711	G_VALID_PROVIDER(pp);
712	KASSERT(cp->provider == NULL, ("attach but attached"));
713	cp->provider = pp;
714	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
715	error = redo_rank(cp->geom);
716	if (error) {
717		LIST_REMOVE(cp, consumers);
718		cp->provider = NULL;
719		redo_rank(cp->geom);
720	}
721	return (error);
722}
723
724void
725g_detach(struct g_consumer *cp)
726{
727	struct g_provider *pp;
728
729	g_topology_assert();
730	G_VALID_CONSUMER(cp);
731	g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
732	KASSERT(cp->provider != NULL, ("detach but not attached"));
733	KASSERT(cp->acr == 0, ("detach but nonzero acr"));
734	KASSERT(cp->acw == 0, ("detach but nonzero acw"));
735	KASSERT(cp->ace == 0, ("detach but nonzero ace"));
736	KASSERT(cp->nstart == cp->nend,
737	    ("detach with active requests"));
738	pp = cp->provider;
739	LIST_REMOVE(cp, consumers);
740	cp->provider = NULL;
741	if (pp->geom->flags & G_GEOM_WITHER)
742		g_do_wither();
743	else if (pp->flags & G_PF_WITHER)
744		g_do_wither();
745	redo_rank(cp->geom);
746}
747
748/*
749 * g_access()
750 *
751 * Access-check with delta values.  The question asked is "can provider
752 * "cp" change the access counters by the relative amounts dc[rwe] ?"
753 */
754
755int
756g_access(struct g_consumer *cp, int dcr, int dcw, int dce)
757{
758	struct g_provider *pp;
759	int pr,pw,pe;
760	int error;
761
762	g_topology_assert();
763	G_VALID_CONSUMER(cp);
764	pp = cp->provider;
765	KASSERT(pp != NULL, ("access but not attached"));
766	G_VALID_PROVIDER(pp);
767
768	g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)",
769	    cp, pp->name, dcr, dcw, dce);
770
771	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
772	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
773	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
774	KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request"));
775	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
776
777	/*
778	 * If our class cares about being spoiled, and we have been, we
779	 * are probably just ahead of the event telling us that.  Fail
780	 * now rather than having to unravel this later.
781	 */
782	if (cp->geom->spoiled != NULL && cp->spoiled &&
783	    (dcr > 0 || dcw > 0 || dce > 0))
784		return (ENXIO);
785
786	/*
787	 * Figure out what counts the provider would have had, if this
788	 * consumer had (r0w0e0) at this time.
789	 */
790	pr = pp->acr - cp->acr;
791	pw = pp->acw - cp->acw;
792	pe = pp->ace - cp->ace;
793
794	g_trace(G_T_ACCESS,
795    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
796	    dcr, dcw, dce,
797	    cp->acr, cp->acw, cp->ace,
798	    pp->acr, pp->acw, pp->ace,
799	    pp, pp->name);
800
801	/* If foot-shooting is enabled, any open on rank#1 is OK */
802	if ((g_debugflags & 16) && pp->geom->rank == 1)
803		;
804	/* If we try exclusive but already write: fail */
805	else if (dce > 0 && pw > 0)
806		return (EPERM);
807	/* If we try write but already exclusive: fail */
808	else if (dcw > 0 && pe > 0)
809		return (EPERM);
810	/* If we try to open more but provider is error'ed: fail */
811	else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
812		return (pp->error);
813
814	/* Ok then... */
815
816	error = pp->geom->access(pp, dcr, dcw, dce);
817	KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0,
818	    ("Geom provider %s::%s failed closing ->access()",
819	    pp->geom->class->name, pp->name));
820	if (!error) {
821		/*
822		 * If we open first write, spoil any partner consumers.
823		 * If we close last write and provider is not errored,
824		 * trigger re-taste.
825		 */
826		if (pp->acw == 0 && dcw != 0)
827			g_spoil(pp, cp);
828		else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 &&
829		    !(pp->geom->flags & G_GEOM_WITHER))
830			g_post_event(g_new_provider_event, pp, M_WAITOK,
831			    pp, NULL);
832
833		pp->acr += dcr;
834		pp->acw += dcw;
835		pp->ace += dce;
836		cp->acr += dcr;
837		cp->acw += dcw;
838		cp->ace += dce;
839		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)
840			KASSERT(pp->sectorsize > 0,
841			    ("Provider %s lacks sectorsize", pp->name));
842	}
843	return (error);
844}
845
/*
 * g_handleattr() for an attribute whose value is an int.
 */
int
g_handleattr_int(struct bio *bp, const char *attribute, int val)
{

	return (g_handleattr(bp, attribute, &val, sizeof(val)));
}
852
853int
854g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
855{
856
857	return (g_handleattr(bp, attribute, &val, sizeof val));
858}
859
/*
 * g_handleattr() for an attribute whose value is a NUL-terminated string;
 * a length of 0 is what selects string handling there.
 */
int
g_handleattr_str(struct bio *bp, const char *attribute, char *str)
{

	return (g_handleattr(bp, attribute, str, 0));
}
866
867int
868g_handleattr(struct bio *bp, const char *attribute, void *val, int len)
869{
870	int error = 0;
871
872	if (strcmp(bp->bio_attribute, attribute))
873		return (0);
874	if (len == 0) {
875		bzero(bp->bio_data, bp->bio_length);
876		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
877		    bp->bio_length) {
878			printf("%s: %s bio_length %jd len %zu -> EFAULT\n",
879			    __func__, bp->bio_to->name,
880			    (intmax_t)bp->bio_length, strlen(val));
881			error = EFAULT;
882		}
883	} else if (bp->bio_length == len) {
884		bcopy(val, bp->bio_data, len);
885		bp->bio_completed = len;
886	} else {
887		printf("%s: %s bio_length %jd len %d -> EFAULT\n", __func__,
888		    bp->bio_to->name, (intmax_t)bp->bio_length, len);
889		error = EFAULT;
890	}
891	g_io_deliver(bp, error);
892	return (1);
893}
894
895int
896g_std_access(struct g_provider *pp,
897	int dr __unused, int dw __unused, int de __unused)
898{
899
900	g_topology_assert();
901	G_VALID_PROVIDER(pp);
902        return (0);
903}
904
905void
906g_std_done(struct bio *bp)
907{
908	struct bio *bp2;
909
910	bp2 = bp->bio_parent;
911	if (bp2->bio_error == 0)
912		bp2->bio_error = bp->bio_error;
913	bp2->bio_completed += bp->bio_completed;
914	g_destroy_bio(bp);
915	bp2->bio_inbed++;
916	if (bp2->bio_children == bp2->bio_inbed)
917		g_io_deliver(bp2, bp2->bio_error);
918}
919
920/* XXX: maybe this is only g_slice_spoiled */
921
922void
923g_std_spoiled(struct g_consumer *cp)
924{
925	struct g_geom *gp;
926	struct g_provider *pp;
927
928	g_topology_assert();
929	G_VALID_CONSUMER(cp);
930	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
931	g_detach(cp);
932	gp = cp->geom;
933	LIST_FOREACH(pp, &gp->provider, provider)
934		g_orphan_provider(pp, ENXIO);
935	g_destroy_consumer(cp);
936	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
937		g_destroy_geom(gp);
938	else
939		gp->flags |= G_GEOM_WITHER;
940}
941
942/*
943 * Spoiling happens when a provider is opened for writing, but consumers
944 * which are configured by in-band data are attached (slicers for instance).
945 * Since the write might potentially change the in-band data, such consumers
946 * need to re-evaluate their existence after the writing session closes.
947 * We do this by (offering to) tear them down when the open for write happens
948 * in return for a re-taste when it closes again.
949 * Together with the fact that such consumers grab an 'e' bit whenever they
950 * are open, regardless of mode, this ends up DTRT.
951 */
952
953static void
954g_spoil_event(void *arg, int flag)
955{
956	struct g_provider *pp;
957	struct g_consumer *cp, *cp2;
958
959	g_topology_assert();
960	if (flag == EV_CANCEL)
961		return;
962	pp = arg;
963	G_VALID_PROVIDER(pp);
964	for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
965		cp2 = LIST_NEXT(cp, consumers);
966		if (!cp->spoiled)
967			continue;
968		cp->spoiled = 0;
969		if (cp->geom->spoiled == NULL)
970			continue;
971		cp->geom->spoiled(cp);
972		g_topology_assert();
973	}
974}
975
976void
977g_spoil(struct g_provider *pp, struct g_consumer *cp)
978{
979	struct g_consumer *cp2;
980
981	g_topology_assert();
982	G_VALID_PROVIDER(pp);
983	G_VALID_CONSUMER(cp);
984
985	LIST_FOREACH(cp2, &pp->consumers, consumers) {
986		if (cp2 == cp)
987			continue;
988/*
989		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
990		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
991*/
992		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
993		cp2->spoiled++;
994	}
995	g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
996}
997
998int
999g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
1000{
1001	int error, i;
1002
1003	i = len;
1004	error = g_io_getattr(attr, cp, &i, var);
1005	if (error)
1006		return (error);
1007	if (i != len)
1008		return (EINVAL);
1009	return (0);
1010}
1011
1012#if defined(DIAGNOSTIC) || defined(DDB)
1013/*
1014 * This function walks (topologically unsafely) the mesh and return a
1015 * non-zero integer if it finds the argument pointer is an object.
1016 * The return value indicates which type of object it is belived to be.
1017 * If topology is not locked, this function is potentially dangerous,
1018 * but since it is for debugging purposes and can be useful for instance
1019 * from DDB, we do not assert topology lock is held.
1020 */
1021int
1022g_valid_obj(void const *ptr)
1023{
1024	struct g_class *mp;
1025	struct g_geom *gp;
1026	struct g_consumer *cp;
1027	struct g_provider *pp;
1028
1029	LIST_FOREACH(mp, &g_classes, class) {
1030		if (ptr == mp)
1031			return (1);
1032		LIST_FOREACH(gp, &mp->geom, geom) {
1033			if (ptr == gp)
1034				return (2);
1035			LIST_FOREACH(cp, &gp->consumer, consumer)
1036				if (ptr == cp)
1037					return (3);
1038			LIST_FOREACH(pp, &gp->provider, provider)
1039				if (ptr == pp)
1040					return (4);
1041		}
1042	}
1043	return(0);
1044}
1045#endif
1046
1047#ifdef DDB
1048
/*
 * Indented printf for the "show geom" code; gprintln also ends the line.
 * Both expect an int named "indent" to be in scope where they are used.
 */
#define	gprintf(...)	do {						\
	printf("%*s", indent, "");					\
	printf(__VA_ARGS__);						\
} while (0)
#define	gprintln(...)	do {						\
	printf("%*s", indent, "");					\
	printf(__VA_ARGS__);						\
	printf("\n");							\
} while (0)
1057
/*
 * Append the name sflag to the comma separated list being built if flag is
 * set in (obj)->flags.  Expects "str", "size" and an int "comma" to be in
 * scope where it is used.
 */
#define	ADDFLAG(obj, flag, sflag)	do {				\
	if (((obj)->flags & (flag)) != 0) {				\
		if (comma != 0)						\
			strlcat(str, ",", size);			\
		strlcat(str, (sflag), size);				\
		comma = 1;						\
	}								\
} while (0)
1066
1067static char *
1068provider_flags_to_string(struct g_provider *pp, char *str, size_t size)
1069{
1070	int comma = 0;
1071
1072	bzero(str, size);
1073	if (pp->flags == 0) {
1074		strlcpy(str, "NONE", size);
1075		return (str);
1076	}
1077	ADDFLAG(pp, G_PF_CANDELETE, "G_PF_CANDELETE");
1078	ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER");
1079	ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN");
1080	return (str);
1081}
1082
1083static char *
1084geom_flags_to_string(struct g_geom *gp, char *str, size_t size)
1085{
1086	int comma = 0;
1087
1088	bzero(str, size);
1089	if (gp->flags == 0) {
1090		strlcpy(str, "NONE", size);
1091		return (str);
1092	}
1093	ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER");
1094	return (str);
1095}
1096static void
1097db_show_geom_consumer(int indent, struct g_consumer *cp)
1098{
1099
1100	if (indent == 0) {
1101		gprintln("consumer: %p", cp);
1102		gprintln("  class:    %s (%p)", cp->geom->class->name,
1103		    cp->geom->class);
1104		gprintln("  geom:     %s (%p)", cp->geom->name, cp->geom);
1105		if (cp->provider == NULL)
1106			gprintln("  provider: none");
1107		else {
1108			gprintln("  provider: %s (%p)", cp->provider->name,
1109			    cp->provider);
1110		}
1111		gprintln("  access:   r%dw%de%d", cp->acr, cp->acw, cp->ace);
1112		gprintln("  spoiled:  %d", cp->spoiled);
1113		gprintln("  nstart:   %u", cp->nstart);
1114		gprintln("  nend:     %u", cp->nend);
1115	} else {
1116		gprintf("consumer: %p (%s), access=r%dw%de%d", cp,
1117		    cp->provider != NULL ? cp->provider->name : "none",
1118		    cp->acr, cp->acw, cp->ace);
1119		if (cp->spoiled)
1120			printf(", spoiled=%d", cp->spoiled);
1121		printf("\n");
1122	}
1123}
1124
1125static void
1126db_show_geom_provider(int indent, struct g_provider *pp)
1127{
1128	struct g_consumer *cp;
1129	char flags[64];
1130
1131	if (indent == 0) {
1132		gprintln("provider: %s (%p)", pp->name, pp);
1133		gprintln("  class:        %s (%p)", pp->geom->class->name,
1134		    pp->geom->class);
1135		gprintln("  geom:         %s (%p)", pp->geom->name, pp->geom);
1136		gprintln("  mediasize:    %jd", (intmax_t)pp->mediasize);
1137		gprintln("  sectorsize:   %u", pp->sectorsize);
1138		gprintln("  stripesize:   %u", pp->stripesize);
1139		gprintln("  stripeoffset: %u", pp->stripeoffset);
1140		gprintln("  access:       r%dw%de%d", pp->acr, pp->acw,
1141		    pp->ace);
1142		gprintln("  flags:        %s (0x%04x)",
1143		    provider_flags_to_string(pp, flags, sizeof(flags)),
1144		    pp->flags);
1145		gprintln("  error:        %d", pp->error);
1146		gprintln("  nstart:       %u", pp->nstart);
1147		gprintln("  nend:         %u", pp->nend);
1148		if (LIST_EMPTY(&pp->consumers))
1149			gprintln("  consumers:    none");
1150	} else {
1151		gprintf("provider: %s (%p), access=r%dw%de%d",
1152		    pp->name, pp, pp->acr, pp->acw, pp->ace);
1153		if (pp->flags != 0) {
1154			printf(", flags=%s (0x%04x)",
1155			    provider_flags_to_string(pp, flags, sizeof(flags)),
1156			    pp->flags);
1157		}
1158		printf("\n");
1159	}
1160	if (!LIST_EMPTY(&pp->consumers)) {
1161		LIST_FOREACH(cp, &pp->consumers, consumers) {
1162			db_show_geom_consumer(indent + 2, cp);
1163			if (db_pager_quit)
1164				break;
1165		}
1166	}
1167}
1168
1169static void
1170db_show_geom_geom(int indent, struct g_geom *gp)
1171{
1172	struct g_provider *pp;
1173	struct g_consumer *cp;
1174	char flags[64];
1175
1176	if (indent == 0) {
1177		gprintln("geom: %s (%p)", gp->name, gp);
1178		gprintln("  class:     %s (%p)", gp->class->name, gp->class);
1179		gprintln("  flags:     %s (0x%04x)",
1180		    geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags);
1181		gprintln("  rank:      %d", gp->rank);
1182		if (LIST_EMPTY(&gp->provider))
1183			gprintln("  providers: none");
1184		if (LIST_EMPTY(&gp->consumer))
1185			gprintln("  consumers: none");
1186	} else {
1187		gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank);
1188		if (gp->flags != 0) {
1189			printf(", flags=%s (0x%04x)",
1190			    geom_flags_to_string(gp, flags, sizeof(flags)),
1191			    gp->flags);
1192		}
1193		printf("\n");
1194	}
1195	if (!LIST_EMPTY(&gp->provider)) {
1196		LIST_FOREACH(pp, &gp->provider, provider) {
1197			db_show_geom_provider(indent + 2, pp);
1198			if (db_pager_quit)
1199				break;
1200		}
1201	}
1202	if (!LIST_EMPTY(&gp->consumer)) {
1203		LIST_FOREACH(cp, &gp->consumer, consumer) {
1204			db_show_geom_consumer(indent + 2, cp);
1205			if (db_pager_quit)
1206				break;
1207		}
1208	}
1209}
1210
1211static void
1212db_show_geom_class(struct g_class *mp)
1213{
1214	struct g_geom *gp;
1215
1216	printf("class: %s (%p)\n", mp->name, mp);
1217	LIST_FOREACH(gp, &mp->geom, geom) {
1218		db_show_geom_geom(2, gp);
1219		if (db_pager_quit)
1220			break;
1221	}
1222}
1223
1224/*
1225 * Print the GEOM topology or the given object.
1226 */
1227DB_SHOW_COMMAND(geom, db_show_geom)
1228{
1229	struct g_class *mp;
1230
1231	if (!have_addr) {
1232		/* No address given, print the entire topology. */
1233		LIST_FOREACH(mp, &g_classes, class) {
1234			db_show_geom_class(mp);
1235			printf("\n");
1236			if (db_pager_quit)
1237				break;
1238		}
1239	} else {
1240		switch (g_valid_obj((void *)addr)) {
1241		case 1:
1242			db_show_geom_class((struct g_class *)addr);
1243			break;
1244		case 2:
1245			db_show_geom_geom(0, (struct g_geom *)addr);
1246			break;
1247		case 3:
1248			db_show_geom_consumer(0, (struct g_consumer *)addr);
1249			break;
1250		case 4:
1251			db_show_geom_provider(0, (struct g_provider *)addr);
1252			break;
1253		default:
1254			printf("Not a GEOM object.\n");
1255			break;
1256		}
1257	}
1258}
1259
1260#undef	gprintf
1261#undef	gprintln
1262#undef	ADDFLAG
1263
1264#endif	/* DDB */
1265