geom_subr.c revision 221101
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_subr.c 221101 2011-04-27 00:10:26Z mav $");
38
39#include "opt_ddb.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/devicestat.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/bio.h>
47#include <sys/sysctl.h>
48#include <sys/proc.h>
49#include <sys/kthread.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/errno.h>
53#include <sys/sbuf.h>
54#include <geom/geom.h>
55#include <geom/geom_int.h>
56#include <machine/stdarg.h>
57
58#ifdef DDB
59#include <ddb/ddb.h>
60#endif
61
62#ifdef KDB
63#include <sys/kdb.h>
64#endif
65
/* Head of the list of registered GEOM classes (linked through "class"). */
struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
/* Tail queue of every geom; redo_rank() keeps it ordered by rank. */
static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
/*
 * NOTE(review): not referenced in this part of the file; presumably
 * wait-channel descriptions for the GEOM threads -- confirm.
 */
char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
69
/*
 * Request block passed as the event argument to g_load_class() and
 * g_retaste_event().
 */
struct g_hh00 {
	struct g_class	*mp;	/* class the event operates on */
	int		error;	/* result read back by a waiting caller */
	int		post;	/* non-zero: posted without waiter, handler frees */
};
75
76/*
77 * This event offers a new class a chance to taste all preexisting providers.
78 */
79static void
80g_load_class(void *arg, int flag)
81{
82	struct g_hh00 *hh;
83	struct g_class *mp2, *mp;
84	struct g_geom *gp;
85	struct g_provider *pp;
86
87	g_topology_assert();
88	if (flag == EV_CANCEL)	/* XXX: can't happen ? */
89		return;
90	if (g_shutdown)
91		return;
92
93	hh = arg;
94	mp = hh->mp;
95	hh->error = 0;
96	if (hh->post) {
97		g_free(hh);
98		hh = NULL;
99	}
100	g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
101	KASSERT(mp->name != NULL && *mp->name != '\0',
102	    ("GEOM class has no name"));
103	LIST_FOREACH(mp2, &g_classes, class) {
104		if (mp2 == mp) {
105			printf("The GEOM class %s is already loaded.\n",
106			    mp2->name);
107			if (hh != NULL)
108				hh->error = EEXIST;
109			return;
110		} else if (strcmp(mp2->name, mp->name) == 0) {
111			printf("A GEOM class %s is already loaded.\n",
112			    mp2->name);
113			if (hh != NULL)
114				hh->error = EEXIST;
115			return;
116		}
117	}
118
119	LIST_INIT(&mp->geom);
120	LIST_INSERT_HEAD(&g_classes, mp, class);
121	if (mp->init != NULL)
122		mp->init(mp);
123	if (mp->taste == NULL)
124		return;
125	LIST_FOREACH(mp2, &g_classes, class) {
126		if (mp == mp2)
127			continue;
128		LIST_FOREACH(gp, &mp2->geom, geom) {
129			LIST_FOREACH(pp, &gp->provider, provider) {
130				mp->taste(mp, pp, 0);
131				g_topology_assert();
132			}
133		}
134	}
135}
136
137static int
138g_unload_class(struct g_class *mp)
139{
140	struct g_geom *gp;
141	struct g_provider *pp;
142	struct g_consumer *cp;
143	int error;
144
145	g_topology_lock();
146	g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
147retry:
148	G_VALID_CLASS(mp);
149	LIST_FOREACH(gp, &mp->geom, geom) {
150		/* We refuse to unload if anything is open */
151		LIST_FOREACH(pp, &gp->provider, provider)
152			if (pp->acr || pp->acw || pp->ace) {
153				g_topology_unlock();
154				return (EBUSY);
155			}
156		LIST_FOREACH(cp, &gp->consumer, consumer)
157			if (cp->acr || cp->acw || cp->ace) {
158				g_topology_unlock();
159				return (EBUSY);
160			}
161		/* If the geom is withering, wait for it to finish. */
162		if (gp->flags & G_GEOM_WITHER) {
163			g_topology_sleep(mp, 1);
164			goto retry;
165		}
166	}
167
168	/*
169	 * We allow unloading if we have no geoms, or a class
170	 * method we can use to get rid of them.
171	 */
172	if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
173		g_topology_unlock();
174		return (EOPNOTSUPP);
175	}
176
177	/* Bar new entries */
178	mp->taste = NULL;
179	mp->config = NULL;
180
181	LIST_FOREACH(gp, &mp->geom, geom) {
182		error = mp->destroy_geom(NULL, mp, gp);
183		if (error != 0) {
184			g_topology_unlock();
185			return (error);
186		}
187	}
188	/* Wait for withering to finish. */
189	for (;;) {
190		gp = LIST_FIRST(&mp->geom);
191		if (gp == NULL)
192			break;
193		KASSERT(gp->flags & G_GEOM_WITHER,
194		   ("Non-withering geom in class %s", mp->name));
195		g_topology_sleep(mp, 1);
196	}
197	G_VALID_CLASS(mp);
198	if (mp->fini != NULL)
199		mp->fini(mp);
200	LIST_REMOVE(mp, class);
201	g_topology_unlock();
202
203	return (0);
204}
205
206int
207g_modevent(module_t mod, int type, void *data)
208{
209	struct g_hh00 *hh;
210	int error;
211	static int g_ignition;
212	struct g_class *mp;
213
214	mp = data;
215	if (mp->version != G_VERSION) {
216		printf("GEOM class %s has Wrong version %x\n",
217		    mp->name, mp->version);
218		return (EINVAL);
219	}
220	if (!g_ignition) {
221		g_ignition++;
222		g_init();
223	}
224	error = EOPNOTSUPP;
225	switch (type) {
226	case MOD_LOAD:
227		g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name);
228		hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
229		hh->mp = mp;
230		/*
231		 * Once the system is not cold, MOD_LOAD calls will be
232		 * from the userland and the g_event thread will be able
233		 * to acknowledge their completion.
234		 */
235		if (cold) {
236			hh->post = 1;
237			error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
238		} else {
239			error = g_waitfor_event(g_load_class, hh, M_WAITOK,
240			    NULL);
241			if (error == 0)
242				error = hh->error;
243			g_free(hh);
244		}
245		break;
246	case MOD_UNLOAD:
247		g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name);
248		DROP_GIANT();
249		error = g_unload_class(mp);
250		PICKUP_GIANT();
251		if (error == 0) {
252			KASSERT(LIST_EMPTY(&mp->geom),
253			    ("Unloaded class (%s) still has geom", mp->name));
254		}
255		break;
256	}
257	return (error);
258}
259
260static void
261g_retaste_event(void *arg, int flag)
262{
263	struct g_class *cp, *mp;
264	struct g_geom *gp, *gp2;
265	struct g_hh00 *hh;
266	struct g_provider *pp;
267
268	g_topology_assert();
269	if (flag == EV_CANCEL)  /* XXX: can't happen ? */
270		return;
271	if (g_shutdown)
272		return;
273
274	hh = arg;
275	mp = hh->mp;
276	hh->error = 0;
277	if (hh->post) {
278		g_free(hh);
279		hh = NULL;
280	}
281	g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name);
282
283	LIST_FOREACH(cp, &g_classes, class) {
284		LIST_FOREACH(gp, &cp->geom, geom) {
285			LIST_FOREACH(pp, &gp->provider, provider) {
286				if (pp->acr || pp->acw || pp->ace)
287					continue;
288				LIST_FOREACH(gp2, &mp->geom, geom) {
289					if (!strcmp(pp->name, gp2->name))
290						break;
291				}
292				if (gp2 != NULL)
293					g_wither_geom(gp2, ENXIO);
294				mp->taste(mp, pp, 0);
295				g_topology_assert();
296			}
297		}
298	}
299}
300
301int
302g_retaste(struct g_class *mp)
303{
304	struct g_hh00 *hh;
305	int error;
306
307	if (mp->taste == NULL)
308		return (EINVAL);
309
310	hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
311	hh->mp = mp;
312
313	if (cold) {
314		hh->post = 1;
315		error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL);
316	} else {
317		error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL);
318		if (error == 0)
319			error = hh->error;
320		g_free(hh);
321	}
322
323	return (error);
324}
325
326struct g_geom *
327g_new_geomf(struct g_class *mp, const char *fmt, ...)
328{
329	struct g_geom *gp;
330	va_list ap;
331	struct sbuf *sb;
332
333	g_topology_assert();
334	G_VALID_CLASS(mp);
335	sb = sbuf_new_auto();
336	va_start(ap, fmt);
337	sbuf_vprintf(sb, fmt, ap);
338	va_end(ap);
339	sbuf_finish(sb);
340	gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
341	gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
342	gp->class = mp;
343	gp->rank = 1;
344	LIST_INIT(&gp->consumer);
345	LIST_INIT(&gp->provider);
346	LIST_INSERT_HEAD(&mp->geom, gp, geom);
347	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
348	strcpy(gp->name, sbuf_data(sb));
349	sbuf_delete(sb);
350	/* Fill in defaults from class */
351	gp->start = mp->start;
352	gp->spoiled = mp->spoiled;
353	gp->dumpconf = mp->dumpconf;
354	gp->access = mp->access;
355	gp->orphan = mp->orphan;
356	gp->ioctl = mp->ioctl;
357	return (gp);
358}
359
360void
361g_destroy_geom(struct g_geom *gp)
362{
363
364	g_topology_assert();
365	G_VALID_GEOM(gp);
366	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
367	KASSERT(LIST_EMPTY(&gp->consumer),
368	    ("g_destroy_geom(%s) with consumer(s) [%p]",
369	    gp->name, LIST_FIRST(&gp->consumer)));
370	KASSERT(LIST_EMPTY(&gp->provider),
371	    ("g_destroy_geom(%s) with provider(s) [%p]",
372	    gp->name, LIST_FIRST(&gp->provider)));
373	g_cancel_event(gp);
374	LIST_REMOVE(gp, geom);
375	TAILQ_REMOVE(&geoms, gp, geoms);
376	g_free(gp->name);
377	g_free(gp);
378}
379
380/*
381 * This function is called (repeatedly) until the geom has withered away.
382 */
383void
384g_wither_geom(struct g_geom *gp, int error)
385{
386	struct g_provider *pp;
387
388	g_topology_assert();
389	G_VALID_GEOM(gp);
390	g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
391	if (!(gp->flags & G_GEOM_WITHER)) {
392		gp->flags |= G_GEOM_WITHER;
393		LIST_FOREACH(pp, &gp->provider, provider)
394			if (!(pp->flags & G_PF_ORPHAN))
395				g_orphan_provider(pp, error);
396	}
397	g_do_wither();
398}
399
400/*
401 * Convenience function to destroy a particular provider.
402 */
403void
404g_wither_provider(struct g_provider *pp, int error)
405{
406
407	pp->flags |= G_PF_WITHER;
408	if (!(pp->flags & G_PF_ORPHAN))
409		g_orphan_provider(pp, error);
410}
411
412/*
413 * This function is called (repeatedly) until the has withered away.
414 */
415void
416g_wither_geom_close(struct g_geom *gp, int error)
417{
418	struct g_consumer *cp;
419
420	g_topology_assert();
421	G_VALID_GEOM(gp);
422	g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
423	LIST_FOREACH(cp, &gp->consumer, consumer)
424		if (cp->acr || cp->acw || cp->ace)
425			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
426	g_wither_geom(gp, error);
427}
428
429/*
430 * This function is called (repeatedly) until we cant wash away more
431 * withered bits at present.  Return value contains two bits.  Bit 0
432 * set means "withering stuff we can't wash now", bit 1 means "call
433 * me again, there may be stuff I didn't get the first time around.
434 */
435int
436g_wither_washer()
437{
438	struct g_class *mp;
439	struct g_geom *gp, *gp2;
440	struct g_provider *pp, *pp2;
441	struct g_consumer *cp, *cp2;
442	int result;
443
444	result = 0;
445	g_topology_assert();
446	LIST_FOREACH(mp, &g_classes, class) {
447		LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
448			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
449				if (!(pp->flags & G_PF_WITHER))
450					continue;
451				if (LIST_EMPTY(&pp->consumers))
452					g_destroy_provider(pp);
453				else
454					result |= 1;
455			}
456			if (!(gp->flags & G_GEOM_WITHER))
457				continue;
458			LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
459				if (LIST_EMPTY(&pp->consumers))
460					g_destroy_provider(pp);
461				else
462					result |= 1;
463			}
464			LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
465				if (cp->acr || cp->acw || cp->ace) {
466					result |= 1;
467					continue;
468				}
469				if (cp->provider != NULL)
470					g_detach(cp);
471				g_destroy_consumer(cp);
472				result |= 2;
473			}
474			if (LIST_EMPTY(&gp->provider) &&
475			    LIST_EMPTY(&gp->consumer))
476				g_destroy_geom(gp);
477			else
478				result |= 1;
479		}
480	}
481	return (result);
482}
483
484struct g_consumer *
485g_new_consumer(struct g_geom *gp)
486{
487	struct g_consumer *cp;
488
489	g_topology_assert();
490	G_VALID_GEOM(gp);
491	KASSERT(!(gp->flags & G_GEOM_WITHER),
492	    ("g_new_consumer on WITHERing geom(%s) (class %s)",
493	    gp->name, gp->class->name));
494	KASSERT(gp->orphan != NULL,
495	    ("g_new_consumer on geom(%s) (class %s) without orphan",
496	    gp->name, gp->class->name));
497
498	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
499	cp->geom = gp;
500	cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
501	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
502	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
503	return(cp);
504}
505
506void
507g_destroy_consumer(struct g_consumer *cp)
508{
509	struct g_geom *gp;
510
511	g_topology_assert();
512	G_VALID_CONSUMER(cp);
513	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
514	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
515	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
516	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
517	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
518	g_cancel_event(cp);
519	gp = cp->geom;
520	LIST_REMOVE(cp, consumer);
521	devstat_remove_entry(cp->stat);
522	g_free(cp);
523	if (gp->flags & G_GEOM_WITHER)
524		g_do_wither();
525}
526
527static void
528g_new_provider_event(void *arg, int flag)
529{
530	struct g_class *mp;
531	struct g_provider *pp;
532	struct g_consumer *cp;
533
534	g_topology_assert();
535	if (flag == EV_CANCEL)
536		return;
537	if (g_shutdown)
538		return;
539	pp = arg;
540	G_VALID_PROVIDER(pp);
541	KASSERT(!(pp->flags & G_PF_WITHER),
542	    ("g_new_provider_event but withered"));
543	LIST_FOREACH(mp, &g_classes, class) {
544		if (mp->taste == NULL)
545			continue;
546		LIST_FOREACH(cp, &pp->consumers, consumers)
547			if (cp->geom->class == mp)
548				break;
549		if (cp != NULL)
550			continue;
551		mp->taste(mp, pp, 0);
552		g_topology_assert();
553	}
554}
555
556
557struct g_provider *
558g_new_providerf(struct g_geom *gp, const char *fmt, ...)
559{
560	struct g_provider *pp;
561	struct sbuf *sb;
562	va_list ap;
563
564	g_topology_assert();
565	G_VALID_GEOM(gp);
566	KASSERT(gp->access != NULL,
567	    ("new provider on geom(%s) without ->access (class %s)",
568	    gp->name, gp->class->name));
569	KASSERT(gp->start != NULL,
570	    ("new provider on geom(%s) without ->start (class %s)",
571	    gp->name, gp->class->name));
572	KASSERT(!(gp->flags & G_GEOM_WITHER),
573	    ("new provider on WITHERing geom(%s) (class %s)",
574	    gp->name, gp->class->name));
575	sb = sbuf_new_auto();
576	va_start(ap, fmt);
577	sbuf_vprintf(sb, fmt, ap);
578	va_end(ap);
579	sbuf_finish(sb);
580	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
581	pp->name = (char *)(pp + 1);
582	strcpy(pp->name, sbuf_data(sb));
583	sbuf_delete(sb);
584	LIST_INIT(&pp->consumers);
585	pp->error = ENXIO;
586	pp->geom = gp;
587	pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
588	    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
589	LIST_INSERT_HEAD(&gp->provider, pp, provider);
590	g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL);
591	return (pp);
592}
593
594void
595g_error_provider(struct g_provider *pp, int error)
596{
597
598	/* G_VALID_PROVIDER(pp);  We may not have g_topology */
599	pp->error = error;
600}
601
602struct g_provider *
603g_provider_by_name(char const *arg)
604{
605	struct g_class *cp;
606	struct g_geom *gp;
607	struct g_provider *pp;
608
609	LIST_FOREACH(cp, &g_classes, class) {
610		LIST_FOREACH(gp, &cp->geom, geom) {
611			LIST_FOREACH(pp, &gp->provider, provider) {
612				if (!strcmp(arg, pp->name))
613					return (pp);
614			}
615		}
616	}
617	return (NULL);
618}
619
620void
621g_destroy_provider(struct g_provider *pp)
622{
623	struct g_geom *gp;
624
625	g_topology_assert();
626	G_VALID_PROVIDER(pp);
627	KASSERT(LIST_EMPTY(&pp->consumers),
628	    ("g_destroy_provider but attached"));
629	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
630	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
631	KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
632	g_cancel_event(pp);
633	LIST_REMOVE(pp, provider);
634	gp = pp->geom;
635	devstat_remove_entry(pp->stat);
636	g_free(pp);
637	if ((gp->flags & G_GEOM_WITHER))
638		g_do_wither();
639}
640
641/*
642 * We keep the "geoms" list sorted by topological order (== increasing
643 * numerical rank) at all times.
644 * When an attach is done, the attaching geoms rank is invalidated
645 * and it is moved to the tail of the list.
646 * All geoms later in the sequence has their ranks reevaluated in
647 * sequence.  If we cannot assign rank to a geom because it's
648 * prerequisites do not have rank, we move that element to the tail
649 * of the sequence with invalid rank as well.
650 * At some point we encounter our original geom and if we stil fail
651 * to assign it a rank, there must be a loop and we fail back to
652 * g_attach() which detach again and calls redo_rank again
653 * to fix up the damage.
654 * It would be much simpler code wise to do it recursively, but we
655 * can't risk that on the kernel stack.
656 */
657
658static int
659redo_rank(struct g_geom *gp)
660{
661	struct g_consumer *cp;
662	struct g_geom *gp1, *gp2;
663	int n, m;
664
665	g_topology_assert();
666	G_VALID_GEOM(gp);
667
668	/* Invalidate this geoms rank and move it to the tail */
669	gp1 = TAILQ_NEXT(gp, geoms);
670	if (gp1 != NULL) {
671		gp->rank = 0;
672		TAILQ_REMOVE(&geoms, gp, geoms);
673		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
674	} else {
675		gp1 = gp;
676	}
677
678	/* re-rank the rest of the sequence */
679	for (; gp1 != NULL; gp1 = gp2) {
680		gp1->rank = 0;
681		m = 1;
682		LIST_FOREACH(cp, &gp1->consumer, consumer) {
683			if (cp->provider == NULL)
684				continue;
685			n = cp->provider->geom->rank;
686			if (n == 0) {
687				m = 0;
688				break;
689			} else if (n >= m)
690				m = n + 1;
691		}
692		gp1->rank = m;
693		gp2 = TAILQ_NEXT(gp1, geoms);
694
695		/* got a rank, moving on */
696		if (m != 0)
697			continue;
698
699		/* no rank to original geom means loop */
700		if (gp == gp1)
701			return (ELOOP);
702
703		/* no rank, put it at the end move on */
704		TAILQ_REMOVE(&geoms, gp1, geoms);
705		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
706	}
707	return (0);
708}
709
710int
711g_attach(struct g_consumer *cp, struct g_provider *pp)
712{
713	int error;
714
715	g_topology_assert();
716	G_VALID_CONSUMER(cp);
717	G_VALID_PROVIDER(pp);
718	g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp);
719	KASSERT(cp->provider == NULL, ("attach but attached"));
720	cp->provider = pp;
721	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
722	error = redo_rank(cp->geom);
723	if (error) {
724		LIST_REMOVE(cp, consumers);
725		cp->provider = NULL;
726		redo_rank(cp->geom);
727	}
728	return (error);
729}
730
731void
732g_detach(struct g_consumer *cp)
733{
734	struct g_provider *pp;
735
736	g_topology_assert();
737	G_VALID_CONSUMER(cp);
738	g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
739	KASSERT(cp->provider != NULL, ("detach but not attached"));
740	KASSERT(cp->acr == 0, ("detach but nonzero acr"));
741	KASSERT(cp->acw == 0, ("detach but nonzero acw"));
742	KASSERT(cp->ace == 0, ("detach but nonzero ace"));
743	KASSERT(cp->nstart == cp->nend,
744	    ("detach with active requests"));
745	pp = cp->provider;
746	LIST_REMOVE(cp, consumers);
747	cp->provider = NULL;
748	if (pp->geom->flags & G_GEOM_WITHER)
749		g_do_wither();
750	else if (pp->flags & G_PF_WITHER)
751		g_do_wither();
752	redo_rank(cp->geom);
753}
754
755/*
756 * g_access()
757 *
758 * Access-check with delta values.  The question asked is "can provider
759 * "cp" change the access counters by the relative amounts dc[rwe] ?"
760 */
761
762int
763g_access(struct g_consumer *cp, int dcr, int dcw, int dce)
764{
765	struct g_provider *pp;
766	int pr,pw,pe;
767	int error;
768
769	g_topology_assert();
770	G_VALID_CONSUMER(cp);
771	pp = cp->provider;
772	KASSERT(pp != NULL, ("access but not attached"));
773	G_VALID_PROVIDER(pp);
774
775	g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)",
776	    cp, pp->name, dcr, dcw, dce);
777
778	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
779	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
780	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
781	KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request"));
782	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
783
784	/*
785	 * If our class cares about being spoiled, and we have been, we
786	 * are probably just ahead of the event telling us that.  Fail
787	 * now rather than having to unravel this later.
788	 */
789	if (cp->geom->spoiled != NULL && cp->spoiled &&
790	    (dcr > 0 || dcw > 0 || dce > 0))
791		return (ENXIO);
792
793	/*
794	 * Figure out what counts the provider would have had, if this
795	 * consumer had (r0w0e0) at this time.
796	 */
797	pr = pp->acr - cp->acr;
798	pw = pp->acw - cp->acw;
799	pe = pp->ace - cp->ace;
800
801	g_trace(G_T_ACCESS,
802    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
803	    dcr, dcw, dce,
804	    cp->acr, cp->acw, cp->ace,
805	    pp->acr, pp->acw, pp->ace,
806	    pp, pp->name);
807
808	/* If foot-shooting is enabled, any open on rank#1 is OK */
809	if ((g_debugflags & 16) && pp->geom->rank == 1)
810		;
811	/* If we try exclusive but already write: fail */
812	else if (dce > 0 && pw > 0)
813		return (EPERM);
814	/* If we try write but already exclusive: fail */
815	else if (dcw > 0 && pe > 0)
816		return (EPERM);
817	/* If we try to open more but provider is error'ed: fail */
818	else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
819		return (pp->error);
820
821	/* Ok then... */
822
823	error = pp->geom->access(pp, dcr, dcw, dce);
824	KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0,
825	    ("Geom provider %s::%s failed closing ->access()",
826	    pp->geom->class->name, pp->name));
827	if (!error) {
828		/*
829		 * If we open first write, spoil any partner consumers.
830		 * If we close last write and provider is not errored,
831		 * trigger re-taste.
832		 */
833		if (pp->acw == 0 && dcw != 0)
834			g_spoil(pp, cp);
835		else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 &&
836		    !(pp->geom->flags & G_GEOM_WITHER))
837			g_post_event(g_new_provider_event, pp, M_WAITOK,
838			    pp, NULL);
839
840		pp->acr += dcr;
841		pp->acw += dcw;
842		pp->ace += dce;
843		cp->acr += dcr;
844		cp->acw += dcw;
845		cp->ace += dce;
846		if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)
847			KASSERT(pp->sectorsize > 0,
848			    ("Provider %s lacks sectorsize", pp->name));
849	}
850	return (error);
851}
852
/*
 * g_handleattr() wrapper for an int-valued attribute.
 */
int
g_handleattr_int(struct bio *bp, const char *attribute, int val)
{

	return (g_handleattr(bp, attribute, &val, sizeof(val)));
}
859
860int
861g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
862{
863
864	return (g_handleattr(bp, attribute, &val, sizeof val));
865}
866
/*
 * g_handleattr() wrapper for a string-valued attribute; a length of 0
 * tells g_handleattr() to treat the value as a NUL-terminated string.
 */
int
g_handleattr_str(struct bio *bp, const char *attribute, const char *str)
{

	return (g_handleattr(bp, attribute, str, 0));
}
873
874int
875g_handleattr(struct bio *bp, const char *attribute, const void *val, int len)
876{
877	int error = 0;
878
879	if (strcmp(bp->bio_attribute, attribute))
880		return (0);
881	if (len == 0) {
882		bzero(bp->bio_data, bp->bio_length);
883		if (strlcpy(bp->bio_data, val, bp->bio_length) >=
884		    bp->bio_length) {
885			printf("%s: %s bio_length %jd len %zu -> EFAULT\n",
886			    __func__, bp->bio_to->name,
887			    (intmax_t)bp->bio_length, strlen(val));
888			error = EFAULT;
889		}
890	} else if (bp->bio_length == len) {
891		bcopy(val, bp->bio_data, len);
892	} else {
893		printf("%s: %s bio_length %jd len %d -> EFAULT\n", __func__,
894		    bp->bio_to->name, (intmax_t)bp->bio_length, len);
895		error = EFAULT;
896	}
897	if (error == 0)
898		bp->bio_completed = bp->bio_length;
899	g_io_deliver(bp, error);
900	return (1);
901}
902
903int
904g_std_access(struct g_provider *pp,
905	int dr __unused, int dw __unused, int de __unused)
906{
907
908	g_topology_assert();
909	G_VALID_PROVIDER(pp);
910        return (0);
911}
912
913void
914g_std_done(struct bio *bp)
915{
916	struct bio *bp2;
917
918	bp2 = bp->bio_parent;
919	if (bp2->bio_error == 0)
920		bp2->bio_error = bp->bio_error;
921	bp2->bio_completed += bp->bio_completed;
922	g_destroy_bio(bp);
923	bp2->bio_inbed++;
924	if (bp2->bio_children == bp2->bio_inbed)
925		g_io_deliver(bp2, bp2->bio_error);
926}
927
928/* XXX: maybe this is only g_slice_spoiled */
929
930void
931g_std_spoiled(struct g_consumer *cp)
932{
933	struct g_geom *gp;
934	struct g_provider *pp;
935
936	g_topology_assert();
937	G_VALID_CONSUMER(cp);
938	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
939	g_detach(cp);
940	gp = cp->geom;
941	LIST_FOREACH(pp, &gp->provider, provider)
942		g_orphan_provider(pp, ENXIO);
943	g_destroy_consumer(cp);
944	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
945		g_destroy_geom(gp);
946	else
947		gp->flags |= G_GEOM_WITHER;
948}
949
950/*
951 * Spoiling happens when a provider is opened for writing, but consumers
952 * which are configured by in-band data are attached (slicers for instance).
953 * Since the write might potentially change the in-band data, such consumers
954 * need to re-evaluate their existence after the writing session closes.
955 * We do this by (offering to) tear them down when the open for write happens
956 * in return for a re-taste when it closes again.
957 * Together with the fact that such consumers grab an 'e' bit whenever they
958 * are open, regardless of mode, this ends up DTRT.
959 */
960
961static void
962g_spoil_event(void *arg, int flag)
963{
964	struct g_provider *pp;
965	struct g_consumer *cp, *cp2;
966
967	g_topology_assert();
968	if (flag == EV_CANCEL)
969		return;
970	pp = arg;
971	G_VALID_PROVIDER(pp);
972	for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
973		cp2 = LIST_NEXT(cp, consumers);
974		if (!cp->spoiled)
975			continue;
976		cp->spoiled = 0;
977		if (cp->geom->spoiled == NULL)
978			continue;
979		cp->geom->spoiled(cp);
980		g_topology_assert();
981	}
982}
983
984void
985g_spoil(struct g_provider *pp, struct g_consumer *cp)
986{
987	struct g_consumer *cp2;
988
989	g_topology_assert();
990	G_VALID_PROVIDER(pp);
991	G_VALID_CONSUMER(cp);
992
993	LIST_FOREACH(cp2, &pp->consumers, consumers) {
994		if (cp2 == cp)
995			continue;
996/*
997		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
998		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
999*/
1000		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
1001		cp2->spoiled++;
1002	}
1003	g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
1004}
1005
1006int
1007g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
1008{
1009	int error, i;
1010
1011	i = len;
1012	error = g_io_getattr(attr, cp, &i, var);
1013	if (error)
1014		return (error);
1015	if (i != len)
1016		return (EINVAL);
1017	return (0);
1018}
1019
/*
 * Return the length of a leading "ada<digits>" or "ad<digits>" device
 * prefix in "name", or 0 if the name does not start with one (at least
 * one digit is required).
 */
static int
g_get_device_prefix_len(const char *name)
{
	const char *p;

	/* "ada" must be tried first: "ad" is a prefix of it. */
	if (strncmp(name, "ada", 3) == 0)
		p = name + 3;
	else if (strncmp(name, "ad", 2) == 0)
		p = name + 2;
	else
		return (0);

	if (*p < '0' || *p > '9')
		return (0);
	while (*p >= '0' && *p <= '9')
		p++;
	return ((int)(p - name));
}
1038
/*
 * Decide whether two names match.
 *
 * Returns 1 if the names are identical, or if both start with a device
 * prefix recognised by g_get_device_prefix_len() and the remainders
 * after those prefixes are identical.  Returns 0 otherwise.
 */
int
g_compare_names(const char *namea, const char *nameb)
{
	int skipa, skipb;

	if (strcmp(namea, nameb) == 0)
		return (1);

	skipa = g_get_device_prefix_len(namea);
	if (skipa == 0)
		return (0);
	skipb = g_get_device_prefix_len(nameb);
	if (skipb == 0)
		return (0);

	return (strcmp(namea + skipa, nameb + skipb) == 0 ? 1 : 0);
}
1056
#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Walk the mesh looking for "ptr" and report what kind of object it is
 * believed to be:
 *   0 - not found, 1 - class, 2 - geom, 3 - consumer, 4 - provider.
 *
 * If the topology is not locked this function is potentially dangerous,
 * but the topology lock is not asserted when called from the debugger.
 */
int
g_valid_obj(void const *ptr)
{
	struct g_class *cls;
	struct g_geom *g;
	struct g_consumer *cons;
	struct g_provider *prov;

#ifdef KDB
	if (kdb_active == 0)
#endif
		g_topology_assert();

	LIST_FOREACH(cls, &g_classes, class) {
		if (ptr == cls)
			return (1);
		LIST_FOREACH(g, &cls->geom, geom) {
			if (ptr == g)
				return (2);
			LIST_FOREACH(cons, &g->consumer, consumer) {
				if (ptr == cons)
					return (3);
			}
			LIST_FOREACH(prov, &g->provider, provider) {
				if (ptr == prov)
					return (4);
			}
		}
	}
	return (0);
}
#endif
1095
1096#ifdef DDB
1097
/*
 * Print helpers for the "show geom" debugger output.  Both expect an
 * int named "indent" in the calling scope and emit that many spaces
 * before the formatted text; gprintln() also ends the line.
 */
#define	gprintf(...)	do {						\
	db_printf("%*s", indent, "");					\
	db_printf(__VA_ARGS__);						\
} while (0)
#define	gprintln(...)	do {						\
	db_printf("%*s", indent, "");					\
	db_printf(__VA_ARGS__);						\
	db_printf("\n");						\
} while (0)

/*
 * Append "sflag" to the caller's buffer when "flag" is set in
 * (obj)->flags, separating entries with commas.  Expects "str", "size"
 * and "comma" in the calling scope.
 */
#define	ADDFLAG(obj, flag, sflag)	do {				\
	if (((obj)->flags & (flag)) != 0) {				\
		if (comma != 0)						\
			strlcat(str, ",", size);			\
		strlcat(str, (sflag), size);				\
		comma = 1;						\
	}								\
} while (0)
1115
1116static char *
1117provider_flags_to_string(struct g_provider *pp, char *str, size_t size)
1118{
1119	int comma = 0;
1120
1121	bzero(str, size);
1122	if (pp->flags == 0) {
1123		strlcpy(str, "NONE", size);
1124		return (str);
1125	}
1126	ADDFLAG(pp, G_PF_CANDELETE, "G_PF_CANDELETE");
1127	ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER");
1128	ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN");
1129	return (str);
1130}
1131
1132static char *
1133geom_flags_to_string(struct g_geom *gp, char *str, size_t size)
1134{
1135	int comma = 0;
1136
1137	bzero(str, size);
1138	if (gp->flags == 0) {
1139		strlcpy(str, "NONE", size);
1140		return (str);
1141	}
1142	ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER");
1143	return (str);
1144}
1145static void
1146db_show_geom_consumer(int indent, struct g_consumer *cp)
1147{
1148
1149	if (indent == 0) {
1150		gprintln("consumer: %p", cp);
1151		gprintln("  class:    %s (%p)", cp->geom->class->name,
1152		    cp->geom->class);
1153		gprintln("  geom:     %s (%p)", cp->geom->name, cp->geom);
1154		if (cp->provider == NULL)
1155			gprintln("  provider: none");
1156		else {
1157			gprintln("  provider: %s (%p)", cp->provider->name,
1158			    cp->provider);
1159		}
1160		gprintln("  access:   r%dw%de%d", cp->acr, cp->acw, cp->ace);
1161		gprintln("  spoiled:  %d", cp->spoiled);
1162		gprintln("  nstart:   %u", cp->nstart);
1163		gprintln("  nend:     %u", cp->nend);
1164	} else {
1165		gprintf("consumer: %p (%s), access=r%dw%de%d", cp,
1166		    cp->provider != NULL ? cp->provider->name : "none",
1167		    cp->acr, cp->acw, cp->ace);
1168		if (cp->spoiled)
1169			db_printf(", spoiled=%d", cp->spoiled);
1170		db_printf("\n");
1171	}
1172}
1173
1174static void
1175db_show_geom_provider(int indent, struct g_provider *pp)
1176{
1177	struct g_consumer *cp;
1178	char flags[64];
1179
1180	if (indent == 0) {
1181		gprintln("provider: %s (%p)", pp->name, pp);
1182		gprintln("  class:        %s (%p)", pp->geom->class->name,
1183		    pp->geom->class);
1184		gprintln("  geom:         %s (%p)", pp->geom->name, pp->geom);
1185		gprintln("  mediasize:    %jd", (intmax_t)pp->mediasize);
1186		gprintln("  sectorsize:   %u", pp->sectorsize);
1187		gprintln("  stripesize:   %u", pp->stripesize);
1188		gprintln("  stripeoffset: %u", pp->stripeoffset);
1189		gprintln("  access:       r%dw%de%d", pp->acr, pp->acw,
1190		    pp->ace);
1191		gprintln("  flags:        %s (0x%04x)",
1192		    provider_flags_to_string(pp, flags, sizeof(flags)),
1193		    pp->flags);
1194		gprintln("  error:        %d", pp->error);
1195		gprintln("  nstart:       %u", pp->nstart);
1196		gprintln("  nend:         %u", pp->nend);
1197		if (LIST_EMPTY(&pp->consumers))
1198			gprintln("  consumers:    none");
1199	} else {
1200		gprintf("provider: %s (%p), access=r%dw%de%d",
1201		    pp->name, pp, pp->acr, pp->acw, pp->ace);
1202		if (pp->flags != 0) {
1203			db_printf(", flags=%s (0x%04x)",
1204			    provider_flags_to_string(pp, flags, sizeof(flags)),
1205			    pp->flags);
1206		}
1207		db_printf("\n");
1208	}
1209	if (!LIST_EMPTY(&pp->consumers)) {
1210		LIST_FOREACH(cp, &pp->consumers, consumers) {
1211			db_show_geom_consumer(indent + 2, cp);
1212			if (db_pager_quit)
1213				break;
1214		}
1215	}
1216}
1217
1218static void
1219db_show_geom_geom(int indent, struct g_geom *gp)
1220{
1221	struct g_provider *pp;
1222	struct g_consumer *cp;
1223	char flags[64];
1224
1225	if (indent == 0) {
1226		gprintln("geom: %s (%p)", gp->name, gp);
1227		gprintln("  class:     %s (%p)", gp->class->name, gp->class);
1228		gprintln("  flags:     %s (0x%04x)",
1229		    geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags);
1230		gprintln("  rank:      %d", gp->rank);
1231		if (LIST_EMPTY(&gp->provider))
1232			gprintln("  providers: none");
1233		if (LIST_EMPTY(&gp->consumer))
1234			gprintln("  consumers: none");
1235	} else {
1236		gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank);
1237		if (gp->flags != 0) {
1238			db_printf(", flags=%s (0x%04x)",
1239			    geom_flags_to_string(gp, flags, sizeof(flags)),
1240			    gp->flags);
1241		}
1242		db_printf("\n");
1243	}
1244	if (!LIST_EMPTY(&gp->provider)) {
1245		LIST_FOREACH(pp, &gp->provider, provider) {
1246			db_show_geom_provider(indent + 2, pp);
1247			if (db_pager_quit)
1248				break;
1249		}
1250	}
1251	if (!LIST_EMPTY(&gp->consumer)) {
1252		LIST_FOREACH(cp, &gp->consumer, consumer) {
1253			db_show_geom_consumer(indent + 2, cp);
1254			if (db_pager_quit)
1255				break;
1256		}
1257	}
1258}
1259
1260static void
1261db_show_geom_class(struct g_class *mp)
1262{
1263	struct g_geom *gp;
1264
1265	db_printf("class: %s (%p)\n", mp->name, mp);
1266	LIST_FOREACH(gp, &mp->geom, geom) {
1267		db_show_geom_geom(2, gp);
1268		if (db_pager_quit)
1269			break;
1270	}
1271}
1272
1273/*
1274 * Print the GEOM topology or the given object.
1275 */
1276DB_SHOW_COMMAND(geom, db_show_geom)
1277{
1278	struct g_class *mp;
1279
1280	if (!have_addr) {
1281		/* No address given, print the entire topology. */
1282		LIST_FOREACH(mp, &g_classes, class) {
1283			db_show_geom_class(mp);
1284			db_printf("\n");
1285			if (db_pager_quit)
1286				break;
1287		}
1288	} else {
1289		switch (g_valid_obj((void *)addr)) {
1290		case 1:
1291			db_show_geom_class((struct g_class *)addr);
1292			break;
1293		case 2:
1294			db_show_geom_geom(0, (struct g_geom *)addr);
1295			break;
1296		case 3:
1297			db_show_geom_consumer(0, (struct g_consumer *)addr);
1298			break;
1299		case 4:
1300			db_show_geom_provider(0, (struct g_provider *)addr);
1301			break;
1302		default:
1303			db_printf("Not a GEOM object.\n");
1304			break;
1305		}
1306	}
1307}
1308
1309static void
1310db_print_bio_cmd(struct bio *bp)
1311{
1312	db_printf("  cmd: ");
1313	switch (bp->bio_cmd) {
1314	case BIO_READ: db_printf("BIO_READ"); break;
1315	case BIO_WRITE: db_printf("BIO_WRITE"); break;
1316	case BIO_DELETE: db_printf("BIO_DELETE"); break;
1317	case BIO_GETATTR: db_printf("BIO_GETATTR"); break;
1318	case BIO_FLUSH: db_printf("BIO_FLUSH"); break;
1319	case BIO_CMD0: db_printf("BIO_CMD0"); break;
1320	case BIO_CMD1: db_printf("BIO_CMD1"); break;
1321	case BIO_CMD2: db_printf("BIO_CMD2"); break;
1322	default: db_printf("UNKNOWN"); break;
1323	}
1324	db_printf("\n");
1325}
1326
1327static void
1328db_print_bio_flags(struct bio *bp)
1329{
1330	int comma;
1331
1332	comma = 0;
1333	db_printf("  flags: ");
1334	if (bp->bio_flags & BIO_ERROR) {
1335		db_printf("BIO_ERROR");
1336		comma = 1;
1337	}
1338	if (bp->bio_flags & BIO_DONE) {
1339		db_printf("%sBIO_DONE", (comma ? ", " : ""));
1340		comma = 1;
1341	}
1342	if (bp->bio_flags & BIO_ONQUEUE)
1343		db_printf("%sBIO_ONQUEUE", (comma ? ", " : ""));
1344	db_printf("\n");
1345}
1346
1347/*
1348 * Print useful information in a BIO
1349 */
1350DB_SHOW_COMMAND(bio, db_show_bio)
1351{
1352	struct bio *bp;
1353
1354	if (have_addr) {
1355		bp = (struct bio *)addr;
1356		db_printf("BIO %p\n", bp);
1357		db_print_bio_cmd(bp);
1358		db_print_bio_flags(bp);
1359		db_printf("  cflags: 0x%hhx\n", bp->bio_cflags);
1360		db_printf("  pflags: 0x%hhx\n", bp->bio_pflags);
1361		db_printf("  offset: %jd\n", (intmax_t)bp->bio_offset);
1362		db_printf("  length: %jd\n", (intmax_t)bp->bio_length);
1363		db_printf("  bcount: %ld\n", bp->bio_bcount);
1364		db_printf("  resid: %ld\n", bp->bio_resid);
1365		db_printf("  completed: %jd\n", (intmax_t)bp->bio_completed);
1366		db_printf("  children: %u\n", bp->bio_children);
1367		db_printf("  inbed: %u\n", bp->bio_inbed);
1368		db_printf("  error: %d\n", bp->bio_error);
1369		db_printf("  parent: %p\n", bp->bio_parent);
1370		db_printf("  driver1: %p\n", bp->bio_driver1);
1371		db_printf("  driver2: %p\n", bp->bio_driver2);
1372		db_printf("  caller1: %p\n", bp->bio_caller1);
1373		db_printf("  caller2: %p\n", bp->bio_caller2);
1374		db_printf("  bio_from: %p\n", bp->bio_from);
1375		db_printf("  bio_to: %p\n", bp->bio_to);
1376	}
1377}
1378
1379#undef	gprintf
1380#undef	gprintln
1381#undef	ADDFLAG
1382
1383#endif	/* DDB */
1384