geom_subr.c revision 94284
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $FreeBSD: head/sys/geom/geom_subr.c 94284 2002-04-09 15:13:42Z phk $
36 */
37
38
39#include <sys/param.h>
40#ifndef _KERNEL
41#include <stdio.h>
42#include <unistd.h>
43#include <stdlib.h>
44#include <signal.h>
45#include <string.h>
46#include <err.h>
47#else
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/malloc.h>
51#include <sys/bio.h>
52#include <sys/sysctl.h>
53#include <sys/proc.h>
54#include <sys/kthread.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#endif
58#include <sys/errno.h>
59#include <sys/sbuf.h>
60#include <geom/geom.h>
61#include <geom/geom_int.h>
62#include <machine/stdarg.h>
63
64struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
65static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
66static int g_nproviders;
67char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
68
69static int g_ignition;
70
71void
72g_add_class(struct g_class *mp)
73{
74
75	if (!g_ignition) {
76		g_ignition++;
77		g_init();
78	}
79	g_topology_lock();
80	g_trace(G_T_TOPOLOGY, "g_add_class(%s)", mp->name);
81	LIST_INIT(&mp->geom);
82	LIST_INSERT_HEAD(&g_classes, mp, class);
83	if (g_nproviders > 0)
84		g_post_event(EV_NEW_CLASS, mp, NULL, NULL, NULL);
85	g_topology_unlock();
86}
87
88struct g_geom *
89g_new_geomf(struct g_class *mp, char *fmt, ...)
90{
91	struct g_geom *gp;
92	va_list ap;
93	struct sbuf *sb;
94
95	g_topology_assert();
96	va_start(ap, fmt);
97	mtx_lock(&Giant);
98	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
99	sbuf_vprintf(sb, fmt, ap);
100	sbuf_finish(sb);
101	mtx_unlock(&Giant);
102	gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
103	gp->name = (char *)(gp + 1);
104	gp->class = mp;
105	gp->rank = 1;
106	LIST_INIT(&gp->consumer);
107	LIST_INIT(&gp->provider);
108	LIST_INSERT_HEAD(&mp->geom, gp, geom);
109	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
110	strcpy(gp->name, sbuf_data(sb));
111	sbuf_delete(sb);
112	return (gp);
113}
114
115void
116g_destroy_geom(struct g_geom *gp)
117{
118
119	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
120	g_topology_assert();
121	KASSERT(gp->event == NULL, ("g_destroy_geom() with event"));
122	KASSERT(LIST_EMPTY(&gp->consumer),
123	    ("g_destroy_geom(%s) with consumer(s) [%p]",
124	    gp->name, LIST_FIRST(&gp->consumer)));
125	KASSERT(LIST_EMPTY(&gp->provider),
126	    ("g_destroy_geom(%s) with provider(s) [%p]",
127	    gp->name, LIST_FIRST(&gp->consumer)));
128	LIST_REMOVE(gp, geom);
129	TAILQ_REMOVE(&geoms, gp, geoms);
130	g_free(gp);
131}
132
133struct g_consumer *
134g_new_consumer(struct g_geom *gp)
135{
136	struct g_consumer *cp;
137
138	g_topology_assert();
139	KASSERT(gp->orphan != NULL,
140	    ("g_new_consumer on geom(%s) (class %s) without orphan",
141	    gp->name, gp->class->name));
142
143	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
144	cp->geom = gp;
145	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
146	return(cp);
147}
148
149void
150g_destroy_consumer(struct g_consumer *cp)
151{
152
153	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
154	g_topology_assert();
155	KASSERT(cp->event == NULL, ("g_destroy_consumer() with event"));
156	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
157	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
158	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
159	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
160	LIST_REMOVE(cp, consumer);
161	g_free(cp);
162}
163
164struct g_provider *
165g_new_providerf(struct g_geom *gp, char *fmt, ...)
166{
167	struct g_provider *pp;
168	struct sbuf *sb;
169	va_list ap;
170
171	g_topology_assert();
172	va_start(ap, fmt);
173	mtx_lock(&Giant);
174	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
175	sbuf_vprintf(sb, fmt, ap);
176	sbuf_finish(sb);
177	mtx_unlock(&Giant);
178	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
179	pp->name = (char *)(pp + 1);
180	strcpy(pp->name, sbuf_data(sb));
181	sbuf_delete(sb);
182	LIST_INIT(&pp->consumers);
183	pp->error = ENXIO;
184	pp->geom = gp;
185	LIST_INSERT_HEAD(&gp->provider, pp, provider);
186	g_nproviders++;
187	g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL);
188	return (pp);
189}
190
191void
192g_error_provider(struct g_provider *pp, int error)
193{
194
195	pp->error = error;
196}
197
198
199void
200g_destroy_provider(struct g_provider *pp)
201{
202	struct g_geom *gp;
203	struct g_consumer *cp;
204
205	g_topology_assert();
206	KASSERT(pp->event == NULL, ("g_destroy_provider() with event"));
207	KASSERT(LIST_EMPTY(&pp->consumers),
208	    ("g_destroy_provider but attached"));
209	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
210	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
211	KASSERT (pp->acw == 0, ("g_destroy_provider with ace"));
212	g_nproviders--;
213	LIST_REMOVE(pp, provider);
214	gp = pp->geom;
215	g_free(pp);
216	if (!(gp->flags & G_GEOM_WITHER))
217		return;
218	if (!LIST_EMPTY(&gp->provider))
219		return;
220	for (;;) {
221		cp = LIST_FIRST(&gp->consumer);
222		if (cp == NULL)
223			break;
224		g_dettach(cp);
225		g_destroy_consumer(cp);
226	}
227	g_destroy_geom(gp);
228}
229
230/*
231 * We keep the "geoms" list sorted by topological order (== increasing
232 * numerical rank) at all times.
233 * When an attach is done, the attaching geoms rank is invalidated
234 * and it is moved to the tail of the list.
235 * All geoms later in the sequence has their ranks reevaluated in
236 * sequence.  If we cannot assign rank to a geom because it's
237 * prerequisites do not have rank, we move that element to the tail
238 * of the sequence with invalid rank as well.
239 * At some point we encounter our original geom and if we stil fail
240 * to assign it a rank, there must be a loop and we fail back to
241 * g_attach() which dettach again and calls redo_rank again
242 * to fix up the damage.
243 * It would be much simpler code wise to do it recursively, but we
244 * can't risk that on the kernel stack.
245 */
246
247static int
248redo_rank(struct g_geom *gp)
249{
250	struct g_consumer *cp;
251	struct g_geom *gp1, *gp2;
252	int n, m;
253
254	g_topology_assert();
255
256	/* Invalidate this geoms rank and move it to the tail */
257	gp1 = TAILQ_NEXT(gp, geoms);
258	if (gp1 != NULL) {
259		gp->rank = 0;
260		TAILQ_REMOVE(&geoms, gp, geoms);
261		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
262	} else {
263		gp1 = gp;
264	}
265
266	/* re-rank the rest of the sequence */
267	for (; gp1 != NULL; gp1 = gp2) {
268		gp1->rank = 0;
269		m = 1;
270		LIST_FOREACH(cp, &gp1->consumer, consumer) {
271			if (cp->provider == NULL)
272				continue;
273			n = cp->provider->geom->rank;
274			if (n == 0) {
275				m = 0;
276				break;
277			} else if (n >= m)
278				m = n + 1;
279		}
280		gp1->rank = m;
281		gp2 = TAILQ_NEXT(gp1, geoms);
282
283		/* got a rank, moving on */
284		if (m != 0)
285			continue;
286
287		/* no rank to original geom means loop */
288		if (gp == gp1) {
289			return (ELOOP);
290
291		/* no rank, put it at the end move on */
292		TAILQ_REMOVE(&geoms, gp1, geoms);
293		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
294		}
295	}
296	return (0);
297}
298
299int
300g_attach(struct g_consumer *cp, struct g_provider *pp)
301{
302	int error;
303
304	g_topology_assert();
305	KASSERT(cp->provider == NULL, ("attach but attached"));
306	cp->provider = pp;
307	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
308	error = redo_rank(cp->geom);
309	if (error) {
310		LIST_REMOVE(cp, consumers);
311		cp->provider = NULL;
312		redo_rank(cp->geom);
313	}
314	return (error);
315}
316
317void
318g_dettach(struct g_consumer *cp)
319{
320	struct g_provider *pp;
321
322	g_trace(G_T_TOPOLOGY, "g_dettach(%p)", cp);
323	KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!"));
324	g_topology_assert();
325	KASSERT(cp->provider != NULL, ("dettach but not attached"));
326	KASSERT(cp->acr == 0, ("dettach but nonzero acr"));
327	KASSERT(cp->acw == 0, ("dettach but nonzero acw"));
328	KASSERT(cp->ace == 0, ("dettach but nonzero ace"));
329	KASSERT(cp->biocount == 0, ("dettach but nonzero biocount"));
330	pp = cp->provider;
331	LIST_REMOVE(cp, consumers);
332	cp->provider = NULL;
333	if (LIST_EMPTY(&pp->consumers)) {
334		if (pp->geom->flags & G_GEOM_WITHER)
335			g_destroy_provider(pp);
336	}
337	redo_rank(cp->geom);
338}
339
340
341/*
342 * g_access_abs()
343 *
344 * Access-check with absolute new values:  Just fall through
345 * and use the relative version.
346 */
347int
348g_access_abs(struct g_consumer *cp, int acr, int acw, int ace)
349{
350
351	g_topology_assert();
352	return(g_access_rel(cp,
353		acr - cp->acr,
354		acw - cp->acw,
355		ace - cp->ace));
356}
357
358/*
359 * g_access_rel()
360 *
361 * Access-check with delta values.  The question asked is "can provider
362 * "cp" change the access counters by the relative amounts dc[rwe] ?"
363 */
364
365int
366g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce)
367{
368	struct g_provider *pp;
369	int pr,pw,pe;
370	int error;
371
372	pp = cp->provider;
373
374	g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)",
375	    cp, pp->name, dcr, dcw, dce);
376
377	g_topology_assert();
378	KASSERT(cp->provider != NULL, ("access but not attached"));
379	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
380	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
381	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
382	KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
383
384	/*
385	 * If our class cares about being spoiled, and we have been, we
386	 * are probably just ahead of the event telling us that.  Fail
387	 * now rather than having to unravel this later.
388	 */
389	if (cp->geom->spoiled != NULL && cp->spoiled) {
390		KASSERT(dcr >= 0, ("spoiled but dcr = %d", dcr));
391		KASSERT(dcw >= 0, ("spoiled but dce = %d", dcw));
392		KASSERT(dce >= 0, ("spoiled but dcw = %d", dce));
393		KASSERT(cp->acr == 0, ("spoiled but cp->acr = %d", cp->acr));
394		KASSERT(cp->acw == 0, ("spoiled but cp->acw = %d", cp->acw));
395		KASSERT(cp->ace == 0, ("spoiled but cp->ace = %d", cp->ace));
396		return(ENXIO);
397	}
398
399	/*
400	 * Figure out what counts the provider would have had, if this
401	 * consumer had (r0w0e0) at this time.
402	 */
403	pr = pp->acr - cp->acr;
404	pw = pp->acw - cp->acw;
405	pe = pp->ace - cp->ace;
406
407	g_trace(G_T_ACCESS,
408    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
409	    dcr, dcw, dce,
410	    cp->acr, cp->acw, cp->ace,
411	    pp->acr, pp->acw, pp->ace,
412	    pp, pp->name);
413
414	/* If we try exclusive but already write: fail */
415	if (dce > 0 && pw > 0)
416		return (EPERM);
417	/* If we try write but already exclusive: fail */
418	if (dcw > 0 && pe > 0)
419		return (EPERM);
420	/* If we try to open more but provider is error'ed: fail */
421	if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
422		return (pp->error);
423
424	/* Ok then... */
425
426	/*
427	 * If we open first write, spoil any partner consumers.
428	 * If we close last write, trigger re-taste.
429	 */
430	if (pp->acw == 0 && dcw != 0)
431		g_spoil(pp, cp);
432	else if (pp->acw != 0 && pp->acw == -dcw && !(pp->geom->flags & G_GEOM_WITHER))
433		g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL);
434
435	error = pp->geom->access(pp, dcr, dcw, dce);
436	if (!error) {
437		pp->acr += dcr;
438		pp->acw += dcw;
439		pp->ace += dce;
440		cp->acr += dcr;
441		cp->acw += dcw;
442		cp->ace += dce;
443	}
444	return (error);
445}
446
/*
 * Convenience wrapper around g_haveattr() for an int-valued attribute.
 * Returns what g_haveattr() returns.
 */
int
g_haveattr_int(struct bio *bp, char *attribute, int val)
{

	return (g_haveattr(bp, attribute, &val, sizeof(val)));
}
453
/*
 * Convenience wrapper around g_haveattr() for an off_t-valued attribute.
 * Returns what g_haveattr() returns.
 */
int
g_haveattr_off_t(struct bio *bp, char *attribute, off_t val)
{

	return (g_haveattr(bp, attribute, &val, sizeof(val)));
}
460
461
462int
463g_haveattr(struct bio *bp, char *attribute, void *val, int len)
464{
465	int error;
466
467	if (strcmp(bp->bio_attribute, attribute))
468		return (0);
469	if (bp->bio_length != len) {
470		printf("bio_length %lld len %d -> EFAULT\n",
471		    (long long)bp->bio_length, len);
472		error = EFAULT;
473	} else {
474		error = 0;
475		bcopy(val, bp->bio_data, len);
476		bp->bio_completed = len;
477	}
478	bp->bio_error = error;
479	g_io_deliver(bp);
480	return (1);
481}
482
483int
484g_std_access(struct g_provider *pp __unused,
485	int dr __unused, int dw __unused, int de __unused)
486{
487
488        return (0);
489}
490
491void
492g_std_done(struct bio *bp)
493{
494	struct bio *bp2;
495
496	bp2 = bp->bio_linkage;
497	bp2->bio_error = bp->bio_error;
498	bp2->bio_completed = bp->bio_completed;
499	g_destroy_bio(bp);
500	g_io_deliver(bp2);
501}
502
503/* XXX: maybe this is only g_slice_spoiled */
504
505void
506g_std_spoiled(struct g_consumer *cp)
507{
508	struct g_geom *gp;
509	struct g_provider *pp;
510
511	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
512	g_topology_assert();
513	g_dettach(cp);
514	gp = cp->geom;
515	LIST_FOREACH(pp, &gp->provider, provider)
516		g_orphan_provider(pp, ENXIO);
517	g_destroy_consumer(cp);
518	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
519		g_destroy_geom(gp);
520	else
521		gp->flags |= G_GEOM_WITHER;
522}
523
524/*
525 * Spoiling happens when a provider is opened for writing, but consumers
526 * which are configured by in-band data are attached (slicers for instance).
527 * Since the write might potentially change the in-band data, such consumers
528 * need to re-evaluate their existence after the writing session closes.
529 * We do this by (offering to) tear them down when the open for write happens
530 * in return for a re-taste when it closes again.
531 * Together with the fact that such consumers grab an 'e' bit whenever they
532 * are open, regardless of mode, this ends up DTRT.
533 */
534
535void
536g_spoil(struct g_provider *pp, struct g_consumer *cp)
537{
538	struct g_consumer *cp2;
539
540	g_topology_assert();
541
542	LIST_FOREACH(cp2, &pp->consumers, consumers) {
543		if (cp2 == cp)
544			continue;
545/*
546		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
547		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
548*/
549		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
550		cp2->spoiled++;
551	}
552	g_post_event(EV_SPOILED, NULL, NULL, pp, cp);
553}
554
555static struct g_class *
556g_class_by_name(char *name)
557{
558	struct g_class *mp;
559
560	g_trace(G_T_TOPOLOGY, "g_class_by_name(%s)", name);
561	g_topology_assert();
562	LIST_FOREACH(mp, &g_classes, class)
563		if (!strcmp(mp->name, name))
564			return (mp);
565	return (NULL);
566}
567
568struct g_geom *
569g_create_geomf(char *class, struct g_provider *pp, char *fmt, ...)
570{
571	va_list ap;
572	struct sbuf *sb;
573	char *s;
574	struct g_class *mp;
575	struct g_geom *gp;
576
577	g_trace(G_T_TOPOLOGY, "g_create_geom(%s, %p(%s))", class,
578		pp, pp == NULL ? "" : pp->name);
579	g_topology_assert();
580	gp = NULL;
581	mp = g_class_by_name(class);
582	if (mp == NULL)
583		return (NULL);
584	if (fmt != NULL) {
585		va_start(ap, fmt);
586		mtx_lock(&Giant);
587		sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
588		sbuf_vprintf(sb, fmt, ap);
589		sbuf_finish(sb);
590		mtx_unlock(&Giant);
591		s = sbuf_data(sb);
592	} else {
593		s = NULL;
594	}
595	if (pp != NULL)
596		gp = mp->taste(mp, pp, G_TF_INSIST);
597	if (gp == NULL && mp->create_geom == NULL)
598		return (NULL);
599	if (gp == NULL)
600		gp = mp->create_geom(mp, pp, s);
601	/* XXX: delete sbuf  */
602	return (gp);
603}
604
605struct g_geom *
606g_insert_geom(char *class, struct g_consumer *cp)
607{
608	struct g_class *mp;
609	struct g_geom *gp;
610	struct g_provider *pp, *pp2;
611	struct g_consumer *cp2;
612	int error;
613
614	g_trace(G_T_TOPOLOGY, "g_insert_geomf(%s, %p)", class, cp);
615	g_topology_assert();
616	KASSERT(cp->provider != NULL, ("g_insert_geomf but not attached"));
617	/* XXX: check for events ?? */
618	mp = g_class_by_name(class);
619	if (mp == NULL)
620		return (NULL);
621	if (mp->create_geom == NULL)
622		return (NULL);
623	pp = cp->provider;
624	gp = mp->taste(mp, pp, G_TF_TRANSPARENT);
625	if (gp == NULL)
626		return (NULL);
627	pp2 = LIST_FIRST(&gp->provider);
628	cp2 = LIST_FIRST(&gp->consumer);
629	cp2->acr += pp->acr;
630	cp2->acw += pp->acw;
631	cp2->ace += pp->ace;
632	pp2->acr += pp->acr;
633	pp2->acw += pp->acw;
634	pp2->ace += pp->ace;
635	LIST_REMOVE(cp, consumers);
636	LIST_INSERT_HEAD(&pp2->consumers, cp, consumers);
637	cp->provider = pp2;
638	error = redo_rank(gp);
639	KASSERT(error == 0, ("redo_rank failed in g_insert_geom"));
640	return (gp);
641}
642
/*
 * Fetch attribute "attr" through consumer "cp" into "var", which is
 * "len" bytes long, using g_io_getattr().
 *
 * Returns the g_io_getattr() error if there is one, EINVAL if the length
 * it reports back differs from "len", and 0 otherwise.
 */
int
g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
{
	int error, got;

	got = len;
	error = g_io_getattr(attr, cp, &got, var);
	if (error != 0)
		return (error);

	return (got == len ? 0 : EINVAL);
}
656