geom_subr.c revision 92108
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $FreeBSD: head/sys/geom/geom_subr.c 92108 2002-03-11 21:42:35Z phk $
36 */
37
38
39#include <sys/param.h>
40#ifndef _KERNEL
41#include <stdio.h>
42#include <unistd.h>
43#include <stdlib.h>
44#include <signal.h>
45#include <string.h>
46#include <err.h>
47#else
48#include <sys/systm.h>
49#include <sys/kernel.h>
50#include <sys/malloc.h>
51#include <sys/bio.h>
52#include <sys/sysctl.h>
53#include <sys/proc.h>
54#include <sys/kthread.h>
55#include <sys/lock.h>
56#include <sys/mutex.h>
57#endif
58#include <sys/errno.h>
59#include <sys/sbuf.h>
60#include <geom/geom.h>
61#include <machine/stdarg.h>
62
63struct method_list_head g_methods = LIST_HEAD_INITIALIZER(g_methods);
64static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
65static int g_nproviders;
66char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
67
68static int g_ignition;
69
70void
71g_add_method(struct g_method *mp)
72{
73
74	if (!g_ignition) {
75		g_ignition++;
76		g_init();
77	}
78	g_topology_lock();
79	g_trace(G_T_TOPOLOGY, "g_add_method(%s)", mp->name);
80	LIST_INIT(&mp->geom);
81	LIST_INSERT_HEAD(&g_methods, mp, method);
82	if (g_nproviders > 0)
83		g_post_event(EV_NEW_METHOD, mp, NULL, NULL, NULL);
84	g_topology_unlock();
85}
86
87struct g_geom *
88g_new_geomf(struct g_method *mp, char *fmt, ...)
89{
90	struct g_geom *gp;
91	va_list ap;
92	struct sbuf *sb;
93
94	g_topology_assert();
95	va_start(ap, fmt);
96	mtx_lock(&Giant);
97	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
98	sbuf_vprintf(sb, fmt, ap);
99	sbuf_finish(sb);
100	mtx_unlock(&Giant);
101	gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
102	gp->name = (char *)(gp + 1);
103	gp->method = mp;
104	gp->rank = 1;
105	LIST_INIT(&gp->consumer);
106	LIST_INIT(&gp->provider);
107	LIST_INSERT_HEAD(&mp->geom, gp, geom);
108	TAILQ_INSERT_HEAD(&geoms, gp, geoms);
109	strcpy(gp->name, sbuf_data(sb));
110	sbuf_delete(sb);
111	return (gp);
112}
113
114void
115g_destroy_geom(struct g_geom *gp)
116{
117
118	g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
119	g_topology_assert();
120	KASSERT(gp->event == NULL, ("g_destroy_geom() with event"));
121	KASSERT(LIST_EMPTY(&gp->consumer),
122	    ("g_destroy_geom(%s) with consumer(s) [%p]",
123	    gp->name, LIST_FIRST(&gp->consumer)));
124	KASSERT(LIST_EMPTY(&gp->provider),
125	    ("g_destroy_geom(%s) with provider(s) [%p]",
126	    gp->name, LIST_FIRST(&gp->consumer)));
127	LIST_REMOVE(gp, geom);
128	TAILQ_REMOVE(&geoms, gp, geoms);
129	g_free(gp);
130}
131
132struct g_consumer *
133g_new_consumer(struct g_geom *gp)
134{
135	struct g_consumer *cp;
136
137	g_topology_assert();
138	KASSERT(gp->method->orphan != NULL,
139	    ("g_new_consumer on method(%s) without orphan", gp->method->name));
140
141	cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
142	cp->geom = gp;
143	LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
144	return(cp);
145}
146
147void
148g_destroy_consumer(struct g_consumer *cp)
149{
150
151	g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
152	g_topology_assert();
153	KASSERT(cp->event == NULL, ("g_destroy_consumer() with event"));
154	KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
155	KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
156	KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
157	KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
158	LIST_REMOVE(cp, consumer);
159	g_free(cp);
160}
161
162struct g_provider *
163g_new_providerf(struct g_geom *gp, char *fmt, ...)
164{
165	struct g_provider *pp;
166	struct sbuf *sb;
167	va_list ap;
168
169	g_topology_assert();
170	va_start(ap, fmt);
171	mtx_lock(&Giant);
172	sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
173	sbuf_vprintf(sb, fmt, ap);
174	sbuf_finish(sb);
175	mtx_unlock(&Giant);
176	pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
177	pp->name = (char *)(pp + 1);
178	strcpy(pp->name, sbuf_data(sb));
179	sbuf_delete(sb);
180	LIST_INIT(&pp->consumers);
181	pp->error = ENXIO;
182	pp->geom = gp;
183	LIST_INSERT_HEAD(&gp->provider, pp, provider);
184	g_nproviders++;
185	g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL);
186	return (pp);
187}
188
189void
190g_error_provider(struct g_provider *pp, int error)
191{
192
193	pp->error = error;
194}
195
196
197void
198g_destroy_provider(struct g_provider *pp)
199{
200	struct g_geom *gp;
201	struct g_consumer *cp;
202
203	g_topology_assert();
204	KASSERT(pp->event == NULL, ("g_destroy_provider() with event"));
205	KASSERT(LIST_EMPTY(&pp->consumers),
206	    ("g_destroy_provider but attached"));
207	KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
208	KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
209	KASSERT (pp->acw == 0, ("g_destroy_provider with ace"));
210	g_nproviders--;
211	LIST_REMOVE(pp, provider);
212	gp = pp->geom;
213	g_free(pp);
214	if (!(gp->flags & G_GEOM_WITHER))
215		return;
216	if (!LIST_EMPTY(&gp->provider))
217		return;
218	for (;;) {
219		cp = LIST_FIRST(&gp->consumer);
220		if (cp == NULL)
221			break;
222		g_dettach(cp);
223		g_destroy_consumer(cp);
224	}
225	g_destroy_geom(gp);
226}
227
/*
 * We keep the "geoms" list sorted by topological order (== increasing
 * numerical rank) at all times.
 * When an attach is done, the attaching geom's rank is invalidated
 * and it is moved to the tail of the list.
 * All geoms later in the sequence have their ranks reevaluated in
 * sequence.  If we cannot assign a rank to a geom because its
 * prerequisites do not have a rank, we move that element to the tail
 * of the sequence with an invalid rank as well.
 * At some point we encounter our original geom, and if we still fail
 * to assign it a rank, there must be a loop and we fail back to
 * g_attach(), which detaches again and calls redo_rank() again
 * to fix up the damage.
 * It would be much simpler code-wise to do it recursively, but we
 * can't risk that on the kernel stack.
 */
244
245static int
246redo_rank(struct g_geom *gp)
247{
248	struct g_consumer *cp;
249	struct g_geom *gp1, *gp2;
250	int n, m;
251
252	g_topology_assert();
253
254	/* Invalidate this geoms rank and move it to the tail */
255	gp1 = TAILQ_NEXT(gp, geoms);
256	if (gp1 != NULL) {
257		gp->rank = 0;
258		TAILQ_REMOVE(&geoms, gp, geoms);
259		TAILQ_INSERT_TAIL(&geoms, gp, geoms);
260	} else {
261		gp1 = gp;
262	}
263
264	/* re-rank the rest of the sequence */
265	for (; gp1 != NULL; gp1 = gp2) {
266		gp1->rank = 0;
267		m = 1;
268		LIST_FOREACH(cp, &gp1->consumer, consumer) {
269			if (cp->provider == NULL)
270				continue;
271			n = cp->provider->geom->rank;
272			if (n == 0) {
273				m = 0;
274				break;
275			} else if (n >= m)
276				m = n + 1;
277		}
278		gp1->rank = m;
279		gp2 = TAILQ_NEXT(gp1, geoms);
280
281		/* got a rank, moving on */
282		if (m != 0)
283			continue;
284
285		/* no rank to original geom means loop */
286		if (gp == gp1) {
287			return (ELOOP);
288
289		/* no rank, put it at the end move on */
290		TAILQ_REMOVE(&geoms, gp1, geoms);
291		TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
292		}
293	}
294	return (0);
295}
296
297int
298g_attach(struct g_consumer *cp, struct g_provider *pp)
299{
300	int error;
301
302	g_topology_assert();
303	KASSERT(cp->provider == NULL, ("attach but attached"));
304	cp->provider = pp;
305	LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
306	error = redo_rank(cp->geom);
307	if (error) {
308		LIST_REMOVE(cp, consumers);
309		cp->provider = NULL;
310		redo_rank(cp->geom);
311	}
312	return (error);
313}
314
315void
316g_dettach(struct g_consumer *cp)
317{
318	struct g_provider *pp;
319
320	g_trace(G_T_TOPOLOGY, "g_dettach(%p)", cp);
321	KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!"));
322	g_topology_assert();
323	KASSERT(cp->provider != NULL, ("dettach but not attached"));
324	KASSERT(cp->acr == 0, ("dettach but nonzero acr"));
325	KASSERT(cp->acw == 0, ("dettach but nonzero acw"));
326	KASSERT(cp->ace == 0, ("dettach but nonzero ace"));
327	KASSERT(cp->biocount == 0, ("dettach but nonzero biocount"));
328	pp = cp->provider;
329	LIST_REMOVE(cp, consumers);
330	cp->provider = NULL;
331	if (LIST_EMPTY(&pp->consumers)) {
332		if (pp->geom->flags & G_GEOM_WITHER)
333			g_destroy_provider(pp);
334	}
335	redo_rank(cp->geom);
336}
337
338
339/*
340 * g_access_abs()
341 *
342 * Access-check with absolute new values:  Just fall through
343 * and use the relative version.
344 */
345int
346g_access_abs(struct g_consumer *cp, int acr, int acw, int ace)
347{
348
349	g_topology_assert();
350	return(g_access_rel(cp,
351		acr - cp->acr,
352		acw - cp->acw,
353		ace - cp->ace));
354}
355
356/*
357 * g_access_rel()
358 *
359 * Access-check with delta values.  The question asked is "can provider
360 * "cp" change the access counters by the relative amounts dc[rwe] ?"
361 */
362
363int
364g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce)
365{
366	struct g_provider *pp;
367	int pr,pw,pe;
368	int error;
369
370	pp = cp->provider;
371
372	g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)",
373	    cp, pp->name, dcr, dcw, dce);
374
375	g_topology_assert();
376	KASSERT(cp->provider != NULL, ("access but not attached"));
377	KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
378	KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
379	KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
380	KASSERT(pp->geom->method->access != NULL, ("NULL method->access"));
381
382	/*
383	 * If our method cares about being spoiled, and we have been, we
384	 * are probably just ahead of the event telling us that.  Fail
385	 * now rather than having to unravel this later.
386	 */
387	if (cp->geom->spoiled != NULL && cp->spoiled) {
388		KASSERT(dcr >= 0, ("spoiled but dcr = %d", dcr));
389		KASSERT(dcw >= 0, ("spoiled but dce = %d", dcw));
390		KASSERT(dce >= 0, ("spoiled but dcw = %d", dce));
391		KASSERT(cp->acr == 0, ("spoiled but cp->acr = %d", cp->acr));
392		KASSERT(cp->acw == 0, ("spoiled but cp->acw = %d", cp->acw));
393		KASSERT(cp->ace == 0, ("spoiled but cp->ace = %d", cp->ace));
394		return(ENXIO);
395	}
396
397	/*
398	 * Figure out what counts the provider would have had, if this
399	 * consumer had (r0w0e0) at this time.
400	 */
401	pr = pp->acr - cp->acr;
402	pw = pp->acw - cp->acw;
403	pe = pp->ace - cp->ace;
404
405	g_trace(G_T_ACCESS,
406    "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
407	    dcr, dcw, dce,
408	    cp->acr, cp->acw, cp->ace,
409	    pp->acr, pp->acw, pp->ace,
410	    pp, pp->name);
411
412	/* If we try exclusive but already write: fail */
413	if (dce > 0 && pw > 0)
414		return (EPERM);
415	/* If we try write but already exclusive: fail */
416	if (dcw > 0 && pe > 0)
417		return (EPERM);
418	/* If we try to open more but provider is error'ed: fail */
419	if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
420		return (pp->error);
421
422	/* Ok then... */
423
424	/*
425	 * If we open first write, spoil any partner consumers.
426	 * If we close last write, trigger re-taste.
427	 */
428	if (pp->acw == 0 && dcw != 0)
429		g_spoil(pp, cp);
430	else if (pp->acw != 0 && pp->acw == -dcw && !(pp->geom->flags & G_GEOM_WITHER))
431		g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL);
432
433	error = pp->geom->method->access(pp, dcr, dcw, dce);
434	if (!error) {
435		pp->acr += dcr;
436		pp->acw += dcw;
437		pp->ace += dce;
438		cp->acr += dcr;
439		cp->acw += dcw;
440		cp->ace += dce;
441	}
442	return (error);
443}
444
/*
 * Convenience wrapper around g_haveattr() for an int-valued attribute:
 * passes the address and size of "val" and returns g_haveattr()'s result.
 */
int
g_haveattr_int(struct bio *bp, char *attribute, int val)
{
	int rv;

	rv = g_haveattr(bp, attribute, &val, sizeof(val));
	return (rv);
}
451
/*
 * Convenience wrapper around g_haveattr() for an off_t-valued attribute:
 * passes the address and size of "val" and returns g_haveattr()'s result.
 */
int
g_haveattr_off_t(struct bio *bp, char *attribute, off_t val)
{
	int rv;

	rv = g_haveattr(bp, attribute, &val, sizeof(val));
	return (rv);
}
458
459
460int
461g_haveattr(struct bio *bp, char *attribute, void *val, int len)
462{
463	int error;
464
465	if (strcmp(bp->bio_attribute, attribute))
466		return (0);
467	if (bp->bio_length != len) {
468		printf("bio_length %lld len %d -> EFAULT\n", bp->bio_length, len);
469		error = EFAULT;
470	} else {
471		error = 0;
472		bcopy(val, bp->bio_data, len);
473		bp->bio_completed = len;
474	}
475	bp->bio_error = error;
476	g_io_deliver(bp);
477	return (1);
478}
479
480int
481g_std_access(struct g_provider *pp __unused,
482	int dr __unused, int dw __unused, int de __unused)
483{
484
485        return (0);
486}
487
488void
489g_std_done(struct bio *bp)
490{
491	struct bio *bp2;
492
493	bp2 = bp->bio_linkage;
494	bp2->bio_error = bp->bio_error;
495	bp2->bio_completed = bp->bio_completed;
496	g_destroy_bio(bp);
497	g_io_deliver(bp2);
498}
499
500/* XXX: maybe this is only g_slice_spoiled */
501
502void
503g_std_spoiled(struct g_consumer *cp)
504{
505	struct g_geom *gp;
506	struct g_provider *pp;
507
508	g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
509	g_topology_assert();
510	g_dettach(cp);
511	gp = cp->geom;
512	LIST_FOREACH(pp, &gp->provider, provider)
513		g_orphan_provider(pp, ENXIO);
514	g_destroy_consumer(cp);
515	if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
516		g_destroy_geom(gp);
517	else
518		gp->flags |= G_GEOM_WITHER;
519}
520
521/*
522 * Spoiling happens when a provider is opened for writing, but consumers
523 * which are configured by in-band data are attached (slicers for instance).
524 * Since the write might potentially change the in-band data, such consumers
525 * need to re-evaluate their existence after the writing session closes.
526 * We do this by (offering to) tear them down when the open for write happens
527 * in return for a re-taste when it closes again.
528 * Together with the fact that such consumers grab an 'e' bit whenever they
529 * are open, regardless of mode, this ends up DTRT.
530 */
531
532void
533g_spoil(struct g_provider *pp, struct g_consumer *cp)
534{
535	struct g_consumer *cp2;
536
537	g_topology_assert();
538
539	LIST_FOREACH(cp2, &pp->consumers, consumers) {
540		if (cp2 == cp)
541			continue;
542/*
543		KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
544		KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
545*/
546		KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
547		cp2->spoiled++;
548	}
549	g_post_event(EV_SPOILED, NULL, NULL, pp, cp);
550}
551
552static struct g_method *
553g_method_by_name(char *name)
554{
555	struct g_method *mp;
556
557	g_trace(G_T_TOPOLOGY, "g_method_by_name(%s)", name);
558	g_topology_assert();
559	LIST_FOREACH(mp, &g_methods, method)
560		if (!strcmp(mp->name, name))
561			return (mp);
562	return (NULL);
563}
564
565struct g_geom *
566g_create_geomf(char *method, struct g_provider *pp, char *fmt, ...)
567{
568	va_list ap;
569	struct sbuf *sb;
570	char *s;
571	struct g_method *mp;
572	struct g_geom *gp;
573
574	g_trace(G_T_TOPOLOGY, "g_create_geom(%s, %p(%s))", method,
575		pp, pp == NULL ? "" : pp->name);
576	g_topology_assert();
577	gp = NULL;
578	mp = g_method_by_name(method);
579	if (mp == NULL)
580		return (NULL);
581	if (fmt != NULL) {
582		va_start(ap, fmt);
583		mtx_lock(&Giant);
584		sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
585		sbuf_vprintf(sb, fmt, ap);
586		sbuf_finish(sb);
587		mtx_unlock(&Giant);
588		s = sbuf_data(sb);
589	} else {
590		s = NULL;
591	}
592	if (pp != NULL)
593		gp = mp->taste(mp, pp, NULL, G_TF_INSIST);
594	if (gp == NULL && mp->create_geom == NULL)
595		return (NULL);
596	if (gp == NULL)
597		gp = mp->create_geom(mp, pp, s);
598	/* XXX: delete sbuf  */
599	return (gp);
600}
601
602struct g_geom *
603g_insert_geom(char *method, struct g_consumer *cp)
604{
605	struct g_method *mp;
606	struct g_geom *gp;
607	struct g_provider *pp, *pp2;
608	struct g_consumer *cp2;
609	int error;
610
611	g_trace(G_T_TOPOLOGY, "g_insert_geomf(%s, %p)", method, cp);
612	g_topology_assert();
613	KASSERT(cp->provider != NULL, ("g_insert_geomf but not attached"));
614	/* XXX: check for events ?? */
615	mp = g_method_by_name(method);
616	if (mp == NULL)
617		return (NULL);
618	if (mp->create_geom == NULL)
619		return (NULL);
620	pp = cp->provider;
621	gp = mp->taste(mp, pp, NULL, G_TF_TRANSPARENT);
622	if (gp == NULL)
623		return (NULL);
624	pp2 = LIST_FIRST(&gp->provider);
625	cp2 = LIST_FIRST(&gp->consumer);
626	cp2->acr += pp->acr;
627	cp2->acw += pp->acw;
628	cp2->ace += pp->ace;
629	pp2->acr += pp->acr;
630	pp2->acw += pp->acw;
631	pp2->ace += pp->ace;
632	LIST_REMOVE(cp, consumers);
633	LIST_INSERT_HEAD(&pp2->consumers, cp, consumers);
634	cp->provider = pp2;
635	error = redo_rank(gp);
636	KASSERT(error == 0, ("redo_rank failed in g_insert_geom"));
637	return (gp);
638}
639
640