geom_io.c revision 118855
1241675Suqs/*-
2241675Suqs * Copyright (c) 2002 Poul-Henning Kamp
3241675Suqs * Copyright (c) 2002 Networks Associates Technology, Inc.
4241675Suqs * All rights reserved.
5241675Suqs *
6241675Suqs * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7241675Suqs * and NAI Labs, the Security Research Division of Network Associates, Inc.
8241675Suqs * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9241675Suqs * DARPA CHATS research program.
10241675Suqs *
11241675Suqs * Redistribution and use in source and binary forms, with or without
12241675Suqs * modification, are permitted provided that the following conditions
13241675Suqs * are met:
14241675Suqs * 1. Redistributions of source code must retain the above copyright
15241675Suqs *    notice, this list of conditions and the following disclaimer.
16241675Suqs * 2. Redistributions in binary form must reproduce the above copyright
17241675Suqs *    notice, this list of conditions and the following disclaimer in the
18241675Suqs *    documentation and/or other materials provided with the distribution.
19241675Suqs * 3. The names of the authors may not be used to endorse or promote
20241675Suqs *    products derived from this software without specific prior written
21241675Suqs *    permission.
22241675Suqs *
23241675Suqs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24241675Suqs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25241675Suqs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26241675Suqs * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27241675Suqs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28241675Suqs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29241675Suqs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30241675Suqs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31241675Suqs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32241675Suqs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33241675Suqs * SUCH DAMAGE.
34241675Suqs */
35241675Suqs
36241675Suqs#include <sys/cdefs.h>
37241675Suqs__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 118855 2003-08-13 06:42:56Z phk $");
38241675Suqs
39241675Suqs#include <sys/param.h>
40241675Suqs#include <sys/systm.h>
41241675Suqs#include <sys/kernel.h>
42241675Suqs#include <sys/malloc.h>
43241675Suqs#include <sys/bio.h>
44241675Suqs
45241675Suqs#include <sys/errno.h>
46241675Suqs#include <geom/geom.h>
47241675Suqs#include <geom/geom_int.h>
48241675Suqs#include <sys/devicestat.h>
49241675Suqs
50241675Suqs#include <vm/uma.h>
51241675Suqs
/*
 * Requests on their way down to a provider: filled by g_io_request(),
 * drained by g_io_schedule_down().
 */
static struct g_bioq g_bio_run_down;
/*
 * Completed requests on their way back up: filled by g_io_deliver(),
 * drained by g_io_schedule_up().
 */
static struct g_bioq g_bio_run_up;

/*
 * Incremented by g_io_deliver() each time a request comes back with
 * ENOMEM; while non-zero, g_io_schedule_down() sleeps before handling
 * a request and then decrements it.
 */
static u_int pace;
/* UMA zone from which every struct bio in this file is allocated. */
static uma_zone_t	biozone;
57241675Suqs
58241675Suqs#include <machine/atomic.h>
59241675Suqs
60241675Suqsstatic void
61241675Suqsg_bioq_lock(struct g_bioq *bq)
62241675Suqs{
63241675Suqs
64241675Suqs	mtx_lock(&bq->bio_queue_lock);
65241675Suqs}
66241675Suqs
67241675Suqsstatic void
68241675Suqsg_bioq_unlock(struct g_bioq *bq)
69241675Suqs{
70241675Suqs
71241675Suqs	mtx_unlock(&bq->bio_queue_lock);
72241675Suqs}
73241675Suqs
#if 0
/*
 * Tear down the mutex of a bio queue.  Compiled out: nothing in this
 * file destroys a queue once g_bioq_init() has set it up.
 */
static void
g_bioq_destroy(struct g_bioq *bq)
{
	struct mtx *qlock;

	qlock = &bq->bio_queue_lock;
	mtx_destroy(qlock);
}
#endif
82241675Suqs
83241675Suqsstatic void
84241675Suqsg_bioq_init(struct g_bioq *bq)
85241675Suqs{
86241675Suqs
87241675Suqs	TAILQ_INIT(&bq->bio_queue);
88241675Suqs	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
89241675Suqs}
90241675Suqs
91241675Suqsstatic struct bio *
92241675Suqsg_bioq_first(struct g_bioq *bq)
93241675Suqs{
94241675Suqs	struct bio *bp;
95241675Suqs
96241675Suqs	bp = TAILQ_FIRST(&bq->bio_queue);
97241675Suqs	if (bp != NULL) {
98241675Suqs		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
99241675Suqs		bq->bio_queue_length--;
100241675Suqs	}
101241675Suqs	return (bp);
102241675Suqs}
103241675Suqs
104241675Suqsstatic void
105241675Suqsg_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
106241675Suqs{
107241675Suqs
108241675Suqs	g_bioq_lock(rq);
109241675Suqs	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
110241675Suqs	rq->bio_queue_length++;
111241675Suqs	g_bioq_unlock(rq);
112241675Suqs}
113241675Suqs
114241675Suqsstruct bio *
115241675Suqsg_new_bio(void)
116241675Suqs{
117241675Suqs	struct bio *bp;
118241675Suqs
119241675Suqs	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
120241675Suqs	return (bp);
121241675Suqs}
122241675Suqs
123241675Suqsvoid
124241675Suqsg_destroy_bio(struct bio *bp)
125241675Suqs{
126241675Suqs
127241675Suqs	uma_zfree(biozone, bp);
128241675Suqs}
129241675Suqs
130241675Suqsstruct bio *
131241675Suqsg_clone_bio(struct bio *bp)
132241675Suqs{
133241675Suqs	struct bio *bp2;
134241675Suqs
135241675Suqs	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
136241675Suqs	if (bp2 != NULL) {
137241675Suqs		bp2->bio_parent = bp;
138241675Suqs		bp2->bio_cmd = bp->bio_cmd;
139241675Suqs		bp2->bio_length = bp->bio_length;
140241675Suqs		bp2->bio_offset = bp->bio_offset;
141241675Suqs		bp2->bio_data = bp->bio_data;
142241675Suqs		bp2->bio_attribute = bp->bio_attribute;
143241675Suqs		bp->bio_children++;
144241675Suqs	}
145241675Suqs	return(bp2);
146241675Suqs}
147241675Suqs
148241675Suqsvoid
149241675Suqsg_io_init()
150241675Suqs{
151241675Suqs
152241675Suqs	g_bioq_init(&g_bio_run_down);
153241675Suqs	g_bioq_init(&g_bio_run_up);
154241675Suqs	biozone = uma_zcreate("g_bio", sizeof (struct bio),
155241675Suqs	    NULL, NULL,
156241675Suqs	    NULL, NULL,
157241675Suqs	    0, 0);
158241675Suqs}
159241675Suqs
160241675Suqsint
161241675Suqsg_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
162241675Suqs{
163241675Suqs	struct bio *bp;
164241675Suqs	int error;
165241675Suqs
166241675Suqs	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
167241675Suqs	bp = g_new_bio();
168241675Suqs	bp->bio_cmd = BIO_GETATTR;
169241675Suqs	bp->bio_done = NULL;
170241675Suqs	bp->bio_attribute = attr;
171241675Suqs	bp->bio_length = *len;
172241675Suqs	bp->bio_data = ptr;
173241675Suqs	g_io_request(bp, cp);
174241675Suqs	error = biowait(bp, "ggetattr");
175241675Suqs	*len = bp->bio_completed;
176241675Suqs	g_destroy_bio(bp);
177241675Suqs	return (error);
178241675Suqs}
179241675Suqs
180241675Suqsstatic int
181241675Suqsg_io_check(struct bio *bp)
182241675Suqs{
183241675Suqs	struct g_consumer *cp;
184241675Suqs	struct g_provider *pp;
185241675Suqs
186241675Suqs	cp = bp->bio_from;
187241675Suqs	pp = bp->bio_to;
188241675Suqs
189241675Suqs	/* Fail if access counters dont allow the operation */
190241675Suqs	switch(bp->bio_cmd) {
191241675Suqs	case BIO_READ:
192241675Suqs	case BIO_GETATTR:
193241675Suqs		if (cp->acr == 0)
194241675Suqs			return (EPERM);
195241675Suqs		break;
196241675Suqs	case BIO_WRITE:
197241675Suqs	case BIO_DELETE:
198241675Suqs		if (cp->acw == 0)
199241675Suqs			return (EPERM);
200241675Suqs		break;
201241675Suqs	default:
202241675Suqs		return (EPERM);
203241675Suqs	}
204241675Suqs	/* if provider is marked for error, don't disturb. */
205241675Suqs	if (pp->error)
206241675Suqs		return (pp->error);
207241675Suqs
208241675Suqs	switch(bp->bio_cmd) {
209241675Suqs	case BIO_READ:
210241675Suqs	case BIO_WRITE:
211241675Suqs	case BIO_DELETE:
212241675Suqs		/* Noisily reject zero size sectors */
213241675Suqs		if (pp->sectorsize == 0) {
214241675Suqs			printf("GEOM provider %s has zero sectorsize\n",
215241675Suqs			    pp->name);
216241675Suqs			return (EDOOFUS);
217241675Suqs		}
218241675Suqs		/* Reject I/O not on sector boundary */
219241675Suqs		if (bp->bio_offset % pp->sectorsize)
220241675Suqs			return (EINVAL);
221241675Suqs		/* Reject I/O not integral sector long */
222241675Suqs		if (bp->bio_length % pp->sectorsize)
223241675Suqs			return (EINVAL);
224241675Suqs		/* Reject requests past the end of media. */
225241675Suqs		if (bp->bio_offset > pp->mediasize)
226241675Suqs			return (EIO);
227241675Suqs		break;
228241675Suqs	default:
229241675Suqs		break;
230241675Suqs	}
231241675Suqs	return (0);
232241675Suqs}
233241675Suqs
234241675Suqsvoid
235241675Suqsg_io_request(struct bio *bp, struct g_consumer *cp)
236241675Suqs{
237241675Suqs	struct g_provider *pp;
238241675Suqs
239241675Suqs	pp = cp->provider;
240241675Suqs	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
241241675Suqs	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
242241675Suqs	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
243241675Suqs	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
244241675Suqs
245241675Suqs	bp->bio_from = cp;
246241675Suqs	bp->bio_to = pp;
247241675Suqs	bp->bio_error = 0;
248241675Suqs	bp->bio_completed = 0;
249241675Suqs
250241675Suqs	if (g_collectstats) {
251241675Suqs		devstat_start_transaction_bio(cp->stat, bp);
252241675Suqs		devstat_start_transaction_bio(pp->stat, bp);
253241675Suqs	}
254241675Suqs	cp->nstart++;
255241675Suqs	pp->nstart++;
256241675Suqs
257241675Suqs	/* Pass it on down. */
258241675Suqs	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
259241675Suqs	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
260241675Suqs	g_bioq_enqueue_tail(bp, &g_bio_run_down);
261241675Suqs	wakeup(&g_wait_down);
262241675Suqs}
263241675Suqs
264241675Suqsvoid
265241675Suqsg_io_deliver(struct bio *bp, int error)
266241675Suqs{
267241675Suqs	struct g_consumer *cp;
268241675Suqs	struct g_provider *pp;
269241675Suqs
270241675Suqs	cp = bp->bio_from;
271241675Suqs	pp = bp->bio_to;
272241675Suqs	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
273241675Suqs	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
274241675Suqs	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
275241675Suqs	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
276241675Suqs
277241675Suqs	g_trace(G_T_BIO,
278241675Suqs"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
279241675Suqs	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
280241675Suqs	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
281241675Suqs
282241675Suqs	bp->bio_bcount = bp->bio_length;
283241675Suqs	if (g_collectstats) {
284241675Suqs		bp->bio_resid = bp->bio_bcount - bp->bio_completed;
285241675Suqs		devstat_end_transaction_bio(cp->stat, bp);
286241675Suqs		devstat_end_transaction_bio(pp->stat, bp);
287241675Suqs	}
288241675Suqs	cp->nend++;
289241675Suqs	pp->nend++;
290241675Suqs
291241675Suqs	if (error == ENOMEM) {
292241675Suqs		if (bootverbose)
293241675Suqs			printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
294241675Suqs		g_io_request(bp, cp);
295241675Suqs		pace++;
296241675Suqs		return;
297241675Suqs	}
298241675Suqs	bp->bio_error = error;
299241675Suqs	g_bioq_enqueue_tail(bp, &g_bio_run_up);
300241675Suqs	wakeup(&g_wait_up);
301241675Suqs}
302241675Suqs
303241675Suqsvoid
304241675Suqsg_io_schedule_down(struct thread *tp __unused)
305241675Suqs{
306241675Suqs	struct bio *bp;
307241675Suqs	off_t excess;
308241675Suqs	int error;
309241675Suqs	struct mtx mymutex;
310241675Suqs
311241675Suqs	bzero(&mymutex, sizeof mymutex);
312241675Suqs	mtx_init(&mymutex, "g_xdown", MTX_DEF, 0);
313241675Suqs
314241675Suqs	for(;;) {
315241675Suqs		g_bioq_lock(&g_bio_run_down);
316241675Suqs		bp = g_bioq_first(&g_bio_run_down);
317241675Suqs		if (bp == NULL) {
318241675Suqs			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
319241675Suqs			    PRIBIO | PDROP, "-", hz/10);
320241675Suqs			continue;
321241675Suqs		}
322241675Suqs		g_bioq_unlock(&g_bio_run_down);
323241675Suqs		if (pace > 0) {
324241675Suqs			msleep(&error, NULL, PRIBIO, "g_down", hz/10);
325241675Suqs			pace--;
326241675Suqs		}
327241675Suqs		error = g_io_check(bp);
328241675Suqs		if (error) {
329241675Suqs			g_io_deliver(bp, error);
330241675Suqs			continue;
331241675Suqs		}
332241675Suqs		switch (bp->bio_cmd) {
333241675Suqs		case BIO_READ:
334241675Suqs		case BIO_WRITE:
335241675Suqs		case BIO_DELETE:
336241675Suqs			/* Truncate requests to the end of providers media. */
337241675Suqs			excess = bp->bio_offset + bp->bio_length;
338241675Suqs			if (excess > bp->bio_to->mediasize) {
339241675Suqs				excess -= bp->bio_to->mediasize;
340241675Suqs				bp->bio_length -= excess;
341241675Suqs			}
342241675Suqs			/* Deliver zero length transfers right here. */
343241675Suqs			if (bp->bio_length == 0) {
344241675Suqs				g_io_deliver(bp, 0);
345241675Suqs				continue;
346241675Suqs			}
347241675Suqs			break;
348241675Suqs		default:
349241675Suqs			break;
350241675Suqs		}
351241675Suqs		mtx_lock(&mymutex);
352241675Suqs		bp->bio_to->geom->start(bp);
353241675Suqs		mtx_unlock(&mymutex);
354241675Suqs	}
355241675Suqs}
356241675Suqs
357241675Suqsvoid
358241675Suqsg_io_schedule_up(struct thread *tp __unused)
359241675Suqs{
360241675Suqs	struct bio *bp;
361241675Suqs	struct mtx mymutex;
362241675Suqs
363241675Suqs	bzero(&mymutex, sizeof mymutex);
364241675Suqs	mtx_init(&mymutex, "g_xup", MTX_DEF, 0);
365241675Suqs	for(;;) {
366241675Suqs		g_bioq_lock(&g_bio_run_up);
367241675Suqs		bp = g_bioq_first(&g_bio_run_up);
368241675Suqs		if (bp != NULL) {
369241675Suqs			g_bioq_unlock(&g_bio_run_up);
370241675Suqs			mtx_lock(&mymutex);
371241675Suqs			biodone(bp);
372241675Suqs			mtx_unlock(&mymutex);
373241675Suqs			continue;
374241675Suqs		}
375241675Suqs		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
376241675Suqs		    PRIBIO | PDROP, "-", hz/10);
377241675Suqs	}
378241675Suqs}
379241675Suqs
380241675Suqsvoid *
381241675Suqsg_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
382241675Suqs{
383241675Suqs	struct bio *bp;
384241675Suqs	void *ptr;
385241675Suqs	int errorc;
386241675Suqs
387241675Suqs	bp = g_new_bio();
388241675Suqs	bp->bio_cmd = BIO_READ;
389241675Suqs	bp->bio_done = NULL;
390241675Suqs	bp->bio_offset = offset;
391241675Suqs	bp->bio_length = length;
392241675Suqs	ptr = g_malloc(length, M_WAITOK);
393241675Suqs	bp->bio_data = ptr;
394241675Suqs	g_io_request(bp, cp);
395241675Suqs	errorc = biowait(bp, "gread");
396241675Suqs	if (error != NULL)
397241675Suqs		*error = errorc;
398241675Suqs	g_destroy_bio(bp);
399241675Suqs	if (errorc) {
400241675Suqs		g_free(ptr);
401241675Suqs		ptr = NULL;
402241675Suqs	}
403241675Suqs	return (ptr);
404241675Suqs}
405241675Suqs
406241675Suqsint
407241675Suqsg_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
408241675Suqs{
409241675Suqs	struct bio *bp;
410241675Suqs	int error;
411241675Suqs
412241675Suqs	bp = g_new_bio();
413241675Suqs	bp->bio_cmd = BIO_WRITE;
414241675Suqs	bp->bio_done = NULL;
415241675Suqs	bp->bio_offset = offset;
416241675Suqs	bp->bio_length = length;
417241675Suqs	bp->bio_data = ptr;
418241675Suqs	g_io_request(bp, cp);
419241675Suqs	error = biowait(bp, "gwrite");
420241675Suqs	g_destroy_bio(bp);
421241675Suqs	return (error);
422241675Suqs}
423241675Suqs