geom_io.c revision 120493
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35330567Sgordon
36275970Scy#include <sys/cdefs.h>
37275970Scy__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 120493 2003-09-26 20:52:46Z phk $");
38275970Scy
39330567Sgordon#include <sys/param.h>
40330567Sgordon#include <sys/systm.h>
41330567Sgordon#include <sys/kernel.h>
42330567Sgordon#include <sys/malloc.h>
43275970Scy#include <sys/bio.h>
44330567Sgordon
45330567Sgordon#include <sys/errno.h>
46330567Sgordon#include <geom/geom.h>
47275970Scy#include <geom/geom_int.h>
48275970Scy#include <sys/devicestat.h>
49275970Scy
50275970Scy#include <vm/uma.h>
51275970Scy
52275970Scystatic struct g_bioq g_bio_run_down;
53275970Scystatic struct g_bioq g_bio_run_up;
54275970Scy
55275970Scystatic u_int pace;
56275970Scystatic uma_zone_t	biozone;
57275970Scy
58275970Scy#include <machine/atomic.h>
59275970Scy
60275970Scystatic void
61275970Scyg_bioq_lock(struct g_bioq *bq)
62275970Scy{
63275970Scy
64275970Scy	mtx_lock(&bq->bio_queue_lock);
65275970Scy}
66330567Sgordon
67275970Scystatic void
68330567Sgordong_bioq_unlock(struct g_bioq *bq)
69275970Scy{
70330567Sgordon
71330567Sgordon	mtx_unlock(&bq->bio_queue_lock);
72330567Sgordon}
73330567Sgordon
#if 0
/*
 * Tear down the mutex of a bio queue.  Compiled out: nothing in this file
 * calls it.
 */
static void
g_bioq_destroy(struct g_bioq *bq)
{
	mtx_destroy(&bq->bio_queue_lock);
}
#endif
82330567Sgordon
83275970Scystatic void
84330567Sgordong_bioq_init(struct g_bioq *bq)
85330567Sgordon{
86275970Scy
87275970Scy	TAILQ_INIT(&bq->bio_queue);
88330567Sgordon	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
89275970Scy}
90330567Sgordon
91275970Scystatic struct bio *
92330567Sgordong_bioq_first(struct g_bioq *bq)
93330567Sgordon{
94330567Sgordon	struct bio *bp;
95330567Sgordon
96275970Scy	bp = TAILQ_FIRST(&bq->bio_queue);
97275970Scy	if (bp != NULL) {
98275970Scy		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
99275970Scy		bq->bio_queue_length--;
100275970Scy	}
101330567Sgordon	return (bp);
102275970Scy}
103275970Scy
104275970Scystatic void
105275970Scyg_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
106275970Scy{
107275970Scy
108330567Sgordon	g_bioq_lock(rq);
109330567Sgordon	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
110330567Sgordon	rq->bio_queue_length++;
111330567Sgordon	g_bioq_unlock(rq);
112330567Sgordon}
113330567Sgordon
114330567Sgordonstruct bio *
115275970Scyg_new_bio(void)
116275970Scy{
117275970Scy	struct bio *bp;
118330567Sgordon
119275970Scy	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
120330567Sgordon	return (bp);
121275970Scy}
122275970Scy
123275970Scyvoid
124330567Sgordong_destroy_bio(struct bio *bp)
125275970Scy{
126275970Scy
127330567Sgordon	uma_zfree(biozone, bp);
128275970Scy}
129275970Scy
130275970Scystruct bio *
131275970Scyg_clone_bio(struct bio *bp)
132275970Scy{
133330567Sgordon	struct bio *bp2;
134330567Sgordon
135330567Sgordon	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
136275970Scy	if (bp2 != NULL) {
137275970Scy		bp2->bio_parent = bp;
138330567Sgordon		bp2->bio_cmd = bp->bio_cmd;
139330567Sgordon		bp2->bio_length = bp->bio_length;
140330567Sgordon		bp2->bio_offset = bp->bio_offset;
141330567Sgordon		bp2->bio_data = bp->bio_data;
142330567Sgordon		bp2->bio_attribute = bp->bio_attribute;
143330567Sgordon		bp->bio_children++;
144330567Sgordon	}
145330567Sgordon	return(bp2);
146330567Sgordon}
147330567Sgordon
148330567Sgordonvoid
149330567Sgordong_io_init()
150330567Sgordon{
151330567Sgordon
152330567Sgordon	g_bioq_init(&g_bio_run_down);
153275970Scy	g_bioq_init(&g_bio_run_up);
154275970Scy	biozone = uma_zcreate("g_bio", sizeof (struct bio),
155275970Scy	    NULL, NULL,
156330567Sgordon	    NULL, NULL,
157330567Sgordon	    0, 0);
158330567Sgordon}
159275970Scy
160330567Sgordonint
161330567Sgordong_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
162330567Sgordon{
163275970Scy	struct bio *bp;
164275970Scy	int error;
165330567Sgordon
166330567Sgordon	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
167330567Sgordon	bp = g_new_bio();
168330567Sgordon	bp->bio_cmd = BIO_GETATTR;
169330567Sgordon	bp->bio_done = NULL;
170330567Sgordon	bp->bio_attribute = attr;
171330567Sgordon	bp->bio_length = *len;
172330567Sgordon	bp->bio_data = ptr;
173275970Scy	g_io_request(bp, cp);
174330567Sgordon	error = biowait(bp, "ggetattr");
175330567Sgordon	*len = bp->bio_completed;
176330567Sgordon	g_destroy_bio(bp);
177330567Sgordon	return (error);
178330567Sgordon}
179275970Scy
180275970Scystatic int
181330567Sgordong_io_check(struct bio *bp)
182330567Sgordon{
183330567Sgordon	struct g_consumer *cp;
184330567Sgordon	struct g_provider *pp;
185330567Sgordon
186330567Sgordon	cp = bp->bio_from;
187330567Sgordon	pp = bp->bio_to;
188330567Sgordon
189330567Sgordon	/* Fail if access counters dont allow the operation */
190330567Sgordon	switch(bp->bio_cmd) {
191275970Scy	case BIO_READ:
192275970Scy	case BIO_GETATTR:
193275970Scy		if (cp->acr == 0)
194275970Scy			return (EPERM);
195275970Scy		break;
196275970Scy	case BIO_WRITE:
197275970Scy	case BIO_DELETE:
198275970Scy		if (cp->acw == 0)
199275970Scy			return (EPERM);
200275970Scy		break;
201330567Sgordon	default:
202275970Scy		return (EPERM);
203275970Scy	}
204275970Scy	/* if provider is marked for error, don't disturb. */
205330567Sgordon	if (pp->error)
206275970Scy		return (pp->error);
207275970Scy
208330567Sgordon	switch(bp->bio_cmd) {
209275970Scy	case BIO_READ:
210275970Scy	case BIO_WRITE:
211330567Sgordon	case BIO_DELETE:
212275970Scy		/* Noisily reject zero size sectors */
213330567Sgordon		if (pp->sectorsize == 0) {
214275970Scy			printf("GEOM provider %s has zero sectorsize\n",
215275970Scy			    pp->name);
216275970Scy			return (EDOOFUS);
217275970Scy		}
218275970Scy		/* Reject I/O not on sector boundary */
219275970Scy		if (bp->bio_offset % pp->sectorsize)
220275970Scy			return (EINVAL);
221275970Scy		/* Reject I/O not integral sector long */
222275970Scy		if (bp->bio_length % pp->sectorsize)
223330567Sgordon			return (EINVAL);
224330567Sgordon		/* Reject requests past the end of media. */
225275970Scy		if (bp->bio_offset > pp->mediasize)
226275970Scy			return (EIO);
227275970Scy		break;
228275970Scy	default:
229275970Scy		break;
230275970Scy	}
231275970Scy	return (0);
232275970Scy}
233275970Scy
234275970Scyvoid
235275970Scyg_io_request(struct bio *bp, struct g_consumer *cp)
236275970Scy{
237275970Scy	struct g_provider *pp;
238275970Scy
239275970Scy	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
240275970Scy	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
241275970Scy	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
242275970Scy	pp = cp->provider;
243275970Scy	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
244275970Scy
245275970Scy	bp->bio_from = cp;
246275970Scy	bp->bio_to = pp;
247275970Scy	bp->bio_error = 0;
248275970Scy	bp->bio_completed = 0;
249275970Scy
250275970Scy	if (g_collectstats) {
251275970Scy		devstat_start_transaction_bio(cp->stat, bp);
252275970Scy		devstat_start_transaction_bio(pp->stat, bp);
253275970Scy	}
254275970Scy	cp->nstart++;
255275970Scy	pp->nstart++;
256275970Scy
257330567Sgordon	/* Pass it on down. */
258330567Sgordon	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
259330567Sgordon	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
260330567Sgordon	g_bioq_enqueue_tail(bp, &g_bio_run_down);
261330567Sgordon	wakeup(&g_wait_down);
262275970Scy}
263275970Scy
264275970Scyvoid
265275970Scyg_io_deliver(struct bio *bp, int error)
266275970Scy{
267275970Scy	struct g_consumer *cp;
268275970Scy	struct g_provider *pp;
269275970Scy
270330567Sgordon	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
271330567Sgordon	cp = bp->bio_from;
272330567Sgordon	pp = bp->bio_to;
273330567Sgordon	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
274275970Scy	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
275275970Scy	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
276330567Sgordon
277330567Sgordon	g_trace(G_T_BIO,
278330567Sgordon"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
279275970Scy	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
280275970Scy	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
281275970Scy
282275970Scy	bp->bio_bcount = bp->bio_length;
283330567Sgordon	if (g_collectstats) {
284330567Sgordon		bp->bio_resid = bp->bio_bcount - bp->bio_completed;
285330567Sgordon		devstat_end_transaction_bio(cp->stat, bp);
286275970Scy		devstat_end_transaction_bio(pp->stat, bp);
287275970Scy	}
288330567Sgordon	cp->nend++;
289330567Sgordon	pp->nend++;
290330567Sgordon
291275970Scy	if (error == ENOMEM) {
292330567Sgordon		if (bootverbose)
293330567Sgordon			printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
294330567Sgordon		g_io_request(bp, cp);
295330567Sgordon		pace++;
296330567Sgordon		return;
297330567Sgordon	}
298330567Sgordon	bp->bio_error = error;
299330567Sgordon	g_bioq_enqueue_tail(bp, &g_bio_run_up);
300330567Sgordon	wakeup(&g_wait_up);
301275970Scy}
302330567Sgordon
303330567Sgordonvoid
304330567Sgordong_io_schedule_down(struct thread *tp __unused)
305330567Sgordon{
306330567Sgordon	struct bio *bp;
307330567Sgordon	off_t excess;
308330567Sgordon	int error;
309330567Sgordon	struct mtx mymutex;
310330567Sgordon
311330567Sgordon	bzero(&mymutex, sizeof mymutex);
312330567Sgordon	mtx_init(&mymutex, "g_xdown", MTX_DEF, 0);
313330567Sgordon
314330567Sgordon	for(;;) {
315330567Sgordon		g_bioq_lock(&g_bio_run_down);
316330567Sgordon		bp = g_bioq_first(&g_bio_run_down);
317330567Sgordon		if (bp == NULL) {
318330567Sgordon			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
319330567Sgordon			    PRIBIO | PDROP, "-", hz/10);
320330567Sgordon			continue;
321275970Scy		}
322330567Sgordon		g_bioq_unlock(&g_bio_run_down);
323330567Sgordon		if (pace > 0) {
324330567Sgordon			msleep(&error, NULL, PRIBIO, "g_down", hz/10);
325330567Sgordon			pace--;
326330567Sgordon		}
327330567Sgordon		error = g_io_check(bp);
328330567Sgordon		if (error) {
329330567Sgordon			g_io_deliver(bp, error);
330330567Sgordon			continue;
331330567Sgordon		}
332275970Scy		switch (bp->bio_cmd) {
333330567Sgordon		case BIO_READ:
334330567Sgordon		case BIO_WRITE:
335330567Sgordon		case BIO_DELETE:
336275970Scy			/* Truncate requests to the end of providers media. */
337330567Sgordon			excess = bp->bio_offset + bp->bio_length;
338330567Sgordon			if (excess > bp->bio_to->mediasize) {
339330567Sgordon				excess -= bp->bio_to->mediasize;
340330567Sgordon				bp->bio_length -= excess;
341330567Sgordon			}
342330567Sgordon			/* Deliver zero length transfers right here. */
343330567Sgordon			if (bp->bio_length == 0) {
344330567Sgordon				g_io_deliver(bp, 0);
345330567Sgordon				continue;
346330567Sgordon			}
347330567Sgordon			break;
348330567Sgordon		default:
349330567Sgordon			break;
350330567Sgordon		}
351330567Sgordon		mtx_lock(&mymutex);
352330567Sgordon		bp->bio_to->geom->start(bp);
353330567Sgordon		mtx_unlock(&mymutex);
354330567Sgordon	}
355330567Sgordon}
356275970Scy
357330567Sgordonvoid
358330567Sgordong_io_schedule_up(struct thread *tp __unused)
359330567Sgordon{
360330567Sgordon	struct bio *bp;
361275970Scy	struct mtx mymutex;
362275970Scy
363275970Scy	bzero(&mymutex, sizeof mymutex);
364275970Scy	mtx_init(&mymutex, "g_xup", MTX_DEF, 0);
365275970Scy	for(;;) {
366275970Scy		g_bioq_lock(&g_bio_run_up);
367330567Sgordon		bp = g_bioq_first(&g_bio_run_up);
368330567Sgordon		if (bp != NULL) {
369330567Sgordon			g_bioq_unlock(&g_bio_run_up);
370330567Sgordon			mtx_lock(&mymutex);
371330567Sgordon			biodone(bp);
372330567Sgordon			mtx_unlock(&mymutex);
373330567Sgordon			continue;
374330567Sgordon		}
375275970Scy		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
376275970Scy		    PRIBIO | PDROP, "-", hz/10);
377275970Scy	}
378275970Scy}
379275970Scy
380275970Scyvoid *
381275970Scyg_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
382275970Scy{
383275970Scy	struct bio *bp;
384275970Scy	void *ptr;
385275970Scy	int errorc;
386275970Scy
387275970Scy	KASSERT(length >= 512 && length <= DFLTPHYS,
388275970Scy		("g_read_data(): invalid length %jd", (intmax_t)length));
389275970Scy
390275970Scy	bp = g_new_bio();
391275970Scy	bp->bio_cmd = BIO_READ;
392330567Sgordon	bp->bio_done = NULL;
393275970Scy	bp->bio_offset = offset;
394275970Scy	bp->bio_length = length;
395275970Scy	ptr = g_malloc(length, M_WAITOK);
396275970Scy	bp->bio_data = ptr;
397275970Scy	g_io_request(bp, cp);
398275970Scy	errorc = biowait(bp, "gread");
399275970Scy	if (error != NULL)
400275970Scy		*error = errorc;
401275970Scy	g_destroy_bio(bp);
402275970Scy	if (errorc) {
403275970Scy		g_free(ptr);
404275970Scy		ptr = NULL;
405275970Scy	}
406275970Scy	return (ptr);
407275970Scy}
408275970Scy
409275970Scyint
410275970Scyg_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
411275970Scy{
412275970Scy	struct bio *bp;
413275970Scy	int error;
414275970Scy
415275970Scy	KASSERT(length >= 512 && length <= DFLTPHYS,
416275970Scy		("g_write_data(): invalid length %jd", (intmax_t)length));
417330567Sgordon
418330567Sgordon	bp = g_new_bio();
419330567Sgordon	bp->bio_cmd = BIO_WRITE;
420275970Scy	bp->bio_done = NULL;
421275970Scy	bp->bio_offset = offset;
422275970Scy	bp->bio_length = length;
423275970Scy	bp->bio_data = ptr;
424275970Scy	g_io_request(bp, cp);
425275970Scy	error = biowait(bp, "gwrite");
426275970Scy	g_destroy_bio(bp);
427275970Scy	return (error);
428330567Sgordon}
429275970Scy