/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <sys/sysctl.h>
#include <sys/vmem.h>

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <sys/devicestat.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

static int	g_io_transient_map_bio(struct bio *bp);

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;

/*
 * Pace is a hint that we've had some trouble recently allocating
 * bios, so we should back off trying to send I/O down the stack
 * a bit to let the problem resolve.  When pacing, we also turn
 * off direct dispatch to reduce memory pressure from I/Os
 * there, at the expense of some added latency while the memory
 * pressure exists.  See g_io_schedule_down() for more details
 * and limitations.
 */
static volatile u_int pace;

static uma_zone_t	biozone;

/*
 * The head of the list of classifiers used in g_io_request.
 * Use g_register_classifier() and g_unregister_classifier()
 * to add or remove entries from the list.
 * Classifiers are invoked in registration order.
 */
static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook)
    g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq);

#include <machine/atomic.h>

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		KASSERT((bp->bio_flags & BIO_ONQUEUE),
		    ("Bio not on queue bp=%p target %p", bp, bq));
		bp->bio_flags &= ~BIO_ONQUEUE;
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

struct bio *
g_new_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

struct bio *
g_alloc_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	uma_zfree(biozone, bp);
}

struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		/*
		 * BIO_ORDERED flag may be used by disk drivers to enforce
		 * ordering restrictions, so this flag needs to be cloned.
		 * BIO_UNMAPPED and BIO_VLIST should be inherited, to properly
		 * indicate which way the buffer is passed.
		 * Other bio flags are not suitable for cloning.
		 */
		bp2->bio_flags = bp->bio_flags &
		    (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST);
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_ma = bp->bio_ma;
		bp2->bio_ma_n = bp->bio_ma_n;
		bp2->bio_ma_offset = bp->bio_ma_offset;
		bp2->bio_attribute = bp->bio_attribute;
		if (bp->bio_cmd == BIO_ZONE)
			bcopy(&bp->bio_zone, &bp2->bio_zone,
			    sizeof(bp->bio_zone));
		/* Inherit classification info from the parent */
		bp2->bio_classifier1 = bp->bio_classifier1;
		bp2->bio_classifier2 = bp->bio_classifier2;
		bp->bio_children++;
	}
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}

struct bio *
g_duplicate_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
	bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
	bp2->bio_parent = bp;
	bp2->bio_cmd = bp->bio_cmd;
	bp2->bio_length = bp->bio_length;
	bp2->bio_offset = bp->bio_offset;
	bp2->bio_data = bp->bio_data;
	bp2->bio_ma = bp->bio_ma;
	bp2->bio_ma_n = bp->bio_ma_n;
	bp2->bio_ma_offset = bp->bio_ma_offset;
	bp2->bio_attribute = bp->bio_attribute;
	bp->bio_children++;
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}

void
g_reset_bio(struct bio *bp)
{

	bzero(bp, sizeof(*bp));
}

void
g_io_init()
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	biozone = uma_zcreate("g_bio", sizeof (struct bio),
	    NULL, NULL,
	    NULL, NULL,
	    0, 0);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}
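
/*
 * Example use of g_io_getattr() (hypothetical, for illustration only):
 * query a provider's ident string into a caller-supplied buffer.  The
 * attribute name and buffer size below are assumptions, not defined in
 * this file.
 *
 *	char ident[DISK_IDENT_SIZE];
 *	int len = sizeof(ident);
 *
 *	error = g_io_getattr("GEOM::ident", cp, &len, ident);
 *	if (error == 0)
 *		printf("ident: %s\n", ident);
 */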

int
g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_ZONE;
	bp->bio_done = NULL;
	/*
	 * XXX KDM need to handle report zone data.
	 */
	bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args));
	if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES)
		bp->bio_length =
		    zone_args->zone_params.report.entries_allocated *
		    sizeof(struct disk_zone_rep_entry);
	else
		bp->bio_length = 0;

	g_io_request(bp, cp);
	error = biowait(bp, "gzone");
	bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args));
	g_destroy_bio(bp);
	return (error);
}

int
g_io_flush(struct g_consumer *cp)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_FLUSH;
	bp->bio_flags |= BIO_ORDERED;
	bp->bio_done = NULL;
	bp->bio_attribute = NULL;
	bp->bio_offset = cp->provider->mediasize;
	bp->bio_length = 0;
	bp->bio_data = NULL;
	g_io_request(bp, cp);
	error = biowait(bp, "gflush");
	g_destroy_bio(bp);
	return (error);
}

static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	off_t excess;
	int error;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation */
	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		if (cp->acw == 0)
			return (EPERM);
		break;
	case BIO_ZONE:
		if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) ||
		    (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) {
			if (cp->acr == 0)
				return (EPERM);
		} else if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If the provider is marked for error, don't disturb it. */
	if (pp->error)
		return (pp->error);
	if (cp->flags & G_CF_ORPHAN)
		return (ENXIO);

	switch(bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Zero sectorsize or mediasize is probably a lack of media. */
		if (pp->sectorsize == 0 || pp->mediasize == 0)
			return (ENXIO);
		/* Reject I/O not on sector boundary */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not integral sector long */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests before or past the end of media. */
		if (bp->bio_offset < 0)
			return (EIO);
		if (bp->bio_offset > pp->mediasize)
			return (EIO);

		/* Truncate requests to the end of the provider's media. */
		excess = bp->bio_offset + bp->bio_length;
		if (excess > bp->bio_to->mediasize) {
			KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
			    round_page(bp->bio_ma_offset +
			    bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
			    ("excess bio %p too short", bp));
			excess -= bp->bio_to->mediasize;
			bp->bio_length -= excess;
			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
				bp->bio_ma_n = round_page(bp->bio_ma_offset +
				    bp->bio_length) / PAGE_SIZE;
			}
			if (excess > 0)
				CTR3(KTR_GEOM, "g_down truncated bio "
				    "%p provider %s by %d", bp,
				    bp->bio_to->name, excess);
		}

		/* Deliver zero length transfers right here. */
		if (bp->bio_length == 0) {
			CTR2(KTR_GEOM, "g_down terminated 0-length "
			    "bp %p provider %s", bp, bp->bio_to->name);
			return (0);
		}

		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
			if ((error = g_io_transient_map_bio(bp)) >= 0)
				return (error);
		}
		break;
	default:
		break;
	}
	return (EJUSTRETURN);
}

/*
 * bio classification support.
 *
 * g_register_classifier() and g_unregister_classifier()
 * are used to add/remove a classifier from the list.
 * The list is protected using the g_bio_run_down lock,
 * because the classifiers are called in this path.
 *
 * g_io_request() passes bios that are not already classified
 * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
 * Classifiers can store their result in the two fields
 * bio_classifier1 and bio_classifier2.
 * A classifier that updates one of the fields should
 * return a non-zero value.
 * If no classifier updates the field, g_run_classifiers() sets
 * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
 */
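
/*
 * Example classifier (hypothetical, for illustration only): tag every
 * BIO_READ request with a caller-supplied token.  A non-zero return
 * value tells g_run_classifiers() that a classifier field was set.
 * The names my_classify, my_hook and my_token are assumptions, not
 * part of this file.
 *
 *	static int
 *	my_classify(void *arg, struct bio *bp)
 *	{
 *
 *		if (bp->bio_cmd != BIO_READ)
 *			return (0);
 *		bp->bio_classifier1 = arg;
 *		return (1);
 *	}
 *
 *	static struct g_classifier_hook my_hook = {
 *		.func = my_classify,
 *		.arg = &my_token,
 *	};
 *
 *	error = g_register_classifier(&my_hook);
 */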

int
g_register_classifier(struct g_classifier_hook *hook)
{

	g_bioq_lock(&g_bio_run_down);
	TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
	g_bioq_unlock(&g_bio_run_down);

	return (0);
}

void
g_unregister_classifier(struct g_classifier_hook *hook)
{
	struct g_classifier_hook *entry;

	g_bioq_lock(&g_bio_run_down);
	TAILQ_FOREACH(entry, &g_classifier_tailq, link) {
		if (entry == hook) {
			TAILQ_REMOVE(&g_classifier_tailq, hook, link);
			break;
		}
	}
	g_bioq_unlock(&g_bio_run_down);
}

static void
g_run_classifiers(struct bio *bp)
{
	struct g_classifier_hook *hook;
	int classified = 0;

	TAILQ_FOREACH(hook, &g_classifier_tailq, link)
		classified |= hook->func(hook->arg, bp);

	if (!classified)
		bp->bio_classifier1 = BIO_NOTCLASSIFIED;
}

void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	struct mtx *mtxp;
	int direct, error, first;
	uint8_t cmd;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
#ifdef DIAGNOSTIC
	KASSERT(bp->bio_driver1 == NULL,
	    ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_driver2 == NULL,
	    ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_pflags == 0,
	    ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
	/*
	 * Remember the consumer's private fields, so we can detect if they
	 * were modified by the provider.
	 */
	bp->_bio_caller1 = bp->bio_caller1;
	bp->_bio_caller2 = bp->bio_caller2;
	bp->_bio_cflags = bp->bio_cflags;
#endif

	cmd = bp->bio_cmd;
	if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) {
		KASSERT(bp->bio_data != NULL,
		    ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd));
	}
	if (cmd == BIO_DELETE || cmd == BIO_FLUSH) {
		KASSERT(bp->bio_data == NULL,
		    ("non-NULL bp->data in g_io_request(cmd=%hu)",
		    bp->bio_cmd));
	}
	if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) {
		KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
		    ("wrong offset %jd for sectorsize %u",
		    bp->bio_offset, cp->provider->sectorsize));
		KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
		    ("wrong length %jd for sectorsize %u",
		    bp->bio_length, cp->provider->sectorsize));
	}

	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));
	if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
	    ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
		binuptime(&bp->bio_t0);
	else
		getbinuptime(&bp->bio_t0);

#ifdef GET_STACK_USAGE
	direct = (cp->flags & G_CF_DIRECT_SEND) != 0 &&
	    (pp->flags & G_PF_DIRECT_RECEIVE) != 0 &&
	    !g_is_geom_thread(curthread) &&
	    ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ||
	    (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) &&
	    pace == 0;
	if (direct) {
		/* Block direct execution if less than half of the stack is left. */
		size_t	st, su;
		GET_STACK_USAGE(st, su);
		if (su * 2 > st)
			direct = 0;
	}
#else
	direct = 0;
#endif

	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
		g_bioq_lock(&g_bio_run_down);
		g_run_classifiers(bp);
		g_bioq_unlock(&g_bio_run_down);
	}

	/*
	 * The statistics collection is lockless, as such, but we
	 * cannot update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 */
	mtxp = mtx_pool_find(mtxpool_sleep, pp);
	mtx_lock(mtxp);
	if (g_collectstats & G_STATS_PROVIDERS)
		devstat_start_transaction(pp->stat, &bp->bio_t0);
	if (g_collectstats & G_STATS_CONSUMERS)
		devstat_start_transaction(cp->stat, &bp->bio_t0);
	pp->nstart++;
	cp->nstart++;
	mtx_unlock(mtxp);

	if (direct) {
		error = g_io_check(bp);
		if (error >= 0) {
			CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p "
			    "provider %s returned %d", bp, bp->bio_to->name,
			    error);
			g_io_deliver(bp, error);
			return;
		}
		bp->bio_to->geom->start(bp);
	} else {
		g_bioq_lock(&g_bio_run_down);
		first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
		TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
		bp->bio_flags |= BIO_ONQUEUE;
		g_bio_run_down.bio_queue_length++;
		g_bioq_unlock(&g_bio_run_down);
		/* Pass it on down. */
		if (first)
			wakeup(&g_wait_down);
	}
}

void
g_io_deliver(struct bio *bp, int error)
{
	struct bintime now;
	struct g_consumer *cp;
	struct g_provider *pp;
	struct mtx *mtxp;
	int direct, first;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	pp = bp->bio_to;
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
	cp = bp->bio_from;
	if (cp == NULL) {
		bp->bio_error = error;
		bp->bio_done(bp);
		return;
	}
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
#ifdef DIAGNOSTIC
	/*
	 * Some classes - GJournal in particular - can modify bio's
	 * private fields while the bio is in transit; the
	 * G_GEOM_VOLATILE_BIO flag means this is expected behaviour
	 * for that particular geom.
	 */
	if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) {
		KASSERT(bp->bio_caller1 == bp->_bio_caller1,
		    ("bio_caller1 used by the provider %s", pp->name));
		KASSERT(bp->bio_caller2 == bp->_bio_caller2,
		    ("bio_caller2 used by the provider %s", pp->name));
		KASSERT(bp->bio_cflags == bp->_bio_cflags,
		    ("bio_cflags used by the provider %s", pp->name));
	}
#endif
	KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0"));
	KASSERT(bp->bio_completed <= bp->bio_length,
	    ("bio_completed can't be greater than bio_length"));

	g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));

	/*
	 * XXX: the next two don't belong here.
	 */
	bp->bio_bcount = bp->bio_length;
	bp->bio_resid = bp->bio_bcount - bp->bio_completed;

#ifdef GET_STACK_USAGE
	direct = (pp->flags & G_PF_DIRECT_SEND) &&
		 (cp->flags & G_CF_DIRECT_RECEIVE) &&
		 !g_is_geom_thread(curthread);
	if (direct) {
		/* Block direct execution if less than half of the stack is left. */
		size_t	st, su;
		GET_STACK_USAGE(st, su);
		if (su * 2 > st)
			direct = 0;
	}
#else
	direct = 0;
#endif

	/*
	 * The statistics collection is lockless, as such, but we
	 * cannot update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 */
	if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
	    ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
		binuptime(&now);
	mtxp = mtx_pool_find(mtxpool_sleep, cp);
	mtx_lock(mtxp);
	if (g_collectstats & G_STATS_PROVIDERS)
		devstat_end_transaction_bio_bt(pp->stat, bp, &now);
	if (g_collectstats & G_STATS_CONSUMERS)
		devstat_end_transaction_bio_bt(cp->stat, bp, &now);
	cp->nend++;
	pp->nend++;
	mtx_unlock(mtxp);

	if (error != ENOMEM) {
		bp->bio_error = error;
		if (direct) {
			biodone(bp);
		} else {
			g_bioq_lock(&g_bio_run_up);
			first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
			TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
			bp->bio_flags |= BIO_ONQUEUE;
			g_bio_run_up.bio_queue_length++;
			g_bioq_unlock(&g_bio_run_up);
			if (first)
				wakeup(&g_wait_up);
		}
		return;
	}

	if (bootverbose)
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
	bp->bio_children = 0;
	bp->bio_inbed = 0;
	bp->bio_driver1 = NULL;
	bp->bio_driver2 = NULL;
	bp->bio_pflags = 0;
	g_io_request(bp, cp);
	pace = 1;
	return;
}

SYSCTL_DECL(_kern_geom);

static long transient_maps;
SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
    &transient_maps, 0,
    "Total count of the transient mapping requests");
u_int transient_map_retries = 10;
SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
    &transient_map_retries, 0,
    "Max count of retries used before giving up on creating transient map");
int transient_map_hard_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
    &transient_map_hard_failures, 0,
    "Failures to establish the transient mapping due to retry attempts "
    "exhausted");
int transient_map_soft_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
    &transient_map_soft_failures, 0,
    "Count of retried failures to establish the transient mapping");
int inflight_transient_maps;
SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
    &inflight_transient_maps, 0,
    "Current count of the active transient maps");

static int
g_io_transient_map_bio(struct bio *bp)
{
	vm_offset_t addr;
	long size;
	u_int retried;

	KASSERT(unmapped_buf_allowed, ("unmapped disabled"));

	size = round_page(bp->bio_ma_offset + bp->bio_length);
	KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
	addr = 0;
	retried = 0;
	atomic_add_long(&transient_maps, 1);
retry:
	if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
		if (transient_map_retries != 0 &&
		    retried >= transient_map_retries) {
			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
			    bp, bp->bio_to->name);
			atomic_add_int(&transient_map_hard_failures, 1);
			return (EDEADLK/* XXXKIB */);
		} else {
			/*
			 * Naive attempt to quiesce the I/O to get more
			 * in-flight requests completed and defragment
			 * the transient_arena.
			 */
			CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
			    bp, bp->bio_to->name, retried);
			pause("g_d_tra", hz / 10);
			retried++;
			atomic_add_int(&transient_map_soft_failures, 1);
			goto retry;
		}
	}
	atomic_add_int(&inflight_transient_maps, 1);
	pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
	bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
	bp->bio_flags |= BIO_TRANSIENT_MAPPING;
	bp->bio_flags &= ~BIO_UNMAPPED;
	return (EJUSTRETURN);
}

void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	int error;

	for(;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			CTR0(KTR_GEOM, "g_down going to sleep");
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "-", 0);
			continue;
		}
		CTR0(KTR_GEOM, "g_down has work to do");
		g_bioq_unlock(&g_bio_run_down);
		if (pace != 0) {
			/*
			 * There has been at least one memory allocation
			 * failure since the last I/O completed.  Pause 1ms to
			 * give the system a chance to free up memory.  We only
			 * do this once because a large number of allocations
			 * can fail in the direct dispatch case and there's no
			 * relationship between the number of these failures and
			 * the length of the outage.  If there's still an
			 * outage, we'll pause again and again until it's
			 * resolved.  Older versions paused longer and once per
			 * allocation failure.  This was OK for a single-threaded
			 * g_down, but with direct dispatch it would lead to a
			 * max of 10 IOPs for minutes at a time when transient
			 * memory issues prevented allocation for a batch of
			 * requests from the upper layers.
			 *
			 * XXX This pacing is really lame.  It needs to be
			 * solved by other methods.  This is OK only because
			 * the worst case scenario is so rare.  In the worst
			 * case scenario all memory is tied up waiting for I/O
			 * to complete, which can never happen since we can't
			 * allocate bios for that I/O.
			 */
			CTR0(KTR_GEOM, "g_down pacing self");
			pause("g_down", min(hz/1000, 1));
			pace = 0;
		}
		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
		    bp->bio_to->name);
		error = g_io_check(bp);
		if (error >= 0) {
			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
			    "%s returned %d", bp, bp->bio_to->name, error);
			g_io_deliver(bp, error);
			continue;
		}
		THREAD_NO_SLEEPING();
		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,
		    bp->bio_length);
		bp->bio_to->geom->start(bp);
		THREAD_SLEEPING_OK();
	}
}

void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{

	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p target taskq", bp));
	bp->bio_flags |= BIO_ONQUEUE;
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}

void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;

	for(;;) {
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR1(KTR_GEOM, "g_up processing task bp %p", bp);
			bp->bio_task(bp->bio_task_arg);
			THREAD_SLEEPING_OK();
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
			    "%jd len %ld", bp, bp->bio_to->name,
			    bp->bio_offset, bp->bio_length);
			biodone(bp);
			THREAD_SLEEPING_OK();
			continue;
		}
		CTR0(KTR_GEOM, "g_up going to sleep");
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "-", 0);
	}
}

void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_read_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}
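
/*
 * Example use of g_read_data() (hypothetical, for illustration only):
 * read the first sector of a provider.  The returned buffer is
 * allocated with g_malloc() and must be released with g_free().
 *
 *	u_char *buf;
 *	int error;
 *
 *	buf = g_read_data(cp, 0, cp->provider->sectorsize, &error);
 *	if (buf != NULL) {
 *		(inspect the sector here)
 *		g_free(buf);
 *	}
 */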

int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_write_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}

int
g_delete_data(struct g_consumer *cp, off_t offset, off_t length)
{
	struct bio *bp;
	int error;

	KASSERT(length > 0 && length >= cp->provider->sectorsize,
	    ("g_delete_data(): invalid length %jd", (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_DELETE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = NULL;
	g_io_request(bp, cp);
	error = biowait(bp, "gdelete");
	g_destroy_bio(bp);
	return (error);
}

void
g_print_bio(struct bio *bp)
{
	const char *pname, *cmd = NULL;

	if (bp->bio_to != NULL)
		pname = bp->bio_to->name;
	else
		pname = "[unknown]";

	switch (bp->bio_cmd) {
	case BIO_GETATTR:
		cmd = "GETATTR";
		printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute);
		return;
	case BIO_FLUSH:
		cmd = "FLUSH";
		printf("%s[%s]", pname, cmd);
		return;
	case BIO_ZONE: {
		char *subcmd = NULL;
		cmd = "ZONE";
		switch (bp->bio_zone.zone_cmd) {
		case DISK_ZONE_OPEN:
			subcmd = "OPEN";
			break;
		case DISK_ZONE_CLOSE:
			subcmd = "CLOSE";
			break;
		case DISK_ZONE_FINISH:
			subcmd = "FINISH";
			break;
		case DISK_ZONE_RWP:
			subcmd = "RWP";
			break;
		case DISK_ZONE_REPORT_ZONES:
			subcmd = "REPORT ZONES";
			break;
		case DISK_ZONE_GET_PARAMS:
			subcmd = "GET PARAMS";
			break;
		default:
			subcmd = "UNKNOWN";
			break;
		}
		printf("%s[%s,%s]", pname, cmd, subcmd);
		return;
	}
	case BIO_READ:
		cmd = "READ";
		break;
	case BIO_WRITE:
		cmd = "WRITE";
		break;
	case BIO_DELETE:
		cmd = "DELETE";
		break;
	default:
		cmd = "UNKNOWN";
		printf("%s[%s()]", pname, cmd);
		return;
	}
	printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
}