g_bde_work.c revision 110541
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/geom/bde/g_bde_work.c 110541 2003-02-08 13:03:57Z phk $
 *
 * This source file contains the state-engine which makes things happen in the
 * right order.
 *
 * Outline:
 *   1) g_bde_start1()
 *	Break the struct bio into multiple work packets, one per zone.
 *   2) g_bde_start2()
 *	Set up the necessary sector buffers and start those read operations
 *	which we can start at this time and put the item on the work-list.
 *   3) g_bde_worker()
 *	Scan the work-list for items which are ready for crypto processing
 *	and call the matching crypto function in g_bde_crypt.c and schedule
 *	any writes needed.  Read operations finish here by releasing the
 *	sector buffers and delivering the original bio request.
 *   4) g_bde_write_done()
 *	Release sector buffers and deliver the original bio request.
 *
 * Because of the C-scope rules, the functions are almost perfectly in the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
 * XXX: additional states to this state-engine.  Since no hardware available
 * XXX: at this time has AES support, implementing this has been postponed
 * XXX: until such time as it would result in a benefit.
 */

#include <sys/param.h>
#include <sys/stdint.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael.h>
#include <crypto/sha2/sha2.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

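/*
 * Allocate a work item, zeroed and in SETUP state, and put it on the
 * softc's worklist.  Allocation is M_NOWAIT, so callers must cope with
 * a NULL return.
 */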
static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

	wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

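/*
 * Take a work item off the worklist, decrement the counters and free it.
 */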
static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	g_free(wp);
}

/*
 * Sector buffer allocation
 *
 * These two functions allocate and free back variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		g_free(sp->data);
	g_free(sp);
}

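/*
 * Allocate a sector buffer.  A non-zero len also allocates a data buffer
 * and sets sp->malloc so g_bde_delete_sector() knows to free it; len == 0
 * means the caller will point sp->data at its own buffer.
 */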
static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			g_free(sp);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although the extent of benefit from this is not at this point
 * determined.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

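/*
 * Evict one unreferenced sector from the cache: unlink it from the
 * freelist, zero its contents and free it.
 */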
static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

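/*
 * Look up the skey sector at "offset" in the cache, or allocate and
 * insert a new one.  Under memory pressure, unreferenced cache entries
 * are recycled first.  If no sector can be obtained, the cache is purged
 * and the lookup retried before giving up.
 */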
static struct g_bde_sector *
g_bde_get_sector(struct g_bde_work *wp, off_t offset)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			g_bde_ncache++;
			sc->ncache++;
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
	}
	wp->ksp = sp;
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_sector(wp, offset);
	}
	if (sp != NULL)
		sp->used = time_uptime;
	KASSERT(sp != NULL, ("get_sector failed"));
	return(sp);
}

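/*
 * Drop a work item's reference on its skey sector.  If other work items
 * still reference it, hand ownership to one of them and wake the worker;
 * otherwise leave the sector cached on the freelist (invalidated if it
 * carries an error).
 */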
static void
g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;

	g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
	} else if (sp->error != 0) {
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
	}
	TAILQ_REMOVE(&sc->freelist, sp, list);
	TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
}

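/*
 * Trim the skey sector cache.  With a positive fraction, drop roughly
 * 1/fraction of the cached sectors; with a non-positive fraction, size
 * the purge by the malloc_last_fail() memory-pressure heuristic.
 */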
static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while(n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}

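/*
 * Find (or allocate) the skey sector for this work item and, unless it
 * is already valid or owned by another work item, start a read of it.
 * Returns NULL if the sector could not be obtained or the read could
 * not be started.
 */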
static struct g_bde_sector *
g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
	sp = g_bde_get_sector(wp, offset);
	if (sp == NULL)
		return (sp);
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_sector(wp, sp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request has
 * been segmented into, so the easiest way to determine when we can deliver
 * it is to keep track of the number of bytes we have completed.  We keep
 * track of any errors along the way and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random pieces here
 * and there may have completed, and returning a number of completed bytes does
 * not convey any useful information about which bytes they were.  If some
 * piece of broken code somewhere interprets this to mean that nothing has
 * changed on the underlying media, they deserve the lossage headed for them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	sc = bp->bio_driver1;
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * A write operation has finished.  When we have all expected cows in the
 * barn, close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_delete_sector(sc, sp);
		g_bde_delete_work(wp);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

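	/*
	 * A BIO_WRITE work item issues two writes, one for the data sector
	 * and one for the skey sector.  Only when both have completed (the
	 * skey sector then being VALID) can we deliver the original bio and
	 * retire the work item.
	 */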
	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_release_sector(wp, wp->ksp);
		g_bde_delete_work(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return(0);
}

/*
 * A read operation has finished.  Mark the sector no longer iobusy and
 * wake up the worker thread and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	sp->error = bp->bio_error;
	sp->state = VALID;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return(0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption,
 * XXX: using a thread here is probably not needed.
 */

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	struct g_geom *gp;
	int busy, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		busy = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH(wp, &sc->worklist, list) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;		/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

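			/*
			 * If this work item has a key sector, it must be
			 * owned by us and fully read before we can make
			 * progress on the work item.
			 */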
			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (JUNK ?)"));
			}

			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
				continue;

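			/*
			 * If reading the key sector failed, fail the whole
			 * work item with that error.
			 */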
			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_contribute(wp->bp, wp->length,
				    wp->ksp->error);
				g_bde_delete_sector(sc, wp->sp);
				g_bde_release_sector(wp, wp->ksp);
				g_bde_delete_work(wp);
				busy++;
				break;
			}
			switch(wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp != NULL && wp->sp->error == 0) {
					mtx_unlock(&sc->worklist_mutex);
					g_bde_crypt_read(wp);
					mtx_lock(&sc->worklist_mutex);
				}
				g_bde_contribute(wp->bp, wp->length,
				    wp->sp->error);
				g_bde_delete_sector(sc, wp->sp);
				if (wp->ksp != NULL)
					g_bde_release_sector(wp, wp->ksp);
				g_bde_delete_work(wp);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				g_bde_start_write(wp->ksp);
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				g_bde_start_write(wp->sp);
				break;
			}
			busy++;
			break;
		}
		if (!busy) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "g_bde", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	mtx_lock(&Giant);
	kthread_exit(0);
}

/*
 * g_bde_start1 has chopped the incoming request up so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
	if (wp->bp->bio_cmd == BIO_READ) {
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_sector(sc, wp, wp->kso);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
	} else if (wp->bp->bio_cmd == BIO_DELETE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
	} else if (wp->bp->bio_cmd == BIO_WRITE) {
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_work(wp);
			return;
		}
		g_bde_read_sector(sc, wp, wp->kso);
		if (wp->ksp == NULL) {
			g_bde_contribute(wp->bp, wp->length, ENOMEM);
			g_bde_delete_sector(sc, wp->sp);
			g_bde_delete_work(wp);
			return;
		}
	} else {
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector() determine
 * how long they each can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for(done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp == NULL) {
			g_io_deliver(bp, ENOMEM);
			mtx_unlock(&sc->worklist_mutex);
			return;
		}
		wp->bp = bp;
		wp->offset = bp->bio_offset + done;
		wp->data = bp->bio_data + done;
		wp->length = bp->bio_length - done;
		g_bde_map_sector(wp);
		done += wp->length;
		g_bde_start2(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}