g_bde_work.c revision 114153
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD: head/sys/geom/bde/g_bde_work.c 114153 2003-04-28 06:38:31Z phk $
33 *
34 * This source file contains the state-engine which makes things happen in the
35 * right order.
36 *
37 * Outline:
38 *   1) g_bde_start1()
39 *	Break the struct bio into multiple work packets one per zone.
40 *   2) g_bde_start2()
41 *	Setup the necessary sector buffers and start those read operations
42 *	which we can start at this time and put the item on the work-list.
43 *   3) g_bde_worker()
44 *	Scan the work-list for items which are ready for crypto processing
45 *	and call the matching crypto function in g_bde_crypt.c and schedule
46 *	any writes needed.  Read operations finish here by releasing the
47 *	sector buffers and delivering the original bio request.
48 *   4) g_bde_write_done()
49 *	Release sector buffers and deliver the original bio request.
50 *
51 * Because of the C-scope rules, the functions are almost perfectly in the
52 * opposite order in this source file.
53 *
54 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
55 * XXX: additional states to this state-engine.  Since no hardware available
56 * XXX: at this time has AES support, implementing this has been postponed
57 * XXX: until such time as it would result in a benefit.
58 */
59
60#include <sys/param.h>
61#include <sys/bio.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/queue.h>
65#include <sys/malloc.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <sys/sysctl.h>
69#include <sys/proc.h>
70#include <sys/kthread.h>
71
72#include <crypto/rijndael/rijndael.h>
73#include <crypto/sha2/sha2.h>
74#include <geom/geom.h>
75#include <geom/bde/g_bde.h>
76
77static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
78static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
79static void g_bde_release_keysector(struct g_bde_work *wp);
80static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
81static int g_bde_start_read(struct g_bde_sector *sp);
82static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
83
84/*
85 * Work item allocation.
86 *
87 * C++ would call these constructors and destructors.
88 */
89static u_int g_bde_nwork;
90SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
91
92static struct g_bde_work *
93g_bde_new_work(struct g_bde_softc *sc)
94{
95	struct g_bde_work *wp;
96
97	wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
98	if (wp == NULL)
99		return (wp);
100	wp->state = SETUP;
101	wp->softc = sc;
102	g_bde_nwork++;
103	sc->nwork++;
104	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
105	return (wp);
106}
107
108static void
109g_bde_delete_work(struct g_bde_work *wp)
110{
111	struct g_bde_softc *sc;
112
113	sc = wp->softc;
114	g_bde_nwork--;
115	sc->nwork--;
116	TAILQ_REMOVE(&sc->worklist, wp, list);
117	g_free(wp);
118}
119
120/*
121 * Sector buffer allocation
122 *
123 * These two functions allocate and free back variable sized sector buffers
124 */
125
126static u_int g_bde_nsect;
127SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
128
129static void
130g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
131{
132
133	g_bde_nsect--;
134	sc->nsect--;
135	if (sp->malloc)
136		g_free(sp->data);
137	g_free(sp);
138}
139
140static struct g_bde_sector *
141g_bde_new_sector(struct g_bde_work *wp, u_int len)
142{
143	struct g_bde_sector *sp;
144
145	sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
146	if (sp == NULL)
147		return (sp);
148	if (len > 0) {
149		sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
150		if (sp->data == NULL) {
151			g_free(sp);
152			return (NULL);
153		}
154		sp->malloc = 1;
155	}
156	g_bde_nsect++;
157	wp->softc->nsect++;
158	sp->size = len;
159	sp->softc = wp->softc;
160	sp->ref = 1;
161	sp->owner = wp;
162	sp->offset = wp->so;
163	sp->state = JUNK;
164	return (sp);
165}
166
167/*
168 * Skey sector cache.
169 *
170 * Nothing prevents two separate I/O requests from addressing the same zone
171 * and thereby needing the same skey sector.  We therefore need to sequence
172 * I/O operations to the skey sectors.  A certain amount of caching is also
173 * desirable, although the extent of benefit from this is not at this point
174 * determined.
175 *
176 * XXX: GEOM may be able to grow a generic caching facility at some point
177 * XXX: to support such needs.
178 */
179
180static u_int g_bde_ncache;
181SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
182
183static void
184g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
185{
186
187	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
188	if (sp->ref != 0)
189		return;
190	TAILQ_REMOVE(&sc->freelist, sp, list);
191	g_bde_ncache--;
192	sc->ncache--;
193	bzero(sp->data, sp->size);
194	g_bde_delete_sector(sc, sp);
195}
196
197static struct g_bde_sector *
198g_bde_get_keysector(struct g_bde_work *wp)
199{
200	struct g_bde_sector *sp;
201	struct g_bde_softc *sc;
202	off_t offset;
203
204	offset = wp->kso;
205	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
206	sc = wp->softc;
207
208	if (malloc_last_fail() < g_bde_ncache)
209		g_bde_purge_sector(sc, -1);
210
211	sp = TAILQ_FIRST(&sc->freelist);
212	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
213		g_bde_purge_one_sector(sc, sp);
214
215	TAILQ_FOREACH(sp, &sc->freelist, list) {
216		if (sp->offset == offset)
217			break;
218	}
219	if (sp != NULL) {
220		sp->ref++;
221		KASSERT(sp->offset == offset, ("wrong offset"));
222		KASSERT(sp->softc == wp->softc, ("wrong softc"));
223		if (sp->ref == 1)
224			sp->owner = wp;
225	} else {
226		if (malloc_last_fail() < g_bde_ncache) {
227			TAILQ_FOREACH(sp, &sc->freelist, list)
228				if (sp->ref == 0)
229					break;
230		}
231		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
232			sp = TAILQ_FIRST(&sc->freelist);
233		if (sp != NULL && sp->ref > 0)
234			sp = NULL;
235		if (sp == NULL) {
236			sp = g_bde_new_sector(wp, sc->sectorsize);
237			if (sp != NULL) {
238				g_bde_ncache++;
239				sc->ncache++;
240				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
241				sp->malloc = 2;
242			}
243		}
244		if (sp != NULL) {
245			sp->offset = offset;
246			sp->softc = wp->softc;
247			sp->ref = 1;
248			sp->owner = wp;
249			sp->state = JUNK;
250			sp->error = 0;
251		}
252	}
253	if (sp != NULL) {
254		TAILQ_REMOVE(&sc->freelist, sp, list);
255		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
256		sp->used = time_uptime;
257	}
258	wp->ksp = sp;
259	return(sp);
260}
261
262static void
263g_bde_release_keysector(struct g_bde_work *wp)
264{
265	struct g_bde_softc *sc;
266	struct g_bde_work *wp2;
267	struct g_bde_sector *sp;
268
269	sp = wp->ksp;
270	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
271	KASSERT(sp->malloc == 2, ("Wrong sector released"));
272	sc = sp->softc;
273	KASSERT(sc != NULL, ("NULL sp->softc"));
274	KASSERT(wp == sp->owner, ("Releasing, not owner"));
275	sp->owner = NULL;
276	wp->ksp = NULL;
277	sp->ref--;
278	if (sp->ref > 0) {
279		TAILQ_REMOVE(&sc->freelist, sp, list);
280		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
281		TAILQ_FOREACH(wp2, &sc->worklist, list) {
282			if (wp2->ksp == sp) {
283				KASSERT(wp2 != wp, ("Self-reowning"));
284				sp->owner = wp2;
285				wakeup(sp->softc);
286				break;
287			}
288		}
289		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
290	} else if (sp->error != 0) {
291		sp->offset = ~0;
292		sp->error = 0;
293		sp->state = JUNK;
294	}
295	TAILQ_REMOVE(&sc->freelist, sp, list);
296	TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
297}
298
299static void
300g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
301{
302	struct g_bde_sector *sp;
303	int n;
304
305	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
306	if (fraction > 0)
307		n = sc->ncache / fraction + 1;
308	else
309		n = g_bde_ncache - malloc_last_fail();
310	if (n < 0)
311		return;
312	if (n > sc->ncache)
313		n = sc->ncache;
314	while(n--) {
315		TAILQ_FOREACH(sp, &sc->freelist, list) {
316			if (sp->ref != 0)
317				continue;
318			TAILQ_REMOVE(&sc->freelist, sp, list);
319			g_bde_ncache--;
320			sc->ncache--;
321			bzero(sp->data, sp->size);
322			g_bde_delete_sector(sc, sp);
323			break;
324		}
325	}
326}
327
328static struct g_bde_sector *
329g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
330{
331	struct g_bde_sector *sp;
332
333	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
334	sp = g_bde_get_keysector(wp);
335	if (sp == NULL) {
336		g_bde_purge_sector(sc, -1);
337		sp = g_bde_get_keysector(wp);
338	}
339	if (sp == NULL)
340		return (sp);
341	if (sp->owner != wp)
342		return (sp);
343	if (sp->state == VALID)
344		return (sp);
345	if (g_bde_start_read(sp) == 0)
346		return (sp);
347	g_bde_release_keysector(wp);
348	return (NULL);
349}
350
351/*
352 * Contribute to the completion of the original bio request.
353 *
354 * We have no simple way to tell how many bits the original bio request has
355 * been segmented into, so the easiest way to determine when we can deliver
356 * it is to keep track of the number of bytes we have completed.  We keep
357 * track of any errors underway and latch onto the first one.
358 *
359 * We always report "nothing done" in case of error, because random bits here
360 * and there may be completed and returning a number of completed bytes does
361 * not convey any useful information about which bytes they were.  If some
362 * piece of broken code somewhere interprets this to mean that nothing has
363 * changed on the underlying media they deserve the lossage headed for them.
364 *
365 * A single mutex per g_bde instance is used to prevent contention.
366 */
367
368static void
369g_bde_contribute(struct bio *bp, off_t bytes, int error)
370{
371	struct g_bde_softc *sc;
372
373	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
374	     bp, (intmax_t)bytes, error);
375	sc = bp->bio_driver1;
376	if (bp->bio_error == 0)
377		bp->bio_error = error;
378	bp->bio_completed += bytes;
379	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
380	if (bp->bio_completed == bp->bio_length) {
381		if (bp->bio_error != 0)
382			bp->bio_completed = 0;
383		g_io_deliver(bp, bp->bio_error);
384	}
385}
386
387/*
388 * A write operation has finished.  When we have all expected cows in the
389 * barn close the door and call it a day.
390 */
391
392static void
393g_bde_write_done(struct bio *bp)
394{
395	struct g_bde_sector *sp;
396	struct g_bde_work *wp;
397	struct g_bde_softc *sc;
398
399	sp = bp->bio_caller1;
400	sc = bp->bio_caller2;
401	mtx_lock(&sc->worklist_mutex);
402	KASSERT(sp != NULL, ("NULL sp"));
403	KASSERT(sc != NULL, ("NULL sc"));
404	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
405	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
406	sp->error = bp->bio_error;
407	g_destroy_bio(bp);
408	wp = sp->owner;
409	if (wp->error == 0)
410		wp->error = sp->error;
411
412	if (wp->bp->bio_cmd == BIO_DELETE) {
413		KASSERT(sp == wp->sp, ("trashed delete op"));
414		g_bde_contribute(wp->bp, wp->length, wp->error);
415		g_bde_delete_sector(sc, sp);
416		g_bde_delete_work(wp);
417		mtx_unlock(&sc->worklist_mutex);
418		return;
419	}
420
421	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
422	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
423	if (wp->sp == sp) {
424		g_bde_delete_sector(sc, wp->sp);
425		wp->sp = NULL;
426	} else {
427		sp->state = VALID;
428	}
429	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
430		g_bde_contribute(wp->bp, wp->length, wp->error);
431		g_bde_release_keysector(wp);
432		g_bde_delete_work(wp);
433	}
434	mtx_unlock(&sc->worklist_mutex);
435	return;
436}
437
438/*
439 * Send a write request for the given sector down the pipeline.
440 */
441
442static int
443g_bde_start_write(struct g_bde_sector *sp)
444{
445	struct bio *bp;
446	struct g_bde_softc *sc;
447
448	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
449	sc = sp->softc;
450	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
451	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
452	bp = g_new_bio();
453	if (bp == NULL)
454		return (ENOMEM);
455	bp->bio_cmd = BIO_WRITE;
456	bp->bio_offset = sp->offset;
457	bp->bio_data = sp->data;
458	bp->bio_length = sp->size;
459	bp->bio_done = g_bde_write_done;
460	bp->bio_caller1 = sp;
461	bp->bio_caller2 = sc;
462	sp->state = IO;
463	g_io_request(bp, sc->consumer);
464	return(0);
465}
466
467/*
468 * A read operation has finished.  Mark the sector no longer iobusy and
469 * wake up the worker thread and let it do its thing.
470 */
471
472static void
473g_bde_read_done(struct bio *bp)
474{
475	struct g_bde_sector *sp;
476	struct g_bde_softc *sc;
477
478	sp = bp->bio_caller1;
479	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
480	sc = bp->bio_caller2;
481	mtx_lock(&sc->worklist_mutex);
482	sp->error = bp->bio_error;
483	if (sp->error == 0)
484		sp->state = VALID;
485	else
486		sp->state = JUNK;
487	wakeup(sc);
488	g_destroy_bio(bp);
489	mtx_unlock(&sc->worklist_mutex);
490}
491
492/*
493 * Send a read request for the given sector down the pipeline.
494 */
495
496static int
497g_bde_start_read(struct g_bde_sector *sp)
498{
499	struct bio *bp;
500	struct g_bde_softc *sc;
501
502	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
503	sc = sp->softc;
504	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
505	bp = g_new_bio();
506	if (bp == NULL)
507		return (ENOMEM);
508	bp->bio_cmd = BIO_READ;
509	bp->bio_offset = sp->offset;
510	bp->bio_data = sp->data;
511	bp->bio_length = sp->size;
512	bp->bio_done = g_bde_read_done;
513	bp->bio_caller1 = sp;
514	bp->bio_caller2 = sc;
515	sp->state = IO;
516	g_io_request(bp, sc->consumer);
517	return(0);
518}
519
520/*
521 * The worker thread.
522 *
523 * The up/down path of GEOM is not allowed to sleep or do any major work
524 * so we use this thread to do the actual crypto operations and to push
525 * the state engine onwards.
526 *
527 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
528 * XXX: using a thread here is probably not needed.
529 */
530
531void
532g_bde_worker(void *arg)
533{
534	struct g_bde_softc *sc;
535	struct g_bde_work *wp;
536	struct g_geom *gp;
537	int busy, error;
538
539	gp = arg;
540	sc = gp->softc;
541
542	mtx_lock(&sc->worklist_mutex);
543	for (;;) {
544		busy = 0;
545		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
546		TAILQ_FOREACH(wp, &sc->worklist, list) {
547			KASSERT(wp != NULL, ("NULL wp"));
548			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
549			if (wp->state != WAIT)
550				continue;		/* Not interesting here */
551
552			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
553			KASSERT(wp->sp != NULL, ("NULL wp->sp"));
554
555			if (wp->ksp != NULL) {
556				if (wp->ksp->owner != wp)
557					continue;
558				if (wp->ksp->state == IO)
559					continue;
560				KASSERT(wp->ksp->state == VALID,
561				    ("Illegal sector state (JUNK ?)"));
562			}
563
564			if (wp->bp->bio_cmd == BIO_READ &&
565			     wp->sp->state == IO)
566				continue;
567
568			if (wp->ksp != NULL && wp->ksp->error != 0) {
569				g_bde_contribute(wp->bp, wp->length,
570				    wp->ksp->error);
571				g_bde_delete_sector(sc, wp->sp);
572				g_bde_release_keysector(wp);
573				g_bde_delete_work(wp);
574				busy++;
575				break;
576			}
577			switch(wp->bp->bio_cmd) {
578			case BIO_READ:
579				if (wp->ksp == NULL) {
580					KASSERT(wp->error != 0,
581					    ("BIO_READ, no ksp and no error"));
582					g_bde_contribute(wp->bp, wp->length,
583						    wp->error);
584				} else {
585					if (wp->sp->error == 0) {
586						mtx_unlock(&sc->worklist_mutex);
587						g_bde_crypt_read(wp);
588						mtx_lock(&sc->worklist_mutex);
589					}
590					g_bde_contribute(wp->bp, wp->length,
591						    wp->sp->error);
592				}
593				g_bde_delete_sector(sc, wp->sp);
594				if (wp->ksp != NULL)
595					g_bde_release_keysector(wp);
596				g_bde_delete_work(wp);
597				break;
598			case BIO_WRITE:
599				wp->state = FINISH;
600				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
601				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
602				mtx_unlock(&sc->worklist_mutex);
603				g_bde_crypt_write(wp);
604				mtx_lock(&sc->worklist_mutex);
605				error = g_bde_start_write(wp->sp);
606				if (error) {
607					g_bde_contribute(wp->bp, wp->length, error);
608					g_bde_release_keysector(wp);
609					g_bde_delete_sector(sc, wp->sp);
610					g_bde_delete_work(wp);
611					break;
612				}
613				error = g_bde_start_write(wp->ksp);
614				if (wp->error == 0)
615					wp->error = error;
616				break;
617			case BIO_DELETE:
618				wp->state = FINISH;
619				mtx_unlock(&sc->worklist_mutex);
620				g_bde_crypt_delete(wp);
621				mtx_lock(&sc->worklist_mutex);
622				g_bde_start_write(wp->sp);
623				break;
624			}
625			busy++;
626			break;
627		}
628		if (!busy) {
629			/*
630			 * We don't look for our death-warrant until we are
631			 * idle.  Shouldn't make a difference in practice.
632			 */
633			if (sc->dead)
634				break;
635			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
636			error = msleep(sc, &sc->worklist_mutex,
637			    PRIBIO, "g_bde", hz);
638			if (error == EWOULDBLOCK) {
639				/*
640				 * Loose our skey cache in an orderly fashion.
641				 * The exact rate can be tuned to be less
642				 * aggressive if this is desirable.  10% per
643				 * second means that the cache is gone in a
644				 * few minutes.
645				 */
646				g_bde_purge_sector(sc, 10);
647			}
648		}
649	}
650	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
651	g_bde_purge_sector(sc, 1);
652	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
653	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
654	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
655	mtx_unlock(&sc->worklist_mutex);
656	sc->dead = 2;
657	wakeup(sc);
658	mtx_lock(&Giant);
659	kthread_exit(0);
660}
661
662/*
663 * g_bde_start1 has chopped the incoming request up so all the requests
664 * we see here are inside a single zone.  Map the data and key locations
665 * grab the buffers we need and fire off the first volley of read requests.
666 */
667
668static void
669g_bde_start2(struct g_bde_work *wp)
670{
671	struct g_bde_softc *sc;
672
673	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
674	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
675	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
676	sc = wp->softc;
677	if (wp->bp->bio_cmd == BIO_READ) {
678		wp->sp = g_bde_new_sector(wp, 0);
679		if (wp->sp == NULL) {
680			g_bde_contribute(wp->bp, wp->length, ENOMEM);
681			g_bde_delete_work(wp);
682			return;
683		}
684		wp->sp->size = wp->length;
685		wp->sp->data = wp->data;
686		if (g_bde_start_read(wp->sp) != 0) {
687			g_bde_contribute(wp->bp, wp->length, ENOMEM);
688			g_bde_delete_sector(sc, wp->sp);
689			g_bde_delete_work(wp);
690			return;
691		}
692		g_bde_read_keysector(sc, wp);
693		if (wp->ksp == NULL)
694			wp->error = ENOMEM;
695	} else if (wp->bp->bio_cmd == BIO_DELETE) {
696		wp->sp = g_bde_new_sector(wp, wp->length);
697		if (wp->sp == NULL) {
698			g_bde_contribute(wp->bp, wp->length, ENOMEM);
699			g_bde_delete_work(wp);
700			return;
701		}
702	} else if (wp->bp->bio_cmd == BIO_WRITE) {
703		wp->sp = g_bde_new_sector(wp, wp->length);
704		if (wp->sp == NULL) {
705			g_bde_contribute(wp->bp, wp->length, ENOMEM);
706			g_bde_delete_work(wp);
707			return;
708		}
709		g_bde_read_keysector(sc, wp);
710		if (wp->ksp == NULL) {
711			g_bde_contribute(wp->bp, wp->length, ENOMEM);
712			g_bde_delete_sector(sc, wp->sp);
713			g_bde_delete_work(wp);
714			return;
715		}
716	} else {
717		KASSERT(0 == 1,
718		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
719	}
720
721	wp->state = WAIT;
722	wakeup(sc);
723}
724
725/*
726 * Create a sequence of work structures, and have g_bde_map_sector() determine
727 * how long they each can be.  Feed them to g_bde_start2().
728 */
729
730void
731g_bde_start1(struct bio *bp)
732{
733	struct g_bde_softc *sc;
734	struct g_bde_work *wp;
735	off_t done;
736
737	sc = bp->bio_to->geom->softc;
738	bp->bio_driver1 = sc;
739
740	mtx_lock(&sc->worklist_mutex);
741	for(done = 0; done < bp->bio_length; ) {
742		wp = g_bde_new_work(sc);
743		if (wp != NULL) {
744			wp->bp = bp;
745			wp->offset = bp->bio_offset + done;
746			wp->data = bp->bio_data + done;
747			wp->length = bp->bio_length - done;
748			g_bde_map_sector(wp);
749			done += wp->length;
750			g_bde_start2(wp);
751		}
752		if (wp == NULL || bp->bio_error != 0) {
753			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
754			break;
755		}
756	}
757	mtx_unlock(&sc->worklist_mutex);
758	return;
759}
760