/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This source file contains the state engine which makes things happen in the
 * right order.
 *
 * Outline:
 *   1) g_bde_start1()
 *	Break the struct bio into multiple work packets, one per zone.
 *   2) g_bde_start2()
 *	Set up the necessary sector buffers, start those read operations
 *	which we can start at this time and put the item on the work-list.
 *   3) g_bde_worker()
 *	Scan the work-list for items which are ready for crypto processing
 *	and call the matching crypto function in g_bde_crypt.c and schedule
 *	any writes needed.  Read operations finish here by releasing the
 *	sector buffers and delivering the original bio request.
 *   4) g_bde_write_done()
 *	Release sector buffers and deliver the original bio request.
 *
 * Because of C's scoping rules, the functions appear in almost exactly the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
 * XXX: additional states to this state engine.  Since no hardware available
 * XXX: at this time has AES support, implementing this has been postponed
 * XXX: until such time as it would result in a benefit.
 */
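
/*
 * Rough state map (see g_bde.h for the definitions): work items move
 * SETUP -> WAIT -> FINISH, while sector buffers move JUNK -> IO -> VALID,
 * falling back to JUNK on error.
 */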

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha512.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_keysector(struct g_bde_work *wp);
static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
static int g_bde_start_read(struct g_bde_sector *sp);
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");

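/*
 * Allocate a zeroed work item in the SETUP state, link it onto the softc's
 * worklist and account for it in the per-instance and global counters.
 */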
static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
	struct g_bde_work *wp;

	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
	if (wp == NULL)
		return (wp);
	wp->state = SETUP;
	wp->softc = sc;
	g_bde_nwork++;
	sc->nwork++;
	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
	return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	sc = wp->softc;
	g_bde_nwork--;
	sc->nwork--;
	TAILQ_REMOVE(&sc->worklist, wp, list);
	free(wp, M_GBDE);
}

/*
 * Sector buffer allocation
 *
 * These two functions allocate and free variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_bde_nsect--;
	sc->nsect--;
	if (sp->malloc)
		free(sp->data, M_GBDE);
	free(sp, M_GBDE);
}

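/*
 * A zero length means the caller will attach its own data buffer; in that
 * case sp->malloc stays clear, so g_bde_delete_sector() will not free it.
 */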
static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
	struct g_bde_sector *sp;

	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
	if (sp == NULL)
		return (sp);
	if (len > 0) {
		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
		if (sp->data == NULL) {
			free(sp, M_GBDE);
			return (NULL);
		}
		sp->malloc = 1;
	}
	g_bde_nsect++;
	wp->softc->nsect++;
	sp->size = len;
	sp->softc = wp->softc;
	sp->ref = 1;
	sp->owner = wp;
	sp->offset = wp->so;
	sp->state = JUNK;
	return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although the extent of the benefit has not yet been determined.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

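/*
 * Evict a single unreferenced sector from the cache.  The freelist is kept
 * in roughly least-recently-used order: g_bde_get_keysector() moves the
 * sectors it hands out to the tail, so purging scans from the head.
 */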
static void
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
	if (sp->ref != 0)
		return;
	TAILQ_REMOVE(&sc->freelist, sp, list);
	g_bde_ncache--;
	sc->ncache--;
	bzero(sp->data, sp->size);
	g_bde_delete_sector(sc, sp);
}

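/*
 * Find or create the cached key sector for this work item's key sector
 * offset (wp->kso).  On success the sector's reference count is bumped,
 * it is moved to the tail of the freelist and wp->ksp points at it; the
 * first work item to reference it becomes its owner.  Returns NULL if no
 * buffer could be obtained.
 */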
static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work *wp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;
	off_t offset;

	offset = wp->kso;
	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return (sp);
}

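/*
 * Drop this work item's reference on its key sector.  If other work items
 * still reference the sector, ownership is handed to one of them and the
 * worker thread is woken up; an unreferenced sector stays cached unless it
 * holds an error, in which case it is invalidated and queued for early
 * reuse.
 */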
static void
g_bde_release_keysector(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp2;
	struct g_bde_sector *sp;

	sp = wp->ksp;
	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
	KASSERT(sp->malloc == 2, ("Wrong sector released"));
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sp->softc"));
	KASSERT(wp == sp->owner, ("Releasing, not owner"));
	sp->owner = NULL;
	wp->ksp = NULL;
	sp->ref--;
	if (sp->ref > 0) {
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		TAILQ_FOREACH(wp2, &sc->worklist, list) {
			if (wp2->ksp == sp) {
				KASSERT(wp2 != wp, ("Self-reowning"));
				sp->owner = wp2;
				wakeup(sp->softc);
				break;
			}
		}
		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
	} else if (sp->error != 0) {
		sp->offset = ~0;
		sp->error = 0;
		sp->state = JUNK;
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
	}
}

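/*
 * Trim the key sector cache.  A positive fraction releases roughly that
 * fraction of the unreferenced cached sectors; otherwise more sectors are
 * released the more recently a malloc(9) failure was seen (as reported by
 * malloc_last_fail()).
 */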
static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	while (n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}

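/*
 * Obtain the key sector for a work item and make sure its contents are on
 * the way: if this work item became the owner and the sector is not already
 * VALID, a read is started.  Returns NULL if no buffer could be obtained or
 * the read could not be started.
 */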
static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
{
	struct g_bde_sector *sp;

	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
	sp = g_bde_get_keysector(wp);
	if (sp == NULL) {
		g_bde_purge_sector(sc, -1);
		sp = g_bde_get_keysector(wp);
	}
	if (sp == NULL)
		return (sp);
	if (sp->owner != wp)
		return (sp);
	if (sp->state == VALID)
		return (sp);
	if (g_bde_start_read(sp) == 0)
		return (sp);
	g_bde_release_keysector(wp);
	return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random bits
 * here and there may have been completed and returning a number of completed
 * bytes does not convey any useful information about which bytes they were.
 * If some piece of broken code somewhere interprets this to mean that nothing
 * has changed on the underlying media, they deserve the lossage headed for
 * them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	     bp, (intmax_t)bytes, error);
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * This is the common case "we're done with this work package" function.
 */

static void
g_bde_work_done(struct g_bde_work *wp, int error)
{

	g_bde_contribute(wp->bp, wp->length, error);
	if (wp->sp != NULL)
		g_bde_delete_sector(wp->softc, wp->sp);
	if (wp->ksp != NULL)
		g_bde_release_keysector(wp);
	g_bde_delete_work(wp);
}

/*
 * A write operation has finished.  When we have all the expected cows in
 * the barn, close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_work_done(wp, wp->error);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
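	/*
	 * The data sector is a throw-away copy and can be freed as soon as
	 * its write completes; the key sector stays cached and is merely
	 * marked VALID.  The work item is finished once both writes are in.
	 */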
	if (wp->sp == sp) {
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
		g_bde_work_done(wp, wp->error);
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * A read operation has finished.  Mark the sector no longer iobusy and
 * wake up the worker thread and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	if (sp->error == 0)
		sp->state = VALID;
	else
		sp->state = JUNK;
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_read_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware assisted encryption
 * XXX: using a thread here is probably not needed.
 */

void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp, *twp;
	struct g_geom *gp;
	int restart, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		restart = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
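		/*
		 * Look for work items in the WAIT state whose sector I/O
		 * has completed.  Every crypto call drops the mutex, so the
		 * scan is restarted from the top afterwards.
		 */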
		TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (%d)",
				    wp->ksp->state));
			}

			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
				continue;

			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_work_done(wp, wp->ksp->error);
				continue;
			}
			switch (wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_work_done(wp, wp->error);
					break;
				}
				if (wp->sp->error != 0) {
					g_bde_work_done(wp, wp->sp->error);
					break;
				}
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_read(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_work_done(wp, wp->sp->error);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp,
				    ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp,
				    ("Write not owner ksp"));
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_work_done(wp, error);
					break;
				}
				error = g_bde_start_write(wp->ksp);
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				restart++;
				g_bde_start_write(wp->sp);
				break;
			}
			if (restart)
				break;
		}
		if (!restart) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle.  Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "-", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Lose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable.  10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	sc->dead = 2;
	wakeup(sc);
	kproc_exit(0);
}

/*
 * g_bde_start1() has chopped the incoming request up so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
	struct g_bde_softc *sc;

	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
	sc = wp->softc;
	switch (wp->bp->bio_cmd) {
	case BIO_READ:
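		/*
		 * A read goes directly into the caller's buffer (note the
		 * zero-length allocation below); only the key sector gets
		 * a buffer of its own.
		 */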
		wp->sp = g_bde_new_sector(wp, 0);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		wp->sp->size = wp->length;
		wp->sp->data = wp->data;
		if (g_bde_start_read(wp->sp) != 0) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL)
			wp->error = ENOMEM;
		break;
	case BIO_DELETE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	case BIO_WRITE:
		wp->sp = g_bde_new_sector(wp, wp->length);
		if (wp->sp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		g_bde_read_keysector(sc, wp);
		if (wp->ksp == NULL) {
			g_bde_work_done(wp, ENOMEM);
			return;
		}
		break;
	default:
		KASSERT(0 == 1,
		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
	}

	wp->state = WAIT;
	wakeup(sc);
}

/*
 * Create a sequence of work structures, and have g_bde_map_sector() determine
 * how long they each can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			wp->length = bp->bio_length - done;
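			/*
			 * g_bde_map_sector() works out the data and key
			 * sector locations and clips wp->length so the
			 * packet stays within a single zone.
			 */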
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}