Deleted Added
sdiff udiff text old ( 114152 ) new ( 114153 )
full compact
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD: head/sys/geom/bde/g_bde_work.c 114152 2003-04-28 06:19:41Z phk $
33 *
34 * This source file contains the state-engine which makes things happen in the
35 * right order.
36 *
37 * Outline:
38 * 1) g_bde_start1()
39 * Break the struct bio into multiple work packets one per zone.
40 * 2) g_bde_start2()
41 * Setup the necessary sector buffers and start those read operations
42 * which we can start at this time and put the item on the work-list.
43 * 3) g_bde_worker()
44 * Scan the work-list for items which are ready for crypto processing
45 * and call the matching crypto function in g_bde_crypt.c and schedule
46 * any writes needed. Read operations finish here by releasing the
47 * sector buffers and delivering the original bio request.
48 * 4) g_bde_write_done()
49 * Release sector buffers and deliver the original bio request.
50 *
51 * Because of the C-scope rules, the functions are almost perfectly in the
52 * opposite order in this source file.
53 *
54 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
55 * XXX: additional states to this state-engine. Since no hardware available
56 * XXX: at this time has AES support, implementing this has been postponed
57 * XXX: until such time as it would result in a benefit.
58 */
59
60#include <sys/param.h>
61#include <sys/bio.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/queue.h>
65#include <sys/malloc.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <sys/sysctl.h>
69#include <sys/proc.h>
70#include <sys/kthread.h>
71
72#include <crypto/rijndael/rijndael.h>
73#include <crypto/sha2/sha2.h>
74#include <geom/geom.h>
75#include <geom/bde/g_bde.h>
76
77static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
78static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
79static void g_bde_release_keysector(struct g_bde_work *wp);
80static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
81static int g_bde_start_read(struct g_bde_sector *sp);
82static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
83
84/*
85 * Work item allocation.
86 *
87 * C++ would call these constructors and destructors.
88 */
89static u_int g_bde_nwork;
90SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
91
92static struct g_bde_work *
93g_bde_new_work(struct g_bde_softc *sc)
94{
95 struct g_bde_work *wp;
96
97 wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
98 if (wp == NULL)
99 return (wp);
100 wp->state = SETUP;
101 wp->softc = sc;
102 g_bde_nwork++;
103 sc->nwork++;
104 TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
105 return (wp);
106}
107
108static void
109g_bde_delete_work(struct g_bde_work *wp)
110{
111 struct g_bde_softc *sc;
112
113 sc = wp->softc;
114 g_bde_nwork--;
115 sc->nwork--;
116 TAILQ_REMOVE(&sc->worklist, wp, list);
117 g_free(wp);
118}
119
120/*
121 * Sector buffer allocation
122 *
123 * These two functions allocate and free back variable sized sector buffers
124 */
125
126static u_int g_bde_nsect;
127SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
128
129static void
130g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
131{
132
133 g_bde_nsect--;
134 sc->nsect--;
135 if (sp->malloc)
136 g_free(sp->data);
137 g_free(sp);
138}
139
140static struct g_bde_sector *
141g_bde_new_sector(struct g_bde_work *wp, u_int len)
142{
143 struct g_bde_sector *sp;
144
145 sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
146 if (sp == NULL)
147 return (sp);
148 if (len > 0) {
149 sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
150 if (sp->data == NULL) {
151 g_free(sp);
152 return (NULL);
153 }
154 sp->malloc = 1;
155 }
156 g_bde_nsect++;
157 wp->softc->nsect++;
158 sp->size = len;
159 sp->softc = wp->softc;
160 sp->ref = 1;
161 sp->owner = wp;
162 sp->offset = wp->so;
163 sp->state = JUNK;
164 return (sp);
165}
166
167/*
168 * Skey sector cache.
169 *
170 * Nothing prevents two separate I/O requests from addressing the same zone
171 * and thereby needing the same skey sector. We therefore need to sequence
172 * I/O operations to the skey sectors. A certain amount of caching is also
173 * desirable, although the extent of benefit from this is not at this point
174 * determined.
175 *
176 * XXX: GEOM may be able to grow a generic caching facility at some point
177 * XXX: to support such needs.
178 */
179
180static u_int g_bde_ncache;
181SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
182
183static void
184g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
185{
186
187 g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
188 if (sp->ref != 0)
189 return;
190 TAILQ_REMOVE(&sc->freelist, sp, list);
191 g_bde_ncache--;
192 sc->ncache--;
193 bzero(sp->data, sp->size);
194 g_bde_delete_sector(sc, sp);
195}
196
/*
 * Find the cached skey sector for "offset", or set one up.
 *
 * The freelist doubles as an LRU cache: sectors stay on it even while
 * referenced, ordered oldest-first.  On success the sector is moved to
 * the tail (most recently used), its refcount is bumped and wp->ksp is
 * set.  Returns NULL when nothing could be found or allocated.
 */
static struct g_bde_sector *
g_bde_get_sector(struct g_bde_work *wp, off_t offset)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
	sc = wp->softc;

	/* If malloc has failed recently, trim the cache down first. */
	if (malloc_last_fail() < g_bde_ncache)
		g_bde_purge_sector(sc, -1);

	/* Expire the oldest sector if it is idle and >300s old. */
	sp = TAILQ_FIRST(&sc->freelist);
	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
		g_bde_purge_one_sector(sc, sp);

	/* Cache lookup by offset. */
	TAILQ_FOREACH(sp, &sc->freelist, list) {
		if (sp->offset == offset)
			break;
	}
	if (sp != NULL) {
		sp->ref++;
		KASSERT(sp->offset == offset, ("wrong offset"));
		KASSERT(sp->softc == wp->softc, ("wrong softc"));
		/* The first referent becomes the owner. */
		if (sp->ref == 1)
			sp->owner = wp;
	} else {
		/* Cache miss: under memory pressure try to recycle an idle sector. */
		if (malloc_last_fail() < g_bde_ncache) {
			TAILQ_FOREACH(sp, &sc->freelist, list)
				if (sp->ref == 0)
					break;
		}
		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
			sp = TAILQ_FIRST(&sc->freelist);
		if (sp != NULL && sp->ref > 0)
			sp = NULL;
		if (sp == NULL) {
			/* Nothing recyclable: allocate a fresh one. */
			sp = g_bde_new_sector(wp, sc->sectorsize);
			if (sp != NULL) {
				g_bde_ncache++;
				sc->ncache++;
				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
				/* malloc == 2 marks a cached skey sector. */
				sp->malloc = 2;
			}
		}
		if (sp != NULL) {
			/* (Re)initialize for the new offset; contents are JUNK until read. */
			sp->offset = offset;
			sp->softc = wp->softc;
			sp->ref = 1;
			sp->owner = wp;
			sp->state = JUNK;
			sp->error = 0;
		}
	}
	if (sp != NULL) {
		/* Move to the tail: most recently used. */
		TAILQ_REMOVE(&sc->freelist, sp, list);
		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
		sp->used = time_uptime;
	}
	wp->ksp = sp;
	return(sp);
}
259
260static void
261g_bde_release_keysector(struct g_bde_work *wp)
262{
263 struct g_bde_softc *sc;
264 struct g_bde_work *wp2;
265 struct g_bde_sector *sp;
266
267 sp = wp->ksp;
268 g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
269 KASSERT(sp->malloc == 2, ("Wrong sector released"));
270 sc = sp->softc;
271 KASSERT(sc != NULL, ("NULL sp->softc"));
272 KASSERT(wp == sp->owner, ("Releasing, not owner"));
273 sp->owner = NULL;
274 wp->ksp = NULL;
275 sp->ref--;
276 if (sp->ref > 0) {
277 TAILQ_REMOVE(&sc->freelist, sp, list);
278 TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
279 TAILQ_FOREACH(wp2, &sc->worklist, list) {
280 if (wp2->ksp == sp) {
281 KASSERT(wp2 != wp, ("Self-reowning"));
282 sp->owner = wp2;
283 wakeup(sp->softc);
284 break;
285 }
286 }
287 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
288 } else if (sp->error != 0) {
289 sp->offset = ~0;
290 sp->error = 0;
291 sp->state = JUNK;
292 }
293 TAILQ_REMOVE(&sc->freelist, sp, list);
294 TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
295}
296
/*
 * Purge unreferenced sectors from the skey cache.
 *
 * fraction > 0: drop roughly 1/fraction of the per-instance cache
 * (plus one, so small caches still shrink).
 * fraction <= 0: shrink in response to memory pressure, guided by
 * malloc_last_fail().
 */
static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
	struct g_bde_sector *sp;
	int n;

	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
	if (fraction > 0)
		n = sc->ncache / fraction + 1;
	else
		n = g_bde_ncache - malloc_last_fail();
	if (n < 0)
		return;
	if (n > sc->ncache)
		n = sc->ncache;
	/* Each pass evicts the first idle (ref == 0) sector found. */
	while(n--) {
		TAILQ_FOREACH(sp, &sc->freelist, list) {
			if (sp->ref != 0)
				continue;
			TAILQ_REMOVE(&sc->freelist, sp, list);
			g_bde_ncache--;
			sc->ncache--;
			/* Scrub key material before freeing. */
			bzero(sp->data, sp->size);
			g_bde_delete_sector(sc, sp);
			break;
		}
	}
}
325
326static struct g_bde_sector *
327g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
328{
329 struct g_bde_sector *sp;
330
331 g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
332 sp = g_bde_get_sector(wp, wp->kso);
333 if (sp == NULL) {
334 g_bde_purge_sector(sc, -1);
335 sp = g_bde_get_sector(wp, wp->kso);
336 }
337 if (sp == NULL)
338 return (sp);
339 if (sp->owner != wp)
340 return (sp);
341 if (sp->state == VALID)
342 return (sp);
343 if (g_bde_start_read(sp) == 0)
344 return (sp);
345 g_bde_release_keysector(wp);
346 return (NULL);
347}
348
349/*
350 * Contribute to the completion of the original bio request.
351 *
352 * We have no simple way to tell how many bits the original bio request has
353 * been segmented into, so the easiest way to determine when we can deliver
354 * it is to keep track of the number of bytes we have completed. We keep
355 * track of any errors underway and latch onto the first one.
356 *
357 * We always report "nothing done" in case of error, because random bits here
358 * and there may be completed and returning a number of completed bytes does
359 * not convey any useful information about which bytes they were. If some
360 * piece of broken code somewhere interprets this to mean that nothing has
361 * changed on the underlying media they deserve the lossage headed for them.
362 *
363 * A single mutex per g_bde instance is used to prevent contention.
364 */
365
366static void
367g_bde_contribute(struct bio *bp, off_t bytes, int error)
368{
369 struct g_bde_softc *sc;
370
371 g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
372 bp, (intmax_t)bytes, error);
373 sc = bp->bio_driver1;
374 if (bp->bio_error == 0)
375 bp->bio_error = error;
376 bp->bio_completed += bytes;
377 KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
378 if (bp->bio_completed == bp->bio_length) {
379 if (bp->bio_error != 0)
380 bp->bio_completed = 0;
381 g_io_deliver(bp, bp->bio_error);
382 }
383}
384
385/*
386 * A write operation has finished. When we have all expected cows in the
387 * barn close the door and call it a day.
388 */
389
static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	/* Latch the first error onto the work item. */
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		/* A delete writes only the data sector, so we are done. */
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_delete_sector(sc, sp);
		g_bde_delete_work(wp);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	/*
	 * A write has two sectors outstanding: the data sector (wp->sp)
	 * and the skey sector (wp->ksp).  The original request can only
	 * be delivered once both writes have come back.
	 */
	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		/* Data sector done: free it and mark it so below. */
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		/* Skey sector done: its cached contents are now on disk. */
		sp->state = VALID;
	}
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
		/* Both halves finished: deliver and clean up. */
		g_bde_contribute(wp->bp, wp->length, wp->error);
		g_bde_release_keysector(wp);
		g_bde_delete_work(wp);
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}
435
436/*
437 * Send a write request for the given sector down the pipeline.
438 */
439
440static int
441g_bde_start_write(struct g_bde_sector *sp)
442{
443 struct bio *bp;
444 struct g_bde_softc *sc;
445
446 g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
447 sc = sp->softc;
448 KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
449 KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
450 bp = g_new_bio();
451 if (bp == NULL)
452 return (ENOMEM);
453 bp->bio_cmd = BIO_WRITE;
454 bp->bio_offset = sp->offset;
455 bp->bio_data = sp->data;
456 bp->bio_length = sp->size;
457 bp->bio_done = g_bde_write_done;
458 bp->bio_caller1 = sp;
459 bp->bio_caller2 = sc;
460 sp->state = IO;
461 g_io_request(bp, sc->consumer);
462 return(0);
463}
464
465/*
466 * A read operation has finished. Mark the sector no longer iobusy and
467 * wake up the worker thread and let it do its thing.
468 */
469
470static void
471g_bde_read_done(struct bio *bp)
472{
473 struct g_bde_sector *sp;
474 struct g_bde_softc *sc;
475
476 sp = bp->bio_caller1;
477 g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
478 sc = bp->bio_caller2;
479 mtx_lock(&sc->worklist_mutex);
480 sp->error = bp->bio_error;
481 if (sp->error == 0)
482 sp->state = VALID;
483 else
484 sp->state = JUNK;
485 wakeup(sc);
486 g_destroy_bio(bp);
487 mtx_unlock(&sc->worklist_mutex);
488}
489
490/*
491 * Send a read request for the given sector down the pipeline.
492 */
493
494static int
495g_bde_start_read(struct g_bde_sector *sp)
496{
497 struct bio *bp;
498 struct g_bde_softc *sc;
499
500 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
501 sc = sp->softc;
502 KASSERT(sc != NULL, ("Null softc in sp %p", sp));
503 bp = g_new_bio();
504 if (bp == NULL)
505 return (ENOMEM);
506 bp->bio_cmd = BIO_READ;
507 bp->bio_offset = sp->offset;
508 bp->bio_data = sp->data;
509 bp->bio_length = sp->size;
510 bp->bio_done = g_bde_read_done;
511 bp->bio_caller1 = sp;
512 bp->bio_caller2 = sc;
513 sp->state = IO;
514 g_io_request(bp, sc->consumer);
515 return(0);
516}
517
518/*
519 * The worker thread.
520 *
521 * The up/down path of GEOM is not allowed to sleep or do any major work
522 * so we use this thread to do the actual crypto operations and to push
523 * the state engine onwards.
524 *
525 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
526 * XXX: using a thread here is probably not needed.
527 */
528
void
g_bde_worker(void *arg)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	struct g_geom *gp;
	int busy, error;

	gp = arg;
	sc = gp->softc;

	mtx_lock(&sc->worklist_mutex);
	for (;;) {
		busy = 0;
		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
		TAILQ_FOREACH(wp, &sc->worklist, list) {
			KASSERT(wp != NULL, ("NULL wp"));
			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
			if (wp->state != WAIT)
				continue;	/* Not interesting here */

			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
			KASSERT(wp->sp != NULL, ("NULL wp->sp"));

			/*
			 * If a skey sector is involved we can only
			 * proceed once we own it and its I/O is done.
			 */
			if (wp->ksp != NULL) {
				if (wp->ksp->owner != wp)
					continue;
				if (wp->ksp->state == IO)
					continue;
				KASSERT(wp->ksp->state == VALID,
				    ("Illegal sector state (JUNK ?)"));
			}

			/* Reads must also wait for the data sector I/O. */
			if (wp->bp->bio_cmd == BIO_READ &&
			    wp->sp->state == IO)
				continue;

			/* A failed skey read fails the whole work item. */
			if (wp->ksp != NULL && wp->ksp->error != 0) {
				g_bde_contribute(wp->bp, wp->length,
				    wp->ksp->error);
				g_bde_delete_sector(sc, wp->sp);
				g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				busy++;
				break;
			}
			switch(wp->bp->bio_cmd) {
			case BIO_READ:
				if (wp->ksp == NULL) {
					/* g_bde_start2() failed to get a ksp. */
					KASSERT(wp->error != 0,
					    ("BIO_READ, no ksp and no error"));
					g_bde_contribute(wp->bp, wp->length,
					    wp->error);
				} else {
					if (wp->sp->error == 0) {
						/* Decrypt without holding the mutex. */
						mtx_unlock(&sc->worklist_mutex);
						g_bde_crypt_read(wp);
						mtx_lock(&sc->worklist_mutex);
					}
					g_bde_contribute(wp->bp, wp->length,
					    wp->sp->error);
				}
				g_bde_delete_sector(sc, wp->sp);
				if (wp->ksp != NULL)
					g_bde_release_keysector(wp);
				g_bde_delete_work(wp);
				break;
			case BIO_WRITE:
				wp->state = FINISH;
				KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
				KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
				/* Encrypt without holding the mutex. */
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_write(wp);
				mtx_lock(&sc->worklist_mutex);
				error = g_bde_start_write(wp->sp);
				if (error) {
					g_bde_contribute(wp->bp, wp->length, error);
					g_bde_release_keysector(wp);
					g_bde_delete_sector(sc, wp->sp);
					g_bde_delete_work(wp);
					break;
				}
				/*
				 * Completion is handled in g_bde_write_done()
				 * once both the data and skey writes return.
				 */
				error = g_bde_start_write(wp->ksp);
				if (wp->error == 0)
					wp->error = error;
				break;
			case BIO_DELETE:
				wp->state = FINISH;
				mtx_unlock(&sc->worklist_mutex);
				g_bde_crypt_delete(wp);
				mtx_lock(&sc->worklist_mutex);
				/*
				 * NOTE(review): the return value of
				 * g_bde_start_write() is ignored here; on
				 * ENOMEM the work item appears to be left
				 * in FINISH state forever — confirm this
				 * is intended.
				 */
				g_bde_start_write(wp->sp);
				break;
			}
			/* We modified the worklist; restart the scan. */
			busy++;
			break;
		}
		if (!busy) {
			/*
			 * We don't look for our death-warrant until we are
			 * idle. Shouldn't make a difference in practice.
			 */
			if (sc->dead)
				break;
			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
			error = msleep(sc, &sc->worklist_mutex,
			    PRIBIO, "g_bde", hz);
			if (error == EWOULDBLOCK) {
				/*
				 * Loose our skey cache in an orderly fashion.
				 * The exact rate can be tuned to be less
				 * aggressive if this is desirable. 10% per
				 * second means that the cache is gone in a
				 * few minutes.
				 */
				g_bde_purge_sector(sc, 10);
			}
		}
	}
	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
	/* Drop the whole skey cache, then check the books balance. */
	g_bde_purge_sector(sc, 1);
	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
	mtx_unlock(&sc->worklist_mutex);
	/* Advertise our exit to anybody sleeping on sc. */
	sc->dead = 2;
	wakeup(sc);
	/* NOTE(review): Giant appears to be required around kthread_exit() here — confirm. */
	mtx_lock(&Giant);
	kthread_exit(0);
}
659
660/*
661 * g_bde_start1 has chopped the incoming request up so all the requests
662 * we see here are inside a single zone. Map the data and key locations
663 * grab the buffers we need and fire off the first volley of read requests.
664 */
665
666static void
667g_bde_start2(struct g_bde_work *wp)
668{
669 struct g_bde_softc *sc;
670
671 KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
672 KASSERT(wp->softc != NULL, ("NULL wp->softc"));
673 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
674 sc = wp->softc;
675 if (wp->bp->bio_cmd == BIO_READ) {
676 wp->sp = g_bde_new_sector(wp, 0);
677 if (wp->sp == NULL) {
678 g_bde_contribute(wp->bp, wp->length, ENOMEM);
679 g_bde_delete_work(wp);
680 return;
681 }
682 wp->sp->size = wp->length;
683 wp->sp->data = wp->data;
684 if (g_bde_start_read(wp->sp) != 0) {
685 g_bde_contribute(wp->bp, wp->length, ENOMEM);
686 g_bde_delete_sector(sc, wp->sp);
687 g_bde_delete_work(wp);
688 return;
689 }
690 g_bde_read_keysector(sc, wp);
691 if (wp->ksp == NULL)
692 wp->error = ENOMEM;
693 } else if (wp->bp->bio_cmd == BIO_DELETE) {
694 wp->sp = g_bde_new_sector(wp, wp->length);
695 if (wp->sp == NULL) {
696 g_bde_contribute(wp->bp, wp->length, ENOMEM);
697 g_bde_delete_work(wp);
698 return;
699 }
700 } else if (wp->bp->bio_cmd == BIO_WRITE) {
701 wp->sp = g_bde_new_sector(wp, wp->length);
702 if (wp->sp == NULL) {
703 g_bde_contribute(wp->bp, wp->length, ENOMEM);
704 g_bde_delete_work(wp);
705 return;
706 }
707 g_bde_read_keysector(sc, wp);
708 if (wp->ksp == NULL) {
709 g_bde_contribute(wp->bp, wp->length, ENOMEM);
710 g_bde_delete_sector(sc, wp->sp);
711 g_bde_delete_work(wp);
712 return;
713 }
714 } else {
715 KASSERT(0 == 1,
716 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
717 }
718
719 wp->state = WAIT;
720 wakeup(sc);
721}
722
723/*
724 * Create a sequence of work structures, and have g_bde_map_sector() determine
725 * how long they each can be. Feed them to g_bde_start2().
726 */
727
void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	/* Stash the softc where g_bde_contribute() can find it. */
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	/* Carve the request into one work item per zone. */
	for(done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			/* g_bde_map_sector() trims wp->length to the zone boundary. */
			wp->length = bp->bio_length - done;
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			/*
			 * NOTE(review): if g_bde_start2() failed on the
			 * final chunk it has already contributed the full
			 * length, so this zero-byte contribution may call
			 * g_io_deliver() a second time — confirm.
			 */
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}