g_bde_work.c revision 115504
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $FreeBSD: head/sys/geom/bde/g_bde_work.c 115504 2003-05-31 19:08:23Z phk $ 33 * 34 * This source file contains the state-engine which makes things happen in the 35 * right order. 36 * 37 * Outline: 38 * 1) g_bde_start1() 39 * Break the struct bio into multiple work packets one per zone. 40 * 2) g_bde_start2() 41 * Setup the necessary sector buffers and start those read operations 42 * which we can start at this time and put the item on the work-list. 43 * 3) g_bde_worker() 44 * Scan the work-list for items which are ready for crypto processing 45 * and call the matching crypto function in g_bde_crypt.c and schedule 46 * any writes needed. Read operations finish here by releasing the 47 * sector buffers and delivering the original bio request. 48 * 4) g_bde_write_done() 49 * Release sector buffers and deliver the original bio request. 50 * 51 * Because of the C-scope rules, the functions are almost perfectly in the 52 * opposite order in this source file. 53 * 54 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add 55 * XXX: additional states to this state-engine. Since no hardware available 56 * XXX: at this time has AES support, implementing this has been postponed 57 * XXX: until such time as it would result in a benefit. 58 */ 59 60#include <sys/param.h> 61#include <sys/bio.h> 62#include <sys/lock.h> 63#include <sys/mutex.h> 64#include <sys/queue.h> 65#include <sys/malloc.h> 66#include <sys/systm.h> 67#include <sys/kernel.h> 68#include <sys/sysctl.h> 69#include <sys/proc.h> 70#include <sys/kthread.h> 71 72#include <crypto/rijndael/rijndael.h> 73#include <crypto/sha2/sha2.h> 74#include <geom/geom.h> 75#include <geom/bde/g_bde.h> 76 77static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp); 78static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len); 79static void g_bde_release_keysector(struct g_bde_work *wp); 80static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp); 81static int g_bde_start_read(struct g_bde_sector *sp); 82static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction); 83 84/* 85 * Work item allocation. 86 * 87 * C++ would call these constructors and destructors. 88 */ 89static u_int g_bde_nwork; 90SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, ""); 91 92static MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures"); 93 94static struct g_bde_work * 95g_bde_new_work(struct g_bde_softc *sc) 96{ 97 struct g_bde_work *wp; 98 99 wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO); 100 if (wp == NULL) 101 return (wp); 102 wp->state = SETUP; 103 wp->softc = sc; 104 g_bde_nwork++; 105 sc->nwork++; 106 TAILQ_INSERT_TAIL(&sc->worklist, wp, list); 107 return (wp); 108} 109 110static void 111g_bde_delete_work(struct g_bde_work *wp) 112{ 113 struct g_bde_softc *sc; 114 115 sc = wp->softc; 116 g_bde_nwork--; 117 sc->nwork--; 118 TAILQ_REMOVE(&sc->worklist, wp, list); 119 free(wp, M_GBDE); 120} 121 122/* 123 * Sector buffer allocation 124 * 125 * These two functions allocate and free back variable sized sector buffers 126 */ 127 128static u_int g_bde_nsect; 129SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, ""); 130 131static void 132g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 133{ 134 135 g_bde_nsect--; 136 sc->nsect--; 137 if (sp->malloc) 138 free(sp->data, M_GBDE); 139 free(sp, M_GBDE); 140} 141 142static struct g_bde_sector * 143g_bde_new_sector(struct g_bde_work *wp, u_int len) 144{ 145 struct g_bde_sector *sp; 146 147 sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO); 148 if (sp == NULL) 149 return (sp); 150 if (len > 0) { 151 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO); 152 if (sp->data == NULL) { 153 free(sp, M_GBDE); 154 return (NULL); 155 } 156 sp->malloc = 1; 157 } 158 g_bde_nsect++; 159 wp->softc->nsect++; 160 sp->size = len; 161 sp->softc = wp->softc; 162 sp->ref = 1; 163 sp->owner = wp; 164 sp->offset = wp->so; 165 sp->state = JUNK; 166 return (sp); 167} 168 169/* 170 * Skey sector cache. 171 * 172 * Nothing prevents two separate I/O requests from addressing the same zone 173 * and thereby needing the same skey sector. We therefore need to sequence 174 * I/O operations to the skey sectors. A certain amount of caching is also 175 * desirable, although the extent of benefit from this is not at this point 176 * determined. 177 * 178 * XXX: GEOM may be able to grow a generic caching facility at some point 179 * XXX: to support such needs. 180 */ 181 182static u_int g_bde_ncache; 183SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, ""); 184 185static void 186g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 187{ 188 189 g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp); 190 if (sp->ref != 0) 191 return; 192 TAILQ_REMOVE(&sc->freelist, sp, list); 193 g_bde_ncache--; 194 sc->ncache--; 195 bzero(sp->data, sp->size); 196 g_bde_delete_sector(sc, sp); 197} 198 199static struct g_bde_sector * 200g_bde_get_keysector(struct g_bde_work *wp) 201{ 202 struct g_bde_sector *sp; 203 struct g_bde_softc *sc; 204 off_t offset; 205 206 offset = wp->kso; 207 g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset); 208 sc = wp->softc; 209 210 if (malloc_last_fail() < g_bde_ncache) 211 g_bde_purge_sector(sc, -1); 212 213 sp = TAILQ_FIRST(&sc->freelist); 214 if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime) 215 g_bde_purge_one_sector(sc, sp); 216 217 TAILQ_FOREACH(sp, &sc->freelist, list) { 218 if (sp->offset == offset) 219 break; 220 } 221 if (sp != NULL) { 222 sp->ref++; 223 KASSERT(sp->offset == offset, ("wrong offset")); 224 KASSERT(sp->softc == wp->softc, ("wrong softc")); 225 if (sp->ref == 1) 226 sp->owner = wp; 227 } else { 228 if (malloc_last_fail() < g_bde_ncache) { 229 TAILQ_FOREACH(sp, &sc->freelist, list) 230 if (sp->ref == 0) 231 break; 232 } 233 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist)) 234 sp = TAILQ_FIRST(&sc->freelist); 235 if (sp != NULL && sp->ref > 0) 236 sp = NULL; 237 if (sp == NULL) { 238 sp = g_bde_new_sector(wp, sc->sectorsize); 239 if (sp != NULL) { 240 g_bde_ncache++; 241 sc->ncache++; 242 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 243 sp->malloc = 2; 244 } 245 } 246 if (sp != NULL) { 247 sp->offset = offset; 248 sp->softc = wp->softc; 249 sp->ref = 1; 250 sp->owner = wp; 251 sp->state = JUNK; 252 sp->error = 0; 253 } 254 } 255 if (sp != NULL) { 256 TAILQ_REMOVE(&sc->freelist, sp, list); 257 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 258 sp->used = time_uptime; 259 } 260 wp->ksp = sp; 261 return(sp); 262} 263 264static void 265g_bde_release_keysector(struct g_bde_work *wp) 266{ 267 struct g_bde_softc *sc; 268 struct g_bde_work *wp2; 269 struct g_bde_sector *sp; 270 271 sp = wp->ksp; 272 g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp); 273 KASSERT(sp->malloc == 2, ("Wrong sector released")); 274 sc = sp->softc; 275 KASSERT(sc != NULL, ("NULL sp->softc")); 276 KASSERT(wp == sp->owner, ("Releasing, not owner")); 277 sp->owner = NULL; 278 wp->ksp = NULL; 279 sp->ref--; 280 if (sp->ref > 0) { 281 TAILQ_REMOVE(&sc->freelist, sp, list); 282 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 283 TAILQ_FOREACH(wp2, &sc->worklist, list) { 284 if (wp2->ksp == sp) { 285 KASSERT(wp2 != wp, ("Self-reowning")); 286 sp->owner = wp2; 287 wakeup(sp->softc); 288 break; 289 } 290 } 291 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp)); 292 } else if (sp->error != 0) { 293 sp->offset = ~0; 294 sp->error = 0; 295 sp->state = JUNK; 296 } 297 TAILQ_REMOVE(&sc->freelist, sp, list); 298 TAILQ_INSERT_HEAD(&sc->freelist, sp, list); 299} 300 301static void 302g_bde_purge_sector(struct g_bde_softc *sc, int fraction) 303{ 304 struct g_bde_sector *sp; 305 int n; 306 307 g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc); 308 if (fraction > 0) 309 n = sc->ncache / fraction + 1; 310 else 311 n = g_bde_ncache - malloc_last_fail(); 312 if (n < 0) 313 return; 314 if (n > sc->ncache) 315 n = sc->ncache; 316 while(n--) { 317 TAILQ_FOREACH(sp, &sc->freelist, list) { 318 if (sp->ref != 0) 319 continue; 320 TAILQ_REMOVE(&sc->freelist, sp, list); 321 g_bde_ncache--; 322 sc->ncache--; 323 bzero(sp->data, sp->size); 324 g_bde_delete_sector(sc, sp); 325 break; 326 } 327 } 328} 329 330static struct g_bde_sector * 331g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp) 332{ 333 struct g_bde_sector *sp; 334 335 g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp); 336 sp = g_bde_get_keysector(wp); 337 if (sp == NULL) { 338 g_bde_purge_sector(sc, -1); 339 sp = g_bde_get_keysector(wp); 340 } 341 if (sp == NULL) 342 return (sp); 343 if (sp->owner != wp) 344 return (sp); 345 if (sp->state == VALID) 346 return (sp); 347 if (g_bde_start_read(sp) == 0) 348 return (sp); 349 g_bde_release_keysector(wp); 350 return (NULL); 351} 352 353/* 354 * Contribute to the completion of the original bio request. 355 * 356 * We have no simple way to tell how many bits the original bio request has 357 * been segmented into, so the easiest way to determine when we can deliver 358 * it is to keep track of the number of bytes we have completed. We keep 359 * track of any errors underway and latch onto the first one. 360 * 361 * We always report "nothing done" in case of error, because random bits here 362 * and there may be completed and returning a number of completed bytes does 363 * not convey any useful information about which bytes they were. If some 364 * piece of broken code somewhere interprets this to mean that nothing has 365 * changed on the underlying media they deserve the lossage headed for them. 366 * 367 * A single mutex per g_bde instance is used to prevent contention. 368 */ 369 370static void 371g_bde_contribute(struct bio *bp, off_t bytes, int error) 372{ 373 374 g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d", 375 bp, (intmax_t)bytes, error); 376 if (bp->bio_error == 0) 377 bp->bio_error = error; 378 bp->bio_completed += bytes; 379 KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution")); 380 if (bp->bio_completed == bp->bio_length) { 381 if (bp->bio_error != 0) 382 bp->bio_completed = 0; 383 g_io_deliver(bp, bp->bio_error); 384 } 385} 386 387/* 388 * A write operation has finished. When we have all expected cows in the 389 * barn close the door and call it a day. 390 */ 391 392static void 393g_bde_write_done(struct bio *bp) 394{ 395 struct g_bde_sector *sp; 396 struct g_bde_work *wp; 397 struct g_bde_softc *sc; 398 399 sp = bp->bio_caller1; 400 sc = bp->bio_caller2; 401 mtx_lock(&sc->worklist_mutex); 402 KASSERT(sp != NULL, ("NULL sp")); 403 KASSERT(sc != NULL, ("NULL sc")); 404 KASSERT(sp->owner != NULL, ("NULL sp->owner")); 405 g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp); 406 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 407 bp->bio_error = EIO; 408 sp->error = bp->bio_error; 409 g_destroy_bio(bp); 410 wp = sp->owner; 411 if (wp->error == 0) 412 wp->error = sp->error; 413 414 if (wp->bp->bio_cmd == BIO_DELETE) { 415 KASSERT(sp == wp->sp, ("trashed delete op")); 416 g_bde_contribute(wp->bp, wp->length, wp->error); 417 g_bde_delete_sector(sc, sp); 418 g_bde_delete_work(wp); 419 mtx_unlock(&sc->worklist_mutex); 420 return; 421 } 422 423 KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()")); 424 KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op")); 425 if (wp->sp == sp) { 426 g_bde_delete_sector(sc, wp->sp); 427 wp->sp = NULL; 428 } else { 429 sp->state = VALID; 430 } 431 if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) { 432 g_bde_contribute(wp->bp, wp->length, wp->error); 433 g_bde_release_keysector(wp); 434 g_bde_delete_work(wp); 435 } 436 mtx_unlock(&sc->worklist_mutex); 437 return; 438} 439 440/* 441 * Send a write request for the given sector down the pipeline. 442 */ 443 444static int 445g_bde_start_write(struct g_bde_sector *sp) 446{ 447 struct bio *bp; 448 struct g_bde_softc *sc; 449 450 g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp); 451 sc = sp->softc; 452 KASSERT(sc != NULL, ("NULL sc in g_bde_start_write")); 453 KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write")); 454 bp = g_new_bio(); 455 if (bp == NULL) 456 return (ENOMEM); 457 bp->bio_cmd = BIO_WRITE; 458 bp->bio_offset = sp->offset; 459 bp->bio_data = sp->data; 460 bp->bio_length = sp->size; 461 bp->bio_done = g_bde_write_done; 462 bp->bio_caller1 = sp; 463 bp->bio_caller2 = sc; 464 sp->state = IO; 465 g_io_request(bp, sc->consumer); 466 return(0); 467} 468 469/* 470 * A read operation has finished. Mark the sector no longer iobusy and 471 * wake up the worker thread and let it do its thing. 472 */ 473 474static void 475g_bde_read_done(struct bio *bp) 476{ 477 struct g_bde_sector *sp; 478 struct g_bde_softc *sc; 479 480 sp = bp->bio_caller1; 481 g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp); 482 sc = bp->bio_caller2; 483 mtx_lock(&sc->worklist_mutex); 484 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 485 bp->bio_error = EIO; 486 sp->error = bp->bio_error; 487 if (sp->error == 0) 488 sp->state = VALID; 489 else 490 sp->state = JUNK; 491 wakeup(sc); 492 g_destroy_bio(bp); 493 mtx_unlock(&sc->worklist_mutex); 494} 495 496/* 497 * Send a read request for the given sector down the pipeline. 498 */ 499 500static int 501g_bde_start_read(struct g_bde_sector *sp) 502{ 503 struct bio *bp; 504 struct g_bde_softc *sc; 505 506 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp); 507 sc = sp->softc; 508 KASSERT(sc != NULL, ("Null softc in sp %p", sp)); 509 bp = g_new_bio(); 510 if (bp == NULL) 511 return (ENOMEM); 512 bp->bio_cmd = BIO_READ; 513 bp->bio_offset = sp->offset; 514 bp->bio_data = sp->data; 515 bp->bio_length = sp->size; 516 bp->bio_done = g_bde_read_done; 517 bp->bio_caller1 = sp; 518 bp->bio_caller2 = sc; 519 sp->state = IO; 520 g_io_request(bp, sc->consumer); 521 return(0); 522} 523 524/* 525 * The worker thread. 526 * 527 * The up/down path of GEOM is not allowed to sleep or do any major work 528 * so we use this thread to do the actual crypto operations and to push 529 * the state engine onwards. 530 * 531 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption 532 * XXX: using a thread here is probably not needed. 533 */ 534 535void 536g_bde_worker(void *arg) 537{ 538 struct g_bde_softc *sc; 539 struct g_bde_work *wp; 540 struct g_geom *gp; 541 int busy, error; 542 543 gp = arg; 544 sc = gp->softc; 545 546 mtx_lock(&sc->worklist_mutex); 547 for (;;) { 548 busy = 0; 549 g_trace(G_T_TOPOLOGY, "g_bde_worker scan"); 550 TAILQ_FOREACH(wp, &sc->worklist, list) { 551 KASSERT(wp != NULL, ("NULL wp")); 552 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 553 if (wp->state != WAIT) 554 continue; /* Not interesting here */ 555 556 KASSERT(wp->bp != NULL, ("NULL wp->bp")); 557 KASSERT(wp->sp != NULL, ("NULL wp->sp")); 558 559 if (wp->ksp != NULL) { 560 if (wp->ksp->owner != wp) 561 continue; 562 if (wp->ksp->state == IO) 563 continue; 564 KASSERT(wp->ksp->state == VALID, 565 ("Illegal sector state (JUNK ?)")); 566 } 567 568 if (wp->bp->bio_cmd == BIO_READ && 569 wp->sp->state == IO) 570 continue; 571 572 if (wp->ksp != NULL && wp->ksp->error != 0) { 573 g_bde_contribute(wp->bp, wp->length, 574 wp->ksp->error); 575 g_bde_delete_sector(sc, wp->sp); 576 g_bde_release_keysector(wp); 577 g_bde_delete_work(wp); 578 busy++; 579 break; 580 } 581 switch(wp->bp->bio_cmd) { 582 case BIO_READ: 583 if (wp->ksp == NULL) { 584 KASSERT(wp->error != 0, 585 ("BIO_READ, no ksp and no error")); 586 g_bde_contribute(wp->bp, wp->length, 587 wp->error); 588 } else { 589 if (wp->sp->error == 0) { 590 mtx_unlock(&sc->worklist_mutex); 591 g_bde_crypt_read(wp); 592 mtx_lock(&sc->worklist_mutex); 593 } 594 g_bde_contribute(wp->bp, wp->length, 595 wp->sp->error); 596 } 597 g_bde_delete_sector(sc, wp->sp); 598 if (wp->ksp != NULL) 599 g_bde_release_keysector(wp); 600 g_bde_delete_work(wp); 601 break; 602 case BIO_WRITE: 603 wp->state = FINISH; 604 KASSERT(wp->sp->owner == wp, ("Write not owner sp")); 605 KASSERT(wp->ksp->owner == wp, ("Write not owner ksp")); 606 mtx_unlock(&sc->worklist_mutex); 607 g_bde_crypt_write(wp); 608 mtx_lock(&sc->worklist_mutex); 609 error = g_bde_start_write(wp->sp); 610 if (error) { 611 g_bde_contribute(wp->bp, wp->length, error); 612 g_bde_release_keysector(wp); 613 g_bde_delete_sector(sc, wp->sp); 614 g_bde_delete_work(wp); 615 break; 616 } 617 error = g_bde_start_write(wp->ksp); 618 if (wp->error == 0) 619 wp->error = error; 620 break; 621 case BIO_DELETE: 622 wp->state = FINISH; 623 mtx_unlock(&sc->worklist_mutex); 624 g_bde_crypt_delete(wp); 625 mtx_lock(&sc->worklist_mutex); 626 g_bde_start_write(wp->sp); 627 break; 628 } 629 busy++; 630 break; 631 } 632 if (!busy) { 633 /* 634 * We don't look for our death-warrant until we are 635 * idle. Shouldn't make a difference in practice. 636 */ 637 if (sc->dead) 638 break; 639 g_trace(G_T_TOPOLOGY, "g_bde_worker sleep"); 640 error = msleep(sc, &sc->worklist_mutex, 641 PRIBIO, "g_bde", hz); 642 if (error == EWOULDBLOCK) { 643 /* 644 * Loose our skey cache in an orderly fashion. 645 * The exact rate can be tuned to be less 646 * aggressive if this is desirable. 10% per 647 * second means that the cache is gone in a 648 * few minutes. 649 */ 650 g_bde_purge_sector(sc, 10); 651 } 652 } 653 } 654 g_trace(G_T_TOPOLOGY, "g_bde_worker die"); 655 g_bde_purge_sector(sc, 1); 656 KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork)); 657 KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache)); 658 KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect)); 659 mtx_unlock(&sc->worklist_mutex); 660 sc->dead = 2; 661 wakeup(sc); 662 mtx_lock(&Giant); 663 kthread_exit(0); 664} 665 666/* 667 * g_bde_start1 has chopped the incoming request up so all the requests 668 * we see here are inside a single zone. Map the data and key locations 669 * grab the buffers we need and fire off the first volley of read requests. 670 */ 671 672static void 673g_bde_start2(struct g_bde_work *wp) 674{ 675 struct g_bde_softc *sc; 676 677 KASSERT(wp != NULL, ("NULL wp in g_bde_start2")); 678 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 679 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp); 680 sc = wp->softc; 681 if (wp->bp->bio_cmd == BIO_READ) { 682 wp->sp = g_bde_new_sector(wp, 0); 683 if (wp->sp == NULL) { 684 g_bde_contribute(wp->bp, wp->length, ENOMEM); 685 g_bde_delete_work(wp); 686 return; 687 } 688 wp->sp->size = wp->length; 689 wp->sp->data = wp->data; 690 if (g_bde_start_read(wp->sp) != 0) { 691 g_bde_contribute(wp->bp, wp->length, ENOMEM); 692 g_bde_delete_sector(sc, wp->sp); 693 g_bde_delete_work(wp); 694 return; 695 } 696 g_bde_read_keysector(sc, wp); 697 if (wp->ksp == NULL) 698 wp->error = ENOMEM; 699 } else if (wp->bp->bio_cmd == BIO_DELETE) { 700 wp->sp = g_bde_new_sector(wp, wp->length); 701 if (wp->sp == NULL) { 702 g_bde_contribute(wp->bp, wp->length, ENOMEM); 703 g_bde_delete_work(wp); 704 return; 705 } 706 } else if (wp->bp->bio_cmd == BIO_WRITE) { 707 wp->sp = g_bde_new_sector(wp, wp->length); 708 if (wp->sp == NULL) { 709 g_bde_contribute(wp->bp, wp->length, ENOMEM); 710 g_bde_delete_work(wp); 711 return; 712 } 713 g_bde_read_keysector(sc, wp); 714 if (wp->ksp == NULL) { 715 g_bde_contribute(wp->bp, wp->length, ENOMEM); 716 g_bde_delete_sector(sc, wp->sp); 717 g_bde_delete_work(wp); 718 return; 719 } 720 } else { 721 KASSERT(0 == 1, 722 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd)); 723 } 724 725 wp->state = WAIT; 726 wakeup(sc); 727} 728 729/* 730 * Create a sequence of work structures, and have g_bde_map_sector() determine 731 * how long they each can be. Feed them to g_bde_start2(). 732 */ 733 734void 735g_bde_start1(struct bio *bp) 736{ 737 struct g_bde_softc *sc; 738 struct g_bde_work *wp; 739 off_t done; 740 741 sc = bp->bio_to->geom->softc; 742 bp->bio_driver1 = sc; 743 744 mtx_lock(&sc->worklist_mutex); 745 for(done = 0; done < bp->bio_length; ) { 746 wp = g_bde_new_work(sc); 747 if (wp != NULL) { 748 wp->bp = bp; 749 wp->offset = bp->bio_offset + done; 750 wp->data = bp->bio_data + done; 751 wp->length = bp->bio_length - done; 752 g_bde_map_sector(wp); 753 done += wp->length; 754 g_bde_start2(wp); 755 } 756 if (wp == NULL || bp->bio_error != 0) { 757 g_bde_contribute(bp, bp->bio_length - done, ENOMEM); 758 break; 759 } 760 } 761 mtx_unlock(&sc->worklist_mutex); 762 return; 763} 764