g_bde_work.c revision 314327
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/geom/bde/g_bde_work.c 314327 2017-02-27 08:27:38Z avg $
 */
/*
 * This source file contains the state-engine which makes things happen in the
 * right order.
37 * 38 * Outline: 39 * 1) g_bde_start1() 40 * Break the struct bio into multiple work packets one per zone. 41 * 2) g_bde_start2() 42 * Setup the necessary sector buffers and start those read operations 43 * which we can start at this time and put the item on the work-list. 44 * 3) g_bde_worker() 45 * Scan the work-list for items which are ready for crypto processing 46 * and call the matching crypto function in g_bde_crypt.c and schedule 47 * any writes needed. Read operations finish here by releasing the 48 * sector buffers and delivering the original bio request. 49 * 4) g_bde_write_done() 50 * Release sector buffers and deliver the original bio request. 51 * 52 * Because of the C-scope rules, the functions are almost perfectly in the 53 * opposite order in this source file. 54 * 55 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add 56 * XXX: additional states to this state-engine. Since no hardware available 57 * XXX: at this time has AES support, implementing this has been postponed 58 * XXX: until such time as it would result in a benefit. 59 */ 60 61#include <sys/param.h> 62#include <sys/bio.h> 63#include <sys/lock.h> 64#include <sys/mutex.h> 65#include <sys/queue.h> 66#include <sys/malloc.h> 67#include <sys/systm.h> 68#include <sys/kernel.h> 69#include <sys/sysctl.h> 70#include <sys/proc.h> 71#include <sys/kthread.h> 72 73#include <crypto/rijndael/rijndael-api-fst.h> 74#include <crypto/sha2/sha512.h> 75#include <geom/geom.h> 76#include <geom/bde/g_bde.h> 77 78static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp); 79static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len); 80static void g_bde_release_keysector(struct g_bde_work *wp); 81static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp); 82static int g_bde_start_read(struct g_bde_sector *sp); 83static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction); 84 85/* 86 * Work item allocation. 
87 * 88 * C++ would call these constructors and destructors. 89 */ 90static u_int g_bde_nwork; 91SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, ""); 92 93static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures"); 94 95static struct g_bde_work * 96g_bde_new_work(struct g_bde_softc *sc) 97{ 98 struct g_bde_work *wp; 99 100 wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO); 101 if (wp == NULL) 102 return (wp); 103 wp->state = SETUP; 104 wp->softc = sc; 105 g_bde_nwork++; 106 sc->nwork++; 107 TAILQ_INSERT_TAIL(&sc->worklist, wp, list); 108 return (wp); 109} 110 111static void 112g_bde_delete_work(struct g_bde_work *wp) 113{ 114 struct g_bde_softc *sc; 115 116 sc = wp->softc; 117 g_bde_nwork--; 118 sc->nwork--; 119 TAILQ_REMOVE(&sc->worklist, wp, list); 120 free(wp, M_GBDE); 121} 122 123/* 124 * Sector buffer allocation 125 * 126 * These two functions allocate and free back variable sized sector buffers 127 */ 128 129static u_int g_bde_nsect; 130SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, ""); 131 132static void 133g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 134{ 135 136 g_bde_nsect--; 137 sc->nsect--; 138 if (sp->malloc) 139 free(sp->data, M_GBDE); 140 free(sp, M_GBDE); 141} 142 143static struct g_bde_sector * 144g_bde_new_sector(struct g_bde_work *wp, u_int len) 145{ 146 struct g_bde_sector *sp; 147 148 sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO); 149 if (sp == NULL) 150 return (sp); 151 if (len > 0) { 152 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO); 153 if (sp->data == NULL) { 154 free(sp, M_GBDE); 155 return (NULL); 156 } 157 sp->malloc = 1; 158 } 159 g_bde_nsect++; 160 wp->softc->nsect++; 161 sp->size = len; 162 sp->softc = wp->softc; 163 sp->ref = 1; 164 sp->owner = wp; 165 sp->offset = wp->so; 166 sp->state = JUNK; 167 return (sp); 168} 169 170/* 171 * Skey sector cache. 
172 * 173 * Nothing prevents two separate I/O requests from addressing the same zone 174 * and thereby needing the same skey sector. We therefore need to sequence 175 * I/O operations to the skey sectors. A certain amount of caching is also 176 * desirable, although the extent of benefit from this is not at this point 177 * determined. 178 * 179 * XXX: GEOM may be able to grow a generic caching facility at some point 180 * XXX: to support such needs. 181 */ 182 183static u_int g_bde_ncache; 184SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, ""); 185 186static void 187g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 188{ 189 190 g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp); 191 if (sp->ref != 0) 192 return; 193 TAILQ_REMOVE(&sc->freelist, sp, list); 194 g_bde_ncache--; 195 sc->ncache--; 196 bzero(sp->data, sp->size); 197 g_bde_delete_sector(sc, sp); 198} 199 200static struct g_bde_sector * 201g_bde_get_keysector(struct g_bde_work *wp) 202{ 203 struct g_bde_sector *sp; 204 struct g_bde_softc *sc; 205 off_t offset; 206 207 offset = wp->kso; 208 g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset); 209 sc = wp->softc; 210 211 if (malloc_last_fail() < g_bde_ncache) 212 g_bde_purge_sector(sc, -1); 213 214 sp = TAILQ_FIRST(&sc->freelist); 215 if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime) 216 g_bde_purge_one_sector(sc, sp); 217 218 TAILQ_FOREACH(sp, &sc->freelist, list) { 219 if (sp->offset == offset) 220 break; 221 } 222 if (sp != NULL) { 223 sp->ref++; 224 KASSERT(sp->offset == offset, ("wrong offset")); 225 KASSERT(sp->softc == wp->softc, ("wrong softc")); 226 if (sp->ref == 1) 227 sp->owner = wp; 228 } else { 229 if (malloc_last_fail() < g_bde_ncache) { 230 TAILQ_FOREACH(sp, &sc->freelist, list) 231 if (sp->ref == 0) 232 break; 233 } 234 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist)) 235 sp = TAILQ_FIRST(&sc->freelist); 236 if (sp != NULL && sp->ref > 0) 237 sp = 
NULL; 238 if (sp == NULL) { 239 sp = g_bde_new_sector(wp, sc->sectorsize); 240 if (sp != NULL) { 241 g_bde_ncache++; 242 sc->ncache++; 243 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 244 sp->malloc = 2; 245 } 246 } 247 if (sp != NULL) { 248 sp->offset = offset; 249 sp->softc = wp->softc; 250 sp->ref = 1; 251 sp->owner = wp; 252 sp->state = JUNK; 253 sp->error = 0; 254 } 255 } 256 if (sp != NULL) { 257 TAILQ_REMOVE(&sc->freelist, sp, list); 258 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 259 sp->used = time_uptime; 260 } 261 wp->ksp = sp; 262 return(sp); 263} 264 265static void 266g_bde_release_keysector(struct g_bde_work *wp) 267{ 268 struct g_bde_softc *sc; 269 struct g_bde_work *wp2; 270 struct g_bde_sector *sp; 271 272 sp = wp->ksp; 273 g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp); 274 KASSERT(sp->malloc == 2, ("Wrong sector released")); 275 sc = sp->softc; 276 KASSERT(sc != NULL, ("NULL sp->softc")); 277 KASSERT(wp == sp->owner, ("Releasing, not owner")); 278 sp->owner = NULL; 279 wp->ksp = NULL; 280 sp->ref--; 281 if (sp->ref > 0) { 282 TAILQ_REMOVE(&sc->freelist, sp, list); 283 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 284 TAILQ_FOREACH(wp2, &sc->worklist, list) { 285 if (wp2->ksp == sp) { 286 KASSERT(wp2 != wp, ("Self-reowning")); 287 sp->owner = wp2; 288 wakeup(sp->softc); 289 break; 290 } 291 } 292 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp)); 293 } else if (sp->error != 0) { 294 sp->offset = ~0; 295 sp->error = 0; 296 sp->state = JUNK; 297 } 298 TAILQ_REMOVE(&sc->freelist, sp, list); 299 TAILQ_INSERT_HEAD(&sc->freelist, sp, list); 300} 301 302static void 303g_bde_purge_sector(struct g_bde_softc *sc, int fraction) 304{ 305 struct g_bde_sector *sp; 306 int n; 307 308 g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc); 309 if (fraction > 0) 310 n = sc->ncache / fraction + 1; 311 else 312 n = g_bde_ncache - malloc_last_fail(); 313 if (n < 0) 314 return; 315 if (n > sc->ncache) 316 n = sc->ncache; 317 while(n--) { 318 
TAILQ_FOREACH(sp, &sc->freelist, list) { 319 if (sp->ref != 0) 320 continue; 321 TAILQ_REMOVE(&sc->freelist, sp, list); 322 g_bde_ncache--; 323 sc->ncache--; 324 bzero(sp->data, sp->size); 325 g_bde_delete_sector(sc, sp); 326 break; 327 } 328 } 329} 330 331static struct g_bde_sector * 332g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp) 333{ 334 struct g_bde_sector *sp; 335 336 g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp); 337 sp = g_bde_get_keysector(wp); 338 if (sp == NULL) { 339 g_bde_purge_sector(sc, -1); 340 sp = g_bde_get_keysector(wp); 341 } 342 if (sp == NULL) 343 return (sp); 344 if (sp->owner != wp) 345 return (sp); 346 if (sp->state == VALID) 347 return (sp); 348 if (g_bde_start_read(sp) == 0) 349 return (sp); 350 g_bde_release_keysector(wp); 351 return (NULL); 352} 353 354/* 355 * Contribute to the completion of the original bio request. 356 * 357 * We have no simple way to tell how many bits the original bio request has 358 * been segmented into, so the easiest way to determine when we can deliver 359 * it is to keep track of the number of bytes we have completed. We keep 360 * track of any errors underway and latch onto the first one. 361 * 362 * We always report "nothing done" in case of error, because random bits here 363 * and there may be completed and returning a number of completed bytes does 364 * not convey any useful information about which bytes they were. If some 365 * piece of broken code somewhere interprets this to mean that nothing has 366 * changed on the underlying media they deserve the lossage headed for them. 367 * 368 * A single mutex per g_bde instance is used to prevent contention. 
369 */ 370 371static void 372g_bde_contribute(struct bio *bp, off_t bytes, int error) 373{ 374 375 g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d", 376 bp, (intmax_t)bytes, error); 377 if (bp->bio_error == 0) 378 bp->bio_error = error; 379 bp->bio_completed += bytes; 380 KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution")); 381 if (bp->bio_completed == bp->bio_length) { 382 if (bp->bio_error != 0) 383 bp->bio_completed = 0; 384 g_io_deliver(bp, bp->bio_error); 385 } 386} 387 388/* 389 * This is the common case "we're done with this work package" function 390 */ 391 392static void 393g_bde_work_done(struct g_bde_work *wp, int error) 394{ 395 396 g_bde_contribute(wp->bp, wp->length, error); 397 if (wp->sp != NULL) 398 g_bde_delete_sector(wp->softc, wp->sp); 399 if (wp->ksp != NULL) 400 g_bde_release_keysector(wp); 401 g_bde_delete_work(wp); 402} 403 404/* 405 * A write operation has finished. When we have all expected cows in the 406 * barn close the door and call it a day. 
407 */ 408 409static void 410g_bde_write_done(struct bio *bp) 411{ 412 struct g_bde_sector *sp; 413 struct g_bde_work *wp; 414 struct g_bde_softc *sc; 415 416 sp = bp->bio_caller1; 417 sc = bp->bio_caller2; 418 mtx_lock(&sc->worklist_mutex); 419 KASSERT(sp != NULL, ("NULL sp")); 420 KASSERT(sc != NULL, ("NULL sc")); 421 KASSERT(sp->owner != NULL, ("NULL sp->owner")); 422 g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp); 423 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 424 bp->bio_error = EIO; 425 sp->error = bp->bio_error; 426 g_destroy_bio(bp); 427 wp = sp->owner; 428 if (wp->error == 0) 429 wp->error = sp->error; 430 431 if (wp->bp->bio_cmd == BIO_DELETE) { 432 KASSERT(sp == wp->sp, ("trashed delete op")); 433 g_bde_work_done(wp, wp->error); 434 mtx_unlock(&sc->worklist_mutex); 435 return; 436 } 437 438 KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()")); 439 KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op")); 440 if (wp->sp == sp) { 441 g_bde_delete_sector(sc, wp->sp); 442 wp->sp = NULL; 443 } else { 444 sp->state = VALID; 445 } 446 if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) 447 g_bde_work_done(wp, wp->error); 448 mtx_unlock(&sc->worklist_mutex); 449 return; 450} 451 452/* 453 * Send a write request for the given sector down the pipeline. 
454 */ 455 456static int 457g_bde_start_write(struct g_bde_sector *sp) 458{ 459 struct bio *bp; 460 struct g_bde_softc *sc; 461 462 g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp); 463 sc = sp->softc; 464 KASSERT(sc != NULL, ("NULL sc in g_bde_start_write")); 465 KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write")); 466 bp = g_new_bio(); 467 if (bp == NULL) 468 return (ENOMEM); 469 bp->bio_cmd = BIO_WRITE; 470 bp->bio_offset = sp->offset; 471 bp->bio_data = sp->data; 472 bp->bio_length = sp->size; 473 bp->bio_done = g_bde_write_done; 474 bp->bio_caller1 = sp; 475 bp->bio_caller2 = sc; 476 sp->state = IO; 477 g_io_request(bp, sc->consumer); 478 return(0); 479} 480 481/* 482 * A read operation has finished. Mark the sector no longer iobusy and 483 * wake up the worker thread and let it do its thing. 484 */ 485 486static void 487g_bde_read_done(struct bio *bp) 488{ 489 struct g_bde_sector *sp; 490 struct g_bde_softc *sc; 491 492 sp = bp->bio_caller1; 493 g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp); 494 sc = bp->bio_caller2; 495 mtx_lock(&sc->worklist_mutex); 496 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 497 bp->bio_error = EIO; 498 sp->error = bp->bio_error; 499 if (sp->error == 0) 500 sp->state = VALID; 501 else 502 sp->state = JUNK; 503 wakeup(sc); 504 g_destroy_bio(bp); 505 mtx_unlock(&sc->worklist_mutex); 506} 507 508/* 509 * Send a read request for the given sector down the pipeline. 
510 */ 511 512static int 513g_bde_start_read(struct g_bde_sector *sp) 514{ 515 struct bio *bp; 516 struct g_bde_softc *sc; 517 518 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp); 519 sc = sp->softc; 520 KASSERT(sc != NULL, ("Null softc in sp %p", sp)); 521 bp = g_new_bio(); 522 if (bp == NULL) 523 return (ENOMEM); 524 bp->bio_cmd = BIO_READ; 525 bp->bio_offset = sp->offset; 526 bp->bio_data = sp->data; 527 bp->bio_length = sp->size; 528 bp->bio_done = g_bde_read_done; 529 bp->bio_caller1 = sp; 530 bp->bio_caller2 = sc; 531 sp->state = IO; 532 g_io_request(bp, sc->consumer); 533 return(0); 534} 535 536/* 537 * The worker thread. 538 * 539 * The up/down path of GEOM is not allowed to sleep or do any major work 540 * so we use this thread to do the actual crypto operations and to push 541 * the state engine onwards. 542 * 543 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption 544 * XXX: using a thread here is probably not needed. 545 */ 546 547void 548g_bde_worker(void *arg) 549{ 550 struct g_bde_softc *sc; 551 struct g_bde_work *wp, *twp; 552 struct g_geom *gp; 553 int restart, error; 554 555 gp = arg; 556 sc = gp->softc; 557 558 mtx_lock(&sc->worklist_mutex); 559 for (;;) { 560 restart = 0; 561 g_trace(G_T_TOPOLOGY, "g_bde_worker scan"); 562 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) { 563 KASSERT(wp != NULL, ("NULL wp")); 564 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 565 if (wp->state != WAIT) 566 continue; /* Not interesting here */ 567 568 KASSERT(wp->bp != NULL, ("NULL wp->bp")); 569 KASSERT(wp->sp != NULL, ("NULL wp->sp")); 570 571 if (wp->ksp != NULL) { 572 if (wp->ksp->owner != wp) 573 continue; 574 if (wp->ksp->state == IO) 575 continue; 576 KASSERT(wp->ksp->state == VALID, 577 ("Illegal sector state (%d)", 578 wp->ksp->state)); 579 } 580 581 if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO) 582 continue; 583 584 if (wp->ksp != NULL && wp->ksp->error != 0) { 585 g_bde_work_done(wp, wp->ksp->error); 586 continue; 
587 } 588 switch(wp->bp->bio_cmd) { 589 case BIO_READ: 590 if (wp->ksp == NULL) { 591 KASSERT(wp->error != 0, 592 ("BIO_READ, no ksp and no error")); 593 g_bde_work_done(wp, wp->error); 594 break; 595 } 596 if (wp->sp->error != 0) { 597 g_bde_work_done(wp, wp->sp->error); 598 break; 599 } 600 mtx_unlock(&sc->worklist_mutex); 601 g_bde_crypt_read(wp); 602 mtx_lock(&sc->worklist_mutex); 603 restart++; 604 g_bde_work_done(wp, wp->sp->error); 605 break; 606 case BIO_WRITE: 607 wp->state = FINISH; 608 KASSERT(wp->sp->owner == wp, 609 ("Write not owner sp")); 610 KASSERT(wp->ksp->owner == wp, 611 ("Write not owner ksp")); 612 mtx_unlock(&sc->worklist_mutex); 613 g_bde_crypt_write(wp); 614 mtx_lock(&sc->worklist_mutex); 615 restart++; 616 error = g_bde_start_write(wp->sp); 617 if (error) { 618 g_bde_work_done(wp, error); 619 break; 620 } 621 error = g_bde_start_write(wp->ksp); 622 if (wp->error != 0) 623 wp->error = error; 624 break; 625 case BIO_DELETE: 626 wp->state = FINISH; 627 mtx_unlock(&sc->worklist_mutex); 628 g_bde_crypt_delete(wp); 629 mtx_lock(&sc->worklist_mutex); 630 restart++; 631 g_bde_start_write(wp->sp); 632 break; 633 } 634 if (restart) 635 break; 636 } 637 if (!restart) { 638 /* 639 * We don't look for our death-warrant until we are 640 * idle. Shouldn't make a difference in practice. 641 */ 642 if (sc->dead) 643 break; 644 g_trace(G_T_TOPOLOGY, "g_bde_worker sleep"); 645 error = msleep(sc, &sc->worklist_mutex, 646 PRIBIO, "-", hz); 647 if (error == EWOULDBLOCK) { 648 /* 649 * Lose our skey cache in an orderly fashion. 650 * The exact rate can be tuned to be less 651 * aggressive if this is desirable. 10% per 652 * second means that the cache is gone in a 653 * few minutes. 
654 */ 655 g_bde_purge_sector(sc, 10); 656 } 657 } 658 } 659 g_trace(G_T_TOPOLOGY, "g_bde_worker die"); 660 g_bde_purge_sector(sc, 1); 661 KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork)); 662 KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache)); 663 KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect)); 664 mtx_unlock(&sc->worklist_mutex); 665 sc->dead = 2; 666 wakeup(sc); 667 kproc_exit(0); 668} 669 670/* 671 * g_bde_start1 has chopped the incoming request up so all the requests 672 * we see here are inside a single zone. Map the data and key locations 673 * grab the buffers we need and fire off the first volley of read requests. 674 */ 675 676static void 677g_bde_start2(struct g_bde_work *wp) 678{ 679 struct g_bde_softc *sc; 680 681 KASSERT(wp != NULL, ("NULL wp in g_bde_start2")); 682 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 683 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp); 684 sc = wp->softc; 685 switch (wp->bp->bio_cmd) { 686 case BIO_READ: 687 wp->sp = g_bde_new_sector(wp, 0); 688 if (wp->sp == NULL) { 689 g_bde_work_done(wp, ENOMEM); 690 return; 691 } 692 wp->sp->size = wp->length; 693 wp->sp->data = wp->data; 694 if (g_bde_start_read(wp->sp) != 0) { 695 g_bde_work_done(wp, ENOMEM); 696 return; 697 } 698 g_bde_read_keysector(sc, wp); 699 if (wp->ksp == NULL) 700 wp->error = ENOMEM; 701 break; 702 case BIO_DELETE: 703 wp->sp = g_bde_new_sector(wp, wp->length); 704 if (wp->sp == NULL) { 705 g_bde_work_done(wp, ENOMEM); 706 return; 707 } 708 break; 709 case BIO_WRITE: 710 wp->sp = g_bde_new_sector(wp, wp->length); 711 if (wp->sp == NULL) { 712 g_bde_work_done(wp, ENOMEM); 713 return; 714 } 715 g_bde_read_keysector(sc, wp); 716 if (wp->ksp == NULL) { 717 g_bde_work_done(wp, ENOMEM); 718 return; 719 } 720 break; 721 default: 722 KASSERT(0 == 1, 723 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd)); 724 } 725 726 wp->state = WAIT; 727 wakeup(sc); 728} 729 730/* 731 * Create a sequence of work 
structures, and have g_bde_map_sector() determine
 * how long they each can be.  Feed them to g_bde_start2().
 *
 * If a work item cannot be allocated, or an error has been latched in the
 * bio, the part of the request which has not been handed out yet is
 * completed with ENOMEM and the loop stops.
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;
	int last;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for (done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp == NULL) {
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
		wp->bp = bp;
		wp->offset = bp->bio_offset + done;
		wp->data = bp->bio_data + done;
		wp->length = bp->bio_length - done;
		g_bde_map_sector(wp);
		done += wp->length;
		/*
		 * Decide whether this is the final chunk before handing it
		 * off.  If g_bde_start2() fails on the final chunk it may
		 * deliver the bio, after which bp must not be touched.
		 * Nothing is left to contribute in that case anyway.
		 */
		last = (done >= bp->bio_length);
		g_bde_start2(wp);
		if (last)
			break;
		if (bp->bio_error != 0) {
			/* Fail the part which has not been handed out. */
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
}