1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002 Poul-Henning Kamp 5 * Copyright (c) 2002 Networks Associates Technology, Inc. 6 * All rights reserved. 7 * 8 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 9 * and NAI Labs, the Security Research Division of Network Associates, Inc. 10 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 11 * DARPA CHATS research program. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $FreeBSD$ 35 */ 36/* 37 * This source file contains the state-engine which makes things happen in the 38 * right order. 
 *
 * Outline:
 *    1) g_bde_start1()
 *       Break the struct bio into multiple work packets one per zone.
 *    2) g_bde_start2()
 *       Setup the necessary sector buffers and start those read operations
 *       which we can start at this time and put the item on the work-list.
 *    3) g_bde_worker()
 *       Scan the work-list for items which are ready for crypto processing
 *       and call the matching crypto function in g_bde_crypt.c and schedule
 *       any writes needed.  Read operations finish here by releasing the
 *       sector buffers and delivering the original bio request.
 *    4) g_bde_write_done()
 *       Release sector buffers and deliver the original bio request.
 *
 * Because of the C-scope rules, the functions are almost perfectly in the
 * opposite order in this source file.
 *
 * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
 * XXX: additional states to this state-engine.  Since no hardware available
 * XXX: at this time has AES support, implementing this has been postponed
 * XXX: until such time as it would result in a benefit.
 */

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha512.h>
#include <geom/geom.h>
#include <geom/bde/g_bde.h>

/*
 * FIXME: This used to call malloc_last_fail which in practice was almost
 * guaranteed to return time_uptime even in face of severe memory shortage.
 * As GBDE is the only consumer the kludge below was added to facilitate the
 * removal with minimal changes.  The code should be fixed to respond to memory
 * pressure (e.g., by using lowmem eventhandler) instead.
86 */ 87static int 88g_bde_malloc_last_fail(void) 89{ 90 91 return (time_uptime); 92} 93 94static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp); 95static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len); 96static void g_bde_release_keysector(struct g_bde_work *wp); 97static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp); 98static int g_bde_start_read(struct g_bde_sector *sp); 99static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction); 100 101/* 102 * Work item allocation. 103 * 104 * C++ would call these constructors and destructors. 105 */ 106static u_int g_bde_nwork; 107SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, ""); 108 109static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures"); 110 111static struct g_bde_work * 112g_bde_new_work(struct g_bde_softc *sc) 113{ 114 struct g_bde_work *wp; 115 116 wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO); 117 if (wp == NULL) 118 return (wp); 119 wp->state = SETUP; 120 wp->softc = sc; 121 g_bde_nwork++; 122 sc->nwork++; 123 TAILQ_INSERT_TAIL(&sc->worklist, wp, list); 124 return (wp); 125} 126 127static void 128g_bde_delete_work(struct g_bde_work *wp) 129{ 130 struct g_bde_softc *sc; 131 132 sc = wp->softc; 133 g_bde_nwork--; 134 sc->nwork--; 135 TAILQ_REMOVE(&sc->worklist, wp, list); 136 free(wp, M_GBDE); 137} 138 139/* 140 * Sector buffer allocation 141 * 142 * These two functions allocate and free back variable sized sector buffers 143 */ 144 145static u_int g_bde_nsect; 146SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, ""); 147 148static void 149g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 150{ 151 152 g_bde_nsect--; 153 sc->nsect--; 154 if (sp->malloc) 155 free(sp->data, M_GBDE); 156 free(sp, M_GBDE); 157} 158 159static struct g_bde_sector * 160g_bde_new_sector(struct g_bde_work *wp, u_int len) 161{ 162 struct g_bde_sector *sp; 163 164 sp = malloc(sizeof *sp, 
M_GBDE, M_NOWAIT | M_ZERO); 165 if (sp == NULL) 166 return (sp); 167 if (len > 0) { 168 sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO); 169 if (sp->data == NULL) { 170 free(sp, M_GBDE); 171 return (NULL); 172 } 173 sp->malloc = 1; 174 } 175 g_bde_nsect++; 176 wp->softc->nsect++; 177 sp->size = len; 178 sp->softc = wp->softc; 179 sp->ref = 1; 180 sp->owner = wp; 181 sp->offset = wp->so; 182 sp->state = JUNK; 183 return (sp); 184} 185 186/* 187 * Skey sector cache. 188 * 189 * Nothing prevents two separate I/O requests from addressing the same zone 190 * and thereby needing the same skey sector. We therefore need to sequence 191 * I/O operations to the skey sectors. A certain amount of caching is also 192 * desirable, although the extent of benefit from this is not at this point 193 * determined. 194 * 195 * XXX: GEOM may be able to grow a generic caching facility at some point 196 * XXX: to support such needs. 197 */ 198 199static u_int g_bde_ncache; 200SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, ""); 201 202static void 203g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 204{ 205 206 g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp); 207 if (sp->ref != 0) 208 return; 209 TAILQ_REMOVE(&sc->freelist, sp, list); 210 g_bde_ncache--; 211 sc->ncache--; 212 bzero(sp->data, sp->size); 213 g_bde_delete_sector(sc, sp); 214} 215 216static struct g_bde_sector * 217g_bde_get_keysector(struct g_bde_work *wp) 218{ 219 struct g_bde_sector *sp; 220 struct g_bde_softc *sc; 221 off_t offset; 222 223 offset = wp->kso; 224 g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset); 225 sc = wp->softc; 226 227 if (g_bde_malloc_last_fail() < g_bde_ncache) 228 g_bde_purge_sector(sc, -1); 229 230 sp = TAILQ_FIRST(&sc->freelist); 231 if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime) 232 g_bde_purge_one_sector(sc, sp); 233 234 TAILQ_FOREACH(sp, &sc->freelist, list) { 235 if (sp->offset == 
offset) 236 break; 237 } 238 if (sp != NULL) { 239 sp->ref++; 240 KASSERT(sp->offset == offset, ("wrong offset")); 241 KASSERT(sp->softc == wp->softc, ("wrong softc")); 242 if (sp->ref == 1) 243 sp->owner = wp; 244 } else { 245 if (g_bde_malloc_last_fail() < g_bde_ncache) { 246 TAILQ_FOREACH(sp, &sc->freelist, list) 247 if (sp->ref == 0) 248 break; 249 } 250 if (sp == NULL && !TAILQ_EMPTY(&sc->freelist)) 251 sp = TAILQ_FIRST(&sc->freelist); 252 if (sp != NULL && sp->ref > 0) 253 sp = NULL; 254 if (sp == NULL) { 255 sp = g_bde_new_sector(wp, sc->sectorsize); 256 if (sp != NULL) { 257 g_bde_ncache++; 258 sc->ncache++; 259 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 260 sp->malloc = 2; 261 } 262 } 263 if (sp != NULL) { 264 sp->offset = offset; 265 sp->softc = wp->softc; 266 sp->ref = 1; 267 sp->owner = wp; 268 sp->state = JUNK; 269 sp->error = 0; 270 } 271 } 272 if (sp != NULL) { 273 TAILQ_REMOVE(&sc->freelist, sp, list); 274 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 275 sp->used = time_uptime; 276 } 277 wp->ksp = sp; 278 return(sp); 279} 280 281static void 282g_bde_release_keysector(struct g_bde_work *wp) 283{ 284 struct g_bde_softc *sc; 285 struct g_bde_work *wp2; 286 struct g_bde_sector *sp; 287 288 sp = wp->ksp; 289 g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp); 290 KASSERT(sp->malloc == 2, ("Wrong sector released")); 291 sc = sp->softc; 292 KASSERT(sc != NULL, ("NULL sp->softc")); 293 KASSERT(wp == sp->owner, ("Releasing, not owner")); 294 sp->owner = NULL; 295 wp->ksp = NULL; 296 sp->ref--; 297 if (sp->ref > 0) { 298 TAILQ_REMOVE(&sc->freelist, sp, list); 299 TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 300 TAILQ_FOREACH(wp2, &sc->worklist, list) { 301 if (wp2->ksp == sp) { 302 KASSERT(wp2 != wp, ("Self-reowning")); 303 sp->owner = wp2; 304 wakeup(sp->softc); 305 break; 306 } 307 } 308 KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp)); 309 } else if (sp->error != 0) { 310 sp->offset = ~0; 311 sp->error = 0; 312 sp->state = JUNK; 313 } 
314 TAILQ_REMOVE(&sc->freelist, sp, list); 315 TAILQ_INSERT_HEAD(&sc->freelist, sp, list); 316} 317 318static void 319g_bde_purge_sector(struct g_bde_softc *sc, int fraction) 320{ 321 struct g_bde_sector *sp; 322 int n; 323 324 g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc); 325 if (fraction > 0) 326 n = sc->ncache / fraction + 1; 327 else 328 n = g_bde_ncache - g_bde_malloc_last_fail(); 329 if (n < 0) 330 return; 331 if (n > sc->ncache) 332 n = sc->ncache; 333 while(n--) { 334 TAILQ_FOREACH(sp, &sc->freelist, list) { 335 if (sp->ref != 0) 336 continue; 337 TAILQ_REMOVE(&sc->freelist, sp, list); 338 g_bde_ncache--; 339 sc->ncache--; 340 bzero(sp->data, sp->size); 341 g_bde_delete_sector(sc, sp); 342 break; 343 } 344 } 345} 346 347static struct g_bde_sector * 348g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp) 349{ 350 struct g_bde_sector *sp; 351 352 g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp); 353 sp = g_bde_get_keysector(wp); 354 if (sp == NULL) { 355 g_bde_purge_sector(sc, -1); 356 sp = g_bde_get_keysector(wp); 357 } 358 if (sp == NULL) 359 return (sp); 360 if (sp->owner != wp) 361 return (sp); 362 if (sp->state == VALID) 363 return (sp); 364 if (g_bde_start_read(sp) == 0) 365 return (sp); 366 g_bde_release_keysector(wp); 367 return (NULL); 368} 369 370/* 371 * Contribute to the completion of the original bio request. 372 * 373 * We have no simple way to tell how many bits the original bio request has 374 * been segmented into, so the easiest way to determine when we can deliver 375 * it is to keep track of the number of bytes we have completed. We keep 376 * track of any errors underway and latch onto the first one. 377 * 378 * We always report "nothing done" in case of error, because random bits here 379 * and there may be completed and returning a number of completed bytes does 380 * not convey any useful information about which bytes they were. 
If some
 * piece of broken code somewhere interprets this to mean that nothing has
 * changed on the underlying media they deserve the lossage headed for them.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{

	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
	    bp, (intmax_t)bytes, error);
	/* Latch the first error reported against this bio. */
	if (bp->bio_error == 0)
		bp->bio_error = error;
	bp->bio_completed += bytes;
	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
	if (bp->bio_completed == bp->bio_length) {
		/* On error report zero bytes done; see the comment above. */
		if (bp->bio_error != 0)
			bp->bio_completed = 0;
		g_io_deliver(bp, bp->bio_error);
	}
}

/*
 * This is the common case "we're done with this work package" function
 *
 * Contribute the work packet's byte count (and error, if any) to the
 * original bio, release any sector buffers still attached, and free the
 * work packet itself.
 */

static void
g_bde_work_done(struct g_bde_work *wp, int error)
{

	g_bde_contribute(wp->bp, wp->length, error);
	if (wp->sp != NULL)
		g_bde_delete_sector(wp->softc, wp->sp);
	if (wp->ksp != NULL)
		g_bde_release_keysector(wp);
	g_bde_delete_work(wp);
}

/*
 * A write operation has finished.  When we have all expected cows in the
 * barn close the door and call it a day.
 */

static void
g_bde_write_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_work *wp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	KASSERT(sp != NULL, ("NULL sp"));
	KASSERT(sc != NULL, ("NULL sc"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
	/* A short write without an error code is still an error. */
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	g_destroy_bio(bp);
	wp = sp->owner;
	/* Latch the first error seen by this work packet. */
	if (wp->error == 0)
		wp->error = sp->error;

	if (wp->bp->bio_cmd == BIO_DELETE) {
		/* BIO_DELETE issues only the single data-sector write. */
		KASSERT(sp == wp->sp, ("trashed delete op"));
		g_bde_work_done(wp, wp->error);
		mtx_unlock(&sc->worklist_mutex);
		return;
	}

	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
	if (wp->sp == sp) {
		/* Data sector is done with; the skey sector stays cached. */
		g_bde_delete_sector(sc, wp->sp);
		wp->sp = NULL;
	} else {
		sp->state = VALID;
	}
	/* Finish only once both the data and the skey writes completed. */
	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
		g_bde_work_done(wp, wp->error);
	mtx_unlock(&sc->worklist_mutex);
	return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
	struct bio *bp;
	struct g_bde_softc *sc;

	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
	sc = sp->softc;
	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);	/* caller fails the work packet */
	bp->bio_cmd = BIO_WRITE;
	bp->bio_offset = sp->offset;
	bp->bio_data = sp->data;
	bp->bio_length = sp->size;
	bp->bio_done = g_bde_write_done;
	bp->bio_caller1 = sp;
	bp->bio_caller2 = sc;
	sp->state = IO;
	g_io_request(bp, sc->consumer);
	return(0);
}

/*
 * A read operation has finished.  Mark the sector no longer iobusy and
 * wake up the worker thread and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
	struct g_bde_sector *sp;
	struct g_bde_softc *sc;

	sp = bp->bio_caller1;
	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
	sc = bp->bio_caller2;
	mtx_lock(&sc->worklist_mutex);
	/* A short read without an error code is still an error. */
	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
		bp->bio_error = EIO;
	sp->error = bp->bio_error;
	if (sp->error == 0)
		sp->state = VALID;
	else
		sp->state = JUNK;
	/* Kick the worker thread; it sleeps on the softc. */
	wakeup(sc);
	g_destroy_bio(bp);
	mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
526 */ 527 528static int 529g_bde_start_read(struct g_bde_sector *sp) 530{ 531 struct bio *bp; 532 struct g_bde_softc *sc; 533 534 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp); 535 sc = sp->softc; 536 KASSERT(sc != NULL, ("Null softc in sp %p", sp)); 537 bp = g_new_bio(); 538 if (bp == NULL) 539 return (ENOMEM); 540 bp->bio_cmd = BIO_READ; 541 bp->bio_offset = sp->offset; 542 bp->bio_data = sp->data; 543 bp->bio_length = sp->size; 544 bp->bio_done = g_bde_read_done; 545 bp->bio_caller1 = sp; 546 bp->bio_caller2 = sc; 547 sp->state = IO; 548 g_io_request(bp, sc->consumer); 549 return(0); 550} 551 552/* 553 * The worker thread. 554 * 555 * The up/down path of GEOM is not allowed to sleep or do any major work 556 * so we use this thread to do the actual crypto operations and to push 557 * the state engine onwards. 558 * 559 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption 560 * XXX: using a thread here is probably not needed. 561 */ 562 563void 564g_bde_worker(void *arg) 565{ 566 struct g_bde_softc *sc; 567 struct g_bde_work *wp, *twp; 568 struct g_geom *gp; 569 int restart, error; 570 571 gp = arg; 572 sc = gp->softc; 573 574 mtx_lock(&sc->worklist_mutex); 575 for (;;) { 576 restart = 0; 577 g_trace(G_T_TOPOLOGY, "g_bde_worker scan"); 578 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) { 579 KASSERT(wp != NULL, ("NULL wp")); 580 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 581 if (wp->state != WAIT) 582 continue; /* Not interesting here */ 583 584 KASSERT(wp->bp != NULL, ("NULL wp->bp")); 585 KASSERT(wp->sp != NULL, ("NULL wp->sp")); 586 587 if (wp->ksp != NULL) { 588 if (wp->ksp->owner != wp) 589 continue; 590 if (wp->ksp->state == IO) 591 continue; 592 KASSERT(wp->ksp->state == VALID, 593 ("Illegal sector state (%d)", 594 wp->ksp->state)); 595 } 596 597 if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO) 598 continue; 599 600 if (wp->ksp != NULL && wp->ksp->error != 0) { 601 g_bde_work_done(wp, wp->ksp->error); 602 continue; 
603 } 604 switch(wp->bp->bio_cmd) { 605 case BIO_READ: 606 if (wp->ksp == NULL) { 607 KASSERT(wp->error != 0, 608 ("BIO_READ, no ksp and no error")); 609 g_bde_work_done(wp, wp->error); 610 break; 611 } 612 if (wp->sp->error != 0) { 613 g_bde_work_done(wp, wp->sp->error); 614 break; 615 } 616 mtx_unlock(&sc->worklist_mutex); 617 g_bde_crypt_read(wp); 618 mtx_lock(&sc->worklist_mutex); 619 restart++; 620 g_bde_work_done(wp, wp->sp->error); 621 break; 622 case BIO_WRITE: 623 wp->state = FINISH; 624 KASSERT(wp->sp->owner == wp, 625 ("Write not owner sp")); 626 KASSERT(wp->ksp->owner == wp, 627 ("Write not owner ksp")); 628 mtx_unlock(&sc->worklist_mutex); 629 g_bde_crypt_write(wp); 630 mtx_lock(&sc->worklist_mutex); 631 restart++; 632 error = g_bde_start_write(wp->sp); 633 if (error) { 634 g_bde_work_done(wp, error); 635 break; 636 } 637 error = g_bde_start_write(wp->ksp); 638 if (wp->error != 0) 639 wp->error = error; 640 break; 641 case BIO_DELETE: 642 wp->state = FINISH; 643 mtx_unlock(&sc->worklist_mutex); 644 g_bde_crypt_delete(wp); 645 mtx_lock(&sc->worklist_mutex); 646 restart++; 647 g_bde_start_write(wp->sp); 648 break; 649 } 650 if (restart) 651 break; 652 } 653 if (!restart) { 654 /* 655 * We don't look for our death-warrant until we are 656 * idle. Shouldn't make a difference in practice. 657 */ 658 if (sc->dead) 659 break; 660 g_trace(G_T_TOPOLOGY, "g_bde_worker sleep"); 661 error = msleep(sc, &sc->worklist_mutex, 662 PRIBIO, "-", hz); 663 if (error == EWOULDBLOCK) { 664 /* 665 * Lose our skey cache in an orderly fashion. 666 * The exact rate can be tuned to be less 667 * aggressive if this is desirable. 10% per 668 * second means that the cache is gone in a 669 * few minutes. 
670 */ 671 g_bde_purge_sector(sc, 10); 672 } 673 } 674 } 675 g_trace(G_T_TOPOLOGY, "g_bde_worker die"); 676 g_bde_purge_sector(sc, 1); 677 KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork)); 678 KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache)); 679 KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect)); 680 mtx_unlock(&sc->worklist_mutex); 681 sc->dead = 2; 682 wakeup(sc); 683 kproc_exit(0); 684} 685 686/* 687 * g_bde_start1 has chopped the incoming request up so all the requests 688 * we see here are inside a single zone. Map the data and key locations 689 * grab the buffers we need and fire off the first volley of read requests. 690 */ 691 692static void 693g_bde_start2(struct g_bde_work *wp) 694{ 695 struct g_bde_softc *sc; 696 697 KASSERT(wp != NULL, ("NULL wp in g_bde_start2")); 698 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 699 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp); 700 sc = wp->softc; 701 switch (wp->bp->bio_cmd) { 702 case BIO_READ: 703 wp->sp = g_bde_new_sector(wp, 0); 704 if (wp->sp == NULL) { 705 g_bde_work_done(wp, ENOMEM); 706 return; 707 } 708 wp->sp->size = wp->length; 709 wp->sp->data = wp->data; 710 if (g_bde_start_read(wp->sp) != 0) { 711 g_bde_work_done(wp, ENOMEM); 712 return; 713 } 714 g_bde_read_keysector(sc, wp); 715 if (wp->ksp == NULL) 716 wp->error = ENOMEM; 717 break; 718 case BIO_DELETE: 719 wp->sp = g_bde_new_sector(wp, wp->length); 720 if (wp->sp == NULL) { 721 g_bde_work_done(wp, ENOMEM); 722 return; 723 } 724 break; 725 case BIO_WRITE: 726 wp->sp = g_bde_new_sector(wp, wp->length); 727 if (wp->sp == NULL) { 728 g_bde_work_done(wp, ENOMEM); 729 return; 730 } 731 g_bde_read_keysector(sc, wp); 732 if (wp->ksp == NULL) { 733 g_bde_work_done(wp, ENOMEM); 734 return; 735 } 736 break; 737 default: 738 KASSERT(0 == 1, 739 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd)); 740 } 741 742 wp->state = WAIT; 743 wakeup(sc); 744} 745 746/* 747 * Create a sequence of work 
structures, and have g_bde_map_sector() determine
 * how long they each can be.  Feed them to g_bde_start2().
 */

void
g_bde_start1(struct bio *bp)
{
	struct g_bde_softc *sc;
	struct g_bde_work *wp;
	off_t done;

	sc = bp->bio_to->geom->softc;
	bp->bio_driver1 = sc;

	mtx_lock(&sc->worklist_mutex);
	for(done = 0; done < bp->bio_length; ) {
		wp = g_bde_new_work(sc);
		if (wp != NULL) {
			wp->bp = bp;
			wp->offset = bp->bio_offset + done;
			wp->data = bp->bio_data + done;
			wp->length = bp->bio_length - done;
			/* g_bde_map_sector() trims wp->length to the zone. */
			g_bde_map_sector(wp);
			done += wp->length;
			g_bde_start2(wp);
		}
		if (wp == NULL || bp->bio_error != 0) {
			/* Fail the part of the request not started yet. */
			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
			break;
		}
	}
	mtx_unlock(&sc->worklist_mutex);
	return;
}