/* g_bde_work.c revision 125591 */
1169691Skan/*- 2169691Skan * Copyright (c) 2002 Poul-Henning Kamp 3169691Skan * Copyright (c) 2002 Networks Associates Technology, Inc. 4169691Skan * All rights reserved. 5169691Skan * 6169691Skan * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7169691Skan * and NAI Labs, the Security Research Division of Network Associates, Inc. 8169691Skan * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9169691Skan * DARPA CHATS research program. 10169691Skan * 11169691Skan * Redistribution and use in source and binary forms, with or without 12169691Skan * modification, are permitted provided that the following conditions 13169691Skan * are met: 14169691Skan * 1. Redistributions of source code must retain the above copyright 15169691Skan * notice, this list of conditions and the following disclaimer. 16169691Skan * 2. Redistributions in binary form must reproduce the above copyright 17169691Skan * notice, this list of conditions and the following disclaimer in the 18169691Skan * documentation and/or other materials provided with the distribution. 19169691Skan * 20169691Skan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21169691Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22169691Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23169691Skan * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24169691Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25169691Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26169691Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27169691Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28169691Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29169691Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30169691Skan * SUCH DAMAGE. 31169691Skan * 32169691Skan * $FreeBSD: head/sys/geom/bde/g_bde_work.c 125591 2004-02-08 10:19:18Z phk $ 33169691Skan * 34169691Skan * This source file contains the state-engine which makes things happen in the 35169691Skan * right order. 36169691Skan * 37169691Skan * Outline: 38169691Skan * 1) g_bde_start1() 39169691Skan * Break the struct bio into multiple work packets one per zone. 40169691Skan * 2) g_bde_start2() 41169691Skan * Setup the necessary sector buffers and start those read operations 42169691Skan * which we can start at this time and put the item on the work-list. 43169691Skan * 3) g_bde_worker() 44169691Skan * Scan the work-list for items which are ready for crypto processing 45169691Skan * and call the matching crypto function in g_bde_crypt.c and schedule 46169691Skan * any writes needed. Read operations finish here by releasing the 47169691Skan * sector buffers and delivering the original bio request. 48169691Skan * 4) g_bde_write_done() 49169691Skan * Release sector buffers and deliver the original bio request. 50169691Skan * 51169691Skan * Because of the C-scope rules, the functions are almost perfectly in the 52169691Skan * opposite order in this source file. 53169691Skan * 54169691Skan * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add 55169691Skan * XXX: additional states to this state-engine. 
Since no hardware available 56169691Skan * XXX: at this time has AES support, implementing this has been postponed 57169691Skan * XXX: until such time as it would result in a benefit. 58169691Skan */ 59169691Skan 60169691Skan#include <sys/param.h> 61169691Skan#include <sys/bio.h> 62169691Skan#include <sys/lock.h> 63169691Skan#include <sys/mutex.h> 64169691Skan#include <sys/queue.h> 65169691Skan#include <sys/malloc.h> 66169691Skan#include <sys/systm.h> 67169691Skan#include <sys/kernel.h> 68169691Skan#include <sys/sysctl.h> 69169691Skan#include <sys/proc.h> 70169691Skan#include <sys/kthread.h> 71169691Skan 72169691Skan#include <crypto/rijndael/rijndael.h> 73169691Skan#include <crypto/sha2/sha2.h> 74169691Skan#include <geom/geom.h> 75169691Skan#include <geom/bde/g_bde.h> 76169691Skan 77169691Skanstatic void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp); 78169691Skanstatic struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len); 79169691Skanstatic void g_bde_release_keysector(struct g_bde_work *wp); 80169691Skanstatic struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp); 81169691Skanstatic int g_bde_start_read(struct g_bde_sector *sp); 82169691Skanstatic void g_bde_purge_sector(struct g_bde_softc *sc, int fraction); 83169691Skan 84169691Skan/* 85169691Skan * Work item allocation. 86169691Skan * 87169691Skan * C++ would call these constructors and destructors. 
88169691Skan */ 89169691Skanstatic u_int g_bde_nwork; 90169691SkanSYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, ""); 91169691Skan 92169691Skanstatic MALLOC_DEFINE(M_GBDE, "GBDE", "GBDE data structures"); 93169691Skan 94169691Skanstatic struct g_bde_work * 95169691Skang_bde_new_work(struct g_bde_softc *sc) 96169691Skan{ 97169691Skan struct g_bde_work *wp; 98169691Skan 99169691Skan wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO); 100169691Skan if (wp == NULL) 101169691Skan return (wp); 102169691Skan wp->state = SETUP; 103169691Skan wp->softc = sc; 104169691Skan g_bde_nwork++; 105169691Skan sc->nwork++; 106169691Skan TAILQ_INSERT_TAIL(&sc->worklist, wp, list); 107169691Skan return (wp); 108169691Skan} 109169691Skan 110169691Skanstatic void 111169691Skang_bde_delete_work(struct g_bde_work *wp) 112169691Skan{ 113169691Skan struct g_bde_softc *sc; 114169691Skan 115169691Skan sc = wp->softc; 116169691Skan g_bde_nwork--; 117169691Skan sc->nwork--; 118169691Skan TAILQ_REMOVE(&sc->worklist, wp, list); 119169691Skan free(wp, M_GBDE); 120169691Skan} 121169691Skan 122169691Skan/* 123169691Skan * Sector buffer allocation 124169691Skan * 125169691Skan * These two functions allocate and free back variable sized sector buffers 126169691Skan */ 127169691Skan 128169691Skanstatic u_int g_bde_nsect; 129169691SkanSYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, ""); 130169691Skan 131169691Skanstatic void 132169691Skang_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 133169691Skan{ 134169691Skan 135169691Skan g_bde_nsect--; 136169691Skan sc->nsect--; 137169691Skan if (sp->malloc) 138169691Skan free(sp->data, M_GBDE); 139169691Skan free(sp, M_GBDE); 140169691Skan} 141169691Skan 142169691Skanstatic struct g_bde_sector * 143169691Skang_bde_new_sector(struct g_bde_work *wp, u_int len) 144169691Skan{ 145169691Skan struct g_bde_sector *sp; 146169691Skan 147169691Skan sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO); 
148169691Skan if (sp == NULL) 149169691Skan return (sp); 150169691Skan if (len > 0) { 151169691Skan sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO); 152169691Skan if (sp->data == NULL) { 153169691Skan free(sp, M_GBDE); 154169691Skan return (NULL); 155169691Skan } 156169691Skan sp->malloc = 1; 157169691Skan } 158169691Skan g_bde_nsect++; 159169691Skan wp->softc->nsect++; 160169691Skan sp->size = len; 161169691Skan sp->softc = wp->softc; 162169691Skan sp->ref = 1; 163169691Skan sp->owner = wp; 164169691Skan sp->offset = wp->so; 165169691Skan sp->state = JUNK; 166169691Skan return (sp); 167169691Skan} 168169691Skan 169169691Skan/* 170169691Skan * Skey sector cache. 171169691Skan * 172169691Skan * Nothing prevents two separate I/O requests from addressing the same zone 173169691Skan * and thereby needing the same skey sector. We therefore need to sequence 174169691Skan * I/O operations to the skey sectors. A certain amount of caching is also 175169691Skan * desirable, although the extent of benefit from this is not at this point 176169691Skan * determined. 177169691Skan * 178169691Skan * XXX: GEOM may be able to grow a generic caching facility at some point 179169691Skan * XXX: to support such needs. 
180169691Skan */ 181169691Skan 182169691Skanstatic u_int g_bde_ncache; 183169691SkanSYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, ""); 184169691Skan 185169691Skanstatic void 186169691Skang_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 187169691Skan{ 188169691Skan 189169691Skan g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp); 190169691Skan if (sp->ref != 0) 191169691Skan return; 192169691Skan TAILQ_REMOVE(&sc->freelist, sp, list); 193169691Skan g_bde_ncache--; 194169691Skan sc->ncache--; 195169691Skan bzero(sp->data, sp->size); 196169691Skan g_bde_delete_sector(sc, sp); 197169691Skan} 198169691Skan 199169691Skanstatic struct g_bde_sector * 200169691Skang_bde_get_keysector(struct g_bde_work *wp) 201169691Skan{ 202169691Skan struct g_bde_sector *sp; 203169691Skan struct g_bde_softc *sc; 204169691Skan off_t offset; 205169691Skan 206169691Skan offset = wp->kso; 207169691Skan g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset); 208169691Skan sc = wp->softc; 209169691Skan 210169691Skan if (malloc_last_fail() < g_bde_ncache) 211169691Skan g_bde_purge_sector(sc, -1); 212169691Skan 213169691Skan sp = TAILQ_FIRST(&sc->freelist); 214169691Skan if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime) 215169691Skan g_bde_purge_one_sector(sc, sp); 216169691Skan 217169691Skan TAILQ_FOREACH(sp, &sc->freelist, list) { 218169691Skan if (sp->offset == offset) 219169691Skan break; 220169691Skan } 221169691Skan if (sp != NULL) { 222169691Skan sp->ref++; 223169691Skan KASSERT(sp->offset == offset, ("wrong offset")); 224169691Skan KASSERT(sp->softc == wp->softc, ("wrong softc")); 225169691Skan if (sp->ref == 1) 226169691Skan sp->owner = wp; 227169691Skan } else { 228169691Skan if (malloc_last_fail() < g_bde_ncache) { 229169691Skan TAILQ_FOREACH(sp, &sc->freelist, list) 230169691Skan if (sp->ref == 0) 231169691Skan break; 232169691Skan } 233169691Skan if (sp == NULL && 
!TAILQ_EMPTY(&sc->freelist)) 234169691Skan sp = TAILQ_FIRST(&sc->freelist); 235169691Skan if (sp != NULL && sp->ref > 0) 236169691Skan sp = NULL; 237169691Skan if (sp == NULL) { 238169691Skan sp = g_bde_new_sector(wp, sc->sectorsize); 239169691Skan if (sp != NULL) { 240169691Skan g_bde_ncache++; 241169691Skan sc->ncache++; 242169691Skan TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 243169691Skan sp->malloc = 2; 244169691Skan } 245169691Skan } 246169691Skan if (sp != NULL) { 247169691Skan sp->offset = offset; 248169691Skan sp->softc = wp->softc; 249169691Skan sp->ref = 1; 250169691Skan sp->owner = wp; 251169691Skan sp->state = JUNK; 252169691Skan sp->error = 0; 253169691Skan } 254169691Skan } 255169691Skan if (sp != NULL) { 256169691Skan TAILQ_REMOVE(&sc->freelist, sp, list); 257169691Skan TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 258169691Skan sp->used = time_uptime; 259169691Skan } 260169691Skan wp->ksp = sp; 261169691Skan return(sp); 262169691Skan} 263169691Skan 264169691Skanstatic void 265169691Skang_bde_release_keysector(struct g_bde_work *wp) 266169691Skan{ 267169691Skan struct g_bde_softc *sc; 268169691Skan struct g_bde_work *wp2; 269169691Skan struct g_bde_sector *sp; 270169691Skan 271169691Skan sp = wp->ksp; 272169691Skan g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp); 273169691Skan KASSERT(sp->malloc == 2, ("Wrong sector released")); 274169691Skan sc = sp->softc; 275169691Skan KASSERT(sc != NULL, ("NULL sp->softc")); 276169691Skan KASSERT(wp == sp->owner, ("Releasing, not owner")); 277169691Skan sp->owner = NULL; 278169691Skan wp->ksp = NULL; 279169691Skan sp->ref--; 280169691Skan if (sp->ref > 0) { 281169691Skan TAILQ_REMOVE(&sc->freelist, sp, list); 282169691Skan TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 283169691Skan TAILQ_FOREACH(wp2, &sc->worklist, list) { 284169691Skan if (wp2->ksp == sp) { 285169691Skan KASSERT(wp2 != wp, ("Self-reowning")); 286169691Skan sp->owner = wp2; 287169691Skan wakeup(sp->softc); 288169691Skan break; 
289169691Skan } 290169691Skan } 291169691Skan KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp)); 292169691Skan } else if (sp->error != 0) { 293169691Skan sp->offset = ~0; 294169691Skan sp->error = 0; 295169691Skan sp->state = JUNK; 296169691Skan } 297169691Skan TAILQ_REMOVE(&sc->freelist, sp, list); 298169691Skan TAILQ_INSERT_HEAD(&sc->freelist, sp, list); 299169691Skan} 300169691Skan 301169691Skanstatic void 302169691Skang_bde_purge_sector(struct g_bde_softc *sc, int fraction) 303169691Skan{ 304169691Skan struct g_bde_sector *sp; 305169691Skan int n; 306169691Skan 307169691Skan g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc); 308169691Skan if (fraction > 0) 309169691Skan n = sc->ncache / fraction + 1; 310169691Skan else 311169691Skan n = g_bde_ncache - malloc_last_fail(); 312169691Skan if (n < 0) 313169691Skan return; 314169691Skan if (n > sc->ncache) 315169691Skan n = sc->ncache; 316169691Skan while(n--) { 317169691Skan TAILQ_FOREACH(sp, &sc->freelist, list) { 318169691Skan if (sp->ref != 0) 319169691Skan continue; 320169691Skan TAILQ_REMOVE(&sc->freelist, sp, list); 321169691Skan g_bde_ncache--; 322169691Skan sc->ncache--; 323169691Skan bzero(sp->data, sp->size); 324169691Skan g_bde_delete_sector(sc, sp); 325169691Skan break; 326169691Skan } 327169691Skan } 328169691Skan} 329169691Skan 330169691Skanstatic struct g_bde_sector * 331169691Skang_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp) 332169691Skan{ 333169691Skan struct g_bde_sector *sp; 334169691Skan 335169691Skan g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp); 336169691Skan sp = g_bde_get_keysector(wp); 337169691Skan if (sp == NULL) { 338169691Skan g_bde_purge_sector(sc, -1); 339169691Skan sp = g_bde_get_keysector(wp); 340169691Skan } 341169691Skan if (sp == NULL) 342169691Skan return (sp); 343169691Skan if (sp->owner != wp) 344169691Skan return (sp); 345169691Skan if (sp->state == VALID) 346169691Skan return (sp); 347169691Skan if (g_bde_start_read(sp) == 0) 
348169691Skan return (sp); 349169691Skan g_bde_release_keysector(wp); 350169691Skan return (NULL); 351169691Skan} 352169691Skan 353169691Skan/* 354169691Skan * Contribute to the completion of the original bio request. 355169691Skan * 356169691Skan * We have no simple way to tell how many bits the original bio request has 357169691Skan * been segmented into, so the easiest way to determine when we can deliver 358 * it is to keep track of the number of bytes we have completed. We keep 359 * track of any errors underway and latch onto the first one. 360 * 361 * We always report "nothing done" in case of error, because random bits here 362 * and there may be completed and returning a number of completed bytes does 363 * not convey any useful information about which bytes they were. If some 364 * piece of broken code somewhere interprets this to mean that nothing has 365 * changed on the underlying media they deserve the lossage headed for them. 366 * 367 * A single mutex per g_bde instance is used to prevent contention. 368 */ 369 370static void 371g_bde_contribute(struct bio *bp, off_t bytes, int error) 372{ 373 374 g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d", 375 bp, (intmax_t)bytes, error); 376 if (bp->bio_error == 0) 377 bp->bio_error = error; 378 bp->bio_completed += bytes; 379 KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution")); 380 if (bp->bio_completed == bp->bio_length) { 381 if (bp->bio_error != 0) 382 bp->bio_completed = 0; 383 g_io_deliver(bp, bp->bio_error); 384 } 385} 386 387/* 388 * This is the common case "we're done with this work package" function 389 */ 390 391static void 392g_bde_work_done(struct g_bde_work *wp, int error) 393{ 394 395 g_bde_contribute(wp->bp, wp->length, error); 396 if (wp->sp != NULL) 397 g_bde_delete_sector(wp->softc, wp->sp); 398 if (wp->ksp != NULL) 399 g_bde_release_keysector(wp); 400 g_bde_delete_work(wp); 401} 402 403/* 404 * A write operation has finished. 
When we have all expected cows in the 405 * barn close the door and call it a day. 406 */ 407 408static void 409g_bde_write_done(struct bio *bp) 410{ 411 struct g_bde_sector *sp; 412 struct g_bde_work *wp; 413 struct g_bde_softc *sc; 414 415 sp = bp->bio_caller1; 416 sc = bp->bio_caller2; 417 mtx_lock(&sc->worklist_mutex); 418 KASSERT(sp != NULL, ("NULL sp")); 419 KASSERT(sc != NULL, ("NULL sc")); 420 KASSERT(sp->owner != NULL, ("NULL sp->owner")); 421 g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp); 422 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 423 bp->bio_error = EIO; 424 sp->error = bp->bio_error; 425 g_destroy_bio(bp); 426 wp = sp->owner; 427 if (wp->error == 0) 428 wp->error = sp->error; 429 430 if (wp->bp->bio_cmd == BIO_DELETE) { 431 KASSERT(sp == wp->sp, ("trashed delete op")); 432 g_bde_work_done(wp, wp->error); 433 mtx_unlock(&sc->worklist_mutex); 434 return; 435 } 436 437 KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()")); 438 KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op")); 439 if (wp->sp == sp) { 440 g_bde_delete_sector(sc, wp->sp); 441 wp->sp = NULL; 442 } else { 443 sp->state = VALID; 444 } 445 if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) 446 g_bde_work_done(wp, wp->error); 447 mtx_unlock(&sc->worklist_mutex); 448 return; 449} 450 451/* 452 * Send a write request for the given sector down the pipeline. 
453 */ 454 455static int 456g_bde_start_write(struct g_bde_sector *sp) 457{ 458 struct bio *bp; 459 struct g_bde_softc *sc; 460 461 g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp); 462 sc = sp->softc; 463 KASSERT(sc != NULL, ("NULL sc in g_bde_start_write")); 464 KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write")); 465 bp = g_new_bio(); 466 if (bp == NULL) 467 return (ENOMEM); 468 bp->bio_cmd = BIO_WRITE; 469 bp->bio_offset = sp->offset; 470 bp->bio_data = sp->data; 471 bp->bio_length = sp->size; 472 bp->bio_done = g_bde_write_done; 473 bp->bio_caller1 = sp; 474 bp->bio_caller2 = sc; 475 sp->state = IO; 476 g_io_request(bp, sc->consumer); 477 return(0); 478} 479 480/* 481 * A read operation has finished. Mark the sector no longer iobusy and 482 * wake up the worker thread and let it do its thing. 483 */ 484 485static void 486g_bde_read_done(struct bio *bp) 487{ 488 struct g_bde_sector *sp; 489 struct g_bde_softc *sc; 490 491 sp = bp->bio_caller1; 492 g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp); 493 sc = bp->bio_caller2; 494 mtx_lock(&sc->worklist_mutex); 495 if (bp->bio_error == 0 && bp->bio_completed != sp->size) 496 bp->bio_error = EIO; 497 sp->error = bp->bio_error; 498 if (sp->error == 0) 499 sp->state = VALID; 500 else 501 sp->state = JUNK; 502 wakeup(sc); 503 g_destroy_bio(bp); 504 mtx_unlock(&sc->worklist_mutex); 505} 506 507/* 508 * Send a read request for the given sector down the pipeline. 
509 */ 510 511static int 512g_bde_start_read(struct g_bde_sector *sp) 513{ 514 struct bio *bp; 515 struct g_bde_softc *sc; 516 517 g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp); 518 sc = sp->softc; 519 KASSERT(sc != NULL, ("Null softc in sp %p", sp)); 520 bp = g_new_bio(); 521 if (bp == NULL) 522 return (ENOMEM); 523 bp->bio_cmd = BIO_READ; 524 bp->bio_offset = sp->offset; 525 bp->bio_data = sp->data; 526 bp->bio_length = sp->size; 527 bp->bio_done = g_bde_read_done; 528 bp->bio_caller1 = sp; 529 bp->bio_caller2 = sc; 530 sp->state = IO; 531 g_io_request(bp, sc->consumer); 532 return(0); 533} 534 535/* 536 * The worker thread. 537 * 538 * The up/down path of GEOM is not allowed to sleep or do any major work 539 * so we use this thread to do the actual crypto operations and to push 540 * the state engine onwards. 541 * 542 * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption 543 * XXX: using a thread here is probably not needed. 544 */ 545 546void 547g_bde_worker(void *arg) 548{ 549 struct g_bde_softc *sc; 550 struct g_bde_work *wp, *twp; 551 struct g_geom *gp; 552 int restart, error; 553 554 gp = arg; 555 sc = gp->softc; 556 557 mtx_lock(&sc->worklist_mutex); 558 for (;;) { 559 restart = 0; 560 g_trace(G_T_TOPOLOGY, "g_bde_worker scan"); 561 TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) { 562 KASSERT(wp != NULL, ("NULL wp")); 563 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 564 if (wp->state != WAIT) 565 continue; /* Not interesting here */ 566 567 KASSERT(wp->bp != NULL, ("NULL wp->bp")); 568 KASSERT(wp->sp != NULL, ("NULL wp->sp")); 569 570 if (wp->ksp != NULL) { 571 if (wp->ksp->owner != wp) 572 continue; 573 if (wp->ksp->state == IO) 574 continue; 575 KASSERT(wp->ksp->state == VALID, 576 ("Illegal sector state (%d)", 577 wp->ksp->state)); 578 } 579 580 if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO) 581 continue; 582 583 if (wp->ksp != NULL && wp->ksp->error != 0) { 584 g_bde_work_done(wp, wp->ksp->error); 585 continue; 
586 } 587 switch(wp->bp->bio_cmd) { 588 case BIO_READ: 589 if (wp->ksp == NULL) { 590 KASSERT(wp->error != 0, 591 ("BIO_READ, no ksp and no error")); 592 g_bde_work_done(wp, wp->error); 593 break; 594 } 595 if (wp->sp->error != 0) { 596 g_bde_work_done(wp, wp->sp->error); 597 break; 598 } 599 mtx_unlock(&sc->worklist_mutex); 600 g_bde_crypt_read(wp); 601 mtx_lock(&sc->worklist_mutex); 602 restart++; 603 g_bde_work_done(wp, wp->sp->error); 604 break; 605 case BIO_WRITE: 606 wp->state = FINISH; 607 KASSERT(wp->sp->owner == wp, 608 ("Write not owner sp")); 609 KASSERT(wp->ksp->owner == wp, 610 ("Write not owner ksp")); 611 mtx_unlock(&sc->worklist_mutex); 612 g_bde_crypt_write(wp); 613 mtx_lock(&sc->worklist_mutex); 614 restart++; 615 error = g_bde_start_write(wp->sp); 616 if (error) { 617 g_bde_work_done(wp, error); 618 break; 619 } 620 error = g_bde_start_write(wp->ksp); 621 if (wp->error != 0) 622 wp->error = error; 623 break; 624 case BIO_DELETE: 625 wp->state = FINISH; 626 mtx_unlock(&sc->worklist_mutex); 627 g_bde_crypt_delete(wp); 628 mtx_lock(&sc->worklist_mutex); 629 restart++; 630 g_bde_start_write(wp->sp); 631 break; 632 } 633 if (restart) 634 break; 635 } 636 if (!restart) { 637 /* 638 * We don't look for our death-warrant until we are 639 * idle. Shouldn't make a difference in practice. 640 */ 641 if (sc->dead) 642 break; 643 g_trace(G_T_TOPOLOGY, "g_bde_worker sleep"); 644 error = msleep(sc, &sc->worklist_mutex, 645 PRIBIO, "-", hz); 646 if (error == EWOULDBLOCK) { 647 /* 648 * Loose our skey cache in an orderly fashion. 649 * The exact rate can be tuned to be less 650 * aggressive if this is desirable. 10% per 651 * second means that the cache is gone in a 652 * few minutes. 
653 */ 654 g_bde_purge_sector(sc, 10); 655 } 656 } 657 } 658 g_trace(G_T_TOPOLOGY, "g_bde_worker die"); 659 g_bde_purge_sector(sc, 1); 660 KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork)); 661 KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache)); 662 KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect)); 663 mtx_unlock(&sc->worklist_mutex); 664 sc->dead = 2; 665 wakeup(sc); 666 mtx_lock(&Giant); 667 kthread_exit(0); 668} 669 670/* 671 * g_bde_start1 has chopped the incoming request up so all the requests 672 * we see here are inside a single zone. Map the data and key locations 673 * grab the buffers we need and fire off the first volley of read requests. 674 */ 675 676static void 677g_bde_start2(struct g_bde_work *wp) 678{ 679 struct g_bde_softc *sc; 680 681 KASSERT(wp != NULL, ("NULL wp in g_bde_start2")); 682 KASSERT(wp->softc != NULL, ("NULL wp->softc")); 683 g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp); 684 sc = wp->softc; 685 switch (wp->bp->bio_cmd) { 686 case BIO_READ: 687 wp->sp = g_bde_new_sector(wp, 0); 688 if (wp->sp == NULL) { 689 g_bde_work_done(wp, ENOMEM); 690 return; 691 } 692 wp->sp->size = wp->length; 693 wp->sp->data = wp->data; 694 if (g_bde_start_read(wp->sp) != 0) { 695 g_bde_work_done(wp, ENOMEM); 696 return; 697 } 698 g_bde_read_keysector(sc, wp); 699 if (wp->ksp == NULL) 700 wp->error = ENOMEM; 701 break; 702 case BIO_DELETE: 703 wp->sp = g_bde_new_sector(wp, wp->length); 704 if (wp->sp == NULL) { 705 g_bde_work_done(wp, ENOMEM); 706 return; 707 } 708 break; 709 case BIO_WRITE: 710 wp->sp = g_bde_new_sector(wp, wp->length); 711 if (wp->sp == NULL) { 712 g_bde_work_done(wp, ENOMEM); 713 return; 714 } 715 g_bde_read_keysector(sc, wp); 716 if (wp->ksp == NULL) { 717 g_bde_work_done(wp, ENOMEM); 718 return; 719 } 720 break; 721 default: 722 KASSERT(0 == 1, 723 ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd)); 724 } 725 726 wp->state = WAIT; 727 wakeup(sc); 728} 729 730/* 731 * Create a 
sequence of work structures, and have g_bde_map_sector() determine 732 * how long they each can be. Feed them to g_bde_start2(). 733 */ 734 735void 736g_bde_start1(struct bio *bp) 737{ 738 struct g_bde_softc *sc; 739 struct g_bde_work *wp; 740 off_t done; 741 742 sc = bp->bio_to->geom->softc; 743 bp->bio_driver1 = sc; 744 745 mtx_lock(&sc->worklist_mutex); 746 for(done = 0; done < bp->bio_length; ) { 747 wp = g_bde_new_work(sc); 748 if (wp != NULL) { 749 wp->bp = bp; 750 wp->offset = bp->bio_offset + done; 751 wp->data = bp->bio_data + done; 752 wp->length = bp->bio_length - done; 753 g_bde_map_sector(wp); 754 done += wp->length; 755 g_bde_start2(wp); 756 } 757 if (wp == NULL || bp->bio_error != 0) { 758 g_bde_contribute(bp, bp->bio_length - done, ENOMEM); 759 break; 760 } 761 } 762 mtx_unlock(&sc->worklist_mutex); 763 return; 764} 765