g_bde_work.c revision 105520
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/geom/bde/g_bde_work.c 105520 2002-10-20 14:30:28Z phk $
 *
 * This source file contains the state engine which makes things happen in
 * the right order.
 *
 * Outline:
 * 1) g_bde_start1()
 *    Break the struct bio into multiple work packets, one per zone.
 * 2) g_bde_start2()
 *    Set up the necessary sector buffers, start those read operations
 *    which we can start at this time, and put the item on the work-list.
 * 3) g_bde_worker()
 *    Scan the work-list for items which are ready for crypto processing,
 *    call the matching crypto function in g_bde_crypt.c, and schedule
 *    any writes needed.  Read operations finish here by releasing the
 *    sector buffers and delivering the original bio request.
 * 4) g_bde_write_done()
 *    Release sector buffers and deliver the original bio request.
 *
 * Because of the C scope rules, the functions appear in this source file
 * in almost exactly the opposite order.
 *
 * XXX: A switch to the hardware-assisted crypto in src/sys/opencrypto will
 * XXX: add additional states to this state engine.  Since no hardware
 * XXX: available at this time has AES support, implementing this has been
 * XXX: postponed until such time as it would result in a benefit.
 */
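
/*
 * A note on states, summarizing the code below: work items pass through
 * SETUP (just allocated), WAIT (buffers mapped, waiting for I/O and
 * crypto) and FINISH (writes issued); sector buffers pass through JUNK
 * (contents undefined), IO (a request is in flight) and VALID (contents
 * usable).
 */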

#include <sys/param.h>
#include <sys/stdint.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <geom/geom.h>
#include <geom/bde/g_bde.h>

static void g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_new_sector(struct g_bde_work *wp, u_int len);
static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
static int g_bde_start_read(struct g_bde_sector *sp);

/*
 * Work item allocation.
 *
 * C++ would call these constructors and destructors.
 */
static u_int g_bde_nwork;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");

static struct g_bde_work *
g_bde_new_work(struct g_bde_softc *sc)
{
        struct g_bde_work *wp;

        wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
        if (wp == NULL)
                return (wp);
        wp->state = SETUP;
        wp->softc = sc;
        g_bde_nwork++;
        sc->nwork++;
        TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
        return (wp);
}

static void
g_bde_delete_work(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;

        sc = wp->softc;
        g_bde_nwork--;
        sc->nwork--;
        TAILQ_REMOVE(&sc->worklist, wp, list);
        g_free(wp);
}

/*
 * Sector buffer allocation.
 *
 * These two functions allocate and free back variable-sized sector buffers.
 */

static u_int g_bde_nsect;
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");

static void
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
{

        g_bde_nsect--;
        sc->nsect--;
        if (sp->malloc)
                g_free(sp->data);
        g_free(sp);
}

static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work *wp, u_int len)
{
        struct g_bde_sector *sp;

        sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
        if (sp == NULL)
                return (sp);
        if (len > 0) {
                sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
                if (sp->data == NULL) {
                        g_free(sp);
                        return (NULL);
                }
                sp->malloc = 1;
        }
        g_bde_nsect++;
        wp->softc->nsect++;
        sp->size = len;
        sp->softc = wp->softc;
        sp->ref = 1;
        sp->owner = wp;
        sp->offset = wp->so;
        sp->state = JUNK;
        return (sp);
}

/*
 * Skey sector cache.
 *
 * Nothing prevents two separate I/O requests from addressing the same zone
 * and thereby needing the same skey sector.  We therefore need to sequence
 * I/O operations to the skey sectors.  A certain amount of caching is also
 * desirable, although the extent of the benefit from this has not been
 * determined at this point.
 *
 * XXX: GEOM may be able to grow a generic caching facility at some point
 * XXX: to support such needs.
 */
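
/*
 * A note on the list discipline below: the freelist doubles as the cache.
 * Any sector handed out by g_bde_get_sector() ends up at the tail of the
 * list, and any released sector is moved to the head, which is where
 * g_bde_get_sector() looks first when it needs to recycle an
 * unreferenced buffer.
 */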

static u_int g_bde_ncache;
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");

static struct g_bde_sector *
g_bde_get_sector(struct g_bde_work *wp, off_t offset)
{
        struct g_bde_sector *sp;
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
        sc = wp->softc;
        TAILQ_FOREACH(sp, &sc->freelist, list) {
                if (sp->offset == offset)
                        break;
        }
        if (sp != NULL) {
                sp->ref++;
                KASSERT(sp->offset == offset, ("wrong offset"));
                KASSERT(sp->softc == wp->softc, ("wrong softc"));
                if (sp->ref == 1)
                        sp->owner = wp;
        } else {
                if (!TAILQ_EMPTY(&sc->freelist))
                        sp = TAILQ_FIRST(&sc->freelist);
                if (sp != NULL && sp->ref > 0)
                        sp = NULL;
                if (sp == NULL) {
                        g_bde_ncache++;
                        sc->ncache++;
                        sp = g_bde_new_sector(wp, sc->sectorsize);
                        if (sp != NULL) {
                                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                                sp->malloc = 2;
                        }
                }
                if (sp != NULL) {
                        sp->offset = offset;
                        sp->softc = wp->softc;
                        sp->ref = 1;
                        sp->owner = wp;
                        sp->state = JUNK;
                        sp->error = 0;
                }
        }
        if (sp != NULL) {
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
        }
        wp->ksp = sp;
        KASSERT(sp != NULL, ("get_sector failed"));
        return (sp);
}

static void
g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp2;

        g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
        KASSERT(sp->malloc == 2, ("Wrong sector released"));
        sc = sp->softc;
        KASSERT(sc != NULL, ("NULL sp->softc"));
        KASSERT(wp == sp->owner, ("Releasing, not owner"));
        sp->owner = NULL;
        wp->ksp = NULL;
        sp->ref--;
        if (sp->ref > 0) {
                TAILQ_REMOVE(&sc->freelist, sp, list);
                TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
                TAILQ_FOREACH(wp2, &sc->worklist, list) {
                        if (wp2->ksp == sp) {
                                KASSERT(wp2 != wp, ("Self-reowning"));
                                sp->owner = wp2;
                                wakeup(sp->softc);
                                break;
                        }
                }
                KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
        } else if (sp->error != 0) {
                sp->offset = ~0;
                sp->error = 0;
                sp->state = JUNK;
        }
        TAILQ_REMOVE(&sc->freelist, sp, list);
        TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
}

static void
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
{
        struct g_bde_sector *sp;
        int n;

        g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
        n = sc->ncache / fraction + 1;
        while (n--) {
                TAILQ_FOREACH(sp, &sc->freelist, list) {
                        if (sp->ref != 0)
                                continue;
                        TAILQ_REMOVE(&sc->freelist, sp, list);
                        g_bde_ncache--;
                        sc->ncache--;
                        bzero(sp->data, sp->size);
                        g_bde_delete_sector(sc, sp);
                        break;
                }
        }
}

static struct g_bde_sector *
g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
{
        struct g_bde_sector *sp;

        g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
        sp = g_bde_get_sector(wp, offset);
        if (sp == NULL)
                return (sp);
        if (sp->owner != wp)
                return (sp);
        if (sp->state == VALID)
                return (sp);
        if (g_bde_start_read(sp) == 0)
                return (sp);
        g_bde_release_sector(wp, sp);
        return (NULL);
}

/*
 * Contribute to the completion of the original bio request.
 *
 * We have no simple way to tell how many pieces the original bio request
 * has been segmented into, so the easiest way to determine when we can
 * deliver it is to keep track of the number of bytes we have completed.
 * We keep track of any errors underway and latch onto the first one.
 *
 * We always report "nothing done" in case of error, because random bits
 * here and there may be completed and returning a number of completed
 * bytes does not convey any useful information about which bytes they
 * were.  If some piece of broken code somewhere interprets this to mean
 * that nothing has changed on the underlying media, it deserves the
 * lossage headed its way.
 *
 * A single mutex per g_bde instance is used to prevent contention.
 */
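
/*
 * For illustration (numbers made up): a 24576 byte bio split across two
 * zones might complete as contributions of 16384 and 8192 bytes; the
 * second contribution makes bio_completed equal bio_length and triggers
 * g_io_deliver().  Had either fragment failed, bio_error would have
 * latched the first error and bio_completed would be reset to 0 before
 * delivery.
 */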

static void
g_bde_contribute(struct bio *bp, off_t bytes, int error)
{
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
            bp, (intmax_t)bytes, error);
        sc = bp->bio_driver1;
        if (bp->bio_error == 0)
                bp->bio_error = error;
        bp->bio_completed += bytes;
        KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
        if (bp->bio_completed == bp->bio_length) {
                if (bp->bio_error != 0)
                        bp->bio_completed = 0;
                g_io_deliver(bp, bp->bio_error);
        }
}

/*
 * A write operation has finished.  When we have all the expected cows in
 * the barn, close the door and call it a day.
 */
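
/*
 * In other words, summarizing the logic below: a BIO_WRITE is done only
 * when both the data sector (wp->sp) and the key sector (wp->ksp) have
 * been written, whereas a BIO_DELETE rewrites just the data sector, so a
 * single completion finishes it.
 */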

static void
g_bde_write_done(struct bio *bp)
{
        struct g_bde_sector *sp;
        struct g_bde_work *wp;
        struct g_bde_softc *sc;

        sp = bp->bio_caller1;
        sc = bp->bio_caller2;
        mtx_lock(&sc->worklist_mutex);
        KASSERT(sp != NULL, ("NULL sp"));
        KASSERT(sc != NULL, ("NULL sc"));
        KASSERT(sp->owner != NULL, ("NULL sp->owner"));
        g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
        sp->error = bp->bio_error;
        g_destroy_bio(bp);
        wp = sp->owner;
        if (wp->error == 0)
                wp->error = sp->error;

        if (wp->bp->bio_cmd == BIO_DELETE) {
                KASSERT(sp == wp->sp, ("trashed delete op"));
                g_bde_contribute(wp->bp, wp->length, wp->error);
                g_bde_delete_sector(sc, sp);
                g_bde_delete_work(wp);
                mtx_unlock(&sc->worklist_mutex);
                return;
        }

        KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
        KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
        if (wp->sp == sp) {
                g_bde_delete_sector(sc, wp->sp);
                wp->sp = NULL;
        } else {
                sp->state = VALID;
        }
        if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
                g_bde_contribute(wp->bp, wp->length, wp->error);
                g_bde_release_sector(wp, wp->ksp);
                g_bde_delete_work(wp);
        }
        mtx_unlock(&sc->worklist_mutex);
        return;
}

/*
 * Send a write request for the given sector down the pipeline.
 */

static int
g_bde_start_write(struct g_bde_sector *sp)
{
        struct bio *bp;
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
        sc = sp->softc;
        KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
        KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
        bp = g_new_bio();
        if (bp == NULL)
                return (ENOMEM);
        bp->bio_cmd = BIO_WRITE;
        bp->bio_offset = sp->offset;
        bp->bio_data = sp->data;
        bp->bio_length = sp->size;
        bp->bio_done = g_bde_write_done;
        bp->bio_caller1 = sp;
        bp->bio_caller2 = sc;
        sp->state = IO;
        g_io_request(bp, sc->consumer);
        return (0);
}

/*
 * A read operation has finished.  Mark the sector valid, wake up the
 * worker thread, and let it do its thing.
 */

static void
g_bde_read_done(struct bio *bp)
{
        struct g_bde_sector *sp;
        struct g_bde_softc *sc;

        sp = bp->bio_caller1;
        g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
        sc = bp->bio_caller2;
        mtx_lock(&sc->worklist_mutex);
        sp->error = bp->bio_error;
        sp->state = VALID;
        wakeup(sc);
        g_destroy_bio(bp);
        mtx_unlock(&sc->worklist_mutex);
}

/*
 * Send a read request for the given sector down the pipeline.
 */

static int
g_bde_start_read(struct g_bde_sector *sp)
{
        struct bio *bp;
        struct g_bde_softc *sc;

        g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
        sc = sp->softc;
        KASSERT(sc != NULL, ("Null softc in sp %p", sp));
        bp = g_new_bio();
        if (bp == NULL)
                return (ENOMEM);
        bp->bio_cmd = BIO_READ;
        bp->bio_offset = sp->offset;
        bp->bio_data = sp->data;
        bp->bio_length = sp->size;
        bp->bio_done = g_bde_read_done;
        bp->bio_caller1 = sp;
        bp->bio_caller2 = sc;
        sp->state = IO;
        g_io_request(bp, sc->consumer);
        return (0);
}

/*
 * The worker thread.
 *
 * The up/down path of GEOM is not allowed to sleep or do any major work,
 * so we use this thread to do the actual crypto operations and to push
 * the state engine onwards.
 *
 * XXX: if we switch to the src/sys/opencrypto hardware-assisted encryption,
 * XXX: using a thread here is probably not needed.
 */
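
/*
 * A note on locking, as seen in the loop below: the worklist mutex is
 * dropped around the calls into g_bde_crypt_read/write/delete(), so the
 * potentially slow crypto work does not block the g_bde_read_done() and
 * g_bde_write_done() completion handlers, which need the same mutex.
 */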

void
g_bde_worker(void *arg)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp;
        struct g_geom *gp;
        int busy, error;

        gp = arg;
        sc = gp->softc;

        mtx_lock(&sc->worklist_mutex);
        for (;;) {
                busy = 0;
                g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
                TAILQ_FOREACH(wp, &sc->worklist, list) {
                        KASSERT(wp != NULL, ("NULL wp"));
                        KASSERT(wp->softc != NULL, ("NULL wp->softc"));
                        if (wp->state != WAIT)
                                continue;       /* Not interesting here */

                        KASSERT(wp->bp != NULL, ("NULL wp->bp"));
                        KASSERT(wp->sp != NULL, ("NULL wp->sp"));

                        if (wp->ksp != NULL) {
                                if (wp->ksp->owner != wp)
                                        continue;
                                if (wp->ksp->state == IO)
                                        continue;
                                KASSERT(wp->ksp->state == VALID,
                                    ("Illegal sector state (JUNK ?)"));
                        }

                        if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
                                continue;

                        if (wp->ksp != NULL && wp->ksp->error != 0) {
                                g_bde_contribute(wp->bp, wp->length,
                                    wp->ksp->error);
                                g_bde_delete_sector(sc, wp->sp);
                                g_bde_release_sector(wp, wp->ksp);
                                g_bde_delete_work(wp);
                                busy++;
                                break;
                        }
                        switch (wp->bp->bio_cmd) {
                        case BIO_READ:
                                if (wp->ksp != NULL && wp->sp->error == 0) {
                                        mtx_unlock(&sc->worklist_mutex);
                                        g_bde_crypt_read(wp);
                                        mtx_lock(&sc->worklist_mutex);
                                }
                                g_bde_contribute(wp->bp, wp->length,
                                    wp->sp->error);
                                g_bde_delete_sector(sc, wp->sp);
                                if (wp->ksp != NULL)
                                        g_bde_release_sector(wp, wp->ksp);
                                g_bde_delete_work(wp);
                                break;
                        case BIO_WRITE:
                                wp->state = FINISH;
                                KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
                                KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
                                mtx_unlock(&sc->worklist_mutex);
                                g_bde_crypt_write(wp);
                                mtx_lock(&sc->worklist_mutex);
                                g_bde_start_write(wp->sp);
                                g_bde_start_write(wp->ksp);
                                break;
                        case BIO_DELETE:
                                wp->state = FINISH;
                                mtx_unlock(&sc->worklist_mutex);
                                g_bde_crypt_delete(wp);
                                mtx_lock(&sc->worklist_mutex);
                                g_bde_start_write(wp->sp);
                                break;
                        }
                        busy++;
                        break;
                }
                if (!busy) {
                        /*
                         * We don't look for our death-warrant until we are
                         * idle.  Shouldn't make a difference in practice.
                         */
                        if (sc->dead)
                                break;
                        g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
                        error = msleep(sc, &sc->worklist_mutex,
                            PRIBIO, "g_bde", hz);
                        if (error == EWOULDBLOCK) {
                                /*
                                 * Lose our skey cache in an orderly fashion.
                                 * The exact rate can be tuned to be less
                                 * aggressive if this is desirable.  10% per
                                 * second means that the cache is gone in a
                                 * few minutes.
                                 */
                                g_bde_purge_sector(sc, 10);
                        }
                }
        }
        g_trace(G_T_TOPOLOGY, "g_bde_worker die");
        g_bde_purge_sector(sc, 1);
        KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
        KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
        KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
        mtx_unlock(&sc->worklist_mutex);
        sc->dead = 2;
        wakeup(sc);
        mtx_lock(&Giant);
        kthread_exit(0);
}

/*
 * g_bde_start1() has chopped the incoming request up, so all the requests
 * we see here are inside a single zone.  Map the data and key locations,
 * grab the buffers we need, and fire off the first volley of read requests.
 */

static void
g_bde_start2(struct g_bde_work *wp)
{
        struct g_bde_softc *sc;

        KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
        g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
        sc = wp->softc;
        KASSERT(wp->softc != NULL, ("NULL wp->softc"));
        g_bde_map_sector(&sc->key, wp->offset, &wp->so, &wp->kso, &wp->ko);
        if (wp->bp->bio_cmd == BIO_READ) {
                wp->sp = g_bde_new_sector(wp, 0);
                if (wp->sp == NULL) {
                        g_bde_contribute(wp->bp, wp->length, ENOMEM);
                        g_bde_delete_work(wp);
                        return;
                }
                wp->sp->size = wp->length;
                wp->sp->data = wp->data;
                if (g_bde_start_read(wp->sp) != 0) {
                        g_bde_contribute(wp->bp, wp->length, ENOMEM);
                        g_bde_delete_sector(sc, wp->sp);
                        g_bde_delete_work(wp);
                        return;
                }
                g_bde_read_sector(sc, wp, wp->kso);
                if (wp->ksp == NULL)
                        wp->error = ENOMEM;
        } else if (wp->bp->bio_cmd == BIO_DELETE) {
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_contribute(wp->bp, wp->length, ENOMEM);
                        g_bde_delete_work(wp);
                        return;
                }
        } else if (wp->bp->bio_cmd == BIO_WRITE) {
                wp->sp = g_bde_new_sector(wp, wp->length);
                if (wp->sp == NULL) {
                        g_bde_contribute(wp->bp, wp->length, ENOMEM);
                        g_bde_delete_work(wp);
                        return;
                }
                g_bde_read_sector(sc, wp, wp->kso);
                if (wp->ksp == NULL) {
                        g_bde_contribute(wp->bp, wp->length, ENOMEM);
                        g_bde_delete_sector(sc, wp->sp);
                        g_bde_delete_work(wp);
                        return;
                }
        } else {
                KASSERT(0 == 1,
                    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
        }

        wp->state = WAIT;
        wakeup(sc);
}

/*
 * Split the incoming bio on zone boundaries and submit the resulting
 * work structures to g_bde_start2().
 */
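
/*
 * Worked example with made-up numbers: if sc->zone_cont were 65536 bytes
 * and a bio arrived with bio_offset 70000 and bio_length 100000, then
 * zone_start = 70000 - 70000 % 65536 = 65536, so the first fragment runs
 * from offset 70000 for 65536 + 65536 - 70000 = 61072 bytes (up to the
 * zone boundary) and the second runs from 131072 for the remaining
 * 38928 bytes.
 */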

void
g_bde_start1(struct bio *bp)
{
        struct g_bde_softc *sc;
        struct g_bde_work *wp;
        off_t zone_start, left;
        caddr_t p;

        sc = bp->bio_to->geom->softc;
        bp->bio_driver1 = sc;

        mtx_lock(&sc->worklist_mutex);
        zone_start = bp->bio_offset - bp->bio_offset % sc->zone_cont;
        wp = g_bde_new_work(sc);
        if (wp == NULL) {
                g_io_deliver(bp, ENOMEM);
                mtx_unlock(&sc->worklist_mutex);
                return;
        }
        left = bp->bio_length;
        p = bp->bio_data;

        /* Do the first and possibly only fragment. */
        wp->bp = bp;
        wp->offset = bp->bio_offset;
        wp->data = p;
        wp->length = zone_start + sc->zone_cont - wp->offset;
        if (wp->length >= left) {
                /* Only this one fragment needed. */
                wp->length = left;
                g_bde_start2(wp);
                mtx_unlock(&sc->worklist_mutex);
                return;
        }

        /*
         * Submit the first fragment.  Advance our counters before the
         * call: g_bde_start2() may free the work item on failure.
         */
        left -= wp->length;
        p += wp->length;
        g_bde_start2(wp);

        /* Do the subsequent fragments. */
        while (left > 0) {
                wp = g_bde_new_work(sc);
                if (wp == NULL) {
                        g_bde_contribute(bp, left, ENOMEM);
                        mtx_unlock(&sc->worklist_mutex);
                        return;
                }
                zone_start += sc->zone_cont;
                wp->bp = bp;
                wp->offset = zone_start;
                wp->data = p;
                if (left > sc->zone_cont)
                        wp->length = sc->zone_cont;
                else
                        wp->length = left;
                left -= wp->length;
                p += wp->length;
                g_bde_start2(wp);
        }
        mtx_unlock(&sc->worklist_mutex);
}