g_bde_work.c revision 172836
1105464Sphk/*- 2105464Sphk * Copyright (c) 2002 Poul-Henning Kamp 3105464Sphk * Copyright (c) 2002 Networks Associates Technology, Inc. 4105464Sphk * All rights reserved. 5105464Sphk * 6105464Sphk * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7105464Sphk * and NAI Labs, the Security Research Division of Network Associates, Inc. 8105464Sphk * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9105464Sphk * DARPA CHATS research program. 10105464Sphk * 11105464Sphk * Redistribution and use in source and binary forms, with or without 12105464Sphk * modification, are permitted provided that the following conditions 13105464Sphk * are met: 14105464Sphk * 1. Redistributions of source code must retain the above copyright 15105464Sphk * notice, this list of conditions and the following disclaimer. 16105464Sphk * 2. Redistributions in binary form must reproduce the above copyright 17105464Sphk * notice, this list of conditions and the following disclaimer in the 18105464Sphk * documentation and/or other materials provided with the distribution. 19105464Sphk * 20105464Sphk * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21105464Sphk * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22105464Sphk * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23105464Sphk * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24105464Sphk * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25105464Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26105464Sphk * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27105464Sphk * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28105464Sphk * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29105464Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30105464Sphk * SUCH DAMAGE. 31105464Sphk * 32105464Sphk * $FreeBSD: head/sys/geom/bde/g_bde_work.c 172836 2007-10-20 23:23:23Z julian $ 33139778Simp */ 34139778Simp/* 35105464Sphk * This source file contains the state-engine which makes things happen in the 36105464Sphk * right order. 37105464Sphk * 38105464Sphk * Outline: 39105464Sphk * 1) g_bde_start1() 40105464Sphk * Break the struct bio into multiple work packets one per zone. 41105464Sphk * 2) g_bde_start2() 42105464Sphk * Setup the necessary sector buffers and start those read operations 43105464Sphk * which we can start at this time and put the item on the work-list. 44105464Sphk * 3) g_bde_worker() 45105464Sphk * Scan the work-list for items which are ready for crypto processing 46105464Sphk * and call the matching crypto function in g_bde_crypt.c and schedule 47105464Sphk * any writes needed. Read operations finish here by releasing the 48105464Sphk * sector buffers and delivering the original bio request. 49105464Sphk * 4) g_bde_write_done() 50105464Sphk * Release sector buffers and deliver the original bio request. 51105464Sphk * 52105464Sphk * Because of the C-scope rules, the functions are almost perfectly in the 53105464Sphk * opposite order in this source file. 54105464Sphk * 55105464Sphk * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add 56105464Sphk * XXX: additional states to this state-engine. Since no hardware available 57105464Sphk * XXX: at this time has AES support, implementing this has been postponed 58105464Sphk * XXX: until such time as it would result in a benefit. 59105464Sphk */ 60105464Sphk 61105464Sphk#include <sys/param.h> 62105464Sphk#include <sys/bio.h> 63105464Sphk#include <sys/lock.h> 64105464Sphk#include <sys/mutex.h> 65105464Sphk#include <sys/queue.h> 66105464Sphk#include <sys/malloc.h> 67105464Sphk#include <sys/systm.h> 68105464Sphk#include <sys/kernel.h> 69105464Sphk#include <sys/sysctl.h> 70105464Sphk#include <sys/proc.h> 71105464Sphk#include <sys/kthread.h> 72105464Sphk 73143418Sume#include <crypto/rijndael/rijndael-api-fst.h> 74106407Sphk#include <crypto/sha2/sha2.h> 75105464Sphk#include <geom/geom.h> 76105464Sphk#include <geom/bde/g_bde.h> 77105464Sphk 78105464Sphkstatic void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp); 79105464Sphkstatic struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len); 80114034Sphkstatic void g_bde_release_keysector(struct g_bde_work *wp); 81114153Sphkstatic struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp); 82105464Sphkstatic int g_bde_start_read(struct g_bde_sector *sp); 83106407Sphkstatic void g_bde_purge_sector(struct g_bde_softc *sc, int fraction); 84105464Sphk 85105464Sphk/* 86105464Sphk * Work item allocation. 87105464Sphk * 88105464Sphk * C++ would call these constructors and destructors. 89105464Sphk */ 90105464Sphkstatic u_int g_bde_nwork; 91105464SphkSYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, ""); 92105464Sphk 93151897Srwatsonstatic MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures"); 94114715Sphk 95105464Sphkstatic struct g_bde_work * 96105464Sphkg_bde_new_work(struct g_bde_softc *sc) 97105464Sphk{ 98105464Sphk struct g_bde_work *wp; 99105464Sphk 100114715Sphk wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO); 101105464Sphk if (wp == NULL) 102105464Sphk return (wp); 103105464Sphk wp->state = SETUP; 104105464Sphk wp->softc = sc; 105105464Sphk g_bde_nwork++; 106105464Sphk sc->nwork++; 107105464Sphk TAILQ_INSERT_TAIL(&sc->worklist, wp, list); 108105464Sphk return (wp); 109105464Sphk} 110105464Sphk 111105464Sphkstatic void 112105464Sphkg_bde_delete_work(struct g_bde_work *wp) 113105464Sphk{ 114105464Sphk struct g_bde_softc *sc; 115105464Sphk 116105464Sphk sc = wp->softc; 117105464Sphk g_bde_nwork--; 118105464Sphk sc->nwork--; 119105464Sphk TAILQ_REMOVE(&sc->worklist, wp, list); 120114715Sphk free(wp, M_GBDE); 121105464Sphk} 122105464Sphk 123105464Sphk/* 124105464Sphk * Sector buffer allocation 125105464Sphk * 126105464Sphk * These two functions allocate and free back variable sized sector buffers 127105464Sphk */ 128105464Sphk 129105464Sphkstatic u_int g_bde_nsect; 130105464SphkSYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, ""); 131105464Sphk 132105520Sphkstatic void 133105464Sphkg_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 134105464Sphk{ 135105464Sphk 136105464Sphk g_bde_nsect--; 137105464Sphk sc->nsect--; 138105464Sphk if (sp->malloc) 139114715Sphk free(sp->data, M_GBDE); 140114715Sphk free(sp, M_GBDE); 141105464Sphk} 142105464Sphk 143105520Sphkstatic struct g_bde_sector * 144105464Sphkg_bde_new_sector(struct g_bde_work *wp, u_int len) 145105464Sphk{ 146105464Sphk struct g_bde_sector *sp; 147105464Sphk 148114715Sphk sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO); 149105464Sphk if (sp == NULL) 150105464Sphk return (sp); 151105464Sphk if (len > 0) { 152114715Sphk sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO); 153105464Sphk if (sp->data == NULL) { 154114715Sphk free(sp, M_GBDE); 155105464Sphk return (NULL); 156105464Sphk } 157105464Sphk sp->malloc = 1; 158105464Sphk } 159105464Sphk g_bde_nsect++; 160105464Sphk wp->softc->nsect++; 161105464Sphk sp->size = len; 162105464Sphk sp->softc = wp->softc; 163105464Sphk sp->ref = 1; 164105464Sphk sp->owner = wp; 165105464Sphk sp->offset = wp->so; 166105464Sphk sp->state = JUNK; 167105464Sphk return (sp); 168105464Sphk} 169105464Sphk 170105464Sphk/* 171105464Sphk * Skey sector cache. 172105464Sphk * 173105464Sphk * Nothing prevents two separate I/O requests from addressing the same zone 174105464Sphk * and thereby needing the same skey sector. We therefore need to sequence 175105464Sphk * I/O operations to the skey sectors. A certain amount of caching is also 176105464Sphk * desirable, although the extent of benefit from this is not at this point 177105464Sphk * determined. 178105464Sphk * 179105464Sphk * XXX: GEOM may be able to grow a generic caching facility at some point 180105464Sphk * XXX: to support such needs. 181105464Sphk */ 182105464Sphk 183105464Sphkstatic u_int g_bde_ncache; 184105464SphkSYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, ""); 185105464Sphk 186106407Sphkstatic void 187106407Sphkg_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp) 188106407Sphk{ 189106407Sphk 190106407Sphk g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp); 191106407Sphk if (sp->ref != 0) 192106407Sphk return; 193106407Sphk TAILQ_REMOVE(&sc->freelist, sp, list); 194106407Sphk g_bde_ncache--; 195106407Sphk sc->ncache--; 196106407Sphk bzero(sp->data, sp->size); 197106407Sphk g_bde_delete_sector(sc, sp); 198106407Sphk} 199106407Sphk 200105464Sphkstatic struct g_bde_sector * 201114153Sphkg_bde_get_keysector(struct g_bde_work *wp) 202105464Sphk{ 203105464Sphk struct g_bde_sector *sp; 204105464Sphk struct g_bde_softc *sc; 205114153Sphk off_t offset; 206105464Sphk 207114153Sphk offset = wp->kso; 208114153Sphk g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset); 209105464Sphk sc = wp->softc; 210106407Sphk 211106407Sphk if (malloc_last_fail() < g_bde_ncache) 212106407Sphk g_bde_purge_sector(sc, -1); 213106407Sphk 214106407Sphk sp = TAILQ_FIRST(&sc->freelist); 215106407Sphk if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime) 216106407Sphk g_bde_purge_one_sector(sc, sp); 217106407Sphk 218105464Sphk TAILQ_FOREACH(sp, &sc->freelist, list) { 219105464Sphk if (sp->offset == offset) 220105464Sphk break; 221105464Sphk } 222105464Sphk if (sp != NULL) { 223105464Sphk sp->ref++; 224105464Sphk KASSERT(sp->offset == offset, ("wrong offset")); 225105464Sphk KASSERT(sp->softc == wp->softc, ("wrong softc")); 226105464Sphk if (sp->ref == 1) 227105464Sphk sp->owner = wp; 228105464Sphk } else { 229106407Sphk if (malloc_last_fail() < g_bde_ncache) { 230106407Sphk TAILQ_FOREACH(sp, &sc->freelist, list) 231106407Sphk if (sp->ref == 0) 232106407Sphk break; 233106407Sphk } 234106407Sphk if (sp == NULL && !TAILQ_EMPTY(&sc->freelist)) 235105464Sphk sp = TAILQ_FIRST(&sc->freelist); 236105464Sphk if (sp != NULL && sp->ref > 0) 237105464Sphk sp = NULL; 238105464Sphk if (sp == NULL) { 239105464Sphk sp = g_bde_new_sector(wp, sc->sectorsize); 240105464Sphk if (sp != NULL) { 241114035Sphk g_bde_ncache++; 242114035Sphk sc->ncache++; 243105464Sphk TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 244105464Sphk sp->malloc = 2; 245105464Sphk } 246105464Sphk } 247105464Sphk if (sp != NULL) { 248105464Sphk sp->offset = offset; 249105464Sphk sp->softc = wp->softc; 250105464Sphk sp->ref = 1; 251105464Sphk sp->owner = wp; 252105464Sphk sp->state = JUNK; 253105464Sphk sp->error = 0; 254105464Sphk } 255105464Sphk } 256105464Sphk if (sp != NULL) { 257105464Sphk TAILQ_REMOVE(&sc->freelist, sp, list); 258105464Sphk TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 259114041Sphk sp->used = time_uptime; 260105464Sphk } 261105464Sphk wp->ksp = sp; 262105464Sphk return(sp); 263105464Sphk} 264105464Sphk 265105464Sphkstatic void 266114034Sphkg_bde_release_keysector(struct g_bde_work *wp) 267105464Sphk{ 268105464Sphk struct g_bde_softc *sc; 269105464Sphk struct g_bde_work *wp2; 270114034Sphk struct g_bde_sector *sp; 271105464Sphk 272114034Sphk sp = wp->ksp; 273114034Sphk g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp); 274105464Sphk KASSERT(sp->malloc == 2, ("Wrong sector released")); 275105464Sphk sc = sp->softc; 276105464Sphk KASSERT(sc != NULL, ("NULL sp->softc")); 277105464Sphk KASSERT(wp == sp->owner, ("Releasing, not owner")); 278105464Sphk sp->owner = NULL; 279105464Sphk wp->ksp = NULL; 280105464Sphk sp->ref--; 281105464Sphk if (sp->ref > 0) { 282105464Sphk TAILQ_REMOVE(&sc->freelist, sp, list); 283105464Sphk TAILQ_INSERT_TAIL(&sc->freelist, sp, list); 284105464Sphk TAILQ_FOREACH(wp2, &sc->worklist, list) { 285105464Sphk if (wp2->ksp == sp) { 286105464Sphk KASSERT(wp2 != wp, ("Self-reowning")); 287105464Sphk sp->owner = wp2; 288105464Sphk wakeup(sp->softc); 289105464Sphk break; 290105464Sphk } 291105464Sphk } 292105464Sphk KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp)); 293105464Sphk } else if (sp->error != 0) { 294105464Sphk sp->offset = ~0; 295105464Sphk sp->error = 0; 296105464Sphk sp->state = JUNK; 297105464Sphk } 298105464Sphk TAILQ_REMOVE(&sc->freelist, sp, list); 299105464Sphk TAILQ_INSERT_HEAD(&sc->freelist, sp, list); 300105464Sphk} 301105464Sphk 302105464Sphkstatic void 303105464Sphkg_bde_purge_sector(struct g_bde_softc *sc, int fraction) 304105464Sphk{ 305105464Sphk struct g_bde_sector *sp; 306105464Sphk int n; 307105464Sphk 308105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc); 309106407Sphk if (fraction > 0) 310106407Sphk n = sc->ncache / fraction + 1; 311106407Sphk else 312106407Sphk n = g_bde_ncache - malloc_last_fail(); 313106407Sphk if (n < 0) 314106407Sphk return; 315106407Sphk if (n > sc->ncache) 316106407Sphk n = sc->ncache; 317105464Sphk while(n--) { 318105464Sphk TAILQ_FOREACH(sp, &sc->freelist, list) { 319105464Sphk if (sp->ref != 0) 320105464Sphk continue; 321105464Sphk TAILQ_REMOVE(&sc->freelist, sp, list); 322105464Sphk g_bde_ncache--; 323105464Sphk sc->ncache--; 324105464Sphk bzero(sp->data, sp->size); 325105464Sphk g_bde_delete_sector(sc, sp); 326105464Sphk break; 327105464Sphk } 328105464Sphk } 329105464Sphk} 330105464Sphk 331105464Sphkstatic struct g_bde_sector * 332114033Sphkg_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp) 333105464Sphk{ 334105464Sphk struct g_bde_sector *sp; 335105464Sphk 336114033Sphk g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp); 337114153Sphk sp = g_bde_get_keysector(wp); 338114152Sphk if (sp == NULL) { 339114152Sphk g_bde_purge_sector(sc, -1); 340114153Sphk sp = g_bde_get_keysector(wp); 341114152Sphk } 342114041Sphk if (sp == NULL) 343105464Sphk return (sp); 344105464Sphk if (sp->owner != wp) 345105464Sphk return (sp); 346105464Sphk if (sp->state == VALID) 347105464Sphk return (sp); 348105464Sphk if (g_bde_start_read(sp) == 0) 349105464Sphk return (sp); 350114034Sphk g_bde_release_keysector(wp); 351105464Sphk return (NULL); 352105464Sphk} 353105464Sphk 354105464Sphk/* 355105464Sphk * Contribute to the completion of the original bio request. 356105464Sphk * 357105464Sphk * We have no simple way to tell how many bits the original bio request has 358105464Sphk * been segmented into, so the easiest way to determine when we can deliver 359105464Sphk * it is to keep track of the number of bytes we have completed. We keep 360105464Sphk * track of any errors underway and latch onto the first one. 361105464Sphk * 362105464Sphk * We always report "nothing done" in case of error, because random bits here 363105464Sphk * and there may be completed and returning a number of completed bytes does 364105464Sphk * not convey any useful information about which bytes they were. If some 365105464Sphk * piece of broken code somewhere interprets this to mean that nothing has 366105464Sphk * changed on the underlying media they deserve the lossage headed for them. 367105464Sphk * 368105464Sphk * A single mutex per g_bde instance is used to prevent contention. 369105464Sphk */ 370105464Sphk 371105464Sphkstatic void 372105464Sphkg_bde_contribute(struct bio *bp, off_t bytes, int error) 373105464Sphk{ 374105464Sphk 375105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d", 376105464Sphk bp, (intmax_t)bytes, error); 377105464Sphk if (bp->bio_error == 0) 378105464Sphk bp->bio_error = error; 379105464Sphk bp->bio_completed += bytes; 380105464Sphk KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution")); 381105464Sphk if (bp->bio_completed == bp->bio_length) { 382105464Sphk if (bp->bio_error != 0) 383105464Sphk bp->bio_completed = 0; 384105464Sphk g_io_deliver(bp, bp->bio_error); 385105464Sphk } 386105464Sphk} 387105464Sphk 388105464Sphk/* 389125591Sphk * This is the common case "we're done with this work package" function 390125591Sphk */ 391125591Sphk 392125591Sphkstatic void 393125591Sphkg_bde_work_done(struct g_bde_work *wp, int error) 394125591Sphk{ 395125591Sphk 396125591Sphk g_bde_contribute(wp->bp, wp->length, error); 397125591Sphk if (wp->sp != NULL) 398125591Sphk g_bde_delete_sector(wp->softc, wp->sp); 399125591Sphk if (wp->ksp != NULL) 400125591Sphk g_bde_release_keysector(wp); 401125591Sphk g_bde_delete_work(wp); 402125591Sphk} 403125591Sphk 404125591Sphk/* 405105464Sphk * A write operation has finished. When we have all expected cows in the 406105464Sphk * barn close the door and call it a day. 407105464Sphk */ 408105464Sphk 409105464Sphkstatic void 410105464Sphkg_bde_write_done(struct bio *bp) 411105464Sphk{ 412105464Sphk struct g_bde_sector *sp; 413105464Sphk struct g_bde_work *wp; 414105464Sphk struct g_bde_softc *sc; 415105464Sphk 416105464Sphk sp = bp->bio_caller1; 417105464Sphk sc = bp->bio_caller2; 418105464Sphk mtx_lock(&sc->worklist_mutex); 419105464Sphk KASSERT(sp != NULL, ("NULL sp")); 420105464Sphk KASSERT(sc != NULL, ("NULL sc")); 421105464Sphk KASSERT(sp->owner != NULL, ("NULL sp->owner")); 422105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp); 423114249Sphk if (bp->bio_error == 0 && bp->bio_completed != sp->size) 424114249Sphk bp->bio_error = EIO; 425105464Sphk sp->error = bp->bio_error; 426105464Sphk g_destroy_bio(bp); 427105464Sphk wp = sp->owner; 428105464Sphk if (wp->error == 0) 429105464Sphk wp->error = sp->error; 430105464Sphk 431105464Sphk if (wp->bp->bio_cmd == BIO_DELETE) { 432105464Sphk KASSERT(sp == wp->sp, ("trashed delete op")); 433125591Sphk g_bde_work_done(wp, wp->error); 434105464Sphk mtx_unlock(&sc->worklist_mutex); 435105464Sphk return; 436105464Sphk } 437105464Sphk 438105464Sphk KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()")); 439105464Sphk KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op")); 440105464Sphk if (wp->sp == sp) { 441105464Sphk g_bde_delete_sector(sc, wp->sp); 442105464Sphk wp->sp = NULL; 443105464Sphk } else { 444105464Sphk sp->state = VALID; 445105464Sphk } 446125591Sphk if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) 447125591Sphk g_bde_work_done(wp, wp->error); 448105464Sphk mtx_unlock(&sc->worklist_mutex); 449105464Sphk return; 450105464Sphk} 451105464Sphk 452105464Sphk/* 453105464Sphk * Send a write request for the given sector down the pipeline. 454105464Sphk */ 455105464Sphk 456105464Sphkstatic int 457105464Sphkg_bde_start_write(struct g_bde_sector *sp) 458105464Sphk{ 459105464Sphk struct bio *bp; 460105464Sphk struct g_bde_softc *sc; 461105464Sphk 462105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp); 463105464Sphk sc = sp->softc; 464105464Sphk KASSERT(sc != NULL, ("NULL sc in g_bde_start_write")); 465105464Sphk KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write")); 466105464Sphk bp = g_new_bio(); 467105464Sphk if (bp == NULL) 468105464Sphk return (ENOMEM); 469105464Sphk bp->bio_cmd = BIO_WRITE; 470105464Sphk bp->bio_offset = sp->offset; 471105464Sphk bp->bio_data = sp->data; 472105464Sphk bp->bio_length = sp->size; 473105464Sphk bp->bio_done = g_bde_write_done; 474105464Sphk bp->bio_caller1 = sp; 475105464Sphk bp->bio_caller2 = sc; 476105464Sphk sp->state = IO; 477105464Sphk g_io_request(bp, sc->consumer); 478105464Sphk return(0); 479105464Sphk} 480105464Sphk 481105464Sphk/* 482105464Sphk * A read operation has finished. Mark the sector no longer iobusy and 483105464Sphk * wake up the worker thread and let it do its thing. 484105464Sphk */ 485105464Sphk 486105464Sphkstatic void 487105464Sphkg_bde_read_done(struct bio *bp) 488105464Sphk{ 489105464Sphk struct g_bde_sector *sp; 490105464Sphk struct g_bde_softc *sc; 491105464Sphk 492105464Sphk sp = bp->bio_caller1; 493105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp); 494105464Sphk sc = bp->bio_caller2; 495105464Sphk mtx_lock(&sc->worklist_mutex); 496114250Sphk if (bp->bio_error == 0 && bp->bio_completed != sp->size) 497114249Sphk bp->bio_error = EIO; 498105464Sphk sp->error = bp->bio_error; 499114148Sphk if (sp->error == 0) 500114148Sphk sp->state = VALID; 501114148Sphk else 502114148Sphk sp->state = JUNK; 503105464Sphk wakeup(sc); 504105464Sphk g_destroy_bio(bp); 505105464Sphk mtx_unlock(&sc->worklist_mutex); 506105464Sphk} 507105464Sphk 508105464Sphk/* 509105464Sphk * Send a read request for the given sector down the pipeline. 510105464Sphk */ 511105464Sphk 512105464Sphkstatic int 513105464Sphkg_bde_start_read(struct g_bde_sector *sp) 514105464Sphk{ 515105464Sphk struct bio *bp; 516105464Sphk struct g_bde_softc *sc; 517105464Sphk 518105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp); 519105464Sphk sc = sp->softc; 520105464Sphk KASSERT(sc != NULL, ("Null softc in sp %p", sp)); 521105464Sphk bp = g_new_bio(); 522105464Sphk if (bp == NULL) 523105464Sphk return (ENOMEM); 524105464Sphk bp->bio_cmd = BIO_READ; 525105464Sphk bp->bio_offset = sp->offset; 526105464Sphk bp->bio_data = sp->data; 527105464Sphk bp->bio_length = sp->size; 528105464Sphk bp->bio_done = g_bde_read_done; 529105464Sphk bp->bio_caller1 = sp; 530105464Sphk bp->bio_caller2 = sc; 531105464Sphk sp->state = IO; 532105464Sphk g_io_request(bp, sc->consumer); 533105464Sphk return(0); 534105464Sphk} 535105464Sphk 536105464Sphk/* 537105464Sphk * The worker thread. 538105464Sphk * 539105464Sphk * The up/down path of GEOM is not allowed to sleep or do any major work 540105464Sphk * so we use this thread to do the actual crypto operations and to push 541105464Sphk * the state engine onwards. 542105464Sphk * 543105464Sphk * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption 544105464Sphk * XXX: using a thread here is probably not needed. 545105464Sphk */ 546105464Sphk 547105464Sphkvoid 548105464Sphkg_bde_worker(void *arg) 549105464Sphk{ 550105464Sphk struct g_bde_softc *sc; 551125591Sphk struct g_bde_work *wp, *twp; 552105464Sphk struct g_geom *gp; 553125591Sphk int restart, error; 554105464Sphk 555105464Sphk gp = arg; 556105464Sphk sc = gp->softc; 557105464Sphk 558105464Sphk mtx_lock(&sc->worklist_mutex); 559105464Sphk for (;;) { 560125591Sphk restart = 0; 561105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_worker scan"); 562125591Sphk TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) { 563105464Sphk KASSERT(wp != NULL, ("NULL wp")); 564105464Sphk KASSERT(wp->softc != NULL, ("NULL wp->softc")); 565105464Sphk if (wp->state != WAIT) 566125591Sphk continue; /* Not interesting here */ 567105464Sphk 568105464Sphk KASSERT(wp->bp != NULL, ("NULL wp->bp")); 569105464Sphk KASSERT(wp->sp != NULL, ("NULL wp->sp")); 570105464Sphk 571105464Sphk if (wp->ksp != NULL) { 572105464Sphk if (wp->ksp->owner != wp) 573105464Sphk continue; 574105464Sphk if (wp->ksp->state == IO) 575105464Sphk continue; 576105464Sphk KASSERT(wp->ksp->state == VALID, 577125591Sphk ("Illegal sector state (%d)", 578125591Sphk wp->ksp->state)); 579105464Sphk } 580105464Sphk 581125591Sphk if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO) 582105464Sphk continue; 583105464Sphk 584105464Sphk if (wp->ksp != NULL && wp->ksp->error != 0) { 585125591Sphk g_bde_work_done(wp, wp->ksp->error); 586125591Sphk continue; 587105464Sphk } 588105464Sphk switch(wp->bp->bio_cmd) { 589105464Sphk case BIO_READ: 590114040Sphk if (wp->ksp == NULL) { 591114040Sphk KASSERT(wp->error != 0, 592114040Sphk ("BIO_READ, no ksp and no error")); 593125591Sphk g_bde_work_done(wp, wp->error); 594125591Sphk break; 595105464Sphk } 596125591Sphk if (wp->sp->error != 0) { 597125591Sphk g_bde_work_done(wp, wp->sp->error); 598125591Sphk break; 599125591Sphk } 600125591Sphk mtx_unlock(&sc->worklist_mutex); 601125591Sphk g_bde_crypt_read(wp); 602125591Sphk mtx_lock(&sc->worklist_mutex); 603125591Sphk restart++; 604125591Sphk g_bde_work_done(wp, wp->sp->error); 605105464Sphk break; 606105464Sphk case BIO_WRITE: 607105464Sphk wp->state = FINISH; 608125591Sphk KASSERT(wp->sp->owner == wp, 609125591Sphk ("Write not owner sp")); 610125591Sphk KASSERT(wp->ksp->owner == wp, 611125591Sphk ("Write not owner ksp")); 612105464Sphk mtx_unlock(&sc->worklist_mutex); 613105464Sphk g_bde_crypt_write(wp); 614105464Sphk mtx_lock(&sc->worklist_mutex); 615125591Sphk restart++; 616114088Sphk error = g_bde_start_write(wp->sp); 617114088Sphk if (error) { 618125591Sphk g_bde_work_done(wp, error); 619114088Sphk break; 620114088Sphk } 621114088Sphk error = g_bde_start_write(wp->ksp); 622125591Sphk if (wp->error != 0) 623114088Sphk wp->error = error; 624105464Sphk break; 625105464Sphk case BIO_DELETE: 626105464Sphk wp->state = FINISH; 627105464Sphk mtx_unlock(&sc->worklist_mutex); 628105464Sphk g_bde_crypt_delete(wp); 629105464Sphk mtx_lock(&sc->worklist_mutex); 630125591Sphk restart++; 631105464Sphk g_bde_start_write(wp->sp); 632105464Sphk break; 633105464Sphk } 634125591Sphk if (restart) 635125591Sphk break; 636105464Sphk } 637125591Sphk if (!restart) { 638105464Sphk /* 639105464Sphk * We don't look for our death-warrant until we are 640105464Sphk * idle. Shouldn't make a difference in practice. 641105464Sphk */ 642105464Sphk if (sc->dead) 643105464Sphk break; 644105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_worker sleep"); 645105464Sphk error = msleep(sc, &sc->worklist_mutex, 646125591Sphk PRIBIO, "-", hz); 647105464Sphk if (error == EWOULDBLOCK) { 648105464Sphk /* 649160964Syar * Lose our skey cache in an orderly fashion. 650105464Sphk * The exact rate can be tuned to be less 651105464Sphk * aggressive if this is desirable. 10% per 652105464Sphk * second means that the cache is gone in a 653105464Sphk * few minutes. 654105464Sphk */ 655105464Sphk g_bde_purge_sector(sc, 10); 656105464Sphk } 657105464Sphk } 658105464Sphk } 659105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_worker die"); 660105464Sphk g_bde_purge_sector(sc, 1); 661105464Sphk KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork)); 662105464Sphk KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache)); 663105464Sphk KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect)); 664105464Sphk mtx_unlock(&sc->worklist_mutex); 665105464Sphk sc->dead = 2; 666105464Sphk wakeup(sc); 667172836Sjulian kproc_exit(0); 668105464Sphk} 669105464Sphk 670105464Sphk/* 671105464Sphk * g_bde_start1 has chopped the incoming request up so all the requests 672105464Sphk * we see here are inside a single zone. Map the data and key locations 673105464Sphk * grab the buffers we need and fire off the first volley of read requests. 674105464Sphk */ 675105464Sphk 676105464Sphkstatic void 677105464Sphkg_bde_start2(struct g_bde_work *wp) 678105464Sphk{ 679105464Sphk struct g_bde_softc *sc; 680105464Sphk 681105464Sphk KASSERT(wp != NULL, ("NULL wp in g_bde_start2")); 682108558Sphk KASSERT(wp->softc != NULL, ("NULL wp->softc")); 683105464Sphk g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp); 684105464Sphk sc = wp->softc; 685125591Sphk switch (wp->bp->bio_cmd) { 686125591Sphk case BIO_READ: 687105464Sphk wp->sp = g_bde_new_sector(wp, 0); 688105464Sphk if (wp->sp == NULL) { 689125591Sphk g_bde_work_done(wp, ENOMEM); 690105464Sphk return; 691105464Sphk } 692105464Sphk wp->sp->size = wp->length; 693105464Sphk wp->sp->data = wp->data; 694105464Sphk if (g_bde_start_read(wp->sp) != 0) { 695125591Sphk g_bde_work_done(wp, ENOMEM); 696105464Sphk return; 697105464Sphk } 698114033Sphk g_bde_read_keysector(sc, wp); 699105464Sphk if (wp->ksp == NULL) 700105464Sphk wp->error = ENOMEM; 701125591Sphk break; 702125591Sphk case BIO_DELETE: 703105464Sphk wp->sp = g_bde_new_sector(wp, wp->length); 704105464Sphk if (wp->sp == NULL) { 705125591Sphk g_bde_work_done(wp, ENOMEM); 706105464Sphk return; 707105464Sphk } 708125591Sphk break; 709125591Sphk case BIO_WRITE: 710105464Sphk wp->sp = g_bde_new_sector(wp, wp->length); 711105464Sphk if (wp->sp == NULL) { 712125591Sphk g_bde_work_done(wp, ENOMEM); 713105464Sphk return; 714105464Sphk } 715114033Sphk g_bde_read_keysector(sc, wp); 716105464Sphk if (wp->ksp == NULL) { 717125591Sphk g_bde_work_done(wp, ENOMEM); 718105464Sphk return; 719105464Sphk } 720125591Sphk break; 721125591Sphk default: 722105464Sphk KASSERT(0 == 1, 723105464Sphk ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd)); 724105464Sphk } 725105464Sphk 726105464Sphk wp->state = WAIT; 727105464Sphk wakeup(sc); 728105464Sphk} 729105464Sphk 730105464Sphk/* 731108558Sphk * Create a sequence of work structures, and have g_bde_map_sector() determine 732108558Sphk * how long they each can be. Feed them to g_bde_start2(). 733105464Sphk */ 734105464Sphk 735105464Sphkvoid 736105464Sphkg_bde_start1(struct bio *bp) 737105464Sphk{ 738105464Sphk struct g_bde_softc *sc; 739105464Sphk struct g_bde_work *wp; 740108558Sphk off_t done; 741105464Sphk 742105464Sphk sc = bp->bio_to->geom->softc; 743105464Sphk bp->bio_driver1 = sc; 744105464Sphk 745105464Sphk mtx_lock(&sc->worklist_mutex); 746108558Sphk for(done = 0; done < bp->bio_length; ) { 747105464Sphk wp = g_bde_new_work(sc); 748114038Sphk if (wp != NULL) { 749114038Sphk wp->bp = bp; 750114038Sphk wp->offset = bp->bio_offset + done; 751114038Sphk wp->data = bp->bio_data + done; 752114038Sphk wp->length = bp->bio_length - done; 753114038Sphk g_bde_map_sector(wp); 754114038Sphk done += wp->length; 755114038Sphk g_bde_start2(wp); 756105464Sphk } 757114038Sphk if (wp == NULL || bp->bio_error != 0) { 758114038Sphk g_bde_contribute(bp, bp->bio_length - done, ENOMEM); 759114038Sphk break; 760114038Sphk } 761105464Sphk } 762105464Sphk mtx_unlock(&sc->worklist_mutex); 763108052Sphk return; 764105464Sphk} 765