geom_io.c revision 118855
1241675Suqs/*- 2241675Suqs * Copyright (c) 2002 Poul-Henning Kamp 3241675Suqs * Copyright (c) 2002 Networks Associates Technology, Inc. 4241675Suqs * All rights reserved. 5241675Suqs * 6241675Suqs * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7241675Suqs * and NAI Labs, the Security Research Division of Network Associates, Inc. 8241675Suqs * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9241675Suqs * DARPA CHATS research program. 10241675Suqs * 11241675Suqs * Redistribution and use in source and binary forms, with or without 12241675Suqs * modification, are permitted provided that the following conditions 13241675Suqs * are met: 14241675Suqs * 1. Redistributions of source code must retain the above copyright 15241675Suqs * notice, this list of conditions and the following disclaimer. 16241675Suqs * 2. Redistributions in binary form must reproduce the above copyright 17241675Suqs * notice, this list of conditions and the following disclaimer in the 18241675Suqs * documentation and/or other materials provided with the distribution. 19241675Suqs * 3. The names of the authors may not be used to endorse or promote 20241675Suqs * products derived from this software without specific prior written 21241675Suqs * permission. 22241675Suqs * 23241675Suqs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24241675Suqs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25241675Suqs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26241675Suqs * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27241675Suqs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28241675Suqs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29241675Suqs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30241675Suqs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31241675Suqs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32241675Suqs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33241675Suqs * SUCH DAMAGE. 34241675Suqs */ 35241675Suqs 36241675Suqs#include <sys/cdefs.h> 37241675Suqs__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 118855 2003-08-13 06:42:56Z phk $"); 38241675Suqs 39241675Suqs#include <sys/param.h> 40241675Suqs#include <sys/systm.h> 41241675Suqs#include <sys/kernel.h> 42241675Suqs#include <sys/malloc.h> 43241675Suqs#include <sys/bio.h> 44241675Suqs 45241675Suqs#include <sys/errno.h> 46241675Suqs#include <geom/geom.h> 47241675Suqs#include <geom/geom_int.h> 48241675Suqs#include <sys/devicestat.h> 49241675Suqs 50241675Suqs#include <vm/uma.h> 51241675Suqs 52241675Suqsstatic struct g_bioq g_bio_run_down; 53241675Suqsstatic struct g_bioq g_bio_run_up; 54241675Suqs 55241675Suqsstatic u_int pace; 56241675Suqsstatic uma_zone_t biozone; 57241675Suqs 58241675Suqs#include <machine/atomic.h> 59241675Suqs 60241675Suqsstatic void 61241675Suqsg_bioq_lock(struct g_bioq *bq) 62241675Suqs{ 63241675Suqs 64241675Suqs mtx_lock(&bq->bio_queue_lock); 65241675Suqs} 66241675Suqs 67241675Suqsstatic void 68241675Suqsg_bioq_unlock(struct g_bioq *bq) 69241675Suqs{ 70241675Suqs 71241675Suqs mtx_unlock(&bq->bio_queue_lock); 72241675Suqs} 73241675Suqs 74241675Suqs#if 0 75241675Suqsstatic void 76241675Suqsg_bioq_destroy(struct g_bioq *bq) 77241675Suqs{ 78241675Suqs 79241675Suqs mtx_destroy(&bq->bio_queue_lock); 80241675Suqs} 81241675Suqs#endif 82241675Suqs 83241675Suqsstatic void 84241675Suqsg_bioq_init(struct g_bioq *bq) 85241675Suqs{ 86241675Suqs 87241675Suqs TAILQ_INIT(&bq->bio_queue); 88241675Suqs mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 89241675Suqs} 90241675Suqs 91241675Suqsstatic struct bio * 92241675Suqsg_bioq_first(struct g_bioq *bq) 93241675Suqs{ 94241675Suqs struct bio *bp; 95241675Suqs 96241675Suqs bp = TAILQ_FIRST(&bq->bio_queue); 97241675Suqs if (bp != NULL) { 98241675Suqs TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 99241675Suqs bq->bio_queue_length--; 100241675Suqs } 101241675Suqs return (bp); 102241675Suqs} 103241675Suqs 104241675Suqsstatic void 105241675Suqsg_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 106241675Suqs{ 107241675Suqs 108241675Suqs g_bioq_lock(rq); 109241675Suqs TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 110241675Suqs rq->bio_queue_length++; 111241675Suqs g_bioq_unlock(rq); 112241675Suqs} 113241675Suqs 114241675Suqsstruct bio * 115241675Suqsg_new_bio(void) 116241675Suqs{ 117241675Suqs struct bio *bp; 118241675Suqs 119241675Suqs bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); 120241675Suqs return (bp); 121241675Suqs} 122241675Suqs 123241675Suqsvoid 124241675Suqsg_destroy_bio(struct bio *bp) 125241675Suqs{ 126241675Suqs 127241675Suqs uma_zfree(biozone, bp); 128241675Suqs} 129241675Suqs 130241675Suqsstruct bio * 131241675Suqsg_clone_bio(struct bio *bp) 132241675Suqs{ 133241675Suqs struct bio *bp2; 134241675Suqs 135241675Suqs bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); 136241675Suqs if (bp2 != NULL) { 137241675Suqs bp2->bio_parent = bp; 138241675Suqs bp2->bio_cmd = bp->bio_cmd; 139241675Suqs bp2->bio_length = bp->bio_length; 140241675Suqs bp2->bio_offset = bp->bio_offset; 141241675Suqs bp2->bio_data = bp->bio_data; 142241675Suqs bp2->bio_attribute = bp->bio_attribute; 143241675Suqs bp->bio_children++; 144241675Suqs } 145241675Suqs return(bp2); 146241675Suqs} 147241675Suqs 148241675Suqsvoid 149241675Suqsg_io_init() 150241675Suqs{ 151241675Suqs 152241675Suqs g_bioq_init(&g_bio_run_down); 153241675Suqs g_bioq_init(&g_bio_run_up); 154241675Suqs biozone = uma_zcreate("g_bio", sizeof (struct bio), 155241675Suqs NULL, NULL, 156241675Suqs NULL, NULL, 157241675Suqs 0, 0); 158241675Suqs} 159241675Suqs 160241675Suqsint 161241675Suqsg_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 162241675Suqs{ 163241675Suqs struct bio *bp; 164241675Suqs int error; 165241675Suqs 166241675Suqs g_trace(G_T_BIO, "bio_getattr(%s)", attr); 167241675Suqs bp = g_new_bio(); 168241675Suqs bp->bio_cmd = BIO_GETATTR; 169241675Suqs bp->bio_done = NULL; 170241675Suqs bp->bio_attribute = attr; 171241675Suqs bp->bio_length = *len; 172241675Suqs bp->bio_data = ptr; 173241675Suqs g_io_request(bp, cp); 174241675Suqs error = biowait(bp, "ggetattr"); 175241675Suqs *len = bp->bio_completed; 176241675Suqs g_destroy_bio(bp); 177241675Suqs return (error); 178241675Suqs} 179241675Suqs 180241675Suqsstatic int 181241675Suqsg_io_check(struct bio *bp) 182241675Suqs{ 183241675Suqs struct g_consumer *cp; 184241675Suqs struct g_provider *pp; 185241675Suqs 186241675Suqs cp = bp->bio_from; 187241675Suqs pp = bp->bio_to; 188241675Suqs 189241675Suqs /* Fail if access counters dont allow the operation */ 190241675Suqs switch(bp->bio_cmd) { 191241675Suqs case BIO_READ: 192241675Suqs case BIO_GETATTR: 193241675Suqs if (cp->acr == 0) 194241675Suqs return (EPERM); 195241675Suqs break; 196241675Suqs case BIO_WRITE: 197241675Suqs case BIO_DELETE: 198241675Suqs if (cp->acw == 0) 199241675Suqs return (EPERM); 200241675Suqs break; 201241675Suqs default: 202241675Suqs return (EPERM); 203241675Suqs } 204241675Suqs /* if provider is marked for error, don't disturb. */ 205241675Suqs if (pp->error) 206241675Suqs return (pp->error); 207241675Suqs 208241675Suqs switch(bp->bio_cmd) { 209241675Suqs case BIO_READ: 210241675Suqs case BIO_WRITE: 211241675Suqs case BIO_DELETE: 212241675Suqs /* Noisily reject zero size sectors */ 213241675Suqs if (pp->sectorsize == 0) { 214241675Suqs printf("GEOM provider %s has zero sectorsize\n", 215241675Suqs pp->name); 216241675Suqs return (EDOOFUS); 217241675Suqs } 218241675Suqs /* Reject I/O not on sector boundary */ 219241675Suqs if (bp->bio_offset % pp->sectorsize) 220241675Suqs return (EINVAL); 221241675Suqs /* Reject I/O not integral sector long */ 222241675Suqs if (bp->bio_length % pp->sectorsize) 223241675Suqs return (EINVAL); 224241675Suqs /* Reject requests past the end of media. */ 225241675Suqs if (bp->bio_offset > pp->mediasize) 226241675Suqs return (EIO); 227241675Suqs break; 228241675Suqs default: 229241675Suqs break; 230241675Suqs } 231241675Suqs return (0); 232241675Suqs} 233241675Suqs 234241675Suqsvoid 235241675Suqsg_io_request(struct bio *bp, struct g_consumer *cp) 236241675Suqs{ 237241675Suqs struct g_provider *pp; 238241675Suqs 239241675Suqs pp = cp->provider; 240241675Suqs KASSERT(cp != NULL, ("NULL cp in g_io_request")); 241241675Suqs KASSERT(bp != NULL, ("NULL bp in g_io_request")); 242241675Suqs KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 243241675Suqs KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 244241675Suqs 245241675Suqs bp->bio_from = cp; 246241675Suqs bp->bio_to = pp; 247241675Suqs bp->bio_error = 0; 248241675Suqs bp->bio_completed = 0; 249241675Suqs 250241675Suqs if (g_collectstats) { 251241675Suqs devstat_start_transaction_bio(cp->stat, bp); 252241675Suqs devstat_start_transaction_bio(pp->stat, bp); 253241675Suqs } 254241675Suqs cp->nstart++; 255241675Suqs pp->nstart++; 256241675Suqs 257241675Suqs /* Pass it on down. */ 258241675Suqs g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 259241675Suqs bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 260241675Suqs g_bioq_enqueue_tail(bp, &g_bio_run_down); 261241675Suqs wakeup(&g_wait_down); 262241675Suqs} 263241675Suqs 264241675Suqsvoid 265241675Suqsg_io_deliver(struct bio *bp, int error) 266241675Suqs{ 267241675Suqs struct g_consumer *cp; 268241675Suqs struct g_provider *pp; 269241675Suqs 270241675Suqs cp = bp->bio_from; 271241675Suqs pp = bp->bio_to; 272241675Suqs KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 273241675Suqs KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 274241675Suqs KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 275241675Suqs KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 276241675Suqs 277241675Suqs g_trace(G_T_BIO, 278241675Suqs"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 279241675Suqs bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 280241675Suqs (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 281241675Suqs 282241675Suqs bp->bio_bcount = bp->bio_length; 283241675Suqs if (g_collectstats) { 284241675Suqs bp->bio_resid = bp->bio_bcount - bp->bio_completed; 285241675Suqs devstat_end_transaction_bio(cp->stat, bp); 286241675Suqs devstat_end_transaction_bio(pp->stat, bp); 287241675Suqs } 288241675Suqs cp->nend++; 289241675Suqs pp->nend++; 290241675Suqs 291241675Suqs if (error == ENOMEM) { 292241675Suqs if (bootverbose) 293241675Suqs printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 294241675Suqs g_io_request(bp, cp); 295241675Suqs pace++; 296241675Suqs return; 297241675Suqs } 298241675Suqs bp->bio_error = error; 299241675Suqs g_bioq_enqueue_tail(bp, &g_bio_run_up); 300241675Suqs wakeup(&g_wait_up); 301241675Suqs} 302241675Suqs 303241675Suqsvoid 304241675Suqsg_io_schedule_down(struct thread *tp __unused) 305241675Suqs{ 306241675Suqs struct bio *bp; 307241675Suqs off_t excess; 308241675Suqs int error; 309241675Suqs struct mtx mymutex; 310241675Suqs 311241675Suqs bzero(&mymutex, sizeof mymutex); 312241675Suqs mtx_init(&mymutex, "g_xdown", MTX_DEF, 0); 313241675Suqs 314241675Suqs for(;;) { 315241675Suqs g_bioq_lock(&g_bio_run_down); 316241675Suqs bp = g_bioq_first(&g_bio_run_down); 317241675Suqs if (bp == NULL) { 318241675Suqs msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 319241675Suqs PRIBIO | PDROP, "-", hz/10); 320241675Suqs continue; 321241675Suqs } 322241675Suqs g_bioq_unlock(&g_bio_run_down); 323241675Suqs if (pace > 0) { 324241675Suqs msleep(&error, NULL, PRIBIO, "g_down", hz/10); 325241675Suqs pace--; 326241675Suqs } 327241675Suqs error = g_io_check(bp); 328241675Suqs if (error) { 329241675Suqs g_io_deliver(bp, error); 330241675Suqs continue; 331241675Suqs } 332241675Suqs switch (bp->bio_cmd) { 333241675Suqs case BIO_READ: 334241675Suqs case BIO_WRITE: 335241675Suqs case BIO_DELETE: 336241675Suqs /* Truncate requests to the end of providers media. */ 337241675Suqs excess = bp->bio_offset + bp->bio_length; 338241675Suqs if (excess > bp->bio_to->mediasize) { 339241675Suqs excess -= bp->bio_to->mediasize; 340241675Suqs bp->bio_length -= excess; 341241675Suqs } 342241675Suqs /* Deliver zero length transfers right here. */ 343241675Suqs if (bp->bio_length == 0) { 344241675Suqs g_io_deliver(bp, 0); 345241675Suqs continue; 346241675Suqs } 347241675Suqs break; 348241675Suqs default: 349241675Suqs break; 350241675Suqs } 351241675Suqs mtx_lock(&mymutex); 352241675Suqs bp->bio_to->geom->start(bp); 353241675Suqs mtx_unlock(&mymutex); 354241675Suqs } 355241675Suqs} 356241675Suqs 357241675Suqsvoid 358241675Suqsg_io_schedule_up(struct thread *tp __unused) 359241675Suqs{ 360241675Suqs struct bio *bp; 361241675Suqs struct mtx mymutex; 362241675Suqs 363241675Suqs bzero(&mymutex, sizeof mymutex); 364241675Suqs mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 365241675Suqs for(;;) { 366241675Suqs g_bioq_lock(&g_bio_run_up); 367241675Suqs bp = g_bioq_first(&g_bio_run_up); 368241675Suqs if (bp != NULL) { 369241675Suqs g_bioq_unlock(&g_bio_run_up); 370241675Suqs mtx_lock(&mymutex); 371241675Suqs biodone(bp); 372241675Suqs mtx_unlock(&mymutex); 373241675Suqs continue; 374241675Suqs } 375241675Suqs msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 376241675Suqs PRIBIO | PDROP, "-", hz/10); 377241675Suqs } 378241675Suqs} 379241675Suqs 380241675Suqsvoid * 381241675Suqsg_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 382241675Suqs{ 383241675Suqs struct bio *bp; 384241675Suqs void *ptr; 385241675Suqs int errorc; 386241675Suqs 387241675Suqs bp = g_new_bio(); 388241675Suqs bp->bio_cmd = BIO_READ; 389241675Suqs bp->bio_done = NULL; 390241675Suqs bp->bio_offset = offset; 391241675Suqs bp->bio_length = length; 392241675Suqs ptr = g_malloc(length, M_WAITOK); 393241675Suqs bp->bio_data = ptr; 394241675Suqs g_io_request(bp, cp); 395241675Suqs errorc = biowait(bp, "gread"); 396241675Suqs if (error != NULL) 397241675Suqs *error = errorc; 398241675Suqs g_destroy_bio(bp); 399241675Suqs if (errorc) { 400241675Suqs g_free(ptr); 401241675Suqs ptr = NULL; 402241675Suqs } 403241675Suqs return (ptr); 404241675Suqs} 405241675Suqs 406241675Suqsint 407241675Suqsg_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 408241675Suqs{ 409241675Suqs struct bio *bp; 410241675Suqs int error; 411241675Suqs 412241675Suqs bp = g_new_bio(); 413241675Suqs bp->bio_cmd = BIO_WRITE; 414241675Suqs bp->bio_done = NULL; 415241675Suqs bp->bio_offset = offset; 416241675Suqs bp->bio_length = length; 417241675Suqs bp->bio_data = ptr; 418241675Suqs g_io_request(bp, cp); 419241675Suqs error = biowait(bp, "gwrite"); 420241675Suqs g_destroy_bio(bp); 421241675Suqs return (error); 422241675Suqs} 423241675Suqs