/* geom_io.c revision 148410 */
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 */ 35 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 148410 2005-07-25 21:12:54Z phk $"); 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/kernel.h> 42#include <sys/malloc.h> 43#include <sys/bio.h> 44#include <sys/ktr.h> 45 46#include <sys/errno.h> 47#include <geom/geom.h> 48#include <geom/geom_int.h> 49#include <sys/devicestat.h> 50 51#include <vm/uma.h> 52 53static struct g_bioq g_bio_run_down; 54static struct g_bioq g_bio_run_up; 55static struct g_bioq g_bio_run_task; 56 57static u_int pace; 58static uma_zone_t biozone; 59 60#include <machine/atomic.h> 61 62static void 63g_bioq_lock(struct g_bioq *bq) 64{ 65 66 mtx_lock(&bq->bio_queue_lock); 67} 68 69static void 70g_bioq_unlock(struct g_bioq *bq) 71{ 72 73 mtx_unlock(&bq->bio_queue_lock); 74} 75 76#if 0 77static void 78g_bioq_destroy(struct g_bioq *bq) 79{ 80 81 mtx_destroy(&bq->bio_queue_lock); 82} 83#endif 84 85static void 86g_bioq_init(struct g_bioq *bq) 87{ 88 89 TAILQ_INIT(&bq->bio_queue); 90 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 91} 92 93static struct bio * 94g_bioq_first(struct g_bioq *bq) 95{ 96 struct bio *bp; 97 98 bp = TAILQ_FIRST(&bq->bio_queue); 99 if (bp != NULL) { 100 KASSERT((bp->bio_flags & BIO_ONQUEUE), 101 ("Bio not on queue bp=%p target %p", bp, bq)); 102 bp->bio_flags &= ~BIO_ONQUEUE; 103 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 104 bq->bio_queue_length--; 105 } 106 return (bp); 107} 108 109struct bio * 110g_new_bio(void) 111{ 112 struct bio *bp; 113 114 bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); 115 return (bp); 116} 117 118struct bio * 119g_alloc_bio(void) 120{ 121 struct bio *bp; 122 123 bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); 124 return (bp); 125} 126 127void 128g_destroy_bio(struct bio *bp) 129{ 130 131 uma_zfree(biozone, bp); 132} 133 134struct bio * 135g_clone_bio(struct bio *bp) 136{ 137 struct bio *bp2; 138 139 bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); 140 if (bp2 != NULL) { 141 bp2->bio_parent = bp; 
142 bp2->bio_cmd = bp->bio_cmd; 143 bp2->bio_length = bp->bio_length; 144 bp2->bio_offset = bp->bio_offset; 145 bp2->bio_data = bp->bio_data; 146 bp2->bio_attribute = bp->bio_attribute; 147 bp->bio_children++; 148 } 149 return(bp2); 150} 151 152void 153g_io_init() 154{ 155 156 g_bioq_init(&g_bio_run_down); 157 g_bioq_init(&g_bio_run_up); 158 g_bioq_init(&g_bio_run_task); 159 biozone = uma_zcreate("g_bio", sizeof (struct bio), 160 NULL, NULL, 161 NULL, NULL, 162 0, 0); 163} 164 165int 166g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 167{ 168 struct bio *bp; 169 int error; 170 171 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 172 bp = g_alloc_bio(); 173 bp->bio_cmd = BIO_GETATTR; 174 bp->bio_done = NULL; 175 bp->bio_attribute = attr; 176 bp->bio_length = *len; 177 bp->bio_data = ptr; 178 g_io_request(bp, cp); 179 error = biowait(bp, "ggetattr"); 180 *len = bp->bio_completed; 181 g_destroy_bio(bp); 182 return (error); 183} 184 185static int 186g_io_check(struct bio *bp) 187{ 188 struct g_consumer *cp; 189 struct g_provider *pp; 190 191 cp = bp->bio_from; 192 pp = bp->bio_to; 193 194 /* Fail if access counters dont allow the operation */ 195 switch(bp->bio_cmd) { 196 case BIO_READ: 197 case BIO_GETATTR: 198 if (cp->acr == 0) 199 return (EPERM); 200 break; 201 case BIO_WRITE: 202 case BIO_DELETE: 203 if (cp->acw == 0) 204 return (EPERM); 205 break; 206 default: 207 return (EPERM); 208 } 209 /* if provider is marked for error, don't disturb. */ 210 if (pp->error) 211 return (pp->error); 212 213 switch(bp->bio_cmd) { 214 case BIO_READ: 215 case BIO_WRITE: 216 case BIO_DELETE: 217 /* Zero sectorsize is a probably lack of media */ 218 if (pp->sectorsize == 0) 219 return (ENXIO); 220 /* Reject I/O not on sector boundary */ 221 if (bp->bio_offset % pp->sectorsize) 222 return (EINVAL); 223 /* Reject I/O not integral sector long */ 224 if (bp->bio_length % pp->sectorsize) 225 return (EINVAL); 226 /* Reject requests before or past the end of media. 
*/ 227 if (bp->bio_offset < 0) 228 return (EIO); 229 if (bp->bio_offset > pp->mediasize) 230 return (EIO); 231 break; 232 default: 233 break; 234 } 235 return (0); 236} 237 238void 239g_io_request(struct bio *bp, struct g_consumer *cp) 240{ 241 struct g_provider *pp; 242 243 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 244 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 245 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 246 pp = cp->provider; 247 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 248 249 if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) { 250 KASSERT(bp->bio_offset % cp->provider->sectorsize == 0, 251 ("wrong offset %jd for sectorsize %u", 252 bp->bio_offset, cp->provider->sectorsize)); 253 KASSERT(bp->bio_length % cp->provider->sectorsize == 0, 254 ("wrong length %jd for sectorsize %u", 255 bp->bio_length, cp->provider->sectorsize)); 256 } 257 258 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 259 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 260 261 bp->bio_from = cp; 262 bp->bio_to = pp; 263 bp->bio_error = 0; 264 bp->bio_completed = 0; 265 266 KASSERT(!(bp->bio_flags & BIO_ONQUEUE), 267 ("Bio already on queue bp=%p", bp)); 268 bp->bio_flags |= BIO_ONQUEUE; 269 270 binuptime(&bp->bio_t0); 271 272 /* 273 * The statistics collection is lockless, as such, but we 274 * can not update one instance of the statistics from more 275 * than one thread at a time, so grab the lock first. 276 */ 277 g_bioq_lock(&g_bio_run_down); 278 if (g_collectstats & 1) 279 devstat_start_transaction(pp->stat, &bp->bio_t0); 280 if (g_collectstats & 2) 281 devstat_start_transaction(cp->stat, &bp->bio_t0); 282 283 pp->nstart++; 284 cp->nstart++; 285 TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); 286 g_bio_run_down.bio_queue_length++; 287 g_bioq_unlock(&g_bio_run_down); 288 289 /* Pass it on down. 
*/ 290 wakeup(&g_wait_down); 291} 292 293void 294g_io_deliver(struct bio *bp, int error) 295{ 296 struct g_consumer *cp; 297 struct g_provider *pp; 298 299 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 300 pp = bp->bio_to; 301 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 302 cp = bp->bio_from; 303 if (cp == NULL) { 304 bp->bio_error = error; 305 bp->bio_done(bp); 306 return; 307 } 308 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 309 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 310 KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); 311 KASSERT(bp->bio_completed <= bp->bio_length, 312 ("bio_completed can't be greater than bio_length")); 313 314 g_trace(G_T_BIO, 315"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 316 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 317 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 318 319 KASSERT(!(bp->bio_flags & BIO_ONQUEUE), 320 ("Bio already on queue bp=%p", bp)); 321 322 /* 323 * XXX: next two doesn't belong here 324 */ 325 bp->bio_bcount = bp->bio_length; 326 bp->bio_resid = bp->bio_bcount - bp->bio_completed; 327 328 /* 329 * The statistics collection is lockless, as such, but we 330 * can not update one instance of the statistics from more 331 * than one thread at a time, so grab the lock first. 
332 */ 333 g_bioq_lock(&g_bio_run_up); 334 if (g_collectstats & 1) 335 devstat_end_transaction_bio(pp->stat, bp); 336 if (g_collectstats & 2) 337 devstat_end_transaction_bio(cp->stat, bp); 338 339 cp->nend++; 340 pp->nend++; 341 if (error != ENOMEM) { 342 bp->bio_error = error; 343 TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); 344 bp->bio_flags |= BIO_ONQUEUE; 345 g_bio_run_up.bio_queue_length++; 346 g_bioq_unlock(&g_bio_run_up); 347 wakeup(&g_wait_up); 348 return; 349 } 350 g_bioq_unlock(&g_bio_run_up); 351 352 if (bootverbose) 353 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 354 bp->bio_children = 0; 355 bp->bio_inbed = 0; 356 g_io_request(bp, cp); 357 pace++; 358 return; 359} 360 361void 362g_io_schedule_down(struct thread *tp __unused) 363{ 364 struct bio *bp; 365 off_t excess; 366 int error; 367#ifdef WITNESS 368 struct mtx mymutex; 369 370 bzero(&mymutex, sizeof mymutex); 371 mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF); 372#endif 373 374 for(;;) { 375 g_bioq_lock(&g_bio_run_down); 376 bp = g_bioq_first(&g_bio_run_down); 377 if (bp == NULL) { 378 CTR0(KTR_GEOM, "g_down going to sleep"); 379 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 380 PRIBIO | PDROP, "-", hz/10); 381 continue; 382 } 383 CTR0(KTR_GEOM, "g_down has work to do"); 384 g_bioq_unlock(&g_bio_run_down); 385 if (pace > 0) { 386 CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace); 387 msleep(&error, NULL, PRIBIO, "g_down", hz/10); 388 pace--; 389 } 390 error = g_io_check(bp); 391 if (error) { 392 CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " 393 "%s returned %d", bp, bp->bio_to->name, error); 394 g_io_deliver(bp, error); 395 continue; 396 } 397 CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, 398 bp->bio_to->name); 399 switch (bp->bio_cmd) { 400 case BIO_READ: 401 case BIO_WRITE: 402 case BIO_DELETE: 403 /* Truncate requests to the end of providers media. */ 404 /* 405 * XXX: What if we truncate because of offset being 406 * bad, not length? 
407 */ 408 excess = bp->bio_offset + bp->bio_length; 409 if (excess > bp->bio_to->mediasize) { 410 excess -= bp->bio_to->mediasize; 411 bp->bio_length -= excess; 412 if (excess > 0) 413 CTR3(KTR_GEOM, "g_down truncated bio " 414 "%p provider %s by %d", bp, 415 bp->bio_to->name, excess); 416 } 417 /* Deliver zero length transfers right here. */ 418 if (bp->bio_length == 0) { 419 g_io_deliver(bp, 0); 420 CTR2(KTR_GEOM, "g_down terminated 0-length " 421 "bp %p provider %s", bp, bp->bio_to->name); 422 continue; 423 } 424 break; 425 default: 426 break; 427 } 428#ifdef WITNESS 429 mtx_lock(&mymutex); 430#endif 431 CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " 432 "len %ld", bp, bp->bio_to->name, bp->bio_offset, 433 bp->bio_length); 434 bp->bio_to->geom->start(bp); 435#ifdef WITNESS 436 mtx_unlock(&mymutex); 437#endif 438 } 439} 440 441void 442bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 443{ 444 bp->bio_task = func; 445 bp->bio_task_arg = arg; 446 /* 447 * The taskqueue is actually just a second queue off the "up" 448 * queue, so we use the same lock. 
449 */ 450 g_bioq_lock(&g_bio_run_up); 451 KASSERT(!(bp->bio_flags & BIO_ONQUEUE), 452 ("Bio already on queue bp=%p target taskq", bp)); 453 bp->bio_flags |= BIO_ONQUEUE; 454 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 455 g_bio_run_task.bio_queue_length++; 456 wakeup(&g_wait_up); 457 g_bioq_unlock(&g_bio_run_up); 458} 459 460 461void 462g_io_schedule_up(struct thread *tp __unused) 463{ 464 struct bio *bp; 465#ifdef WITNESS 466 struct mtx mymutex; 467 468 bzero(&mymutex, sizeof mymutex); 469 mtx_init(&mymutex, "g_xup", NULL, MTX_DEF); 470#endif 471 for(;;) { 472 g_bioq_lock(&g_bio_run_up); 473 bp = g_bioq_first(&g_bio_run_task); 474 if (bp != NULL) { 475 g_bioq_unlock(&g_bio_run_up); 476#ifdef WITNESS 477 mtx_lock(&mymutex); 478#endif 479 CTR1(KTR_GEOM, "g_up processing task bp %p", bp); 480 bp->bio_task(bp->bio_task_arg); 481#ifdef WITNESS 482 mtx_unlock(&mymutex); 483#endif 484 continue; 485 } 486 bp = g_bioq_first(&g_bio_run_up); 487 if (bp != NULL) { 488 g_bioq_unlock(&g_bio_run_up); 489#ifdef WITNESS 490 mtx_lock(&mymutex); 491#endif 492 CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " 493 "%ld len %ld", bp, bp->bio_to->name, 494 bp->bio_offset, bp->bio_length); 495 biodone(bp); 496#ifdef WITNESS 497 mtx_unlock(&mymutex); 498#endif 499 continue; 500 } 501 CTR0(KTR_GEOM, "g_up going to sleep"); 502 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 503 PRIBIO | PDROP, "-", hz/10); 504 } 505} 506 507void * 508g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 509{ 510 struct bio *bp; 511 void *ptr; 512 int errorc; 513 514 KASSERT(length > 0 && length >= cp->provider->sectorsize && 515 length <= MAXPHYS, ("g_read_data(): invalid length %jd", 516 (intmax_t)length)); 517 518 bp = g_alloc_bio(); 519 bp->bio_cmd = BIO_READ; 520 bp->bio_done = NULL; 521 bp->bio_offset = offset; 522 bp->bio_length = length; 523 ptr = g_malloc(length, M_WAITOK); 524 bp->bio_data = ptr; 525 g_io_request(bp, cp); 526 errorc = biowait(bp, 
"gread"); 527 if (error != NULL) 528 *error = errorc; 529 g_destroy_bio(bp); 530 if (errorc) { 531 g_free(ptr); 532 ptr = NULL; 533 } 534 return (ptr); 535} 536 537int 538g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 539{ 540 struct bio *bp; 541 int error; 542 543 KASSERT(length > 0 && length >= cp->provider->sectorsize && 544 length <= MAXPHYS, ("g_write_data(): invalid length %jd", 545 (intmax_t)length)); 546 547 bp = g_alloc_bio(); 548 bp->bio_cmd = BIO_WRITE; 549 bp->bio_done = NULL; 550 bp->bio_offset = offset; 551 bp->bio_length = length; 552 bp->bio_data = ptr; 553 g_io_request(bp, cp); 554 error = biowait(bp, "gwrite"); 555 g_destroy_bio(bp); 556 return (error); 557} 558 559void 560g_print_bio(struct bio *bp) 561{ 562 const char *pname, *cmd = NULL; 563 564 if (bp->bio_to != NULL) 565 pname = bp->bio_to->name; 566 else 567 pname = "[unknown]"; 568 569 switch (bp->bio_cmd) { 570 case BIO_GETATTR: 571 cmd = "GETATTR"; 572 printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); 573 return; 574 case BIO_READ: 575 cmd = "READ"; 576 case BIO_WRITE: 577 if (cmd == NULL) 578 cmd = "WRITE"; 579 case BIO_DELETE: 580 if (cmd == NULL) 581 cmd = "DELETE"; 582 printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd, 583 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 584 return; 585 default: 586 cmd = "UNKNOWN"; 587 printf("%s[%s()]", pname, cmd); 588 return; 589 } 590 /* NOTREACHED */ 591} 592