/* geom_io.c revision 134379 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 134379 2004-08-27 14:43:11Z phk $"); 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/kernel.h> 42#include <sys/malloc.h> 43#include <sys/bio.h> 44 45#include <sys/errno.h> 46#include <geom/geom.h> 47#include <geom/geom_int.h> 48#include <sys/devicestat.h> 49 50#include <vm/uma.h> 51 52static struct g_bioq g_bio_run_down; 53static struct g_bioq g_bio_run_up; 54static struct g_bioq g_bio_run_task; 55 56static u_int pace; 57static uma_zone_t biozone; 58 59#include <machine/atomic.h> 60 61static void 62g_bioq_lock(struct g_bioq *bq) 63{ 64 65 mtx_lock(&bq->bio_queue_lock); 66} 67 68static void 69g_bioq_unlock(struct g_bioq *bq) 70{ 71 72 mtx_unlock(&bq->bio_queue_lock); 73} 74 75#if 0 76static void 77g_bioq_destroy(struct g_bioq *bq) 78{ 79 80 mtx_destroy(&bq->bio_queue_lock); 81} 82#endif 83 84static void 85g_bioq_init(struct g_bioq *bq) 86{ 87 88 TAILQ_INIT(&bq->bio_queue); 89 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 90} 91 92static struct bio * 93g_bioq_first(struct g_bioq *bq) 94{ 95 struct bio *bp; 96 97 bp = TAILQ_FIRST(&bq->bio_queue); 98 if (bp != NULL) { 99 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 100 bq->bio_queue_length--; 101 } 102 return (bp); 103} 104 105static void 106g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 107{ 108 109 g_bioq_lock(rq); 110 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 111 rq->bio_queue_length++; 112 g_bioq_unlock(rq); 113} 114 115struct bio * 116g_new_bio(void) 117{ 118 struct bio *bp; 119 120 bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); 121 return (bp); 122} 123 124struct bio * 125g_alloc_bio(void) 126{ 127 struct bio *bp; 128 129 bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); 130 return (bp); 131} 132 133void 134g_destroy_bio(struct bio *bp) 135{ 136 137 uma_zfree(biozone, bp); 138} 139 140struct bio * 141g_clone_bio(struct bio *bp) 142{ 143 struct bio *bp2; 144 145 bp2 = uma_zalloc(biozone, 
M_NOWAIT | M_ZERO); 146 if (bp2 != NULL) { 147 bp2->bio_parent = bp; 148 bp2->bio_cmd = bp->bio_cmd; 149 bp2->bio_length = bp->bio_length; 150 bp2->bio_offset = bp->bio_offset; 151 bp2->bio_data = bp->bio_data; 152 bp2->bio_attribute = bp->bio_attribute; 153 bp->bio_children++; 154 } 155 return(bp2); 156} 157 158void 159g_io_init() 160{ 161 162 g_bioq_init(&g_bio_run_down); 163 g_bioq_init(&g_bio_run_up); 164 g_bioq_init(&g_bio_run_task); 165 biozone = uma_zcreate("g_bio", sizeof (struct bio), 166 NULL, NULL, 167 NULL, NULL, 168 0, 0); 169} 170 171int 172g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 173{ 174 struct bio *bp; 175 int error; 176 177 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 178 bp = g_alloc_bio(); 179 bp->bio_cmd = BIO_GETATTR; 180 bp->bio_done = NULL; 181 bp->bio_attribute = attr; 182 bp->bio_length = *len; 183 bp->bio_data = ptr; 184 g_io_request(bp, cp); 185 error = biowait(bp, "ggetattr"); 186 *len = bp->bio_completed; 187 g_destroy_bio(bp); 188 return (error); 189} 190 191static int 192g_io_check(struct bio *bp) 193{ 194 struct g_consumer *cp; 195 struct g_provider *pp; 196 197 cp = bp->bio_from; 198 pp = bp->bio_to; 199 200 /* Fail if access counters dont allow the operation */ 201 switch(bp->bio_cmd) { 202 case BIO_READ: 203 case BIO_GETATTR: 204 if (cp->acr == 0) 205 return (EPERM); 206 break; 207 case BIO_WRITE: 208 case BIO_DELETE: 209 if (cp->acw == 0) 210 return (EPERM); 211 break; 212 default: 213 return (EPERM); 214 } 215 /* if provider is marked for error, don't disturb. 
*/ 216 if (pp->error) 217 return (pp->error); 218 219 switch(bp->bio_cmd) { 220 case BIO_READ: 221 case BIO_WRITE: 222 case BIO_DELETE: 223 /* Zero sectorsize is a probably lack of media */ 224 if (pp->sectorsize == 0) 225 return (ENXIO); 226 /* Reject I/O not on sector boundary */ 227 if (bp->bio_offset % pp->sectorsize) 228 return (EINVAL); 229 /* Reject I/O not integral sector long */ 230 if (bp->bio_length % pp->sectorsize) 231 return (EINVAL); 232 /* Reject requests before or past the end of media. */ 233 if (bp->bio_offset < 0) 234 return (EIO); 235 if (bp->bio_offset > pp->mediasize) 236 return (EIO); 237 break; 238 default: 239 break; 240 } 241 return (0); 242} 243 244void 245g_io_request(struct bio *bp, struct g_consumer *cp) 246{ 247 struct g_provider *pp; 248 249 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 250 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 251 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 252 pp = cp->provider; 253 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 254 255 bp->bio_from = cp; 256 bp->bio_to = pp; 257 bp->bio_error = 0; 258 bp->bio_completed = 0; 259 260 if (g_collectstats & 1) 261 devstat_start_transaction_bio(pp->stat, bp); 262 pp->nstart++; 263 if (g_collectstats & 2) 264 devstat_start_transaction_bio(cp->stat, bp); 265 cp->nstart++; 266 267 /* Pass it on down. 
*/ 268 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 269 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 270 g_bioq_enqueue_tail(bp, &g_bio_run_down); 271 wakeup(&g_wait_down); 272} 273 274void 275g_io_deliver(struct bio *bp, int error) 276{ 277 struct g_consumer *cp; 278 struct g_provider *pp; 279 280 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 281 pp = bp->bio_to; 282 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 283 cp = bp->bio_from; 284 if (cp == NULL) { 285 bp->bio_error = error; 286 bp->bio_done(bp); 287 return; 288 } 289 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 290 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 291 KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); 292 KASSERT(bp->bio_completed <= bp->bio_length, 293 ("bio_completed can't be greater than bio_length")); 294 295 g_trace(G_T_BIO, 296"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 297 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 298 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 299 300 bp->bio_bcount = bp->bio_length; 301 bp->bio_resid = bp->bio_bcount - bp->bio_completed; 302 if (g_collectstats & 1) 303 devstat_end_transaction_bio(pp->stat, bp); 304 if (g_collectstats & 2) 305 devstat_end_transaction_bio(cp->stat, bp); 306 cp->nend++; 307 pp->nend++; 308 309 if (error == ENOMEM) { 310 if (bootverbose) 311 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 312 bp->bio_children = 0; 313 bp->bio_inbed = 0; 314 g_io_request(bp, cp); 315 pace++; 316 return; 317 } 318 bp->bio_error = error; 319 g_bioq_enqueue_tail(bp, &g_bio_run_up); 320 wakeup(&g_wait_up); 321} 322 323void 324g_io_schedule_down(struct thread *tp __unused) 325{ 326 struct bio *bp; 327 off_t excess; 328 int error; 329#ifdef WITNESS 330 struct mtx mymutex; 331 332 bzero(&mymutex, sizeof mymutex); 333 mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF); 334#endif 335 336 for(;;) { 337 
g_bioq_lock(&g_bio_run_down); 338 bp = g_bioq_first(&g_bio_run_down); 339 if (bp == NULL) { 340 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 341 PRIBIO | PDROP, "-", hz/10); 342 continue; 343 } 344 g_bioq_unlock(&g_bio_run_down); 345 if (pace > 0) { 346 msleep(&error, NULL, PRIBIO, "g_down", hz/10); 347 pace--; 348 } 349 error = g_io_check(bp); 350 if (error) { 351 g_io_deliver(bp, error); 352 continue; 353 } 354 switch (bp->bio_cmd) { 355 case BIO_READ: 356 case BIO_WRITE: 357 case BIO_DELETE: 358 /* Truncate requests to the end of providers media. */ 359 excess = bp->bio_offset + bp->bio_length; 360 if (excess > bp->bio_to->mediasize) { 361 excess -= bp->bio_to->mediasize; 362 bp->bio_length -= excess; 363 } 364 /* Deliver zero length transfers right here. */ 365 if (bp->bio_length == 0) { 366 g_io_deliver(bp, 0); 367 continue; 368 } 369 break; 370 default: 371 break; 372 } 373#ifdef WITNESS 374 mtx_lock(&mymutex); 375#endif 376 bp->bio_to->geom->start(bp); 377#ifdef WITNESS 378 mtx_unlock(&mymutex); 379#endif 380 } 381} 382 383void 384bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 385{ 386 bp->bio_task = func; 387 bp->bio_task_arg = arg; 388 /* 389 * The taskqueue is actually just a second queue off the "up" 390 * queue, so we use the same lock. 
391 */ 392 g_bioq_lock(&g_bio_run_up); 393 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 394 g_bio_run_task.bio_queue_length++; 395 wakeup(&g_wait_up); 396 g_bioq_unlock(&g_bio_run_up); 397} 398 399 400void 401g_io_schedule_up(struct thread *tp __unused) 402{ 403 struct bio *bp; 404#ifdef WITNESS 405 struct mtx mymutex; 406 407 bzero(&mymutex, sizeof mymutex); 408 mtx_init(&mymutex, "g_xup", NULL, MTX_DEF); 409#endif 410 for(;;) { 411 g_bioq_lock(&g_bio_run_up); 412 bp = g_bioq_first(&g_bio_run_task); 413 if (bp != NULL) { 414 g_bioq_unlock(&g_bio_run_up); 415#ifdef WITNESS 416 mtx_lock(&mymutex); 417#endif 418 bp->bio_task(bp->bio_task_arg); 419#ifdef WITNESS 420 mtx_unlock(&mymutex); 421#endif 422 continue; 423 } 424 bp = g_bioq_first(&g_bio_run_up); 425 if (bp != NULL) { 426 g_bioq_unlock(&g_bio_run_up); 427#ifdef WITNESS 428 mtx_lock(&mymutex); 429#endif 430 biodone(bp); 431#ifdef WITNESS 432 mtx_unlock(&mymutex); 433#endif 434 continue; 435 } 436 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 437 PRIBIO | PDROP, "-", hz/10); 438 } 439} 440 441void * 442g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 443{ 444 struct bio *bp; 445 void *ptr; 446 int errorc; 447 448 KASSERT(length >= 512 && length <= DFLTPHYS, 449 ("g_read_data(): invalid length %jd", (intmax_t)length)); 450 451 bp = g_alloc_bio(); 452 bp->bio_cmd = BIO_READ; 453 bp->bio_done = NULL; 454 bp->bio_offset = offset; 455 bp->bio_length = length; 456 ptr = g_malloc(length, M_WAITOK); 457 bp->bio_data = ptr; 458 g_io_request(bp, cp); 459 errorc = biowait(bp, "gread"); 460 if (error != NULL) 461 *error = errorc; 462 g_destroy_bio(bp); 463 if (errorc) { 464 g_free(ptr); 465 ptr = NULL; 466 } 467 return (ptr); 468} 469 470int 471g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 472{ 473 struct bio *bp; 474 int error; 475 476 KASSERT(length >= 512 && length <= DFLTPHYS, 477 ("g_write_data(): invalid length %jd", (intmax_t)length)); 478 479 
bp = g_alloc_bio(); 480 bp->bio_cmd = BIO_WRITE; 481 bp->bio_done = NULL; 482 bp->bio_offset = offset; 483 bp->bio_length = length; 484 bp->bio_data = ptr; 485 g_io_request(bp, cp); 486 error = biowait(bp, "gwrite"); 487 g_destroy_bio(bp); 488 return (error); 489} 490 491void 492g_print_bio(struct bio *bp) 493{ 494 const char *pname, *cmd = NULL; 495 496 if (bp->bio_to != NULL) 497 pname = bp->bio_to->name; 498 else 499 pname = "[unknown]"; 500 501 switch (bp->bio_cmd) { 502 case BIO_GETATTR: 503 cmd = "GETATTR"; 504 printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); 505 return; 506 case BIO_READ: 507 cmd = "READ"; 508 case BIO_WRITE: 509 if (cmd == NULL) 510 cmd = "WRITE"; 511 case BIO_DELETE: 512 if (cmd == NULL) 513 cmd = "DELETE"; 514 printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd, 515 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 516 return; 517 default: 518 cmd = "UNKNOWN"; 519 printf("%s[%s()]", pname, cmd); 520 return; 521 } 522 /* NOTREACHED */ 523} 524