geom_io.c revision 256603
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/geom_io.c 256603 2013-10-16 09:12:40Z mav $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <sys/sysctl.h>
#include <sys/vmem.h>

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <sys/devicestat.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;

static u_int pace;
static uma_zone_t biozone;

/*
 * The head of the list of classifiers used in g_io_request.
 * Use g_register_classifier() and g_unregister_classifier()
 * to add/remove entries to the list.
 * Classifiers are invoked in registration order.
 */
static TAILQ_HEAD(g_classifier_tailq, g_classifier_hook)
    g_classifier_tailq = TAILQ_HEAD_INITIALIZER(g_classifier_tailq);

#include <machine/atomic.h>

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		KASSERT((bp->bio_flags & BIO_ONQUEUE),
		    ("Bio not on queue bp=%p target %p", bp, bq));
		bp->bio_flags &= ~BIO_ONQUEUE;
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

struct bio *
g_new_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

struct bio *
g_alloc_bio(void)
{
	struct bio *bp;

	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	uma_zfree(biozone, bp);
}

struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		/*
		 * BIO_ORDERED flag may be used by disk drivers to enforce
		 * ordering restrictions, so this flag needs to be cloned.
		 * BIO_UNMAPPED should be inherited, to properly indicate
		 * which way the buffer is passed.
		 * Other bio flags are not suitable for cloning.
		 */
		bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;
		bp2->bio_ma = bp->bio_ma;
		bp2->bio_ma_n = bp->bio_ma_n;
		bp2->bio_ma_offset = bp->bio_ma_offset;
		bp2->bio_attribute = bp->bio_attribute;
		/* Inherit classification info from the parent */
		bp2->bio_classifier1 = bp->bio_classifier1;
		bp2->bio_classifier2 = bp->bio_classifier2;
		bp->bio_children++;
	}
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}

struct bio *
g_duplicate_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
	bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
	bp2->bio_parent = bp;
	bp2->bio_cmd = bp->bio_cmd;
	bp2->bio_length = bp->bio_length;
	bp2->bio_offset = bp->bio_offset;
	bp2->bio_data = bp->bio_data;
	bp2->bio_ma = bp->bio_ma;
	bp2->bio_ma_n = bp->bio_ma_n;
	bp2->bio_ma_offset = bp->bio_ma_offset;
	bp2->bio_attribute = bp->bio_attribute;
	bp->bio_children++;
#ifdef KTR
	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
		struct stack st;

		CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
		stack_save(&st);
		CTRSTACK(KTR_GEOM, &st, 3, 0);
	}
#endif
	return (bp2);
}
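
/*
 * Example (illustrative sketch, not part of this revision): the usual
 * way a GEOM class uses g_clone_bio() from its start() method - clone
 * the incoming request, point the clone at the consumer below, and let
 * g_std_done() complete the parent when the clone is delivered.  The
 * name g_example_start and the offset adjustment are hypothetical.
 */
#if 0
static void
g_example_start(struct bio *bp)
{
	struct bio *cbp;

	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		/* The M_NOWAIT allocation failed; error the parent out. */
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;	/* standard parent-completion hook */
	cbp->bio_offset += 512;		/* hypothetical translation */
	g_io_request(cbp, LIST_FIRST(&bp->bio_to->geom->consumer));
}
#endif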

void
g_io_init()
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	biozone = uma_zcreate("g_bio", sizeof (struct bio),
	    NULL, NULL,
	    NULL, NULL,
	    0, 0);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}

int
g_io_flush(struct g_consumer *cp)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name);
	bp = g_alloc_bio();
	bp->bio_cmd = BIO_FLUSH;
	bp->bio_flags |= BIO_ORDERED;
	bp->bio_done = NULL;
	bp->bio_attribute = NULL;
	bp->bio_offset = cp->provider->mediasize;
	bp->bio_length = 0;
	bp->bio_data = NULL;
	g_io_request(bp, cp);
	error = biowait(bp, "gflush");
	g_destroy_bio(bp);
	return (error);
}
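
/*
 * Example (illustrative sketch, not part of this revision): a
 * synchronous attribute query through g_io_getattr().  "GEOM::candelete"
 * is a conventional attribute name; the open consumer "cp" and the
 * surrounding function are assumptions.
 */
#if 0
	int candelete, len, error;

	len = sizeof(candelete);
	error = g_io_getattr("GEOM::candelete", cp, &len, &candelete);
	if (error == 0 && candelete != 0)
		printf("%s supports BIO_DELETE\n", cp->provider->name);
#endif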

static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if access counters don't allow the operation */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If the provider is marked for error, don't disturb. */
	if (pp->error)
		return (pp->error);
	if (cp->flags & G_CF_ORPHAN)
		return (ENXIO);

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Zero sectorsize or mediasize is probably a lack of media. */
		if (pp->sectorsize == 0 || pp->mediasize == 0)
			return (ENXIO);
		/* Reject I/O not on sector boundary */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not integral sector long */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests before or past the end of media. */
		if (bp->bio_offset < 0)
			return (EIO);
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

/*
 * Bio classification support.
 *
 * g_register_classifier() and g_unregister_classifier()
 * are used to add/remove a classifier from the list.
 * The list is protected using the g_bio_run_down lock,
 * because the classifiers are called in this path.
 *
 * g_io_request() passes bio's that are not already classified
 * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
 * Classifiers can store their result in the two fields
 * bio_classifier1 and bio_classifier2.
 * A classifier that updates one of the fields should
 * return a non-zero value.
 * If no classifier updates the field, g_run_classifiers() sets
 * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
 */

int
g_register_classifier(struct g_classifier_hook *hook)
{

	g_bioq_lock(&g_bio_run_down);
	TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
	g_bioq_unlock(&g_bio_run_down);

	return (0);
}

void
g_unregister_classifier(struct g_classifier_hook *hook)
{
	struct g_classifier_hook *entry;

	g_bioq_lock(&g_bio_run_down);
	TAILQ_FOREACH(entry, &g_classifier_tailq, link) {
		if (entry == hook) {
			TAILQ_REMOVE(&g_classifier_tailq, hook, link);
			break;
		}
	}
	g_bioq_unlock(&g_bio_run_down);
}

static void
g_run_classifiers(struct bio *bp)
{
	struct g_classifier_hook *hook;
	int classified = 0;

	TAILQ_FOREACH(hook, &g_classifier_tailq, link)
		classified |= hook->func(hook->arg, bp);

	if (!classified)
		bp->bio_classifier1 = BIO_NOTCLASSIFIED;
}
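
/*
 * Example (illustrative sketch, not part of this revision): a minimal
 * classifier hook.  The .func/.arg/.link layout follows the usage in
 * g_run_classifiers() above; a hook stores its verdict in
 * bio_classifier1/2 and returns non-zero when it classified the bio.
 * The names example_classify and example_hook are hypothetical.
 */
#if 0
static int
example_classify(void *arg, struct bio *bp)
{

	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE)
		return (0);		/* leave it to other classifiers */
	bp->bio_classifier1 = arg;	/* tag the bio with our cookie */
	return (1);
}

static struct g_classifier_hook example_hook = {
	.func = example_classify,
	.arg = &example_hook,
};

/* At initialization: g_register_classifier(&example_hook);   */
/* At teardown:       g_unregister_classifier(&example_hook); */
#endif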

void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	int first;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
#ifdef DIAGNOSTIC
	KASSERT(bp->bio_driver1 == NULL,
	    ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_driver2 == NULL,
	    ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
	KASSERT(bp->bio_pflags == 0,
	    ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
	/*
	 * Remember the consumer's private fields, so we can detect if they
	 * were modified by the provider.
	 */
	bp->_bio_caller1 = bp->bio_caller1;
	bp->_bio_caller2 = bp->bio_caller2;
	bp->_bio_cflags = bp->bio_cflags;
#endif

	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_GETATTR)) {
		KASSERT(bp->bio_data != NULL,
		    ("NULL bp->data in g_io_request(cmd=%hhu)", bp->bio_cmd));
	}
	if (bp->bio_cmd & (BIO_DELETE|BIO_FLUSH)) {
		KASSERT(bp->bio_data == NULL,
		    ("non-NULL bp->data in g_io_request(cmd=%hhu)",
		    bp->bio_cmd));
	}
	if (bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) {
		KASSERT(bp->bio_offset % cp->provider->sectorsize == 0,
		    ("wrong offset %jd for sectorsize %u",
		    bp->bio_offset, cp->provider->sectorsize));
		KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
		    ("wrong length %jd for sectorsize %u",
		    bp->bio_length, cp->provider->sectorsize));
	}

	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));
	bp->bio_flags |= BIO_ONQUEUE;

	if (g_collectstats)
		binuptime(&bp->bio_t0);
	else
		getbinuptime(&bp->bio_t0);

	/*
	 * The statistics collection is lockless, as such, but we
	 * cannot update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 *
	 * We also use the lock to protect the list of classifiers.
	 */
	g_bioq_lock(&g_bio_run_down);

	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1)
		g_run_classifiers(bp);

	if (g_collectstats & 1)
		devstat_start_transaction(pp->stat, &bp->bio_t0);
	if (g_collectstats & 2)
		devstat_start_transaction(cp->stat, &bp->bio_t0);

	pp->nstart++;
	cp->nstart++;
	first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
	TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
	g_bio_run_down.bio_queue_length++;
	g_bioq_unlock(&g_bio_run_down);

	/* Pass it on down. */
	if (first)
		wakeup(&g_wait_down);
}
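
/*
 * Example (illustrative sketch, not part of this revision): the
 * asynchronous calling convention for g_io_request().  Instead of
 * bio_done = NULL plus biowait() (as g_io_getattr() does above), the
 * caller installs a completion callback, which biodone() invokes from
 * the g_up thread once g_io_deliver() queues the bio back up.  The
 * names example_done and example_read are hypothetical.
 */
#if 0
static void
example_done(struct bio *bp)
{

	if (bp->bio_error != 0)
		printf("read failed: %d\n", bp->bio_error);
	g_destroy_bio(bp);
}

static void
example_read(struct g_consumer *cp, void *buf, off_t offset, off_t length)
{
	struct bio *bp;

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_READ;
	bp->bio_done = example_done;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = buf;
	g_io_request(bp, cp);
}
#endif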

void
g_io_deliver(struct bio *bp, int error)
{
	struct bintime now;
	struct g_consumer *cp;
	struct g_provider *pp;
	int first;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	pp = bp->bio_to;
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
	cp = bp->bio_from;
	if (cp == NULL) {
		bp->bio_error = error;
		bp->bio_done(bp);
		return;
	}
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
#ifdef DIAGNOSTIC
	/*
	 * Some classes - GJournal in particular - can modify bio's
	 * private fields while the bio is in transit; the
	 * G_GEOM_VOLATILE_BIO flag means this is expected behaviour for
	 * that particular geom.
	 */
	if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) {
		KASSERT(bp->bio_caller1 == bp->_bio_caller1,
		    ("bio_caller1 used by the provider %s", pp->name));
		KASSERT(bp->bio_caller2 == bp->_bio_caller2,
		    ("bio_caller2 used by the provider %s", pp->name));
		KASSERT(bp->bio_cflags == bp->_bio_cflags,
		    ("bio_cflags used by the provider %s", pp->name));
	}
#endif
	KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0"));
	KASSERT(bp->bio_completed <= bp->bio_length,
	    ("bio_completed can't be greater than bio_length"));

	g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p", bp));

	/*
	 * XXX: the next two don't belong here.
	 */
	bp->bio_bcount = bp->bio_length;
	bp->bio_resid = bp->bio_bcount - bp->bio_completed;

	/*
	 * The statistics collection is lockless, as such, but we
	 * cannot update one instance of the statistics from more
	 * than one thread at a time, so grab the lock first.
	 */
	if (g_collectstats)
		binuptime(&now);
	g_bioq_lock(&g_bio_run_up);
	if (g_collectstats & 1)
		devstat_end_transaction_bio_bt(pp->stat, bp, &now);
	if (g_collectstats & 2)
		devstat_end_transaction_bio_bt(cp->stat, bp, &now);

	cp->nend++;
	pp->nend++;
	if (error != ENOMEM) {
		bp->bio_error = error;
		first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
		TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
		bp->bio_flags |= BIO_ONQUEUE;
		g_bio_run_up.bio_queue_length++;
		g_bioq_unlock(&g_bio_run_up);
		if (first)
			wakeup(&g_wait_up);
		return;
	}
	g_bioq_unlock(&g_bio_run_up);

	if (bootverbose)
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
	bp->bio_children = 0;
	bp->bio_inbed = 0;
	bp->bio_driver1 = NULL;
	bp->bio_driver2 = NULL;
	bp->bio_pflags = 0;
	g_io_request(bp, cp);
	pace++;
	return;
}

SYSCTL_DECL(_kern_geom);

static long transient_maps;
SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD,
    &transient_maps, 0,
    "Total count of the transient mapping requests");
u_int transient_map_retries = 10;
SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW,
    &transient_map_retries, 0,
    "Max count of retries used before giving up on creating transient map");
int transient_map_hard_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD,
    &transient_map_hard_failures, 0,
    "Failures to establish the transient mapping due to retry attempts "
    "exhausted");
int transient_map_soft_failures;
SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD,
    &transient_map_soft_failures, 0,
    "Count of retried failures to establish the transient mapping");
int inflight_transient_maps;
SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD,
    &inflight_transient_maps, 0,
    "Current count of the active transient maps");

static int
g_io_transient_map_bio(struct bio *bp)
{
	vm_offset_t addr;
	long size;
	u_int retried;

	KASSERT(unmapped_buf_allowed, ("unmapped disabled"));

	size = round_page(bp->bio_ma_offset + bp->bio_length);
	KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp));
	addr = 0;
	retried = 0;
	atomic_add_long(&transient_maps, 1);
retry:
	if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
		if (transient_map_retries != 0 &&
		    retried >= transient_map_retries) {
			g_io_deliver(bp, EDEADLK/* XXXKIB */);
			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
			    bp, bp->bio_to->name);
			atomic_add_int(&transient_map_hard_failures, 1);
			return (1);
		} else {
			/*
			 * Naive attempt to quiesce the I/O to get more
			 * in-flight requests completed and defragment
			 * the transient_arena.
			 */
			CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
			    bp, bp->bio_to->name, retried);
			pause("g_d_tra", hz / 10);
			retried++;
			atomic_add_int(&transient_map_soft_failures, 1);
			goto retry;
		}
	}
	atomic_add_int(&inflight_transient_maps, 1);
	pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
	bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
	bp->bio_flags |= BIO_TRANSIENT_MAPPING;
	bp->bio_flags &= ~BIO_UNMAPPED;
	return (0);
}

void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;

	for (;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			CTR0(KTR_GEOM, "g_down going to sleep");
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "-", 0);
			continue;
		}
		CTR0(KTR_GEOM, "g_down has work to do");
		g_bioq_unlock(&g_bio_run_down);
		if (pace > 0) {
			CTR1(KTR_GEOM, "g_down pacing self (pace %d)", pace);
			pause("g_down", hz/10);
			pace--;
		}
		error = g_io_check(bp);
		if (error) {
			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
			    "%s returned %d", bp, bp->bio_to->name, error);
			g_io_deliver(bp, error);
			continue;
		}
		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
		    bp->bio_to->name);
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of the provider's media. */
			/*
			 * XXX: What if we truncate because of offset being
			 * bad, not length?
			 */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
				    round_page(bp->bio_ma_offset +
				    bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
				    ("excess bio %p too short", bp));
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
				if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
					bp->bio_ma_n = round_page(
					    bp->bio_ma_offset +
					    bp->bio_length) / PAGE_SIZE;
				}
				if (excess > 0)
					CTR3(KTR_GEOM, "g_down truncated bio "
					    "%p provider %s by %d", bp,
					    bp->bio_to->name, excess);
			}
			/* Deliver zero-length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				CTR2(KTR_GEOM, "g_down terminated 0-length "
				    "bp %p provider %s", bp, bp->bio_to->name);
				continue;
			}
			break;
		default:
			break;
		}
		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
			if (g_io_transient_map_bio(bp))
				continue;
		}
		THREAD_NO_SLEEPING();
		CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
		    "len %ld", bp, bp->bio_to->name, bp->bio_offset,
		    bp->bio_length);
		bp->bio_to->geom->start(bp);
		THREAD_SLEEPING_OK();
	}
}

void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{
	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
	    ("Bio already on queue bp=%p target taskq", bp));
	bp->bio_flags |= BIO_ONQUEUE;
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}

void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;

	for (;;) {
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR1(KTR_GEOM, "g_up processing task bp %p", bp);
			bp->bio_task(bp->bio_task_arg);
			THREAD_SLEEPING_OK();
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			THREAD_NO_SLEEPING();
			CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off "
			    "%jd len %ld", bp, bp->bio_to->name,
			    bp->bio_offset, bp->bio_length);
			biodone(bp);
			THREAD_SLEEPING_OK();
			continue;
		}
		CTR0(KTR_GEOM, "g_up going to sleep");
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "-", 0);
	}
}

void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_read_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}

int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	KASSERT(length > 0 && length >= cp->provider->sectorsize &&
	    length <= MAXPHYS, ("g_write_data(): invalid length %jd",
	    (intmax_t)length));

	bp = g_alloc_bio();
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}
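
/*
 * Example (illustrative sketch, not part of this revision): the
 * synchronous helpers above in use, reading and writing back the last
 * sector of a provider, metadata-style.  The open consumer "cp", its
 * access counts, and the surrounding function returning int are
 * assumptions.
 */
#if 0
	struct g_provider *pp = cp->provider;
	u_char *sector;
	int error;

	sector = g_read_data(cp, pp->mediasize - pp->sectorsize,
	    pp->sectorsize, &error);
	if (sector == NULL)
		return (error);
	/* ... inspect or modify the metadata ... */
	error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
	    pp->sectorsize);
	g_free(sector);
#endif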
("g_delete_data(): invalid length %jd", (intmax_t)length)); 869 870 bp = g_alloc_bio(); 871 bp->bio_cmd = BIO_DELETE; 872 bp->bio_done = NULL; 873 bp->bio_offset = offset; 874 bp->bio_length = length; 875 bp->bio_data = NULL; 876 g_io_request(bp, cp); 877 error = biowait(bp, "gdelete"); 878 g_destroy_bio(bp); 879 return (error); 880} 881 882void 883g_print_bio(struct bio *bp) 884{ 885 const char *pname, *cmd = NULL; 886 887 if (bp->bio_to != NULL) 888 pname = bp->bio_to->name; 889 else 890 pname = "[unknown]"; 891 892 switch (bp->bio_cmd) { 893 case BIO_GETATTR: 894 cmd = "GETATTR"; 895 printf("%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); 896 return; 897 case BIO_FLUSH: 898 cmd = "FLUSH"; 899 printf("%s[%s]", pname, cmd); 900 return; 901 case BIO_READ: 902 cmd = "READ"; 903 break; 904 case BIO_WRITE: 905 cmd = "WRITE"; 906 break; 907 case BIO_DELETE: 908 cmd = "DELETE"; 909 break; 910 default: 911 cmd = "UNKNOWN"; 912 printf("%s[%s()]", pname, cmd); 913 return; 914 } 915 printf("%s[%s(offset=%jd, length=%jd)]", pname, cmd, 916 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 917} 918