zio.c (209261) | zio.c (209962) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 5 unchanged lines hidden (view full) --- 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* |
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
23 * Use is subject to license terms. 24 */ 25 26#include <sys/zfs_context.h> 27#include <sys/fm/fs/zfs.h> 28#include <sys/spa.h> 29#include <sys/txg.h> 30#include <sys/spa_impl.h> --- 40 unchanged lines hidden (view full) --- 71#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ 72 73/* 74 * ========================================================================== 75 * I/O kmem caches 76 * ========================================================================== 77 */ 78kmem_cache_t *zio_cache; | 23 * Use is subject to license terms. 24 */ 25 26#include <sys/zfs_context.h> 27#include <sys/fm/fs/zfs.h> 28#include <sys/spa.h> 29#include <sys/txg.h> 30#include <sys/spa_impl.h> --- 40 unchanged lines hidden (view full) --- 71#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */ 72 73/* 74 * ========================================================================== 75 * I/O kmem caches 76 * ========================================================================== 77 */ 78kmem_cache_t *zio_cache; |
79kmem_cache_t *zio_link_cache; |
|
79kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; 80kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; 81 82#ifdef _KERNEL 83extern vmem_t *zio_alloc_arena; 84#endif 85 86/* 87 * An allocating zio is one that either currently has the DVA allocate 88 * stage set or will have it later in its lifetime. 89 */ 90#define IO_IS_ALLOCATING(zio) \ 91 ((zio)->io_orig_pipeline & (1U << ZIO_STAGE_DVA_ALLOCATE)) 92 93void 94zio_init(void) 95{ 96 size_t c; | 80kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; 81kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; 82 83#ifdef _KERNEL 84extern vmem_t *zio_alloc_arena; 85#endif 86 87/* 88 * An allocating zio is one that either currently has the DVA allocate 89 * stage set or will have it later in its lifetime. 90 */ 91#define IO_IS_ALLOCATING(zio) \ 92 ((zio)->io_orig_pipeline & (1U << ZIO_STAGE_DVA_ALLOCATE)) 93 94void 95zio_init(void) 96{ 97 size_t c; |
97 zio_cache = kmem_cache_create("zio_cache", sizeof (zio_t), 0, 98 NULL, NULL, NULL, NULL, NULL, 0); | 98 zio_cache = kmem_cache_create("zio_cache", 99 sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 100 zio_link_cache = kmem_cache_create("zio_link_cache", 101 sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); |
99 100 /* 101 * For small buffers, we want a cache for each multiple of 102 * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache 103 * for each quarter-power of 2. For large buffers, we want 104 * a cache for each multiple of PAGESIZE. 105 */ 106 for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { --- 53 unchanged lines hidden (view full) --- 160 161 if (zio_data_buf_cache[c] != last_data_cache) { 162 last_data_cache = zio_data_buf_cache[c]; 163 kmem_cache_destroy(zio_data_buf_cache[c]); 164 } 165 zio_data_buf_cache[c] = NULL; 166 } 167 | 102 103 /* 104 * For small buffers, we want a cache for each multiple of 105 * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache 106 * for each quarter-power of 2. For large buffers, we want 107 * a cache for each multiple of PAGESIZE. 108 */ 109 for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { --- 53 unchanged lines hidden (view full) --- 163 164 if (zio_data_buf_cache[c] != last_data_cache) { 165 last_data_cache = zio_data_buf_cache[c]; 166 kmem_cache_destroy(zio_data_buf_cache[c]); 167 } 168 zio_data_buf_cache[c] = NULL; 169 } 170 |
171 kmem_cache_destroy(zio_link_cache); |
|
168 kmem_cache_destroy(zio_cache); 169 170 zio_inject_fini(); 171} 172 173/* 174 * ========================================================================== 175 * Allocate and free I/O buffers --- 130 unchanged lines hidden (view full) --- 306 zio->io_error = EIO; 307} 308 309/* 310 * ========================================================================== 311 * I/O parent/child relationships and pipeline interlocks 312 * ========================================================================== 313 */ | 172 kmem_cache_destroy(zio_cache); 173 174 zio_inject_fini(); 175} 176 177/* 178 * ========================================================================== 179 * Allocate and free I/O buffers --- 130 unchanged lines hidden (view full) --- 310 zio->io_error = EIO; 311} 312 313/* 314 * ========================================================================== 315 * I/O parent/child relationships and pipeline interlocks 316 * ========================================================================== 317 */ |
318/* 319 * NOTE - Callers to zio_walk_parents() and zio_walk_children() must 320 * continue calling these functions until they return NULL. 321 * Otherwise, the next caller will pick up the list walk in 322 * some indeterminate state. (Otherwise every caller would 323 * have to pass in a cookie to keep the state represented by 324 * io_walk_link, which gets annoying.) 325 */ 326zio_t * 327zio_walk_parents(zio_t *cio) 328{ 329 zio_link_t *zl = cio->io_walk_link; 330 list_t *pl = &cio->io_parent_list; |
|
314 | 331 |
315static void 316zio_add_child(zio_t *pio, zio_t *zio) | 332 zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl); 333 cio->io_walk_link = zl; 334 335 if (zl == NULL) 336 return (NULL); 337 338 ASSERT(zl->zl_child == cio); 339 return (zl->zl_parent); 340} 341 342zio_t * 343zio_walk_children(zio_t *pio) |
317{ | 344{ |
345 zio_link_t *zl = pio->io_walk_link; 346 list_t *cl = &pio->io_child_list; 347 348 zl = (zl == NULL) ? list_head(cl) : list_next(cl, zl); 349 pio->io_walk_link = zl; 350 351 if (zl == NULL) 352 return (NULL); 353 354 ASSERT(zl->zl_parent == pio); 355 return (zl->zl_child); 356} 357 358zio_t * 359zio_unique_parent(zio_t *cio) 360{ 361 zio_t *pio = zio_walk_parents(cio); 362 363 VERIFY(zio_walk_parents(cio) == NULL); 364 return (pio); 365} 366 367void 368zio_add_child(zio_t *pio, zio_t *cio) 369{ 370 zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); 371 372 /* 373 * Logical I/Os can have logical, gang, or vdev children. 374 * Gang I/Os can have gang or vdev children. 375 * Vdev I/Os can only have vdev children. 376 * The following ASSERT captures all of these constraints. 377 */ 378 ASSERT(cio->io_child_type <= pio->io_child_type); 379 380 zl->zl_parent = pio; 381 zl->zl_child = cio; 382 383 mutex_enter(&cio->io_lock); |
|
318 mutex_enter(&pio->io_lock); | 384 mutex_enter(&pio->io_lock); |
319 if (zio->io_stage < ZIO_STAGE_READY) 320 pio->io_children[zio->io_child_type][ZIO_WAIT_READY]++; 321 if (zio->io_stage < ZIO_STAGE_DONE) 322 pio->io_children[zio->io_child_type][ZIO_WAIT_DONE]++; 323 zio->io_sibling_prev = NULL; 324 zio->io_sibling_next = pio->io_child; 325 if (pio->io_child != NULL) 326 pio->io_child->io_sibling_prev = zio; 327 pio->io_child = zio; 328 zio->io_parent = pio; | 385 386 ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); 387 388 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 389 pio->io_children[cio->io_child_type][w] += !cio->io_state[w]; 390 391 list_insert_head(&pio->io_child_list, zl); 392 list_insert_head(&cio->io_parent_list, zl); 393 |
329 mutex_exit(&pio->io_lock); | 394 mutex_exit(&pio->io_lock); |
395 mutex_exit(&cio->io_lock); |
|
330} 331 332static void | 396} 397 398static void |
333zio_remove_child(zio_t *pio, zio_t *zio) | 399zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) |
334{ | 400{ |
335 zio_t *next, *prev; | 401 ASSERT(zl->zl_parent == pio); 402 ASSERT(zl->zl_child == cio); |
336 | 403 |
337 ASSERT(zio->io_parent == pio); 338 | 404 mutex_enter(&cio->io_lock); |
339 mutex_enter(&pio->io_lock); | 405 mutex_enter(&pio->io_lock); |
340 next = zio->io_sibling_next; 341 prev = zio->io_sibling_prev; 342 if (next != NULL) 343 next->io_sibling_prev = prev; 344 if (prev != NULL) 345 prev->io_sibling_next = next; 346 if (pio->io_child == zio) 347 pio->io_child = next; | 406 407 list_remove(&pio->io_child_list, zl); 408 list_remove(&cio->io_parent_list, zl); 409 |
348 mutex_exit(&pio->io_lock); | 410 mutex_exit(&pio->io_lock); |
411 mutex_exit(&cio->io_lock); 412 413 kmem_cache_free(zio_link_cache, zl); |
|
349} 350 351static boolean_t 352zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait) 353{ 354 uint64_t *countp = &zio->io_children[child][wait]; 355 boolean_t waiting = B_FALSE; 356 --- 58 unchanged lines hidden (view full) --- 415 ASSERT(vd || stage == ZIO_STAGE_OPEN); 416 417 zio = kmem_cache_alloc(zio_cache, KM_SLEEP); 418 bzero(zio, sizeof (zio_t)); 419 420 mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); 421 cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL); 422 | 414} 415 416static boolean_t 417zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait) 418{ 419 uint64_t *countp = &zio->io_children[child][wait]; 420 boolean_t waiting = B_FALSE; 421 --- 58 unchanged lines hidden (view full) --- 480 ASSERT(vd || stage == ZIO_STAGE_OPEN); 481 482 zio = kmem_cache_alloc(zio_cache, KM_SLEEP); 483 bzero(zio, sizeof (zio_t)); 484 485 mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); 486 cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL); 487 |
488 list_create(&zio->io_parent_list, sizeof (zio_link_t), 489 offsetof(zio_link_t, zl_parent_node)); 490 list_create(&zio->io_child_list, sizeof (zio_link_t), 491 offsetof(zio_link_t, zl_child_node)); 492 |
|
423 if (vd != NULL) 424 zio->io_child_type = ZIO_CHILD_VDEV; 425 else if (flags & ZIO_FLAG_GANG_CHILD) 426 zio->io_child_type = ZIO_CHILD_GANG; 427 else 428 zio->io_child_type = ZIO_CHILD_LOGICAL; 429 430 if (bp != NULL) { 431 zio->io_bp = bp; 432 zio->io_bp_copy = *bp; 433 zio->io_bp_orig = *bp; 434 if (type != ZIO_TYPE_WRITE) 435 zio->io_bp = &zio->io_bp_copy; /* so caller can free */ | 493 if (vd != NULL) 494 zio->io_child_type = ZIO_CHILD_VDEV; 495 else if (flags & ZIO_FLAG_GANG_CHILD) 496 zio->io_child_type = ZIO_CHILD_GANG; 497 else 498 zio->io_child_type = ZIO_CHILD_LOGICAL; 499 500 if (bp != NULL) { 501 zio->io_bp = bp; 502 zio->io_bp_copy = *bp; 503 zio->io_bp_orig = *bp; 504 if (type != ZIO_TYPE_WRITE) 505 zio->io_bp = &zio->io_bp_copy; /* so caller can free */ |
436 if (zio->io_child_type == ZIO_CHILD_LOGICAL) { 437 if (BP_IS_GANG(bp)) 438 pipeline |= ZIO_GANG_STAGES; | 506 if (zio->io_child_type == ZIO_CHILD_LOGICAL) |
439 zio->io_logical = zio; | 507 zio->io_logical = zio; |
440 } | 508 if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp)) 509 pipeline |= ZIO_GANG_STAGES; |
441 } 442 443 zio->io_spa = spa; 444 zio->io_txg = txg; 445 zio->io_data = data; 446 zio->io_size = size; 447 zio->io_done = done; 448 zio->io_private = private; 449 zio->io_type = type; 450 zio->io_priority = priority; 451 zio->io_vd = vd; 452 zio->io_offset = offset; 453 zio->io_orig_flags = zio->io_flags = flags; 454 zio->io_orig_stage = zio->io_stage = stage; 455 zio->io_orig_pipeline = zio->io_pipeline = pipeline; 456 | 510 } 511 512 zio->io_spa = spa; 513 zio->io_txg = txg; 514 zio->io_data = data; 515 zio->io_size = size; 516 zio->io_done = done; 517 zio->io_private = private; 518 zio->io_type = type; 519 zio->io_priority = priority; 520 zio->io_vd = vd; 521 zio->io_offset = offset; 522 zio->io_orig_flags = zio->io_flags = flags; 523 zio->io_orig_stage = zio->io_stage = stage; 524 zio->io_orig_pipeline = zio->io_pipeline = pipeline; 525 |
526 zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); 527 zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); 528 |
|
457 if (zb != NULL) 458 zio->io_bookmark = *zb; 459 460 if (pio != NULL) { | 529 if (zb != NULL) 530 zio->io_bookmark = *zb; 531 532 if (pio != NULL) { |
461 /* 462 * Logical I/Os can have logical, gang, or vdev children. 463 * Gang I/Os can have gang or vdev children. 464 * Vdev I/Os can only have vdev children. 465 * The following ASSERT captures all of these constraints. 466 */ 467 ASSERT(zio->io_child_type <= pio->io_child_type); | |
468 if (zio->io_logical == NULL) 469 zio->io_logical = pio->io_logical; | 533 if (zio->io_logical == NULL) 534 zio->io_logical = pio->io_logical; |
535 if (zio->io_child_type == ZIO_CHILD_GANG) 536 zio->io_gang_leader = pio->io_gang_leader; |
|
470 zio_add_child(pio, zio); 471 } 472 473 return (zio); 474} 475 476static void 477zio_destroy(zio_t *zio) 478{ | 537 zio_add_child(pio, zio); 538 } 539 540 return (zio); 541} 542 543static void 544zio_destroy(zio_t *zio) 545{ |
479 spa_t *spa = zio->io_spa; 480 uint8_t async_root = zio->io_async_root; 481 | 546 list_destroy(&zio->io_parent_list); 547 list_destroy(&zio->io_child_list); |
482 mutex_destroy(&zio->io_lock); 483 cv_destroy(&zio->io_cv); 484 kmem_cache_free(zio_cache, zio); | 548 mutex_destroy(&zio->io_lock); 549 cv_destroy(&zio->io_cv); 550 kmem_cache_free(zio_cache, zio); |
485 486 if (async_root) { 487 mutex_enter(&spa->spa_async_root_lock); 488 if (--spa->spa_async_root_count == 0) 489 cv_broadcast(&spa->spa_async_root_cv); 490 mutex_exit(&spa->spa_async_root_lock); 491 } | |
492} 493 494zio_t * | 551} 552 553zio_t * |
495zio_null(zio_t *pio, spa_t *spa, zio_done_func_t *done, void *private, 496 int flags) | 554zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done, 555 void *private, int flags) |
497{ 498 zio_t *zio; 499 500 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, | 556{ 557 zio_t *zio; 558 559 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, |
501 ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, NULL, 0, NULL, | 560 ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL, |
502 ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE); 503 504 return (zio); 505} 506 507zio_t * 508zio_root(spa_t *spa, zio_done_func_t *done, void *private, int flags) 509{ | 561 ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE); 562 563 return (zio); 564} 565 566zio_t * 567zio_root(spa_t *spa, zio_done_func_t *done, void *private, int flags) 568{ |
510 return (zio_null(NULL, spa, done, private, flags)); | 569 return (zio_null(NULL, spa, NULL, done, private, flags)); |
511} 512 513zio_t * 514zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, 515 void *data, uint64_t size, zio_done_func_t *done, void *private, 516 int priority, int flags, const zbookmark_t *zb) 517{ 518 zio_t *zio; --- 52 unchanged lines hidden (view full) --- 571zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, 572 zio_done_func_t *done, void *private, int flags) 573{ 574 zio_t *zio; 575 576 ASSERT(!BP_IS_HOLE(bp)); 577 578 if (bp->blk_fill == BLK_FILL_ALREADY_FREED) | 570} 571 572zio_t * 573zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, 574 void *data, uint64_t size, zio_done_func_t *done, void *private, 575 int priority, int flags, const zbookmark_t *zb) 576{ 577 zio_t *zio; --- 52 unchanged lines hidden (view full) --- 630zio_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, 631 zio_done_func_t *done, void *private, int flags) 632{ 633 zio_t *zio; 634 635 ASSERT(!BP_IS_HOLE(bp)); 636 637 if (bp->blk_fill == BLK_FILL_ALREADY_FREED) |
579 return (zio_null(pio, spa, NULL, NULL, flags)); | 638 return (zio_null(pio, spa, NULL, NULL, NULL, flags)); |
580 581 if (txg == spa->spa_syncing_txg && 582 spa_sync_pass(spa) > SYNC_PASS_DEFERRED_FREE) { 583 bplist_enqueue_deferred(&spa->spa_sync_bplist, bp); | 639 640 if (txg == spa->spa_syncing_txg && 641 spa_sync_pass(spa) > SYNC_PASS_DEFERRED_FREE) { 642 bplist_enqueue_deferred(&spa->spa_sync_bplist, bp); |
584 return (zio_null(pio, spa, NULL, NULL, flags)); | 643 return (zio_null(pio, spa, NULL, NULL, NULL, flags)); |
585 } 586 587 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), 588 done, private, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, 589 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); 590 591 return (zio); 592} --- 34 unchanged lines hidden (view full) --- 627 628 if (vd->vdev_children == 0) { 629 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, 630 ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL, 631 ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE); 632 633 zio->io_cmd = cmd; 634 } else { | 644 } 645 646 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), 647 done, private, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, 648 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE); 649 650 return (zio); 651} --- 34 unchanged lines hidden (view full) --- 686 687 if (vd->vdev_children == 0) { 688 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private, 689 ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL, 690 ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE); 691 692 zio->io_cmd = cmd; 693 } else { |
635 zio = zio_null(pio, spa, NULL, NULL, flags); | 694 zio = zio_null(pio, spa, NULL, NULL, NULL, flags); |
636 637 for (c = 0; c < vd->vdev_children; c++) 638 zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd, 639 done, private, priority, flags)); 640 } 641 642 return (zio); 643} --- 121 unchanged lines hidden (view full) --- 765 * ========================================================================== 766 */ 767 768static int 769zio_read_bp_init(zio_t *zio) 770{ 771 blkptr_t *bp = zio->io_bp; 772 | 695 696 for (c = 0; c < vd->vdev_children; c++) 697 zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd, 698 done, private, priority, flags)); 699 } 700 701 return (zio); 702} --- 121 unchanged lines hidden (view full) --- 824 * ========================================================================== 825 */ 826 827static int 828zio_read_bp_init(zio_t *zio) 829{ 830 blkptr_t *bp = zio->io_bp; 831 |
773 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_logical == zio) { | 832 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && 833 zio->io_child_type == ZIO_CHILD_LOGICAL && 834 !(zio->io_flags & ZIO_FLAG_RAW)) { |
774 uint64_t csize = BP_GET_PSIZE(bp); 775 void *cbuf = zio_buf_alloc(csize); 776 777 zio_push_transform(zio, cbuf, csize, csize, zio_decompress); 778 } 779 780 if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) 781 zio->io_flags |= ZIO_FLAG_DONT_CACHE; --- 32 unchanged lines hidden (view full) --- 814 * working on behalf of spa_sync(). For spa_sync() to 815 * converge, it must eventually be the case that we don't 816 * have to allocate new blocks. But compression changes 817 * the blocksize, which forces a reallocate, and makes 818 * convergence take longer. Therefore, after the first 819 * few passes, stop compressing to ensure convergence. 820 */ 821 pass = spa_sync_pass(zio->io_spa); | 835 uint64_t csize = BP_GET_PSIZE(bp); 836 void *cbuf = zio_buf_alloc(csize); 837 838 zio_push_transform(zio, cbuf, csize, csize, zio_decompress); 839 } 840 841 if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) 842 zio->io_flags |= ZIO_FLAG_DONT_CACHE; --- 32 unchanged lines hidden (view full) --- 875 * working on behalf of spa_sync(). For spa_sync() to 876 * converge, it must eventually be the case that we don't 877 * have to allocate new blocks. But compression changes 878 * the blocksize, which forces a reallocate, and makes 879 * convergence take longer. Therefore, after the first 880 * few passes, stop compressing to ensure convergence. 881 */ 882 pass = spa_sync_pass(zio->io_spa); |
822 ASSERT(pass > 1); | |
823 824 if (pass > SYNC_PASS_DONT_COMPRESS) 825 compress = ZIO_COMPRESS_OFF; 826 | 883 884 if (pass > SYNC_PASS_DONT_COMPRESS) 885 compress = ZIO_COMPRESS_OFF; 886 |
827 /* 828 * Only MOS (objset 0) data should need to be rewritten. 829 */ 830 ASSERT(zio->io_logical->io_bookmark.zb_objset == 0); 831 | |
832 /* Make sure someone doesn't change their mind on overwrites */ 833 ASSERT(MIN(zp->zp_ndvas + BP_IS_GANG(bp), 834 spa_max_replication(zio->io_spa)) == BP_GET_NDVAS(bp)); 835 } 836 837 if (compress != ZIO_COMPRESS_OFF) { 838 if (!zio_compress_data(compress, zio->io_data, zio->io_size, 839 &cbuf, &csize, &cbufsize)) { --- 177 unchanged lines hidden (view full) --- 1017 return (error); 1018} 1019 1020void 1021zio_nowait(zio_t *zio) 1022{ 1023 ASSERT(zio->io_executor == NULL); 1024 | 887 /* Make sure someone doesn't change their mind on overwrites */ 888 ASSERT(MIN(zp->zp_ndvas + BP_IS_GANG(bp), 889 spa_max_replication(zio->io_spa)) == BP_GET_NDVAS(bp)); 890 } 891 892 if (compress != ZIO_COMPRESS_OFF) { 893 if (!zio_compress_data(compress, zio->io_data, zio->io_size, 894 &cbuf, &csize, &cbufsize)) { --- 177 unchanged lines hidden (view full) --- 1072 return (error); 1073} 1074 1075void 1076zio_nowait(zio_t *zio) 1077{ 1078 ASSERT(zio->io_executor == NULL); 1079 |
1025 if (zio->io_parent == NULL && zio->io_child_type == ZIO_CHILD_LOGICAL) { | 1080 if (zio->io_child_type == ZIO_CHILD_LOGICAL && 1081 zio_unique_parent(zio) == NULL) { |
1026 /* 1027 * This is a logical async I/O with no parent to wait for it. | 1082 /* 1083 * This is a logical async I/O with no parent to wait for it. |
1028 * Attach it to the pool's global async root zio so that 1029 * spa_unload() has a way of waiting for async I/O to finish. | 1084 * We add it to the spa_async_zio_root "Godfather" I/O, which 1085 * will ensure it completes prior to unloading the pool. |
1030 */ 1031 spa_t *spa = zio->io_spa; | 1086 */ 1087 spa_t *spa = zio->io_spa; |
1032 zio->io_async_root = B_TRUE; 1033 mutex_enter(&spa->spa_async_root_lock); 1034 spa->spa_async_root_count++; 1035 mutex_exit(&spa->spa_async_root_lock); | 1088 1089 zio_add_child(spa->spa_async_zio_root, zio); |
1036 } 1037 1038 zio_execute(zio); 1039} 1040 1041/* 1042 * ========================================================================== 1043 * Reexecute or suspend/resume failed I/O 1044 * ========================================================================== 1045 */ 1046 1047static void 1048zio_reexecute(zio_t *pio) 1049{ | 1090 } 1091 1092 zio_execute(zio); 1093} 1094 1095/* 1096 * ========================================================================== 1097 * Reexecute or suspend/resume failed I/O 1098 * ========================================================================== 1099 */ 1100 1101static void 1102zio_reexecute(zio_t *pio) 1103{ |
1050 zio_t *zio, *zio_next; | 1104 zio_t *cio, *cio_next; |
1051 | 1105 |
1106 ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL); 1107 ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN); 1108 ASSERT(pio->io_gang_leader == NULL); 1109 ASSERT(pio->io_gang_tree == NULL); 1110 |
|
1052 pio->io_flags = pio->io_orig_flags; 1053 pio->io_stage = pio->io_orig_stage; 1054 pio->io_pipeline = pio->io_orig_pipeline; 1055 pio->io_reexecute = 0; 1056 pio->io_error = 0; | 1111 pio->io_flags = pio->io_orig_flags; 1112 pio->io_stage = pio->io_orig_stage; 1113 pio->io_pipeline = pio->io_orig_pipeline; 1114 pio->io_reexecute = 0; 1115 pio->io_error = 0; |
1116 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1117 pio->io_state[w] = 0; |
|
1057 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 1058 pio->io_child_error[c] = 0; 1059 1060 if (IO_IS_ALLOCATING(pio)) { 1061 /* 1062 * Remember the failed bp so that the io_ready() callback 1063 * can update its accounting upon reexecution. The block 1064 * was already freed in zio_done(); we indicate this with 1065 * a fill count of -1 so that zio_free() knows to skip it. 1066 */ 1067 blkptr_t *bp = pio->io_bp; 1068 ASSERT(bp->blk_birth == 0 || bp->blk_birth == pio->io_txg); 1069 bp->blk_fill = BLK_FILL_ALREADY_FREED; 1070 pio->io_bp_orig = *bp; 1071 BP_ZERO(bp); 1072 } 1073 1074 /* 1075 * As we reexecute pio's children, new children could be created. | 1118 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 1119 pio->io_child_error[c] = 0; 1120 1121 if (IO_IS_ALLOCATING(pio)) { 1122 /* 1123 * Remember the failed bp so that the io_ready() callback 1124 * can update its accounting upon reexecution. The block 1125 * was already freed in zio_done(); we indicate this with 1126 * a fill count of -1 so that zio_free() knows to skip it. 1127 */ 1128 blkptr_t *bp = pio->io_bp; 1129 ASSERT(bp->blk_birth == 0 || bp->blk_birth == pio->io_txg); 1130 bp->blk_fill = BLK_FILL_ALREADY_FREED; 1131 pio->io_bp_orig = *bp; 1132 BP_ZERO(bp); 1133 } 1134 1135 /* 1136 * As we reexecute pio's children, new children could be created. |
1076 * New children go to the head of the io_child list, however, | 1137 * New children go to the head of pio's io_child_list, however, |
1077 * so we will (correctly) not reexecute them. The key is that | 1138 * so we will (correctly) not reexecute them. The key is that |
1078 * the remainder of the io_child list, from 'zio_next' onward, 1079 * cannot be affected by any side effects of reexecuting 'zio'. | 1139 * the remainder of pio's io_child_list, from 'cio_next' onward, 1140 * cannot be affected by any side effects of reexecuting 'cio'. |
1080 */ | 1141 */ |
1081 for (zio = pio->io_child; zio != NULL; zio = zio_next) { 1082 zio_next = zio->io_sibling_next; | 1142 for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) { 1143 cio_next = zio_walk_children(pio); |
1083 mutex_enter(&pio->io_lock); | 1144 mutex_enter(&pio->io_lock); |
1084 pio->io_children[zio->io_child_type][ZIO_WAIT_READY]++; 1085 pio->io_children[zio->io_child_type][ZIO_WAIT_DONE]++; | 1145 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1146 pio->io_children[cio->io_child_type][w]++; |
1086 mutex_exit(&pio->io_lock); | 1147 mutex_exit(&pio->io_lock); |
1087 zio_reexecute(zio); | 1148 zio_reexecute(cio); |
1088 } 1089 1090 /* 1091 * Now that all children have been reexecuted, execute the parent. | 1149 } 1150 1151 /* 1152 * Now that all children have been reexecuted, execute the parent. |
1153 * We don't reexecute "The Godfather" I/O here as it's the 1154 * responsibility of the caller to wait on him. |
|
1092 */ | 1155 */ |
1093 zio_execute(pio); | 1156 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) 1157 zio_execute(pio); |
1094} 1095 1096void 1097zio_suspend(spa_t *spa, zio_t *zio) 1098{ 1099 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) 1100 fm_panic("Pool '%s' has encountered an uncorrectable I/O " 1101 "failure and the failure mode property for this pool " 1102 "is set to panic.", spa_name(spa)); 1103 1104 zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); 1105 1106 mutex_enter(&spa->spa_suspend_lock); 1107 1108 if (spa->spa_suspend_zio_root == NULL) | 1158} 1159 1160void 1161zio_suspend(spa_t *spa, zio_t *zio) 1162{ 1163 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) 1164 fm_panic("Pool '%s' has encountered an uncorrectable I/O " 1165 "failure and the failure mode property for this pool " 1166 "is set to panic.", spa_name(spa)); 1167 1168 zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); 1169 1170 mutex_enter(&spa->spa_suspend_lock); 1171 1172 if (spa->spa_suspend_zio_root == NULL) |
1109 spa->spa_suspend_zio_root = zio_root(spa, NULL, NULL, 0); | 1173 spa->spa_suspend_zio_root = zio_root(spa, NULL, NULL, 1174 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | 1175 ZIO_FLAG_GODFATHER); |
1110 1111 spa->spa_suspended = B_TRUE; 1112 1113 if (zio != NULL) { | 1176 1177 spa->spa_suspended = B_TRUE; 1178 1179 if (zio != NULL) { |
1180 ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); |
|
1114 ASSERT(zio != spa->spa_suspend_zio_root); 1115 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); | 1181 ASSERT(zio != spa->spa_suspend_zio_root); 1182 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); |
1116 ASSERT(zio->io_parent == NULL); | 1183 ASSERT(zio_unique_parent(zio) == NULL); |
1117 ASSERT(zio->io_stage == ZIO_STAGE_DONE); 1118 zio_add_child(spa->spa_suspend_zio_root, zio); 1119 } 1120 1121 mutex_exit(&spa->spa_suspend_lock); 1122} 1123 | 1184 ASSERT(zio->io_stage == ZIO_STAGE_DONE); 1185 zio_add_child(spa->spa_suspend_zio_root, zio); 1186 } 1187 1188 mutex_exit(&spa->spa_suspend_lock); 1189} 1190 |
1124void | 1191int |
1125zio_resume(spa_t *spa) 1126{ | 1192zio_resume(spa_t *spa) 1193{ |
1127 zio_t *pio, *zio; | 1194 zio_t *pio; |
1128 1129 /* 1130 * Reexecute all previously suspended i/o. 1131 */ 1132 mutex_enter(&spa->spa_suspend_lock); 1133 spa->spa_suspended = B_FALSE; 1134 cv_broadcast(&spa->spa_suspend_cv); 1135 pio = spa->spa_suspend_zio_root; 1136 spa->spa_suspend_zio_root = NULL; 1137 mutex_exit(&spa->spa_suspend_lock); 1138 1139 if (pio == NULL) | 1195 1196 /* 1197 * Reexecute all previously suspended i/o. 1198 */ 1199 mutex_enter(&spa->spa_suspend_lock); 1200 spa->spa_suspended = B_FALSE; 1201 cv_broadcast(&spa->spa_suspend_cv); 1202 pio = spa->spa_suspend_zio_root; 1203 spa->spa_suspend_zio_root = NULL; 1204 mutex_exit(&spa->spa_suspend_lock); 1205 1206 if (pio == NULL) |
1140 return; | 1207 return (0); |
1141 | 1208 |
1142 while ((zio = pio->io_child) != NULL) { 1143 zio_remove_child(pio, zio); 1144 zio->io_parent = NULL; 1145 zio_reexecute(zio); 1146 } 1147 1148 ASSERT(pio->io_children[ZIO_CHILD_LOGICAL][ZIO_WAIT_DONE] == 0); 1149 1150 (void) zio_wait(pio); | 1209 zio_reexecute(pio); 1210 return (zio_wait(pio)); |
1151} 1152 1153void 1154zio_resume_wait(spa_t *spa) 1155{ 1156 mutex_enter(&spa->spa_suspend_lock); 1157 while (spa_suspended(spa)) 1158 cv_wait(&spa->spa_suspend_cv, &spa->spa_suspend_lock); --- 90 unchanged lines hidden (view full) --- 1249 * As we rewrite each gang header, the pipeline will compute 1250 * a new gang block header checksum for it; but no one will 1251 * compute a new data checksum, so we do that here. The one 1252 * exception is the gang leader: the pipeline already computed 1253 * its data checksum because that stage precedes gang assembly. 1254 * (Presently, nothing actually uses interior data checksums; 1255 * this is just good hygiene.) 1256 */ | 1211} 1212 1213void 1214zio_resume_wait(spa_t *spa) 1215{ 1216 mutex_enter(&spa->spa_suspend_lock); 1217 while (spa_suspended(spa)) 1218 cv_wait(&spa->spa_suspend_cv, &spa->spa_suspend_lock); --- 90 unchanged lines hidden (view full) --- 1309 * As we rewrite each gang header, the pipeline will compute 1310 * a new gang block header checksum for it; but no one will 1311 * compute a new data checksum, so we do that here. The one 1312 * exception is the gang leader: the pipeline already computed 1313 * its data checksum because that stage precedes gang assembly. 1314 * (Presently, nothing actually uses interior data checksums; 1315 * this is just good hygiene.) 1316 */ |
1257 if (gn != pio->io_logical->io_gang_tree) { | 1317 if (gn != pio->io_gang_leader->io_gang_tree) { |
1258 zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), 1259 data, BP_GET_PSIZE(bp)); 1260 } 1261 } else { 1262 zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, 1263 data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, 1264 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); 1265 } --- 65 unchanged lines hidden (view full) --- 1331 1332 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) 1333 zio_gang_tree_free(&gn->gn_child[g]); 1334 1335 zio_gang_node_free(gnpp); 1336} 1337 1338static void | 1318 zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), 1319 data, BP_GET_PSIZE(bp)); 1320 } 1321 } else { 1322 zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, 1323 data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, 1324 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); 1325 } --- 65 unchanged lines hidden (view full) --- 1391 1392 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) 1393 zio_gang_tree_free(&gn->gn_child[g]); 1394 1395 zio_gang_node_free(gnpp); 1396} 1397 1398static void |
1339zio_gang_tree_assemble(zio_t *lio, blkptr_t *bp, zio_gang_node_t **gnpp) | 1399zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp) |
1340{ 1341 zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); 1342 | 1400{ 1401 zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); 1402 |
1343 ASSERT(lio->io_logical == lio); | 1403 ASSERT(gio->io_gang_leader == gio); |
1344 ASSERT(BP_IS_GANG(bp)); 1345 | 1404 ASSERT(BP_IS_GANG(bp)); 1405 |
1346 zio_nowait(zio_read(lio, lio->io_spa, bp, gn->gn_gbh, | 1406 zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh, |
1347 SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn, | 1407 SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn, |
1348 lio->io_priority, ZIO_GANG_CHILD_FLAGS(lio), &lio->io_bookmark)); | 1408 gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); |
1349} 1350 1351static void 1352zio_gang_tree_assemble_done(zio_t *zio) 1353{ | 1409} 1410 1411static void 1412zio_gang_tree_assemble_done(zio_t *zio) 1413{ |
1354 zio_t *lio = zio->io_logical; | 1414 zio_t *gio = zio->io_gang_leader; |
1355 zio_gang_node_t *gn = zio->io_private; 1356 blkptr_t *bp = zio->io_bp; 1357 | 1415 zio_gang_node_t *gn = zio->io_private; 1416 blkptr_t *bp = zio->io_bp; 1417 |
1358 ASSERT(zio->io_parent == lio); 1359 ASSERT(zio->io_child == NULL); | 1418 ASSERT(gio == zio_unique_parent(zio)); 1419 ASSERT(zio_walk_children(zio) == NULL); |
1360 1361 if (zio->io_error) 1362 return; 1363 1364 if (BP_SHOULD_BYTESWAP(bp)) 1365 byteswap_uint64_array(zio->io_data, zio->io_size); 1366 1367 ASSERT(zio->io_data == gn->gn_gbh); 1368 ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); 1369 ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); 1370 1371 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1372 blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; 1373 if (!BP_IS_GANG(gbp)) 1374 continue; | 1420 1421 if (zio->io_error) 1422 return; 1423 1424 if (BP_SHOULD_BYTESWAP(bp)) 1425 byteswap_uint64_array(zio->io_data, zio->io_size); 1426 1427 ASSERT(zio->io_data == gn->gn_gbh); 1428 ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); 1429 ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); 1430 1431 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1432 blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; 1433 if (!BP_IS_GANG(gbp)) 1434 continue; |
1375 zio_gang_tree_assemble(lio, gbp, &gn->gn_child[g]); | 1435 zio_gang_tree_assemble(gio, gbp, &gn->gn_child[g]); |
1376 } 1377} 1378 1379static void 1380zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) 1381{ | 1436 } 1437} 1438 1439static void 1440zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) 1441{ |
1382 zio_t *lio = pio->io_logical; | 1442 zio_t *gio = pio->io_gang_leader; |
1383 zio_t *zio; 1384 1385 ASSERT(BP_IS_GANG(bp) == !!gn); | 1443 zio_t *zio; 1444 1445 ASSERT(BP_IS_GANG(bp) == !!gn); |
1386 ASSERT(BP_GET_CHECKSUM(bp) == BP_GET_CHECKSUM(lio->io_bp)); 1387 ASSERT(BP_GET_LSIZE(bp) == BP_GET_PSIZE(bp) || gn == lio->io_gang_tree); | 1446 ASSERT(BP_GET_CHECKSUM(bp) == BP_GET_CHECKSUM(gio->io_bp)); 1447 ASSERT(BP_GET_LSIZE(bp) == BP_GET_PSIZE(bp) || gn == gio->io_gang_tree); |
1388 1389 /* 1390 * If you're a gang header, your data is in gn->gn_gbh. 1391 * If you're a gang member, your data is in 'data' and gn == NULL. 1392 */ | 1448 1449 /* 1450 * If you're a gang header, your data is in gn->gn_gbh. 1451 * If you're a gang member, your data is in 'data' and gn == NULL. 1452 */ |
1393 zio = zio_gang_issue_func[lio->io_type](pio, bp, gn, data); | 1453 zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); |
1394 1395 if (gn != NULL) { 1396 ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); 1397 1398 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1399 blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; 1400 if (BP_IS_HOLE(gbp)) 1401 continue; 1402 zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); 1403 data = (char *)data + BP_GET_PSIZE(gbp); 1404 } 1405 } 1406 | 1454 1455 if (gn != NULL) { 1456 ASSERT(gn->gn_gbh->zg_tail.zbt_magic == ZBT_MAGIC); 1457 1458 for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { 1459 blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; 1460 if (BP_IS_HOLE(gbp)) 1461 continue; 1462 zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); 1463 data = (char *)data + BP_GET_PSIZE(gbp); 1464 } 1465 } 1466 |
1407 if (gn == lio->io_gang_tree) 1408 ASSERT3P((char *)lio->io_data + lio->io_size, ==, data); | 1467 if (gn == gio->io_gang_tree) 1468 ASSERT3P((char *)gio->io_data + gio->io_size, ==, data); |
1409 1410 if (zio != pio) 1411 zio_nowait(zio); 1412} 1413 1414static int 1415zio_gang_assemble(zio_t *zio) 1416{ 1417 blkptr_t *bp = zio->io_bp; 1418 | 1469 1470 if (zio != pio) 1471 zio_nowait(zio); 1472} 1473 1474static int 1475zio_gang_assemble(zio_t *zio) 1476{ 1477 blkptr_t *bp = zio->io_bp; 1478 |
1419 ASSERT(BP_IS_GANG(bp) && zio == zio->io_logical); | 1479 ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == NULL); 1480 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); |
1420 | 1481 |
1482 zio->io_gang_leader = zio; 1483 |
|
1421 zio_gang_tree_assemble(zio, bp, &zio->io_gang_tree); 1422 1423 return (ZIO_PIPELINE_CONTINUE); 1424} 1425 1426static int 1427zio_gang_issue(zio_t *zio) 1428{ | 1484 zio_gang_tree_assemble(zio, bp, &zio->io_gang_tree); 1485 1486 return (ZIO_PIPELINE_CONTINUE); 1487} 1488 1489static int 1490zio_gang_issue(zio_t *zio) 1491{ |
1429 zio_t *lio = zio->io_logical; | |
1430 blkptr_t *bp = zio->io_bp; 1431 1432 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE)) 1433 return (ZIO_PIPELINE_STOP); 1434 | 1492 blkptr_t *bp = zio->io_bp; 1493 1494 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE)) 1495 return (ZIO_PIPELINE_STOP); 1496 |
1435 ASSERT(BP_IS_GANG(bp) && zio == lio); | 1497 ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == zio); 1498 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); |
1436 1437 if (zio->io_child_error[ZIO_CHILD_GANG] == 0) | 1499 1500 if (zio->io_child_error[ZIO_CHILD_GANG] == 0) |
1438 zio_gang_tree_issue(lio, lio->io_gang_tree, bp, lio->io_data); | 1501 zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data); |
1439 else | 1502 else |
1440 zio_gang_tree_free(&lio->io_gang_tree); | 1503 zio_gang_tree_free(&zio->io_gang_tree); |
1441 1442 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1443 1444 return (ZIO_PIPELINE_CONTINUE); 1445} 1446 1447static void 1448zio_write_gang_member_ready(zio_t *zio) 1449{ | 1504 1505 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1506 1507 return (ZIO_PIPELINE_CONTINUE); 1508} 1509 1510static void 1511zio_write_gang_member_ready(zio_t *zio) 1512{ |
1450 zio_t *pio = zio->io_parent; 1451 zio_t *lio = zio->io_logical; | 1513 zio_t *pio = zio_unique_parent(zio); 1514 zio_t *gio = zio->io_gang_leader; |
1452 dva_t *cdva = zio->io_bp->blk_dva; 1453 dva_t *pdva = pio->io_bp->blk_dva; 1454 uint64_t asize; 1455 1456 if (BP_IS_HOLE(zio->io_bp)) 1457 return; 1458 1459 ASSERT(BP_IS_HOLE(&zio->io_bp_orig)); 1460 1461 ASSERT(zio->io_child_type == ZIO_CHILD_GANG); | 1515 dva_t *cdva = zio->io_bp->blk_dva; 1516 dva_t *pdva = pio->io_bp->blk_dva; 1517 uint64_t asize; 1518 1519 if (BP_IS_HOLE(zio->io_bp)) 1520 return; 1521 1522 ASSERT(BP_IS_HOLE(&zio->io_bp_orig)); 1523 1524 ASSERT(zio->io_child_type == ZIO_CHILD_GANG); |
1462 ASSERT3U(zio->io_prop.zp_ndvas, ==, lio->io_prop.zp_ndvas); | 1525 ASSERT3U(zio->io_prop.zp_ndvas, ==, gio->io_prop.zp_ndvas); |
1463 ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(zio->io_bp)); 1464 ASSERT3U(pio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(pio->io_bp)); 1465 ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); 1466 1467 mutex_enter(&pio->io_lock); 1468 for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) { 1469 ASSERT(DVA_GET_GANG(&pdva[d])); 1470 asize = DVA_GET_ASIZE(&pdva[d]); 1471 asize += DVA_GET_ASIZE(&cdva[d]); 1472 DVA_SET_ASIZE(&pdva[d], asize); 1473 } 1474 mutex_exit(&pio->io_lock); 1475} 1476 1477static int 1478zio_write_gang_block(zio_t *pio) 1479{ 1480 spa_t *spa = pio->io_spa; 1481 blkptr_t *bp = pio->io_bp; | 1526 ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(zio->io_bp)); 1527 ASSERT3U(pio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(pio->io_bp)); 1528 ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); 1529 1530 mutex_enter(&pio->io_lock); 1531 for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) { 1532 ASSERT(DVA_GET_GANG(&pdva[d])); 1533 asize = DVA_GET_ASIZE(&pdva[d]); 1534 asize += DVA_GET_ASIZE(&cdva[d]); 1535 DVA_SET_ASIZE(&pdva[d], asize); 1536 } 1537 mutex_exit(&pio->io_lock); 1538} 1539 1540static int 1541zio_write_gang_block(zio_t *pio) 1542{ 1543 spa_t *spa = pio->io_spa; 1544 blkptr_t *bp = pio->io_bp; |
1482 zio_t *lio = pio->io_logical; | 1545 zio_t *gio = pio->io_gang_leader; |
1483 zio_t *zio; 1484 zio_gang_node_t *gn, **gnpp; 1485 zio_gbh_phys_t *gbh; 1486 uint64_t txg = pio->io_txg; 1487 uint64_t resid = pio->io_size; 1488 uint64_t lsize; | 1546 zio_t *zio; 1547 zio_gang_node_t *gn, **gnpp; 1548 zio_gbh_phys_t *gbh; 1549 uint64_t txg = pio->io_txg; 1550 uint64_t resid = pio->io_size; 1551 uint64_t lsize; |
1489 int ndvas = lio->io_prop.zp_ndvas; | 1552 int ndvas = gio->io_prop.zp_ndvas; |
1490 int gbh_ndvas = MIN(ndvas + 1, spa_max_replication(spa)); 1491 zio_prop_t zp; 1492 int error; 1493 1494 error = metaslab_alloc(spa, spa->spa_normal_class, SPA_GANGBLOCKSIZE, | 1553 int gbh_ndvas = MIN(ndvas + 1, spa_max_replication(spa)); 1554 zio_prop_t zp; 1555 int error; 1556 1557 error = metaslab_alloc(spa, spa->spa_normal_class, SPA_GANGBLOCKSIZE, |
1495 bp, gbh_ndvas, txg, pio == lio ? NULL : lio->io_bp, | 1558 bp, gbh_ndvas, txg, pio == gio ? NULL : gio->io_bp, |
1496 METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); 1497 if (error) { 1498 pio->io_error = error; 1499 return (ZIO_PIPELINE_CONTINUE); 1500 } 1501 | 1559 METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); 1560 if (error) { 1561 pio->io_error = error; 1562 return (ZIO_PIPELINE_CONTINUE); 1563 } 1564 |
1502 if (pio == lio) { 1503 gnpp = &lio->io_gang_tree; | 1565 if (pio == gio) { 1566 gnpp = &gio->io_gang_tree; |
1504 } else { 1505 gnpp = pio->io_private; 1506 ASSERT(pio->io_ready == zio_write_gang_member_ready); 1507 } 1508 1509 gn = zio_gang_node_alloc(gnpp); 1510 gbh = gn->gn_gbh; 1511 bzero(gbh, SPA_GANGBLOCKSIZE); --- 7 unchanged lines hidden (view full) --- 1519 /* 1520 * Create and nowait the gang children. 1521 */ 1522 for (int g = 0; resid != 0; resid -= lsize, g++) { 1523 lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g), 1524 SPA_MINBLOCKSIZE); 1525 ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid); 1526 | 1567 } else { 1568 gnpp = pio->io_private; 1569 ASSERT(pio->io_ready == zio_write_gang_member_ready); 1570 } 1571 1572 gn = zio_gang_node_alloc(gnpp); 1573 gbh = gn->gn_gbh; 1574 bzero(gbh, SPA_GANGBLOCKSIZE); --- 7 unchanged lines hidden (view full) --- 1582 /* 1583 * Create and nowait the gang children. 1584 */ 1585 for (int g = 0; resid != 0; resid -= lsize, g++) { 1586 lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g), 1587 SPA_MINBLOCKSIZE); 1588 ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid); 1589 |
1527 zp.zp_checksum = lio->io_prop.zp_checksum; | 1590 zp.zp_checksum = gio->io_prop.zp_checksum; |
1528 zp.zp_compress = ZIO_COMPRESS_OFF; 1529 zp.zp_type = DMU_OT_NONE; 1530 zp.zp_level = 0; | 1591 zp.zp_compress = ZIO_COMPRESS_OFF; 1592 zp.zp_type = DMU_OT_NONE; 1593 zp.zp_level = 0; |
1531 zp.zp_ndvas = lio->io_prop.zp_ndvas; | 1594 zp.zp_ndvas = gio->io_prop.zp_ndvas; |
1532 1533 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], 1534 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, 1535 zio_write_gang_member_ready, NULL, &gn->gn_child[g], 1536 pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), 1537 &pio->io_bookmark)); 1538 } 1539 --- 16 unchanged lines hidden (view full) --- 1556static int 1557zio_dva_allocate(zio_t *zio) 1558{ 1559 spa_t *spa = zio->io_spa; 1560 metaslab_class_t *mc = spa->spa_normal_class; 1561 blkptr_t *bp = zio->io_bp; 1562 int error; 1563 | 1595 1596 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], 1597 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, 1598 zio_write_gang_member_ready, NULL, &gn->gn_child[g], 1599 pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), 1600 &pio->io_bookmark)); 1601 } 1602 --- 16 unchanged lines hidden (view full) --- 1619static int 1620zio_dva_allocate(zio_t *zio) 1621{ 1622 spa_t *spa = zio->io_spa; 1623 metaslab_class_t *mc = spa->spa_normal_class; 1624 blkptr_t *bp = zio->io_bp; 1625 int error; 1626 |
1627 if (zio->io_gang_leader == NULL) { 1628 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); 1629 zio->io_gang_leader = zio; 1630 } 1631 |
|
1564 ASSERT(BP_IS_HOLE(bp)); 1565 ASSERT3U(BP_GET_NDVAS(bp), ==, 0); 1566 ASSERT3U(zio->io_prop.zp_ndvas, >, 0); 1567 ASSERT3U(zio->io_prop.zp_ndvas, <=, spa_max_replication(spa)); 1568 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); 1569 1570 error = metaslab_alloc(spa, mc, zio->io_size, bp, 1571 zio->io_prop.zp_ndvas, zio->io_txg, NULL, 0); --- 115 unchanged lines hidden (view full) --- 1687 metaslab_free(spa, bp, txg, B_FALSE); 1688} 1689 1690/* 1691 * ========================================================================== 1692 * Read and write to physical devices 1693 * ========================================================================== 1694 */ | 1632 ASSERT(BP_IS_HOLE(bp)); 1633 ASSERT3U(BP_GET_NDVAS(bp), ==, 0); 1634 ASSERT3U(zio->io_prop.zp_ndvas, >, 0); 1635 ASSERT3U(zio->io_prop.zp_ndvas, <=, spa_max_replication(spa)); 1636 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); 1637 1638 error = metaslab_alloc(spa, mc, zio->io_size, bp, 1639 zio->io_prop.zp_ndvas, zio->io_txg, NULL, 0); --- 115 unchanged lines hidden (view full) --- 1755 metaslab_free(spa, bp, txg, B_FALSE); 1756} 1757 1758/* 1759 * ========================================================================== 1760 * Read and write to physical devices 1761 * ========================================================================== 1762 */ |
1695 1696static void 1697zio_vdev_io_probe_done(zio_t *zio) 1698{ 1699 zio_t *dio; 1700 vdev_t *vd = zio->io_private; 1701 1702 mutex_enter(&vd->vdev_probe_lock); 1703 ASSERT(vd->vdev_probe_zio == zio); 1704 vd->vdev_probe_zio = NULL; 1705 mutex_exit(&vd->vdev_probe_lock); 1706 1707 while ((dio = zio->io_delegate_list) != NULL) { 1708 zio->io_delegate_list = dio->io_delegate_next; 1709 dio->io_delegate_next = NULL; 1710 if (!vdev_accessible(vd, dio)) 1711 dio->io_error = ENXIO; 1712 zio_execute(dio); 1713 } 1714} 1715 1716/* 1717 * Probe the device to determine whether I/O failure is specific to this 1718 * zio (e.g. a bad sector) or affects the entire vdev (e.g. unplugged). 1719 */ | |
1720static int | 1763static int |
1721zio_vdev_io_probe(zio_t *zio) 1722{ 1723 vdev_t *vd = zio->io_vd; 1724 zio_t *pio = NULL; 1725 boolean_t created_pio = B_FALSE; 1726 1727 /* 1728 * Don't probe the probe. 1729 */ 1730 if (zio->io_flags & ZIO_FLAG_PROBE) 1731 return (ZIO_PIPELINE_CONTINUE); 1732 1733 /* 1734 * To prevent 'probe storms' when a device fails, we create 1735 * just one probe i/o at a time. All zios that want to probe 1736 * this vdev will join the probe zio's io_delegate_list. 1737 */ 1738 mutex_enter(&vd->vdev_probe_lock); 1739 1740 if ((pio = vd->vdev_probe_zio) == NULL) { 1741 vd->vdev_probe_zio = pio = zio_root(zio->io_spa, 1742 zio_vdev_io_probe_done, vd, ZIO_FLAG_CANFAIL); 1743 created_pio = B_TRUE; 1744 vd->vdev_probe_wanted = B_TRUE; 1745 spa_async_request(zio->io_spa, SPA_ASYNC_PROBE); 1746 } 1747 1748 zio->io_delegate_next = pio->io_delegate_list; 1749 pio->io_delegate_list = zio; 1750 1751 mutex_exit(&vd->vdev_probe_lock); 1752 1753 if (created_pio) { 1754 zio_nowait(vdev_probe(vd, pio)); 1755 zio_nowait(pio); 1756 } 1757 1758 return (ZIO_PIPELINE_STOP); 1759} 1760 1761static int | |
1762zio_vdev_io_start(zio_t *zio) 1763{ 1764 vdev_t *vd = zio->io_vd; 1765 uint64_t align; 1766 spa_t *spa = zio->io_spa; 1767 1768 ASSERT(zio->io_error == 0); 1769 ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); --- 18 unchanged lines hidden (view full) --- 1788 bcopy(zio->io_data, abuf, zio->io_size); 1789 bzero(abuf + zio->io_size, asize - zio->io_size); 1790 } 1791 zio_push_transform(zio, abuf, asize, asize, zio_subblock); 1792 } 1793 1794 ASSERT(P2PHASE(zio->io_offset, align) == 0); 1795 ASSERT(P2PHASE(zio->io_size, align) == 0); | 1764zio_vdev_io_start(zio_t *zio) 1765{ 1766 vdev_t *vd = zio->io_vd; 1767 uint64_t align; 1768 spa_t *spa = zio->io_spa; 1769 1770 ASSERT(zio->io_error == 0); 1771 ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); --- 18 unchanged lines hidden (view full) --- 1790 bcopy(zio->io_data, abuf, zio->io_size); 1791 bzero(abuf + zio->io_size, asize - zio->io_size); 1792 } 1793 zio_push_transform(zio, abuf, asize, asize, zio_subblock); 1794 } 1795 1796 ASSERT(P2PHASE(zio->io_offset, align) == 0); 1797 ASSERT(P2PHASE(zio->io_size, align) == 0); |
1796 ASSERT(zio->io_type != ZIO_TYPE_WRITE || (spa_mode & FWRITE)); | 1798 ASSERT(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); |
1797 | 1799 |
1800 /* 1801 * If this is a repair I/O, and there's no self-healing involved -- 1802 * that is, we're just resilvering what we expect to resilver -- 1803 * then don't do the I/O unless zio's txg is actually in vd's DTL. 1804 * This prevents spurious resilvering with nested replication. 1805 * For example, given a mirror of mirrors, (A+B)+(C+D), if only 1806 * A is out of date, we'll read from C+D, then use the data to 1807 * resilver A+B -- but we don't actually want to resilver B, just A. 1808 * The top-level mirror has no way to know this, so instead we just 1809 * discard unnecessary repairs as we work our way down the vdev tree. 1810 * The same logic applies to any form of nested replication: 1811 * ditto + mirror, RAID-Z + replacing, etc. This covers them all. 1812 */ 1813 if ((zio->io_flags & ZIO_FLAG_IO_REPAIR) && 1814 !(zio->io_flags & ZIO_FLAG_SELF_HEAL) && 1815 zio->io_txg != 0 && /* not a delegated i/o */ 1816 !vdev_dtl_contains(vd, DTL_PARTIAL, zio->io_txg, 1)) { 1817 ASSERT(zio->io_type == ZIO_TYPE_WRITE); 1818 zio_vdev_io_bypass(zio); 1819 return (ZIO_PIPELINE_CONTINUE); 1820 } 1821 |
|
1798 if (vd->vdev_ops->vdev_op_leaf && 1799 (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) { 1800 1801 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) | 1822 if (vd->vdev_ops->vdev_op_leaf && 1823 (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) { 1824 1825 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0) |
1802 return (ZIO_PIPELINE_STOP); | 1826 return (ZIO_PIPELINE_CONTINUE); |
1803 1804 if ((zio = vdev_queue_io(zio)) == NULL) 1805 return (ZIO_PIPELINE_STOP); 1806 1807 if (!vdev_accessible(vd, zio)) { 1808 zio->io_error = ENXIO; 1809 zio_interrupt(zio); 1810 return (ZIO_PIPELINE_STOP); 1811 } | 1827 1828 if ((zio = vdev_queue_io(zio)) == NULL) 1829 return (ZIO_PIPELINE_STOP); 1830 1831 if (!vdev_accessible(vd, zio)) { 1832 zio->io_error = ENXIO; 1833 zio_interrupt(zio); 1834 return (ZIO_PIPELINE_STOP); 1835 } |
1812 | |
1813 } 1814 1815 return (vd->vdev_ops->vdev_op_io_start(zio)); 1816} 1817 1818static int 1819zio_vdev_io_done(zio_t *zio) 1820{ --- 26 unchanged lines hidden (view full) --- 1847 unexpected_error = B_TRUE; 1848 } 1849 } 1850 } 1851 1852 ops->vdev_op_io_done(zio); 1853 1854 if (unexpected_error) | 1836 } 1837 1838 return (vd->vdev_ops->vdev_op_io_start(zio)); 1839} 1840 1841static int 1842zio_vdev_io_done(zio_t *zio) 1843{ --- 26 unchanged lines hidden (view full) --- 1870 unexpected_error = B_TRUE; 1871 } 1872 } 1873 } 1874 1875 ops->vdev_op_io_done(zio); 1876 1877 if (unexpected_error) |
1855 return (zio_vdev_io_probe(zio)); | 1878 VERIFY(vdev_probe(vd, zio) == NULL); |
1856 1857 return (ZIO_PIPELINE_CONTINUE); 1858} 1859 1860static int 1861zio_vdev_io_assess(zio_t *zio) 1862{ 1863 vdev_t *vd = zio->io_vd; --- 179 unchanged lines hidden (view full) --- 2043 * ========================================================================== 2044 * I/O completion 2045 * ========================================================================== 2046 */ 2047static int 2048zio_ready(zio_t *zio) 2049{ 2050 blkptr_t *bp = zio->io_bp; | 1879 1880 return (ZIO_PIPELINE_CONTINUE); 1881} 1882 1883static int 1884zio_vdev_io_assess(zio_t *zio) 1885{ 1886 vdev_t *vd = zio->io_vd; --- 179 unchanged lines hidden (view full) --- 2066 * ========================================================================== 2067 * I/O completion 2068 * ========================================================================== 2069 */ 2070static int 2071zio_ready(zio_t *zio) 2072{ 2073 blkptr_t *bp = zio->io_bp; |
2051 zio_t *pio = zio->io_parent; | 2074 zio_t *pio, *pio_next; |
2052 | 2075 |
2053 if (zio->io_ready) { 2054 if (BP_IS_GANG(bp) && 2055 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY)) 2056 return (ZIO_PIPELINE_STOP); | 2076 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY)) 2077 return (ZIO_PIPELINE_STOP); |
2057 | 2078 |
2079 if (zio->io_ready) { |
|
2058 ASSERT(IO_IS_ALLOCATING(zio)); 2059 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); 2060 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); 2061 2062 zio->io_ready(zio); 2063 } 2064 2065 if (bp != NULL && bp != &zio->io_bp_copy) 2066 zio->io_bp_copy = *bp; 2067 2068 if (zio->io_error) 2069 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 2070 | 2080 ASSERT(IO_IS_ALLOCATING(zio)); 2081 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp)); 2082 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); 2083 2084 zio->io_ready(zio); 2085 } 2086 2087 if (bp != NULL && bp != &zio->io_bp_copy) 2088 zio->io_bp_copy = *bp; 2089 2090 if (zio->io_error) 2091 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 2092 |
2071 if (pio != NULL) | 2093 mutex_enter(&zio->io_lock); 2094 zio->io_state[ZIO_WAIT_READY] = 1; 2095 pio = zio_walk_parents(zio); 2096 mutex_exit(&zio->io_lock); 2097 2098 /* 2099 * As we notify zio's parents, new parents could be added. 2100 * New parents go to the head of zio's io_parent_list, however, 2101 * so we will (correctly) not notify them. The remainder of zio's 2102 * io_parent_list, from 'pio_next' onward, cannot change because 2103 * all parents must wait for us to be done before they can be done. 2104 */ 2105 for (; pio != NULL; pio = pio_next) { 2106 pio_next = zio_walk_parents(zio); |
2072 zio_notify_parent(pio, zio, ZIO_WAIT_READY); | 2107 zio_notify_parent(pio, zio, ZIO_WAIT_READY); |
2108 } |
|
2073 2074 return (ZIO_PIPELINE_CONTINUE); 2075} 2076 2077static int 2078zio_done(zio_t *zio) 2079{ 2080 spa_t *spa = zio->io_spa; | 2109 2110 return (ZIO_PIPELINE_CONTINUE); 2111} 2112 2113static int 2114zio_done(zio_t *zio) 2115{ 2116 spa_t *spa = zio->io_spa; |
2081 zio_t *pio = zio->io_parent; | |
2082 zio_t *lio = zio->io_logical; 2083 blkptr_t *bp = zio->io_bp; 2084 vdev_t *vd = zio->io_vd; 2085 uint64_t psize = zio->io_size; | 2117 zio_t *lio = zio->io_logical; 2118 blkptr_t *bp = zio->io_bp; 2119 vdev_t *vd = zio->io_vd; 2120 uint64_t psize = zio->io_size; |
2121 zio_t *pio, *pio_next; |
|
2086 2087 /* | 2122 2123 /* |
2088 * If our of children haven't all completed, | 2124 * If our children haven't all completed, |
2089 * wait for them and then repeat this pipeline stage. 2090 */ 2091 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || 2092 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || 2093 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 2094 return (ZIO_PIPELINE_STOP); 2095 2096 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 2097 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 2098 ASSERT(zio->io_children[c][w] == 0); 2099 2100 if (bp != NULL) { 2101 ASSERT(bp->blk_pad[0] == 0); 2102 ASSERT(bp->blk_pad[1] == 0); 2103 ASSERT(bp->blk_pad[2] == 0); 2104 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || | 2125 * wait for them and then repeat this pipeline stage. 2126 */ 2127 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || 2128 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || 2129 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 2130 return (ZIO_PIPELINE_STOP); 2131 2132 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 2133 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 2134 ASSERT(zio->io_children[c][w] == 0); 2135 2136 if (bp != NULL) { 2137 ASSERT(bp->blk_pad[0] == 0); 2138 ASSERT(bp->blk_pad[1] == 0); 2139 ASSERT(bp->blk_pad[2] == 0); 2140 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || |
2105 (pio != NULL && bp == pio->io_bp)); | 2141 (bp == zio_unique_parent(zio)->io_bp)); |
2106 if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && 2107 !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { 2108 ASSERT(!BP_SHOULD_BYTESWAP(bp)); 2109 ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(bp)); 2110 ASSERT(BP_COUNT_GANG(bp) == 0 || 2111 (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); 2112 } 2113 } --- 41 unchanged lines hidden (view full) --- 2155 if (zio->io_error != ENOSPC) 2156 zio->io_reexecute |= ZIO_REEXECUTE_NOW; 2157 else 2158 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2159 2160 if ((zio->io_type == ZIO_TYPE_READ || 2161 zio->io_type == ZIO_TYPE_FREE) && 2162 zio->io_error == ENXIO && | 2142 if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && 2143 !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { 2144 ASSERT(!BP_SHOULD_BYTESWAP(bp)); 2145 ASSERT3U(zio->io_prop.zp_ndvas, <=, BP_GET_NDVAS(bp)); 2146 ASSERT(BP_COUNT_GANG(bp) == 0 || 2147 (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); 2148 } 2149 } --- 41 unchanged lines hidden (view full) --- 2191 if (zio->io_error != ENOSPC) 2192 zio->io_reexecute |= ZIO_REEXECUTE_NOW; 2193 else 2194 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2195 2196 if ((zio->io_type == ZIO_TYPE_READ || 2197 zio->io_type == ZIO_TYPE_FREE) && 2198 zio->io_error == ENXIO && |
2199 spa->spa_load_state == SPA_LOAD_NONE && |
|
2163 spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE) 2164 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2165 2166 if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) 2167 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2168 } 2169 2170 /* 2171 * If there were logical child errors, they apply to us now. 2172 * We defer this until now to avoid conflating logical child 2173 * errors with errors that happened to the zio itself when 2174 * updating vdev stats and reporting FMA events above. 2175 */ 2176 zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); 2177 | 2200 spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE) 2201 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2202 2203 if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute) 2204 zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND; 2205 } 2206 2207 /* 2208 * If there were logical child errors, they apply to us now. 2209 * We defer this until now to avoid conflating logical child 2210 * errors with errors that happened to the zio itself when 2211 * updating vdev stats and reporting FMA events above. 2212 */ 2213 zio_inherit_child_errors(zio, ZIO_CHILD_LOGICAL); 2214 |
2215 if ((zio->io_error || zio->io_reexecute) && IO_IS_ALLOCATING(zio) && 2216 zio->io_child_type == ZIO_CHILD_LOGICAL) { 2217 ASSERT(zio->io_child_type != ZIO_CHILD_GANG); 2218 zio_dva_unallocate(zio, zio->io_gang_tree, bp); 2219 } 2220 2221 zio_gang_tree_free(&zio->io_gang_tree); 2222 2223 /* 2224 * Godfather I/Os should never suspend. 2225 */ 2226 if ((zio->io_flags & ZIO_FLAG_GODFATHER) && 2227 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) 2228 zio->io_reexecute = 0; 2229 |
|
2178 if (zio->io_reexecute) { 2179 /* 2180 * This is a logical I/O that wants to reexecute. 2181 * 2182 * Reexecute is top-down. When an i/o fails, if it's not 2183 * the root, it simply notifies its parent and sticks around. 2184 * The parent, seeing that it still has children in zio_done(), 2185 * does the same. This percolates all the way up to the root. 2186 * The root i/o will reexecute or suspend the entire tree. 2187 * 2188 * This approach ensures that zio_reexecute() honors 2189 * all the original i/o dependency relationships, e.g. 2190 * parents not executing until children are ready. 2191 */ 2192 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 2193 | 2230 if (zio->io_reexecute) { 2231 /* 2232 * This is a logical I/O that wants to reexecute. 2233 * 2234 * Reexecute is top-down. When an i/o fails, if it's not 2235 * the root, it simply notifies its parent and sticks around. 2236 * The parent, seeing that it still has children in zio_done(), 2237 * does the same. This percolates all the way up to the root. 2238 * The root i/o will reexecute or suspend the entire tree. 2239 * 2240 * This approach ensures that zio_reexecute() honors 2241 * all the original i/o dependency relationships, e.g. 2242 * parents not executing until children are ready. 2243 */ 2244 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 2245 |
2194 if (IO_IS_ALLOCATING(zio)) 2195 zio_dva_unallocate(zio, zio->io_gang_tree, bp); | 2246 zio->io_gang_leader = NULL; |
2196 | 2247 |
2197 zio_gang_tree_free(&zio->io_gang_tree); | 2248 mutex_enter(&zio->io_lock); 2249 zio->io_state[ZIO_WAIT_DONE] = 1; 2250 mutex_exit(&zio->io_lock); |
2198 | 2251 |
2199 if (pio != NULL) { | 2252 /* 2253 * "The Godfather" I/O monitors its children but is 2254 * not a true parent to them. It will track them through 2255 * the pipeline but severs its ties whenever they get into 2256 * trouble (e.g. suspended). This allows "The Godfather" 2257 * I/O to return status without blocking. 2258 */ 2259 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { 2260 zio_link_t *zl = zio->io_walk_link; 2261 pio_next = zio_walk_parents(zio); 2262 2263 if ((pio->io_flags & ZIO_FLAG_GODFATHER) && 2264 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { 2265 zio_remove_child(pio, zio, zl); 2266 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 2267 } 2268 } 2269 2270 if ((pio = zio_unique_parent(zio)) != NULL) { |
2200 /* 2201 * We're not a root i/o, so there's nothing to do 2202 * but notify our parent. Don't propagate errors 2203 * upward since we haven't permanently failed yet. 2204 */ | 2271 /* 2272 * We're not a root i/o, so there's nothing to do 2273 * but notify our parent. Don't propagate errors 2274 * upward since we haven't permanently failed yet. 2275 */ |
2276 ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); |
|
2205 zio->io_flags |= ZIO_FLAG_DONT_PROPAGATE; 2206 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 2207 } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) { 2208 /* 2209 * We'd fail again if we reexecuted now, so suspend 2210 * until conditions improve (e.g. device comes online). 2211 */ 2212 zio_suspend(spa, zio); --- 4 unchanged lines hidden (view full) --- 2217 */ 2218 (void) taskq_dispatch_safe( 2219 spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], 2220 (task_func_t *)zio_reexecute, zio, &zio->io_task); 2221 } 2222 return (ZIO_PIPELINE_STOP); 2223 } 2224 | 2277 zio->io_flags |= ZIO_FLAG_DONT_PROPAGATE; 2278 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 2279 } else if (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND) { 2280 /* 2281 * We'd fail again if we reexecuted now, so suspend 2282 * until conditions improve (e.g. device comes online). 2283 */ 2284 zio_suspend(spa, zio); --- 4 unchanged lines hidden (view full) --- 2289 */ 2290 (void) taskq_dispatch_safe( 2291 spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], 2292 (task_func_t *)zio_reexecute, zio, &zio->io_task); 2293 } 2294 return (ZIO_PIPELINE_STOP); 2295 } 2296 |
2225 ASSERT(zio->io_child == NULL); | 2297 ASSERT(zio_walk_children(zio) == NULL); |
2226 ASSERT(zio->io_reexecute == 0); 2227 ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); 2228 | 2298 ASSERT(zio->io_reexecute == 0); 2299 ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL)); 2300 |
2301 /* 2302 * It is the responsibility of the done callback to ensure that this 2303 * particular zio is no longer discoverable for adoption, and as 2304 * such, cannot acquire any new parents. 2305 */ |
|
2229 if (zio->io_done) 2230 zio->io_done(zio); 2231 | 2306 if (zio->io_done) 2307 zio->io_done(zio); 2308 |
2232 zio_gang_tree_free(&zio->io_gang_tree); | 2309 mutex_enter(&zio->io_lock); 2310 zio->io_state[ZIO_WAIT_DONE] = 1; 2311 mutex_exit(&zio->io_lock); |
2233 | 2312 |
2234 ASSERT(zio->io_delegate_list == NULL); 2235 ASSERT(zio->io_delegate_next == NULL); 2236 2237 if (pio != NULL) { 2238 zio_remove_child(pio, zio); | 2313 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { 2314 zio_link_t *zl = zio->io_walk_link; 2315 pio_next = zio_walk_parents(zio); 2316 zio_remove_child(pio, zio, zl); |
2239 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 2240 } 2241 2242 if (zio->io_waiter != NULL) { 2243 mutex_enter(&zio->io_lock); 2244 zio->io_executor = NULL; 2245 cv_broadcast(&zio->io_cv); 2246 mutex_exit(&zio->io_lock); --- 30 unchanged lines hidden --- | 2317 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 2318 } 2319 2320 if (zio->io_waiter != NULL) { 2321 mutex_enter(&zio->io_lock); 2322 zio->io_executor = NULL; 2323 cv_broadcast(&zio->io_cv); 2324 mutex_exit(&zio->io_lock); --- 30 unchanged lines hidden --- |