Deleted Added
full compact
zdb.c (196928) zdb.c (208047)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 36 unchanged lines hidden (view full) ---

45#include <sys/zil.h>
46#include <sys/zil_impl.h>
47#include <sys/stat.h>
48#include <sys/resource.h>
49#include <sys/dmu_traverse.h>
50#include <sys/zio_checksum.h>
51#include <sys/zio_compress.h>
52#include <sys/zfs_fuid.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 36 unchanged lines hidden (view full) ---

45#include <sys/zil.h>
46#include <sys/zil_impl.h>
47#include <sys/stat.h>
48#include <sys/resource.h>
49#include <sys/dmu_traverse.h>
50#include <sys/zio_checksum.h>
51#include <sys/zio_compress.h>
52#include <sys/zfs_fuid.h>
53#include <sys/arc.h>
53#undef ZFS_MAXNAMELEN
54#undef verify
55#include <libzfs.h>
56
57const char cmdname[] = "zdb";
58uint8_t dump_opt[256];
59
60typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
61
62extern void dump_intent_log(zilog_t *);
63uint64_t *zopt_object = NULL;
64int zopt_objects = 0;
54#undef ZFS_MAXNAMELEN
55#undef verify
56#include <libzfs.h>
57
58const char cmdname[] = "zdb";
59uint8_t dump_opt[256];
60
61typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
62
63extern void dump_intent_log(zilog_t *);
64uint64_t *zopt_object = NULL;
65int zopt_objects = 0;
65int zdb_advance = ADVANCE_PRE;
66zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
67libzfs_handle_t *g_zfs;
68boolean_t zdb_sig_user_data = B_TRUE;
69int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
70
71/*
72 * These libumem hooks provide a reasonable set of defaults for the allocator's
73 * debugging facilities.
74 */

--- 8 unchanged lines hidden (view full) ---

83{
84 return ("fail,contents"); /* $UMEM_LOGGING setting */
85}
86
87static void
88usage(void)
89{
90 (void) fprintf(stderr,
66libzfs_handle_t *g_zfs;
67boolean_t zdb_sig_user_data = B_TRUE;
68int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
69
70/*
71 * These libumem hooks provide a reasonable set of defaults for the allocator's
72 * debugging facilities.
73 */

--- 8 unchanged lines hidden (view full) ---

82{
83 return ("fail,contents"); /* $UMEM_LOGGING setting */
84}
85
86static void
87usage(void)
88{
89 (void) fprintf(stderr,
91 "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] "
92 "[-B os:obj:level:blkid] [-S user:cksumalg] "
90 "Usage: %s [-udibcsv] [-U cachefile_path] "
91 "[-S user:cksumalg] "
93 "dataset [object...]\n"
94 " %s -C [pool]\n"
95 " %s -l dev\n"
96 " %s -R pool:vdev:offset:size:flags\n"
97 " %s [-p path_to_vdev_dir]\n"
98 " %s -e pool | GUID | devid ...\n",
99 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
100
101 (void) fprintf(stderr, " -u uberblock\n");
102 (void) fprintf(stderr, " -d datasets\n");
103 (void) fprintf(stderr, " -C cached pool configuration\n");
104 (void) fprintf(stderr, " -i intent logs\n");
105 (void) fprintf(stderr, " -b block statistics\n");
106 (void) fprintf(stderr, " -c checksum all data blocks\n");
107 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
108 (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
109 "dump blkptr signatures\n");
110 (void) fprintf(stderr, " -v verbose (applies to all others)\n");
111 (void) fprintf(stderr, " -l dump label contents\n");
92 "dataset [object...]\n"
93 " %s -C [pool]\n"
94 " %s -l dev\n"
95 " %s -R pool:vdev:offset:size:flags\n"
96 " %s [-p path_to_vdev_dir]\n"
97 " %s -e pool | GUID | devid ...\n",
98 cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
99
100 (void) fprintf(stderr, " -u uberblock\n");
101 (void) fprintf(stderr, " -d datasets\n");
102 (void) fprintf(stderr, " -C cached pool configuration\n");
103 (void) fprintf(stderr, " -i intent logs\n");
104 (void) fprintf(stderr, " -b block statistics\n");
105 (void) fprintf(stderr, " -c checksum all data blocks\n");
106 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
107 (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
108 "dump blkptr signatures\n");
109 (void) fprintf(stderr, " -v verbose (applies to all others)\n");
110 (void) fprintf(stderr, " -l dump label contents\n");
112 (void) fprintf(stderr, " -L live pool (allows some errors)\n");
113 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> "
114 "visitation order\n");
115 (void) fprintf(stderr, " -U cachefile_path -- use alternate "
116 "cachefile\n");
111 (void) fprintf(stderr, " -U cachefile_path -- use alternate "
112 "cachefile\n");
117 (void) fprintf(stderr, " -B objset:object:level:blkid -- "
118 "simulate bad block\n");
119 (void) fprintf(stderr, " -R read and display block from a "
120 "device\n");
121 (void) fprintf(stderr, " -e Pool is exported/destroyed/"
122 "has altroot\n");
123 (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
124 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
125 "to make only that option verbose\n");
126 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");

--- 6 unchanged lines hidden (view full) ---

133 va_list ap;
134
135 va_start(ap, fmt);
136 (void) fprintf(stderr, "%s: ", cmdname);
137 (void) vfprintf(stderr, fmt, ap);
138 va_end(ap);
139 (void) fprintf(stderr, "\n");
140
113 (void) fprintf(stderr, " -R read and display block from a "
114 "device\n");
115 (void) fprintf(stderr, " -e Pool is exported/destroyed/"
116 "has altroot\n");
117 (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
118 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
119 "to make only that option verbose\n");
120 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");

--- 6 unchanged lines hidden (view full) ---

127 va_list ap;
128
129 va_start(ap, fmt);
130 (void) fprintf(stderr, "%s: ", cmdname);
131 (void) vfprintf(stderr, fmt, ap);
132 va_end(ap);
133 (void) fprintf(stderr, "\n");
134
141 exit(1);
135 abort();
142}
143
144static void
145dump_nvlist(nvlist_t *list, int indent)
146{
147 nvpair_t *elem = NULL;
148
149 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {

--- 416 unchanged lines hidden (view full) ---

566
567/*ARGSUSED*/
568static void
569dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
570{
571}
572
573static uint64_t
136}
137
138static void
139dump_nvlist(nvlist_t *list, int indent)
140{
141 nvpair_t *elem = NULL;
142
143 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {

--- 416 unchanged lines hidden (view full) ---

560
561/*ARGSUSED*/
562static void
563dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
564{
565}
566
567static uint64_t
574blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
568blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
575{
576 if (level < 0)
577 return (blkid);
578
579 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
580 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
581}
582

--- 14 unchanged lines hidden (view full) ---

597
598 (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
599 (u_longlong_t)BP_GET_LSIZE(bp),
600 (u_longlong_t)BP_GET_PSIZE(bp),
601 (u_longlong_t)bp->blk_fill,
602 (u_longlong_t)bp->blk_birth);
603}
604
569{
570 if (level < 0)
571 return (blkid);
572
573 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
574 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
575}
576

--- 14 unchanged lines hidden (view full) ---

591
592 (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
593 (u_longlong_t)BP_GET_LSIZE(bp),
594 (u_longlong_t)BP_GET_PSIZE(bp),
595 (u_longlong_t)bp->blk_fill,
596 (u_longlong_t)bp->blk_birth);
597}
598
605/* ARGSUSED */
606static int
607zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
599static void
600print_indirect(blkptr_t *bp, const zbookmark_t *zb,
601 const dnode_phys_t *dnp)
608{
602{
609 zbookmark_t *zb = &bc->bc_bookmark;
610 blkptr_t *bp = &bc->bc_blkptr;
611 void *data = bc->bc_data;
612 dnode_phys_t *dnp = bc->bc_dnode;
613 char blkbuf[BP_SPRINTF_LEN + 80];
603 char blkbuf[BP_SPRINTF_LEN];
614 int l;
615
604 int l;
605
616 if (bc->bc_errno) {
617 (void) sprintf(blkbuf,
618 "Error %d reading <%llu, %llu, %lld, %llu>: ",
619 bc->bc_errno,
620 (u_longlong_t)zb->zb_objset,
621 (u_longlong_t)zb->zb_object,
622 (u_longlong_t)zb->zb_level,
623 (u_longlong_t)zb->zb_blkid);
624 goto out;
625 }
606 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
607 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
626
608
627 if (zb->zb_level == -1) {
628 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
629 ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
630 } else {
631 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
632 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
633 }
609 (void) printf("%16llx ",
610 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
634
611
635 if (zb->zb_level > 0) {
636 uint64_t fill = 0;
637 blkptr_t *bpx, *bpend;
612 ASSERT(zb->zb_level >= 0);
638
613
639 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
640 bpx < bpend; bpx++) {
641 if (bpx->blk_birth != 0) {
642 fill += bpx->blk_fill;
643 } else {
644 ASSERT(bpx->blk_fill == 0);
645 }
614 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
615 if (l == zb->zb_level) {
616 (void) printf("L%llx", (u_longlong_t)zb->zb_level);
617 } else {
618 (void) printf(" ");
646 }
619 }
647 ASSERT3U(fill, ==, bp->blk_fill);
648 }
649
620 }
621
650 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
651 uint64_t fill = 0;
652 dnode_phys_t *dnx, *dnend;
622 sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
623 (void) printf("%s\n", blkbuf);
624}
653
625
654 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
655 dnx < dnend; dnx++) {
656 if (dnx->dn_type != DMU_OT_NONE)
657 fill++;
658 }
659 ASSERT3U(fill, ==, bp->blk_fill);
660 }
626#define SET_BOOKMARK(zb, objset, object, level, blkid) \
627{ \
628 (zb)->zb_objset = objset; \
629 (zb)->zb_object = object; \
630 (zb)->zb_level = level; \
631 (zb)->zb_blkid = blkid; \
632}
661
633
662 (void) sprintf(blkbuf, "%16llx ",
663 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
634static int
635visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
636 blkptr_t *bp, const zbookmark_t *zb)
637{
638 int err;
664
639
665 ASSERT(zb->zb_level >= 0);
640 if (bp->blk_birth == 0)
641 return (0);
666
642
667 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
668 if (l == zb->zb_level) {
669 (void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
670 (u_longlong_t)zb->zb_level);
671 } else {
672 (void) sprintf(blkbuf + strlen(blkbuf), " ");
643 print_indirect(bp, zb, dnp);
644
645 if (BP_GET_LEVEL(bp) > 0) {
646 uint32_t flags = ARC_WAIT;
647 int i;
648 blkptr_t *cbp;
649 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
650 arc_buf_t *buf;
651 uint64_t fill = 0;
652
653 err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
654 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
655 if (err)
656 return (err);
657
658 /* recursively visit blocks below this */
659 cbp = buf->b_data;
660 for (i = 0; i < epb; i++, cbp++) {
661 zbookmark_t czb;
662
663 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
664 zb->zb_level - 1,
665 zb->zb_blkid * epb + i);
666 err = visit_indirect(spa, dnp, cbp, &czb);
667 if (err)
668 break;
669 fill += cbp->blk_fill;
673 }
670 }
671 ASSERT3U(fill, ==, bp->blk_fill);
672 (void) arc_buf_remove_ref(buf, &buf);
674 }
675
673 }
674
676out:
677 if (bp->blk_birth == 0) {
678 (void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
679 (void) printf("%s\n", blkbuf);
680 } else {
681 sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
682 dump_opt['d'] > 5 ? 1 : 0);
683 (void) printf("%s\n", blkbuf);
684 }
685
686 return (bc->bc_errno ? ERESTART : 0);
675 return (err);
687}
688
689/*ARGSUSED*/
690static void
676}
677
678/*ARGSUSED*/
679static void
691dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
680dump_indirect(dnode_t *dn)
692{
681{
693 traverse_handle_t *th;
694 uint64_t objset = dmu_objset_id(os);
695 int advance = zdb_advance;
682 dnode_phys_t *dnp = dn->dn_phys;
683 int j;
684 zbookmark_t czb;
696
697 (void) printf("Indirect blocks:\n");
698
685
686 (void) printf("Indirect blocks:\n");
687
699 if (object == 0)
700 advance |= ADVANCE_DATA;
688 SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
689 dn->dn_object, dnp->dn_nlevels - 1, 0);
690 for (j = 0; j < dnp->dn_nblkptr; j++) {
691 czb.zb_blkid = j;
692 (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
693 &dnp->dn_blkptr[j], &czb);
694 }
701
695
702 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
703 ZIO_FLAG_CANFAIL);
704 th->th_noread = zdb_noread;
705
706 traverse_add_dnode(th, 0, -1ULL, objset, object);
707
708 while (traverse_more(th) == EAGAIN)
709 continue;
710
711 (void) printf("\n");
696 (void) printf("\n");
712
713 traverse_fini(th);
714}
715
716/*ARGSUSED*/
717static void
718dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
719{
720 dsl_dir_phys_t *dd = data;
721 time_t crtime;

--- 366 unchanged lines hidden (view full) ---

1088
1089 if (verbosity >= 4) {
1090 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
1091 object_viewer[doi.doi_type](os, object, NULL, 0);
1092 *print_header = 1;
1093 }
1094
1095 if (verbosity >= 5)
697}
698
699/*ARGSUSED*/
700static void
701dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
702{
703 dsl_dir_phys_t *dd = data;
704 time_t crtime;

--- 366 unchanged lines hidden (view full) ---

1071
1072 if (verbosity >= 4) {
1073 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
1074 object_viewer[doi.doi_type](os, object, NULL, 0);
1075 *print_header = 1;
1076 }
1077
1078 if (verbosity >= 5)
1096 dump_indirect(os, object, NULL, 0);
1079 dump_indirect(dn);
1097
1098 if (verbosity >= 5) {
1099 /*
1100 * Report the list of segments that comprise the object.
1101 */
1102 uint64_t start = 0;
1103 uint64_t end;
1104 uint64_t blkfill = 1;

--- 348 unchanged lines hidden (view full) ---

1453 uint64_t zb_lsize;
1454 uint64_t zb_psize;
1455 uint64_t zb_count;
1456} zdb_blkstats_t;
1457
1458#define DMU_OT_DEFERRED DMU_OT_NONE
1459#define DMU_OT_TOTAL DMU_OT_NUMTYPES
1460
1080
1081 if (verbosity >= 5) {
1082 /*
1083 * Report the list of segments that comprise the object.
1084 */
1085 uint64_t start = 0;
1086 uint64_t end;
1087 uint64_t blkfill = 1;

--- 348 unchanged lines hidden (view full) ---

1436 uint64_t zb_lsize;
1437 uint64_t zb_psize;
1438 uint64_t zb_count;
1439} zdb_blkstats_t;
1440
1441#define DMU_OT_DEFERRED DMU_OT_NONE
1442#define DMU_OT_TOTAL DMU_OT_NUMTYPES
1443
1461#define ZB_TOTAL ZB_MAXLEVEL
1444#define ZB_TOTAL DN_MAX_LEVELS
1462
1463typedef struct zdb_cb {
1464 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
1465 uint64_t zcb_errors[256];
1445
1446typedef struct zdb_cb {
1447 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
1448 uint64_t zcb_errors[256];
1466 traverse_blk_cache_t *zcb_cache;
1467 int zcb_readfails;
1468 int zcb_haderrors;
1469} zdb_cb_t;
1470
1471static void
1449 int zcb_readfails;
1450 int zcb_haderrors;
1451} zdb_cb_t;
1452
1453static void
1472zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
1454zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
1473{
1474 for (int i = 0; i < 4; i++) {
1475 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1476 int t = (i & 1) ? type : DMU_OT_TOTAL;
1477 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1478
1479 zb->zb_asize += BP_GET_ASIZE(bp);
1480 zb->zb_lsize += BP_GET_LSIZE(bp);
1481 zb->zb_psize += BP_GET_PSIZE(bp);
1482 zb->zb_count++;
1483 }
1484
1485 if (dump_opt['S']) {
1486 boolean_t print_sig;
1487
1455{
1456 for (int i = 0; i < 4; i++) {
1457 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1458 int t = (i & 1) ? type : DMU_OT_TOTAL;
1459 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1460
1461 zb->zb_asize += BP_GET_ASIZE(bp);
1462 zb->zb_lsize += BP_GET_LSIZE(bp);
1463 zb->zb_psize += BP_GET_PSIZE(bp);
1464 zb->zb_count++;
1465 }
1466
1467 if (dump_opt['S']) {
1468 boolean_t print_sig;
1469
1488 print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
1470 print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
1489 BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
1490
1491 if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
1492 print_sig = B_FALSE;
1493
1494 if (print_sig) {
1495 (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
1496 "%llx:%llx:%llx:%llx\n",

--- 5 unchanged lines hidden (view full) ---

1502 zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
1503 (u_longlong_t)bp->blk_cksum.zc_word[0],
1504 (u_longlong_t)bp->blk_cksum.zc_word[1],
1505 (u_longlong_t)bp->blk_cksum.zc_word[2],
1506 (u_longlong_t)bp->blk_cksum.zc_word[3]);
1507 }
1508 }
1509
1471 BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
1472
1473 if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
1474 print_sig = B_FALSE;
1475
1476 if (print_sig) {
1477 (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
1478 "%llx:%llx:%llx:%llx\n",

--- 5 unchanged lines hidden (view full) ---

1484 zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
1485 (u_longlong_t)bp->blk_cksum.zc_word[0],
1486 (u_longlong_t)bp->blk_cksum.zc_word[1],
1487 (u_longlong_t)bp->blk_cksum.zc_word[2],
1488 (u_longlong_t)bp->blk_cksum.zc_word[3]);
1489 }
1490 }
1491
1510 if (!dump_opt['L'])
1511 VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
1512 NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
1492 VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
1493 NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
1513}
1514
1515static int
1494}
1495
1496static int
1516zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1497zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
1498 const dnode_phys_t *dnp, void *arg)
1517{
1499{
1518 zbookmark_t *zb = &bc->bc_bookmark;
1519 zdb_cb_t *zcb = arg;
1500 zdb_cb_t *zcb = arg;
1520 blkptr_t *bp = &bc->bc_blkptr;
1521 dmu_object_type_t type = BP_GET_TYPE(bp);
1522 char blkbuf[BP_SPRINTF_LEN];
1501 char blkbuf[BP_SPRINTF_LEN];
1523 int error = 0;
1524
1502
1525 ASSERT(!BP_IS_HOLE(bp));
1503 if (bp == NULL)
1504 return (0);
1526
1505
1527 zdb_count_block(spa, zcb, bp, type);
1506 zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
1528
1507
1529 if (bc->bc_errno) {
1530 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
1531 uberblock_t ub;
1532 vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
1533 if (ub.ub_txg != 0)
1534 spa->spa_ubsync = ub;
1535 error = EAGAIN;
1536 } else {
1508 if (dump_opt['c'] || dump_opt['S']) {
1509 int ioerr, size;
1510 void *data;
1511
1512 size = BP_GET_LSIZE(bp);
1513 data = malloc(size);
1514 ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1515 NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
1516 ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
1517 free(data);
1518
1519 /* We expect io errors on intent log */
1520 if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
1537 zcb->zcb_haderrors = 1;
1521 zcb->zcb_haderrors = 1;
1538 zcb->zcb_errors[bc->bc_errno]++;
1539 error = ERESTART;
1540 }
1522 zcb->zcb_errors[ioerr]++;
1541
1523
1542 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
1543 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1544 else
1545 blkbuf[0] = '\0';
1524 if (dump_opt['b'] >= 2)
1525 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1526 else
1527 blkbuf[0] = '\0';
1546
1528
1547 if (!dump_opt['S']) {
1548 (void) printf("zdb_blkptr_cb: Got error %d reading "
1549 "<%llu, %llu, %lld, %llx> %s -- %s\n",
1550 bc->bc_errno,
1551 (u_longlong_t)zb->zb_objset,
1552 (u_longlong_t)zb->zb_object,
1553 (u_longlong_t)zb->zb_level,
1554 (u_longlong_t)zb->zb_blkid,
1555 blkbuf,
1556 error == EAGAIN ? "retrying" : "skipping");
1529 if (!dump_opt['S']) {
1530 (void) printf("zdb_blkptr_cb: "
1531 "Got error %d reading "
1532 "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1533 ioerr,
1534 (u_longlong_t)zb->zb_objset,
1535 (u_longlong_t)zb->zb_object,
1536 (u_longlong_t)zb->zb_level,
1537 (u_longlong_t)zb->zb_blkid,
1538 blkbuf);
1539 }
1557 }
1540 }
1558
1559 return (error);
1560 }
1561
1562 zcb->zcb_readfails = 0;
1563
1564 if (dump_opt['b'] >= 4) {
1565 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1566 (void) printf("objset %llu object %llu offset 0x%llx %s\n",
1567 (u_longlong_t)zb->zb_objset,
1568 (u_longlong_t)zb->zb_object,
1541 }
1542
1543 zcb->zcb_readfails = 0;
1544
1545 if (dump_opt['b'] >= 4) {
1546 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1547 (void) printf("objset %llu object %llu offset 0x%llx %s\n",
1548 (u_longlong_t)zb->zb_objset,
1549 (u_longlong_t)zb->zb_object,
1569 (u_longlong_t)blkid2offset(bc->bc_dnode,
1570 zb->zb_level, zb->zb_blkid), blkbuf);
1550 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
1551 blkbuf);
1571 }
1572
1573 return (0);
1574}
1575
1576static int
1577dump_block_stats(spa_t *spa)
1578{
1552 }
1553
1554 return (0);
1555}
1556
1557static int
1558dump_block_stats(spa_t *spa)
1559{
1579 traverse_handle_t *th;
1580 zdb_cb_t zcb = { 0 };
1560 zdb_cb_t zcb = { 0 };
1581 traverse_blk_cache_t dummy_cache = { 0 };
1582 zdb_blkstats_t *zb, *tzb;
1583 uint64_t alloc, space, logalloc;
1584 vdev_t *rvd = spa->spa_root_vdev;
1585 int leaks = 0;
1561 zdb_blkstats_t *zb, *tzb;
1562 uint64_t alloc, space, logalloc;
1563 vdev_t *rvd = spa->spa_root_vdev;
1564 int leaks = 0;
1586 int advance = zdb_advance;
1587 int c, e, flags;
1565 int c, e;
1588
1566
1589 zcb.zcb_cache = &dummy_cache;
1590
1591 if (dump_opt['c'] || dump_opt['S'])
1592 advance |= ADVANCE_DATA;
1593
1594 advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
1595
1596 if (!dump_opt['S']) {
1597 (void) printf("\nTraversing all blocks to %sverify"
1598 " nothing leaked ...\n",
1599 dump_opt['c'] ? "verify checksums and " : "");
1600 }
1601
1602 /*
1603 * Load all space maps as SM_ALLOC maps, then traverse the pool
1604 * claiming each block we discover. If the pool is perfectly
1605 * consistent, the space maps will be empty when we're done.
1606 * Anything left over is a leak; any block we can't claim (because
1607 * it's not part of any space map) is a double allocation,
1608 * reference to a freed block, or an unclaimed log block.
1609 */
1567 if (!dump_opt['S']) {
1568 (void) printf("\nTraversing all blocks to %sverify"
1569 " nothing leaked ...\n",
1570 dump_opt['c'] ? "verify checksums and " : "");
1571 }
1572
1573 /*
1574 * Load all space maps as SM_ALLOC maps, then traverse the pool
1575 * claiming each block we discover. If the pool is perfectly
1576 * consistent, the space maps will be empty when we're done.
1577 * Anything left over is a leak; any block we can't claim (because
1578 * it's not part of any space map) is a double allocation,
1579 * reference to a freed block, or an unclaimed log block.
1580 */
1610 if (!dump_opt['L'])
1611 zdb_leak_init(spa);
1581 zdb_leak_init(spa);
1612
1613 /*
1614 * If there's a deferred-free bplist, process that first.
1615 */
1616 if (spa->spa_sync_bplist_obj != 0) {
1617 bplist_t *bpl = &spa->spa_sync_bplist;
1618 blkptr_t blk;
1619 uint64_t itor = 0;

--- 9 unchanged lines hidden (view full) ---

1629 "deferred free", blkbuf);
1630 }
1631 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
1632 }
1633
1634 bplist_close(bpl);
1635 }
1636
1582
1583 /*
1584 * If there's a deferred-free bplist, process that first.
1585 */
1586 if (spa->spa_sync_bplist_obj != 0) {
1587 bplist_t *bpl = &spa->spa_sync_bplist;
1588 blkptr_t blk;
1589 uint64_t itor = 0;

--- 9 unchanged lines hidden (view full) ---

1599 "deferred free", blkbuf);
1600 }
1601 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
1602 }
1603
1604 bplist_close(bpl);
1605 }
1606
1637 /*
1638 * Now traverse the pool. If we're reading all data to verify
1639 * checksums, do a scrubbing read so that we validate all copies.
1640 */
1641 flags = ZIO_FLAG_CANFAIL;
1642 if (advance & ADVANCE_DATA)
1643 flags |= ZIO_FLAG_SCRUB;
1644 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
1645 th->th_noread = zdb_noread;
1607 zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
1646
1608
1647 traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
1648
1649 while (traverse_more(th) == EAGAIN)
1650 continue;
1651
1652 traverse_fini(th);
1653
1654 if (zcb.zcb_haderrors && !dump_opt['S']) {
1655 (void) printf("\nError counts:\n\n");
1656 (void) printf("\t%5s %s\n", "errno", "count");
1657 for (e = 0; e < 256; e++) {
1658 if (zcb.zcb_errors[e] != 0) {
1659 (void) printf("\t%5d %llu\n",
1660 e, (u_longlong_t)zcb.zcb_errors[e]);
1661 }
1662 }
1663 }
1664
1665 /*
1666 * Report any leaked segments.
1667 */
1609 if (zcb.zcb_haderrors && !dump_opt['S']) {
1610 (void) printf("\nError counts:\n\n");
1611 (void) printf("\t%5s %s\n", "errno", "count");
1612 for (e = 0; e < 256; e++) {
1613 if (zcb.zcb_errors[e] != 0) {
1614 (void) printf("\t%5d %llu\n",
1615 e, (u_longlong_t)zcb.zcb_errors[e]);
1616 }
1617 }
1618 }
1619
1620 /*
1621 * Report any leaked segments.
1622 */
1668 if (!dump_opt['L'])
1669 zdb_leak_fini(spa);
1623 zdb_leak_fini(spa);
1670
1671 /*
1672 * If we're interested in printing out the blkptr signatures,
1673 * return now as we don't print out anything else (including
1674 * errors and leaks).
1675 */
1676 if (dump_opt['S'])
1677 return (zcb.zcb_haderrors ? 3 : 0);
1678
1624
1625 /*
1626 * If we're interested in printing out the blkptr signatures,
1627 * return now as we don't print out anything else (including
1628 * errors and leaks).
1629 */
1630 if (dump_opt['S'])
1631 return (zcb.zcb_haderrors ? 3 : 0);
1632
1679 if (dump_opt['L'])
1680 (void) printf("\n\n *** Live pool traversal; "
1681 "block counts are only approximate ***\n\n");
1682
1683 alloc = spa_get_alloc(spa);
1684 space = spa_get_space(spa);
1685
1686 /*
1687 * Log blocks allocated from a separate log device don't count
1688 * as part of the normal pool space; factor them in here.
1689 */
1690 logalloc = 0;

--- 589 unchanged lines hidden (view full) ---

2280 int i, c;
2281 struct rlimit rl = { 1024, 1024 };
2282 spa_t *spa;
2283 objset_t *os = NULL;
2284 char *endstr;
2285 int dump_all = 1;
2286 int verbose = 0;
2287 int error;
1633 alloc = spa_get_alloc(spa);
1634 space = spa_get_space(spa);
1635
1636 /*
1637 * Log blocks allocated from a separate log device don't count
1638 * as part of the normal pool space; factor them in here.
1639 */
1640 logalloc = 0;

--- 589 unchanged lines hidden (view full) ---

2230 int i, c;
2231 struct rlimit rl = { 1024, 1024 };
2232 spa_t *spa;
2233 objset_t *os = NULL;
2234 char *endstr;
2235 int dump_all = 1;
2236 int verbose = 0;
2237 int error;
2288 int flag, set;
2289 int exported = 0;
2290 char *vdev_dir = NULL;
2291
2292 (void) setrlimit(RLIMIT_NOFILE, &rl);
2293 (void) enable_extended_FILE_stdio(-1, -1);
2294
2295 dprintf_setup(&argc, argv);
2296
2238 int exported = 0;
2239 char *vdev_dir = NULL;
2240
2241 (void) setrlimit(RLIMIT_NOFILE, &rl);
2242 (void) enable_extended_FILE_stdio(-1, -1);
2243
2244 dprintf_setup(&argc, argv);
2245
2297 while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) {
2246 while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
2298 switch (c) {
2299 case 'u':
2300 case 'd':
2301 case 'i':
2302 case 'b':
2303 case 'c':
2304 case 's':
2305 case 'C':
2306 case 'l':
2307 case 'R':
2308 dump_opt[c]++;
2309 dump_all = 0;
2310 break;
2247 switch (c) {
2248 case 'u':
2249 case 'd':
2250 case 'i':
2251 case 'b':
2252 case 'c':
2253 case 's':
2254 case 'C':
2255 case 'l':
2256 case 'R':
2257 dump_opt[c]++;
2258 dump_all = 0;
2259 break;
2311 case 'L':
2312 dump_opt[c]++;
2313 break;
2314 case 'O':
2315 endstr = optarg;
2316 if (endstr[0] == '!') {
2317 endstr++;
2318 set = 0;
2319 } else {
2320 set = 1;
2321 }
2322 if (strcmp(endstr, "post") == 0) {
2323 flag = ADVANCE_PRE;
2324 set = !set;
2325 } else if (strcmp(endstr, "pre") == 0) {
2326 flag = ADVANCE_PRE;
2327 } else if (strcmp(endstr, "prune") == 0) {
2328 flag = ADVANCE_PRUNE;
2329 } else if (strcmp(endstr, "data") == 0) {
2330 flag = ADVANCE_DATA;
2331 } else if (strcmp(endstr, "holes") == 0) {
2332 flag = ADVANCE_HOLES;
2333 } else {
2334 usage();
2335 }
2336 if (set)
2337 zdb_advance |= flag;
2338 else
2339 zdb_advance &= ~flag;
2340 break;
2341 case 'B':
2342 endstr = optarg - 1;
2343 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
2344 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
2345 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
2346 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
2347 (void) printf("simulating bad block "
2348 "<%llu, %llu, %lld, %llx>\n",
2349 (u_longlong_t)zdb_noread.zb_objset,
2350 (u_longlong_t)zdb_noread.zb_object,
2351 (u_longlong_t)zdb_noread.zb_level,
2352 (u_longlong_t)zdb_noread.zb_blkid);
2353 break;
2354 case 'v':
2355 verbose++;
2356 break;
2357 case 'U':
2358 spa_config_path = optarg;
2359 break;
2360 case 'e':
2361 exported = 1;

--- 20 unchanged lines hidden (view full) ---

2382 usage();
2383 break;
2384 default:
2385 usage();
2386 break;
2387 }
2388 }
2389
2260 case 'v':
2261 verbose++;
2262 break;
2263 case 'U':
2264 spa_config_path = optarg;
2265 break;
2266 case 'e':
2267 exported = 1;

--- 20 unchanged lines hidden (view full) ---

2288 usage();
2289 break;
2290 default:
2291 usage();
2292 break;
2293 }
2294 }
2295
2390 if (vdev_dir != NULL && exported == 0)
2391 (void) fatal("-p option requires use of -e\n");
2296 if (vdev_dir != NULL && exported == 0) {
2297 (void) fprintf(stderr, "-p option requires use of -e\n");
2298 usage();
2299 }
2392
2393 kernel_init(FREAD);
2394 g_zfs = libzfs_init();
2395 ASSERT(g_zfs != NULL);
2396
2300
2301 kernel_init(FREAD);
2302 g_zfs = libzfs_init();
2303 ASSERT(g_zfs != NULL);
2304
2397 /*
2398 * Disable vdev caching. If we don't do this, live pool traversal
2399 * won't make progress because it will never see disk updates.
2400 */
2401 zfs_vdev_cache_size = 0;
2402
2403 for (c = 0; c < 256; c++) {
2305 for (c = 0; c < 256; c++) {
2404 if (dump_all && c != 'L' && c != 'l' && c != 'R')
2306 if (dump_all && c != 'l' && c != 'R')
2405 dump_opt[c] = 1;
2406 if (dump_opt[c])
2407 dump_opt[c] += verbose;
2408 }
2409
2410 argc -= optind;
2411 argv += optind;
2412

--- 112 unchanged lines hidden ---
2307 dump_opt[c] = 1;
2308 if (dump_opt[c])
2309 dump_opt[c] += verbose;
2310 }
2311
2312 argc -= optind;
2313 argv += optind;
2314

--- 112 unchanged lines hidden ---