ddt.c revision 219089
1219089Spjd/* 2219089Spjd * CDDL HEADER START 3219089Spjd * 4219089Spjd * The contents of this file are subject to the terms of the 5219089Spjd * Common Development and Distribution License (the "License"). 6219089Spjd * You may not use this file except in compliance with the License. 7219089Spjd * 8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9219089Spjd * or http://www.opensolaris.org/os/licensing. 10219089Spjd * See the License for the specific language governing permissions 11219089Spjd * and limitations under the License. 12219089Spjd * 13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each 14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15219089Spjd * If applicable, add the following below this CDDL HEADER, with the 16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18219089Spjd * 19219089Spjd * CDDL HEADER END 20219089Spjd */ 21219089Spjd 22219089Spjd/* 23219089Spjd * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24219089Spjd */ 25219089Spjd 26219089Spjd#include <sys/zfs_context.h> 27219089Spjd#include <sys/spa.h> 28219089Spjd#include <sys/spa_impl.h> 29219089Spjd#include <sys/zio.h> 30219089Spjd#include <sys/ddt.h> 31219089Spjd#include <sys/zap.h> 32219089Spjd#include <sys/dmu_tx.h> 33219089Spjd#include <sys/arc.h> 34219089Spjd#include <sys/dsl_pool.h> 35219089Spjd#include <sys/zio_checksum.h> 36219089Spjd#include <sys/zio_compress.h> 37219089Spjd#include <sys/dsl_scan.h> 38219089Spjd 39219089Spjd/* 40219089Spjd * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 41219089Spjd */ 42219089Spjdint zfs_dedup_prefetch = 1; 43219089Spjd 44219089SpjdSYSCTL_DECL(_vfs_zfs); 45219089SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS DEDUP"); 46219089SpjdTUNABLE_INT("vfs.zfs.dedup.prefetch", &zfs_dedup_prefetch); 47219089SpjdSYSCTL_INT(_vfs_zfs_dedup, OID_AUTO, prefetch, CTLFLAG_RW, &zfs_dedup_prefetch, 48219089Spjd 0, "Enable/disable prefetching of dedup-ed blocks which are going to be freed"); 49219089Spjd 50219089Spjdstatic const ddt_ops_t *ddt_ops[DDT_TYPES] = { 51219089Spjd &ddt_zap_ops, 52219089Spjd}; 53219089Spjd 54219089Spjdstatic const char *ddt_class_name[DDT_CLASSES] = { 55219089Spjd "ditto", 56219089Spjd "duplicate", 57219089Spjd "unique", 58219089Spjd}; 59219089Spjd 60219089Spjdstatic void 61219089Spjdddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 62219089Spjd dmu_tx_t *tx) 63219089Spjd{ 64219089Spjd spa_t *spa = ddt->ddt_spa; 65219089Spjd objset_t *os = ddt->ddt_os; 66219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 67219089Spjd boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; 68219089Spjd char name[DDT_NAMELEN]; 69219089Spjd 70219089Spjd ddt_object_name(ddt, type, class, name); 71219089Spjd 72219089Spjd ASSERT(*objectp == 0); 73219089Spjd VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); 74219089Spjd ASSERT(*objectp != 0); 75219089Spjd 76219089Spjd VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, 77219089Spjd sizeof (uint64_t), 1, objectp, tx) == 0); 78219089Spjd 79219089Spjd VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, 80219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 81219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 82219089Spjd} 83219089Spjd 84219089Spjdstatic void 85219089Spjdddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 86219089Spjd dmu_tx_t *tx) 87219089Spjd{ 88219089Spjd spa_t *spa = ddt->ddt_spa; 89219089Spjd objset_t *os = ddt->ddt_os; 90219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 91219089Spjd char name[DDT_NAMELEN]; 92219089Spjd 93219089Spjd ddt_object_name(ddt, type, class, name); 94219089Spjd 95219089Spjd ASSERT(*objectp != 0); 96219089Spjd ASSERT(ddt_object_count(ddt, type, class) == 0); 97219089Spjd ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); 98219089Spjd VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); 99219089Spjd VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); 100219089Spjd VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); 101219089Spjd bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); 102219089Spjd 103219089Spjd *objectp = 0; 104219089Spjd} 105219089Spjd 106219089Spjdstatic int 107219089Spjdddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 108219089Spjd{ 109219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 110219089Spjd dmu_object_info_t doi; 111219089Spjd char name[DDT_NAMELEN]; 112219089Spjd int error; 113219089Spjd 114219089Spjd ddt_object_name(ddt, type, class, name); 115219089Spjd 116219089Spjd error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, 117219089Spjd sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); 118219089Spjd 119219089Spjd if (error) 120219089Spjd return (error); 121219089Spjd 122219089Spjd error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 123219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 124219089Spjd &ddt->ddt_histogram[type][class]); 125219089Spjd 126219089Spjd /* 127219089Spjd * Seed the cached statistics. 128219089Spjd */ 129219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 130219089Spjd 131219089Spjd ddo->ddo_count = ddt_object_count(ddt, type, class); 132219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 133219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 134219089Spjd 135219089Spjd ASSERT(error == 0); 136219089Spjd return (error); 137219089Spjd} 138219089Spjd 139219089Spjdstatic void 140219089Spjdddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 141219089Spjd dmu_tx_t *tx) 142219089Spjd{ 143219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 144219089Spjd dmu_object_info_t doi; 145219089Spjd char name[DDT_NAMELEN]; 146219089Spjd 147219089Spjd ddt_object_name(ddt, type, class, name); 148219089Spjd 149219089Spjd VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 150219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 151219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 152219089Spjd 153219089Spjd /* 154219089Spjd * Cache DDT statistics; this is the only time they'll change. 155219089Spjd */ 156219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 157219089Spjd 158219089Spjd ddo->ddo_count = ddt_object_count(ddt, type, class); 159219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 160219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 161219089Spjd} 162219089Spjd 163219089Spjdstatic int 164219089Spjdddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 165219089Spjd ddt_entry_t *dde) 166219089Spjd{ 167219089Spjd if (!ddt_object_exists(ddt, type, class)) 168219089Spjd return (ENOENT); 169219089Spjd 170219089Spjd return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, 171219089Spjd ddt->ddt_object[type][class], dde)); 172219089Spjd} 173219089Spjd 174219089Spjdstatic void 175219089Spjdddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 176219089Spjd ddt_entry_t *dde) 177219089Spjd{ 178219089Spjd if (!ddt_object_exists(ddt, type, class)) 179219089Spjd return; 180219089Spjd 181219089Spjd ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, 182219089Spjd ddt->ddt_object[type][class], dde); 183219089Spjd} 184219089Spjd 185219089Spjdint 186219089Spjdddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 187219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 188219089Spjd{ 189219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 190219089Spjd 191219089Spjd return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, 192219089Spjd ddt->ddt_object[type][class], dde, tx)); 193219089Spjd} 194219089Spjd 195219089Spjdstatic int 196219089Spjdddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 197219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 198219089Spjd{ 199219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 200219089Spjd 201219089Spjd return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, 202219089Spjd ddt->ddt_object[type][class], dde, tx)); 203219089Spjd} 204219089Spjd 205219089Spjdint 206219089Spjdddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 207219089Spjd uint64_t *walk, ddt_entry_t *dde) 208219089Spjd{ 209219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 210219089Spjd 211219089Spjd return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, 212219089Spjd ddt->ddt_object[type][class], dde, walk)); 213219089Spjd} 214219089Spjd 215219089Spjduint64_t 216219089Spjdddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 217219089Spjd{ 218219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 219219089Spjd 220219089Spjd return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, 221219089Spjd ddt->ddt_object[type][class])); 222219089Spjd} 223219089Spjd 224219089Spjdint 225219089Spjdddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 226219089Spjd dmu_object_info_t *doi) 227219089Spjd{ 228219089Spjd if (!ddt_object_exists(ddt, type, class)) 229219089Spjd return (ENOENT); 230219089Spjd 231219089Spjd return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], 232219089Spjd doi)); 233219089Spjd} 234219089Spjd 235219089Spjdboolean_t 236219089Spjdddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 237219089Spjd{ 238219089Spjd return (!!ddt->ddt_object[type][class]); 239219089Spjd} 240219089Spjd 241219089Spjdvoid 242219089Spjdddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 243219089Spjd char *name) 244219089Spjd{ 245219089Spjd (void) sprintf(name, DMU_POOL_DDT, 246219089Spjd zio_checksum_table[ddt->ddt_checksum].ci_name, 247219089Spjd ddt_ops[type]->ddt_op_name, ddt_class_name[class]); 248219089Spjd} 249219089Spjd 250219089Spjdvoid 251219089Spjdddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) 252219089Spjd{ 253219089Spjd ASSERT(txg != 0); 254219089Spjd 255219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 256219089Spjd bp->blk_dva[d] = ddp->ddp_dva[d]; 257219089Spjd BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); 258219089Spjd} 259219089Spjd 260219089Spjdvoid 261219089Spjdddt_bp_create(enum zio_checksum checksum, 262219089Spjd const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) 263219089Spjd{ 264219089Spjd BP_ZERO(bp); 265219089Spjd 266219089Spjd if (ddp != NULL) 267219089Spjd ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); 268219089Spjd 269219089Spjd bp->blk_cksum = ddk->ddk_cksum; 270219089Spjd bp->blk_fill = 1; 271219089Spjd 272219089Spjd BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); 273219089Spjd BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); 274219089Spjd BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); 275219089Spjd BP_SET_CHECKSUM(bp, checksum); 276219089Spjd BP_SET_TYPE(bp, DMU_OT_DEDUP); 277219089Spjd BP_SET_LEVEL(bp, 0); 278219089Spjd BP_SET_DEDUP(bp, 0); 279219089Spjd BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 280219089Spjd} 281219089Spjd 282219089Spjdvoid 283219089Spjdddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) 284219089Spjd{ 285219089Spjd ddk->ddk_cksum = bp->blk_cksum; 286219089Spjd ddk->ddk_prop = 0; 287219089Spjd 288219089Spjd DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); 289219089Spjd DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); 290219089Spjd DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); 291219089Spjd} 292219089Spjd 293219089Spjdvoid 294219089Spjdddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) 295219089Spjd{ 296219089Spjd ASSERT(ddp->ddp_phys_birth == 0); 297219089Spjd 298219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 299219089Spjd ddp->ddp_dva[d] = bp->blk_dva[d]; 300219089Spjd ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); 301219089Spjd} 302219089Spjd 303219089Spjdvoid 304219089Spjdddt_phys_clear(ddt_phys_t *ddp) 305219089Spjd{ 306219089Spjd bzero(ddp, sizeof (*ddp)); 307219089Spjd} 308219089Spjd 309219089Spjdvoid 310219089Spjdddt_phys_addref(ddt_phys_t *ddp) 311219089Spjd{ 312219089Spjd ddp->ddp_refcnt++; 313219089Spjd} 314219089Spjd 315219089Spjdvoid 316219089Spjdddt_phys_decref(ddt_phys_t *ddp) 317219089Spjd{ 318219089Spjd ASSERT((int64_t)ddp->ddp_refcnt > 0); 319219089Spjd ddp->ddp_refcnt--; 320219089Spjd} 321219089Spjd 322219089Spjdvoid 323219089Spjdddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) 324219089Spjd{ 325219089Spjd blkptr_t blk; 326219089Spjd 327219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 328219089Spjd ddt_phys_clear(ddp); 329219089Spjd zio_free(ddt->ddt_spa, txg, &blk); 330219089Spjd} 331219089Spjd 332219089Spjdddt_phys_t * 333219089Spjdddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) 334219089Spjd{ 335219089Spjd ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; 336219089Spjd 337219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 338219089Spjd if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && 339219089Spjd BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) 340219089Spjd return (ddp); 341219089Spjd } 342219089Spjd return (NULL); 343219089Spjd} 344219089Spjd 345219089Spjduint64_t 346219089Spjdddt_phys_total_refcnt(const ddt_entry_t *dde) 347219089Spjd{ 348219089Spjd uint64_t refcnt = 0; 349219089Spjd 350219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) 351219089Spjd refcnt += dde->dde_phys[p].ddp_refcnt; 352219089Spjd 353219089Spjd return (refcnt); 354219089Spjd} 355219089Spjd 356219089Spjdstatic void 357219089Spjdddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) 358219089Spjd{ 359219089Spjd spa_t *spa = ddt->ddt_spa; 360219089Spjd ddt_phys_t *ddp = dde->dde_phys; 361219089Spjd ddt_key_t *ddk = &dde->dde_key; 362219089Spjd uint64_t lsize = DDK_GET_LSIZE(ddk); 363219089Spjd uint64_t psize = DDK_GET_PSIZE(ddk); 364219089Spjd 365219089Spjd bzero(dds, sizeof (*dds)); 366219089Spjd 367219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 368219089Spjd uint64_t dsize = 0; 369219089Spjd uint64_t refcnt = ddp->ddp_refcnt; 370219089Spjd 371219089Spjd if (ddp->ddp_phys_birth == 0) 372219089Spjd continue; 373219089Spjd 374219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 375219089Spjd dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); 376219089Spjd 377219089Spjd dds->dds_blocks += 1; 378219089Spjd dds->dds_lsize += lsize; 379219089Spjd dds->dds_psize += psize; 380219089Spjd dds->dds_dsize += dsize; 381219089Spjd 382219089Spjd dds->dds_ref_blocks += refcnt; 383219089Spjd dds->dds_ref_lsize += lsize * refcnt; 384219089Spjd dds->dds_ref_psize += psize * refcnt; 385219089Spjd dds->dds_ref_dsize += dsize * refcnt; 386219089Spjd } 387219089Spjd} 388219089Spjd 389219089Spjdvoid 390219089Spjdddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) 391219089Spjd{ 392219089Spjd const uint64_t *s = (const uint64_t *)src; 393219089Spjd uint64_t *d = (uint64_t *)dst; 394219089Spjd uint64_t *d_end = (uint64_t *)(dst + 1); 395219089Spjd 396219089Spjd ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ 397219089Spjd 398219089Spjd while (d < d_end) 399219089Spjd *d++ += (*s++ ^ neg) - neg; 400219089Spjd} 401219089Spjd 402219089Spjdstatic void 403219089Spjdddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) 404219089Spjd{ 405219089Spjd ddt_stat_t dds; 406219089Spjd ddt_histogram_t *ddh; 407219089Spjd int bucket; 408219089Spjd 409219089Spjd ddt_stat_generate(ddt, dde, &dds); 410219089Spjd 411219089Spjd bucket = highbit(dds.dds_ref_blocks) - 1; 412219089Spjd ASSERT(bucket >= 0); 413219089Spjd 414219089Spjd ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; 415219089Spjd 416219089Spjd ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); 417219089Spjd} 418219089Spjd 419219089Spjdvoid 420219089Spjdddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) 421219089Spjd{ 422219089Spjd for (int h = 0; h < 64; h++) 423219089Spjd ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); 424219089Spjd} 425219089Spjd 426219089Spjdvoid 427219089Spjdddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) 428219089Spjd{ 429219089Spjd bzero(dds, sizeof (*dds)); 430219089Spjd 431219089Spjd for (int h = 0; h < 64; h++) 432219089Spjd ddt_stat_add(dds, &ddh->ddh_stat[h], 0); 433219089Spjd} 434219089Spjd 435219089Spjdboolean_t 436219089Spjdddt_histogram_empty(const ddt_histogram_t *ddh) 437219089Spjd{ 438219089Spjd const uint64_t *s = (const uint64_t *)ddh; 439219089Spjd const uint64_t *s_end = (const uint64_t *)(ddh + 1); 440219089Spjd 441219089Spjd while (s < s_end) 442219089Spjd if (*s++ != 0) 443219089Spjd return (B_FALSE); 444219089Spjd 445219089Spjd return (B_TRUE); 446219089Spjd} 447219089Spjd 448219089Spjdvoid 449219089Spjdddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) 450219089Spjd{ 451219089Spjd /* Sum the statistics we cached in ddt_object_sync(). */ 452219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 453219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 454219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 455219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 456219089Spjd class++) { 457219089Spjd ddt_object_t *ddo = 458219089Spjd &ddt->ddt_object_stats[type][class]; 459219089Spjd ddo_total->ddo_count += ddo->ddo_count; 460219089Spjd ddo_total->ddo_dspace += ddo->ddo_dspace; 461219089Spjd ddo_total->ddo_mspace += ddo->ddo_mspace; 462219089Spjd } 463219089Spjd } 464219089Spjd } 465219089Spjd 466219089Spjd /* ... and compute the averages. */ 467219089Spjd if (ddo_total->ddo_count != 0) { 468219089Spjd ddo_total->ddo_dspace /= ddo_total->ddo_count; 469219089Spjd ddo_total->ddo_mspace /= ddo_total->ddo_count; 470219089Spjd } 471219089Spjd} 472219089Spjd 473219089Spjdvoid 474219089Spjdddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) 475219089Spjd{ 476219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 477219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 478219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 479219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 480219089Spjd class++) { 481219089Spjd ddt_histogram_add(ddh, 482219089Spjd &ddt->ddt_histogram_cache[type][class]); 483219089Spjd } 484219089Spjd } 485219089Spjd } 486219089Spjd} 487219089Spjd 488219089Spjdvoid 489219089Spjdddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) 490219089Spjd{ 491219089Spjd ddt_histogram_t *ddh_total; 492219089Spjd 493219089Spjd ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); 494219089Spjd ddt_get_dedup_histogram(spa, ddh_total); 495219089Spjd ddt_histogram_stat(dds_total, ddh_total); 496219089Spjd kmem_free(ddh_total, sizeof (ddt_histogram_t)); 497219089Spjd} 498219089Spjd 499219089Spjduint64_t 500219089Spjdddt_get_dedup_dspace(spa_t *spa) 501219089Spjd{ 502219089Spjd ddt_stat_t dds_total = { 0 }; 503219089Spjd 504219089Spjd ddt_get_dedup_stats(spa, &dds_total); 505219089Spjd return (dds_total.dds_ref_dsize - dds_total.dds_dsize); 506219089Spjd} 507219089Spjd 508219089Spjduint64_t 509219089Spjdddt_get_pool_dedup_ratio(spa_t *spa) 510219089Spjd{ 511219089Spjd ddt_stat_t dds_total = { 0 }; 512219089Spjd 513219089Spjd ddt_get_dedup_stats(spa, &dds_total); 514219089Spjd if (dds_total.dds_dsize == 0) 515219089Spjd return (100); 516219089Spjd 517219089Spjd return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); 518219089Spjd} 519219089Spjd 520219089Spjdint 521219089Spjdddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) 522219089Spjd{ 523219089Spjd spa_t *spa = ddt->ddt_spa; 524219089Spjd uint64_t total_refcnt = 0; 525219089Spjd uint64_t ditto = spa->spa_dedup_ditto; 526219089Spjd int total_copies = 0; 527219089Spjd int desired_copies = 0; 528219089Spjd 529219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { 530219089Spjd ddt_phys_t *ddp = &dde->dde_phys[p]; 531219089Spjd zio_t *zio = dde->dde_lead_zio[p]; 532219089Spjd uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ 533219089Spjd if (zio != NULL) 534219089Spjd refcnt += zio->io_parent_count; /* pending refs */ 535219089Spjd if (ddp == ddp_willref) 536219089Spjd refcnt++; /* caller's ref */ 537219089Spjd if (refcnt != 0) { 538219089Spjd total_refcnt += refcnt; 539219089Spjd total_copies += p; 540219089Spjd } 541219089Spjd } 542219089Spjd 543219089Spjd if (ditto == 0 || ditto > UINT32_MAX) 544219089Spjd ditto = UINT32_MAX; 545219089Spjd 546219089Spjd if (total_refcnt >= 1) 547219089Spjd desired_copies++; 548219089Spjd if (total_refcnt >= ditto) 549219089Spjd desired_copies++; 550219089Spjd if (total_refcnt >= ditto * ditto) 551219089Spjd desired_copies++; 552219089Spjd 553219089Spjd return (MAX(desired_copies, total_copies) - total_copies); 554219089Spjd} 555219089Spjd 556219089Spjdint 557219089Spjdddt_ditto_copies_present(ddt_entry_t *dde) 558219089Spjd{ 559219089Spjd ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; 560219089Spjd dva_t *dva = ddp->ddp_dva; 561219089Spjd int copies = 0 - DVA_GET_GANG(dva); 562219089Spjd 563219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) 564219089Spjd if (DVA_IS_VALID(dva)) 565219089Spjd copies++; 566219089Spjd 567219089Spjd ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); 568219089Spjd 569219089Spjd return (copies); 570219089Spjd} 571219089Spjd 572219089Spjdsize_t 573219089Spjdddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) 574219089Spjd{ 575219089Spjd uchar_t *version = dst++; 576219089Spjd int cpfunc = ZIO_COMPRESS_ZLE; 577219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 578219089Spjd size_t c_len; 579219089Spjd 580219089Spjd ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ 581219089Spjd 582219089Spjd c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); 583219089Spjd 584219089Spjd if (c_len == s_len) { 585219089Spjd cpfunc = ZIO_COMPRESS_OFF; 586219089Spjd bcopy(src, dst, s_len); 587219089Spjd } 588219089Spjd 589219089Spjd *version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc; 590219089Spjd 591219089Spjd return (c_len + 1); 592219089Spjd} 593219089Spjd 594219089Spjdvoid 595219089Spjdddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 596219089Spjd{ 597219089Spjd uchar_t version = *src++; 598219089Spjd int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; 599219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 600219089Spjd 601219089Spjd if (ci->ci_decompress != NULL) 602219089Spjd (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 603219089Spjd else 604219089Spjd bcopy(src, dst, d_len); 605219089Spjd 606219089Spjd if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK) 607219089Spjd byteswap_uint64_array(dst, d_len); 608219089Spjd} 609219089Spjd 610219089Spjdddt_t * 611219089Spjdddt_select_by_checksum(spa_t *spa, enum zio_checksum c) 612219089Spjd{ 613219089Spjd return (spa->spa_ddt[c]); 614219089Spjd} 615219089Spjd 616219089Spjdddt_t * 617219089Spjdddt_select(spa_t *spa, const blkptr_t *bp) 618219089Spjd{ 619219089Spjd return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); 620219089Spjd} 621219089Spjd 622219089Spjdvoid 623219089Spjdddt_enter(ddt_t *ddt) 624219089Spjd{ 625219089Spjd mutex_enter(&ddt->ddt_lock); 626219089Spjd} 627219089Spjd 628219089Spjdvoid 629219089Spjdddt_exit(ddt_t *ddt) 630219089Spjd{ 631219089Spjd mutex_exit(&ddt->ddt_lock); 632219089Spjd} 633219089Spjd 634219089Spjdstatic ddt_entry_t * 635219089Spjdddt_alloc(const ddt_key_t *ddk) 636219089Spjd{ 637219089Spjd ddt_entry_t *dde; 638219089Spjd 639219089Spjd dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); 640219089Spjd cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); 641219089Spjd 642219089Spjd dde->dde_key = *ddk; 643219089Spjd 644219089Spjd return (dde); 645219089Spjd} 646219089Spjd 647219089Spjdstatic void 648219089Spjdddt_free(ddt_entry_t *dde) 649219089Spjd{ 650219089Spjd ASSERT(!dde->dde_loading); 651219089Spjd 652219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++) 653219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 654219089Spjd 655219089Spjd if (dde->dde_repair_data != NULL) 656219089Spjd zio_buf_free(dde->dde_repair_data, 657219089Spjd DDK_GET_PSIZE(&dde->dde_key)); 658219089Spjd 659219089Spjd cv_destroy(&dde->dde_cv); 660219089Spjd kmem_free(dde, sizeof (*dde)); 661219089Spjd} 662219089Spjd 663219089Spjdvoid 664219089Spjdddt_remove(ddt_t *ddt, ddt_entry_t *dde) 665219089Spjd{ 666219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 667219089Spjd 668219089Spjd avl_remove(&ddt->ddt_tree, dde); 669219089Spjd ddt_free(dde); 670219089Spjd} 671219089Spjd 672219089Spjdddt_entry_t * 673219089Spjdddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) 674219089Spjd{ 675219089Spjd ddt_entry_t *dde, dde_search; 676219089Spjd enum ddt_type type; 677219089Spjd enum ddt_class class; 678219089Spjd avl_index_t where; 679219089Spjd int error; 680219089Spjd 681219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 682219089Spjd 683219089Spjd ddt_key_fill(&dde_search.dde_key, bp); 684219089Spjd 685219089Spjd dde = avl_find(&ddt->ddt_tree, &dde_search, &where); 686219089Spjd if (dde == NULL) { 687219089Spjd if (!add) 688219089Spjd return (NULL); 689219089Spjd dde = ddt_alloc(&dde_search.dde_key); 690219089Spjd avl_insert(&ddt->ddt_tree, dde, where); 691219089Spjd } 692219089Spjd 693219089Spjd while (dde->dde_loading) 694219089Spjd cv_wait(&dde->dde_cv, &ddt->ddt_lock); 695219089Spjd 696219089Spjd if (dde->dde_loaded) 697219089Spjd return (dde); 698219089Spjd 699219089Spjd dde->dde_loading = B_TRUE; 700219089Spjd 701219089Spjd ddt_exit(ddt); 702219089Spjd 703219089Spjd error = ENOENT; 704219089Spjd 705219089Spjd for (type = 0; type < DDT_TYPES; type++) { 706219089Spjd for (class = 0; class < DDT_CLASSES; class++) { 707219089Spjd error = ddt_object_lookup(ddt, type, class, dde); 708219089Spjd if (error != ENOENT) 709219089Spjd break; 710219089Spjd } 711219089Spjd if (error != ENOENT) 712219089Spjd break; 713219089Spjd } 714219089Spjd 715219089Spjd ASSERT(error == 0 || error == ENOENT); 716219089Spjd 717219089Spjd ddt_enter(ddt); 718219089Spjd 719219089Spjd ASSERT(dde->dde_loaded == B_FALSE); 720219089Spjd ASSERT(dde->dde_loading == B_TRUE); 721219089Spjd 722219089Spjd dde->dde_type = type; /* will be DDT_TYPES if no entry found */ 723219089Spjd dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ 724219089Spjd dde->dde_loaded = B_TRUE; 725219089Spjd dde->dde_loading = B_FALSE; 726219089Spjd 727219089Spjd if (error == 0) 728219089Spjd ddt_stat_update(ddt, dde, -1ULL); 729219089Spjd 730219089Spjd cv_broadcast(&dde->dde_cv); 731219089Spjd 732219089Spjd return (dde); 733219089Spjd} 734219089Spjd 735219089Spjdvoid 736219089Spjdddt_prefetch(spa_t *spa, const blkptr_t *bp) 737219089Spjd{ 738219089Spjd ddt_t *ddt; 739219089Spjd ddt_entry_t dde; 740219089Spjd 741219089Spjd if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) 742219089Spjd return; 743219089Spjd 744219089Spjd /* 745219089Spjd * We only remove the DDT once all tables are empty and only 746219089Spjd * prefetch dedup blocks when there are entries in the DDT. 747219089Spjd * Thus no locking is required as the DDT can't disappear on us. 748219089Spjd */ 749219089Spjd ddt = ddt_select(spa, bp); 750219089Spjd ddt_key_fill(&dde.dde_key, bp); 751219089Spjd 752219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 753219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 754219089Spjd ddt_object_prefetch(ddt, type, class, &dde); 755219089Spjd } 756219089Spjd } 757219089Spjd} 758219089Spjd 759219089Spjdint 760219089Spjdddt_entry_compare(const void *x1, const void *x2) 761219089Spjd{ 762219089Spjd const ddt_entry_t *dde1 = x1; 763219089Spjd const ddt_entry_t *dde2 = x2; 764219089Spjd const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; 765219089Spjd const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; 766219089Spjd 767219089Spjd for (int i = 0; i < DDT_KEY_WORDS; i++) { 768219089Spjd if (u1[i] < u2[i]) 769219089Spjd return (-1); 770219089Spjd if (u1[i] > u2[i]) 771219089Spjd return (1); 772219089Spjd } 773219089Spjd 774219089Spjd return (0); 775219089Spjd} 776219089Spjd 777219089Spjdstatic ddt_t * 778219089Spjdddt_table_alloc(spa_t *spa, enum zio_checksum c) 779219089Spjd{ 780219089Spjd ddt_t *ddt; 781219089Spjd 782219089Spjd ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); 783219089Spjd 784219089Spjd mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); 785219089Spjd avl_create(&ddt->ddt_tree, ddt_entry_compare, 786219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 787219089Spjd avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, 788219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 789219089Spjd ddt->ddt_checksum = c; 790219089Spjd ddt->ddt_spa = spa; 791219089Spjd ddt->ddt_os = spa->spa_meta_objset; 792219089Spjd 793219089Spjd return (ddt); 794219089Spjd} 795219089Spjd 796219089Spjdstatic void 797219089Spjdddt_table_free(ddt_t *ddt) 798219089Spjd{ 799219089Spjd ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); 800219089Spjd ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); 801219089Spjd avl_destroy(&ddt->ddt_tree); 802219089Spjd avl_destroy(&ddt->ddt_repair_tree); 803219089Spjd mutex_destroy(&ddt->ddt_lock); 804219089Spjd kmem_free(ddt, sizeof (*ddt)); 805219089Spjd} 806219089Spjd 807219089Spjdvoid 808219089Spjdddt_create(spa_t *spa) 809219089Spjd{ 810219089Spjd spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; 811219089Spjd 812219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 813219089Spjd spa->spa_ddt[c] = ddt_table_alloc(spa, c); 814219089Spjd} 815219089Spjd 816219089Spjdint 817219089Spjdddt_load(spa_t *spa) 818219089Spjd{ 819219089Spjd int error; 820219089Spjd 821219089Spjd ddt_create(spa); 822219089Spjd 823219089Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 824219089Spjd DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 825219089Spjd &spa->spa_ddt_stat_object); 826219089Spjd 827219089Spjd if (error) 828219089Spjd return (error == ENOENT ? 0 : error); 829219089Spjd 830219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 831219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 832219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 833219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 834219089Spjd class++) { 835219089Spjd error = ddt_object_load(ddt, type, class); 836219089Spjd if (error != 0 && error != ENOENT) 837219089Spjd return (error); 838219089Spjd } 839219089Spjd } 840219089Spjd 841219089Spjd /* 842219089Spjd * Seed the cached histograms. 843219089Spjd */ 844219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 845219089Spjd sizeof (ddt->ddt_histogram)); 846219089Spjd } 847219089Spjd 848219089Spjd return (0); 849219089Spjd} 850219089Spjd 851219089Spjdvoid 852219089Spjdddt_unload(spa_t *spa) 853219089Spjd{ 854219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 855219089Spjd if (spa->spa_ddt[c]) { 856219089Spjd ddt_table_free(spa->spa_ddt[c]); 857219089Spjd spa->spa_ddt[c] = NULL; 858219089Spjd } 859219089Spjd } 860219089Spjd} 861219089Spjd 862219089Spjdboolean_t 863219089Spjdddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) 864219089Spjd{ 865219089Spjd ddt_t *ddt; 866219089Spjd ddt_entry_t dde; 867219089Spjd 868219089Spjd if (!BP_GET_DEDUP(bp)) 869219089Spjd return (B_FALSE); 870219089Spjd 871219089Spjd if (max_class == DDT_CLASS_UNIQUE) 872219089Spjd return (B_TRUE); 873219089Spjd 874219089Spjd ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; 875219089Spjd 876219089Spjd ddt_key_fill(&dde.dde_key, bp); 877219089Spjd 878219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) 879219089Spjd for (enum ddt_class class = 0; class <= max_class; class++) 880219089Spjd if (ddt_object_lookup(ddt, type, class, &dde) == 0) 881219089Spjd return (B_TRUE); 882219089Spjd 883219089Spjd return (B_FALSE); 884219089Spjd} 885219089Spjd 886219089Spjdddt_entry_t * 887219089Spjdddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 888219089Spjd{ 889219089Spjd ddt_key_t ddk; 890219089Spjd ddt_entry_t *dde; 891219089Spjd 892219089Spjd ddt_key_fill(&ddk, bp); 893219089Spjd 894219089Spjd dde = ddt_alloc(&ddk); 895219089Spjd 896219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 897219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 898219089Spjd /* 899219089Spjd * We can only do repair if there are multiple copies 900219089Spjd * of the block. For anything in the UNIQUE class, 901219089Spjd * there's definitely only one copy, so don't even try. 902219089Spjd */ 903219089Spjd if (class != DDT_CLASS_UNIQUE && 904219089Spjd ddt_object_lookup(ddt, type, class, dde) == 0) 905219089Spjd return (dde); 906219089Spjd } 907219089Spjd } 908219089Spjd 909219089Spjd bzero(dde->dde_phys, sizeof (dde->dde_phys)); 910219089Spjd 911219089Spjd return (dde); 912219089Spjd} 913219089Spjd 914219089Spjdvoid 915219089Spjdddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) 916219089Spjd{ 917219089Spjd avl_index_t where; 918219089Spjd 919219089Spjd ddt_enter(ddt); 920219089Spjd 921219089Spjd if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && 922219089Spjd avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) 923219089Spjd avl_insert(&ddt->ddt_repair_tree, dde, where); 924219089Spjd else 925219089Spjd ddt_free(dde); 926219089Spjd 927219089Spjd ddt_exit(ddt); 928219089Spjd} 929219089Spjd 930219089Spjdstatic void 931219089Spjdddt_repair_entry_done(zio_t *zio) 932219089Spjd{ 933219089Spjd ddt_entry_t *rdde = zio->io_private; 934219089Spjd 935219089Spjd ddt_free(rdde); 936219089Spjd} 937219089Spjd 938219089Spjdstatic void 939219089Spjdddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) 940219089Spjd{ 941219089Spjd ddt_phys_t *ddp = dde->dde_phys; 942219089Spjd ddt_phys_t *rddp = rdde->dde_phys; 943219089Spjd ddt_key_t *ddk = &dde->dde_key; 944219089Spjd ddt_key_t *rddk = &rdde->dde_key; 945219089Spjd zio_t *zio; 946219089Spjd blkptr_t blk; 947219089Spjd 948219089Spjd zio = zio_null(rio, rio->io_spa, NULL, 949219089Spjd ddt_repair_entry_done, rdde, rio->io_flags); 950219089Spjd 951219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { 952219089Spjd if (ddp->ddp_phys_birth == 0 || 953219089Spjd ddp->ddp_phys_birth != rddp->ddp_phys_birth || 954219089Spjd bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) 955219089Spjd continue; 956219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 957219089Spjd zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, 958219089Spjd rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, 959219089Spjd ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); 960219089Spjd } 961219089Spjd 962219089Spjd zio_nowait(zio); 963219089Spjd} 964219089Spjd 965219089Spjdstatic void 966219089Spjdddt_repair_table(ddt_t *ddt, zio_t *rio) 967219089Spjd{ 968219089Spjd spa_t *spa = ddt->ddt_spa; 969219089Spjd ddt_entry_t *dde, *rdde_next, *rdde; 970219089Spjd avl_tree_t *t = &ddt->ddt_repair_tree; 971219089Spjd blkptr_t blk; 972219089Spjd 973219089Spjd if (spa_sync_pass(spa) > 1) 974219089Spjd return; 975219089Spjd 976219089Spjd ddt_enter(ddt); 977219089Spjd for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { 978219089Spjd rdde_next = AVL_NEXT(t, rdde); 979219089Spjd avl_remove(&ddt->ddt_repair_tree, rdde); 980219089Spjd ddt_exit(ddt); 981219089Spjd ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); 982219089Spjd dde = ddt_repair_start(ddt, &blk); 983219089Spjd ddt_repair_entry(ddt, dde, rdde, rio); 984219089Spjd ddt_repair_done(ddt, dde); 985219089Spjd ddt_enter(ddt); 986219089Spjd } 987219089Spjd ddt_exit(ddt); 988219089Spjd} 989219089Spjd 990219089Spjdstatic void 991219089Spjdddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) 992219089Spjd{ 993219089Spjd dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; 994219089Spjd ddt_phys_t *ddp = dde->dde_phys; 995219089Spjd ddt_key_t *ddk = &dde->dde_key; 996219089Spjd enum ddt_type otype = dde->dde_type; 997219089Spjd enum ddt_type ntype = DDT_TYPE_CURRENT; 998219089Spjd enum ddt_class oclass = dde->dde_class; 999219089Spjd enum ddt_class nclass; 1000219089Spjd uint64_t total_refcnt = 0; 1001219089Spjd 1002219089Spjd ASSERT(dde->dde_loaded); 1003219089Spjd ASSERT(!dde->dde_loading); 1004219089Spjd 1005219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1006219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 1007219089Spjd ASSERT((int64_t)ddp->ddp_refcnt >= 0); 1008219089Spjd if (ddp->ddp_phys_birth == 0) { 1009219089Spjd ASSERT(ddp->ddp_refcnt == 0); 1010219089Spjd continue; 1011219089Spjd } 1012219089Spjd if (p == DDT_PHYS_DITTO) { 1013219089Spjd if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) 1014219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1015219089Spjd continue; 1016219089Spjd } 1017219089Spjd if (ddp->ddp_refcnt == 0) 1018219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1019219089Spjd total_refcnt += ddp->ddp_refcnt; 1020219089Spjd } 1021219089Spjd 1022219089Spjd if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) 1023219089Spjd nclass = DDT_CLASS_DITTO; 1024219089Spjd else if (total_refcnt > 1) 1025219089Spjd nclass = DDT_CLASS_DUPLICATE; 1026219089Spjd else 1027219089Spjd nclass = DDT_CLASS_UNIQUE; 1028219089Spjd 1029219089Spjd if (otype != DDT_TYPES && 1030219089Spjd (otype != ntype || oclass != nclass || total_refcnt == 0)) { 1031219089Spjd VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); 1032219089Spjd ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); 1033219089Spjd } 1034219089Spjd 1035219089Spjd if (total_refcnt != 0) { 1036219089Spjd dde->dde_type = ntype; 1037219089Spjd dde->dde_class = nclass; 1038219089Spjd ddt_stat_update(ddt, dde, 0); 1039219089Spjd if (!ddt_object_exists(ddt, ntype, nclass)) 1040219089Spjd ddt_object_create(ddt, ntype, nclass, tx); 1041219089Spjd VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); 1042219089Spjd 1043219089Spjd /* 1044219089Spjd * If the class changes, the order that we scan this bp 1045219089Spjd * changes. If it decreases, we could miss it, so 1046219089Spjd * scan it right now. (This covers both class changing 1047219089Spjd * while we are doing ddt_walk(), and when we are 1048219089Spjd * traversing.) 1049219089Spjd */ 1050219089Spjd if (nclass < oclass) { 1051219089Spjd dsl_scan_ddt_entry(dp->dp_scan, 1052219089Spjd ddt->ddt_checksum, dde, tx); 1053219089Spjd } 1054219089Spjd } 1055219089Spjd} 1056219089Spjd 1057219089Spjdstatic void 1058219089Spjdddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) 1059219089Spjd{ 1060219089Spjd spa_t *spa = ddt->ddt_spa; 1061219089Spjd ddt_entry_t *dde; 1062219089Spjd void *cookie = NULL; 1063219089Spjd 1064219089Spjd if (avl_numnodes(&ddt->ddt_tree) == 0) 1065219089Spjd return; 1066219089Spjd 1067219089Spjd ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); 1068219089Spjd 1069219089Spjd if (spa->spa_ddt_stat_object == 0) { 1070219089Spjd spa->spa_ddt_stat_object = zap_create(ddt->ddt_os, 1071219089Spjd DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx); 1072219089Spjd VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, 1073219089Spjd DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 1074219089Spjd &spa->spa_ddt_stat_object, tx) == 0); 1075219089Spjd } 1076219089Spjd 1077219089Spjd while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { 1078219089Spjd ddt_sync_entry(ddt, dde, tx, txg); 1079219089Spjd ddt_free(dde); 1080219089Spjd } 1081219089Spjd 1082219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 1083219089Spjd uint64_t count = 0; 1084219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1085219089Spjd if (ddt_object_exists(ddt, type, class)) { 1086219089Spjd ddt_object_sync(ddt, type, class, tx); 1087219089Spjd count += ddt_object_count(ddt, type, class); 1088219089Spjd } 1089219089Spjd } 1090219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1091219089Spjd if (count == 0 && ddt_object_exists(ddt, type, class)) 1092219089Spjd ddt_object_destroy(ddt, type, class, tx); 1093219089Spjd } 1094219089Spjd } 1095219089Spjd 1096219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 1097219089Spjd sizeof (ddt->ddt_histogram)); 1098219089Spjd} 1099219089Spjd 1100219089Spjdvoid 1101219089Spjdddt_sync(spa_t *spa, uint64_t txg) 1102219089Spjd{ 1103219089Spjd dmu_tx_t *tx; 1104219089Spjd zio_t *rio = zio_root(spa, NULL, NULL, 1105219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1106219089Spjd 1107219089Spjd ASSERT(spa_syncing_txg(spa) == txg); 1108219089Spjd 1109219089Spjd tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1110219089Spjd 1111219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 1112219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 1113219089Spjd if (ddt == NULL) 1114219089Spjd continue; 1115219089Spjd ddt_sync_table(ddt, tx, txg); 1116219089Spjd ddt_repair_table(ddt, rio); 1117219089Spjd } 1118219089Spjd 1119219089Spjd (void) zio_wait(rio); 1120219089Spjd 1121219089Spjd dmu_tx_commit(tx); 1122219089Spjd} 1123219089Spjd 1124219089Spjdint 1125219089Spjdddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) 1126219089Spjd{ 1127219089Spjd do { 1128219089Spjd do { 1129219089Spjd do { 1130219089Spjd ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; 1131219089Spjd int error = ENOENT; 1132219089Spjd if (ddt_object_exists(ddt, ddb->ddb_type, 1133219089Spjd ddb->ddb_class)) { 1134219089Spjd error = ddt_object_walk(ddt, 1135219089Spjd ddb->ddb_type, ddb->ddb_class, 1136219089Spjd &ddb->ddb_cursor, dde); 1137219089Spjd } 1138219089Spjd dde->dde_type = ddb->ddb_type; 1139219089Spjd dde->dde_class = ddb->ddb_class; 1140219089Spjd if (error == 0) 1141219089Spjd return (0); 1142219089Spjd if (error != ENOENT) 1143219089Spjd return (error); 1144219089Spjd ddb->ddb_cursor = 0; 1145219089Spjd } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); 1146219089Spjd ddb->ddb_checksum = 0; 1147219089Spjd } while (++ddb->ddb_type < DDT_TYPES); 1148219089Spjd ddb->ddb_type = 0; 1149219089Spjd } while (++ddb->ddb_class < DDT_CLASSES); 1150219089Spjd 1151219089Spjd return (ENOENT); 1152219089Spjd} 1153