1219089Spjd/* 2219089Spjd * CDDL HEADER START 3219089Spjd * 4219089Spjd * The contents of this file are subject to the terms of the 5219089Spjd * Common Development and Distribution License (the "License"). 6219089Spjd * You may not use this file except in compliance with the License. 7219089Spjd * 8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9219089Spjd * or http://www.opensolaris.org/os/licensing. 10219089Spjd * See the License for the specific language governing permissions 11219089Spjd * and limitations under the License. 12219089Spjd * 13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each 14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15219089Spjd * If applicable, add the following below this CDDL HEADER, with the 16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18219089Spjd * 19219089Spjd * CDDL HEADER END 20219089Spjd */ 21219089Spjd 22219089Spjd/* 23219089Spjd * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24321610Smav * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 25219089Spjd */ 26219089Spjd 27219089Spjd#include <sys/zfs_context.h> 28219089Spjd#include <sys/spa.h> 29219089Spjd#include <sys/spa_impl.h> 30219089Spjd#include <sys/zio.h> 31219089Spjd#include <sys/ddt.h> 32219089Spjd#include <sys/zap.h> 33219089Spjd#include <sys/dmu_tx.h> 34219089Spjd#include <sys/arc.h> 35219089Spjd#include <sys/dsl_pool.h> 36219089Spjd#include <sys/zio_checksum.h> 37219089Spjd#include <sys/zio_compress.h> 38219089Spjd#include <sys/dsl_scan.h> 39321610Smav#include <sys/abd.h> 40219089Spjd 41219089Spjd/* 42219089Spjd * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 43219089Spjd */ 44219089Spjdint zfs_dedup_prefetch = 1; 45219089Spjd 46219089SpjdSYSCTL_DECL(_vfs_zfs); 47219089SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS DEDUP"); 48267992ShselaskySYSCTL_INT(_vfs_zfs_dedup, OID_AUTO, prefetch, CTLFLAG_RWTUN, &zfs_dedup_prefetch, 49219089Spjd 0, "Enable/disable prefetching of dedup-ed blocks which are going to be freed"); 50219089Spjd 51219089Spjdstatic const ddt_ops_t *ddt_ops[DDT_TYPES] = { 52219089Spjd &ddt_zap_ops, 53219089Spjd}; 54219089Spjd 55219089Spjdstatic const char *ddt_class_name[DDT_CLASSES] = { 56219089Spjd "ditto", 57219089Spjd "duplicate", 58219089Spjd "unique", 59219089Spjd}; 60219089Spjd 61219089Spjdstatic void 62219089Spjdddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 63219089Spjd dmu_tx_t *tx) 64219089Spjd{ 65219089Spjd spa_t *spa = ddt->ddt_spa; 66219089Spjd objset_t *os = ddt->ddt_os; 67219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 68289422Smav boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags & 69289422Smav ZCHECKSUM_FLAG_DEDUP; 70219089Spjd char name[DDT_NAMELEN]; 71219089Spjd 72219089Spjd ddt_object_name(ddt, type, class, name); 73219089Spjd 74219089Spjd ASSERT(*objectp == 0); 75219089Spjd VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); 76219089Spjd ASSERT(*objectp != 0); 77219089Spjd 78219089Spjd VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, 79219089Spjd sizeof (uint64_t), 1, objectp, tx) == 0); 80219089Spjd 81219089Spjd VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, 82219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 83219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 84219089Spjd} 85219089Spjd 86219089Spjdstatic void 87219089Spjdddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 88219089Spjd dmu_tx_t *tx) 89219089Spjd{ 90219089Spjd spa_t *spa = ddt->ddt_spa; 91219089Spjd objset_t *os = ddt->ddt_os; 92219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 93245264Sdelphij uint64_t count; 94219089Spjd char name[DDT_NAMELEN]; 95219089Spjd 96219089Spjd ddt_object_name(ddt, type, class, name); 97219089Spjd 98219089Spjd ASSERT(*objectp != 0); 99245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0); 100219089Spjd ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); 101219089Spjd VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); 102219089Spjd VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); 103219089Spjd VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); 104219089Spjd bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); 105219089Spjd 106219089Spjd *objectp = 0; 107219089Spjd} 108219089Spjd 109219089Spjdstatic int 110219089Spjdddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 111219089Spjd{ 112219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 113219089Spjd dmu_object_info_t doi; 114245264Sdelphij uint64_t count; 115219089Spjd char name[DDT_NAMELEN]; 116219089Spjd int error; 117219089Spjd 118219089Spjd ddt_object_name(ddt, type, class, name); 119219089Spjd 120219089Spjd error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, 121219089Spjd sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); 122219089Spjd 123260150Sdelphij if (error != 0) 124219089Spjd return (error); 125219089Spjd 126260150Sdelphij VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 127219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 128260150Sdelphij &ddt->ddt_histogram[type][class])); 129219089Spjd 130219089Spjd /* 131219089Spjd * Seed the cached statistics. 132219089Spjd */ 133219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 134219089Spjd 135245264Sdelphij error = ddt_object_count(ddt, type, class, &count); 136245264Sdelphij if (error) 137245264Sdelphij return error; 138245264Sdelphij 139245264Sdelphij ddo->ddo_count = count; 140219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 141219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 142219089Spjd 143260150Sdelphij return (0); 144219089Spjd} 145219089Spjd 146219089Spjdstatic void 147219089Spjdddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 148219089Spjd dmu_tx_t *tx) 149219089Spjd{ 150219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 151219089Spjd dmu_object_info_t doi; 152245264Sdelphij uint64_t count; 153219089Spjd char name[DDT_NAMELEN]; 154219089Spjd 155219089Spjd ddt_object_name(ddt, type, class, name); 156219089Spjd 157219089Spjd VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 158219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 159219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 160219089Spjd 161219089Spjd /* 162219089Spjd * Cache DDT statistics; this is the only time they'll change. 163219089Spjd */ 164219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 165245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0); 166219089Spjd 167245264Sdelphij ddo->ddo_count = count; 168219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 169219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 170219089Spjd} 171219089Spjd 172219089Spjdstatic int 173219089Spjdddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 174219089Spjd ddt_entry_t *dde) 175219089Spjd{ 176219089Spjd if (!ddt_object_exists(ddt, type, class)) 177249195Smm return (SET_ERROR(ENOENT)); 178219089Spjd 179219089Spjd return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, 180219089Spjd ddt->ddt_object[type][class], dde)); 181219089Spjd} 182219089Spjd 183219089Spjdstatic void 184219089Spjdddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 185219089Spjd ddt_entry_t *dde) 186219089Spjd{ 187219089Spjd if (!ddt_object_exists(ddt, type, class)) 188219089Spjd return; 189219089Spjd 190219089Spjd ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, 191219089Spjd ddt->ddt_object[type][class], dde); 192219089Spjd} 193219089Spjd 194219089Spjdint 195219089Spjdddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 196219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 197219089Spjd{ 198219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 199219089Spjd 200219089Spjd return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, 201219089Spjd ddt->ddt_object[type][class], dde, tx)); 202219089Spjd} 203219089Spjd 204219089Spjdstatic int 205219089Spjdddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 206219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 207219089Spjd{ 208219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 209219089Spjd 210219089Spjd return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, 211219089Spjd ddt->ddt_object[type][class], dde, tx)); 212219089Spjd} 213219089Spjd 214219089Spjdint 215219089Spjdddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 216219089Spjd uint64_t *walk, ddt_entry_t *dde) 217219089Spjd{ 218219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 219219089Spjd 220219089Spjd return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, 221219089Spjd ddt->ddt_object[type][class], dde, walk)); 222219089Spjd} 223219089Spjd 224245264Sdelphijint 225245264Sdelphijddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class, uint64_t *count) 226219089Spjd{ 227219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 228219089Spjd 229219089Spjd return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, 230245264Sdelphij ddt->ddt_object[type][class], count)); 231219089Spjd} 232219089Spjd 233219089Spjdint 234219089Spjdddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 235219089Spjd dmu_object_info_t *doi) 236219089Spjd{ 237219089Spjd if (!ddt_object_exists(ddt, type, class)) 238249195Smm return (SET_ERROR(ENOENT)); 239219089Spjd 240219089Spjd return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], 241219089Spjd doi)); 242219089Spjd} 243219089Spjd 244219089Spjdboolean_t 245219089Spjdddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 246219089Spjd{ 247219089Spjd return (!!ddt->ddt_object[type][class]); 248219089Spjd} 249219089Spjd 250219089Spjdvoid 251219089Spjdddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 252219089Spjd char *name) 253219089Spjd{ 254219089Spjd (void) sprintf(name, DMU_POOL_DDT, 255219089Spjd zio_checksum_table[ddt->ddt_checksum].ci_name, 256219089Spjd ddt_ops[type]->ddt_op_name, ddt_class_name[class]); 257219089Spjd} 258219089Spjd 259219089Spjdvoid 260219089Spjdddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) 261219089Spjd{ 262219089Spjd ASSERT(txg != 0); 263219089Spjd 264219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 265219089Spjd bp->blk_dva[d] = ddp->ddp_dva[d]; 266219089Spjd BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); 267219089Spjd} 268219089Spjd 269219089Spjdvoid 270219089Spjdddt_bp_create(enum zio_checksum checksum, 271219089Spjd const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) 272219089Spjd{ 273219089Spjd BP_ZERO(bp); 274219089Spjd 275219089Spjd if (ddp != NULL) 276219089Spjd ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); 277219089Spjd 278219089Spjd bp->blk_cksum = ddk->ddk_cksum; 279219089Spjd bp->blk_fill = 1; 280219089Spjd 281219089Spjd BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); 282219089Spjd BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); 283219089Spjd BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); 284219089Spjd BP_SET_CHECKSUM(bp, checksum); 285219089Spjd BP_SET_TYPE(bp, DMU_OT_DEDUP); 286219089Spjd BP_SET_LEVEL(bp, 0); 287219089Spjd BP_SET_DEDUP(bp, 0); 288219089Spjd BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 289219089Spjd} 290219089Spjd 291219089Spjdvoid 292219089Spjdddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) 293219089Spjd{ 294219089Spjd ddk->ddk_cksum = bp->blk_cksum; 295219089Spjd ddk->ddk_prop = 0; 296219089Spjd 297219089Spjd DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); 298219089Spjd DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); 299219089Spjd DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); 300219089Spjd} 301219089Spjd 302219089Spjdvoid 303219089Spjdddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) 304219089Spjd{ 305219089Spjd ASSERT(ddp->ddp_phys_birth == 0); 306219089Spjd 307219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 308219089Spjd ddp->ddp_dva[d] = bp->blk_dva[d]; 309219089Spjd ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); 310219089Spjd} 311219089Spjd 312219089Spjdvoid 313219089Spjdddt_phys_clear(ddt_phys_t *ddp) 314219089Spjd{ 315219089Spjd bzero(ddp, sizeof (*ddp)); 316219089Spjd} 317219089Spjd 318219089Spjdvoid 319219089Spjdddt_phys_addref(ddt_phys_t *ddp) 320219089Spjd{ 321219089Spjd ddp->ddp_refcnt++; 322219089Spjd} 323219089Spjd 324219089Spjdvoid 325219089Spjdddt_phys_decref(ddt_phys_t *ddp) 326219089Spjd{ 327219089Spjd ASSERT((int64_t)ddp->ddp_refcnt > 0); 328219089Spjd ddp->ddp_refcnt--; 329219089Spjd} 330219089Spjd 331219089Spjdvoid 332219089Spjdddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) 333219089Spjd{ 334219089Spjd blkptr_t blk; 335219089Spjd 336219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 337219089Spjd ddt_phys_clear(ddp); 338219089Spjd zio_free(ddt->ddt_spa, txg, &blk); 339219089Spjd} 340219089Spjd 341219089Spjdddt_phys_t * 342219089Spjdddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) 343219089Spjd{ 344219089Spjd ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; 345219089Spjd 346219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 347219089Spjd if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && 348219089Spjd BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) 349219089Spjd return (ddp); 350219089Spjd } 351219089Spjd return (NULL); 352219089Spjd} 353219089Spjd 354219089Spjduint64_t 355219089Spjdddt_phys_total_refcnt(const ddt_entry_t *dde) 356219089Spjd{ 357219089Spjd uint64_t refcnt = 0; 358219089Spjd 359219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) 360219089Spjd refcnt += dde->dde_phys[p].ddp_refcnt; 361219089Spjd 362219089Spjd return (refcnt); 363219089Spjd} 364219089Spjd 365219089Spjdstatic void 366219089Spjdddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) 367219089Spjd{ 368219089Spjd spa_t *spa = ddt->ddt_spa; 369219089Spjd ddt_phys_t *ddp = dde->dde_phys; 370219089Spjd ddt_key_t *ddk = &dde->dde_key; 371219089Spjd uint64_t lsize = DDK_GET_LSIZE(ddk); 372219089Spjd uint64_t psize = DDK_GET_PSIZE(ddk); 373219089Spjd 374219089Spjd bzero(dds, sizeof (*dds)); 375219089Spjd 376219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 377219089Spjd uint64_t dsize = 0; 378219089Spjd uint64_t refcnt = ddp->ddp_refcnt; 379219089Spjd 380219089Spjd if (ddp->ddp_phys_birth == 0) 381219089Spjd continue; 382219089Spjd 383219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 384219089Spjd dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); 385219089Spjd 386219089Spjd dds->dds_blocks += 1; 387219089Spjd dds->dds_lsize += lsize; 388219089Spjd dds->dds_psize += psize; 389219089Spjd dds->dds_dsize += dsize; 390219089Spjd 391219089Spjd dds->dds_ref_blocks += refcnt; 392219089Spjd dds->dds_ref_lsize += lsize * refcnt; 393219089Spjd dds->dds_ref_psize += psize * refcnt; 394219089Spjd dds->dds_ref_dsize += dsize * refcnt; 395219089Spjd } 396219089Spjd} 397219089Spjd 398219089Spjdvoid 399219089Spjdddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) 400219089Spjd{ 401219089Spjd const uint64_t *s = (const uint64_t *)src; 402219089Spjd uint64_t *d = (uint64_t *)dst; 403219089Spjd uint64_t *d_end = (uint64_t *)(dst + 1); 404219089Spjd 405219089Spjd ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ 406219089Spjd 407219089Spjd while (d < d_end) 408219089Spjd *d++ += (*s++ ^ neg) - neg; 409219089Spjd} 410219089Spjd 411219089Spjdstatic void 412219089Spjdddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) 413219089Spjd{ 414219089Spjd ddt_stat_t dds; 415219089Spjd ddt_histogram_t *ddh; 416219089Spjd int bucket; 417219089Spjd 418219089Spjd ddt_stat_generate(ddt, dde, &dds); 419219089Spjd 420264669Sdelphij bucket = highbit64(dds.dds_ref_blocks) - 1; 421219089Spjd ASSERT(bucket >= 0); 422219089Spjd 423219089Spjd ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; 424219089Spjd 425219089Spjd ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); 426219089Spjd} 427219089Spjd 428219089Spjdvoid 429219089Spjdddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) 430219089Spjd{ 431219089Spjd for (int h = 0; h < 64; h++) 432219089Spjd ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); 433219089Spjd} 434219089Spjd 435219089Spjdvoid 436219089Spjdddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) 437219089Spjd{ 438219089Spjd bzero(dds, sizeof (*dds)); 439219089Spjd 440219089Spjd for (int h = 0; h < 64; h++) 441219089Spjd ddt_stat_add(dds, &ddh->ddh_stat[h], 0); 442219089Spjd} 443219089Spjd 444219089Spjdboolean_t 445219089Spjdddt_histogram_empty(const ddt_histogram_t *ddh) 446219089Spjd{ 447219089Spjd const uint64_t *s = (const uint64_t *)ddh; 448219089Spjd const uint64_t *s_end = (const uint64_t *)(ddh + 1); 449219089Spjd 450219089Spjd while (s < s_end) 451219089Spjd if (*s++ != 0) 452219089Spjd return (B_FALSE); 453219089Spjd 454219089Spjd return (B_TRUE); 455219089Spjd} 456219089Spjd 457219089Spjdvoid 458219089Spjdddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) 459219089Spjd{ 460219089Spjd /* Sum the statistics we cached in ddt_object_sync(). */ 461219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 462219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 463219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 464219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 465219089Spjd class++) { 466219089Spjd ddt_object_t *ddo = 467219089Spjd &ddt->ddt_object_stats[type][class]; 468219089Spjd ddo_total->ddo_count += ddo->ddo_count; 469219089Spjd ddo_total->ddo_dspace += ddo->ddo_dspace; 470219089Spjd ddo_total->ddo_mspace += ddo->ddo_mspace; 471219089Spjd } 472219089Spjd } 473219089Spjd } 474219089Spjd 475219089Spjd /* ... and compute the averages. */ 476219089Spjd if (ddo_total->ddo_count != 0) { 477219089Spjd ddo_total->ddo_dspace /= ddo_total->ddo_count; 478219089Spjd ddo_total->ddo_mspace /= ddo_total->ddo_count; 479219089Spjd } 480219089Spjd} 481219089Spjd 482219089Spjdvoid 483219089Spjdddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) 484219089Spjd{ 485219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 486219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 487219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 488219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 489219089Spjd class++) { 490219089Spjd ddt_histogram_add(ddh, 491219089Spjd &ddt->ddt_histogram_cache[type][class]); 492219089Spjd } 493219089Spjd } 494219089Spjd } 495219089Spjd} 496219089Spjd 497219089Spjdvoid 498219089Spjdddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) 499219089Spjd{ 500219089Spjd ddt_histogram_t *ddh_total; 501219089Spjd 502219089Spjd ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); 503219089Spjd ddt_get_dedup_histogram(spa, ddh_total); 504219089Spjd ddt_histogram_stat(dds_total, ddh_total); 505219089Spjd kmem_free(ddh_total, sizeof (ddt_histogram_t)); 506219089Spjd} 507219089Spjd 508219089Spjduint64_t 509219089Spjdddt_get_dedup_dspace(spa_t *spa) 510219089Spjd{ 511219089Spjd ddt_stat_t dds_total = { 0 }; 512219089Spjd 513219089Spjd ddt_get_dedup_stats(spa, &dds_total); 514219089Spjd return (dds_total.dds_ref_dsize - dds_total.dds_dsize); 515219089Spjd} 516219089Spjd 517219089Spjduint64_t 518219089Spjdddt_get_pool_dedup_ratio(spa_t *spa) 519219089Spjd{ 520219089Spjd ddt_stat_t dds_total = { 0 }; 521219089Spjd 522219089Spjd ddt_get_dedup_stats(spa, &dds_total); 523219089Spjd if (dds_total.dds_dsize == 0) 524219089Spjd return (100); 525219089Spjd 526219089Spjd return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); 527219089Spjd} 528219089Spjd 529219089Spjdint 530219089Spjdddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) 531219089Spjd{ 532219089Spjd spa_t *spa = ddt->ddt_spa; 533219089Spjd uint64_t total_refcnt = 0; 534219089Spjd uint64_t ditto = spa->spa_dedup_ditto; 535219089Spjd int total_copies = 0; 536219089Spjd int desired_copies = 0; 537219089Spjd 538219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { 539219089Spjd ddt_phys_t *ddp = &dde->dde_phys[p]; 540219089Spjd zio_t *zio = dde->dde_lead_zio[p]; 541219089Spjd uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ 542219089Spjd if (zio != NULL) 543219089Spjd refcnt += zio->io_parent_count; /* pending refs */ 544219089Spjd if (ddp == ddp_willref) 545219089Spjd refcnt++; /* caller's ref */ 546219089Spjd if (refcnt != 0) { 547219089Spjd total_refcnt += refcnt; 548219089Spjd total_copies += p; 549219089Spjd } 550219089Spjd } 551219089Spjd 552219089Spjd if (ditto == 0 || ditto > UINT32_MAX) 553219089Spjd ditto = UINT32_MAX; 554219089Spjd 555219089Spjd if (total_refcnt >= 1) 556219089Spjd desired_copies++; 557219089Spjd if (total_refcnt >= ditto) 558219089Spjd desired_copies++; 559219089Spjd if (total_refcnt >= ditto * ditto) 560219089Spjd desired_copies++; 561219089Spjd 562219089Spjd return (MAX(desired_copies, total_copies) - total_copies); 563219089Spjd} 564219089Spjd 565219089Spjdint 566219089Spjdddt_ditto_copies_present(ddt_entry_t *dde) 567219089Spjd{ 568219089Spjd ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; 569219089Spjd dva_t *dva = ddp->ddp_dva; 570219089Spjd int copies = 0 - DVA_GET_GANG(dva); 571219089Spjd 572219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) 573219089Spjd if (DVA_IS_VALID(dva)) 574219089Spjd copies++; 575219089Spjd 576219089Spjd ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); 577219089Spjd 578219089Spjd return (copies); 579219089Spjd} 580219089Spjd 581219089Spjdsize_t 582219089Spjdddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) 583219089Spjd{ 584219089Spjd uchar_t *version = dst++; 585219089Spjd int cpfunc = ZIO_COMPRESS_ZLE; 586219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 587219089Spjd size_t c_len; 588219089Spjd 589219089Spjd ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ 590219089Spjd 591219089Spjd c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); 592219089Spjd 593219089Spjd if (c_len == s_len) { 594219089Spjd cpfunc = ZIO_COMPRESS_OFF; 595219089Spjd bcopy(src, dst, s_len); 596219089Spjd } 597219089Spjd 598260150Sdelphij *version = cpfunc; 599260150Sdelphij /* CONSTCOND */ 600260150Sdelphij if (ZFS_HOST_BYTEORDER) 601260150Sdelphij *version |= DDT_COMPRESS_BYTEORDER_MASK; 602219089Spjd 603219089Spjd return (c_len + 1); 604219089Spjd} 605219089Spjd 606219089Spjdvoid 607219089Spjdddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 608219089Spjd{ 609219089Spjd uchar_t version = *src++; 610219089Spjd int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; 611219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 612219089Spjd 613219089Spjd if (ci->ci_decompress != NULL) 614219089Spjd (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 615219089Spjd else 616219089Spjd bcopy(src, dst, d_len); 617219089Spjd 618260150Sdelphij if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) != 619260150Sdelphij (ZFS_HOST_BYTEORDER != 0)) 620219089Spjd byteswap_uint64_array(dst, d_len); 621219089Spjd} 622219089Spjd 623219089Spjdddt_t * 624219089Spjdddt_select_by_checksum(spa_t *spa, enum zio_checksum c) 625219089Spjd{ 626219089Spjd return (spa->spa_ddt[c]); 627219089Spjd} 628219089Spjd 629219089Spjdddt_t * 630219089Spjdddt_select(spa_t *spa, const blkptr_t *bp) 631219089Spjd{ 632219089Spjd return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); 633219089Spjd} 634219089Spjd 635219089Spjdvoid 636219089Spjdddt_enter(ddt_t *ddt) 637219089Spjd{ 638219089Spjd mutex_enter(&ddt->ddt_lock); 639219089Spjd} 640219089Spjd 641219089Spjdvoid 642219089Spjdddt_exit(ddt_t *ddt) 643219089Spjd{ 644219089Spjd mutex_exit(&ddt->ddt_lock); 645219089Spjd} 646219089Spjd 647219089Spjdstatic ddt_entry_t * 648219089Spjdddt_alloc(const ddt_key_t *ddk) 649219089Spjd{ 650219089Spjd ddt_entry_t *dde; 651219089Spjd 652219089Spjd dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); 653219089Spjd cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); 654219089Spjd 655219089Spjd dde->dde_key = *ddk; 656219089Spjd 657219089Spjd return (dde); 658219089Spjd} 659219089Spjd 660219089Spjdstatic void 661219089Spjdddt_free(ddt_entry_t *dde) 662219089Spjd{ 663219089Spjd ASSERT(!dde->dde_loading); 664219089Spjd 665219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++) 666219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 667219089Spjd 668321610Smav if (dde->dde_repair_abd != NULL) 669321610Smav abd_free(dde->dde_repair_abd); 670219089Spjd 671219089Spjd cv_destroy(&dde->dde_cv); 672219089Spjd kmem_free(dde, sizeof (*dde)); 673219089Spjd} 674219089Spjd 675219089Spjdvoid 676219089Spjdddt_remove(ddt_t *ddt, ddt_entry_t *dde) 677219089Spjd{ 678219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 679219089Spjd 680219089Spjd avl_remove(&ddt->ddt_tree, dde); 681219089Spjd ddt_free(dde); 682219089Spjd} 683219089Spjd 684219089Spjdddt_entry_t * 685219089Spjdddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) 686219089Spjd{ 687219089Spjd ddt_entry_t *dde, dde_search; 688219089Spjd enum ddt_type type; 689219089Spjd enum ddt_class class; 690219089Spjd avl_index_t where; 691219089Spjd int error; 692219089Spjd 693219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 694219089Spjd 695219089Spjd ddt_key_fill(&dde_search.dde_key, bp); 696219089Spjd 697219089Spjd dde = avl_find(&ddt->ddt_tree, &dde_search, &where); 698219089Spjd if (dde == NULL) { 699219089Spjd if (!add) 700219089Spjd return (NULL); 701219089Spjd dde = ddt_alloc(&dde_search.dde_key); 702219089Spjd avl_insert(&ddt->ddt_tree, dde, where); 703219089Spjd } 704219089Spjd 705219089Spjd while (dde->dde_loading) 706219089Spjd cv_wait(&dde->dde_cv, &ddt->ddt_lock); 707219089Spjd 708219089Spjd if (dde->dde_loaded) 709219089Spjd return (dde); 710219089Spjd 711219089Spjd dde->dde_loading = B_TRUE; 712219089Spjd 713219089Spjd ddt_exit(ddt); 714219089Spjd 715219089Spjd error = ENOENT; 716219089Spjd 717219089Spjd for (type = 0; type < DDT_TYPES; type++) { 718219089Spjd for (class = 0; class < DDT_CLASSES; class++) { 719219089Spjd error = ddt_object_lookup(ddt, type, class, dde); 720332525Smav if (error != ENOENT) { 721332525Smav ASSERT0(error); 722219089Spjd break; 723332525Smav } 724219089Spjd } 725219089Spjd if (error != ENOENT) 726219089Spjd break; 727219089Spjd } 728219089Spjd 729219089Spjd ddt_enter(ddt); 730219089Spjd 731219089Spjd ASSERT(dde->dde_loaded == B_FALSE); 732219089Spjd ASSERT(dde->dde_loading == B_TRUE); 733219089Spjd 734219089Spjd dde->dde_type = type; /* will be DDT_TYPES if no entry found */ 735219089Spjd dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ 736219089Spjd dde->dde_loaded = B_TRUE; 737219089Spjd dde->dde_loading = B_FALSE; 738219089Spjd 739219089Spjd if (error == 0) 740219089Spjd ddt_stat_update(ddt, dde, -1ULL); 741219089Spjd 742219089Spjd cv_broadcast(&dde->dde_cv); 743219089Spjd 744219089Spjd return (dde); 745219089Spjd} 746219089Spjd 747219089Spjdvoid 748219089Spjdddt_prefetch(spa_t *spa, const blkptr_t *bp) 749219089Spjd{ 750219089Spjd ddt_t *ddt; 751219089Spjd ddt_entry_t dde; 752219089Spjd 753219089Spjd if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) 754219089Spjd return; 755219089Spjd 756219089Spjd /* 757219089Spjd * We only remove the DDT once all tables are empty and only 758219089Spjd * prefetch dedup blocks when there are entries in the DDT. 759219089Spjd * Thus no locking is required as the DDT can't disappear on us. 760219089Spjd */ 761219089Spjd ddt = ddt_select(spa, bp); 762219089Spjd ddt_key_fill(&dde.dde_key, bp); 763219089Spjd 764219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 765219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 766219089Spjd ddt_object_prefetch(ddt, type, class, &dde); 767219089Spjd } 768219089Spjd } 769219089Spjd} 770219089Spjd 771219089Spjdint 772219089Spjdddt_entry_compare(const void *x1, const void *x2) 773219089Spjd{ 774219089Spjd const ddt_entry_t *dde1 = x1; 775219089Spjd const ddt_entry_t *dde2 = x2; 776219089Spjd const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; 777219089Spjd const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; 778219089Spjd 779219089Spjd for (int i = 0; i < DDT_KEY_WORDS; i++) { 780219089Spjd if (u1[i] < u2[i]) 781219089Spjd return (-1); 782219089Spjd if (u1[i] > u2[i]) 783219089Spjd return (1); 784219089Spjd } 785219089Spjd 786219089Spjd return (0); 787219089Spjd} 788219089Spjd 789219089Spjdstatic ddt_t * 790219089Spjdddt_table_alloc(spa_t *spa, enum zio_checksum c) 791219089Spjd{ 792219089Spjd ddt_t *ddt; 793219089Spjd 794219089Spjd ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); 795219089Spjd 796219089Spjd mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); 797219089Spjd avl_create(&ddt->ddt_tree, ddt_entry_compare, 798219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 799219089Spjd avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, 800219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 801219089Spjd ddt->ddt_checksum = c; 802219089Spjd ddt->ddt_spa = spa; 803219089Spjd ddt->ddt_os = spa->spa_meta_objset; 804219089Spjd 805219089Spjd return (ddt); 806219089Spjd} 807219089Spjd 808219089Spjdstatic void 809219089Spjdddt_table_free(ddt_t *ddt) 810219089Spjd{ 811219089Spjd ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); 812219089Spjd ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); 813219089Spjd avl_destroy(&ddt->ddt_tree); 814219089Spjd avl_destroy(&ddt->ddt_repair_tree); 815219089Spjd mutex_destroy(&ddt->ddt_lock); 816219089Spjd kmem_free(ddt, sizeof (*ddt)); 817219089Spjd} 818219089Spjd 819219089Spjdvoid 820219089Spjdddt_create(spa_t *spa) 821219089Spjd{ 822219089Spjd spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; 823219089Spjd 824219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 825219089Spjd spa->spa_ddt[c] = ddt_table_alloc(spa, c); 826219089Spjd} 827219089Spjd 828219089Spjdint 829219089Spjdddt_load(spa_t *spa) 830219089Spjd{ 831219089Spjd int error; 832219089Spjd 833219089Spjd ddt_create(spa); 834219089Spjd 835219089Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 836219089Spjd DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 837219089Spjd &spa->spa_ddt_stat_object); 838219089Spjd 839219089Spjd if (error) 840219089Spjd return (error == ENOENT ? 0 : error); 841219089Spjd 842219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 843219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 844219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 845219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 846219089Spjd class++) { 847219089Spjd error = ddt_object_load(ddt, type, class); 848219089Spjd if (error != 0 && error != ENOENT) 849219089Spjd return (error); 850219089Spjd } 851219089Spjd } 852219089Spjd 853219089Spjd /* 854219089Spjd * Seed the cached histograms. 855219089Spjd */ 856219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 857219089Spjd sizeof (ddt->ddt_histogram)); 858219089Spjd } 859219089Spjd 860219089Spjd return (0); 861219089Spjd} 862219089Spjd 863219089Spjdvoid 864219089Spjdddt_unload(spa_t *spa) 865219089Spjd{ 866219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 867219089Spjd if (spa->spa_ddt[c]) { 868219089Spjd ddt_table_free(spa->spa_ddt[c]); 869219089Spjd spa->spa_ddt[c] = NULL; 870219089Spjd } 871219089Spjd } 872219089Spjd} 873219089Spjd 874219089Spjdboolean_t 875219089Spjdddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) 876219089Spjd{ 877219089Spjd ddt_t *ddt; 878219089Spjd ddt_entry_t dde; 879219089Spjd 880219089Spjd if (!BP_GET_DEDUP(bp)) 881219089Spjd return (B_FALSE); 882219089Spjd 883219089Spjd if (max_class == DDT_CLASS_UNIQUE) 884219089Spjd return (B_TRUE); 885219089Spjd 886219089Spjd ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; 887219089Spjd 888219089Spjd ddt_key_fill(&dde.dde_key, bp); 889219089Spjd 890219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) 891219089Spjd for (enum ddt_class class = 0; class <= max_class; class++) 892219089Spjd if (ddt_object_lookup(ddt, type, class, &dde) == 0) 893219089Spjd return (B_TRUE); 894219089Spjd 895219089Spjd return (B_FALSE); 896219089Spjd} 897219089Spjd 898219089Spjdddt_entry_t * 899219089Spjdddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 900219089Spjd{ 901219089Spjd ddt_key_t ddk; 902219089Spjd ddt_entry_t *dde; 903219089Spjd 904219089Spjd ddt_key_fill(&ddk, bp); 905219089Spjd 906219089Spjd dde = ddt_alloc(&ddk); 907219089Spjd 908219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 909219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 910219089Spjd /* 911219089Spjd * We can only do repair if there are multiple copies 912219089Spjd * of the block. For anything in the UNIQUE class, 913219089Spjd * there's definitely only one copy, so don't even try. 914219089Spjd */ 915219089Spjd if (class != DDT_CLASS_UNIQUE && 916219089Spjd ddt_object_lookup(ddt, type, class, dde) == 0) 917219089Spjd return (dde); 918219089Spjd } 919219089Spjd } 920219089Spjd 921219089Spjd bzero(dde->dde_phys, sizeof (dde->dde_phys)); 922219089Spjd 923219089Spjd return (dde); 924219089Spjd} 925219089Spjd 926219089Spjdvoid 927219089Spjdddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) 928219089Spjd{ 929219089Spjd avl_index_t where; 930219089Spjd 931219089Spjd ddt_enter(ddt); 932219089Spjd 933321610Smav if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) && 934219089Spjd avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) 935219089Spjd avl_insert(&ddt->ddt_repair_tree, dde, where); 936219089Spjd else 937219089Spjd ddt_free(dde); 938219089Spjd 939219089Spjd ddt_exit(ddt); 940219089Spjd} 941219089Spjd 942219089Spjdstatic void 943219089Spjdddt_repair_entry_done(zio_t *zio) 944219089Spjd{ 945219089Spjd ddt_entry_t *rdde = zio->io_private; 946219089Spjd 947219089Spjd ddt_free(rdde); 948219089Spjd} 949219089Spjd 950219089Spjdstatic void 951219089Spjdddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) 952219089Spjd{ 953219089Spjd ddt_phys_t *ddp = dde->dde_phys; 954219089Spjd ddt_phys_t *rddp = rdde->dde_phys; 955219089Spjd ddt_key_t *ddk = &dde->dde_key; 956219089Spjd ddt_key_t *rddk = &rdde->dde_key; 957219089Spjd zio_t *zio; 958219089Spjd blkptr_t blk; 959219089Spjd 960219089Spjd zio = zio_null(rio, rio->io_spa, NULL, 961219089Spjd ddt_repair_entry_done, rdde, rio->io_flags); 962219089Spjd 963219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { 964219089Spjd if (ddp->ddp_phys_birth == 0 || 965219089Spjd ddp->ddp_phys_birth != rddp->ddp_phys_birth || 966219089Spjd bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) 967219089Spjd continue; 968219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 969219089Spjd zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, 970321610Smav rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL, 971219089Spjd ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); 972219089Spjd } 973219089Spjd 974219089Spjd zio_nowait(zio); 975219089Spjd} 976219089Spjd 977219089Spjdstatic void 978219089Spjdddt_repair_table(ddt_t *ddt, zio_t *rio) 979219089Spjd{ 980219089Spjd spa_t *spa = ddt->ddt_spa; 981219089Spjd ddt_entry_t *dde, *rdde_next, *rdde; 982219089Spjd avl_tree_t *t = &ddt->ddt_repair_tree; 983219089Spjd blkptr_t blk; 984219089Spjd 985219089Spjd if (spa_sync_pass(spa) > 1) 986219089Spjd return; 987219089Spjd 988219089Spjd ddt_enter(ddt); 989219089Spjd for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { 990219089Spjd rdde_next = AVL_NEXT(t, rdde); 991219089Spjd avl_remove(&ddt->ddt_repair_tree, rdde); 992219089Spjd ddt_exit(ddt); 993219089Spjd ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); 994219089Spjd dde = ddt_repair_start(ddt, &blk); 995219089Spjd ddt_repair_entry(ddt, dde, rdde, rio); 996219089Spjd ddt_repair_done(ddt, dde); 997219089Spjd ddt_enter(ddt); 998219089Spjd } 999219089Spjd ddt_exit(ddt); 1000219089Spjd} 1001219089Spjd 1002219089Spjdstatic void 1003219089Spjdddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) 1004219089Spjd{ 1005219089Spjd dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; 1006219089Spjd ddt_phys_t *ddp = dde->dde_phys; 1007219089Spjd ddt_key_t *ddk = &dde->dde_key; 1008219089Spjd enum ddt_type otype = dde->dde_type; 1009219089Spjd enum ddt_type ntype = DDT_TYPE_CURRENT; 1010219089Spjd enum ddt_class oclass = dde->dde_class; 1011219089Spjd enum ddt_class nclass; 1012219089Spjd uint64_t total_refcnt = 0; 1013219089Spjd 1014219089Spjd ASSERT(dde->dde_loaded); 1015219089Spjd ASSERT(!dde->dde_loading); 1016219089Spjd 1017219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1018219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 1019219089Spjd ASSERT((int64_t)ddp->ddp_refcnt >= 0); 1020219089Spjd if (ddp->ddp_phys_birth == 0) { 1021219089Spjd ASSERT(ddp->ddp_refcnt == 0); 1022219089Spjd continue; 1023219089Spjd } 1024219089Spjd if (p == DDT_PHYS_DITTO) { 1025219089Spjd if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) 1026219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1027219089Spjd continue; 1028219089Spjd } 1029219089Spjd if (ddp->ddp_refcnt == 0) 1030219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1031219089Spjd total_refcnt += ddp->ddp_refcnt; 1032219089Spjd } 1033219089Spjd 1034219089Spjd if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) 1035219089Spjd nclass = DDT_CLASS_DITTO; 1036219089Spjd else if (total_refcnt > 1) 1037219089Spjd nclass = DDT_CLASS_DUPLICATE; 1038219089Spjd else 1039219089Spjd nclass = DDT_CLASS_UNIQUE; 1040219089Spjd 1041219089Spjd if (otype != DDT_TYPES && 1042219089Spjd (otype != ntype || oclass != nclass || total_refcnt == 0)) { 1043219089Spjd VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); 1044219089Spjd ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); 1045219089Spjd } 1046219089Spjd 1047219089Spjd if (total_refcnt != 0) { 1048219089Spjd dde->dde_type = ntype; 1049219089Spjd dde->dde_class = nclass; 1050219089Spjd ddt_stat_update(ddt, dde, 0); 1051219089Spjd if (!ddt_object_exists(ddt, ntype, nclass)) 1052219089Spjd ddt_object_create(ddt, ntype, nclass, tx); 1053219089Spjd VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); 1054219089Spjd 1055219089Spjd /* 1056219089Spjd * If the class changes, the order that we scan this bp 1057219089Spjd * changes. If it decreases, we could miss it, so 1058219089Spjd * scan it right now. (This covers both class changing 1059219089Spjd * while we are doing ddt_walk(), and when we are 1060219089Spjd * traversing.) 1061219089Spjd */ 1062219089Spjd if (nclass < oclass) { 1063219089Spjd dsl_scan_ddt_entry(dp->dp_scan, 1064219089Spjd ddt->ddt_checksum, dde, tx); 1065219089Spjd } 1066219089Spjd } 1067219089Spjd} 1068219089Spjd 1069219089Spjdstatic void 1070219089Spjdddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) 1071219089Spjd{ 1072219089Spjd spa_t *spa = ddt->ddt_spa; 1073219089Spjd ddt_entry_t *dde; 1074219089Spjd void *cookie = NULL; 1075219089Spjd 1076219089Spjd if (avl_numnodes(&ddt->ddt_tree) == 0) 1077219089Spjd return; 1078219089Spjd 1079219089Spjd ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); 1080219089Spjd 1081219089Spjd if (spa->spa_ddt_stat_object == 0) { 1082236884Smm spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os, 1083236884Smm DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT, 1084236884Smm DMU_POOL_DDT_STATS, tx); 1085219089Spjd } 1086219089Spjd 1087219089Spjd while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { 1088219089Spjd ddt_sync_entry(ddt, dde, tx, txg); 1089219089Spjd ddt_free(dde); 1090219089Spjd } 1091219089Spjd 1092219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 1093245264Sdelphij uint64_t add, count = 0; 1094219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1095219089Spjd if (ddt_object_exists(ddt, type, class)) { 1096219089Spjd ddt_object_sync(ddt, type, class, tx); 1097245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, 1098245264Sdelphij &add) == 0); 1099245264Sdelphij count += add; 1100219089Spjd } 1101219089Spjd } 1102219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1103219089Spjd if (count == 0 && ddt_object_exists(ddt, type, class)) 1104219089Spjd ddt_object_destroy(ddt, type, class, tx); 1105219089Spjd } 1106219089Spjd } 1107219089Spjd 1108219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 1109219089Spjd sizeof (ddt->ddt_histogram)); 1110219089Spjd} 1111219089Spjd 1112219089Spjdvoid 1113219089Spjdddt_sync(spa_t *spa, uint64_t txg) 1114219089Spjd{ 1115339034Ssef dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; 1116219089Spjd dmu_tx_t *tx; 1117339034Ssef zio_t *rio; 1118219089Spjd 1119219089Spjd ASSERT(spa_syncing_txg(spa) == txg); 1120219089Spjd 1121219089Spjd tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1122219089Spjd 1123339034Ssef rio = zio_root(spa, NULL, NULL, 1124339034Ssef ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SELF_HEAL); 1125339034Ssef 1126339034Ssef /* 1127339034Ssef * This function may cause an immediate scan of ddt blocks (see 1128339034Ssef * the comment above dsl_scan_ddt() for details). We set the 1129339034Ssef * scan's root zio here so that we can wait for any scan IOs in 1130339034Ssef * addition to the regular ddt IOs. 1131339034Ssef */ 1132339034Ssef ASSERT3P(scn->scn_zio_root, ==, NULL); 1133339034Ssef scn->scn_zio_root = rio; 1134339034Ssef 1135219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 1136219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 1137219089Spjd if (ddt == NULL) 1138219089Spjd continue; 1139219089Spjd ddt_sync_table(ddt, tx, txg); 1140219089Spjd ddt_repair_table(ddt, rio); 1141219089Spjd } 1142219089Spjd 1143219089Spjd (void) zio_wait(rio); 1144339034Ssef scn->scn_zio_root = NULL; 1145219089Spjd 1146219089Spjd dmu_tx_commit(tx); 1147219089Spjd} 1148219089Spjd 1149219089Spjdint 1150219089Spjdddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) 1151219089Spjd{ 1152219089Spjd do { 1153219089Spjd do { 1154219089Spjd do { 1155219089Spjd ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; 1156219089Spjd int error = ENOENT; 1157219089Spjd if (ddt_object_exists(ddt, ddb->ddb_type, 1158219089Spjd ddb->ddb_class)) { 1159219089Spjd error = ddt_object_walk(ddt, 1160219089Spjd ddb->ddb_type, ddb->ddb_class, 1161219089Spjd &ddb->ddb_cursor, dde); 1162219089Spjd } 1163219089Spjd dde->dde_type = ddb->ddb_type; 1164219089Spjd dde->dde_class = ddb->ddb_class; 1165219089Spjd if (error == 0) 1166219089Spjd return (0); 1167219089Spjd if (error != ENOENT) 1168219089Spjd return (error); 1169219089Spjd ddb->ddb_cursor = 0; 1170219089Spjd } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); 1171219089Spjd ddb->ddb_checksum = 0; 1172219089Spjd } while (++ddb->ddb_type < DDT_TYPES); 1173219089Spjd ddb->ddb_type = 0; 1174219089Spjd } while (++ddb->ddb_class < DDT_CLASSES); 1175219089Spjd 1176249195Smm return (SET_ERROR(ENOENT)); 1177219089Spjd} 1178