1219089Spjd/* 2219089Spjd * CDDL HEADER START 3219089Spjd * 4219089Spjd * The contents of this file are subject to the terms of the 5219089Spjd * Common Development and Distribution License (the "License"). 6219089Spjd * You may not use this file except in compliance with the License. 7219089Spjd * 8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9219089Spjd * or http://www.opensolaris.org/os/licensing. 10219089Spjd * See the License for the specific language governing permissions 11219089Spjd * and limitations under the License. 12219089Spjd * 13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each 14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15219089Spjd * If applicable, add the following below this CDDL HEADER, with the 16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18219089Spjd * 19219089Spjd * CDDL HEADER END 20219089Spjd */ 21219089Spjd 22219089Spjd/* 23219089Spjd * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24265751Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 25219089Spjd */ 26219089Spjd 27219089Spjd#include <sys/zfs_context.h> 28219089Spjd#include <sys/spa.h> 29219089Spjd#include <sys/spa_impl.h> 30219089Spjd#include <sys/zio.h> 31219089Spjd#include <sys/ddt.h> 32219089Spjd#include <sys/zap.h> 33219089Spjd#include <sys/dmu_tx.h> 34219089Spjd#include <sys/arc.h> 35219089Spjd#include <sys/dsl_pool.h> 36219089Spjd#include <sys/zio_checksum.h> 37219089Spjd#include <sys/zio_compress.h> 38219089Spjd#include <sys/dsl_scan.h> 39219089Spjd 40219089Spjd/* 41219089Spjd * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 42219089Spjd */ 43219089Spjdint zfs_dedup_prefetch = 1; 44219089Spjd 45219089SpjdSYSCTL_DECL(_vfs_zfs); 46219089SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS DEDUP"); 47219089SpjdTUNABLE_INT("vfs.zfs.dedup.prefetch", &zfs_dedup_prefetch); 48219089SpjdSYSCTL_INT(_vfs_zfs_dedup, OID_AUTO, prefetch, CTLFLAG_RW, &zfs_dedup_prefetch, 49219089Spjd 0, "Enable/disable prefetching of dedup-ed blocks which are going to be freed"); 50219089Spjd 51219089Spjdstatic const ddt_ops_t *ddt_ops[DDT_TYPES] = { 52219089Spjd &ddt_zap_ops, 53219089Spjd}; 54219089Spjd 55219089Spjdstatic const char *ddt_class_name[DDT_CLASSES] = { 56219089Spjd "ditto", 57219089Spjd "duplicate", 58219089Spjd "unique", 59219089Spjd}; 60219089Spjd 61219089Spjdstatic void 62219089Spjdddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 63219089Spjd dmu_tx_t *tx) 64219089Spjd{ 65219089Spjd spa_t *spa = ddt->ddt_spa; 66219089Spjd objset_t *os = ddt->ddt_os; 67219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 68219089Spjd boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup; 69219089Spjd char name[DDT_NAMELEN]; 70219089Spjd 71219089Spjd ddt_object_name(ddt, type, class, name); 72219089Spjd 73219089Spjd ASSERT(*objectp == 0); 74219089Spjd VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); 75219089Spjd ASSERT(*objectp != 0); 76219089Spjd 77219089Spjd VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, 78219089Spjd sizeof (uint64_t), 1, objectp, tx) == 0); 79219089Spjd 80219089Spjd VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, 81219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 82219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 83219089Spjd} 84219089Spjd 85219089Spjdstatic void 86219089Spjdddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 87219089Spjd dmu_tx_t *tx) 88219089Spjd{ 89219089Spjd spa_t *spa = ddt->ddt_spa; 90219089Spjd objset_t *os = ddt->ddt_os; 91219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 92246574Sdelphij uint64_t count; 93219089Spjd char name[DDT_NAMELEN]; 94219089Spjd 95219089Spjd ddt_object_name(ddt, type, class, name); 96219089Spjd 97219089Spjd ASSERT(*objectp != 0); 98246574Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0); 99219089Spjd ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); 100219089Spjd VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); 101219089Spjd VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); 102219089Spjd VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); 103219089Spjd bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); 104219089Spjd 105219089Spjd *objectp = 0; 106219089Spjd} 107219089Spjd 108219089Spjdstatic int 109219089Spjdddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 110219089Spjd{ 111219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 112219089Spjd dmu_object_info_t doi; 113246574Sdelphij uint64_t count; 114219089Spjd char name[DDT_NAMELEN]; 115219089Spjd int error; 116219089Spjd 117219089Spjd ddt_object_name(ddt, type, class, name); 118219089Spjd 119219089Spjd error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, 120219089Spjd sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); 121219089Spjd 122263398Sdelphij if (error != 0) 123219089Spjd return (error); 124219089Spjd 125263398Sdelphij VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 126219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 127263398Sdelphij &ddt->ddt_histogram[type][class])); 128219089Spjd 129219089Spjd /* 130219089Spjd * Seed the cached statistics. 131219089Spjd */ 132219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 133219089Spjd 134246574Sdelphij error = ddt_object_count(ddt, type, class, &count); 135246574Sdelphij if (error) 136246574Sdelphij return error; 137246574Sdelphij 138246574Sdelphij ddo->ddo_count = count; 139219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 140219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 141219089Spjd 142263398Sdelphij return (0); 143219089Spjd} 144219089Spjd 145219089Spjdstatic void 146219089Spjdddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 147219089Spjd dmu_tx_t *tx) 148219089Spjd{ 149219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 150219089Spjd dmu_object_info_t doi; 151246574Sdelphij uint64_t count; 152219089Spjd char name[DDT_NAMELEN]; 153219089Spjd 154219089Spjd ddt_object_name(ddt, type, class, name); 155219089Spjd 156219089Spjd VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 157219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 158219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 159219089Spjd 160219089Spjd /* 161219089Spjd * Cache DDT statistics; this is the only time they'll change. 162219089Spjd */ 163219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 164246574Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0); 165219089Spjd 166246574Sdelphij ddo->ddo_count = count; 167219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 168219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 169219089Spjd} 170219089Spjd 171219089Spjdstatic int 172219089Spjdddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 173219089Spjd ddt_entry_t *dde) 174219089Spjd{ 175219089Spjd if (!ddt_object_exists(ddt, type, class)) 176249643Smm return (SET_ERROR(ENOENT)); 177219089Spjd 178219089Spjd return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, 179219089Spjd ddt->ddt_object[type][class], dde)); 180219089Spjd} 181219089Spjd 182219089Spjdstatic void 183219089Spjdddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 184219089Spjd ddt_entry_t *dde) 185219089Spjd{ 186219089Spjd if (!ddt_object_exists(ddt, type, class)) 187219089Spjd return; 188219089Spjd 189219089Spjd ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, 190219089Spjd ddt->ddt_object[type][class], dde); 191219089Spjd} 192219089Spjd 193219089Spjdint 194219089Spjdddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 195219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 196219089Spjd{ 197219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 198219089Spjd 199219089Spjd return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, 200219089Spjd ddt->ddt_object[type][class], dde, tx)); 201219089Spjd} 202219089Spjd 203219089Spjdstatic int 204219089Spjdddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 205219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 206219089Spjd{ 207219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 208219089Spjd 209219089Spjd return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, 210219089Spjd ddt->ddt_object[type][class], dde, tx)); 211219089Spjd} 212219089Spjd 213219089Spjdint 214219089Spjdddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 215219089Spjd uint64_t *walk, ddt_entry_t *dde) 216219089Spjd{ 217219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 218219089Spjd 219219089Spjd return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, 220219089Spjd ddt->ddt_object[type][class], dde, walk)); 221219089Spjd} 222219089Spjd 223246574Sdelphijint 224246574Sdelphijddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class, uint64_t *count) 225219089Spjd{ 226219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 227219089Spjd 228219089Spjd return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, 229246574Sdelphij ddt->ddt_object[type][class], count)); 230219089Spjd} 231219089Spjd 232219089Spjdint 233219089Spjdddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 234219089Spjd dmu_object_info_t *doi) 235219089Spjd{ 236219089Spjd if (!ddt_object_exists(ddt, type, class)) 237249643Smm return (SET_ERROR(ENOENT)); 238219089Spjd 239219089Spjd return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], 240219089Spjd doi)); 241219089Spjd} 242219089Spjd 243219089Spjdboolean_t 244219089Spjdddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 245219089Spjd{ 246219089Spjd return (!!ddt->ddt_object[type][class]); 247219089Spjd} 248219089Spjd 249219089Spjdvoid 250219089Spjdddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 251219089Spjd char *name) 252219089Spjd{ 253219089Spjd (void) sprintf(name, DMU_POOL_DDT, 254219089Spjd zio_checksum_table[ddt->ddt_checksum].ci_name, 255219089Spjd ddt_ops[type]->ddt_op_name, ddt_class_name[class]); 256219089Spjd} 257219089Spjd 258219089Spjdvoid 259219089Spjdddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) 260219089Spjd{ 261219089Spjd ASSERT(txg != 0); 262219089Spjd 263219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 264219089Spjd bp->blk_dva[d] = ddp->ddp_dva[d]; 265219089Spjd BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); 266219089Spjd} 267219089Spjd 268219089Spjdvoid 269219089Spjdddt_bp_create(enum zio_checksum checksum, 270219089Spjd const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) 271219089Spjd{ 272219089Spjd BP_ZERO(bp); 273219089Spjd 274219089Spjd if (ddp != NULL) 275219089Spjd ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); 276219089Spjd 277219089Spjd bp->blk_cksum = ddk->ddk_cksum; 278219089Spjd bp->blk_fill = 1; 279219089Spjd 280219089Spjd BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); 281219089Spjd BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); 282219089Spjd BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); 283219089Spjd BP_SET_CHECKSUM(bp, checksum); 284219089Spjd BP_SET_TYPE(bp, DMU_OT_DEDUP); 285219089Spjd BP_SET_LEVEL(bp, 0); 286219089Spjd BP_SET_DEDUP(bp, 0); 287219089Spjd BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 288219089Spjd} 289219089Spjd 290219089Spjdvoid 291219089Spjdddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) 292219089Spjd{ 293219089Spjd ddk->ddk_cksum = bp->blk_cksum; 294219089Spjd ddk->ddk_prop = 0; 295219089Spjd 296219089Spjd DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); 297219089Spjd DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); 298219089Spjd DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); 299219089Spjd} 300219089Spjd 301219089Spjdvoid 302219089Spjdddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) 303219089Spjd{ 304219089Spjd ASSERT(ddp->ddp_phys_birth == 0); 305219089Spjd 306219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 307219089Spjd ddp->ddp_dva[d] = bp->blk_dva[d]; 308219089Spjd ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); 309219089Spjd} 310219089Spjd 311219089Spjdvoid 312219089Spjdddt_phys_clear(ddt_phys_t *ddp) 313219089Spjd{ 314219089Spjd bzero(ddp, sizeof (*ddp)); 315219089Spjd} 316219089Spjd 317219089Spjdvoid 318219089Spjdddt_phys_addref(ddt_phys_t *ddp) 319219089Spjd{ 320219089Spjd ddp->ddp_refcnt++; 321219089Spjd} 322219089Spjd 323219089Spjdvoid 324219089Spjdddt_phys_decref(ddt_phys_t *ddp) 325219089Spjd{ 326219089Spjd ASSERT((int64_t)ddp->ddp_refcnt > 0); 327219089Spjd ddp->ddp_refcnt--; 328219089Spjd} 329219089Spjd 330219089Spjdvoid 331219089Spjdddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) 332219089Spjd{ 333219089Spjd blkptr_t blk; 334219089Spjd 335219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 336219089Spjd ddt_phys_clear(ddp); 337219089Spjd zio_free(ddt->ddt_spa, txg, &blk); 338219089Spjd} 339219089Spjd 340219089Spjdddt_phys_t * 341219089Spjdddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) 342219089Spjd{ 343219089Spjd ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; 344219089Spjd 345219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 346219089Spjd if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && 347219089Spjd BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) 348219089Spjd return (ddp); 349219089Spjd } 350219089Spjd return (NULL); 351219089Spjd} 352219089Spjd 353219089Spjduint64_t 354219089Spjdddt_phys_total_refcnt(const ddt_entry_t *dde) 355219089Spjd{ 356219089Spjd uint64_t refcnt = 0; 357219089Spjd 358219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) 359219089Spjd refcnt += dde->dde_phys[p].ddp_refcnt; 360219089Spjd 361219089Spjd return (refcnt); 362219089Spjd} 363219089Spjd 364219089Spjdstatic void 365219089Spjdddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) 366219089Spjd{ 367219089Spjd spa_t *spa = ddt->ddt_spa; 368219089Spjd ddt_phys_t *ddp = dde->dde_phys; 369219089Spjd ddt_key_t *ddk = &dde->dde_key; 370219089Spjd uint64_t lsize = DDK_GET_LSIZE(ddk); 371219089Spjd uint64_t psize = DDK_GET_PSIZE(ddk); 372219089Spjd 373219089Spjd bzero(dds, sizeof (*dds)); 374219089Spjd 375219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 376219089Spjd uint64_t dsize = 0; 377219089Spjd uint64_t refcnt = ddp->ddp_refcnt; 378219089Spjd 379219089Spjd if (ddp->ddp_phys_birth == 0) 380219089Spjd continue; 381219089Spjd 382219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 383219089Spjd dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); 384219089Spjd 385219089Spjd dds->dds_blocks += 1; 386219089Spjd dds->dds_lsize += lsize; 387219089Spjd dds->dds_psize += psize; 388219089Spjd dds->dds_dsize += dsize; 389219089Spjd 390219089Spjd dds->dds_ref_blocks += refcnt; 391219089Spjd dds->dds_ref_lsize += lsize * refcnt; 392219089Spjd dds->dds_ref_psize += psize * refcnt; 393219089Spjd dds->dds_ref_dsize += dsize * refcnt; 394219089Spjd } 395219089Spjd} 396219089Spjd 397219089Spjdvoid 398219089Spjdddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) 399219089Spjd{ 400219089Spjd const uint64_t *s = (const uint64_t *)src; 401219089Spjd uint64_t *d = (uint64_t *)dst; 402219089Spjd uint64_t *d_end = (uint64_t *)(dst + 1); 403219089Spjd 404219089Spjd ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ 405219089Spjd 406219089Spjd while (d < d_end) 407219089Spjd *d++ += (*s++ ^ neg) - neg; 408219089Spjd} 409219089Spjd 410219089Spjdstatic void 411219089Spjdddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) 412219089Spjd{ 413219089Spjd ddt_stat_t dds; 414219089Spjd ddt_histogram_t *ddh; 415219089Spjd int bucket; 416219089Spjd 417219089Spjd ddt_stat_generate(ddt, dde, &dds); 418219089Spjd 419265751Sdelphij bucket = highbit64(dds.dds_ref_blocks) - 1; 420219089Spjd ASSERT(bucket >= 0); 421219089Spjd 422219089Spjd ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; 423219089Spjd 424219089Spjd ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); 425219089Spjd} 426219089Spjd 427219089Spjdvoid 428219089Spjdddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) 429219089Spjd{ 430219089Spjd for (int h = 0; h < 64; h++) 431219089Spjd ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); 432219089Spjd} 433219089Spjd 434219089Spjdvoid 435219089Spjdddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) 436219089Spjd{ 437219089Spjd bzero(dds, sizeof (*dds)); 438219089Spjd 439219089Spjd for (int h = 0; h < 64; h++) 440219089Spjd ddt_stat_add(dds, &ddh->ddh_stat[h], 0); 441219089Spjd} 442219089Spjd 443219089Spjdboolean_t 444219089Spjdddt_histogram_empty(const ddt_histogram_t *ddh) 445219089Spjd{ 446219089Spjd const uint64_t *s = (const uint64_t *)ddh; 447219089Spjd const uint64_t *s_end = (const uint64_t *)(ddh + 1); 448219089Spjd 449219089Spjd while (s < s_end) 450219089Spjd if (*s++ != 0) 451219089Spjd return (B_FALSE); 452219089Spjd 453219089Spjd return (B_TRUE); 454219089Spjd} 455219089Spjd 456219089Spjdvoid 457219089Spjdddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) 458219089Spjd{ 459219089Spjd /* Sum the statistics we cached in ddt_object_sync(). */ 460219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 461219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 462219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 463219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 464219089Spjd class++) { 465219089Spjd ddt_object_t *ddo = 466219089Spjd &ddt->ddt_object_stats[type][class]; 467219089Spjd ddo_total->ddo_count += ddo->ddo_count; 468219089Spjd ddo_total->ddo_dspace += ddo->ddo_dspace; 469219089Spjd ddo_total->ddo_mspace += ddo->ddo_mspace; 470219089Spjd } 471219089Spjd } 472219089Spjd } 473219089Spjd 474219089Spjd /* ... and compute the averages. */ 475219089Spjd if (ddo_total->ddo_count != 0) { 476219089Spjd ddo_total->ddo_dspace /= ddo_total->ddo_count; 477219089Spjd ddo_total->ddo_mspace /= ddo_total->ddo_count; 478219089Spjd } 479219089Spjd} 480219089Spjd 481219089Spjdvoid 482219089Spjdddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) 483219089Spjd{ 484219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 485219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 486219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 487219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 488219089Spjd class++) { 489219089Spjd ddt_histogram_add(ddh, 490219089Spjd &ddt->ddt_histogram_cache[type][class]); 491219089Spjd } 492219089Spjd } 493219089Spjd } 494219089Spjd} 495219089Spjd 496219089Spjdvoid 497219089Spjdddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) 498219089Spjd{ 499219089Spjd ddt_histogram_t *ddh_total; 500219089Spjd 501219089Spjd ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); 502219089Spjd ddt_get_dedup_histogram(spa, ddh_total); 503219089Spjd ddt_histogram_stat(dds_total, ddh_total); 504219089Spjd kmem_free(ddh_total, sizeof (ddt_histogram_t)); 505219089Spjd} 506219089Spjd 507219089Spjduint64_t 508219089Spjdddt_get_dedup_dspace(spa_t *spa) 509219089Spjd{ 510219089Spjd ddt_stat_t dds_total = { 0 }; 511219089Spjd 512219089Spjd ddt_get_dedup_stats(spa, &dds_total); 513219089Spjd return (dds_total.dds_ref_dsize - dds_total.dds_dsize); 514219089Spjd} 515219089Spjd 516219089Spjduint64_t 517219089Spjdddt_get_pool_dedup_ratio(spa_t *spa) 518219089Spjd{ 519219089Spjd ddt_stat_t dds_total = { 0 }; 520219089Spjd 521219089Spjd ddt_get_dedup_stats(spa, &dds_total); 522219089Spjd if (dds_total.dds_dsize == 0) 523219089Spjd return (100); 524219089Spjd 525219089Spjd return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); 526219089Spjd} 527219089Spjd 528219089Spjdint 529219089Spjdddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) 530219089Spjd{ 531219089Spjd spa_t *spa = ddt->ddt_spa; 532219089Spjd uint64_t total_refcnt = 0; 533219089Spjd uint64_t ditto = spa->spa_dedup_ditto; 534219089Spjd int total_copies = 0; 535219089Spjd int desired_copies = 0; 536219089Spjd 537219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { 538219089Spjd ddt_phys_t *ddp = &dde->dde_phys[p]; 539219089Spjd zio_t *zio = dde->dde_lead_zio[p]; 540219089Spjd uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ 541219089Spjd if (zio != NULL) 542219089Spjd refcnt += zio->io_parent_count; /* pending refs */ 543219089Spjd if (ddp == ddp_willref) 544219089Spjd refcnt++; /* caller's ref */ 545219089Spjd if (refcnt != 0) { 546219089Spjd total_refcnt += refcnt; 547219089Spjd total_copies += p; 548219089Spjd } 549219089Spjd } 550219089Spjd 551219089Spjd if (ditto == 0 || ditto > UINT32_MAX) 552219089Spjd ditto = UINT32_MAX; 553219089Spjd 554219089Spjd if (total_refcnt >= 1) 555219089Spjd desired_copies++; 556219089Spjd if (total_refcnt >= ditto) 557219089Spjd desired_copies++; 558219089Spjd if (total_refcnt >= ditto * ditto) 559219089Spjd desired_copies++; 560219089Spjd 561219089Spjd return (MAX(desired_copies, total_copies) - total_copies); 562219089Spjd} 563219089Spjd 564219089Spjdint 565219089Spjdddt_ditto_copies_present(ddt_entry_t *dde) 566219089Spjd{ 567219089Spjd ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; 568219089Spjd dva_t *dva = ddp->ddp_dva; 569219089Spjd int copies = 0 - DVA_GET_GANG(dva); 570219089Spjd 571219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) 572219089Spjd if (DVA_IS_VALID(dva)) 573219089Spjd copies++; 574219089Spjd 575219089Spjd ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); 576219089Spjd 577219089Spjd return (copies); 578219089Spjd} 579219089Spjd 580219089Spjdsize_t 581219089Spjdddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) 582219089Spjd{ 583219089Spjd uchar_t *version = dst++; 584219089Spjd int cpfunc = ZIO_COMPRESS_ZLE; 585219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 586219089Spjd size_t c_len; 587219089Spjd 588219089Spjd ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ 589219089Spjd 590219089Spjd c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); 591219089Spjd 592219089Spjd if (c_len == s_len) { 593219089Spjd cpfunc = ZIO_COMPRESS_OFF; 594219089Spjd bcopy(src, dst, s_len); 595219089Spjd } 596219089Spjd 597263398Sdelphij *version = cpfunc; 598263398Sdelphij /* CONSTCOND */ 599263398Sdelphij if (ZFS_HOST_BYTEORDER) 600263398Sdelphij *version |= DDT_COMPRESS_BYTEORDER_MASK; 601219089Spjd 602219089Spjd return (c_len + 1); 603219089Spjd} 604219089Spjd 605219089Spjdvoid 606219089Spjdddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 607219089Spjd{ 608219089Spjd uchar_t version = *src++; 609219089Spjd int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; 610219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 611219089Spjd 612219089Spjd if (ci->ci_decompress != NULL) 613219089Spjd (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 614219089Spjd else 615219089Spjd bcopy(src, dst, d_len); 616219089Spjd 617263398Sdelphij if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) != 618263398Sdelphij (ZFS_HOST_BYTEORDER != 0)) 619219089Spjd byteswap_uint64_array(dst, d_len); 620219089Spjd} 621219089Spjd 622219089Spjdddt_t * 623219089Spjdddt_select_by_checksum(spa_t *spa, enum zio_checksum c) 624219089Spjd{ 625219089Spjd return (spa->spa_ddt[c]); 626219089Spjd} 627219089Spjd 628219089Spjdddt_t * 629219089Spjdddt_select(spa_t *spa, const blkptr_t *bp) 630219089Spjd{ 631219089Spjd return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); 632219089Spjd} 633219089Spjd 634219089Spjdvoid 635219089Spjdddt_enter(ddt_t *ddt) 636219089Spjd{ 637219089Spjd mutex_enter(&ddt->ddt_lock); 638219089Spjd} 639219089Spjd 640219089Spjdvoid 641219089Spjdddt_exit(ddt_t *ddt) 642219089Spjd{ 643219089Spjd mutex_exit(&ddt->ddt_lock); 644219089Spjd} 645219089Spjd 646219089Spjdstatic ddt_entry_t * 647219089Spjdddt_alloc(const ddt_key_t *ddk) 648219089Spjd{ 649219089Spjd ddt_entry_t *dde; 650219089Spjd 651219089Spjd dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); 652219089Spjd cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); 653219089Spjd 654219089Spjd dde->dde_key = *ddk; 655219089Spjd 656219089Spjd return (dde); 657219089Spjd} 658219089Spjd 659219089Spjdstatic void 660219089Spjdddt_free(ddt_entry_t *dde) 661219089Spjd{ 662219089Spjd ASSERT(!dde->dde_loading); 663219089Spjd 664219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++) 665219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 666219089Spjd 667219089Spjd if (dde->dde_repair_data != NULL) 668219089Spjd zio_buf_free(dde->dde_repair_data, 669219089Spjd DDK_GET_PSIZE(&dde->dde_key)); 670219089Spjd 671219089Spjd cv_destroy(&dde->dde_cv); 672219089Spjd kmem_free(dde, sizeof (*dde)); 673219089Spjd} 674219089Spjd 675219089Spjdvoid 676219089Spjdddt_remove(ddt_t *ddt, ddt_entry_t *dde) 677219089Spjd{ 678219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 679219089Spjd 680219089Spjd avl_remove(&ddt->ddt_tree, dde); 681219089Spjd ddt_free(dde); 682219089Spjd} 683219089Spjd 684219089Spjdddt_entry_t * 685219089Spjdddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) 686219089Spjd{ 687219089Spjd ddt_entry_t *dde, dde_search; 688219089Spjd enum ddt_type type; 689219089Spjd enum ddt_class class; 690219089Spjd avl_index_t where; 691219089Spjd int error; 692219089Spjd 693219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 694219089Spjd 695219089Spjd ddt_key_fill(&dde_search.dde_key, bp); 696219089Spjd 697219089Spjd dde = avl_find(&ddt->ddt_tree, &dde_search, &where); 698219089Spjd if (dde == NULL) { 699219089Spjd if (!add) 700219089Spjd return (NULL); 701219089Spjd dde = ddt_alloc(&dde_search.dde_key); 702219089Spjd avl_insert(&ddt->ddt_tree, dde, where); 703219089Spjd } 704219089Spjd 705219089Spjd while (dde->dde_loading) 706219089Spjd cv_wait(&dde->dde_cv, &ddt->ddt_lock); 707219089Spjd 708219089Spjd if (dde->dde_loaded) 709219089Spjd return (dde); 710219089Spjd 711219089Spjd dde->dde_loading = B_TRUE; 712219089Spjd 713219089Spjd ddt_exit(ddt); 714219089Spjd 715219089Spjd error = ENOENT; 716219089Spjd 717219089Spjd for (type = 0; type < DDT_TYPES; type++) { 718219089Spjd for (class = 0; class < DDT_CLASSES; class++) { 719219089Spjd error = ddt_object_lookup(ddt, type, class, dde); 720219089Spjd if (error != ENOENT) 721219089Spjd break; 722219089Spjd } 723219089Spjd if (error != ENOENT) 724219089Spjd break; 725219089Spjd } 726219089Spjd 727219089Spjd ASSERT(error == 0 || error == ENOENT); 728219089Spjd 729219089Spjd ddt_enter(ddt); 730219089Spjd 731219089Spjd ASSERT(dde->dde_loaded == B_FALSE); 732219089Spjd ASSERT(dde->dde_loading == B_TRUE); 733219089Spjd 734219089Spjd dde->dde_type = type; /* will be DDT_TYPES if no entry found */ 735219089Spjd dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ 736219089Spjd dde->dde_loaded = B_TRUE; 737219089Spjd dde->dde_loading = B_FALSE; 738219089Spjd 739219089Spjd if (error == 0) 740219089Spjd ddt_stat_update(ddt, dde, -1ULL); 741219089Spjd 742219089Spjd cv_broadcast(&dde->dde_cv); 743219089Spjd 744219089Spjd return (dde); 745219089Spjd} 746219089Spjd 747219089Spjdvoid 748219089Spjdddt_prefetch(spa_t *spa, const blkptr_t *bp) 749219089Spjd{ 750219089Spjd ddt_t *ddt; 751219089Spjd ddt_entry_t dde; 752219089Spjd 753219089Spjd if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) 754219089Spjd return; 755219089Spjd 756219089Spjd /* 757219089Spjd * We only remove the DDT once all tables are empty and only 758219089Spjd * prefetch dedup blocks when there are entries in the DDT. 759219089Spjd * Thus no locking is required as the DDT can't disappear on us. 760219089Spjd */ 761219089Spjd ddt = ddt_select(spa, bp); 762219089Spjd ddt_key_fill(&dde.dde_key, bp); 763219089Spjd 764219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 765219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 766219089Spjd ddt_object_prefetch(ddt, type, class, &dde); 767219089Spjd } 768219089Spjd } 769219089Spjd} 770219089Spjd 771219089Spjdint 772219089Spjdddt_entry_compare(const void *x1, const void *x2) 773219089Spjd{ 774219089Spjd const ddt_entry_t *dde1 = x1; 775219089Spjd const ddt_entry_t *dde2 = x2; 776219089Spjd const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; 777219089Spjd const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; 778219089Spjd 779219089Spjd for (int i = 0; i < DDT_KEY_WORDS; i++) { 780219089Spjd if (u1[i] < u2[i]) 781219089Spjd return (-1); 782219089Spjd if (u1[i] > u2[i]) 783219089Spjd return (1); 784219089Spjd } 785219089Spjd 786219089Spjd return (0); 787219089Spjd} 788219089Spjd 789219089Spjdstatic ddt_t * 790219089Spjdddt_table_alloc(spa_t *spa, enum zio_checksum c) 791219089Spjd{ 792219089Spjd ddt_t *ddt; 793219089Spjd 794219089Spjd ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); 795219089Spjd 796219089Spjd mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); 797219089Spjd avl_create(&ddt->ddt_tree, ddt_entry_compare, 798219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 799219089Spjd avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, 800219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 801219089Spjd ddt->ddt_checksum = c; 802219089Spjd ddt->ddt_spa = spa; 803219089Spjd ddt->ddt_os = spa->spa_meta_objset; 804219089Spjd 805219089Spjd return (ddt); 806219089Spjd} 807219089Spjd 808219089Spjdstatic void 809219089Spjdddt_table_free(ddt_t *ddt) 810219089Spjd{ 811219089Spjd ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); 812219089Spjd ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); 813219089Spjd avl_destroy(&ddt->ddt_tree); 814219089Spjd avl_destroy(&ddt->ddt_repair_tree); 815219089Spjd mutex_destroy(&ddt->ddt_lock); 816219089Spjd kmem_free(ddt, sizeof (*ddt)); 817219089Spjd} 818219089Spjd 819219089Spjdvoid 820219089Spjdddt_create(spa_t *spa) 821219089Spjd{ 822219089Spjd spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; 823219089Spjd 824219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 825219089Spjd spa->spa_ddt[c] = ddt_table_alloc(spa, c); 826219089Spjd} 827219089Spjd 828219089Spjdint 829219089Spjdddt_load(spa_t *spa) 830219089Spjd{ 831219089Spjd int error; 832219089Spjd 833219089Spjd ddt_create(spa); 834219089Spjd 835219089Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 836219089Spjd DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 837219089Spjd &spa->spa_ddt_stat_object); 838219089Spjd 839219089Spjd if (error) 840219089Spjd return (error == ENOENT ? 0 : error); 841219089Spjd 842219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 843219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 844219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 845219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 846219089Spjd class++) { 847219089Spjd error = ddt_object_load(ddt, type, class); 848219089Spjd if (error != 0 && error != ENOENT) 849219089Spjd return (error); 850219089Spjd } 851219089Spjd } 852219089Spjd 853219089Spjd /* 854219089Spjd * Seed the cached histograms. 855219089Spjd */ 856219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 857219089Spjd sizeof (ddt->ddt_histogram)); 858219089Spjd } 859219089Spjd 860219089Spjd return (0); 861219089Spjd} 862219089Spjd 863219089Spjdvoid 864219089Spjdddt_unload(spa_t *spa) 865219089Spjd{ 866219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 867219089Spjd if (spa->spa_ddt[c]) { 868219089Spjd ddt_table_free(spa->spa_ddt[c]); 869219089Spjd spa->spa_ddt[c] = NULL; 870219089Spjd } 871219089Spjd } 872219089Spjd} 873219089Spjd 874219089Spjdboolean_t 875219089Spjdddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) 876219089Spjd{ 877219089Spjd ddt_t *ddt; 878219089Spjd ddt_entry_t dde; 879219089Spjd 880219089Spjd if (!BP_GET_DEDUP(bp)) 881219089Spjd return (B_FALSE); 882219089Spjd 883219089Spjd if (max_class == DDT_CLASS_UNIQUE) 884219089Spjd return (B_TRUE); 885219089Spjd 886219089Spjd ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; 887219089Spjd 888219089Spjd ddt_key_fill(&dde.dde_key, bp); 889219089Spjd 890219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) 891219089Spjd for (enum ddt_class class = 0; class <= max_class; class++) 892219089Spjd if (ddt_object_lookup(ddt, type, class, &dde) == 0) 893219089Spjd return (B_TRUE); 894219089Spjd 895219089Spjd return (B_FALSE); 896219089Spjd} 897219089Spjd 898219089Spjdddt_entry_t * 899219089Spjdddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 900219089Spjd{ 901219089Spjd ddt_key_t ddk; 902219089Spjd ddt_entry_t *dde; 903219089Spjd 904219089Spjd ddt_key_fill(&ddk, bp); 905219089Spjd 906219089Spjd dde = ddt_alloc(&ddk); 907219089Spjd 908219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 909219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 910219089Spjd /* 911219089Spjd * We can only do repair if there are multiple copies 912219089Spjd * of the block. For anything in the UNIQUE class, 913219089Spjd * there's definitely only one copy, so don't even try. 914219089Spjd */ 915219089Spjd if (class != DDT_CLASS_UNIQUE && 916219089Spjd ddt_object_lookup(ddt, type, class, dde) == 0) 917219089Spjd return (dde); 918219089Spjd } 919219089Spjd } 920219089Spjd 921219089Spjd bzero(dde->dde_phys, sizeof (dde->dde_phys)); 922219089Spjd 923219089Spjd return (dde); 924219089Spjd} 925219089Spjd 926219089Spjdvoid 927219089Spjdddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) 928219089Spjd{ 929219089Spjd avl_index_t where; 930219089Spjd 931219089Spjd ddt_enter(ddt); 932219089Spjd 933219089Spjd if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && 934219089Spjd avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) 935219089Spjd avl_insert(&ddt->ddt_repair_tree, dde, where); 936219089Spjd else 937219089Spjd ddt_free(dde); 938219089Spjd 939219089Spjd ddt_exit(ddt); 940219089Spjd} 941219089Spjd 942219089Spjdstatic void 943219089Spjdddt_repair_entry_done(zio_t *zio) 944219089Spjd{ 945219089Spjd ddt_entry_t *rdde = zio->io_private; 946219089Spjd 947219089Spjd ddt_free(rdde); 948219089Spjd} 949219089Spjd 950219089Spjdstatic void 951219089Spjdddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) 952219089Spjd{ 953219089Spjd ddt_phys_t *ddp = dde->dde_phys; 954219089Spjd ddt_phys_t *rddp = rdde->dde_phys; 955219089Spjd ddt_key_t *ddk = &dde->dde_key; 956219089Spjd ddt_key_t *rddk = &rdde->dde_key; 957219089Spjd zio_t *zio; 958219089Spjd blkptr_t blk; 959219089Spjd 960219089Spjd zio = zio_null(rio, rio->io_spa, NULL, 961219089Spjd ddt_repair_entry_done, rdde, rio->io_flags); 962219089Spjd 963219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { 964219089Spjd if (ddp->ddp_phys_birth == 0 || 965219089Spjd ddp->ddp_phys_birth != rddp->ddp_phys_birth || 966219089Spjd bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) 967219089Spjd continue; 968219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 969219089Spjd zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, 970219089Spjd rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, 971219089Spjd ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); 972219089Spjd } 973219089Spjd 974219089Spjd zio_nowait(zio); 975219089Spjd} 976219089Spjd 977219089Spjdstatic void 978219089Spjdddt_repair_table(ddt_t *ddt, zio_t *rio) 979219089Spjd{ 980219089Spjd spa_t *spa = ddt->ddt_spa; 981219089Spjd ddt_entry_t *dde, *rdde_next, *rdde; 982219089Spjd avl_tree_t *t = &ddt->ddt_repair_tree; 983219089Spjd blkptr_t blk; 984219089Spjd 985219089Spjd if (spa_sync_pass(spa) > 1) 986219089Spjd return; 987219089Spjd 988219089Spjd ddt_enter(ddt); 989219089Spjd for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { 990219089Spjd rdde_next = AVL_NEXT(t, rdde); 991219089Spjd avl_remove(&ddt->ddt_repair_tree, rdde); 992219089Spjd ddt_exit(ddt); 993219089Spjd ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); 994219089Spjd dde = ddt_repair_start(ddt, &blk); 995219089Spjd ddt_repair_entry(ddt, dde, rdde, rio); 996219089Spjd ddt_repair_done(ddt, dde); 997219089Spjd ddt_enter(ddt); 998219089Spjd } 999219089Spjd ddt_exit(ddt); 1000219089Spjd} 1001219089Spjd 1002219089Spjdstatic void 1003219089Spjdddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) 1004219089Spjd{ 1005219089Spjd dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; 1006219089Spjd ddt_phys_t *ddp = dde->dde_phys; 1007219089Spjd ddt_key_t *ddk = &dde->dde_key; 1008219089Spjd enum ddt_type otype = dde->dde_type; 1009219089Spjd enum ddt_type ntype = DDT_TYPE_CURRENT; 1010219089Spjd enum ddt_class oclass = dde->dde_class; 1011219089Spjd enum ddt_class nclass; 1012219089Spjd uint64_t total_refcnt = 0; 1013219089Spjd 1014219089Spjd ASSERT(dde->dde_loaded); 1015219089Spjd ASSERT(!dde->dde_loading); 1016219089Spjd 1017219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1018219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 1019219089Spjd ASSERT((int64_t)ddp->ddp_refcnt >= 0); 1020219089Spjd if (ddp->ddp_phys_birth == 0) { 1021219089Spjd ASSERT(ddp->ddp_refcnt == 0); 1022219089Spjd continue; 1023219089Spjd } 1024219089Spjd if (p == DDT_PHYS_DITTO) { 1025219089Spjd if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) 1026219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1027219089Spjd continue; 1028219089Spjd } 1029219089Spjd if (ddp->ddp_refcnt == 0) 1030219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1031219089Spjd total_refcnt += ddp->ddp_refcnt; 1032219089Spjd } 1033219089Spjd 1034219089Spjd if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) 1035219089Spjd nclass = DDT_CLASS_DITTO; 1036219089Spjd else if (total_refcnt > 1) 1037219089Spjd nclass = DDT_CLASS_DUPLICATE; 1038219089Spjd else 1039219089Spjd nclass = DDT_CLASS_UNIQUE; 1040219089Spjd 1041219089Spjd if (otype != DDT_TYPES && 1042219089Spjd (otype != ntype || oclass != nclass || total_refcnt == 0)) { 1043219089Spjd VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); 1044219089Spjd ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); 1045219089Spjd } 1046219089Spjd 1047219089Spjd if (total_refcnt != 0) { 1048219089Spjd dde->dde_type = ntype; 1049219089Spjd dde->dde_class = nclass; 1050219089Spjd ddt_stat_update(ddt, dde, 0); 1051219089Spjd if (!ddt_object_exists(ddt, ntype, nclass)) 1052219089Spjd ddt_object_create(ddt, ntype, nclass, tx); 1053219089Spjd VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); 1054219089Spjd 1055219089Spjd /* 1056219089Spjd * If the class changes, the order that we scan this bp 1057219089Spjd * changes. If it decreases, we could miss it, so 1058219089Spjd * scan it right now. (This covers both class changing 1059219089Spjd * while we are doing ddt_walk(), and when we are 1060219089Spjd * traversing.) 1061219089Spjd */ 1062219089Spjd if (nclass < oclass) { 1063219089Spjd dsl_scan_ddt_entry(dp->dp_scan, 1064219089Spjd ddt->ddt_checksum, dde, tx); 1065219089Spjd } 1066219089Spjd } 1067219089Spjd} 1068219089Spjd 1069219089Spjdstatic void 1070219089Spjdddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) 1071219089Spjd{ 1072219089Spjd spa_t *spa = ddt->ddt_spa; 1073219089Spjd ddt_entry_t *dde; 1074219089Spjd void *cookie = NULL; 1075219089Spjd 1076219089Spjd if (avl_numnodes(&ddt->ddt_tree) == 0) 1077219089Spjd return; 1078219089Spjd 1079219089Spjd ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); 1080219089Spjd 1081219089Spjd if (spa->spa_ddt_stat_object == 0) { 1082243674Smm spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os, 1083243674Smm DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT, 1084243674Smm DMU_POOL_DDT_STATS, tx); 1085219089Spjd } 1086219089Spjd 1087219089Spjd while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { 1088219089Spjd ddt_sync_entry(ddt, dde, tx, txg); 1089219089Spjd ddt_free(dde); 1090219089Spjd } 1091219089Spjd 1092219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 1093246574Sdelphij uint64_t add, count = 0; 1094219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1095219089Spjd if (ddt_object_exists(ddt, type, class)) { 1096219089Spjd ddt_object_sync(ddt, type, class, tx); 1097246574Sdelphij VERIFY(ddt_object_count(ddt, type, class, 1098246574Sdelphij &add) == 0); 1099246574Sdelphij count += add; 1100219089Spjd } 1101219089Spjd } 1102219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1103219089Spjd if (count == 0 && ddt_object_exists(ddt, type, class)) 1104219089Spjd ddt_object_destroy(ddt, type, class, tx); 1105219089Spjd } 1106219089Spjd } 1107219089Spjd 1108219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 1109219089Spjd sizeof (ddt->ddt_histogram)); 1110219089Spjd} 1111219089Spjd 1112219089Spjdvoid 1113219089Spjdddt_sync(spa_t *spa, uint64_t txg) 1114219089Spjd{ 1115219089Spjd dmu_tx_t *tx; 1116219089Spjd zio_t *rio = zio_root(spa, NULL, NULL, 1117219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1118219089Spjd 1119219089Spjd ASSERT(spa_syncing_txg(spa) == txg); 1120219089Spjd 1121219089Spjd tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1122219089Spjd 1123219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 1124219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 1125219089Spjd if (ddt == NULL) 1126219089Spjd continue; 1127219089Spjd ddt_sync_table(ddt, tx, txg); 1128219089Spjd ddt_repair_table(ddt, rio); 1129219089Spjd } 1130219089Spjd 1131219089Spjd (void) zio_wait(rio); 1132219089Spjd 1133219089Spjd dmu_tx_commit(tx); 1134219089Spjd} 1135219089Spjd 1136219089Spjdint 1137219089Spjdddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) 1138219089Spjd{ 1139219089Spjd do { 1140219089Spjd do { 1141219089Spjd do { 1142219089Spjd ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; 1143219089Spjd int error = ENOENT; 1144219089Spjd if (ddt_object_exists(ddt, ddb->ddb_type, 1145219089Spjd ddb->ddb_class)) { 1146219089Spjd error = ddt_object_walk(ddt, 1147219089Spjd ddb->ddb_type, ddb->ddb_class, 1148219089Spjd &ddb->ddb_cursor, dde); 1149219089Spjd } 1150219089Spjd dde->dde_type = ddb->ddb_type; 1151219089Spjd dde->dde_class = ddb->ddb_class; 1152219089Spjd if (error == 0) 1153219089Spjd return (0); 1154219089Spjd if (error != ENOENT) 1155219089Spjd return (error); 1156219089Spjd ddb->ddb_cursor = 0; 1157219089Spjd } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); 1158219089Spjd ddb->ddb_checksum = 0; 1159219089Spjd } while (++ddb->ddb_type < DDT_TYPES); 1160219089Spjd ddb->ddb_type = 0; 1161219089Spjd } while (++ddb->ddb_class < DDT_CLASSES); 1162219089Spjd 1163249643Smm return (SET_ERROR(ENOENT)); 1164219089Spjd} 1165