1219089Spjd/* 2219089Spjd * CDDL HEADER START 3219089Spjd * 4219089Spjd * The contents of this file are subject to the terms of the 5219089Spjd * Common Development and Distribution License (the "License"). 6219089Spjd * You may not use this file except in compliance with the License. 7219089Spjd * 8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9219089Spjd * or http://www.opensolaris.org/os/licensing. 10219089Spjd * See the License for the specific language governing permissions 11219089Spjd * and limitations under the License. 12219089Spjd * 13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each 14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15219089Spjd * If applicable, add the following below this CDDL HEADER, with the 16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18219089Spjd * 19219089Spjd * CDDL HEADER END 20219089Spjd */ 21219089Spjd 22219089Spjd/* 23219089Spjd * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24290757Smav * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25219089Spjd */ 26219089Spjd 27219089Spjd#include <sys/zfs_context.h> 28219089Spjd#include <sys/spa.h> 29219089Spjd#include <sys/spa_impl.h> 30219089Spjd#include <sys/zio.h> 31219089Spjd#include <sys/ddt.h> 32219089Spjd#include <sys/zap.h> 33219089Spjd#include <sys/dmu_tx.h> 34219089Spjd#include <sys/arc.h> 35219089Spjd#include <sys/dsl_pool.h> 36219089Spjd#include <sys/zio_checksum.h> 37219089Spjd#include <sys/zio_compress.h> 38219089Spjd#include <sys/dsl_scan.h> 39219089Spjd 40219089Spjd/* 41219089Spjd * Enable/disable prefetching of dedup-ed blocks which are going to be freed. 42219089Spjd */ 43219089Spjdint zfs_dedup_prefetch = 1; 44219089Spjd 45219089SpjdSYSCTL_DECL(_vfs_zfs); 46219089SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS DEDUP"); 47219089SpjdTUNABLE_INT("vfs.zfs.dedup.prefetch", &zfs_dedup_prefetch); 48219089SpjdSYSCTL_INT(_vfs_zfs_dedup, OID_AUTO, prefetch, CTLFLAG_RW, &zfs_dedup_prefetch, 49219089Spjd 0, "Enable/disable prefetching of dedup-ed blocks which are going to be freed"); 50219089Spjd 51219089Spjdstatic const ddt_ops_t *ddt_ops[DDT_TYPES] = { 52219089Spjd &ddt_zap_ops, 53219089Spjd}; 54219089Spjd 55219089Spjdstatic const char *ddt_class_name[DDT_CLASSES] = { 56219089Spjd "ditto", 57219089Spjd "duplicate", 58219089Spjd "unique", 59219089Spjd}; 60219089Spjd 61219089Spjdstatic void 62219089Spjdddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 63219089Spjd dmu_tx_t *tx) 64219089Spjd{ 65219089Spjd spa_t *spa = ddt->ddt_spa; 66219089Spjd objset_t *os = ddt->ddt_os; 67219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 68290757Smav boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags & 69290757Smav ZCHECKSUM_FLAG_DEDUP; 70219089Spjd char name[DDT_NAMELEN]; 71219089Spjd 72219089Spjd ddt_object_name(ddt, type, class, name); 73219089Spjd 74219089Spjd ASSERT(*objectp == 0); 75219089Spjd VERIFY(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash) == 0); 76219089Spjd ASSERT(*objectp != 0); 77219089Spjd 78219089Spjd VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, name, 79219089Spjd sizeof (uint64_t), 1, objectp, tx) == 0); 80219089Spjd 81219089Spjd VERIFY(zap_add(os, spa->spa_ddt_stat_object, name, 82219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 83219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 84219089Spjd} 85219089Spjd 86219089Spjdstatic void 87219089Spjdddt_object_destroy(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 88219089Spjd dmu_tx_t *tx) 89219089Spjd{ 90219089Spjd spa_t *spa = ddt->ddt_spa; 91219089Spjd objset_t *os = ddt->ddt_os; 92219089Spjd uint64_t *objectp = &ddt->ddt_object[type][class]; 93245264Sdelphij uint64_t count; 94219089Spjd char name[DDT_NAMELEN]; 95219089Spjd 96219089Spjd ddt_object_name(ddt, type, class, name); 97219089Spjd 98219089Spjd ASSERT(*objectp != 0); 99245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0 && count == 0); 100219089Spjd ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); 101219089Spjd VERIFY(zap_remove(os, DMU_POOL_DIRECTORY_OBJECT, name, tx) == 0); 102219089Spjd VERIFY(zap_remove(os, spa->spa_ddt_stat_object, name, tx) == 0); 103219089Spjd VERIFY(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx) == 0); 104219089Spjd bzero(&ddt->ddt_object_stats[type][class], sizeof (ddt_object_t)); 105219089Spjd 106219089Spjd *objectp = 0; 107219089Spjd} 108219089Spjd 109219089Spjdstatic int 110219089Spjdddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 111219089Spjd{ 112219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 113219089Spjd dmu_object_info_t doi; 114245264Sdelphij uint64_t count; 115219089Spjd char name[DDT_NAMELEN]; 116219089Spjd int error; 117219089Spjd 118219089Spjd ddt_object_name(ddt, type, class, name); 119219089Spjd 120219089Spjd error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name, 121219089Spjd sizeof (uint64_t), 1, &ddt->ddt_object[type][class]); 122219089Spjd 123263397Sdelphij if (error != 0) 124219089Spjd return (error); 125219089Spjd 126263397Sdelphij VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 127219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 128263397Sdelphij &ddt->ddt_histogram[type][class])); 129219089Spjd 130219089Spjd /* 131219089Spjd * Seed the cached statistics. 132219089Spjd */ 133219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 134219089Spjd 135245264Sdelphij error = ddt_object_count(ddt, type, class, &count); 136245264Sdelphij if (error) 137245264Sdelphij return error; 138245264Sdelphij 139245264Sdelphij ddo->ddo_count = count; 140219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 141219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 142219089Spjd 143263397Sdelphij return (0); 144219089Spjd} 145219089Spjd 146219089Spjdstatic void 147219089Spjdddt_object_sync(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 148219089Spjd dmu_tx_t *tx) 149219089Spjd{ 150219089Spjd ddt_object_t *ddo = &ddt->ddt_object_stats[type][class]; 151219089Spjd dmu_object_info_t doi; 152245264Sdelphij uint64_t count; 153219089Spjd char name[DDT_NAMELEN]; 154219089Spjd 155219089Spjd ddt_object_name(ddt, type, class, name); 156219089Spjd 157219089Spjd VERIFY(zap_update(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name, 158219089Spjd sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t), 159219089Spjd &ddt->ddt_histogram[type][class], tx) == 0); 160219089Spjd 161219089Spjd /* 162219089Spjd * Cache DDT statistics; this is the only time they'll change. 163219089Spjd */ 164219089Spjd VERIFY(ddt_object_info(ddt, type, class, &doi) == 0); 165245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, &count) == 0); 166219089Spjd 167245264Sdelphij ddo->ddo_count = count; 168219089Spjd ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9; 169219089Spjd ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size; 170219089Spjd} 171219089Spjd 172219089Spjdstatic int 173219089Spjdddt_object_lookup(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 174219089Spjd ddt_entry_t *dde) 175219089Spjd{ 176219089Spjd if (!ddt_object_exists(ddt, type, class)) 177249195Smm return (SET_ERROR(ENOENT)); 178219089Spjd 179219089Spjd return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os, 180219089Spjd ddt->ddt_object[type][class], dde)); 181219089Spjd} 182219089Spjd 183219089Spjdstatic void 184219089Spjdddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 185219089Spjd ddt_entry_t *dde) 186219089Spjd{ 187219089Spjd if (!ddt_object_exists(ddt, type, class)) 188219089Spjd return; 189219089Spjd 190219089Spjd ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, 191219089Spjd ddt->ddt_object[type][class], dde); 192219089Spjd} 193219089Spjd 194219089Spjdint 195219089Spjdddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 196219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 197219089Spjd{ 198219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 199219089Spjd 200219089Spjd return (ddt_ops[type]->ddt_op_update(ddt->ddt_os, 201219089Spjd ddt->ddt_object[type][class], dde, tx)); 202219089Spjd} 203219089Spjd 204219089Spjdstatic int 205219089Spjdddt_object_remove(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 206219089Spjd ddt_entry_t *dde, dmu_tx_t *tx) 207219089Spjd{ 208219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 209219089Spjd 210219089Spjd return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os, 211219089Spjd ddt->ddt_object[type][class], dde, tx)); 212219089Spjd} 213219089Spjd 214219089Spjdint 215219089Spjdddt_object_walk(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 216219089Spjd uint64_t *walk, ddt_entry_t *dde) 217219089Spjd{ 218219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 219219089Spjd 220219089Spjd return (ddt_ops[type]->ddt_op_walk(ddt->ddt_os, 221219089Spjd ddt->ddt_object[type][class], dde, walk)); 222219089Spjd} 223219089Spjd 224245264Sdelphijint 225245264Sdelphijddt_object_count(ddt_t *ddt, enum ddt_type type, enum ddt_class class, uint64_t *count) 226219089Spjd{ 227219089Spjd ASSERT(ddt_object_exists(ddt, type, class)); 228219089Spjd 229219089Spjd return (ddt_ops[type]->ddt_op_count(ddt->ddt_os, 230245264Sdelphij ddt->ddt_object[type][class], count)); 231219089Spjd} 232219089Spjd 233219089Spjdint 234219089Spjdddt_object_info(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 235219089Spjd dmu_object_info_t *doi) 236219089Spjd{ 237219089Spjd if (!ddt_object_exists(ddt, type, class)) 238249195Smm return (SET_ERROR(ENOENT)); 239219089Spjd 240219089Spjd return (dmu_object_info(ddt->ddt_os, ddt->ddt_object[type][class], 241219089Spjd doi)); 242219089Spjd} 243219089Spjd 244219089Spjdboolean_t 245219089Spjdddt_object_exists(ddt_t *ddt, enum ddt_type type, enum ddt_class class) 246219089Spjd{ 247219089Spjd return (!!ddt->ddt_object[type][class]); 248219089Spjd} 249219089Spjd 250219089Spjdvoid 251219089Spjdddt_object_name(ddt_t *ddt, enum ddt_type type, enum ddt_class class, 252219089Spjd char *name) 253219089Spjd{ 254219089Spjd (void) sprintf(name, DMU_POOL_DDT, 255219089Spjd zio_checksum_table[ddt->ddt_checksum].ci_name, 256219089Spjd ddt_ops[type]->ddt_op_name, ddt_class_name[class]); 257219089Spjd} 258219089Spjd 259219089Spjdvoid 260219089Spjdddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) 261219089Spjd{ 262219089Spjd ASSERT(txg != 0); 263219089Spjd 264219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 265219089Spjd bp->blk_dva[d] = ddp->ddp_dva[d]; 266219089Spjd BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); 267219089Spjd} 268219089Spjd 269219089Spjdvoid 270219089Spjdddt_bp_create(enum zio_checksum checksum, 271219089Spjd const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) 272219089Spjd{ 273219089Spjd BP_ZERO(bp); 274219089Spjd 275219089Spjd if (ddp != NULL) 276219089Spjd ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); 277219089Spjd 278219089Spjd bp->blk_cksum = ddk->ddk_cksum; 279219089Spjd bp->blk_fill = 1; 280219089Spjd 281219089Spjd BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); 282219089Spjd BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); 283219089Spjd BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); 284219089Spjd BP_SET_CHECKSUM(bp, checksum); 285219089Spjd BP_SET_TYPE(bp, DMU_OT_DEDUP); 286219089Spjd BP_SET_LEVEL(bp, 0); 287219089Spjd BP_SET_DEDUP(bp, 0); 288219089Spjd BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); 289219089Spjd} 290219089Spjd 291219089Spjdvoid 292219089Spjdddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) 293219089Spjd{ 294219089Spjd ddk->ddk_cksum = bp->blk_cksum; 295219089Spjd ddk->ddk_prop = 0; 296219089Spjd 297219089Spjd DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); 298219089Spjd DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); 299219089Spjd DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); 300219089Spjd} 301219089Spjd 302219089Spjdvoid 303219089Spjdddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp) 304219089Spjd{ 305219089Spjd ASSERT(ddp->ddp_phys_birth == 0); 306219089Spjd 307219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 308219089Spjd ddp->ddp_dva[d] = bp->blk_dva[d]; 309219089Spjd ddp->ddp_phys_birth = BP_PHYSICAL_BIRTH(bp); 310219089Spjd} 311219089Spjd 312219089Spjdvoid 313219089Spjdddt_phys_clear(ddt_phys_t *ddp) 314219089Spjd{ 315219089Spjd bzero(ddp, sizeof (*ddp)); 316219089Spjd} 317219089Spjd 318219089Spjdvoid 319219089Spjdddt_phys_addref(ddt_phys_t *ddp) 320219089Spjd{ 321219089Spjd ddp->ddp_refcnt++; 322219089Spjd} 323219089Spjd 324219089Spjdvoid 325219089Spjdddt_phys_decref(ddt_phys_t *ddp) 326219089Spjd{ 327219089Spjd ASSERT((int64_t)ddp->ddp_refcnt > 0); 328219089Spjd ddp->ddp_refcnt--; 329219089Spjd} 330219089Spjd 331219089Spjdvoid 332219089Spjdddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp, uint64_t txg) 333219089Spjd{ 334219089Spjd blkptr_t blk; 335219089Spjd 336219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 337219089Spjd ddt_phys_clear(ddp); 338219089Spjd zio_free(ddt->ddt_spa, txg, &blk); 339219089Spjd} 340219089Spjd 341219089Spjdddt_phys_t * 342219089Spjdddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp) 343219089Spjd{ 344219089Spjd ddt_phys_t *ddp = (ddt_phys_t *)dde->dde_phys; 345219089Spjd 346219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 347219089Spjd if (DVA_EQUAL(BP_IDENTITY(bp), &ddp->ddp_dva[0]) && 348219089Spjd BP_PHYSICAL_BIRTH(bp) == ddp->ddp_phys_birth) 349219089Spjd return (ddp); 350219089Spjd } 351219089Spjd return (NULL); 352219089Spjd} 353219089Spjd 354219089Spjduint64_t 355219089Spjdddt_phys_total_refcnt(const ddt_entry_t *dde) 356219089Spjd{ 357219089Spjd uint64_t refcnt = 0; 358219089Spjd 359219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) 360219089Spjd refcnt += dde->dde_phys[p].ddp_refcnt; 361219089Spjd 362219089Spjd return (refcnt); 363219089Spjd} 364219089Spjd 365219089Spjdstatic void 366219089Spjdddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) 367219089Spjd{ 368219089Spjd spa_t *spa = ddt->ddt_spa; 369219089Spjd ddt_phys_t *ddp = dde->dde_phys; 370219089Spjd ddt_key_t *ddk = &dde->dde_key; 371219089Spjd uint64_t lsize = DDK_GET_LSIZE(ddk); 372219089Spjd uint64_t psize = DDK_GET_PSIZE(ddk); 373219089Spjd 374219089Spjd bzero(dds, sizeof (*dds)); 375219089Spjd 376219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 377219089Spjd uint64_t dsize = 0; 378219089Spjd uint64_t refcnt = ddp->ddp_refcnt; 379219089Spjd 380219089Spjd if (ddp->ddp_phys_birth == 0) 381219089Spjd continue; 382219089Spjd 383219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++) 384219089Spjd dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); 385219089Spjd 386219089Spjd dds->dds_blocks += 1; 387219089Spjd dds->dds_lsize += lsize; 388219089Spjd dds->dds_psize += psize; 389219089Spjd dds->dds_dsize += dsize; 390219089Spjd 391219089Spjd dds->dds_ref_blocks += refcnt; 392219089Spjd dds->dds_ref_lsize += lsize * refcnt; 393219089Spjd dds->dds_ref_psize += psize * refcnt; 394219089Spjd dds->dds_ref_dsize += dsize * refcnt; 395219089Spjd } 396219089Spjd} 397219089Spjd 398219089Spjdvoid 399219089Spjdddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg) 400219089Spjd{ 401219089Spjd const uint64_t *s = (const uint64_t *)src; 402219089Spjd uint64_t *d = (uint64_t *)dst; 403219089Spjd uint64_t *d_end = (uint64_t *)(dst + 1); 404219089Spjd 405219089Spjd ASSERT(neg == 0 || neg == -1ULL); /* add or subtract */ 406219089Spjd 407219089Spjd while (d < d_end) 408219089Spjd *d++ += (*s++ ^ neg) - neg; 409219089Spjd} 410219089Spjd 411219089Spjdstatic void 412219089Spjdddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg) 413219089Spjd{ 414219089Spjd ddt_stat_t dds; 415219089Spjd ddt_histogram_t *ddh; 416219089Spjd int bucket; 417219089Spjd 418219089Spjd ddt_stat_generate(ddt, dde, &dds); 419219089Spjd 420265740Sdelphij bucket = highbit64(dds.dds_ref_blocks) - 1; 421219089Spjd ASSERT(bucket >= 0); 422219089Spjd 423219089Spjd ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class]; 424219089Spjd 425219089Spjd ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg); 426219089Spjd} 427219089Spjd 428219089Spjdvoid 429219089Spjdddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src) 430219089Spjd{ 431219089Spjd for (int h = 0; h < 64; h++) 432219089Spjd ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0); 433219089Spjd} 434219089Spjd 435219089Spjdvoid 436219089Spjdddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh) 437219089Spjd{ 438219089Spjd bzero(dds, sizeof (*dds)); 439219089Spjd 440219089Spjd for (int h = 0; h < 64; h++) 441219089Spjd ddt_stat_add(dds, &ddh->ddh_stat[h], 0); 442219089Spjd} 443219089Spjd 444219089Spjdboolean_t 445219089Spjdddt_histogram_empty(const ddt_histogram_t *ddh) 446219089Spjd{ 447219089Spjd const uint64_t *s = (const uint64_t *)ddh; 448219089Spjd const uint64_t *s_end = (const uint64_t *)(ddh + 1); 449219089Spjd 450219089Spjd while (s < s_end) 451219089Spjd if (*s++ != 0) 452219089Spjd return (B_FALSE); 453219089Spjd 454219089Spjd return (B_TRUE); 455219089Spjd} 456219089Spjd 457219089Spjdvoid 458219089Spjdddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total) 459219089Spjd{ 460219089Spjd /* Sum the statistics we cached in ddt_object_sync(). */ 461219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 462219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 463219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 464219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 465219089Spjd class++) { 466219089Spjd ddt_object_t *ddo = 467219089Spjd &ddt->ddt_object_stats[type][class]; 468219089Spjd ddo_total->ddo_count += ddo->ddo_count; 469219089Spjd ddo_total->ddo_dspace += ddo->ddo_dspace; 470219089Spjd ddo_total->ddo_mspace += ddo->ddo_mspace; 471219089Spjd } 472219089Spjd } 473219089Spjd } 474219089Spjd 475219089Spjd /* ... and compute the averages. */ 476219089Spjd if (ddo_total->ddo_count != 0) { 477219089Spjd ddo_total->ddo_dspace /= ddo_total->ddo_count; 478219089Spjd ddo_total->ddo_mspace /= ddo_total->ddo_count; 479219089Spjd } 480219089Spjd} 481219089Spjd 482219089Spjdvoid 483219089Spjdddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) 484219089Spjd{ 485219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 486219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 487219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 488219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 489219089Spjd class++) { 490219089Spjd ddt_histogram_add(ddh, 491219089Spjd &ddt->ddt_histogram_cache[type][class]); 492219089Spjd } 493219089Spjd } 494219089Spjd } 495219089Spjd} 496219089Spjd 497219089Spjdvoid 498219089Spjdddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) 499219089Spjd{ 500219089Spjd ddt_histogram_t *ddh_total; 501219089Spjd 502219089Spjd ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); 503219089Spjd ddt_get_dedup_histogram(spa, ddh_total); 504219089Spjd ddt_histogram_stat(dds_total, ddh_total); 505219089Spjd kmem_free(ddh_total, sizeof (ddt_histogram_t)); 506219089Spjd} 507219089Spjd 508219089Spjduint64_t 509219089Spjdddt_get_dedup_dspace(spa_t *spa) 510219089Spjd{ 511219089Spjd ddt_stat_t dds_total = { 0 }; 512219089Spjd 513219089Spjd ddt_get_dedup_stats(spa, &dds_total); 514219089Spjd return (dds_total.dds_ref_dsize - dds_total.dds_dsize); 515219089Spjd} 516219089Spjd 517219089Spjduint64_t 518219089Spjdddt_get_pool_dedup_ratio(spa_t *spa) 519219089Spjd{ 520219089Spjd ddt_stat_t dds_total = { 0 }; 521219089Spjd 522219089Spjd ddt_get_dedup_stats(spa, &dds_total); 523219089Spjd if (dds_total.dds_dsize == 0) 524219089Spjd return (100); 525219089Spjd 526219089Spjd return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize); 527219089Spjd} 528219089Spjd 529219089Spjdint 530219089Spjdddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) 531219089Spjd{ 532219089Spjd spa_t *spa = ddt->ddt_spa; 533219089Spjd uint64_t total_refcnt = 0; 534219089Spjd uint64_t ditto = spa->spa_dedup_ditto; 535219089Spjd int total_copies = 0; 536219089Spjd int desired_copies = 0; 537219089Spjd 538219089Spjd for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { 539219089Spjd ddt_phys_t *ddp = &dde->dde_phys[p]; 540219089Spjd zio_t *zio = dde->dde_lead_zio[p]; 541219089Spjd uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */ 542219089Spjd if (zio != NULL) 543219089Spjd refcnt += zio->io_parent_count; /* pending refs */ 544219089Spjd if (ddp == ddp_willref) 545219089Spjd refcnt++; /* caller's ref */ 546219089Spjd if (refcnt != 0) { 547219089Spjd total_refcnt += refcnt; 548219089Spjd total_copies += p; 549219089Spjd } 550219089Spjd } 551219089Spjd 552219089Spjd if (ditto == 0 || ditto > UINT32_MAX) 553219089Spjd ditto = UINT32_MAX; 554219089Spjd 555219089Spjd if (total_refcnt >= 1) 556219089Spjd desired_copies++; 557219089Spjd if (total_refcnt >= ditto) 558219089Spjd desired_copies++; 559219089Spjd if (total_refcnt >= ditto * ditto) 560219089Spjd desired_copies++; 561219089Spjd 562219089Spjd return (MAX(desired_copies, total_copies) - total_copies); 563219089Spjd} 564219089Spjd 565219089Spjdint 566219089Spjdddt_ditto_copies_present(ddt_entry_t *dde) 567219089Spjd{ 568219089Spjd ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO]; 569219089Spjd dva_t *dva = ddp->ddp_dva; 570219089Spjd int copies = 0 - DVA_GET_GANG(dva); 571219089Spjd 572219089Spjd for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) 573219089Spjd if (DVA_IS_VALID(dva)) 574219089Spjd copies++; 575219089Spjd 576219089Spjd ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP); 577219089Spjd 578219089Spjd return (copies); 579219089Spjd} 580219089Spjd 581219089Spjdsize_t 582219089Spjdddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len) 583219089Spjd{ 584219089Spjd uchar_t *version = dst++; 585219089Spjd int cpfunc = ZIO_COMPRESS_ZLE; 586219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 587219089Spjd size_t c_len; 588219089Spjd 589219089Spjd ASSERT(d_len >= s_len + 1); /* no compression plus version byte */ 590219089Spjd 591219089Spjd c_len = ci->ci_compress(src, dst, s_len, d_len - 1, ci->ci_level); 592219089Spjd 593219089Spjd if (c_len == s_len) { 594219089Spjd cpfunc = ZIO_COMPRESS_OFF; 595219089Spjd bcopy(src, dst, s_len); 596219089Spjd } 597219089Spjd 598263397Sdelphij *version = cpfunc; 599263397Sdelphij /* CONSTCOND */ 600263397Sdelphij if (ZFS_HOST_BYTEORDER) 601263397Sdelphij *version |= DDT_COMPRESS_BYTEORDER_MASK; 602219089Spjd 603219089Spjd return (c_len + 1); 604219089Spjd} 605219089Spjd 606219089Spjdvoid 607219089Spjdddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 608219089Spjd{ 609219089Spjd uchar_t version = *src++; 610219089Spjd int cpfunc = version & DDT_COMPRESS_FUNCTION_MASK; 611219089Spjd zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 612219089Spjd 613219089Spjd if (ci->ci_decompress != NULL) 614219089Spjd (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 615219089Spjd else 616219089Spjd bcopy(src, dst, d_len); 617219089Spjd 618263397Sdelphij if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) != 619263397Sdelphij (ZFS_HOST_BYTEORDER != 0)) 620219089Spjd byteswap_uint64_array(dst, d_len); 621219089Spjd} 622219089Spjd 623219089Spjdddt_t * 624219089Spjdddt_select_by_checksum(spa_t *spa, enum zio_checksum c) 625219089Spjd{ 626219089Spjd return (spa->spa_ddt[c]); 627219089Spjd} 628219089Spjd 629219089Spjdddt_t * 630219089Spjdddt_select(spa_t *spa, const blkptr_t *bp) 631219089Spjd{ 632219089Spjd return (spa->spa_ddt[BP_GET_CHECKSUM(bp)]); 633219089Spjd} 634219089Spjd 635219089Spjdvoid 636219089Spjdddt_enter(ddt_t *ddt) 637219089Spjd{ 638219089Spjd mutex_enter(&ddt->ddt_lock); 639219089Spjd} 640219089Spjd 641219089Spjdvoid 642219089Spjdddt_exit(ddt_t *ddt) 643219089Spjd{ 644219089Spjd mutex_exit(&ddt->ddt_lock); 645219089Spjd} 646219089Spjd 647219089Spjdstatic ddt_entry_t * 648219089Spjdddt_alloc(const ddt_key_t *ddk) 649219089Spjd{ 650219089Spjd ddt_entry_t *dde; 651219089Spjd 652219089Spjd dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); 653219089Spjd cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); 654219089Spjd 655219089Spjd dde->dde_key = *ddk; 656219089Spjd 657219089Spjd return (dde); 658219089Spjd} 659219089Spjd 660219089Spjdstatic void 661219089Spjdddt_free(ddt_entry_t *dde) 662219089Spjd{ 663219089Spjd ASSERT(!dde->dde_loading); 664219089Spjd 665219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++) 666219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 667219089Spjd 668219089Spjd if (dde->dde_repair_data != NULL) 669219089Spjd zio_buf_free(dde->dde_repair_data, 670219089Spjd DDK_GET_PSIZE(&dde->dde_key)); 671219089Spjd 672219089Spjd cv_destroy(&dde->dde_cv); 673219089Spjd kmem_free(dde, sizeof (*dde)); 674219089Spjd} 675219089Spjd 676219089Spjdvoid 677219089Spjdddt_remove(ddt_t *ddt, ddt_entry_t *dde) 678219089Spjd{ 679219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 680219089Spjd 681219089Spjd avl_remove(&ddt->ddt_tree, dde); 682219089Spjd ddt_free(dde); 683219089Spjd} 684219089Spjd 685219089Spjdddt_entry_t * 686219089Spjdddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add) 687219089Spjd{ 688219089Spjd ddt_entry_t *dde, dde_search; 689219089Spjd enum ddt_type type; 690219089Spjd enum ddt_class class; 691219089Spjd avl_index_t where; 692219089Spjd int error; 693219089Spjd 694219089Spjd ASSERT(MUTEX_HELD(&ddt->ddt_lock)); 695219089Spjd 696219089Spjd ddt_key_fill(&dde_search.dde_key, bp); 697219089Spjd 698219089Spjd dde = avl_find(&ddt->ddt_tree, &dde_search, &where); 699219089Spjd if (dde == NULL) { 700219089Spjd if (!add) 701219089Spjd return (NULL); 702219089Spjd dde = ddt_alloc(&dde_search.dde_key); 703219089Spjd avl_insert(&ddt->ddt_tree, dde, where); 704219089Spjd } 705219089Spjd 706219089Spjd while (dde->dde_loading) 707219089Spjd cv_wait(&dde->dde_cv, &ddt->ddt_lock); 708219089Spjd 709219089Spjd if (dde->dde_loaded) 710219089Spjd return (dde); 711219089Spjd 712219089Spjd dde->dde_loading = B_TRUE; 713219089Spjd 714219089Spjd ddt_exit(ddt); 715219089Spjd 716219089Spjd error = ENOENT; 717219089Spjd 718219089Spjd for (type = 0; type < DDT_TYPES; type++) { 719219089Spjd for (class = 0; class < DDT_CLASSES; class++) { 720219089Spjd error = ddt_object_lookup(ddt, type, class, dde); 721219089Spjd if (error != ENOENT) 722219089Spjd break; 723219089Spjd } 724219089Spjd if (error != ENOENT) 725219089Spjd break; 726219089Spjd } 727219089Spjd 728219089Spjd ASSERT(error == 0 || error == ENOENT); 729219089Spjd 730219089Spjd ddt_enter(ddt); 731219089Spjd 732219089Spjd ASSERT(dde->dde_loaded == B_FALSE); 733219089Spjd ASSERT(dde->dde_loading == B_TRUE); 734219089Spjd 735219089Spjd dde->dde_type = type; /* will be DDT_TYPES if no entry found */ 736219089Spjd dde->dde_class = class; /* will be DDT_CLASSES if no entry found */ 737219089Spjd dde->dde_loaded = B_TRUE; 738219089Spjd dde->dde_loading = B_FALSE; 739219089Spjd 740219089Spjd if (error == 0) 741219089Spjd ddt_stat_update(ddt, dde, -1ULL); 742219089Spjd 743219089Spjd cv_broadcast(&dde->dde_cv); 744219089Spjd 745219089Spjd return (dde); 746219089Spjd} 747219089Spjd 748219089Spjdvoid 749219089Spjdddt_prefetch(spa_t *spa, const blkptr_t *bp) 750219089Spjd{ 751219089Spjd ddt_t *ddt; 752219089Spjd ddt_entry_t dde; 753219089Spjd 754219089Spjd if (!zfs_dedup_prefetch || bp == NULL || !BP_GET_DEDUP(bp)) 755219089Spjd return; 756219089Spjd 757219089Spjd /* 758219089Spjd * We only remove the DDT once all tables are empty and only 759219089Spjd * prefetch dedup blocks when there are entries in the DDT. 760219089Spjd * Thus no locking is required as the DDT can't disappear on us. 761219089Spjd */ 762219089Spjd ddt = ddt_select(spa, bp); 763219089Spjd ddt_key_fill(&dde.dde_key, bp); 764219089Spjd 765219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 766219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 767219089Spjd ddt_object_prefetch(ddt, type, class, &dde); 768219089Spjd } 769219089Spjd } 770219089Spjd} 771219089Spjd 772219089Spjdint 773219089Spjdddt_entry_compare(const void *x1, const void *x2) 774219089Spjd{ 775219089Spjd const ddt_entry_t *dde1 = x1; 776219089Spjd const ddt_entry_t *dde2 = x2; 777219089Spjd const uint64_t *u1 = (const uint64_t *)&dde1->dde_key; 778219089Spjd const uint64_t *u2 = (const uint64_t *)&dde2->dde_key; 779219089Spjd 780219089Spjd for (int i = 0; i < DDT_KEY_WORDS; i++) { 781219089Spjd if (u1[i] < u2[i]) 782219089Spjd return (-1); 783219089Spjd if (u1[i] > u2[i]) 784219089Spjd return (1); 785219089Spjd } 786219089Spjd 787219089Spjd return (0); 788219089Spjd} 789219089Spjd 790219089Spjdstatic ddt_t * 791219089Spjdddt_table_alloc(spa_t *spa, enum zio_checksum c) 792219089Spjd{ 793219089Spjd ddt_t *ddt; 794219089Spjd 795219089Spjd ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); 796219089Spjd 797219089Spjd mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); 798219089Spjd avl_create(&ddt->ddt_tree, ddt_entry_compare, 799219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 800219089Spjd avl_create(&ddt->ddt_repair_tree, ddt_entry_compare, 801219089Spjd sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); 802219089Spjd ddt->ddt_checksum = c; 803219089Spjd ddt->ddt_spa = spa; 804219089Spjd ddt->ddt_os = spa->spa_meta_objset; 805219089Spjd 806219089Spjd return (ddt); 807219089Spjd} 808219089Spjd 809219089Spjdstatic void 810219089Spjdddt_table_free(ddt_t *ddt) 811219089Spjd{ 812219089Spjd ASSERT(avl_numnodes(&ddt->ddt_tree) == 0); 813219089Spjd ASSERT(avl_numnodes(&ddt->ddt_repair_tree) == 0); 814219089Spjd avl_destroy(&ddt->ddt_tree); 815219089Spjd avl_destroy(&ddt->ddt_repair_tree); 816219089Spjd mutex_destroy(&ddt->ddt_lock); 817219089Spjd kmem_free(ddt, sizeof (*ddt)); 818219089Spjd} 819219089Spjd 820219089Spjdvoid 821219089Spjdddt_create(spa_t *spa) 822219089Spjd{ 823219089Spjd spa->spa_dedup_checksum = ZIO_DEDUPCHECKSUM; 824219089Spjd 825219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) 826219089Spjd spa->spa_ddt[c] = ddt_table_alloc(spa, c); 827219089Spjd} 828219089Spjd 829219089Spjdint 830219089Spjdddt_load(spa_t *spa) 831219089Spjd{ 832219089Spjd int error; 833219089Spjd 834219089Spjd ddt_create(spa); 835219089Spjd 836219089Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 837219089Spjd DMU_POOL_DDT_STATS, sizeof (uint64_t), 1, 838219089Spjd &spa->spa_ddt_stat_object); 839219089Spjd 840219089Spjd if (error) 841219089Spjd return (error == ENOENT ? 0 : error); 842219089Spjd 843219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 844219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 845219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 846219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; 847219089Spjd class++) { 848219089Spjd error = ddt_object_load(ddt, type, class); 849219089Spjd if (error != 0 && error != ENOENT) 850219089Spjd return (error); 851219089Spjd } 852219089Spjd } 853219089Spjd 854219089Spjd /* 855219089Spjd * Seed the cached histograms. 856219089Spjd */ 857219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 858219089Spjd sizeof (ddt->ddt_histogram)); 859219089Spjd } 860219089Spjd 861219089Spjd return (0); 862219089Spjd} 863219089Spjd 864219089Spjdvoid 865219089Spjdddt_unload(spa_t *spa) 866219089Spjd{ 867219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 868219089Spjd if (spa->spa_ddt[c]) { 869219089Spjd ddt_table_free(spa->spa_ddt[c]); 870219089Spjd spa->spa_ddt[c] = NULL; 871219089Spjd } 872219089Spjd } 873219089Spjd} 874219089Spjd 875219089Spjdboolean_t 876219089Spjdddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) 877219089Spjd{ 878219089Spjd ddt_t *ddt; 879219089Spjd ddt_entry_t dde; 880219089Spjd 881219089Spjd if (!BP_GET_DEDUP(bp)) 882219089Spjd return (B_FALSE); 883219089Spjd 884219089Spjd if (max_class == DDT_CLASS_UNIQUE) 885219089Spjd return (B_TRUE); 886219089Spjd 887219089Spjd ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; 888219089Spjd 889219089Spjd ddt_key_fill(&dde.dde_key, bp); 890219089Spjd 891219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) 892219089Spjd for (enum ddt_class class = 0; class <= max_class; class++) 893219089Spjd if (ddt_object_lookup(ddt, type, class, &dde) == 0) 894219089Spjd return (B_TRUE); 895219089Spjd 896219089Spjd return (B_FALSE); 897219089Spjd} 898219089Spjd 899219089Spjdddt_entry_t * 900219089Spjdddt_repair_start(ddt_t *ddt, const blkptr_t *bp) 901219089Spjd{ 902219089Spjd ddt_key_t ddk; 903219089Spjd ddt_entry_t *dde; 904219089Spjd 905219089Spjd ddt_key_fill(&ddk, bp); 906219089Spjd 907219089Spjd dde = ddt_alloc(&ddk); 908219089Spjd 909219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 910219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 911219089Spjd /* 912219089Spjd * We can only do repair if there are multiple copies 913219089Spjd * of the block. For anything in the UNIQUE class, 914219089Spjd * there's definitely only one copy, so don't even try. 915219089Spjd */ 916219089Spjd if (class != DDT_CLASS_UNIQUE && 917219089Spjd ddt_object_lookup(ddt, type, class, dde) == 0) 918219089Spjd return (dde); 919219089Spjd } 920219089Spjd } 921219089Spjd 922219089Spjd bzero(dde->dde_phys, sizeof (dde->dde_phys)); 923219089Spjd 924219089Spjd return (dde); 925219089Spjd} 926219089Spjd 927219089Spjdvoid 928219089Spjdddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) 929219089Spjd{ 930219089Spjd avl_index_t where; 931219089Spjd 932219089Spjd ddt_enter(ddt); 933219089Spjd 934219089Spjd if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && 935219089Spjd avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) 936219089Spjd avl_insert(&ddt->ddt_repair_tree, dde, where); 937219089Spjd else 938219089Spjd ddt_free(dde); 939219089Spjd 940219089Spjd ddt_exit(ddt); 941219089Spjd} 942219089Spjd 943219089Spjdstatic void 944219089Spjdddt_repair_entry_done(zio_t *zio) 945219089Spjd{ 946219089Spjd ddt_entry_t *rdde = zio->io_private; 947219089Spjd 948219089Spjd ddt_free(rdde); 949219089Spjd} 950219089Spjd 951219089Spjdstatic void 952219089Spjdddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) 953219089Spjd{ 954219089Spjd ddt_phys_t *ddp = dde->dde_phys; 955219089Spjd ddt_phys_t *rddp = rdde->dde_phys; 956219089Spjd ddt_key_t *ddk = &dde->dde_key; 957219089Spjd ddt_key_t *rddk = &rdde->dde_key; 958219089Spjd zio_t *zio; 959219089Spjd blkptr_t blk; 960219089Spjd 961219089Spjd zio = zio_null(rio, rio->io_spa, NULL, 962219089Spjd ddt_repair_entry_done, rdde, rio->io_flags); 963219089Spjd 964219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++, rddp++) { 965219089Spjd if (ddp->ddp_phys_birth == 0 || 966219089Spjd ddp->ddp_phys_birth != rddp->ddp_phys_birth || 967219089Spjd bcmp(ddp->ddp_dva, rddp->ddp_dva, sizeof (ddp->ddp_dva))) 968219089Spjd continue; 969219089Spjd ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); 970219089Spjd zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, 971219089Spjd rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, 972219089Spjd ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); 973219089Spjd } 974219089Spjd 975219089Spjd zio_nowait(zio); 976219089Spjd} 977219089Spjd 978219089Spjdstatic void 979219089Spjdddt_repair_table(ddt_t *ddt, zio_t *rio) 980219089Spjd{ 981219089Spjd spa_t *spa = ddt->ddt_spa; 982219089Spjd ddt_entry_t *dde, *rdde_next, *rdde; 983219089Spjd avl_tree_t *t = &ddt->ddt_repair_tree; 984219089Spjd blkptr_t blk; 985219089Spjd 986219089Spjd if (spa_sync_pass(spa) > 1) 987219089Spjd return; 988219089Spjd 989219089Spjd ddt_enter(ddt); 990219089Spjd for (rdde = avl_first(t); rdde != NULL; rdde = rdde_next) { 991219089Spjd rdde_next = AVL_NEXT(t, rdde); 992219089Spjd avl_remove(&ddt->ddt_repair_tree, rdde); 993219089Spjd ddt_exit(ddt); 994219089Spjd ddt_bp_create(ddt->ddt_checksum, &rdde->dde_key, NULL, &blk); 995219089Spjd dde = ddt_repair_start(ddt, &blk); 996219089Spjd ddt_repair_entry(ddt, dde, rdde, rio); 997219089Spjd ddt_repair_done(ddt, dde); 998219089Spjd ddt_enter(ddt); 999219089Spjd } 1000219089Spjd ddt_exit(ddt); 1001219089Spjd} 1002219089Spjd 1003219089Spjdstatic void 1004219089Spjdddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg) 1005219089Spjd{ 1006219089Spjd dsl_pool_t *dp = ddt->ddt_spa->spa_dsl_pool; 1007219089Spjd ddt_phys_t *ddp = dde->dde_phys; 1008219089Spjd ddt_key_t *ddk = &dde->dde_key; 1009219089Spjd enum ddt_type otype = dde->dde_type; 1010219089Spjd enum ddt_type ntype = DDT_TYPE_CURRENT; 1011219089Spjd enum ddt_class oclass = dde->dde_class; 1012219089Spjd enum ddt_class nclass; 1013219089Spjd uint64_t total_refcnt = 0; 1014219089Spjd 1015219089Spjd ASSERT(dde->dde_loaded); 1016219089Spjd ASSERT(!dde->dde_loading); 1017219089Spjd 1018219089Spjd for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { 1019219089Spjd ASSERT(dde->dde_lead_zio[p] == NULL); 1020219089Spjd ASSERT((int64_t)ddp->ddp_refcnt >= 0); 1021219089Spjd if (ddp->ddp_phys_birth == 0) { 1022219089Spjd ASSERT(ddp->ddp_refcnt == 0); 1023219089Spjd continue; 1024219089Spjd } 1025219089Spjd if (p == DDT_PHYS_DITTO) { 1026219089Spjd if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0) 1027219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1028219089Spjd continue; 1029219089Spjd } 1030219089Spjd if (ddp->ddp_refcnt == 0) 1031219089Spjd ddt_phys_free(ddt, ddk, ddp, txg); 1032219089Spjd total_refcnt += ddp->ddp_refcnt; 1033219089Spjd } 1034219089Spjd 1035219089Spjd if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0) 1036219089Spjd nclass = DDT_CLASS_DITTO; 1037219089Spjd else if (total_refcnt > 1) 1038219089Spjd nclass = DDT_CLASS_DUPLICATE; 1039219089Spjd else 1040219089Spjd nclass = DDT_CLASS_UNIQUE; 1041219089Spjd 1042219089Spjd if (otype != DDT_TYPES && 1043219089Spjd (otype != ntype || oclass != nclass || total_refcnt == 0)) { 1044219089Spjd VERIFY(ddt_object_remove(ddt, otype, oclass, dde, tx) == 0); 1045219089Spjd ASSERT(ddt_object_lookup(ddt, otype, oclass, dde) == ENOENT); 1046219089Spjd } 1047219089Spjd 1048219089Spjd if (total_refcnt != 0) { 1049219089Spjd dde->dde_type = ntype; 1050219089Spjd dde->dde_class = nclass; 1051219089Spjd ddt_stat_update(ddt, dde, 0); 1052219089Spjd if (!ddt_object_exists(ddt, ntype, nclass)) 1053219089Spjd ddt_object_create(ddt, ntype, nclass, tx); 1054219089Spjd VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0); 1055219089Spjd 1056219089Spjd /* 1057219089Spjd * If the class changes, the order that we scan this bp 1058219089Spjd * changes. If it decreases, we could miss it, so 1059219089Spjd * scan it right now. (This covers both class changing 1060219089Spjd * while we are doing ddt_walk(), and when we are 1061219089Spjd * traversing.) 1062219089Spjd */ 1063219089Spjd if (nclass < oclass) { 1064219089Spjd dsl_scan_ddt_entry(dp->dp_scan, 1065219089Spjd ddt->ddt_checksum, dde, tx); 1066219089Spjd } 1067219089Spjd } 1068219089Spjd} 1069219089Spjd 1070219089Spjdstatic void 1071219089Spjdddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg) 1072219089Spjd{ 1073219089Spjd spa_t *spa = ddt->ddt_spa; 1074219089Spjd ddt_entry_t *dde; 1075219089Spjd void *cookie = NULL; 1076219089Spjd 1077219089Spjd if (avl_numnodes(&ddt->ddt_tree) == 0) 1078219089Spjd return; 1079219089Spjd 1080219089Spjd ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP); 1081219089Spjd 1082219089Spjd if (spa->spa_ddt_stat_object == 0) { 1083236884Smm spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os, 1084236884Smm DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT, 1085236884Smm DMU_POOL_DDT_STATS, tx); 1086219089Spjd } 1087219089Spjd 1088219089Spjd while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) { 1089219089Spjd ddt_sync_entry(ddt, dde, tx, txg); 1090219089Spjd ddt_free(dde); 1091219089Spjd } 1092219089Spjd 1093219089Spjd for (enum ddt_type type = 0; type < DDT_TYPES; type++) { 1094245264Sdelphij uint64_t add, count = 0; 1095219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1096219089Spjd if (ddt_object_exists(ddt, type, class)) { 1097219089Spjd ddt_object_sync(ddt, type, class, tx); 1098245264Sdelphij VERIFY(ddt_object_count(ddt, type, class, 1099245264Sdelphij &add) == 0); 1100245264Sdelphij count += add; 1101219089Spjd } 1102219089Spjd } 1103219089Spjd for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { 1104219089Spjd if (count == 0 && ddt_object_exists(ddt, type, class)) 1105219089Spjd ddt_object_destroy(ddt, type, class, tx); 1106219089Spjd } 1107219089Spjd } 1108219089Spjd 1109219089Spjd bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache, 1110219089Spjd sizeof (ddt->ddt_histogram)); 1111219089Spjd} 1112219089Spjd 1113219089Spjdvoid 1114219089Spjdddt_sync(spa_t *spa, uint64_t txg) 1115219089Spjd{ 1116219089Spjd dmu_tx_t *tx; 1117219089Spjd zio_t *rio = zio_root(spa, NULL, NULL, 1118219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1119219089Spjd 1120219089Spjd ASSERT(spa_syncing_txg(spa) == txg); 1121219089Spjd 1122219089Spjd tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 1123219089Spjd 1124219089Spjd for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { 1125219089Spjd ddt_t *ddt = spa->spa_ddt[c]; 1126219089Spjd if (ddt == NULL) 1127219089Spjd continue; 1128219089Spjd ddt_sync_table(ddt, tx, txg); 1129219089Spjd ddt_repair_table(ddt, rio); 1130219089Spjd } 1131219089Spjd 1132219089Spjd (void) zio_wait(rio); 1133219089Spjd 1134219089Spjd dmu_tx_commit(tx); 1135219089Spjd} 1136219089Spjd 1137219089Spjdint 1138219089Spjdddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde) 1139219089Spjd{ 1140219089Spjd do { 1141219089Spjd do { 1142219089Spjd do { 1143219089Spjd ddt_t *ddt = spa->spa_ddt[ddb->ddb_checksum]; 1144219089Spjd int error = ENOENT; 1145219089Spjd if (ddt_object_exists(ddt, ddb->ddb_type, 1146219089Spjd ddb->ddb_class)) { 1147219089Spjd error = ddt_object_walk(ddt, 1148219089Spjd ddb->ddb_type, ddb->ddb_class, 1149219089Spjd &ddb->ddb_cursor, dde); 1150219089Spjd } 1151219089Spjd dde->dde_type = ddb->ddb_type; 1152219089Spjd dde->dde_class = ddb->ddb_class; 1153219089Spjd if (error == 0) 1154219089Spjd return (0); 1155219089Spjd if (error != ENOENT) 1156219089Spjd return (error); 1157219089Spjd ddb->ddb_cursor = 0; 1158219089Spjd } while (++ddb->ddb_checksum < ZIO_CHECKSUM_FUNCTIONS); 1159219089Spjd ddb->ddb_checksum = 0; 1160219089Spjd } while (++ddb->ddb_type < DDT_TYPES); 1161219089Spjd ddb->ddb_type = 0; 1162219089Spjd } while (++ddb->ddb_class < DDT_CLASSES); 1163219089Spjd 1164249195Smm return (SET_ERROR(ENOENT)); 1165219089Spjd} 1166