/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

25219089Spjd#ifndef _SYS_DDT_H
26219089Spjd#define	_SYS_DDT_H
27219089Spjd
28219089Spjd#include <sys/sysmacros.h>
29219089Spjd#include <sys/types.h>
30219089Spjd#include <sys/fs/zfs.h>
31219089Spjd#include <sys/zio.h>
32219089Spjd#include <sys/dmu.h>
33219089Spjd
34219089Spjd#ifdef	__cplusplus
35219089Spjdextern "C" {
36219089Spjd#endif
37219089Spjd
/*
 * On-disk DDT formats, in the desired search order (newest version first).
 *
 * DDT_TYPES is not a format: as the last enumerator it equals the number of
 * formats, and this header uses it to size per-type arrays.
 */
enum ddt_type {
	DDT_TYPE_ZAP = 0,
	DDT_TYPES
};

/*
 * DDT classes, in the desired search order (highest replication level first).
 *
 * DDT_CLASSES is not a class: as the last enumerator it equals the number of
 * classes, and this header uses it to size per-class arrays.
 */
enum ddt_class {
	DDT_CLASS_DITTO = 0,
	DDT_CLASS_DUPLICATE,
	DDT_CLASS_UNIQUE,
	DDT_CLASSES
};

/*
 * Numerically equal to DDT_TYPE_ZAP, the only format in enum ddt_type.
 * NOTE(review): presumably the format used for newly created entries --
 * confirm against the callers.
 */
#define	DDT_TYPE_CURRENT		0

/*
 * Two disjoint masks that together cover one byte (0x80 | 0x7f == 0xff).
 * NOTE(review): judging by the names, the top bit records byte order and the
 * low seven bits a compression function -- confirm in the
 * ddt_compress()/ddt_decompress() implementation.
 */
#define	DDT_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_COMPRESS_FUNCTION_MASK	0x7f

61219089Spjd/*
62219089Spjd * On-disk ddt entry:  key (name) and physical storage (value).
63219089Spjd */
64219089Spjdtypedef struct ddt_key {
65219089Spjd	zio_cksum_t	ddk_cksum;	/* 256-bit block checksum */
66251631Sdelphij	/*
67251631Sdelphij	 * Encoded with logical & physical size, and compression, as follows:
68251631Sdelphij	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
69251631Sdelphij	 *   |   0   |   0   |   0   | comp  |     PSIZE     |     LSIZE     |
70251631Sdelphij	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
71251631Sdelphij	 */
72251631Sdelphij	uint64_t	ddk_prop;
73219089Spjd} ddt_key_t;
74219089Spjd
/*
 * Accessors for the fields packed into ddk_prop.  "ddk" is a pointer to a
 * ddt_key_t and "x" is the new field value.
 *
 * LSIZE and PSIZE are 16-bit fields at bit offsets 0 and 16.  Both go
 * through the _SB bitfield variants with SPA_MINBLOCKSHIFT and 1 as trailing
 * arguments, so the stored field is not the raw size.
 * NOTE(review): presumably shifted by SPA_MINBLOCKSHIFT and biased by 1 --
 * confirm against the BF64_GET_SB/BF64_SET_SB definitions.
 */
#define	DDK_GET_LSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_LSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)

#define	DDK_GET_PSIZE(ddk)	\
	BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define	DDK_SET_PSIZE(ddk, x)	\
	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

/* The compression field is 8 bits wide at bit offset 32, stored as-is. */
#define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 8)
#define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 8, x)

/* Size of a ddt_key_t expressed in 64-bit words. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))

90219089Spjdtypedef struct ddt_phys {
91219089Spjd	dva_t		ddp_dva[SPA_DVAS_PER_BP];
92219089Spjd	uint64_t	ddp_refcnt;
93219089Spjd	uint64_t	ddp_phys_birth;
94219089Spjd} ddt_phys_t;
95219089Spjd
/*
 * Index into the per-entry ddt_phys_t array (dde_phys[] in struct ddt_entry).
 * SINGLE, DOUBLE and TRIPLE carry the values 1, 2 and 3; DITTO is slot 0.
 * DDT_PHYS_TYPES is the number of slots, not a slot itself.
 */
enum ddt_phys_type {
	DDT_PHYS_DITTO = 0,
	DDT_PHYS_SINGLE = 1,
	DDT_PHYS_DOUBLE = 2,
	DDT_PHYS_TRIPLE = 3,
	DDT_PHYS_TYPES
};

104219089Spjd/*
105219089Spjd * In-core ddt entry
106219089Spjd */
107219089Spjdstruct ddt_entry {
108219089Spjd	ddt_key_t	dde_key;
109219089Spjd	ddt_phys_t	dde_phys[DDT_PHYS_TYPES];
110219089Spjd	zio_t		*dde_lead_zio[DDT_PHYS_TYPES];
111219089Spjd	void		*dde_repair_data;
112219089Spjd	enum ddt_type	dde_type;
113219089Spjd	enum ddt_class	dde_class;
114219089Spjd	uint8_t		dde_loading;
115219089Spjd	uint8_t		dde_loaded;
116219089Spjd	kcondvar_t	dde_cv;
117219089Spjd	avl_node_t	dde_node;
118219089Spjd};
119219089Spjd
120219089Spjd/*
121219089Spjd * In-core ddt
122219089Spjd */
123219089Spjdstruct ddt {
124219089Spjd	kmutex_t	ddt_lock;
125219089Spjd	avl_tree_t	ddt_tree;
126219089Spjd	avl_tree_t	ddt_repair_tree;
127219089Spjd	enum zio_checksum ddt_checksum;
128219089Spjd	spa_t		*ddt_spa;
129219089Spjd	objset_t	*ddt_os;
130219089Spjd	uint64_t	ddt_stat_object;
131219089Spjd	uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES];
132219089Spjd	ddt_histogram_t	ddt_histogram[DDT_TYPES][DDT_CLASSES];
133219089Spjd	ddt_histogram_t	ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
134219089Spjd	ddt_object_t	ddt_object_stats[DDT_TYPES][DDT_CLASSES];
135219089Spjd	avl_node_t	ddt_node;
136219089Spjd};
137219089Spjd
/*
 * In-core and on-disk bookmark for DDT walks
 *
 * All four members are 64-bit.  ddt_walk() takes a pointer to one of these.
 * NOTE(review): ddb_class, ddb_type and ddb_checksum presumably hold
 * enum ddt_class, enum ddt_type and enum zio_checksum values, and ddb_cursor
 * the position within the current object -- confirm in ddt_walk().
 */
typedef struct ddt_bookmark {
	uint64_t	ddb_class;
	uint64_t	ddb_type;
	uint64_t	ddb_checksum;
	uint64_t	ddb_cursor;
} ddt_bookmark_t;

148219089Spjd/*
149219089Spjd * Ops vector to access a specific DDT object type.
150219089Spjd */
151219089Spjdtypedef struct ddt_ops {
152219089Spjd	char ddt_op_name[32];
153219089Spjd	int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
154219089Spjd	    boolean_t prehash);
155219089Spjd	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
156219089Spjd	int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
157219089Spjd	void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
158219089Spjd	    ddt_entry_t *dde);
159219089Spjd	int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
160219089Spjd	    dmu_tx_t *tx);
161219089Spjd	int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
162219089Spjd	    dmu_tx_t *tx);
163219089Spjd	int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
164219089Spjd	    uint64_t *walk);
165245264Sdelphij	int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
166219089Spjd} ddt_ops_t;
167219089Spjd
168219089Spjd#define	DDT_NAMELEN	80
169219089Spjd
170219089Spjdextern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
171222950Sgibbs    enum ddt_class cls, char *name);
172219089Spjdextern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
173222950Sgibbs    enum ddt_class cls, uint64_t *walk, ddt_entry_t *dde);
174245264Sdelphijextern int ddt_object_count(ddt_t *ddt, enum ddt_type type,
175253643Smav    enum ddt_class cls, uint64_t *count);
176219089Spjdextern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
177222950Sgibbs    enum ddt_class cls, dmu_object_info_t *);
178219089Spjdextern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
179222950Sgibbs    enum ddt_class cls);
180219089Spjd
181219089Spjdextern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
182219089Spjd    uint64_t txg);
183219089Spjdextern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
184219089Spjd    const ddt_phys_t *ddp, blkptr_t *bp);
185219089Spjd
186219089Spjdextern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
187219089Spjd
188219089Spjdextern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
189219089Spjdextern void ddt_phys_clear(ddt_phys_t *ddp);
190219089Spjdextern void ddt_phys_addref(ddt_phys_t *ddp);
191219089Spjdextern void ddt_phys_decref(ddt_phys_t *ddp);
192219089Spjdextern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
193219089Spjd    uint64_t txg);
194219089Spjdextern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
195219089Spjdextern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
196219089Spjd
197219089Spjdextern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
198219089Spjd
199219089Spjdextern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
200219089Spjdextern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
201219089Spjdextern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
202219089Spjdextern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
203219089Spjdextern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
204219089Spjdextern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
205219089Spjd
206219089Spjdextern uint64_t ddt_get_dedup_dspace(spa_t *spa);
207219089Spjdextern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
208219089Spjd
209219089Spjdextern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
210219089Spjd    ddt_phys_t *ddp_willref);
211219089Spjdextern int ddt_ditto_copies_present(ddt_entry_t *dde);
212219089Spjd
213219089Spjdextern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
214219089Spjdextern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
215219089Spjd
216219089Spjdextern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
217219089Spjdextern void ddt_enter(ddt_t *ddt);
218219089Spjdextern void ddt_exit(ddt_t *ddt);
219219089Spjdextern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
220219089Spjdextern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
221219089Spjdextern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
222219089Spjd
223219089Spjdextern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
224219089Spjd    const blkptr_t *bp);
225219089Spjd
226219089Spjdextern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
227219089Spjdextern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
228219089Spjd
229219089Spjdextern int ddt_entry_compare(const void *x1, const void *x2);
230219089Spjd
231219089Spjdextern void ddt_create(spa_t *spa);
232219089Spjdextern int ddt_load(spa_t *spa);
233219089Spjdextern void ddt_unload(spa_t *spa);
234219089Spjdextern void ddt_sync(spa_t *spa, uint64_t txg);
235219089Spjdextern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
236219089Spjdextern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
237222950Sgibbs    enum ddt_class cls, ddt_entry_t *dde, dmu_tx_t *tx);
238219089Spjd
239219089Spjdextern const ddt_ops_t ddt_zap_ops;
240219089Spjd
241219089Spjd#ifdef	__cplusplus
242219089Spjd}
243219089Spjd#endif
244219089Spjd
245219089Spjd#endif	/* _SYS_DDT_H */
246