1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21219089Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23321548Smav * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24296519Smav * Copyright (c) 2014 Integros [integros.com]
25324010Savg * Copyright (c) 2017 Datto Inc.
26219089Spjd */
27219089Spjd
28219089Spjd#include <sys/bpobj.h>
29219089Spjd#include <sys/zfs_context.h>
30219089Spjd#include <sys/refcount.h>
31228103Smm#include <sys/dsl_pool.h>
32239774Smm#include <sys/zfeature.h>
33239774Smm#include <sys/zap.h>
34219089Spjd
35239774Smm/*
36239774Smm * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj).
37239774Smm */
38219089Spjduint64_t
39239774Smmbpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
40239774Smm{
41239774Smm	spa_t *spa = dmu_objset_spa(os);
42239774Smm	dsl_pool_t *dp = dmu_objset_pool(os);
43239774Smm
44259813Sdelphij	if (spa_feature_is_enabled(spa, SPA_FEATURE_EMPTY_BPOBJ)) {
45259813Sdelphij		if (!spa_feature_is_active(spa, SPA_FEATURE_EMPTY_BPOBJ)) {
46240415Smm			ASSERT0(dp->dp_empty_bpobj);
47239774Smm			dp->dp_empty_bpobj =
48274337Sdelphij			    bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx);
49239774Smm			VERIFY(zap_add(os,
50239774Smm			    DMU_POOL_DIRECTORY_OBJECT,
51239774Smm			    DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
52239774Smm			    &dp->dp_empty_bpobj, tx) == 0);
53239774Smm		}
54259813Sdelphij		spa_feature_incr(spa, SPA_FEATURE_EMPTY_BPOBJ, tx);
55239774Smm		ASSERT(dp->dp_empty_bpobj != 0);
56239774Smm		return (dp->dp_empty_bpobj);
57239774Smm	} else {
58239774Smm		return (bpobj_alloc(os, blocksize, tx));
59239774Smm	}
60239774Smm}
61239774Smm
62239774Smmvoid
63239774Smmbpobj_decr_empty(objset_t *os, dmu_tx_t *tx)
64239774Smm{
65239774Smm	dsl_pool_t *dp = dmu_objset_pool(os);
66239774Smm
67259813Sdelphij	spa_feature_decr(dmu_objset_spa(os), SPA_FEATURE_EMPTY_BPOBJ, tx);
68259813Sdelphij	if (!spa_feature_is_active(dmu_objset_spa(os),
69259813Sdelphij	    SPA_FEATURE_EMPTY_BPOBJ)) {
70239774Smm		VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
71239774Smm		    DMU_POOL_DIRECTORY_OBJECT,
72239774Smm		    DMU_POOL_EMPTY_BPOBJ, tx));
73239774Smm		VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx));
74239774Smm		dp->dp_empty_bpobj = 0;
75239774Smm	}
76239774Smm}
77239774Smm
78239774Smmuint64_t
79219089Spjdbpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
80219089Spjd{
81219089Spjd	int size;
82219089Spjd
83219089Spjd	if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
84219089Spjd		size = BPOBJ_SIZE_V0;
85219089Spjd	else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
86219089Spjd		size = BPOBJ_SIZE_V1;
87219089Spjd	else
88219089Spjd		size = sizeof (bpobj_phys_t);
89219089Spjd
90219089Spjd	return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
91219089Spjd	    DMU_OT_BPOBJ_HDR, size, tx));
92219089Spjd}
93219089Spjd
94219089Spjdvoid
95219089Spjdbpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
96219089Spjd{
97219089Spjd	int64_t i;
98219089Spjd	bpobj_t bpo;
99219089Spjd	dmu_object_info_t doi;
100219089Spjd	int epb;
101219089Spjd	dmu_buf_t *dbuf = NULL;
102219089Spjd
103239774Smm	ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj);
104219089Spjd	VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
105219089Spjd
106219089Spjd	mutex_enter(&bpo.bpo_lock);
107219089Spjd
108219089Spjd	if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
109219089Spjd		goto out;
110219089Spjd
111219089Spjd	VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
112219089Spjd	epb = doi.doi_data_block_size / sizeof (uint64_t);
113219089Spjd
114219089Spjd	for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
115219089Spjd		uint64_t *objarray;
116219089Spjd		uint64_t offset, blkoff;
117219089Spjd
118219089Spjd		offset = i * sizeof (uint64_t);
119219089Spjd		blkoff = P2PHASE(i, epb);
120219089Spjd
121219089Spjd		if (dbuf == NULL || dbuf->db_offset > offset) {
122219089Spjd			if (dbuf)
123219089Spjd				dmu_buf_rele(dbuf, FTAG);
124219089Spjd			VERIFY3U(0, ==, dmu_buf_hold(os,
125219089Spjd			    bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
126219089Spjd		}
127219089Spjd
128219089Spjd		ASSERT3U(offset, >=, dbuf->db_offset);
129219089Spjd		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
130219089Spjd
131219089Spjd		objarray = dbuf->db_data;
132219089Spjd		bpobj_free(os, objarray[blkoff], tx);
133219089Spjd	}
134219089Spjd	if (dbuf) {
135219089Spjd		dmu_buf_rele(dbuf, FTAG);
136219089Spjd		dbuf = NULL;
137219089Spjd	}
138219089Spjd	VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
139219089Spjd
140219089Spjdout:
141219089Spjd	mutex_exit(&bpo.bpo_lock);
142219089Spjd	bpobj_close(&bpo);
143219089Spjd
144219089Spjd	VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
145219089Spjd}
146219089Spjd
147219089Spjdint
148219089Spjdbpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
149219089Spjd{
150219089Spjd	dmu_object_info_t doi;
151219089Spjd	int err;
152219089Spjd
153219089Spjd	err = dmu_object_info(os, object, &doi);
154219089Spjd	if (err)
155219089Spjd		return (err);
156219089Spjd
157219089Spjd	bzero(bpo, sizeof (*bpo));
158219089Spjd	mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
159219089Spjd
160219089Spjd	ASSERT(bpo->bpo_dbuf == NULL);
161219089Spjd	ASSERT(bpo->bpo_phys == NULL);
162219089Spjd	ASSERT(object != 0);
163219089Spjd	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
164219089Spjd	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
165219089Spjd
166219089Spjd	err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
167219089Spjd	if (err)
168219089Spjd		return (err);
169219089Spjd
170219089Spjd	bpo->bpo_os = os;
171219089Spjd	bpo->bpo_object = object;
172219089Spjd	bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
173219089Spjd	bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
174219089Spjd	bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
175219089Spjd	bpo->bpo_phys = bpo->bpo_dbuf->db_data;
176219089Spjd	return (0);
177219089Spjd}
178219089Spjd
179332525Smavboolean_t
180332525Smavbpobj_is_open(const bpobj_t *bpo)
181332525Smav{
182332525Smav	return (bpo->bpo_object != 0);
183332525Smav}
184332525Smav
185219089Spjdvoid
186219089Spjdbpobj_close(bpobj_t *bpo)
187219089Spjd{
188219089Spjd	/* Lame workaround for closing a bpobj that was never opened. */
189219089Spjd	if (bpo->bpo_object == 0)
190219089Spjd		return;
191219089Spjd
192219089Spjd	dmu_buf_rele(bpo->bpo_dbuf, bpo);
193219089Spjd	if (bpo->bpo_cached_dbuf != NULL)
194219089Spjd		dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
195219089Spjd	bpo->bpo_dbuf = NULL;
196219089Spjd	bpo->bpo_phys = NULL;
197219089Spjd	bpo->bpo_cached_dbuf = NULL;
198219089Spjd	bpo->bpo_object = 0;
199219089Spjd
200219089Spjd	mutex_destroy(&bpo->bpo_lock);
201219089Spjd}
202219089Spjd
203332525Smavboolean_t
204332525Smavbpobj_is_empty(bpobj_t *bpo)
205268075Sdelphij{
206332525Smav	return (bpo->bpo_phys->bpo_num_blkptrs == 0 &&
207332525Smav	    (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_num_subobjs == 0));
208268075Sdelphij}
209268075Sdelphij
210219089Spjdstatic int
211219089Spjdbpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
212219089Spjd    boolean_t free)
213219089Spjd{
214219089Spjd	dmu_object_info_t doi;
215219089Spjd	int epb;
216219089Spjd	int64_t i;
217219089Spjd	int err = 0;
218219089Spjd	dmu_buf_t *dbuf = NULL;
219219089Spjd
220332525Smav	ASSERT(bpobj_is_open(bpo));
221219089Spjd	mutex_enter(&bpo->bpo_lock);
222219089Spjd
223219089Spjd	if (free)
224219089Spjd		dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
225219089Spjd
226219089Spjd	for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
227219089Spjd		blkptr_t *bparray;
228219089Spjd		blkptr_t *bp;
229219089Spjd		uint64_t offset, blkoff;
230219089Spjd
231219089Spjd		offset = i * sizeof (blkptr_t);
232219089Spjd		blkoff = P2PHASE(i, bpo->bpo_epb);
233219089Spjd
234219089Spjd		if (dbuf == NULL || dbuf->db_offset > offset) {
235219089Spjd			if (dbuf)
236219089Spjd				dmu_buf_rele(dbuf, FTAG);
237219089Spjd			err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
238219089Spjd			    FTAG, &dbuf, 0);
239219089Spjd			if (err)
240219089Spjd				break;
241219089Spjd		}
242219089Spjd
243219089Spjd		ASSERT3U(offset, >=, dbuf->db_offset);
244219089Spjd		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
245219089Spjd
246219089Spjd		bparray = dbuf->db_data;
247219089Spjd		bp = &bparray[blkoff];
248219089Spjd		err = func(arg, bp, tx);
249219089Spjd		if (err)
250219089Spjd			break;
251219089Spjd		if (free) {
252219089Spjd			bpo->bpo_phys->bpo_bytes -=
253219089Spjd			    bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
254219089Spjd			ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
255219089Spjd			if (bpo->bpo_havecomp) {
256219089Spjd				bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
257219089Spjd				bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
258219089Spjd			}
259219089Spjd			bpo->bpo_phys->bpo_num_blkptrs--;
260219089Spjd			ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
261219089Spjd		}
262219089Spjd	}
263219089Spjd	if (dbuf) {
264219089Spjd		dmu_buf_rele(dbuf, FTAG);
265219089Spjd		dbuf = NULL;
266219089Spjd	}
267219089Spjd	if (free) {
268219089Spjd		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
269286603Smav		    (i + 1) * sizeof (blkptr_t), -1ULL, tx));
270219089Spjd	}
271219089Spjd	if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
272219089Spjd		goto out;
273219089Spjd
274219089Spjd	ASSERT(bpo->bpo_havecomp);
275219089Spjd	err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
276219089Spjd	if (err) {
277219089Spjd		mutex_exit(&bpo->bpo_lock);
278219089Spjd		return (err);
279219089Spjd	}
280259813Sdelphij	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
281219089Spjd	epb = doi.doi_data_block_size / sizeof (uint64_t);
282219089Spjd
283219089Spjd	for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
284219089Spjd		uint64_t *objarray;
285219089Spjd		uint64_t offset, blkoff;
286219089Spjd		bpobj_t sublist;
287219089Spjd		uint64_t used_before, comp_before, uncomp_before;
288219089Spjd		uint64_t used_after, comp_after, uncomp_after;
289219089Spjd
290219089Spjd		offset = i * sizeof (uint64_t);
291219089Spjd		blkoff = P2PHASE(i, epb);
292219089Spjd
293219089Spjd		if (dbuf == NULL || dbuf->db_offset > offset) {
294219089Spjd			if (dbuf)
295219089Spjd				dmu_buf_rele(dbuf, FTAG);
296219089Spjd			err = dmu_buf_hold(bpo->bpo_os,
297219089Spjd			    bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
298219089Spjd			if (err)
299219089Spjd				break;
300219089Spjd		}
301219089Spjd
302219089Spjd		ASSERT3U(offset, >=, dbuf->db_offset);
303219089Spjd		ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
304219089Spjd
305219089Spjd		objarray = dbuf->db_data;
306219089Spjd		err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
307219089Spjd		if (err)
308219089Spjd			break;
309219089Spjd		if (free) {
310219089Spjd			err = bpobj_space(&sublist,
311219089Spjd			    &used_before, &comp_before, &uncomp_before);
312271781Swill			if (err != 0) {
313271781Swill				bpobj_close(&sublist);
314219089Spjd				break;
315271781Swill			}
316219089Spjd		}
317219089Spjd		err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
318219089Spjd		if (free) {
319219089Spjd			VERIFY3U(0, ==, bpobj_space(&sublist,
320219089Spjd			    &used_after, &comp_after, &uncomp_after));
321219089Spjd			bpo->bpo_phys->bpo_bytes -= used_before - used_after;
322219089Spjd			ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
323219089Spjd			bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
324219089Spjd			bpo->bpo_phys->bpo_uncomp -=
325219089Spjd			    uncomp_before - uncomp_after;
326219089Spjd		}
327219089Spjd
328219089Spjd		bpobj_close(&sublist);
329219089Spjd		if (err)
330219089Spjd			break;
331219089Spjd		if (free) {
332219089Spjd			err = dmu_object_free(bpo->bpo_os,
333219089Spjd			    objarray[blkoff], tx);
334219089Spjd			if (err)
335219089Spjd				break;
336219089Spjd			bpo->bpo_phys->bpo_num_subobjs--;
337219089Spjd			ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
338219089Spjd		}
339219089Spjd	}
340219089Spjd	if (dbuf) {
341219089Spjd		dmu_buf_rele(dbuf, FTAG);
342219089Spjd		dbuf = NULL;
343219089Spjd	}
344219089Spjd	if (free) {
345219089Spjd		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
346219089Spjd		    bpo->bpo_phys->bpo_subobjs,
347219089Spjd		    (i + 1) * sizeof (uint64_t), -1ULL, tx));
348219089Spjd	}
349219089Spjd
350219089Spjdout:
351219089Spjd	/* If there are no entries, there should be no bytes. */
352332525Smav	if (bpobj_is_empty(bpo)) {
353268075Sdelphij		ASSERT0(bpo->bpo_phys->bpo_bytes);
354268075Sdelphij		ASSERT0(bpo->bpo_phys->bpo_comp);
355268075Sdelphij		ASSERT0(bpo->bpo_phys->bpo_uncomp);
356268075Sdelphij	}
357219089Spjd
358219089Spjd	mutex_exit(&bpo->bpo_lock);
359219089Spjd	return (err);
360219089Spjd}
361219089Spjd
362219089Spjd/*
363219089Spjd * Iterate and remove the entries.  If func returns nonzero, iteration
364219089Spjd * will stop and that entry will not be removed.
365219089Spjd */
366219089Spjdint
367219089Spjdbpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
368219089Spjd{
369219089Spjd	return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
370219089Spjd}
371219089Spjd
372219089Spjd/*
373219089Spjd * Iterate the entries.  If func returns nonzero, iteration will stop.
374219089Spjd */
375219089Spjdint
376219089Spjdbpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
377219089Spjd{
378219089Spjd	return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
379219089Spjd}
380219089Spjd
381219089Spjdvoid
382219089Spjdbpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
383219089Spjd{
384219089Spjd	bpobj_t subbpo;
385219089Spjd	uint64_t used, comp, uncomp, subsubobjs;
386219089Spjd
387332525Smav	ASSERT(bpobj_is_open(bpo));
388332525Smav	ASSERT(subobj != 0);
389219089Spjd	ASSERT(bpo->bpo_havesubobj);
390219089Spjd	ASSERT(bpo->bpo_havecomp);
391239774Smm	ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
392219089Spjd
393239774Smm	if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
394239774Smm		bpobj_decr_empty(bpo->bpo_os, tx);
395239774Smm		return;
396239774Smm	}
397239774Smm
398219089Spjd	VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
399219089Spjd	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
400219089Spjd
401332525Smav	if (bpobj_is_empty(&subbpo)) {
402219089Spjd		/* No point in having an empty subobj. */
403219089Spjd		bpobj_close(&subbpo);
404219089Spjd		bpobj_free(bpo->bpo_os, subobj, tx);
405219089Spjd		return;
406219089Spjd	}
407219089Spjd
408321548Smav	mutex_enter(&bpo->bpo_lock);
409219089Spjd	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
410219089Spjd	if (bpo->bpo_phys->bpo_subobjs == 0) {
411219089Spjd		bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
412274337Sdelphij		    DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE,
413274337Sdelphij		    DMU_OT_NONE, 0, tx);
414219089Spjd	}
415219089Spjd
416248571Smm	dmu_object_info_t doi;
417248571Smm	ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
418248571Smm	ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
419248571Smm
420219089Spjd	dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
421219089Spjd	    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
422219089Spjd	    sizeof (subobj), &subobj, tx);
423219089Spjd	bpo->bpo_phys->bpo_num_subobjs++;
424219089Spjd
425219089Spjd	/*
426219089Spjd	 * If subobj has only one block of subobjs, then move subobj's
427219089Spjd	 * subobjs to bpo's subobj list directly.  This reduces
428219089Spjd	 * recursion in bpobj_iterate due to nested subobjs.
429219089Spjd	 */
430219089Spjd	subsubobjs = subbpo.bpo_phys->bpo_subobjs;
431219089Spjd	if (subsubobjs != 0) {
432219089Spjd		dmu_object_info_t doi;
433219089Spjd
434219089Spjd		VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
435219089Spjd		if (doi.doi_max_offset == doi.doi_data_block_size) {
436219089Spjd			dmu_buf_t *subdb;
437219089Spjd			uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;
438219089Spjd
439219089Spjd			VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
440219089Spjd			    0, FTAG, &subdb, 0));
441247852Smm			/*
442247852Smm			 * Make sure that we are not asking dmu_write()
443247852Smm			 * to write more data than we have in our buffer.
444247852Smm			 */
445247852Smm			VERIFY3U(subdb->db_size, >=,
446247852Smm			    numsubsub * sizeof (subobj));
447219089Spjd			dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
448219089Spjd			    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
449219089Spjd			    numsubsub * sizeof (subobj), subdb->db_data, tx);
450219089Spjd			dmu_buf_rele(subdb, FTAG);
451219089Spjd			bpo->bpo_phys->bpo_num_subobjs += numsubsub;
452219089Spjd
453219089Spjd			dmu_buf_will_dirty(subbpo.bpo_dbuf, tx);
454219089Spjd			subbpo.bpo_phys->bpo_subobjs = 0;
455219089Spjd			VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
456219089Spjd			    subsubobjs, tx));
457219089Spjd		}
458219089Spjd	}
459219089Spjd	bpo->bpo_phys->bpo_bytes += used;
460219089Spjd	bpo->bpo_phys->bpo_comp += comp;
461219089Spjd	bpo->bpo_phys->bpo_uncomp += uncomp;
462219089Spjd	mutex_exit(&bpo->bpo_lock);
463219089Spjd
464219089Spjd	bpobj_close(&subbpo);
465219089Spjd}
466219089Spjd
467219089Spjdvoid
468219089Spjdbpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
469219089Spjd{
470219089Spjd	blkptr_t stored_bp = *bp;
471219089Spjd	uint64_t offset;
472219089Spjd	int blkoff;
473219089Spjd	blkptr_t *bparray;
474219089Spjd
475332525Smav	ASSERT(bpobj_is_open(bpo));
476219089Spjd	ASSERT(!BP_IS_HOLE(bp));
477239774Smm	ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
478219089Spjd
479268075Sdelphij	if (BP_IS_EMBEDDED(bp)) {
480268075Sdelphij		/*
481268075Sdelphij		 * The bpobj will compress better without the payload.
482268075Sdelphij		 *
483268075Sdelphij		 * Note that we store EMBEDDED bp's because they have an
484268075Sdelphij		 * uncompressed size, which must be accounted for.  An
485268075Sdelphij		 * alternative would be to add their size to bpo_uncomp
486268075Sdelphij		 * without storing the bp, but that would create additional
487268075Sdelphij		 * complications: bpo_uncomp would be inconsistent with the
488268075Sdelphij		 * set of BP's stored, and bpobj_iterate() wouldn't visit
489268075Sdelphij		 * all the space accounted for in the bpobj.
490268075Sdelphij		 */
491268075Sdelphij		bzero(&stored_bp, sizeof (stored_bp));
492268075Sdelphij		stored_bp.blk_prop = bp->blk_prop;
493268075Sdelphij		stored_bp.blk_birth = bp->blk_birth;
494268075Sdelphij	} else if (!BP_GET_DEDUP(bp)) {
495268075Sdelphij		/* The bpobj will compress better without the checksum */
496268075Sdelphij		bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
497268075Sdelphij	}
498268075Sdelphij
499219089Spjd	/* We never need the fill count. */
500219089Spjd	stored_bp.blk_fill = 0;
501219089Spjd
502219089Spjd	mutex_enter(&bpo->bpo_lock);
503219089Spjd
504219089Spjd	offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
505219089Spjd	blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
506219089Spjd
507219089Spjd	if (bpo->bpo_cached_dbuf == NULL ||
508219089Spjd	    offset < bpo->bpo_cached_dbuf->db_offset ||
509219089Spjd	    offset >= bpo->bpo_cached_dbuf->db_offset +
510219089Spjd	    bpo->bpo_cached_dbuf->db_size) {
511219089Spjd		if (bpo->bpo_cached_dbuf)
512219089Spjd			dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
513219089Spjd		VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
514219089Spjd		    offset, bpo, &bpo->bpo_cached_dbuf, 0));
515219089Spjd	}
516219089Spjd
517219089Spjd	dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
518219089Spjd	bparray = bpo->bpo_cached_dbuf->db_data;
519219089Spjd	bparray[blkoff] = stored_bp;
520219089Spjd
521219089Spjd	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
522219089Spjd	bpo->bpo_phys->bpo_num_blkptrs++;
523219089Spjd	bpo->bpo_phys->bpo_bytes +=
524219089Spjd	    bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
525219089Spjd	if (bpo->bpo_havecomp) {
526219089Spjd		bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
527219089Spjd		bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
528219089Spjd	}
529219089Spjd	mutex_exit(&bpo->bpo_lock);
530219089Spjd}
531219089Spjd
532219089Spjdstruct space_range_arg {
533219089Spjd	spa_t *spa;
534219089Spjd	uint64_t mintxg;
535219089Spjd	uint64_t maxtxg;
536219089Spjd	uint64_t used;
537219089Spjd	uint64_t comp;
538219089Spjd	uint64_t uncomp;
539219089Spjd};
540219089Spjd
541219089Spjd/* ARGSUSED */
542219089Spjdstatic int
543219089Spjdspace_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
544219089Spjd{
545219089Spjd	struct space_range_arg *sra = arg;
546219089Spjd
547219089Spjd	if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
548228103Smm		if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
549228103Smm			sra->used += bp_get_dsize_sync(sra->spa, bp);
550228103Smm		else
551228103Smm			sra->used += bp_get_dsize(sra->spa, bp);
552219089Spjd		sra->comp += BP_GET_PSIZE(bp);
553219089Spjd		sra->uncomp += BP_GET_UCSIZE(bp);
554219089Spjd	}
555219089Spjd	return (0);
556219089Spjd}
557219089Spjd
558219089Spjdint
559219089Spjdbpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
560219089Spjd{
561332525Smav	ASSERT(bpobj_is_open(bpo));
562219089Spjd	mutex_enter(&bpo->bpo_lock);
563219089Spjd
564219089Spjd	*usedp = bpo->bpo_phys->bpo_bytes;
565219089Spjd	if (bpo->bpo_havecomp) {
566219089Spjd		*compp = bpo->bpo_phys->bpo_comp;
567219089Spjd		*uncompp = bpo->bpo_phys->bpo_uncomp;
568219089Spjd		mutex_exit(&bpo->bpo_lock);
569219089Spjd		return (0);
570219089Spjd	} else {
571219089Spjd		mutex_exit(&bpo->bpo_lock);
572219089Spjd		return (bpobj_space_range(bpo, 0, UINT64_MAX,
573219089Spjd		    usedp, compp, uncompp));
574219089Spjd	}
575219089Spjd}
576219089Spjd
577219089Spjd/*
578219089Spjd * Return the amount of space in the bpobj which is:
579219089Spjd * mintxg < blk_birth <= maxtxg
580219089Spjd */
581219089Spjdint
582219089Spjdbpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
583219089Spjd    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
584219089Spjd{
585219089Spjd	struct space_range_arg sra = { 0 };
586219089Spjd	int err;
587219089Spjd
588332525Smav	ASSERT(bpobj_is_open(bpo));
589332525Smav
590219089Spjd	/*
591219089Spjd	 * As an optimization, if they want the whole txg range, just
592219089Spjd	 * get bpo_bytes rather than iterating over the bps.
593219089Spjd	 */
594219089Spjd	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
595219089Spjd		return (bpobj_space(bpo, usedp, compp, uncompp));
596219089Spjd
597219089Spjd	sra.spa = dmu_objset_spa(bpo->bpo_os);
598219089Spjd	sra.mintxg = mintxg;
599219089Spjd	sra.maxtxg = maxtxg;
600219089Spjd
601219089Spjd	err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
602219089Spjd	*usedp = sra.used;
603219089Spjd	*compp = sra.comp;
604219089Spjd	*uncompp = sra.uncomp;
605219089Spjd	return (err);
606219089Spjd}
607