bplist.c revision 4577:ed36b0e652bc
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/bplist.h>
29#include <sys/zfs_context.h>
30
31static int
32bplist_hold(bplist_t *bpl)
33{
34	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
35	if (bpl->bpl_dbuf == NULL) {
36		int err = dmu_bonus_hold(bpl->bpl_mos,
37		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
38		if (err)
39			return (err);
40		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
41	}
42	return (0);
43}
44
45uint64_t
46bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
47{
48	int size;
49
50	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
51	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
52
53	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
54	    DMU_OT_BPLIST_HDR, size, tx));
55}
56
57void
58bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
59{
60	VERIFY(dmu_object_free(mos, object, tx) == 0);
61}
62
63int
64bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
65{
66	dmu_object_info_t doi;
67	int err;
68
69	err = dmu_object_info(mos, object, &doi);
70	if (err)
71		return (err);
72
73	mutex_enter(&bpl->bpl_lock);
74
75	ASSERT(bpl->bpl_dbuf == NULL);
76	ASSERT(bpl->bpl_phys == NULL);
77	ASSERT(bpl->bpl_cached_dbuf == NULL);
78	ASSERT(bpl->bpl_queue == NULL);
79	ASSERT(object != 0);
80	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
81	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
82
83	bpl->bpl_mos = mos;
84	bpl->bpl_object = object;
85	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
86	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
87	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
88
89	mutex_exit(&bpl->bpl_lock);
90	return (0);
91}
92
93void
94bplist_close(bplist_t *bpl)
95{
96	mutex_enter(&bpl->bpl_lock);
97
98	ASSERT(bpl->bpl_queue == NULL);
99
100	if (bpl->bpl_cached_dbuf) {
101		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
102		bpl->bpl_cached_dbuf = NULL;
103	}
104	if (bpl->bpl_dbuf) {
105		dmu_buf_rele(bpl->bpl_dbuf, bpl);
106		bpl->bpl_dbuf = NULL;
107		bpl->bpl_phys = NULL;
108	}
109
110	mutex_exit(&bpl->bpl_lock);
111}
112
113boolean_t
114bplist_empty(bplist_t *bpl)
115{
116	boolean_t rv;
117
118	if (bpl->bpl_object == 0)
119		return (B_TRUE);
120
121	mutex_enter(&bpl->bpl_lock);
122	VERIFY(0 == bplist_hold(bpl)); /* XXX */
123	rv = (bpl->bpl_phys->bpl_entries == 0);
124	mutex_exit(&bpl->bpl_lock);
125
126	return (rv);
127}
128
129static int
130bplist_cache(bplist_t *bpl, uint64_t blkid)
131{
132	int err = 0;
133
134	if (bpl->bpl_cached_dbuf == NULL ||
135	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
136		if (bpl->bpl_cached_dbuf != NULL)
137			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
138		err = dmu_buf_hold(bpl->bpl_mos,
139		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
140		    bpl, &bpl->bpl_cached_dbuf);
141		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
142		    1ULL << bpl->bpl_blockshift);
143	}
144	return (err);
145}
146
147int
148bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
149{
150	uint64_t blk, off;
151	blkptr_t *bparray;
152	int err;
153
154	mutex_enter(&bpl->bpl_lock);
155
156	err = bplist_hold(bpl);
157	if (err) {
158		mutex_exit(&bpl->bpl_lock);
159		return (err);
160	}
161
162	if (*itorp >= bpl->bpl_phys->bpl_entries) {
163		mutex_exit(&bpl->bpl_lock);
164		return (ENOENT);
165	}
166
167	blk = *itorp >> bpl->bpl_bpshift;
168	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
169
170	err = bplist_cache(bpl, blk);
171	if (err) {
172		mutex_exit(&bpl->bpl_lock);
173		return (err);
174	}
175
176	bparray = bpl->bpl_cached_dbuf->db_data;
177	*bp = bparray[off];
178	(*itorp)++;
179	mutex_exit(&bpl->bpl_lock);
180	return (0);
181}
182
183int
184bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
185{
186	uint64_t blk, off;
187	blkptr_t *bparray;
188	int err;
189
190	ASSERT(!BP_IS_HOLE(bp));
191	mutex_enter(&bpl->bpl_lock);
192	err = bplist_hold(bpl);
193	if (err)
194		return (err);
195
196	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
197	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
198
199	err = bplist_cache(bpl, blk);
200	if (err) {
201		mutex_exit(&bpl->bpl_lock);
202		return (err);
203	}
204
205	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
206	bparray = bpl->bpl_cached_dbuf->db_data;
207	bparray[off] = *bp;
208
209	/* We never need the fill count. */
210	bparray[off].blk_fill = 0;
211
212	/* The bplist will compress better if we can leave off the checksum */
213	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
214
215	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
216	bpl->bpl_phys->bpl_entries++;
217	bpl->bpl_phys->bpl_bytes +=
218	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
219	if (bpl->bpl_havecomp) {
220		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
221		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
222	}
223	mutex_exit(&bpl->bpl_lock);
224
225	return (0);
226}
227
228/*
229 * Deferred entry; will be written later by bplist_sync().
230 */
231void
232bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
233{
234	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
235
236	ASSERT(!BP_IS_HOLE(bp));
237	mutex_enter(&bpl->bpl_lock);
238	bpq->bpq_blk = *bp;
239	bpq->bpq_next = bpl->bpl_queue;
240	bpl->bpl_queue = bpq;
241	mutex_exit(&bpl->bpl_lock);
242}
243
244void
245bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
246{
247	bplist_q_t *bpq;
248
249	mutex_enter(&bpl->bpl_lock);
250	while ((bpq = bpl->bpl_queue) != NULL) {
251		bpl->bpl_queue = bpq->bpq_next;
252		mutex_exit(&bpl->bpl_lock);
253		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
254		kmem_free(bpq, sizeof (*bpq));
255		mutex_enter(&bpl->bpl_lock);
256	}
257	mutex_exit(&bpl->bpl_lock);
258}
259
260void
261bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
262{
263	mutex_enter(&bpl->bpl_lock);
264	ASSERT3P(bpl->bpl_queue, ==, NULL);
265	VERIFY(0 == bplist_hold(bpl));
266	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
267	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
268	    bpl->bpl_object, 0, -1ULL, tx));
269	bpl->bpl_phys->bpl_entries = 0;
270	bpl->bpl_phys->bpl_bytes = 0;
271	if (bpl->bpl_havecomp) {
272		bpl->bpl_phys->bpl_comp = 0;
273		bpl->bpl_phys->bpl_uncomp = 0;
274	}
275	mutex_exit(&bpl->bpl_lock);
276}
277
278int
279bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
280{
281	uint64_t itor = 0, comp = 0, uncomp = 0;
282	int err;
283	blkptr_t bp;
284
285	mutex_enter(&bpl->bpl_lock);
286
287	err = bplist_hold(bpl);
288	if (err) {
289		mutex_exit(&bpl->bpl_lock);
290		return (err);
291	}
292
293	*usedp = bpl->bpl_phys->bpl_bytes;
294	if (bpl->bpl_havecomp) {
295		*compp = bpl->bpl_phys->bpl_comp;
296		*uncompp = bpl->bpl_phys->bpl_uncomp;
297	}
298	mutex_exit(&bpl->bpl_lock);
299
300	if (!bpl->bpl_havecomp) {
301		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
302			comp += BP_GET_PSIZE(&bp);
303			uncomp += BP_GET_UCSIZE(&bp);
304		}
305		if (err == ENOENT)
306			err = 0;
307		*compp = comp;
308		*uncompp = uncomp;
309	}
310
311	return (err);
312}
313