bplist.c revision 1544:938876158511
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/bplist.h>
29#include <sys/zfs_context.h>
30
31static int
32bplist_hold(bplist_t *bpl)
33{
34	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
35	if (bpl->bpl_dbuf == NULL) {
36		int err = dmu_bonus_hold(bpl->bpl_mos,
37		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
38		if (err)
39			return (err);
40		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
41	}
42	return (0);
43}
44
45uint64_t
46bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
47{
48	uint64_t obj;
49
50	obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
51	    DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx);
52
53	return (obj);
54}
55
56void
57bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
58{
59	VERIFY(dmu_object_free(mos, object, tx) == 0);
60}
61
62int
63bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
64{
65	dmu_object_info_t doi;
66	int err;
67
68	err = dmu_object_info(mos, object, &doi);
69	if (err)
70		return (err);
71
72	mutex_enter(&bpl->bpl_lock);
73
74	ASSERT(bpl->bpl_dbuf == NULL);
75	ASSERT(bpl->bpl_phys == NULL);
76	ASSERT(bpl->bpl_cached_dbuf == NULL);
77	ASSERT(bpl->bpl_queue == NULL);
78	ASSERT(object != 0);
79
80	bpl->bpl_mos = mos;
81	bpl->bpl_object = object;
82	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
83	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
84
85	mutex_exit(&bpl->bpl_lock);
86	return (0);
87}
88
89void
90bplist_close(bplist_t *bpl)
91{
92	mutex_enter(&bpl->bpl_lock);
93
94	ASSERT(bpl->bpl_queue == NULL);
95
96	if (bpl->bpl_cached_dbuf) {
97		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
98		bpl->bpl_cached_dbuf = NULL;
99	}
100	if (bpl->bpl_dbuf) {
101		dmu_buf_rele(bpl->bpl_dbuf, bpl);
102		bpl->bpl_dbuf = NULL;
103		bpl->bpl_phys = NULL;
104	}
105
106	mutex_exit(&bpl->bpl_lock);
107}
108
109boolean_t
110bplist_empty(bplist_t *bpl)
111{
112	boolean_t rv;
113
114	if (bpl->bpl_object == 0)
115		return (B_TRUE);
116
117	mutex_enter(&bpl->bpl_lock);
118	VERIFY(0 == bplist_hold(bpl)); /* XXX */
119	rv = (bpl->bpl_phys->bpl_entries == 0);
120	mutex_exit(&bpl->bpl_lock);
121
122	return (rv);
123}
124
125static int
126bplist_cache(bplist_t *bpl, uint64_t blkid)
127{
128	int err = 0;
129
130	if (bpl->bpl_cached_dbuf == NULL ||
131	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
132		if (bpl->bpl_cached_dbuf != NULL)
133			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
134		err = dmu_buf_hold(bpl->bpl_mos,
135		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
136		    bpl, &bpl->bpl_cached_dbuf);
137		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
138		    1ULL << bpl->bpl_blockshift);
139	}
140	return (err);
141}
142
143int
144bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
145{
146	uint64_t blk, off;
147	blkptr_t *bparray;
148	int err;
149
150	mutex_enter(&bpl->bpl_lock);
151
152	err = bplist_hold(bpl);
153	if (err) {
154		mutex_exit(&bpl->bpl_lock);
155		return (err);
156	}
157
158	if (*itorp >= bpl->bpl_phys->bpl_entries) {
159		mutex_exit(&bpl->bpl_lock);
160		return (ENOENT);
161	}
162
163	blk = *itorp >> bpl->bpl_bpshift;
164	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
165
166	err = bplist_cache(bpl, blk);
167	if (err) {
168		mutex_exit(&bpl->bpl_lock);
169		return (err);
170	}
171
172	bparray = bpl->bpl_cached_dbuf->db_data;
173	*bp = bparray[off];
174	(*itorp)++;
175	mutex_exit(&bpl->bpl_lock);
176	return (0);
177}
178
179int
180bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
181{
182	uint64_t blk, off;
183	blkptr_t *bparray;
184	int err;
185
186	ASSERT(!BP_IS_HOLE(bp));
187	mutex_enter(&bpl->bpl_lock);
188	err = bplist_hold(bpl);
189	if (err)
190		return (err);
191
192	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
193	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
194
195	err = bplist_cache(bpl, blk);
196	if (err) {
197		mutex_exit(&bpl->bpl_lock);
198		return (err);
199	}
200
201	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
202	bparray = bpl->bpl_cached_dbuf->db_data;
203	bparray[off] = *bp;
204
205	/* We never need the fill count. */
206	bparray[off].blk_fill = 0;
207
208	/* The bplist will compress better if we can leave off the checksum */
209	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
210
211	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
212	bpl->bpl_phys->bpl_entries++;
213	bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
214	mutex_exit(&bpl->bpl_lock);
215
216	return (0);
217}
218
219/*
220 * Deferred entry; will be written later by bplist_sync().
221 */
222void
223bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
224{
225	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
226
227	ASSERT(!BP_IS_HOLE(bp));
228	mutex_enter(&bpl->bpl_lock);
229	bpq->bpq_blk = *bp;
230	bpq->bpq_next = bpl->bpl_queue;
231	bpl->bpl_queue = bpq;
232	mutex_exit(&bpl->bpl_lock);
233}
234
235void
236bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
237{
238	bplist_q_t *bpq;
239
240	mutex_enter(&bpl->bpl_lock);
241	while ((bpq = bpl->bpl_queue) != NULL) {
242		bpl->bpl_queue = bpq->bpq_next;
243		mutex_exit(&bpl->bpl_lock);
244		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
245		kmem_free(bpq, sizeof (*bpq));
246		mutex_enter(&bpl->bpl_lock);
247	}
248	mutex_exit(&bpl->bpl_lock);
249}
250
251void
252bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
253{
254	mutex_enter(&bpl->bpl_lock);
255	ASSERT3P(bpl->bpl_queue, ==, NULL);
256	VERIFY(0 == bplist_hold(bpl));
257	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
258	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
259	    bpl->bpl_object, 0, -1ULL, tx));
260	bpl->bpl_phys->bpl_entries = 0;
261	bpl->bpl_phys->bpl_bytes = 0;
262	mutex_exit(&bpl->bpl_lock);
263}
264