zap_micro.c revision 209962
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22185029Spjd * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24168404Spjd */
25168404Spjd
26168404Spjd#pragma ident	"%Z%%M%	%I%	%E% SMI"
27168404Spjd
28168404Spjd#include <sys/spa.h>
29168404Spjd#include <sys/dmu.h>
30168404Spjd#include <sys/zfs_context.h>
31168404Spjd#include <sys/zap.h>
32168404Spjd#include <sys/refcount.h>
33168404Spjd#include <sys/zap_impl.h>
34168404Spjd#include <sys/zap_leaf.h>
35168404Spjd#include <sys/avl.h>
36168404Spjd
37185029Spjd#ifdef _KERNEL
38185029Spjd#include <sys/sunddi.h>
39185029Spjd#endif
40168404Spjd
41185029Spjdstatic int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx);
42168404Spjd
43168404Spjd
44185029Spjdstatic uint64_t
45185029Spjdzap_hash(zap_t *zap, const char *normname)
46185029Spjd{
47185029Spjd	const uint8_t *cp;
48185029Spjd	uint8_t c;
49185029Spjd	uint64_t crc = zap->zap_salt;
50185029Spjd
51185029Spjd	/* NB: name must already be normalized, if necessary */
52185029Spjd
53185029Spjd	ASSERT(crc != 0);
54185029Spjd	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
55185029Spjd	for (cp = (const uint8_t *)normname; (c = *cp) != '\0'; cp++) {
56185029Spjd		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF];
57185029Spjd	}
58185029Spjd
59185029Spjd	/*
60185029Spjd	 * Only use 28 bits, since we need 4 bits in the cookie for the
61185029Spjd	 * collision differentiator.  We MUST use the high bits, since
62185029Spjd	 * those are the ones that we first pay attention to when
63185029Spjd	 * chosing the bucket.
64185029Spjd	 */
65185029Spjd	crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
66185029Spjd
67185029Spjd	return (crc);
68185029Spjd}
69185029Spjd
70185029Spjdstatic int
71185029Spjdzap_normalize(zap_t *zap, const char *name, char *namenorm)
72185029Spjd{
73185029Spjd	size_t inlen, outlen;
74185029Spjd	int err;
75185029Spjd
76185029Spjd	inlen = strlen(name) + 1;
77185029Spjd	outlen = ZAP_MAXNAMELEN;
78185029Spjd
79185029Spjd	err = 0;
80185029Spjd	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
81185029Spjd	    zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL, U8_UNICODE_LATEST,
82185029Spjd	    &err);
83185029Spjd
84185029Spjd	return (err);
85185029Spjd}
86185029Spjd
87185029Spjdboolean_t
88185029Spjdzap_match(zap_name_t *zn, const char *matchname)
89185029Spjd{
90185029Spjd	if (zn->zn_matchtype == MT_FIRST) {
91185029Spjd		char norm[ZAP_MAXNAMELEN];
92185029Spjd
93185029Spjd		if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
94185029Spjd			return (B_FALSE);
95185029Spjd
96185029Spjd		return (strcmp(zn->zn_name_norm, norm) == 0);
97185029Spjd	} else {
98185029Spjd		/* MT_BEST or MT_EXACT */
99185029Spjd		return (strcmp(zn->zn_name_orij, matchname) == 0);
100185029Spjd	}
101185029Spjd}
102185029Spjd
103185029Spjdvoid
104185029Spjdzap_name_free(zap_name_t *zn)
105185029Spjd{
106185029Spjd	kmem_free(zn, sizeof (zap_name_t));
107185029Spjd}
108185029Spjd
109185029Spjd/* XXX combine this with zap_lockdir()? */
110185029Spjdzap_name_t *
111185029Spjdzap_name_alloc(zap_t *zap, const char *name, matchtype_t mt)
112185029Spjd{
113185029Spjd	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
114185029Spjd
115185029Spjd	zn->zn_zap = zap;
116185029Spjd	zn->zn_name_orij = name;
117185029Spjd	zn->zn_matchtype = mt;
118185029Spjd	if (zap->zap_normflags) {
119185029Spjd		if (zap_normalize(zap, name, zn->zn_normbuf) != 0) {
120185029Spjd			zap_name_free(zn);
121185029Spjd			return (NULL);
122185029Spjd		}
123185029Spjd		zn->zn_name_norm = zn->zn_normbuf;
124185029Spjd	} else {
125185029Spjd		if (mt != MT_EXACT) {
126185029Spjd			zap_name_free(zn);
127185029Spjd			return (NULL);
128185029Spjd		}
129185029Spjd		zn->zn_name_norm = zn->zn_name_orij;
130185029Spjd	}
131185029Spjd
132185029Spjd	zn->zn_hash = zap_hash(zap, zn->zn_name_norm);
133185029Spjd	return (zn);
134185029Spjd}
135185029Spjd
136168404Spjdstatic void
137168404Spjdmzap_byteswap(mzap_phys_t *buf, size_t size)
138168404Spjd{
139168404Spjd	int i, max;
140168404Spjd	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
141168404Spjd	buf->mz_salt = BSWAP_64(buf->mz_salt);
142185029Spjd	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
143168404Spjd	max = (size / MZAP_ENT_LEN) - 1;
144168404Spjd	for (i = 0; i < max; i++) {
145168404Spjd		buf->mz_chunk[i].mze_value =
146168404Spjd		    BSWAP_64(buf->mz_chunk[i].mze_value);
147168404Spjd		buf->mz_chunk[i].mze_cd =
148168404Spjd		    BSWAP_32(buf->mz_chunk[i].mze_cd);
149168404Spjd	}
150168404Spjd}
151168404Spjd
152168404Spjdvoid
153168404Spjdzap_byteswap(void *buf, size_t size)
154168404Spjd{
155168404Spjd	uint64_t block_type;
156168404Spjd
157168404Spjd	block_type = *(uint64_t *)buf;
158168404Spjd
159168404Spjd	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
160168404Spjd		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
161168404Spjd		mzap_byteswap(buf, size);
162168404Spjd	} else {
163168404Spjd		fzap_byteswap(buf, size);
164168404Spjd	}
165168404Spjd}
166168404Spjd
167168404Spjdstatic int
168168404Spjdmze_compare(const void *arg1, const void *arg2)
169168404Spjd{
170168404Spjd	const mzap_ent_t *mze1 = arg1;
171168404Spjd	const mzap_ent_t *mze2 = arg2;
172168404Spjd
173168404Spjd	if (mze1->mze_hash > mze2->mze_hash)
174168404Spjd		return (+1);
175168404Spjd	if (mze1->mze_hash < mze2->mze_hash)
176168404Spjd		return (-1);
177168404Spjd	if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd)
178168404Spjd		return (+1);
179168404Spjd	if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd)
180168404Spjd		return (-1);
181168404Spjd	return (0);
182168404Spjd}
183168404Spjd
184197150Spjdstatic int
185168404Spjdmze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep)
186168404Spjd{
187168404Spjd	mzap_ent_t *mze;
188197150Spjd	avl_index_t idx;
189168404Spjd
190168404Spjd	ASSERT(zap->zap_ismicro);
191168404Spjd	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
192168404Spjd	ASSERT(mzep->mze_cd < ZAP_MAXCD);
193168404Spjd
194168404Spjd	mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
195168404Spjd	mze->mze_chunkid = chunkid;
196168404Spjd	mze->mze_hash = hash;
197168404Spjd	mze->mze_phys = *mzep;
198197150Spjd	if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) {
199197150Spjd		kmem_free(mze, sizeof (mzap_ent_t));
200197150Spjd		return (EEXIST);
201197150Spjd	}
202197150Spjd	avl_insert(&zap->zap_m.zap_avl, mze, idx);
203197150Spjd	return (0);
204168404Spjd}
205168404Spjd
206168404Spjdstatic mzap_ent_t *
207185029Spjdmze_find(zap_name_t *zn)
208168404Spjd{
209168404Spjd	mzap_ent_t mze_tofind;
210168404Spjd	mzap_ent_t *mze;
211168404Spjd	avl_index_t idx;
212185029Spjd	avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
213168404Spjd
214185029Spjd	ASSERT(zn->zn_zap->zap_ismicro);
215185029Spjd	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
216168404Spjd
217185029Spjd	if (strlen(zn->zn_name_norm) >= sizeof (mze_tofind.mze_phys.mze_name))
218168404Spjd		return (NULL);
219168404Spjd
220185029Spjd	mze_tofind.mze_hash = zn->zn_hash;
221168404Spjd	mze_tofind.mze_phys.mze_cd = 0;
222168404Spjd
223185029Spjdagain:
224168404Spjd	mze = avl_find(avl, &mze_tofind, &idx);
225168404Spjd	if (mze == NULL)
226168404Spjd		mze = avl_nearest(avl, idx, AVL_AFTER);
227185029Spjd	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
228185029Spjd		if (zap_match(zn, mze->mze_phys.mze_name))
229168404Spjd			return (mze);
230168404Spjd	}
231185029Spjd	if (zn->zn_matchtype == MT_BEST) {
232185029Spjd		zn->zn_matchtype = MT_FIRST;
233185029Spjd		goto again;
234185029Spjd	}
235168404Spjd	return (NULL);
236168404Spjd}
237168404Spjd
238168404Spjdstatic uint32_t
239168404Spjdmze_find_unused_cd(zap_t *zap, uint64_t hash)
240168404Spjd{
241168404Spjd	mzap_ent_t mze_tofind;
242168404Spjd	mzap_ent_t *mze;
243168404Spjd	avl_index_t idx;
244168404Spjd	avl_tree_t *avl = &zap->zap_m.zap_avl;
245168404Spjd	uint32_t cd;
246168404Spjd
247168404Spjd	ASSERT(zap->zap_ismicro);
248168404Spjd	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
249168404Spjd
250168404Spjd	mze_tofind.mze_hash = hash;
251168404Spjd	mze_tofind.mze_phys.mze_cd = 0;
252168404Spjd
253168404Spjd	cd = 0;
254168404Spjd	for (mze = avl_find(avl, &mze_tofind, &idx);
255168404Spjd	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
256168404Spjd		if (mze->mze_phys.mze_cd != cd)
257168404Spjd			break;
258168404Spjd		cd++;
259168404Spjd	}
260168404Spjd
261168404Spjd	return (cd);
262168404Spjd}
263168404Spjd
264168404Spjdstatic void
265168404Spjdmze_remove(zap_t *zap, mzap_ent_t *mze)
266168404Spjd{
267168404Spjd	ASSERT(zap->zap_ismicro);
268168404Spjd	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
269168404Spjd
270168404Spjd	avl_remove(&zap->zap_m.zap_avl, mze);
271168404Spjd	kmem_free(mze, sizeof (mzap_ent_t));
272168404Spjd}
273168404Spjd
274168404Spjdstatic void
275168404Spjdmze_destroy(zap_t *zap)
276168404Spjd{
277168404Spjd	mzap_ent_t *mze;
278168404Spjd	void *avlcookie = NULL;
279168404Spjd
280168404Spjd	while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
281168404Spjd		kmem_free(mze, sizeof (mzap_ent_t));
282168404Spjd	avl_destroy(&zap->zap_m.zap_avl);
283168404Spjd}
284168404Spjd
285168404Spjdstatic zap_t *
286168404Spjdmzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
287168404Spjd{
288168404Spjd	zap_t *winner;
289168404Spjd	zap_t *zap;
290168404Spjd	int i;
291168404Spjd
292168404Spjd	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
293168404Spjd
294168404Spjd	zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
295168404Spjd	rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, 0);
296168404Spjd	rw_enter(&zap->zap_rwlock, RW_WRITER);
297168404Spjd	zap->zap_objset = os;
298168404Spjd	zap->zap_object = obj;
299168404Spjd	zap->zap_dbuf = db;
300168404Spjd
301185029Spjd	if (*(uint64_t *)db->db_data != ZBT_MICRO) {
302168404Spjd		mutex_init(&zap->zap_f.zap_num_entries_mtx, NULL,
303168404Spjd		    MUTEX_DEFAULT, 0);
304168404Spjd		zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
305168404Spjd	} else {
306168404Spjd		zap->zap_ismicro = TRUE;
307168404Spjd	}
308168404Spjd
309168404Spjd	/*
310168404Spjd	 * Make sure that zap_ismicro is set before we let others see
311168404Spjd	 * it, because zap_lockdir() checks zap_ismicro without the lock
312168404Spjd	 * held.
313168404Spjd	 */
314168404Spjd	winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
315168404Spjd
316168404Spjd	if (winner != NULL) {
317172443Spjd		rw_exit(&zap->zap_rwlock);
318172443Spjd		rw_destroy(&zap->zap_rwlock);
319168404Spjd		if (!zap->zap_ismicro)
320168404Spjd			mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
321168404Spjd		kmem_free(zap, sizeof (zap_t));
322168404Spjd		return (winner);
323168404Spjd	}
324168404Spjd
325168404Spjd	if (zap->zap_ismicro) {
326168404Spjd		zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
327185029Spjd		zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
328168404Spjd		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
329168404Spjd		avl_create(&zap->zap_m.zap_avl, mze_compare,
330168404Spjd		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
331168404Spjd
332168404Spjd		for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
333168404Spjd			mzap_ent_phys_t *mze =
334168404Spjd			    &zap->zap_m.zap_phys->mz_chunk[i];
335168404Spjd			if (mze->mze_name[0]) {
336185029Spjd				zap_name_t *zn;
337185029Spjd
338185029Spjd				zn = zap_name_alloc(zap, mze->mze_name,
339185029Spjd				    MT_EXACT);
340197150Spjd				if (mze_insert(zap, i, zn->zn_hash, mze) == 0)
341197150Spjd					zap->zap_m.zap_num_entries++;
342197150Spjd				else {
343197150Spjd					printf("ZFS WARNING: Duplicated ZAP "
344197172Spjd					    "entry detected (%s).\n",
345197150Spjd					    mze->mze_name);
346197150Spjd				}
347185029Spjd				zap_name_free(zn);
348168404Spjd			}
349168404Spjd		}
350168404Spjd	} else {
351168404Spjd		zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
352185029Spjd		zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
353168404Spjd
354168404Spjd		ASSERT3U(sizeof (struct zap_leaf_header), ==,
355168404Spjd		    2*ZAP_LEAF_CHUNKSIZE);
356168404Spjd
357168404Spjd		/*
358168404Spjd		 * The embedded pointer table should not overlap the
359168404Spjd		 * other members.
360168404Spjd		 */
361168404Spjd		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
362168404Spjd		    &zap->zap_f.zap_phys->zap_salt);
363168404Spjd
364168404Spjd		/*
365168404Spjd		 * The embedded pointer table should end at the end of
366168404Spjd		 * the block
367168404Spjd		 */
368168404Spjd		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
369168404Spjd		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
370168404Spjd		    (uintptr_t)zap->zap_f.zap_phys, ==,
371168404Spjd		    zap->zap_dbuf->db_size);
372168404Spjd	}
373168404Spjd	rw_exit(&zap->zap_rwlock);
374168404Spjd	return (zap);
375168404Spjd}
376168404Spjd
377168404Spjdint
378168404Spjdzap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
379185029Spjd    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
380168404Spjd{
381168404Spjd	zap_t *zap;
382168404Spjd	dmu_buf_t *db;
383168404Spjd	krw_t lt;
384168404Spjd	int err;
385168404Spjd
386168404Spjd	*zapp = NULL;
387168404Spjd
388168404Spjd	err = dmu_buf_hold(os, obj, 0, NULL, &db);
389168404Spjd	if (err)
390168404Spjd		return (err);
391168404Spjd
392168404Spjd#ifdef ZFS_DEBUG
393168404Spjd	{
394168404Spjd		dmu_object_info_t doi;
395168404Spjd		dmu_object_info_from_db(db, &doi);
396168404Spjd		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
397168404Spjd	}
398168404Spjd#endif
399168404Spjd
400168404Spjd	zap = dmu_buf_get_user(db);
401168404Spjd	if (zap == NULL)
402168404Spjd		zap = mzap_open(os, obj, db);
403168404Spjd
404168404Spjd	/*
405168404Spjd	 * We're checking zap_ismicro without the lock held, in order to
406168404Spjd	 * tell what type of lock we want.  Once we have some sort of
407168404Spjd	 * lock, see if it really is the right type.  In practice this
408168404Spjd	 * can only be different if it was upgraded from micro to fat,
409168404Spjd	 * and micro wanted WRITER but fat only needs READER.
410168404Spjd	 */
411168404Spjd	lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
412168404Spjd	rw_enter(&zap->zap_rwlock, lt);
413168404Spjd	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
414168404Spjd		/* it was upgraded, now we only need reader */
415168404Spjd		ASSERT(lt == RW_WRITER);
416168404Spjd		ASSERT(RW_READER ==
417168404Spjd		    (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
418168404Spjd		rw_downgrade(&zap->zap_rwlock);
419168404Spjd		lt = RW_READER;
420168404Spjd	}
421168404Spjd
422168404Spjd	zap->zap_objset = os;
423168404Spjd
424168404Spjd	if (lt == RW_WRITER)
425168404Spjd		dmu_buf_will_dirty(db, tx);
426168404Spjd
427168404Spjd	ASSERT3P(zap->zap_dbuf, ==, db);
428168404Spjd
429168404Spjd	ASSERT(!zap->zap_ismicro ||
430168404Spjd	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
431185029Spjd	if (zap->zap_ismicro && tx && adding &&
432168404Spjd	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
433168404Spjd		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
434168404Spjd		if (newsz > MZAP_MAX_BLKSZ) {
435168404Spjd			dprintf("upgrading obj %llu: num_entries=%u\n",
436168404Spjd			    obj, zap->zap_m.zap_num_entries);
437168404Spjd			*zapp = zap;
438185029Spjd			return (mzap_upgrade(zapp, tx));
439168404Spjd		}
440168404Spjd		err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
441168404Spjd		ASSERT3U(err, ==, 0);
442168404Spjd		zap->zap_m.zap_num_chunks =
443168404Spjd		    db->db_size / MZAP_ENT_LEN - 1;
444168404Spjd	}
445168404Spjd
446168404Spjd	*zapp = zap;
447168404Spjd	return (0);
448168404Spjd}
449168404Spjd
450168404Spjdvoid
451168404Spjdzap_unlockdir(zap_t *zap)
452168404Spjd{
453168404Spjd	rw_exit(&zap->zap_rwlock);
454168404Spjd	dmu_buf_rele(zap->zap_dbuf, NULL);
455168404Spjd}
456168404Spjd
457185029Spjdstatic int
458185029Spjdmzap_upgrade(zap_t **zapp, dmu_tx_t *tx)
459168404Spjd{
460168404Spjd	mzap_phys_t *mzp;
461168404Spjd	int i, sz, nchunks, err;
462185029Spjd	zap_t *zap = *zapp;
463168404Spjd
464168404Spjd	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
465168404Spjd
466168404Spjd	sz = zap->zap_dbuf->db_size;
467168404Spjd	mzp = kmem_alloc(sz, KM_SLEEP);
468168404Spjd	bcopy(zap->zap_dbuf->db_data, mzp, sz);
469168404Spjd	nchunks = zap->zap_m.zap_num_chunks;
470168404Spjd
471168404Spjd	err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
472168404Spjd	    1ULL << fzap_default_block_shift, 0, tx);
473185029Spjd	if (err) {
474185029Spjd		kmem_free(mzp, sz);
475185029Spjd		return (err);
476185029Spjd	}
477168404Spjd
478168404Spjd	dprintf("upgrading obj=%llu with %u chunks\n",
479168404Spjd	    zap->zap_object, nchunks);
480185029Spjd	/* XXX destroy the avl later, so we can use the stored hash value */
481168404Spjd	mze_destroy(zap);
482168404Spjd
483168404Spjd	fzap_upgrade(zap, tx);
484168404Spjd
485168404Spjd	for (i = 0; i < nchunks; i++) {
486168404Spjd		int err;
487168404Spjd		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
488185029Spjd		zap_name_t *zn;
489168404Spjd		if (mze->mze_name[0] == 0)
490168404Spjd			continue;
491168404Spjd		dprintf("adding %s=%llu\n",
492168404Spjd		    mze->mze_name, mze->mze_value);
493185029Spjd		zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
494185029Spjd		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
495185029Spjd		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
496185029Spjd		zap_name_free(zn);
497185029Spjd		if (err)
498185029Spjd			break;
499168404Spjd	}
500168404Spjd	kmem_free(mzp, sz);
501185029Spjd	*zapp = zap;
502185029Spjd	return (err);
503168404Spjd}
504168404Spjd
505168404Spjdstatic void
506185029Spjdmzap_create_impl(objset_t *os, uint64_t obj, int normflags, dmu_tx_t *tx)
507168404Spjd{
508168404Spjd	dmu_buf_t *db;
509168404Spjd	mzap_phys_t *zp;
510168404Spjd
511168404Spjd	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db));
512168404Spjd
513168404Spjd#ifdef ZFS_DEBUG
514168404Spjd	{
515168404Spjd		dmu_object_info_t doi;
516168404Spjd		dmu_object_info_from_db(db, &doi);
517168404Spjd		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
518168404Spjd	}
519168404Spjd#endif
520168404Spjd
521168404Spjd	dmu_buf_will_dirty(db, tx);
522168404Spjd	zp = db->db_data;
523168404Spjd	zp->mz_block_type = ZBT_MICRO;
524168404Spjd	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
525185029Spjd	zp->mz_normflags = normflags;
526168404Spjd	dmu_buf_rele(db, FTAG);
527168404Spjd}
528168404Spjd
529168404Spjdint
530168404Spjdzap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
531168404Spjd    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
532168404Spjd{
533185029Spjd	return (zap_create_claim_norm(os, obj,
534185029Spjd	    0, ot, bonustype, bonuslen, tx));
535185029Spjd}
536185029Spjd
537185029Spjdint
538185029Spjdzap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
539185029Spjd    dmu_object_type_t ot,
540185029Spjd    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
541185029Spjd{
542168404Spjd	int err;
543168404Spjd
544168404Spjd	err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
545168404Spjd	if (err != 0)
546168404Spjd		return (err);
547185029Spjd	mzap_create_impl(os, obj, normflags, tx);
548168404Spjd	return (0);
549168404Spjd}
550168404Spjd
551168404Spjduint64_t
552168404Spjdzap_create(objset_t *os, dmu_object_type_t ot,
553168404Spjd    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
554168404Spjd{
555185029Spjd	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
556185029Spjd}
557185029Spjd
558185029Spjduint64_t
559185029Spjdzap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
560185029Spjd    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
561185029Spjd{
562168404Spjd	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
563168404Spjd
564185029Spjd	mzap_create_impl(os, obj, normflags, tx);
565168404Spjd	return (obj);
566168404Spjd}
567168404Spjd
568168404Spjdint
569168404Spjdzap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
570168404Spjd{
571168404Spjd	/*
572168404Spjd	 * dmu_object_free will free the object number and free the
573168404Spjd	 * data.  Freeing the data will cause our pageout function to be
574168404Spjd	 * called, which will destroy our data (zap_leaf_t's and zap_t).
575168404Spjd	 */
576168404Spjd
577168404Spjd	return (dmu_object_free(os, zapobj, tx));
578168404Spjd}
579168404Spjd
580168404Spjd_NOTE(ARGSUSED(0))
581168404Spjdvoid
582168404Spjdzap_evict(dmu_buf_t *db, void *vzap)
583168404Spjd{
584168404Spjd	zap_t *zap = vzap;
585168404Spjd
586168404Spjd	rw_destroy(&zap->zap_rwlock);
587168404Spjd
588168404Spjd	if (zap->zap_ismicro)
589168404Spjd		mze_destroy(zap);
590168404Spjd	else
591168404Spjd		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
592168404Spjd
593168404Spjd	kmem_free(zap, sizeof (zap_t));
594168404Spjd}
595168404Spjd
596168404Spjdint
597168404Spjdzap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
598168404Spjd{
599168404Spjd	zap_t *zap;
600168404Spjd	int err;
601168404Spjd
602185029Spjd	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
603168404Spjd	if (err)
604168404Spjd		return (err);
605168404Spjd	if (!zap->zap_ismicro) {
606168404Spjd		err = fzap_count(zap, count);
607168404Spjd	} else {
608168404Spjd		*count = zap->zap_m.zap_num_entries;
609168404Spjd	}
610168404Spjd	zap_unlockdir(zap);
611168404Spjd	return (err);
612168404Spjd}
613168404Spjd
614168404Spjd/*
615185029Spjd * zn may be NULL; if not specified, it will be computed if needed.
616185029Spjd * See also the comment above zap_entry_normalization_conflict().
617168404Spjd */
618185029Spjdstatic boolean_t
619185029Spjdmzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
620185029Spjd{
621185029Spjd	mzap_ent_t *other;
622185029Spjd	int direction = AVL_BEFORE;
623185029Spjd	boolean_t allocdzn = B_FALSE;
624168404Spjd
625185029Spjd	if (zap->zap_normflags == 0)
626185029Spjd		return (B_FALSE);
627185029Spjd
628185029Spjdagain:
629185029Spjd	for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
630185029Spjd	    other && other->mze_hash == mze->mze_hash;
631185029Spjd	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
632185029Spjd
633185029Spjd		if (zn == NULL) {
634185029Spjd			zn = zap_name_alloc(zap, mze->mze_phys.mze_name,
635185029Spjd			    MT_FIRST);
636185029Spjd			allocdzn = B_TRUE;
637185029Spjd		}
638185029Spjd		if (zap_match(zn, other->mze_phys.mze_name)) {
639185029Spjd			if (allocdzn)
640185029Spjd				zap_name_free(zn);
641185029Spjd			return (B_TRUE);
642185029Spjd		}
643185029Spjd	}
644185029Spjd
645185029Spjd	if (direction == AVL_BEFORE) {
646185029Spjd		direction = AVL_AFTER;
647185029Spjd		goto again;
648185029Spjd	}
649185029Spjd
650185029Spjd	if (allocdzn)
651185029Spjd		zap_name_free(zn);
652185029Spjd	return (B_FALSE);
653185029Spjd}
654185029Spjd
655185029Spjd/*
656185029Spjd * Routines for manipulating attributes.
657185029Spjd */
658185029Spjd
659168404Spjdint
660168404Spjdzap_lookup(objset_t *os, uint64_t zapobj, const char *name,
661168404Spjd    uint64_t integer_size, uint64_t num_integers, void *buf)
662168404Spjd{
663185029Spjd	return (zap_lookup_norm(os, zapobj, name, integer_size,
664185029Spjd	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
665185029Spjd}
666185029Spjd
667185029Spjdint
668185029Spjdzap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
669185029Spjd    uint64_t integer_size, uint64_t num_integers, void *buf,
670185029Spjd    matchtype_t mt, char *realname, int rn_len,
671185029Spjd    boolean_t *ncp)
672185029Spjd{
673168404Spjd	zap_t *zap;
674168404Spjd	int err;
675168404Spjd	mzap_ent_t *mze;
676185029Spjd	zap_name_t *zn;
677168404Spjd
678185029Spjd	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
679168404Spjd	if (err)
680168404Spjd		return (err);
681185029Spjd	zn = zap_name_alloc(zap, name, mt);
682185029Spjd	if (zn == NULL) {
683185029Spjd		zap_unlockdir(zap);
684185029Spjd		return (ENOTSUP);
685185029Spjd	}
686185029Spjd
687168404Spjd	if (!zap->zap_ismicro) {
688185029Spjd		err = fzap_lookup(zn, integer_size, num_integers, buf,
689185029Spjd		    realname, rn_len, ncp);
690168404Spjd	} else {
691185029Spjd		mze = mze_find(zn);
692168404Spjd		if (mze == NULL) {
693168404Spjd			err = ENOENT;
694168404Spjd		} else {
695185029Spjd			if (num_integers < 1) {
696168404Spjd				err = EOVERFLOW;
697185029Spjd			} else if (integer_size != 8) {
698168404Spjd				err = EINVAL;
699185029Spjd			} else {
700168404Spjd				*(uint64_t *)buf = mze->mze_phys.mze_value;
701185029Spjd				(void) strlcpy(realname,
702185029Spjd				    mze->mze_phys.mze_name, rn_len);
703185029Spjd				if (ncp) {
704185029Spjd					*ncp = mzap_normalization_conflict(zap,
705185029Spjd					    zn, mze);
706185029Spjd				}
707185029Spjd			}
708168404Spjd		}
709168404Spjd	}
710185029Spjd	zap_name_free(zn);
711168404Spjd	zap_unlockdir(zap);
712168404Spjd	return (err);
713168404Spjd}
714168404Spjd
715168404Spjdint
716168404Spjdzap_length(objset_t *os, uint64_t zapobj, const char *name,
717168404Spjd    uint64_t *integer_size, uint64_t *num_integers)
718168404Spjd{
719168404Spjd	zap_t *zap;
720168404Spjd	int err;
721168404Spjd	mzap_ent_t *mze;
722185029Spjd	zap_name_t *zn;
723168404Spjd
724185029Spjd	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
725168404Spjd	if (err)
726168404Spjd		return (err);
727185029Spjd	zn = zap_name_alloc(zap, name, MT_EXACT);
728185029Spjd	if (zn == NULL) {
729185029Spjd		zap_unlockdir(zap);
730185029Spjd		return (ENOTSUP);
731185029Spjd	}
732168404Spjd	if (!zap->zap_ismicro) {
733185029Spjd		err = fzap_length(zn, integer_size, num_integers);
734168404Spjd	} else {
735185029Spjd		mze = mze_find(zn);
736168404Spjd		if (mze == NULL) {
737168404Spjd			err = ENOENT;
738168404Spjd		} else {
739168404Spjd			if (integer_size)
740168404Spjd				*integer_size = 8;
741168404Spjd			if (num_integers)
742168404Spjd				*num_integers = 1;
743168404Spjd		}
744168404Spjd	}
745185029Spjd	zap_name_free(zn);
746168404Spjd	zap_unlockdir(zap);
747168404Spjd	return (err);
748168404Spjd}
749168404Spjd
750168404Spjdstatic void
751185029Spjdmzap_addent(zap_name_t *zn, uint64_t value)
752168404Spjd{
753168404Spjd	int i;
754185029Spjd	zap_t *zap = zn->zn_zap;
755168404Spjd	int start = zap->zap_m.zap_alloc_next;
756168404Spjd	uint32_t cd;
757168404Spjd
758185029Spjd	dprintf("obj=%llu %s=%llu\n", zap->zap_object,
759185029Spjd	    zn->zn_name_orij, value);
760168404Spjd	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
761168404Spjd
762168404Spjd#ifdef ZFS_DEBUG
763168404Spjd	for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
764168404Spjd		mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
765185029Spjd		ASSERT(strcmp(zn->zn_name_orij, mze->mze_name) != 0);
766168404Spjd	}
767168404Spjd#endif
768168404Spjd
769185029Spjd	cd = mze_find_unused_cd(zap, zn->zn_hash);
770168404Spjd	/* given the limited size of the microzap, this can't happen */
771168404Spjd	ASSERT(cd != ZAP_MAXCD);
772168404Spjd
773168404Spjdagain:
774168404Spjd	for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
775168404Spjd		mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
776168404Spjd		if (mze->mze_name[0] == 0) {
777168404Spjd			mze->mze_value = value;
778168404Spjd			mze->mze_cd = cd;
779185029Spjd			(void) strcpy(mze->mze_name, zn->zn_name_orij);
780168404Spjd			zap->zap_m.zap_num_entries++;
781168404Spjd			zap->zap_m.zap_alloc_next = i+1;
782168404Spjd			if (zap->zap_m.zap_alloc_next ==
783168404Spjd			    zap->zap_m.zap_num_chunks)
784168404Spjd				zap->zap_m.zap_alloc_next = 0;
785197150Spjd			VERIFY(0 == mze_insert(zap, i, zn->zn_hash, mze));
786168404Spjd			return;
787168404Spjd		}
788168404Spjd	}
789168404Spjd	if (start != 0) {
790168404Spjd		start = 0;
791168404Spjd		goto again;
792168404Spjd	}
793168404Spjd	ASSERT(!"out of entries!");
794168404Spjd}
795168404Spjd
796168404Spjdint
797168404Spjdzap_add(objset_t *os, uint64_t zapobj, const char *name,
798168404Spjd    int integer_size, uint64_t num_integers,
799168404Spjd    const void *val, dmu_tx_t *tx)
800168404Spjd{
801168404Spjd	zap_t *zap;
802168404Spjd	int err;
803168404Spjd	mzap_ent_t *mze;
804168404Spjd	const uint64_t *intval = val;
805185029Spjd	zap_name_t *zn;
806168404Spjd
807185029Spjd	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
808168404Spjd	if (err)
809168404Spjd		return (err);
810185029Spjd	zn = zap_name_alloc(zap, name, MT_EXACT);
811185029Spjd	if (zn == NULL) {
812185029Spjd		zap_unlockdir(zap);
813185029Spjd		return (ENOTSUP);
814185029Spjd	}
815168404Spjd	if (!zap->zap_ismicro) {
816185029Spjd		err = fzap_add(zn, integer_size, num_integers, val, tx);
817185029Spjd		zap = zn->zn_zap;	/* fzap_add() may change zap */
818168404Spjd	} else if (integer_size != 8 || num_integers != 1 ||
819168404Spjd	    strlen(name) >= MZAP_NAME_LEN) {
820168404Spjd		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
821168404Spjd		    zapobj, integer_size, num_integers, name);
822185029Spjd		err = mzap_upgrade(&zn->zn_zap, tx);
823185029Spjd		if (err == 0)
824185029Spjd			err = fzap_add(zn, integer_size, num_integers, val, tx);
825185029Spjd		zap = zn->zn_zap;	/* fzap_add() may change zap */
826168404Spjd	} else {
827185029Spjd		mze = mze_find(zn);
828168404Spjd		if (mze != NULL) {
829168404Spjd			err = EEXIST;
830168404Spjd		} else {
831185029Spjd			mzap_addent(zn, *intval);
832168404Spjd		}
833168404Spjd	}
834185029Spjd	ASSERT(zap == zn->zn_zap);
835185029Spjd	zap_name_free(zn);
836185029Spjd	if (zap != NULL)	/* may be NULL if fzap_add() failed */
837185029Spjd		zap_unlockdir(zap);
838168404Spjd	return (err);
839168404Spjd}
840168404Spjd
841168404Spjdint
842168404Spjdzap_update(objset_t *os, uint64_t zapobj, const char *name,
843168404Spjd    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
844168404Spjd{
845168404Spjd	zap_t *zap;
846168404Spjd	mzap_ent_t *mze;
847168404Spjd	const uint64_t *intval = val;
848185029Spjd	zap_name_t *zn;
849168404Spjd	int err;
850168404Spjd
851185029Spjd	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
852168404Spjd	if (err)
853168404Spjd		return (err);
854185029Spjd	zn = zap_name_alloc(zap, name, MT_EXACT);
855185029Spjd	if (zn == NULL) {
856185029Spjd		zap_unlockdir(zap);
857185029Spjd		return (ENOTSUP);
858185029Spjd	}
859168404Spjd	if (!zap->zap_ismicro) {
860185029Spjd		err = fzap_update(zn, integer_size, num_integers, val, tx);
861185029Spjd		zap = zn->zn_zap;	/* fzap_update() may change zap */
862168404Spjd	} else if (integer_size != 8 || num_integers != 1 ||
863168404Spjd	    strlen(name) >= MZAP_NAME_LEN) {
864168404Spjd		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
865168404Spjd		    zapobj, integer_size, num_integers, name);
866185029Spjd		err = mzap_upgrade(&zn->zn_zap, tx);
867185029Spjd		if (err == 0)
868185029Spjd			err = fzap_update(zn, integer_size, num_integers,
869185029Spjd			    val, tx);
870185029Spjd		zap = zn->zn_zap;	/* fzap_update() may change zap */
871168404Spjd	} else {
872185029Spjd		mze = mze_find(zn);
873168404Spjd		if (mze != NULL) {
874168404Spjd			mze->mze_phys.mze_value = *intval;
875168404Spjd			zap->zap_m.zap_phys->mz_chunk
876168404Spjd			    [mze->mze_chunkid].mze_value = *intval;
877168404Spjd		} else {
878185029Spjd			mzap_addent(zn, *intval);
879168404Spjd		}
880168404Spjd	}
881185029Spjd	ASSERT(zap == zn->zn_zap);
882185029Spjd	zap_name_free(zn);
883185029Spjd	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
884185029Spjd		zap_unlockdir(zap);
885168404Spjd	return (err);
886168404Spjd}
887168404Spjd
888168404Spjdint
889168404Spjdzap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
890168404Spjd{
891185029Spjd	return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
892185029Spjd}
893185029Spjd
894185029Spjdint
895185029Spjdzap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
896185029Spjd    matchtype_t mt, dmu_tx_t *tx)
897185029Spjd{
898168404Spjd	zap_t *zap;
899168404Spjd	int err;
900168404Spjd	mzap_ent_t *mze;
901185029Spjd	zap_name_t *zn;
902168404Spjd
903185029Spjd	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
904168404Spjd	if (err)
905168404Spjd		return (err);
906185029Spjd	zn = zap_name_alloc(zap, name, mt);
907185029Spjd	if (zn == NULL) {
908185029Spjd		zap_unlockdir(zap);
909185029Spjd		return (ENOTSUP);
910185029Spjd	}
911168404Spjd	if (!zap->zap_ismicro) {
912185029Spjd		err = fzap_remove(zn, tx);
913168404Spjd	} else {
914185029Spjd		mze = mze_find(zn);
915168404Spjd		if (mze == NULL) {
916168404Spjd			err = ENOENT;
917168404Spjd		} else {
918168404Spjd			zap->zap_m.zap_num_entries--;
919168404Spjd			bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
920168404Spjd			    sizeof (mzap_ent_phys_t));
921168404Spjd			mze_remove(zap, mze);
922168404Spjd		}
923168404Spjd	}
924185029Spjd	zap_name_free(zn);
925168404Spjd	zap_unlockdir(zap);
926168404Spjd	return (err);
927168404Spjd}
928168404Spjd
929168404Spjd/*
930168404Spjd * Routines for iterating over the attributes.
931168404Spjd */
932168404Spjd
933168404Spjd/*
934168404Spjd * We want to keep the high 32 bits of the cursor zero if we can, so
935168404Spjd * that 32-bit programs can access this.  So use a small hash value so
936168404Spjd * we can fit 4 bits of cd into the 32-bit cursor.
937168404Spjd *
938168404Spjd * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ]
939168404Spjd */
940168404Spjdvoid
941168404Spjdzap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
942168404Spjd    uint64_t serialized)
943168404Spjd{
944168404Spjd	zc->zc_objset = os;
945168404Spjd	zc->zc_zap = NULL;
946168404Spjd	zc->zc_leaf = NULL;
947168404Spjd	zc->zc_zapobj = zapobj;
948168404Spjd	if (serialized == -1ULL) {
949168404Spjd		zc->zc_hash = -1ULL;
950168404Spjd		zc->zc_cd = 0;
951168404Spjd	} else {
952168404Spjd		zc->zc_hash = serialized << (64-ZAP_HASHBITS);
953168404Spjd		zc->zc_cd = serialized >> ZAP_HASHBITS;
954168404Spjd		if (zc->zc_cd >= ZAP_MAXCD) /* corrupt serialized */
955168404Spjd			zc->zc_cd = 0;
956168404Spjd	}
957168404Spjd}
958168404Spjd
959168404Spjdvoid
960168404Spjdzap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
961168404Spjd{
962168404Spjd	zap_cursor_init_serialized(zc, os, zapobj, 0);
963168404Spjd}
964168404Spjd
965168404Spjdvoid
966168404Spjdzap_cursor_fini(zap_cursor_t *zc)
967168404Spjd{
968168404Spjd	if (zc->zc_zap) {
969168404Spjd		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
970168404Spjd		zap_unlockdir(zc->zc_zap);
971168404Spjd		zc->zc_zap = NULL;
972168404Spjd	}
973168404Spjd	if (zc->zc_leaf) {
974168404Spjd		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
975168404Spjd		zap_put_leaf(zc->zc_leaf);
976168404Spjd		zc->zc_leaf = NULL;
977168404Spjd	}
978168404Spjd	zc->zc_objset = NULL;
979168404Spjd}
980168404Spjd
981168404Spjduint64_t
982168404Spjdzap_cursor_serialize(zap_cursor_t *zc)
983168404Spjd{
984168404Spjd	if (zc->zc_hash == -1ULL)
985168404Spjd		return (-1ULL);
986168404Spjd	ASSERT((zc->zc_hash & (ZAP_MAXCD-1)) == 0);
987168404Spjd	ASSERT(zc->zc_cd < ZAP_MAXCD);
988168404Spjd	return ((zc->zc_hash >> (64-ZAP_HASHBITS)) |
989168404Spjd	    ((uint64_t)zc->zc_cd << ZAP_HASHBITS));
990168404Spjd}
991168404Spjd
992168404Spjdint
993168404Spjdzap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
994168404Spjd{
995168404Spjd	int err;
996168404Spjd	avl_index_t idx;
997168404Spjd	mzap_ent_t mze_tofind;
998168404Spjd	mzap_ent_t *mze;
999168404Spjd
1000168404Spjd	if (zc->zc_hash == -1ULL)
1001168404Spjd		return (ENOENT);
1002168404Spjd
1003168404Spjd	if (zc->zc_zap == NULL) {
1004168404Spjd		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1005185029Spjd		    RW_READER, TRUE, FALSE, &zc->zc_zap);
1006168404Spjd		if (err)
1007168404Spjd			return (err);
1008168404Spjd	} else {
1009168404Spjd		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1010168404Spjd	}
1011168404Spjd	if (!zc->zc_zap->zap_ismicro) {
1012168404Spjd		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
1013168404Spjd	} else {
1014168404Spjd		err = ENOENT;
1015168404Spjd
1016168404Spjd		mze_tofind.mze_hash = zc->zc_hash;
1017168404Spjd		mze_tofind.mze_phys.mze_cd = zc->zc_cd;
1018168404Spjd
1019168404Spjd		mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
1020168404Spjd		if (mze == NULL) {
1021168404Spjd			mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
1022168404Spjd			    idx, AVL_AFTER);
1023168404Spjd		}
1024168404Spjd		if (mze) {
1025185029Spjd			ASSERT(0 == bcmp(&mze->mze_phys,
1026185029Spjd			    &zc->zc_zap->zap_m.zap_phys->mz_chunk
1027185029Spjd			    [mze->mze_chunkid], sizeof (mze->mze_phys)));
1028185029Spjd
1029185029Spjd			za->za_normalization_conflict =
1030185029Spjd			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
1031168404Spjd			za->za_integer_length = 8;
1032168404Spjd			za->za_num_integers = 1;
1033168404Spjd			za->za_first_integer = mze->mze_phys.mze_value;
1034168404Spjd			(void) strcpy(za->za_name, mze->mze_phys.mze_name);
1035168404Spjd			zc->zc_hash = mze->mze_hash;
1036168404Spjd			zc->zc_cd = mze->mze_phys.mze_cd;
1037168404Spjd			err = 0;
1038168404Spjd		} else {
1039168404Spjd			zc->zc_hash = -1ULL;
1040168404Spjd		}
1041168404Spjd	}
1042168404Spjd	rw_exit(&zc->zc_zap->zap_rwlock);
1043168404Spjd	return (err);
1044168404Spjd}
1045168404Spjd
1046168404Spjdvoid
1047168404Spjdzap_cursor_advance(zap_cursor_t *zc)
1048168404Spjd{
1049168404Spjd	if (zc->zc_hash == -1ULL)
1050168404Spjd		return;
1051168404Spjd	zc->zc_cd++;
1052168404Spjd	if (zc->zc_cd >= ZAP_MAXCD) {
1053168404Spjd		zc->zc_cd = 0;
1054168404Spjd		zc->zc_hash += 1ULL<<(64-ZAP_HASHBITS);
1055168404Spjd		if (zc->zc_hash == 0) /* EOF */
1056168404Spjd			zc->zc_hash = -1ULL;
1057168404Spjd	}
1058168404Spjd}
1059168404Spjd
1060168404Spjdint
1061168404Spjdzap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
1062168404Spjd{
1063168404Spjd	int err;
1064168404Spjd	zap_t *zap;
1065168404Spjd
1066185029Spjd	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
1067168404Spjd	if (err)
1068168404Spjd		return (err);
1069168404Spjd
1070168404Spjd	bzero(zs, sizeof (zap_stats_t));
1071168404Spjd
1072168404Spjd	if (zap->zap_ismicro) {
1073168404Spjd		zs->zs_blocksize = zap->zap_dbuf->db_size;
1074168404Spjd		zs->zs_num_entries = zap->zap_m.zap_num_entries;
1075168404Spjd		zs->zs_num_blocks = 1;
1076168404Spjd	} else {
1077168404Spjd		fzap_get_stats(zap, zs);
1078168404Spjd	}
1079168404Spjd	zap_unlockdir(zap);
1080168404Spjd	return (0);
1081168404Spjd}
1082209962Smm
1083209962Smmint
1084209962Smmzap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
1085209962Smm    uint64_t *towrite, uint64_t *tooverwrite)
1086209962Smm{
1087209962Smm	zap_t *zap;
1088209962Smm	int err = 0;
1089209962Smm
1090209962Smm
1091209962Smm	/*
1092209962Smm	 * Since, we don't have a name, we cannot figure out which blocks will
1093209962Smm	 * be affected in this operation. So, account for the worst case :
1094209962Smm	 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
1095209962Smm	 * - 4 new blocks written if adding:
1096209962Smm	 * 	- 2 blocks for possibly split leaves,
1097209962Smm	 * 	- 2 grown ptrtbl blocks
1098209962Smm	 *
1099209962Smm	 * This also accomodates the case where an add operation to a fairly
1100209962Smm	 * large microzap results in a promotion to fatzap.
1101209962Smm	 */
1102209962Smm	if (name == NULL) {
1103209962Smm		*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
1104209962Smm		return (err);
1105209962Smm	}
1106209962Smm
1107209962Smm	/*
1108209962Smm	 * We lock the zap with adding ==  FALSE. Because, if we pass
1109209962Smm	 * the actual value of add, it could trigger a mzap_upgrade().
1110209962Smm	 * At present we are just evaluating the possibility of this operation
1111209962Smm	 * and hence we donot want to trigger an upgrade.
1112209962Smm	 */
1113209962Smm	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
1114209962Smm	if (err)
1115209962Smm		return (err);
1116209962Smm
1117209962Smm	if (!zap->zap_ismicro) {
1118209962Smm		zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
1119209962Smm		if (zn) {
1120209962Smm			err = fzap_count_write(zn, add, towrite,
1121209962Smm			    tooverwrite);
1122209962Smm			zap_name_free(zn);
1123209962Smm		} else {
1124209962Smm			/*
1125209962Smm			 * We treat this case as similar to (name == NULL)
1126209962Smm			 */
1127209962Smm			*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
1128209962Smm		}
1129209962Smm	} else {
1130209962Smm		/*
1131209962Smm		 * We are here if (name != NULL) and this is a micro-zap.
1132209962Smm		 * We account for the header block depending on whether it
1133209962Smm		 * is freeable.
1134209962Smm		 *
1135209962Smm		 * Incase of an add-operation it is hard to find out
1136209962Smm		 * if this add will promote this microzap to fatzap.
1137209962Smm		 * Hence, we consider the worst case and account for the
1138209962Smm		 * blocks assuming this microzap would be promoted to a
1139209962Smm		 * fatzap.
1140209962Smm		 *
1141209962Smm		 * 1 block overwritten  : header block
1142209962Smm		 * 4 new blocks written : 2 new split leaf, 2 grown
1143209962Smm		 *			ptrtbl blocks
1144209962Smm		 */
1145209962Smm		if (dmu_buf_freeable(zap->zap_dbuf))
1146209962Smm			*tooverwrite += SPA_MAXBLOCKSIZE;
1147209962Smm		else
1148209962Smm			*towrite += SPA_MAXBLOCKSIZE;
1149209962Smm
1150209962Smm		if (add) {
1151209962Smm			*towrite += 4 * SPA_MAXBLOCKSIZE;
1152209962Smm		}
1153209962Smm	}
1154209962Smm
1155209962Smm	zap_unlockdir(zap);
1156209962Smm	return (err);
1157209962Smm}
1158