sa.c revision 295125
1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21236884Smm
22219089Spjd/*
23219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24226724Smm * Portions Copyright 2011 iXsystems, Inc
25249195Smm * Copyright (c) 2013 by Delphix. All rights reserved.
26286575Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27219089Spjd */
28219089Spjd
29219089Spjd#include <sys/zfs_context.h>
30219089Spjd#include <sys/types.h>
31219089Spjd#include <sys/param.h>
32219089Spjd#include <sys/systm.h>
33219089Spjd#include <sys/sysmacros.h>
34219089Spjd#include <sys/dmu.h>
35219089Spjd#include <sys/dmu_impl.h>
36219089Spjd#include <sys/dmu_objset.h>
37219089Spjd#include <sys/dbuf.h>
38219089Spjd#include <sys/dnode.h>
39219089Spjd#include <sys/zap.h>
40219089Spjd#include <sys/sa.h>
41219089Spjd#include <sys/sunddi.h>
42219089Spjd#include <sys/sa_impl.h>
43219089Spjd#include <sys/dnode.h>
44219089Spjd#include <sys/errno.h>
45219089Spjd#include <sys/zfs_context.h>
46219089Spjd
47219089Spjd/*
48219089Spjd * ZFS System attributes:
49219089Spjd *
50219089Spjd * A generic mechanism to allow for arbitrary attributes
51219089Spjd * to be stored in a dnode.  The data will be stored in the bonus buffer of
52219089Spjd * the dnode and if necessary a special "spill" block will be used to handle
53219089Spjd * overflow situations.  The spill block will be sized to fit the data
54219089Spjd * from 512 - 128K.  When a spill block is used the BP (blkptr_t) for the
55219089Spjd * spill block is stored at the end of the current bonus buffer.  Any
56219089Spjd * attributes that would be in the way of the blkptr_t will be relocated
57219089Spjd * into the spill block.
58219089Spjd *
59219089Spjd * Attribute registration:
60219089Spjd *
61219089Spjd * Stored persistently on a per dataset basis
62219089Spjd * a mapping between attribute "string" names and their actual attribute
63219089Spjd * numeric values, length, and byteswap function.  The names are only used
64219089Spjd * during registration.  All  attributes are known by their unique attribute
65219089Spjd * id value.  If an attribute can have a variable size then the value
66219089Spjd * 0 will be used to indicate this.
67219089Spjd *
68219089Spjd * Attribute Layout:
69219089Spjd *
70219089Spjd * Attribute layouts are a way to compactly store multiple attributes, but
71219089Spjd * without taking the overhead associated with managing each attribute
72219089Spjd * individually.  Since you will typically have the same set of attributes
73219089Spjd * stored in the same order a single table will be used to represent that
74219089Spjd * layout.  The ZPL for example will usually have only about 10 different
75219089Spjd * layouts (regular files, device files, symlinks,
76219089Spjd * regular files + scanstamp, files/dir with extended attributes, and then
77219089Spjd * you have the possibility of all of those minus ACL, because it would
78219089Spjd * be kicked out into the spill block)
79219089Spjd *
80219089Spjd * Layouts are simply an array of the attributes and their
81219089Spjd * ordering i.e. [0, 1, 4, 5, 2]
82219089Spjd *
83219089Spjd * Each distinct layout is given a unique layout number and that is what is
84219089Spjd * stored in the header at the beginning of the SA data buffer.
85219089Spjd *
86219089Spjd * A layout only covers a single dbuf (bonus or spill).  If a set of
87219089Spjd * attributes is split up between the bonus buffer and a spill buffer then
88219089Spjd * two different layouts will be used.  This allows us to byteswap the
89219089Spjd * spill without looking at the bonus buffer and keeps the on disk format of
90219089Spjd * the bonus and spill buffer the same.
91219089Spjd *
92219089Spjd * Adding a single attribute will cause the entire set of attributes to
93219089Spjd * be rewritten and could result in a new layout number being constructed
94219089Spjd * as part of the rewrite if no such layout exists for the new set of
95219089Spjd * attributes.  The new attribute will be appended to the end of the already
96219089Spjd * existing attributes.
97219089Spjd *
98219089Spjd * Both the attribute registration and attribute layout information are
99219089Spjd * stored in normal ZAP attributes.  There should be a small number of
100219089Spjd * known layouts and the set of attributes is assumed to typically be quite
101219089Spjd * small.
102219089Spjd *
103219089Spjd * The registered attributes and layout "table" information is maintained
104219089Spjd * in core and a special "sa_os_t" is attached to the objset_t.
105219089Spjd *
106219089Spjd * A special interface is provided to allow for quickly applying
107219089Spjd * a large set of attributes at once.  sa_replace_all_by_template() is
108219089Spjd * used to set an array of attributes.  This is used by the ZPL when
109219089Spjd * creating a brand new file.  The template that is passed into the function
110219089Spjd * specifies the attribute, size for variable length attributes, location of
111219089Spjd * data and special "data locator" function if the data isn't in a contiguous
112219089Spjd * location.
113219089Spjd *
114219089Spjd * Byteswap implications:
115251631Sdelphij *
116219089Spjd * Since the SA attributes are not entirely self describing we can't do
117219089Spjd * the normal byteswap processing.  The special ZAP layout attribute and
118219089Spjd * attribute registration attributes define the byteswap function and the
119219089Spjd * size of the attributes, unless it is variable sized.
120219089Spjd * The normal ZFS byteswapping infrastructure assumes you don't need
121219089Spjd * to read any objects in order to do the necessary byteswapping.  Whereas
122219089Spjd * SA attributes can only be properly byteswapped if the dataset is opened
123219089Spjd * and the layout/attribute ZAP attributes are available.  Because of this
124219089Spjd * the SA attributes will be byteswapped when they are first accessed by
125219089Spjd * the SA code that will read the SA data.
126219089Spjd */
127219089Spjd
128219089Spjdtypedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
129219089Spjd    uint16_t length, int length_idx, boolean_t, void *userp);
130219089Spjd
131219089Spjdstatic int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
132219089Spjdstatic void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
133219089Spjdstatic void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
134219089Spjd    void *data);
135219089Spjdstatic void sa_idx_tab_rele(objset_t *os, void *arg);
136219089Spjdstatic void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
137219089Spjd    int buflen);
138219089Spjdstatic int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
139219089Spjd    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
140219089Spjd    uint16_t buflen, dmu_tx_t *tx);
141219089Spjd
142219089Spjdarc_byteswap_func_t *sa_bswap_table[] = {
143219089Spjd	byteswap_uint64_array,
144219089Spjd	byteswap_uint32_array,
145219089Spjd	byteswap_uint16_array,
146219089Spjd	byteswap_uint8_array,
147219089Spjd	zfs_acl_byteswap,
148219089Spjd};
149219089Spjd
/*
 * Copy (l) bytes of attribute data from (s) to (t).
 *
 * With no data locator (f == NULL), 8- and 16-byte values are moved with
 * direct 64-bit loads/stores and every other length goes through bcopy().
 * With a locator, the copy is delegated to sa_copy_data().
 *
 * The expansion is a single statement (do/while (0)), so it is safe as the
 * body of an unbraced if/else and must be followed by a semicolon.  Every
 * argument is parenthesized, but arguments may still be evaluated more
 * than once and therefore must not have side effects.
 */
#define	SA_COPY_DATA(f, s, t, l) \
	do { \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else { \
			sa_copy_data((f), (s), (t), (l)); \
		} \
	} while (0)
165219089Spjd
166219089Spjd/*
167219089Spjd * This table is fixed and cannot be changed.  Its purpose is to
168219089Spjd * allow the SA code to work with both old/new ZPL file systems.
169219089Spjd * It contains the list of legacy attributes.  These attributes aren't
170219089Spjd * stored in the "attribute" registry zap objects, since older ZPL file systems
171219089Spjd * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
172219089Spjd * use this static table.
173219089Spjd */
174219089Spjdsa_attr_reg_t sa_legacy_attrs[] = {
175219089Spjd	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
176219089Spjd	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
177219089Spjd	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
178219089Spjd	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
179219089Spjd	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
180219089Spjd	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
181219089Spjd	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
182219089Spjd	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
183219089Spjd	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
184219089Spjd	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
185219089Spjd	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
186219089Spjd	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
187219089Spjd	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
188219089Spjd	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
189219089Spjd	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
190219089Spjd	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
191219089Spjd};
192219089Spjd
193219089Spjd/*
194219089Spjd * This is only used for objects of type DMU_OT_ZNODE
195219089Spjd */
196219089Spjdsa_attr_type_t sa_legacy_zpl_layout[] = {
197219089Spjd    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
198219089Spjd};
199219089Spjd
200219089Spjd/*
201219089Spjd * Special dummy layout used for buffers with no attributes.
202219089Spjd */
203219089Spjdsa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
204219089Spjd
205219089Spjdstatic int sa_legacy_attr_count = 16;
206219089Spjdstatic kmem_cache_t *sa_cache = NULL;
207219089Spjd
208219089Spjd/*ARGSUSED*/
209219089Spjdstatic int
210219089Spjdsa_cache_constructor(void *buf, void *unused, int kmflag)
211219089Spjd{
212219089Spjd	sa_handle_t *hdl = buf;
213219089Spjd
214219089Spjd	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
215219089Spjd	return (0);
216219089Spjd}
217219089Spjd
218219089Spjd/*ARGSUSED*/
219219089Spjdstatic void
220219089Spjdsa_cache_destructor(void *buf, void *unused)
221219089Spjd{
222219089Spjd	sa_handle_t *hdl = buf;
223219089Spjd	mutex_destroy(&hdl->sa_lock);
224219089Spjd}
225219089Spjd
226219089Spjdvoid
227219089Spjdsa_cache_init(void)
228219089Spjd{
229219089Spjd	sa_cache = kmem_cache_create("sa_cache",
230219089Spjd	    sizeof (sa_handle_t), 0, sa_cache_constructor,
231219089Spjd	    sa_cache_destructor, NULL, NULL, NULL, 0);
232219089Spjd}
233219089Spjd
234219089Spjdvoid
235219089Spjdsa_cache_fini(void)
236219089Spjd{
237219089Spjd	if (sa_cache)
238219089Spjd		kmem_cache_destroy(sa_cache);
239219089Spjd}
240219089Spjd
241219089Spjdstatic int
242219089Spjdlayout_num_compare(const void *arg1, const void *arg2)
243219089Spjd{
244219089Spjd	const sa_lot_t *node1 = arg1;
245219089Spjd	const sa_lot_t *node2 = arg2;
246219089Spjd
247219089Spjd	if (node1->lot_num > node2->lot_num)
248219089Spjd		return (1);
249219089Spjd	else if (node1->lot_num < node2->lot_num)
250219089Spjd		return (-1);
251219089Spjd	return (0);
252219089Spjd}
253219089Spjd
254219089Spjdstatic int
255219089Spjdlayout_hash_compare(const void *arg1, const void *arg2)
256219089Spjd{
257219089Spjd	const sa_lot_t *node1 = arg1;
258219089Spjd	const sa_lot_t *node2 = arg2;
259219089Spjd
260219089Spjd	if (node1->lot_hash > node2->lot_hash)
261219089Spjd		return (1);
262219089Spjd	if (node1->lot_hash < node2->lot_hash)
263219089Spjd		return (-1);
264219089Spjd	if (node1->lot_instance > node2->lot_instance)
265219089Spjd		return (1);
266219089Spjd	if (node1->lot_instance < node2->lot_instance)
267219089Spjd		return (-1);
268219089Spjd	return (0);
269219089Spjd}
270219089Spjd
271219089Spjdboolean_t
272219089Spjdsa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
273219089Spjd{
274219089Spjd	int i;
275219089Spjd
276219089Spjd	if (count != tbf->lot_attr_count)
277219089Spjd		return (1);
278219089Spjd
279219089Spjd	for (i = 0; i != count; i++) {
280219089Spjd		if (attrs[i] != tbf->lot_attrs[i])
281219089Spjd			return (1);
282219089Spjd	}
283219089Spjd	return (0);
284219089Spjd}
285219089Spjd
/*
 * Hash contribution of a single attribute number: the zfs_crc64_table
 * entry selected by the low byte of the bitwise-complemented attribute.
 * The argument is parenthesized so that expression arguments such as
 * (a | b) hash the value of the whole expression.
 */
#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ (attr)) & 0xFF])
287219089Spjd
288219089Spjdstatic uint64_t
289219089Spjdsa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
290219089Spjd{
291219089Spjd	int i;
292219089Spjd	uint64_t crc = -1ULL;
293219089Spjd
294219089Spjd	for (i = 0; i != attr_count; i++)
295219089Spjd		crc ^= SA_ATTR_HASH(attrs[i]);
296219089Spjd
297219089Spjd	return (crc);
298219089Spjd}
299219089Spjd
300219089Spjdstatic int
301219089Spjdsa_get_spill(sa_handle_t *hdl)
302219089Spjd{
303219089Spjd	int rc;
304219089Spjd	if (hdl->sa_spill == NULL) {
305219089Spjd		if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
306219089Spjd		    &hdl->sa_spill)) == 0)
307219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
308219089Spjd	} else {
309219089Spjd		rc = 0;
310219089Spjd	}
311219089Spjd
312219089Spjd	return (rc);
313219089Spjd}
314219089Spjd
315219089Spjd/*
316219089Spjd * Main attribute lookup/update function
317219089Spjd * returns 0 for success or non zero for failures
318219089Spjd *
319219089Spjd * Operates on bulk array, first failure will abort further processing
320219089Spjd */
321219089Spjdint
322219089Spjdsa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
323219089Spjd    sa_data_op_t data_op, dmu_tx_t *tx)
324219089Spjd{
325219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
326219089Spjd	int i;
327219089Spjd	int error = 0;
328219089Spjd	sa_buf_type_t buftypes;
329219089Spjd
330219089Spjd	buftypes = 0;
331219089Spjd
332219089Spjd	ASSERT(count > 0);
333219089Spjd	for (i = 0; i != count; i++) {
334219089Spjd		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
335219089Spjd
336219089Spjd		bulk[i].sa_addr = NULL;
337219089Spjd		/* First check the bonus buffer */
338219089Spjd
339219089Spjd		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
340219089Spjd		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
341219089Spjd			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
342219089Spjd			    SA_GET_HDR(hdl, SA_BONUS),
343219089Spjd			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
344219089Spjd			if (tx && !(buftypes & SA_BONUS)) {
345219089Spjd				dmu_buf_will_dirty(hdl->sa_bonus, tx);
346219089Spjd				buftypes |= SA_BONUS;
347219089Spjd			}
348219089Spjd		}
349219089Spjd		if (bulk[i].sa_addr == NULL &&
350219089Spjd		    ((error = sa_get_spill(hdl)) == 0)) {
351219089Spjd			if (TOC_ATTR_PRESENT(
352219089Spjd			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
353219089Spjd				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
354219089Spjd				    SA_GET_HDR(hdl, SA_SPILL),
355219089Spjd				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
356219089Spjd				if (tx && !(buftypes & SA_SPILL) &&
357219089Spjd				    bulk[i].sa_size == bulk[i].sa_length) {
358219089Spjd					dmu_buf_will_dirty(hdl->sa_spill, tx);
359219089Spjd					buftypes |= SA_SPILL;
360219089Spjd				}
361219089Spjd			}
362219089Spjd		}
363219089Spjd		if (error && error != ENOENT) {
364219089Spjd			return ((error == ECKSUM) ? EIO : error);
365219089Spjd		}
366219089Spjd
367219089Spjd		switch (data_op) {
368219089Spjd		case SA_LOOKUP:
369219089Spjd			if (bulk[i].sa_addr == NULL)
370249195Smm				return (SET_ERROR(ENOENT));
371219089Spjd			if (bulk[i].sa_data) {
372219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
373219089Spjd				    bulk[i].sa_addr, bulk[i].sa_data,
374219089Spjd				    bulk[i].sa_size);
375219089Spjd			}
376219089Spjd			continue;
377219089Spjd
378219089Spjd		case SA_UPDATE:
379219089Spjd			/* existing rewrite of attr */
380219089Spjd			if (bulk[i].sa_addr &&
381219089Spjd			    bulk[i].sa_size == bulk[i].sa_length) {
382219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
383219089Spjd				    bulk[i].sa_data, bulk[i].sa_addr,
384219089Spjd				    bulk[i].sa_length);
385219089Spjd				continue;
386219089Spjd			} else if (bulk[i].sa_addr) { /* attr size change */
387219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
388219089Spjd				    SA_REPLACE, bulk[i].sa_data_func,
389219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
390219089Spjd			} else { /* adding new attribute */
391219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
392219089Spjd				    SA_ADD, bulk[i].sa_data_func,
393219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
394219089Spjd			}
395219089Spjd			if (error)
396219089Spjd				return (error);
397219089Spjd			break;
398219089Spjd		}
399219089Spjd	}
400219089Spjd	return (error);
401219089Spjd}
402219089Spjd
403219089Spjdstatic sa_lot_t *
404219089Spjdsa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
405219089Spjd    uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
406219089Spjd{
407219089Spjd	sa_os_t *sa = os->os_sa;
408219089Spjd	sa_lot_t *tb, *findtb;
409219089Spjd	int i;
410219089Spjd	avl_index_t loc;
411219089Spjd
412219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
413219089Spjd	tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
414219089Spjd	tb->lot_attr_count = attr_count;
415219089Spjd	tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
416219089Spjd	    KM_SLEEP);
417219089Spjd	bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
418219089Spjd	tb->lot_num = lot_num;
419219089Spjd	tb->lot_hash = hash;
420219089Spjd	tb->lot_instance = 0;
421219089Spjd
422219089Spjd	if (zapadd) {
423219089Spjd		char attr_name[8];
424219089Spjd
425219089Spjd		if (sa->sa_layout_attr_obj == 0) {
426236884Smm			sa->sa_layout_attr_obj = zap_create_link(os,
427236884Smm			    DMU_OT_SA_ATTR_LAYOUTS,
428236884Smm			    sa->sa_master_obj, SA_LAYOUTS, tx);
429219089Spjd		}
430219089Spjd
431219089Spjd		(void) snprintf(attr_name, sizeof (attr_name),
432219089Spjd		    "%d", (int)lot_num);
433219089Spjd		VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
434219089Spjd		    attr_name, 2, attr_count, attrs, tx));
435219089Spjd	}
436219089Spjd
437219089Spjd	list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
438219089Spjd	    offsetof(sa_idx_tab_t, sa_next));
439219089Spjd
440219089Spjd	for (i = 0; i != attr_count; i++) {
441219089Spjd		if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
442219089Spjd			tb->lot_var_sizes++;
443219089Spjd	}
444219089Spjd
445219089Spjd	avl_add(&sa->sa_layout_num_tree, tb);
446219089Spjd
447219089Spjd	/* verify we don't have a hash collision */
448219089Spjd	if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
449219089Spjd		for (; findtb && findtb->lot_hash == hash;
450219089Spjd		    findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
451219089Spjd			if (findtb->lot_instance != tb->lot_instance)
452219089Spjd				break;
453219089Spjd			tb->lot_instance++;
454219089Spjd		}
455219089Spjd	}
456219089Spjd	avl_add(&sa->sa_layout_hash_tree, tb);
457219089Spjd	return (tb);
458219089Spjd}
459219089Spjd
460219089Spjdstatic void
461219089Spjdsa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
462219089Spjd    int count, dmu_tx_t *tx, sa_lot_t **lot)
463219089Spjd{
464219089Spjd	sa_lot_t *tb, tbsearch;
465219089Spjd	avl_index_t loc;
466219089Spjd	sa_os_t *sa = os->os_sa;
467219089Spjd	boolean_t found = B_FALSE;
468219089Spjd
469219089Spjd	mutex_enter(&sa->sa_lock);
470219089Spjd	tbsearch.lot_hash = hash;
471219089Spjd	tbsearch.lot_instance = 0;
472219089Spjd	tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
473219089Spjd	if (tb) {
474219089Spjd		for (; tb && tb->lot_hash == hash;
475219089Spjd		    tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
476219089Spjd			if (sa_layout_equal(tb, attrs, count) == 0) {
477219089Spjd				found = B_TRUE;
478219089Spjd				break;
479219089Spjd			}
480219089Spjd		}
481219089Spjd	}
482219089Spjd	if (!found) {
483219089Spjd		tb = sa_add_layout_entry(os, attrs, count,
484219089Spjd		    avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
485219089Spjd	}
486219089Spjd	mutex_exit(&sa->sa_lock);
487219089Spjd	*lot = tb;
488219089Spjd}
489219089Spjd
490219089Spjdstatic int
491219089Spjdsa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
492219089Spjd{
493219089Spjd	int error;
494219089Spjd	uint32_t blocksize;
495219089Spjd
496219089Spjd	if (size == 0) {
497219089Spjd		blocksize = SPA_MINBLOCKSIZE;
498274337Sdelphij	} else if (size > SPA_OLD_MAXBLOCKSIZE) {
499219089Spjd		ASSERT(0);
500249195Smm		return (SET_ERROR(EFBIG));
501219089Spjd	} else {
502219089Spjd		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
503219089Spjd	}
504219089Spjd
505219089Spjd	error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
506219089Spjd	ASSERT(error == 0);
507219089Spjd	return (error);
508219089Spjd}
509219089Spjd
510219089Spjdstatic void
511219089Spjdsa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
512219089Spjd{
513219089Spjd	if (func == NULL) {
514219089Spjd		bcopy(datastart, target, buflen);
515219089Spjd	} else {
516219089Spjd		boolean_t start;
517219089Spjd		int bytes;
518219089Spjd		void *dataptr;
519219089Spjd		void *saptr = target;
520219089Spjd		uint32_t length;
521219089Spjd
522219089Spjd		start = B_TRUE;
523219089Spjd		bytes = 0;
524219089Spjd		while (bytes < buflen) {
525219089Spjd			func(&dataptr, &length, buflen, start, datastart);
526219089Spjd			bcopy(dataptr, saptr, length);
527219089Spjd			saptr = (void *)((caddr_t)saptr + length);
528219089Spjd			bytes += length;
529219089Spjd			start = B_FALSE;
530219089Spjd		}
531219089Spjd	}
532219089Spjd}
533219089Spjd
534219089Spjd/*
535219089Spjd * Determine several different sizes
536219089Spjd * first the sa header size
537219089Spjd * the number of bytes to be stored
538219089Spjd * if spill would occur the index in the attribute array is returned
539219089Spjd *
540219089Spjd * the boolean will_spill will be set when spilling is necessary.  It
541219089Spjd * is only set when the buftype is SA_BONUS
542219089Spjd */
543219089Spjdstatic int
544219089Spjdsa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
545219089Spjd    dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
546219089Spjd    boolean_t *will_spill)
547219089Spjd{
548219089Spjd	int var_size = 0;
549219089Spjd	int i;
550219089Spjd	int full_space;
551219089Spjd	int hdrsize;
552294813Smav	int extra_hdrsize;
553219089Spjd
554219089Spjd	if (buftype == SA_BONUS && sa->sa_force_spill) {
555219089Spjd		*total = 0;
556219089Spjd		*index = 0;
557219089Spjd		*will_spill = B_TRUE;
558219089Spjd		return (0);
559219089Spjd	}
560219089Spjd
561219089Spjd	*index = -1;
562219089Spjd	*total = 0;
563294813Smav	*will_spill = B_FALSE;
564219089Spjd
565294813Smav	extra_hdrsize = 0;
566219089Spjd	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
567219089Spjd	    sizeof (sa_hdr_phys_t);
568219089Spjd
569219089Spjd	full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
570246678Smm	ASSERT(IS_P2ALIGNED(full_space, 8));
571219089Spjd
572219089Spjd	for (i = 0; i != attr_count; i++) {
573219089Spjd		boolean_t is_var_sz;
574219089Spjd
575246678Smm		*total = P2ROUNDUP(*total, 8);
576246678Smm		*total += attr_desc[i].sa_length;
577294813Smav		if (*will_spill)
578294813Smav			continue;
579219089Spjd
580219089Spjd		is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
581219089Spjd		if (is_var_sz) {
582219089Spjd			var_size++;
583219089Spjd		}
584219089Spjd
585219089Spjd		if (is_var_sz && var_size > 1) {
586294813Smav			/*
587294813Smav			 * Don't worry that the spill block might overflow.
588294813Smav			 * It will be resized if needed in sa_build_layouts().
589294813Smav			 */
590294813Smav			if (buftype == SA_SPILL ||
591294813Smav			    P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
592219089Spjd			    *total < full_space) {
593246678Smm				/*
594246678Smm				 * Account for header space used by array of
595246678Smm				 * optional sizes of variable-length attributes.
596294813Smav				 * Record the extra header size in case this
597294813Smav				 * increase needs to be reversed due to
598294813Smav				 * spill-over.
599246678Smm				 */
600219089Spjd				hdrsize += sizeof (uint16_t);
601294813Smav				if (*index != -1)
602294813Smav					extra_hdrsize += sizeof (uint16_t);
603219089Spjd			} else {
604294813Smav				ASSERT(buftype == SA_BONUS);
605294813Smav				if (*index == -1)
606294813Smav					*index = i;
607294813Smav				*will_spill = B_TRUE;
608219089Spjd				continue;
609219089Spjd			}
610219089Spjd		}
611219089Spjd
612219089Spjd		/*
613219089Spjd		 * find index of where spill *could* occur.
614219089Spjd		 * Then continue to count of remainder attribute
615219089Spjd		 * space.  The sum is used later for sizing bonus
616219089Spjd		 * and spill buffer.
617219089Spjd		 */
618219089Spjd		if (buftype == SA_BONUS && *index == -1 &&
619226483Sdelphij		    (*total + P2ROUNDUP(hdrsize, 8)) >
620219089Spjd		    (full_space - sizeof (blkptr_t))) {
621219089Spjd			*index = i;
622219089Spjd		}
623219089Spjd
624226483Sdelphij		if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space &&
625219089Spjd		    buftype == SA_BONUS)
626219089Spjd			*will_spill = B_TRUE;
627219089Spjd	}
628219089Spjd
629294813Smav	if (*will_spill)
630294813Smav		hdrsize -= extra_hdrsize;
631246678Smm
632219089Spjd	hdrsize = P2ROUNDUP(hdrsize, 8);
633219089Spjd	return (hdrsize);
634219089Spjd}
635219089Spjd
/*
 * Bytes a buffer must provide for `total' bytes of attribute data plus a
 * header of `header' bytes.  Arguments are parenthesized so expression
 * arguments are summed as whole values.
 */
#define	BUF_SPACE_NEEDED(total, header) ((total) + (header))
637219089Spjd
638219089Spjd/*
639219089Spjd * Find layout that corresponds to ordering of attributes
640219089Spjd * If not found a new layout number is created and added to
641219089Spjd * persistent layout tables.
642219089Spjd */
643219089Spjdstatic int
644219089Spjdsa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
645219089Spjd    dmu_tx_t *tx)
646219089Spjd{
647219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
648219089Spjd	uint64_t hash;
649219089Spjd	sa_buf_type_t buftype;
650219089Spjd	sa_hdr_phys_t *sahdr;
651219089Spjd	void *data_start;
652219089Spjd	int buf_space;
653219089Spjd	sa_attr_type_t *attrs, *attrs_start;
654219089Spjd	int i, lot_count;
655247187Smm	int hdrsize;
656247187Smm	int spillhdrsize = 0;
657219089Spjd	int used;
658219089Spjd	dmu_object_type_t bonustype;
659219089Spjd	sa_lot_t *lot;
660219089Spjd	int len_idx;
661219089Spjd	int spill_used;
662219089Spjd	boolean_t spilling;
663219089Spjd
664219089Spjd	dmu_buf_will_dirty(hdl->sa_bonus, tx);
665219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
666219089Spjd
667219089Spjd	/* first determine bonus header size and sum of all attributes */
668219089Spjd	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
669219089Spjd	    SA_BONUS, &i, &used, &spilling);
670219089Spjd
671274337Sdelphij	if (used > SPA_OLD_MAXBLOCKSIZE)
672249195Smm		return (SET_ERROR(EFBIG));
673219089Spjd
674219089Spjd	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
675219089Spjd	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
676219089Spjd	    used + hdrsize, tx));
677219089Spjd
678219089Spjd	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
679219089Spjd	    bonustype == DMU_OT_SA);
680219089Spjd
681219089Spjd	/* setup and size spill buffer when needed */
682219089Spjd	if (spilling) {
683219089Spjd		boolean_t dummy;
684219089Spjd
685219089Spjd		if (hdl->sa_spill == NULL) {
686219089Spjd			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
687219089Spjd			    &hdl->sa_spill) == 0);
688219089Spjd		}
689219089Spjd		dmu_buf_will_dirty(hdl->sa_spill, tx);
690219089Spjd
691219089Spjd		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
692219089Spjd		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
693219089Spjd		    &spill_used, &dummy);
694219089Spjd
695274337Sdelphij		if (spill_used > SPA_OLD_MAXBLOCKSIZE)
696249195Smm			return (SET_ERROR(EFBIG));
697219089Spjd
698219089Spjd		buf_space = hdl->sa_spill->db_size - spillhdrsize;
699219089Spjd		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
700219089Spjd		    hdl->sa_spill->db_size)
701219089Spjd			VERIFY(0 == sa_resize_spill(hdl,
702219089Spjd			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
703219089Spjd	}
704219089Spjd
705219089Spjd	/* setup starting pointers to lay down data */
706219089Spjd	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
707219089Spjd	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
708219089Spjd	buftype = SA_BONUS;
709219089Spjd
710219089Spjd	if (spilling)
711219089Spjd		buf_space = (sa->sa_force_spill) ?
712219089Spjd		    0 : SA_BLKPTR_SPACE - hdrsize;
713219089Spjd	else
714219089Spjd		buf_space = hdl->sa_bonus->db_size - hdrsize;
715219089Spjd
716219089Spjd	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
717219089Spjd	    KM_SLEEP);
718219089Spjd	lot_count = 0;
719219089Spjd
720219089Spjd	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
721219089Spjd		uint16_t length;
722219089Spjd
723246678Smm		ASSERT(IS_P2ALIGNED(data_start, 8));
724246678Smm		ASSERT(IS_P2ALIGNED(buf_space, 8));
725219089Spjd		attrs[i] = attr_desc[i].sa_attr;
726219089Spjd		length = SA_REGISTERED_LEN(sa, attrs[i]);
727219089Spjd		if (length == 0)
728219089Spjd			length = attr_desc[i].sa_length;
729240632Savg		else
730240632Savg			VERIFY(length == attr_desc[i].sa_length);
731219089Spjd
732219089Spjd		if (buf_space < length) {  /* switch to spill buffer */
733246678Smm			VERIFY(spilling);
734219089Spjd			VERIFY(bonustype == DMU_OT_SA);
735219089Spjd			if (buftype == SA_BONUS && !sa->sa_force_spill) {
736219089Spjd				sa_find_layout(hdl->sa_os, hash, attrs_start,
737219089Spjd				    lot_count, tx, &lot);
738219089Spjd				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
739219089Spjd			}
740219089Spjd
741219089Spjd			buftype = SA_SPILL;
742219089Spjd			hash = -1ULL;
743219089Spjd			len_idx = 0;
744219089Spjd
745219089Spjd			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
746219089Spjd			sahdr->sa_magic = SA_MAGIC;
747219089Spjd			data_start = (void *)((uintptr_t)sahdr +
748219089Spjd			    spillhdrsize);
749219089Spjd			attrs_start = &attrs[i];
750219089Spjd			buf_space = hdl->sa_spill->db_size - spillhdrsize;
751219089Spjd			lot_count = 0;
752219089Spjd		}
753219089Spjd		hash ^= SA_ATTR_HASH(attrs[i]);
754219089Spjd		attr_desc[i].sa_addr = data_start;
755219089Spjd		attr_desc[i].sa_size = length;
756219089Spjd		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
757219089Spjd		    data_start, length);
758219089Spjd		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
759219089Spjd			sahdr->sa_lengths[len_idx++] = length;
760219089Spjd		}
761240632Savg		VERIFY((uintptr_t)data_start % 8 == 0);
762219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
763219089Spjd		    length), 8);
764219089Spjd		buf_space -= P2ROUNDUP(length, 8);
765219089Spjd		lot_count++;
766219089Spjd	}
767219089Spjd
768219089Spjd	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
769219089Spjd
770219089Spjd	/*
771219089Spjd	 * Verify that old znodes always have layout number 0.
772219089Spjd	 * Must be DMU_OT_SA for arbitrary layouts
773219089Spjd	 */
774219089Spjd	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
775219089Spjd	    (bonustype == DMU_OT_SA && lot->lot_num > 1));
776219089Spjd
777219089Spjd	if (bonustype == DMU_OT_SA) {
778219089Spjd		SA_SET_HDR(sahdr, lot->lot_num,
779219089Spjd		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
780219089Spjd	}
781219089Spjd
782219089Spjd	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
783219089Spjd	if (hdl->sa_bonus_tab) {
784219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
785219089Spjd		hdl->sa_bonus_tab = NULL;
786219089Spjd	}
787219089Spjd	if (!sa->sa_force_spill)
788219089Spjd		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
789219089Spjd	if (hdl->sa_spill) {
790219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
791219089Spjd		if (!spilling) {
792219089Spjd			/*
793219089Spjd			 * remove spill block that is no longer needed.
794219089Spjd			 */
795219089Spjd			dmu_buf_rele(hdl->sa_spill, NULL);
796219089Spjd			hdl->sa_spill = NULL;
797219089Spjd			hdl->sa_spill_tab = NULL;
798219089Spjd			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
799219089Spjd			    sa_handle_object(hdl), tx));
800219089Spjd		} else {
801219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
802219089Spjd		}
803219089Spjd	}
804219089Spjd
805219089Spjd	return (0);
806219089Spjd}
807219089Spjd
808219089Spjdstatic void
809219089Spjdsa_free_attr_table(sa_os_t *sa)
810219089Spjd{
811219089Spjd	int i;
812219089Spjd
813219089Spjd	if (sa->sa_attr_table == NULL)
814219089Spjd		return;
815219089Spjd
816219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
817219089Spjd		if (sa->sa_attr_table[i].sa_name)
818219089Spjd			kmem_free(sa->sa_attr_table[i].sa_name,
819219089Spjd			    strlen(sa->sa_attr_table[i].sa_name) + 1);
820219089Spjd	}
821219089Spjd
822219089Spjd	kmem_free(sa->sa_attr_table,
823219089Spjd	    sizeof (sa_attr_table_t) * sa->sa_num_attrs);
824219089Spjd
825219089Spjd	sa->sa_attr_table = NULL;
826219089Spjd}
827219089Spjd
828219089Spjdstatic int
829219089Spjdsa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
830219089Spjd{
831219089Spjd	sa_os_t *sa = os->os_sa;
832219089Spjd	uint64_t sa_attr_count = 0;
833247187Smm	uint64_t sa_reg_count = 0;
834219089Spjd	int error = 0;
835219089Spjd	uint64_t attr_value;
836219089Spjd	sa_attr_table_t *tb;
837219089Spjd	zap_cursor_t zc;
838219089Spjd	zap_attribute_t za;
839219089Spjd	int registered_count = 0;
840219089Spjd	int i;
841219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
842219089Spjd
843219089Spjd	sa->sa_user_table =
844219089Spjd	    kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
845219089Spjd	sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
846219089Spjd
847219089Spjd	if (sa->sa_reg_attr_obj != 0) {
848219089Spjd		error = zap_count(os, sa->sa_reg_attr_obj,
849219089Spjd		    &sa_attr_count);
850219089Spjd
851219089Spjd		/*
852219089Spjd		 * Make sure we retrieved a count and that it isn't zero
853219089Spjd		 */
854219089Spjd		if (error || (error == 0 && sa_attr_count == 0)) {
855219089Spjd			if (error == 0)
856249195Smm				error = SET_ERROR(EINVAL);
857219089Spjd			goto bail;
858219089Spjd		}
859219089Spjd		sa_reg_count = sa_attr_count;
860219089Spjd	}
861219089Spjd
862219089Spjd	if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
863219089Spjd		sa_attr_count += sa_legacy_attr_count;
864219089Spjd
865219089Spjd	/* Allocate attribute numbers for attributes that aren't registered */
866219089Spjd	for (i = 0; i != count; i++) {
867219089Spjd		boolean_t found = B_FALSE;
868219089Spjd		int j;
869219089Spjd
870219089Spjd		if (ostype == DMU_OST_ZFS) {
871219089Spjd			for (j = 0; j != sa_legacy_attr_count; j++) {
872219089Spjd				if (strcmp(reg_attrs[i].sa_name,
873219089Spjd				    sa_legacy_attrs[j].sa_name) == 0) {
874219089Spjd					sa->sa_user_table[i] =
875219089Spjd					    sa_legacy_attrs[j].sa_attr;
876219089Spjd					found = B_TRUE;
877219089Spjd				}
878219089Spjd			}
879219089Spjd		}
880219089Spjd		if (found)
881219089Spjd			continue;
882219089Spjd
883219089Spjd		if (sa->sa_reg_attr_obj)
884219089Spjd			error = zap_lookup(os, sa->sa_reg_attr_obj,
885219089Spjd			    reg_attrs[i].sa_name, 8, 1, &attr_value);
886219089Spjd		else
887249195Smm			error = SET_ERROR(ENOENT);
888219089Spjd		switch (error) {
889219089Spjd		case ENOENT:
890219089Spjd			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
891219089Spjd			sa_attr_count++;
892219089Spjd			break;
893219089Spjd		case 0:
894219089Spjd			sa->sa_user_table[i] = ATTR_NUM(attr_value);
895219089Spjd			break;
896219089Spjd		default:
897219089Spjd			goto bail;
898219089Spjd		}
899219089Spjd	}
900219089Spjd
901219089Spjd	sa->sa_num_attrs = sa_attr_count;
902219089Spjd	tb = sa->sa_attr_table =
903219089Spjd	    kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
904219089Spjd
905219089Spjd	/*
906219089Spjd	 * Attribute table is constructed from requested attribute list,
907219089Spjd	 * previously foreign registered attributes, and also the legacy
908219089Spjd	 * ZPL set of attributes.
909219089Spjd	 */
910219089Spjd
911219089Spjd	if (sa->sa_reg_attr_obj) {
912219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
913219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
914219089Spjd		    zap_cursor_advance(&zc)) {
915219089Spjd			uint64_t value;
916219089Spjd			value  = za.za_first_integer;
917219089Spjd
918219089Spjd			registered_count++;
919219089Spjd			tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
920219089Spjd			tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
921219089Spjd			tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
922219089Spjd			tb[ATTR_NUM(value)].sa_registered = B_TRUE;
923219089Spjd
924219089Spjd			if (tb[ATTR_NUM(value)].sa_name) {
925219089Spjd				continue;
926219089Spjd			}
927219089Spjd			tb[ATTR_NUM(value)].sa_name =
928219089Spjd			    kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
929219089Spjd			(void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
930219089Spjd			    strlen(za.za_name) +1);
931219089Spjd		}
932219089Spjd		zap_cursor_fini(&zc);
933219089Spjd		/*
934219089Spjd		 * Make sure we processed the correct number of registered
935219089Spjd		 * attributes
936219089Spjd		 */
937219089Spjd		if (registered_count != sa_reg_count) {
938219089Spjd			ASSERT(error != 0);
939219089Spjd			goto bail;
940219089Spjd		}
941219089Spjd
942219089Spjd	}
943219089Spjd
944219089Spjd	if (ostype == DMU_OST_ZFS) {
945219089Spjd		for (i = 0; i != sa_legacy_attr_count; i++) {
946219089Spjd			if (tb[i].sa_name)
947219089Spjd				continue;
948219089Spjd			tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
949219089Spjd			tb[i].sa_length = sa_legacy_attrs[i].sa_length;
950219089Spjd			tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
951219089Spjd			tb[i].sa_registered = B_FALSE;
952219089Spjd			tb[i].sa_name =
953219089Spjd			    kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
954219089Spjd			    KM_SLEEP);
955219089Spjd			(void) strlcpy(tb[i].sa_name,
956219089Spjd			    sa_legacy_attrs[i].sa_name,
957219089Spjd			    strlen(sa_legacy_attrs[i].sa_name) + 1);
958219089Spjd		}
959219089Spjd	}
960219089Spjd
961219089Spjd	for (i = 0; i != count; i++) {
962219089Spjd		sa_attr_type_t attr_id;
963219089Spjd
964219089Spjd		attr_id = sa->sa_user_table[i];
965219089Spjd		if (tb[attr_id].sa_name)
966219089Spjd			continue;
967219089Spjd
968219089Spjd		tb[attr_id].sa_length = reg_attrs[i].sa_length;
969219089Spjd		tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
970219089Spjd		tb[attr_id].sa_attr = attr_id;
971219089Spjd		tb[attr_id].sa_name =
972219089Spjd		    kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
973219089Spjd		(void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
974219089Spjd		    strlen(reg_attrs[i].sa_name) + 1);
975219089Spjd	}
976219089Spjd
977219089Spjd	sa->sa_need_attr_registration =
978219089Spjd	    (sa_attr_count != registered_count);
979219089Spjd
980219089Spjd	return (0);
981219089Spjdbail:
982219089Spjd	kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t));
983219089Spjd	sa->sa_user_table = NULL;
984219089Spjd	sa_free_attr_table(sa);
985219089Spjd	return ((error != 0) ? error : EINVAL);
986219089Spjd}
987219089Spjd
988219089Spjdint
989219089Spjdsa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
990219089Spjd    sa_attr_type_t **user_table)
991219089Spjd{
992219089Spjd	zap_cursor_t zc;
993219089Spjd	zap_attribute_t za;
994219089Spjd	sa_os_t *sa;
995219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
996219089Spjd	sa_attr_type_t *tb;
997219089Spjd	int error;
998219089Spjd
999248571Smm	mutex_enter(&os->os_user_ptr_lock);
1000219089Spjd	if (os->os_sa) {
1001219089Spjd		mutex_enter(&os->os_sa->sa_lock);
1002248571Smm		mutex_exit(&os->os_user_ptr_lock);
1003219089Spjd		tb = os->os_sa->sa_user_table;
1004219089Spjd		mutex_exit(&os->os_sa->sa_lock);
1005219089Spjd		*user_table = tb;
1006219089Spjd		return (0);
1007219089Spjd	}
1008219089Spjd
1009219089Spjd	sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
1010219089Spjd	mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
1011219089Spjd	sa->sa_master_obj = sa_obj;
1012219089Spjd
1013219089Spjd	os->os_sa = sa;
1014219089Spjd	mutex_enter(&sa->sa_lock);
1015248571Smm	mutex_exit(&os->os_user_ptr_lock);
1016219089Spjd	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
1017219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
1018219089Spjd	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
1019219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
1020219089Spjd
1021219089Spjd	if (sa_obj) {
1022219089Spjd		error = zap_lookup(os, sa_obj, SA_LAYOUTS,
1023219089Spjd		    8, 1, &sa->sa_layout_attr_obj);
1024219089Spjd		if (error != 0 && error != ENOENT)
1025219089Spjd			goto fail;
1026219089Spjd		error = zap_lookup(os, sa_obj, SA_REGISTRY,
1027219089Spjd		    8, 1, &sa->sa_reg_attr_obj);
1028219089Spjd		if (error != 0 && error != ENOENT)
1029219089Spjd			goto fail;
1030219089Spjd	}
1031219089Spjd
1032219089Spjd	if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0)
1033219089Spjd		goto fail;
1034219089Spjd
1035219089Spjd	if (sa->sa_layout_attr_obj != 0) {
1036219089Spjd		uint64_t layout_count;
1037219089Spjd
1038219089Spjd		error = zap_count(os, sa->sa_layout_attr_obj,
1039219089Spjd		    &layout_count);
1040219089Spjd
1041219089Spjd		/*
1042219089Spjd		 * Layout number count should be > 0
1043219089Spjd		 */
1044219089Spjd		if (error || (error == 0 && layout_count == 0)) {
1045219089Spjd			if (error == 0)
1046249195Smm				error = SET_ERROR(EINVAL);
1047219089Spjd			goto fail;
1048219089Spjd		}
1049219089Spjd
1050219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
1051219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
1052219089Spjd		    zap_cursor_advance(&zc)) {
1053219089Spjd			sa_attr_type_t *lot_attrs;
1054219089Spjd			uint64_t lot_num;
1055219089Spjd
1056219089Spjd			lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
1057219089Spjd			    za.za_num_integers, KM_SLEEP);
1058219089Spjd
1059219089Spjd			if ((error = (zap_lookup(os, sa->sa_layout_attr_obj,
1060219089Spjd			    za.za_name, 2, za.za_num_integers,
1061219089Spjd			    lot_attrs))) != 0) {
1062219089Spjd				kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1063219089Spjd				    za.za_num_integers);
1064219089Spjd				break;
1065219089Spjd			}
1066219089Spjd			VERIFY(ddi_strtoull(za.za_name, NULL, 10,
1067219089Spjd			    (unsigned long long *)&lot_num) == 0);
1068219089Spjd
1069219089Spjd			(void) sa_add_layout_entry(os, lot_attrs,
1070219089Spjd			    za.za_num_integers, lot_num,
1071219089Spjd			    sa_layout_info_hash(lot_attrs,
1072219089Spjd			    za.za_num_integers), B_FALSE, NULL);
1073219089Spjd			kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1074219089Spjd			    za.za_num_integers);
1075219089Spjd		}
1076219089Spjd		zap_cursor_fini(&zc);
1077219089Spjd
1078219089Spjd		/*
1079219089Spjd		 * Make sure layout count matches number of entries added
1080219089Spjd		 * to AVL tree
1081219089Spjd		 */
1082219089Spjd		if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) {
1083219089Spjd			ASSERT(error != 0);
1084219089Spjd			goto fail;
1085219089Spjd		}
1086219089Spjd	}
1087219089Spjd
1088219089Spjd	/* Add special layout number for old ZNODES */
1089219089Spjd	if (ostype == DMU_OST_ZFS) {
1090219089Spjd		(void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
1091219089Spjd		    sa_legacy_attr_count, 0,
1092219089Spjd		    sa_layout_info_hash(sa_legacy_zpl_layout,
1093219089Spjd		    sa_legacy_attr_count), B_FALSE, NULL);
1094219089Spjd
1095219089Spjd		(void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
1096219089Spjd		    0, B_FALSE, NULL);
1097219089Spjd	}
1098219089Spjd	*user_table = os->os_sa->sa_user_table;
1099219089Spjd	mutex_exit(&sa->sa_lock);
1100219089Spjd	return (0);
1101219089Spjdfail:
1102219089Spjd	os->os_sa = NULL;
1103219089Spjd	sa_free_attr_table(sa);
1104219089Spjd	if (sa->sa_user_table)
1105219089Spjd		kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1106219089Spjd	mutex_exit(&sa->sa_lock);
1107268713Sdelphij	avl_destroy(&sa->sa_layout_hash_tree);
1108268713Sdelphij	avl_destroy(&sa->sa_layout_num_tree);
1109268713Sdelphij	mutex_destroy(&sa->sa_lock);
1110219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1111219089Spjd	return ((error == ECKSUM) ? EIO : error);
1112219089Spjd}
1113219089Spjd
1114219089Spjdvoid
1115219089Spjdsa_tear_down(objset_t *os)
1116219089Spjd{
1117219089Spjd	sa_os_t *sa = os->os_sa;
1118219089Spjd	sa_lot_t *layout;
1119219089Spjd	void *cookie;
1120219089Spjd
1121219089Spjd	kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1122219089Spjd
1123219089Spjd	/* Free up attr table */
1124219089Spjd
1125219089Spjd	sa_free_attr_table(sa);
1126219089Spjd
1127219089Spjd	cookie = NULL;
1128219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
1129219089Spjd		sa_idx_tab_t *tab;
1130219089Spjd		while (tab = list_head(&layout->lot_idx_tab)) {
1131219089Spjd			ASSERT(refcount_count(&tab->sa_refcount));
1132219089Spjd			sa_idx_tab_rele(os, tab);
1133219089Spjd		}
1134219089Spjd	}
1135219089Spjd
1136219089Spjd	cookie = NULL;
1137219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
1138219089Spjd		kmem_free(layout->lot_attrs,
1139219089Spjd		    sizeof (sa_attr_type_t) * layout->lot_attr_count);
1140219089Spjd		kmem_free(layout, sizeof (sa_lot_t));
1141219089Spjd	}
1142219089Spjd
1143219089Spjd	avl_destroy(&sa->sa_layout_hash_tree);
1144219089Spjd	avl_destroy(&sa->sa_layout_num_tree);
1145268713Sdelphij	mutex_destroy(&sa->sa_lock);
1146219089Spjd
1147219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1148219089Spjd	os->os_sa = NULL;
1149219089Spjd}
1150219089Spjd
1151219089Spjdvoid
1152219089Spjdsa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
1153219089Spjd    uint16_t length, int length_idx, boolean_t var_length, void *userp)
1154219089Spjd{
1155219089Spjd	sa_idx_tab_t *idx_tab = userp;
1156219089Spjd
1157219089Spjd	if (var_length) {
1158219089Spjd		ASSERT(idx_tab->sa_variable_lengths);
1159219089Spjd		idx_tab->sa_variable_lengths[length_idx] = length;
1160219089Spjd	}
1161219089Spjd	TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx,
1162219089Spjd	    (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr));
1163219089Spjd}
1164219089Spjd
1165219089Spjdstatic void
1166219089Spjdsa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
1167219089Spjd    sa_iterfunc_t func, sa_lot_t *tab, void *userp)
1168219089Spjd{
1169219089Spjd	void *data_start;
1170219089Spjd	sa_lot_t *tb = tab;
1171219089Spjd	sa_lot_t search;
1172219089Spjd	avl_index_t loc;
1173219089Spjd	sa_os_t *sa = os->os_sa;
1174219089Spjd	int i;
1175219089Spjd	uint16_t *length_start = NULL;
1176219089Spjd	uint8_t length_idx = 0;
1177219089Spjd
1178219089Spjd	if (tab == NULL) {
1179219089Spjd		search.lot_num = SA_LAYOUT_NUM(hdr, type);
1180219089Spjd		tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1181219089Spjd		ASSERT(tb);
1182219089Spjd	}
1183219089Spjd
1184219089Spjd	if (IS_SA_BONUSTYPE(type)) {
1185219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
1186219089Spjd		    offsetof(sa_hdr_phys_t, sa_lengths) +
1187219089Spjd		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
1188219089Spjd		length_start = hdr->sa_lengths;
1189219089Spjd	} else {
1190219089Spjd		data_start = hdr;
1191219089Spjd	}
1192219089Spjd
1193219089Spjd	for (i = 0; i != tb->lot_attr_count; i++) {
1194219089Spjd		int attr_length, reg_length;
1195219089Spjd		uint8_t idx_len;
1196219089Spjd
1197219089Spjd		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
1198219089Spjd		if (reg_length) {
1199219089Spjd			attr_length = reg_length;
1200219089Spjd			idx_len = 0;
1201219089Spjd		} else {
1202219089Spjd			attr_length = length_start[length_idx];
1203219089Spjd			idx_len = length_idx++;
1204219089Spjd		}
1205219089Spjd
1206219089Spjd		func(hdr, data_start, tb->lot_attrs[i], attr_length,
1207219089Spjd		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);
1208219089Spjd
1209219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
1210219089Spjd		    attr_length), 8);
1211219089Spjd	}
1212219089Spjd}
1213219089Spjd
1214219089Spjd/*ARGSUSED*/
1215219089Spjdvoid
1216219089Spjdsa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
1217219089Spjd    uint16_t length, int length_idx, boolean_t variable_length, void *userp)
1218219089Spjd{
1219219089Spjd	sa_handle_t *hdl = userp;
1220219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1221219089Spjd
1222219089Spjd	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
1223219089Spjd}
1224219089Spjd
1225219089Spjdvoid
1226219089Spjdsa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
1227219089Spjd{
1228219089Spjd	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1229219089Spjd	dmu_buf_impl_t *db;
1230219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1231219089Spjd	int num_lengths = 1;
1232219089Spjd	int i;
1233219089Spjd
1234219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1235219089Spjd	if (sa_hdr_phys->sa_magic == SA_MAGIC)
1236219089Spjd		return;
1237219089Spjd
1238219089Spjd	db = SA_GET_DB(hdl, buftype);
1239219089Spjd
1240219089Spjd	if (buftype == SA_SPILL) {
1241219089Spjd		arc_release(db->db_buf, NULL);
1242219089Spjd		arc_buf_thaw(db->db_buf);
1243219089Spjd	}
1244219089Spjd
1245219089Spjd	sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
1246219089Spjd	sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
1247219089Spjd
1248219089Spjd	/*
1249219089Spjd	 * Determine number of variable lenghts in header
1250219089Spjd	 * The standard 8 byte header has one for free and a
1251219089Spjd	 * 16 byte header would have 4 + 1;
1252219089Spjd	 */
1253219089Spjd	if (SA_HDR_SIZE(sa_hdr_phys) > 8)
1254219089Spjd		num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
1255219089Spjd	for (i = 0; i != num_lengths; i++)
1256219089Spjd		sa_hdr_phys->sa_lengths[i] =
1257219089Spjd		    BSWAP_16(sa_hdr_phys->sa_lengths[i]);
1258219089Spjd
1259219089Spjd	sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
1260219089Spjd	    sa_byteswap_cb, NULL, hdl);
1261219089Spjd
1262219089Spjd	if (buftype == SA_SPILL)
1263219089Spjd		arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
1264219089Spjd}
1265219089Spjd
1266219089Spjdstatic int
1267219089Spjdsa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
1268219089Spjd{
1269219089Spjd	sa_hdr_phys_t *sa_hdr_phys;
1270219089Spjd	dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
1271219089Spjd	dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
1272219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1273219089Spjd	sa_idx_tab_t *idx_tab;
1274219089Spjd
1275219089Spjd	sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1276219089Spjd
1277219089Spjd	mutex_enter(&sa->sa_lock);
1278219089Spjd
1279219089Spjd	/* Do we need to byteswap? */
1280219089Spjd
1281219089Spjd	/* only check if not old znode */
1282219089Spjd	if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
1283219089Spjd	    sa_hdr_phys->sa_magic != 0) {
1284219089Spjd		VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
1285219089Spjd		sa_byteswap(hdl, buftype);
1286219089Spjd	}
1287219089Spjd
1288219089Spjd	idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);
1289219089Spjd
1290219089Spjd	if (buftype == SA_BONUS)
1291219089Spjd		hdl->sa_bonus_tab = idx_tab;
1292219089Spjd	else
1293219089Spjd		hdl->sa_spill_tab = idx_tab;
1294219089Spjd
1295219089Spjd	mutex_exit(&sa->sa_lock);
1296219089Spjd	return (0);
1297219089Spjd}
1298219089Spjd
/*
 * Eviction callback installed on the bonus dbuf of shared SA handles.
 * It panics unconditionally: being called at all is treated as fatal.
 */
/*ARGSUSED*/
static void
sa_evict(void *dbu)
{
	panic("evicting sa dbuf\n");
}
1305219089Spjd
1306219089Spjdstatic void
1307219089Spjdsa_idx_tab_rele(objset_t *os, void *arg)
1308219089Spjd{
1309219089Spjd	sa_os_t *sa = os->os_sa;
1310219089Spjd	sa_idx_tab_t *idx_tab = arg;
1311219089Spjd
1312219089Spjd	if (idx_tab == NULL)
1313219089Spjd		return;
1314219089Spjd
1315219089Spjd	mutex_enter(&sa->sa_lock);
1316219089Spjd	if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
1317219089Spjd		list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
1318219089Spjd		if (idx_tab->sa_variable_lengths)
1319219089Spjd			kmem_free(idx_tab->sa_variable_lengths,
1320219089Spjd			    sizeof (uint16_t) *
1321219089Spjd			    idx_tab->sa_layout->lot_var_sizes);
1322219089Spjd		refcount_destroy(&idx_tab->sa_refcount);
1323219089Spjd		kmem_free(idx_tab->sa_idx_tab,
1324219089Spjd		    sizeof (uint32_t) * sa->sa_num_attrs);
1325219089Spjd		kmem_free(idx_tab, sizeof (sa_idx_tab_t));
1326219089Spjd	}
1327219089Spjd	mutex_exit(&sa->sa_lock);
1328219089Spjd}
1329219089Spjd
1330219089Spjdstatic void
1331219089Spjdsa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
1332219089Spjd{
1333219089Spjd	sa_os_t *sa = os->os_sa;
1334219089Spjd
1335219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1336219089Spjd	(void) refcount_add(&idx_tab->sa_refcount, NULL);
1337219089Spjd}
1338219089Spjd
1339219089Spjdvoid
1340219089Spjdsa_handle_destroy(sa_handle_t *hdl)
1341219089Spjd{
1342286575Smav	dmu_buf_t *db = hdl->sa_bonus;
1343286575Smav
1344219089Spjd	mutex_enter(&hdl->sa_lock);
1345286575Smav	(void) dmu_buf_remove_user(db, &hdl->sa_dbu);
1346219089Spjd
1347286539Smav	if (hdl->sa_bonus_tab)
1348219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
1349286539Smav
1350286539Smav	if (hdl->sa_spill_tab)
1351219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
1352219089Spjd
1353219089Spjd	dmu_buf_rele(hdl->sa_bonus, NULL);
1354219089Spjd
1355219089Spjd	if (hdl->sa_spill)
1356219089Spjd		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
1357219089Spjd	mutex_exit(&hdl->sa_lock);
1358219089Spjd
1359219089Spjd	kmem_cache_free(sa_cache, hdl);
1360219089Spjd}
1361219089Spjd
1362219089Spjdint
1363219089Spjdsa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
1364219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1365219089Spjd{
1366219089Spjd	int error = 0;
1367219089Spjd	dmu_object_info_t doi;
1368286575Smav	sa_handle_t *handle = NULL;
1369219089Spjd
1370219089Spjd#ifdef ZFS_DEBUG
1371219089Spjd	dmu_object_info_from_db(db, &doi);
1372219089Spjd	ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
1373219089Spjd	    doi.doi_bonus_type == DMU_OT_ZNODE);
1374219089Spjd#endif
1375219089Spjd	/* find handle, if it exists */
1376219089Spjd	/* if one doesn't exist then create a new one, and initialize it */
1377219089Spjd
1378286575Smav	if (hdl_type == SA_HDL_SHARED)
1379286575Smav		handle = dmu_buf_get_user(db);
1380286575Smav
1381219089Spjd	if (handle == NULL) {
1382286575Smav		sa_handle_t *winner = NULL;
1383286575Smav
1384219089Spjd		handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
1385286983Savg		handle->sa_dbu.dbu_evict_func = NULL;
1386219089Spjd		handle->sa_userp = userp;
1387219089Spjd		handle->sa_bonus = db;
1388219089Spjd		handle->sa_os = os;
1389219089Spjd		handle->sa_spill = NULL;
1390286539Smav		handle->sa_bonus_tab = NULL;
1391286539Smav		handle->sa_spill_tab = NULL;
1392219089Spjd
1393219089Spjd		error = sa_build_index(handle, SA_BONUS);
1394219089Spjd
1395286575Smav		if (hdl_type == SA_HDL_SHARED) {
1396286575Smav			dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
1397286575Smav			winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
1398286575Smav		}
1399286575Smav
1400286575Smav		if (winner != NULL) {
1401219089Spjd			kmem_cache_free(sa_cache, handle);
1402286575Smav			handle = winner;
1403219089Spjd		}
1404219089Spjd	}
1405219089Spjd	*handlepp = handle;
1406219089Spjd
1407219089Spjd	return (error);
1408219089Spjd}
1409219089Spjd
1410219089Spjdint
1411219089Spjdsa_handle_get(objset_t *objset, uint64_t objid, void *userp,
1412219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1413219089Spjd{
1414219089Spjd	dmu_buf_t *db;
1415219089Spjd	int error;
1416219089Spjd
1417219089Spjd	if (error = dmu_bonus_hold(objset, objid, NULL, &db))
1418219089Spjd		return (error);
1419219089Spjd
1420219089Spjd	return (sa_handle_get_from_db(objset, db, userp, hdl_type,
1421219089Spjd	    handlepp));
1422219089Spjd}
1423219089Spjd
1424219089Spjdint
1425219089Spjdsa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
1426219089Spjd{
1427219089Spjd	return (dmu_bonus_hold(objset, obj_num, tag, db));
1428219089Spjd}
1429219089Spjd
1430219089Spjdvoid
1431219089Spjdsa_buf_rele(dmu_buf_t *db, void *tag)
1432219089Spjd{
1433219089Spjd	dmu_buf_rele(db, tag);
1434219089Spjd}
1435219089Spjd
1436219089Spjdint
1437219089Spjdsa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
1438219089Spjd{
1439219089Spjd	ASSERT(hdl);
1440219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1441219089Spjd	return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
1442219089Spjd}
1443219089Spjd
1444219089Spjdint
1445219089Spjdsa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
1446219089Spjd{
1447219089Spjd	int error;
1448219089Spjd	sa_bulk_attr_t bulk;
1449219089Spjd
1450219089Spjd	bulk.sa_attr = attr;
1451219089Spjd	bulk.sa_data = buf;
1452219089Spjd	bulk.sa_length = buflen;
1453219089Spjd	bulk.sa_data_func = NULL;
1454219089Spjd
1455219089Spjd	ASSERT(hdl);
1456219089Spjd	mutex_enter(&hdl->sa_lock);
1457219089Spjd	error = sa_lookup_impl(hdl, &bulk, 1);
1458219089Spjd	mutex_exit(&hdl->sa_lock);
1459219089Spjd	return (error);
1460219089Spjd}
1461219089Spjd
#ifdef _KERNEL
/*
 * Look up a single attribute and copy it out through uio.  At most
 * MIN(attribute size, uio->uio_resid) bytes are moved.  hdl->sa_lock is
 * held across both the lookup and the copy.  Returns the lookup error, or
 * else the result of uiomove().
 */
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
	sa_bulk_attr_t bulk;
	int error;

	/* no destination buffer: the lookup only locates the attribute */
	bulk.sa_data = NULL;
	bulk.sa_attr = attr;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);

	mutex_enter(&hdl->sa_lock);
	error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL);
	if (error == 0) {
		error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
		    uio->uio_resid), UIO_READ, uio);
	}
	mutex_exit(&hdl->sa_lock);

	return (error);
}
#endif
1485219089Spjd
1486219089Spjdvoid *
1487219089Spjdsa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data)
1488219089Spjd{
1489219089Spjd	sa_idx_tab_t *idx_tab;
1490219089Spjd	sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data;
1491219089Spjd	sa_os_t *sa = os->os_sa;
1492219089Spjd	sa_lot_t *tb, search;
1493219089Spjd	avl_index_t loc;
1494219089Spjd
1495219089Spjd	/*
1496219089Spjd	 * Deterimine layout number.  If SA node and header == 0 then
1497219089Spjd	 * force the index table to the dummy "1" empty layout.
1498219089Spjd	 *
1499219089Spjd	 * The layout number would only be zero for a newly created file
1500219089Spjd	 * that has not added any attributes yet, or with crypto enabled which
1501219089Spjd	 * doesn't write any attributes to the bonus buffer.
1502219089Spjd	 */
1503219089Spjd
1504219089Spjd	search.lot_num = SA_LAYOUT_NUM(hdr, bonustype);
1505219089Spjd
1506219089Spjd	tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1507219089Spjd
1508219089Spjd	/* Verify header size is consistent with layout information */
1509219089Spjd	ASSERT(tb);
1510219089Spjd	ASSERT(IS_SA_BONUSTYPE(bonustype) &&
1511219089Spjd	    SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) ||
1512219089Spjd	    (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0));
1513219089Spjd
1514219089Spjd	/*
1515219089Spjd	 * See if any of the already existing TOC entries can be reused?
1516219089Spjd	 */
1517219089Spjd
1518219089Spjd	for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab;
1519219089Spjd	    idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) {
1520219089Spjd		boolean_t valid_idx = B_TRUE;
1521219089Spjd		int i;
1522219089Spjd
1523219089Spjd		if (tb->lot_var_sizes != 0 &&
1524219089Spjd		    idx_tab->sa_variable_lengths != NULL) {
1525219089Spjd			for (i = 0; i != tb->lot_var_sizes; i++) {
1526219089Spjd				if (hdr->sa_lengths[i] !=
1527219089Spjd				    idx_tab->sa_variable_lengths[i]) {
1528219089Spjd					valid_idx = B_FALSE;
1529219089Spjd					break;
1530219089Spjd				}
1531219089Spjd			}
1532219089Spjd		}
1533219089Spjd		if (valid_idx) {
1534219089Spjd			sa_idx_tab_hold(os, idx_tab);
1535219089Spjd			return (idx_tab);
1536219089Spjd		}
1537219089Spjd	}
1538219089Spjd
1539219089Spjd	/* No such luck, create a new entry */
1540219089Spjd	idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP);
1541219089Spjd	idx_tab->sa_idx_tab =
1542219089Spjd	    kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP);
1543219089Spjd	idx_tab->sa_layout = tb;
1544219089Spjd	refcount_create(&idx_tab->sa_refcount);
1545219089Spjd	if (tb->lot_var_sizes)
1546219089Spjd		idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) *
1547219089Spjd		    tb->lot_var_sizes, KM_SLEEP);
1548219089Spjd
1549219089Spjd	sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab,
1550219089Spjd	    tb, idx_tab);
1551219089Spjd	sa_idx_tab_hold(os, idx_tab);   /* one hold for consumer */
1552219089Spjd	sa_idx_tab_hold(os, idx_tab);	/* one for layout */
1553219089Spjd	list_insert_tail(&tb->lot_idx_tab, idx_tab);
1554219089Spjd	return (idx_tab);
1555219089Spjd}
1556219089Spjd
1557219089Spjdvoid
1558219089Spjdsa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len,
1559219089Spjd    boolean_t start, void *userdata)
1560219089Spjd{
1561219089Spjd	ASSERT(start);
1562219089Spjd
1563219089Spjd	*dataptr = userdata;
1564219089Spjd	*len = total_len;
1565219089Spjd}
1566219089Spjd
1567219089Spjdstatic void
1568219089Spjdsa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
1569219089Spjd{
1570219089Spjd	uint64_t attr_value = 0;
1571219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1572219089Spjd	sa_attr_table_t *tb = sa->sa_attr_table;
1573219089Spjd	int i;
1574219089Spjd
1575219089Spjd	mutex_enter(&sa->sa_lock);
1576219089Spjd
1577219089Spjd	if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) {
1578219089Spjd		mutex_exit(&sa->sa_lock);
1579219089Spjd		return;
1580219089Spjd	}
1581219089Spjd
1582219089Spjd	if (sa->sa_reg_attr_obj == 0) {
1583236884Smm		sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
1584236884Smm		    DMU_OT_SA_ATTR_REGISTRATION,
1585236884Smm		    sa->sa_master_obj, SA_REGISTRY, tx);
1586219089Spjd	}
1587219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
1588219089Spjd		if (sa->sa_attr_table[i].sa_registered)
1589219089Spjd			continue;
1590219089Spjd		ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
1591219089Spjd		    tb[i].sa_byteswap);
1592219089Spjd		VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
1593219089Spjd		    tb[i].sa_name, 8, 1, &attr_value, tx));
1594219089Spjd		tb[i].sa_registered = B_TRUE;
1595219089Spjd	}
1596219089Spjd	sa->sa_need_attr_registration = B_FALSE;
1597219089Spjd	mutex_exit(&sa->sa_lock);
1598219089Spjd}
1599219089Spjd
1600219089Spjd/*
1601219089Spjd * Replace all attributes with attributes specified in template.
1602219089Spjd * If dnode had a spill buffer then those attributes will be
1603219089Spjd * also be replaced, possibly with just an empty spill block
1604219089Spjd *
1605219089Spjd * This interface is intended to only be used for bulk adding of
1606219089Spjd * attributes for a new file.  It will also be used by the ZPL
1607219089Spjd * when converting and old formatted znode to native SA support.
1608219089Spjd */
1609219089Spjdint
1610219089Spjdsa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1611219089Spjd    int attr_count, dmu_tx_t *tx)
1612219089Spjd{
1613219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1614219089Spjd
1615219089Spjd	if (sa->sa_need_attr_registration)
1616219089Spjd		sa_attr_register_sync(hdl, tx);
1617219089Spjd	return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
1618219089Spjd}
1619219089Spjd
1620219089Spjdint
1621219089Spjdsa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1622219089Spjd    int attr_count, dmu_tx_t *tx)
1623219089Spjd{
1624219089Spjd	int error;
1625219089Spjd
1626219089Spjd	mutex_enter(&hdl->sa_lock);
1627219089Spjd	error = sa_replace_all_by_template_locked(hdl, attr_desc,
1628219089Spjd	    attr_count, tx);
1629219089Spjd	mutex_exit(&hdl->sa_lock);
1630219089Spjd	return (error);
1631219089Spjd}
1632219089Spjd
1633219089Spjd/*
1634240345Savg * Add/remove a single attribute or replace a variable-sized attribute value
1635240345Savg * with a value of a different size, and then rewrite the entire set
1636219089Spjd * of attributes.
1637240345Savg * Same-length attribute value replacement (including fixed-length attributes)
1638240345Savg * is handled more efficiently by the upper layers.
1639219089Spjd */
1640219089Spjdstatic int
1641219089Spjdsa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
1642219089Spjd    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
1643219089Spjd    uint16_t buflen, dmu_tx_t *tx)
1644219089Spjd{
1645219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1646219089Spjd	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
1647219089Spjd	dnode_t *dn;
1648219089Spjd	sa_bulk_attr_t *attr_desc;
1649219089Spjd	void *old_data[2];
1650219089Spjd	int bonus_attr_count = 0;
1651247187Smm	int bonus_data_size = 0;
1652247187Smm	int spill_data_size = 0;
1653219089Spjd	int spill_attr_count = 0;
1654219089Spjd	int error;
1655295125Savg	uint16_t length, reg_length;
1656219089Spjd	int i, j, k, length_idx;
1657219089Spjd	sa_hdr_phys_t *hdr;
1658219089Spjd	sa_idx_tab_t *idx_tab;
1659219089Spjd	int attr_count;
1660219089Spjd	int count;
1661219089Spjd
1662219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1663219089Spjd
1664219089Spjd	/* First make of copy of the old data */
1665219089Spjd
1666219089Spjd	DB_DNODE_ENTER(db);
1667219089Spjd	dn = DB_DNODE(db);
1668219089Spjd	if (dn->dn_bonuslen != 0) {
1669219089Spjd		bonus_data_size = hdl->sa_bonus->db_size;
1670219089Spjd		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
1671219089Spjd		bcopy(hdl->sa_bonus->db_data, old_data[0],
1672219089Spjd		    hdl->sa_bonus->db_size);
1673219089Spjd		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
1674219089Spjd	} else {
1675219089Spjd		old_data[0] = NULL;
1676219089Spjd	}
1677219089Spjd	DB_DNODE_EXIT(db);
1678219089Spjd
1679219089Spjd	/* Bring spill buffer online if it isn't currently */
1680219089Spjd
1681219089Spjd	if ((error = sa_get_spill(hdl)) == 0) {
1682219089Spjd		spill_data_size = hdl->sa_spill->db_size;
1683219089Spjd		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
1684219089Spjd		bcopy(hdl->sa_spill->db_data, old_data[1],
1685219089Spjd		    hdl->sa_spill->db_size);
1686219089Spjd		spill_attr_count =
1687219089Spjd		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
1688219089Spjd	} else if (error && error != ENOENT) {
1689219089Spjd		if (old_data[0])
1690219089Spjd			kmem_free(old_data[0], bonus_data_size);
1691219089Spjd		return (error);
1692219089Spjd	} else {
1693219089Spjd		old_data[1] = NULL;
1694219089Spjd	}
1695219089Spjd
1696219089Spjd	/* build descriptor of all attributes */
1697219089Spjd
1698219089Spjd	attr_count = bonus_attr_count + spill_attr_count;
1699219089Spjd	if (action == SA_ADD)
1700219089Spjd		attr_count++;
1701219089Spjd	else if (action == SA_REMOVE)
1702219089Spjd		attr_count--;
1703219089Spjd
1704219089Spjd	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);
1705219089Spjd
1706219089Spjd	/*
1707219089Spjd	 * loop through bonus and spill buffer if it exists, and
1708219089Spjd	 * build up new attr_descriptor to reset the attributes
1709219089Spjd	 */
1710219089Spjd	k = j = 0;
1711219089Spjd	count = bonus_attr_count;
1712219089Spjd	hdr = SA_GET_HDR(hdl, SA_BONUS);
1713219089Spjd	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
1714219089Spjd	for (; k != 2; k++) {
1715295125Savg		/*
1716295125Savg		 * Iterate over each attribute in layout.  Fetch the
1717295125Savg		 * size of variable-length attributes needing rewrite
1718295125Savg		 * from sa_lengths[].
1719295125Savg		 */
1720219089Spjd		for (i = 0, length_idx = 0; i != count; i++) {
1721219089Spjd			sa_attr_type_t attr;
1722219089Spjd
1723219089Spjd			attr = idx_tab->sa_layout->lot_attrs[i];
1724295125Savg			reg_length = SA_REGISTERED_LEN(sa, attr);
1725295125Savg			if (reg_length == 0) {
1726295125Savg				length = hdr->sa_lengths[length_idx];
1727295125Savg				length_idx++;
1728295125Savg			} else {
1729295125Savg				length = reg_length;
1730295125Savg			}
1731219089Spjd			if (attr == newattr) {
1732295125Savg				/*
1733295125Savg				 * There is nothing to do for SA_REMOVE,
1734295125Savg				 * so it is just skipped.
1735295125Savg				 */
1736295125Savg				if (action == SA_REMOVE)
1737295125Savg					continue;
1738219089Spjd
1739295125Savg				/*
1740295125Savg				 * Duplicate attributes are not allowed, so the
1741295125Savg				 * action can not be SA_ADD here.
1742295125Savg				 */
1743295125Savg				ASSERT3S(action, ==, SA_REPLACE);
1744295125Savg
1745295125Savg				/*
1746295125Savg				 * Only a variable-sized attribute can be
1747295125Savg				 * replaced here, and its size must be changing.
1748295125Savg				 */
1749295125Savg				ASSERT3U(reg_length, ==, 0);
1750295125Savg				ASSERT3U(length, !=, buflen);
1751219089Spjd				SA_ADD_BULK_ATTR(attr_desc, j, attr,
1752295125Savg				    locator, datastart, buflen);
1753295125Savg			} else {
1754295125Savg				SA_ADD_BULK_ATTR(attr_desc, j, attr,
1755219089Spjd				    NULL, (void *)
1756219089Spjd				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
1757219089Spjd				    (uintptr_t)old_data[k]), length);
1758219089Spjd			}
1759219089Spjd		}
1760219089Spjd		if (k == 0 && hdl->sa_spill) {
1761219089Spjd			hdr = SA_GET_HDR(hdl, SA_SPILL);
1762219089Spjd			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
1763219089Spjd			count = spill_attr_count;
1764219089Spjd		} else {
1765219089Spjd			break;
1766219089Spjd		}
1767219089Spjd	}
1768219089Spjd	if (action == SA_ADD) {
1769295125Savg		reg_length = SA_REGISTERED_LEN(sa, newattr);
1770295125Savg		IMPLY(reg_length != 0, reg_length == buflen);
1771219089Spjd		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
1772219089Spjd		    datastart, buflen);
1773219089Spjd	}
1774240345Savg	ASSERT3U(j, ==, attr_count);
1775219089Spjd
1776219089Spjd	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);
1777219089Spjd
1778219089Spjd	if (old_data[0])
1779219089Spjd		kmem_free(old_data[0], bonus_data_size);
1780219089Spjd	if (old_data[1])
1781219089Spjd		kmem_free(old_data[1], spill_data_size);
1782219089Spjd	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);
1783219089Spjd
1784219089Spjd	return (error);
1785219089Spjd}
1786219089Spjd
1787219089Spjdstatic int
1788219089Spjdsa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
1789219089Spjd    dmu_tx_t *tx)
1790219089Spjd{
1791219089Spjd	int error;
1792219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1793219089Spjd	dmu_object_type_t bonustype;
1794219089Spjd
1795219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS));
1796219089Spjd
1797219089Spjd	ASSERT(hdl);
1798219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1799219089Spjd
1800219089Spjd	/* sync out registration table if necessary */
1801219089Spjd	if (sa->sa_need_attr_registration)
1802219089Spjd		sa_attr_register_sync(hdl, tx);
1803219089Spjd
1804219089Spjd	error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
1805219089Spjd	if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
1806219089Spjd		sa->sa_update_cb(hdl, tx);
1807219089Spjd
1808219089Spjd	return (error);
1809219089Spjd}
1810219089Spjd
1811219089Spjd/*
1812219089Spjd * update or add new attribute
1813219089Spjd */
1814219089Spjdint
1815219089Spjdsa_update(sa_handle_t *hdl, sa_attr_type_t type,
1816219089Spjd    void *buf, uint32_t buflen, dmu_tx_t *tx)
1817219089Spjd{
1818219089Spjd	int error;
1819219089Spjd	sa_bulk_attr_t bulk;
1820219089Spjd
1821219089Spjd	bulk.sa_attr = type;
1822219089Spjd	bulk.sa_data_func = NULL;
1823219089Spjd	bulk.sa_length = buflen;
1824219089Spjd	bulk.sa_data = buf;
1825219089Spjd
1826219089Spjd	mutex_enter(&hdl->sa_lock);
1827219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1828219089Spjd	mutex_exit(&hdl->sa_lock);
1829219089Spjd	return (error);
1830219089Spjd}
1831219089Spjd
1832219089Spjdint
1833219089Spjdsa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
1834219089Spjd    uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
1835219089Spjd{
1836219089Spjd	int error;
1837219089Spjd	sa_bulk_attr_t bulk;
1838219089Spjd
1839219089Spjd	bulk.sa_attr = attr;
1840219089Spjd	bulk.sa_data = userdata;
1841219089Spjd	bulk.sa_data_func = locator;
1842219089Spjd	bulk.sa_length = buflen;
1843219089Spjd
1844219089Spjd	mutex_enter(&hdl->sa_lock);
1845219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1846219089Spjd	mutex_exit(&hdl->sa_lock);
1847219089Spjd	return (error);
1848219089Spjd}
1849219089Spjd
1850219089Spjd/*
1851219089Spjd * Return size of an attribute
1852219089Spjd */
1853219089Spjd
1854219089Spjdint
1855219089Spjdsa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
1856219089Spjd{
1857219089Spjd	sa_bulk_attr_t bulk;
1858219089Spjd	int error;
1859219089Spjd
1860219089Spjd	bulk.sa_data = NULL;
1861219089Spjd	bulk.sa_attr = attr;
1862219089Spjd	bulk.sa_data_func = NULL;
1863219089Spjd
1864219089Spjd	ASSERT(hdl);
1865219089Spjd	mutex_enter(&hdl->sa_lock);
1866219089Spjd	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) {
1867219089Spjd		mutex_exit(&hdl->sa_lock);
1868219089Spjd		return (error);
1869219089Spjd	}
1870219089Spjd	*size = bulk.sa_size;
1871219089Spjd
1872219089Spjd	mutex_exit(&hdl->sa_lock);
1873219089Spjd	return (0);
1874219089Spjd}
1875219089Spjd
1876219089Spjdint
1877219089Spjdsa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1878219089Spjd{
1879219089Spjd	ASSERT(hdl);
1880219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1881219089Spjd	return (sa_lookup_impl(hdl, attrs, count));
1882219089Spjd}
1883219089Spjd
1884219089Spjdint
1885219089Spjdsa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1886219089Spjd{
1887219089Spjd	int error;
1888219089Spjd
1889219089Spjd	ASSERT(hdl);
1890219089Spjd	mutex_enter(&hdl->sa_lock);
1891219089Spjd	error = sa_bulk_lookup_locked(hdl, attrs, count);
1892219089Spjd	mutex_exit(&hdl->sa_lock);
1893219089Spjd	return (error);
1894219089Spjd}
1895219089Spjd
1896219089Spjdint
1897219089Spjdsa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
1898219089Spjd{
1899219089Spjd	int error;
1900219089Spjd
1901219089Spjd	ASSERT(hdl);
1902219089Spjd	mutex_enter(&hdl->sa_lock);
1903219089Spjd	error = sa_bulk_update_impl(hdl, attrs, count, tx);
1904219089Spjd	mutex_exit(&hdl->sa_lock);
1905219089Spjd	return (error);
1906219089Spjd}
1907219089Spjd
1908219089Spjdint
1909219089Spjdsa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
1910219089Spjd{
1911219089Spjd	int error;
1912219089Spjd
1913219089Spjd	mutex_enter(&hdl->sa_lock);
1914219089Spjd	error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
1915219089Spjd	    NULL, 0, tx);
1916219089Spjd	mutex_exit(&hdl->sa_lock);
1917219089Spjd	return (error);
1918219089Spjd}
1919219089Spjd
1920219089Spjdvoid
1921219089Spjdsa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
1922219089Spjd{
1923219089Spjd	dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
1924219089Spjd}
1925219089Spjd
1926219089Spjdvoid
1927219089Spjdsa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
1928219089Spjd{
1929219089Spjd	dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
1930219089Spjd	    blksize, nblocks);
1931219089Spjd}
1932219089Spjd
1933219089Spjdvoid
1934219089Spjdsa_set_userp(sa_handle_t *hdl, void *ptr)
1935219089Spjd{
1936219089Spjd	hdl->sa_userp = ptr;
1937219089Spjd}
1938219089Spjd
1939219089Spjddmu_buf_t *
1940219089Spjdsa_get_db(sa_handle_t *hdl)
1941219089Spjd{
1942219089Spjd	return ((dmu_buf_t *)hdl->sa_bonus);
1943219089Spjd}
1944219089Spjd
1945219089Spjdvoid *
1946219089Spjdsa_get_userdata(sa_handle_t *hdl)
1947219089Spjd{
1948219089Spjd	return (hdl->sa_userp);
1949219089Spjd}
1950219089Spjd
1951219089Spjdvoid
1952219089Spjdsa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
1953219089Spjd{
1954219089Spjd	ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
1955219089Spjd	os->os_sa->sa_update_cb = func;
1956219089Spjd}
1957219089Spjd
1958219089Spjdvoid
1959219089Spjdsa_register_update_callback(objset_t *os, sa_update_cb_t *func)
1960219089Spjd{
1961219089Spjd
1962219089Spjd	mutex_enter(&os->os_sa->sa_lock);
1963219089Spjd	sa_register_update_callback_locked(os, func);
1964219089Spjd	mutex_exit(&os->os_sa->sa_lock);
1965219089Spjd}
1966219089Spjd
1967219089Spjduint64_t
1968219089Spjdsa_handle_object(sa_handle_t *hdl)
1969219089Spjd{
1970219089Spjd	return (hdl->sa_bonus->db_object);
1971219089Spjd}
1972219089Spjd
1973219089Spjdboolean_t
1974219089Spjdsa_enabled(objset_t *os)
1975219089Spjd{
1976219089Spjd	return (os->os_sa == NULL);
1977219089Spjd}
1978219089Spjd
1979219089Spjdint
1980219089Spjdsa_set_sa_object(objset_t *os, uint64_t sa_object)
1981219089Spjd{
1982219089Spjd	sa_os_t *sa = os->os_sa;
1983219089Spjd
1984219089Spjd	if (sa->sa_master_obj)
1985219089Spjd		return (1);
1986219089Spjd
1987219089Spjd	sa->sa_master_obj = sa_object;
1988219089Spjd
1989219089Spjd	return (0);
1990219089Spjd}
1991219089Spjd
1992219089Spjdint
1993219089Spjdsa_hdrsize(void *arg)
1994219089Spjd{
1995219089Spjd	sa_hdr_phys_t *hdr = arg;
1996219089Spjd
1997219089Spjd	return (SA_HDR_SIZE(hdr));
1998219089Spjd}
1999219089Spjd
2000219089Spjdvoid
2001219089Spjdsa_handle_lock(sa_handle_t *hdl)
2002219089Spjd{
2003219089Spjd	ASSERT(hdl);
2004219089Spjd	mutex_enter(&hdl->sa_lock);
2005219089Spjd}
2006219089Spjd
2007219089Spjdvoid
2008219089Spjdsa_handle_unlock(sa_handle_t *hdl)
2009219089Spjd{
2010219089Spjd	ASSERT(hdl);
2011219089Spjd	mutex_exit(&hdl->sa_lock);
2012219089Spjd}
2013