1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21236884Smm
22219089Spjd/*
23219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24226724Smm * Portions Copyright 2011 iXsystems, Inc
25321558Smav * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
26286575Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27296519Smav * Copyright (c) 2014 Integros [integros.com]
28219089Spjd */
29219089Spjd
30219089Spjd#include <sys/zfs_context.h>
31219089Spjd#include <sys/types.h>
32219089Spjd#include <sys/param.h>
33219089Spjd#include <sys/systm.h>
34219089Spjd#include <sys/sysmacros.h>
35219089Spjd#include <sys/dmu.h>
36219089Spjd#include <sys/dmu_impl.h>
37219089Spjd#include <sys/dmu_objset.h>
38219089Spjd#include <sys/dbuf.h>
39219089Spjd#include <sys/dnode.h>
40219089Spjd#include <sys/zap.h>
41219089Spjd#include <sys/sa.h>
42219089Spjd#include <sys/sunddi.h>
43219089Spjd#include <sys/sa_impl.h>
44219089Spjd#include <sys/dnode.h>
45219089Spjd#include <sys/errno.h>
46219089Spjd#include <sys/zfs_context.h>
47219089Spjd
48219089Spjd/*
49219089Spjd * ZFS System attributes:
50219089Spjd *
51219089Spjd * A generic mechanism to allow for arbitrary attributes
52219089Spjd * to be stored in a dnode.  The data will be stored in the bonus buffer of
53219089Spjd * the dnode and if necessary a special "spill" block will be used to handle
54219089Spjd * overflow situations.  The spill block will be sized to fit the data
55219089Spjd * from 512 - 128K.  When a spill block is used the BP (blkptr_t) for the
56219089Spjd * spill block is stored at the end of the current bonus buffer.  Any
57219089Spjd * attributes that would be in the way of the blkptr_t will be relocated
58219089Spjd * into the spill block.
59219089Spjd *
60219089Spjd * Attribute registration:
61219089Spjd *
62219089Spjd * Stored persistently on a per dataset basis
63219089Spjd * a mapping between attribute "string" names and their actual attribute
64219089Spjd * numeric values, length, and byteswap function.  The names are only used
65219089Spjd * during registration.  All  attributes are known by their unique attribute
66219089Spjd * id value.  If an attribute can have a variable size then the value
67219089Spjd * 0 will be used to indicate this.
68219089Spjd *
69219089Spjd * Attribute Layout:
70219089Spjd *
71219089Spjd * Attribute layouts are a way to compactly store multiple attributes, but
72219089Spjd * without taking the overhead associated with managing each attribute
73219089Spjd * individually.  Since you will typically have the same set of attributes
74219089Spjd * stored in the same order a single table will be used to represent that
75219089Spjd * layout.  The ZPL for example will usually have only about 10 different
76219089Spjd * layouts (regular files, device files, symlinks,
77219089Spjd * regular files + scanstamp, files/dir with extended attributes, and then
78219089Spjd * you have the possibility of all of those minus ACL, because it would
79219089Spjd * be kicked out into the spill block)
80219089Spjd *
81219089Spjd * Layouts are simply an array of the attributes and their
82219089Spjd * ordering i.e. [0, 1, 4, 5, 2]
83219089Spjd *
 * Each distinct layout is given a unique layout number and that is what's
85219089Spjd * stored in the header at the beginning of the SA data buffer.
86219089Spjd *
87219089Spjd * A layout only covers a single dbuf (bonus or spill).  If a set of
88219089Spjd * attributes is split up between the bonus buffer and a spill buffer then
89219089Spjd * two different layouts will be used.  This allows us to byteswap the
90219089Spjd * spill without looking at the bonus buffer and keeps the on disk format of
91219089Spjd * the bonus and spill buffer the same.
92219089Spjd *
93219089Spjd * Adding a single attribute will cause the entire set of attributes to
94219089Spjd * be rewritten and could result in a new layout number being constructed
95219089Spjd * as part of the rewrite if no such layout exists for the new set of
 * attributes.  The new attribute will be appended to the end of the already
97219089Spjd * existing attributes.
98219089Spjd *
99219089Spjd * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes.  There should be a small number of
101219089Spjd * known layouts and the set of attributes is assumed to typically be quite
102219089Spjd * small.
103219089Spjd *
104219089Spjd * The registered attributes and layout "table" information is maintained
105219089Spjd * in core and a special "sa_os_t" is attached to the objset_t.
106219089Spjd *
107219089Spjd * A special interface is provided to allow for quickly applying
108219089Spjd * a large set of attributes at once.  sa_replace_all_by_template() is
109219089Spjd * used to set an array of attributes.  This is used by the ZPL when
110219089Spjd * creating a brand new file.  The template that is passed into the function
111219089Spjd * specifies the attribute, size for variable length attributes, location of
112219089Spjd * data and special "data locator" function if the data isn't in a contiguous
113219089Spjd * location.
114219089Spjd *
115219089Spjd * Byteswap implications:
116251631Sdelphij *
117219089Spjd * Since the SA attributes are not entirely self describing we can't do
118219089Spjd * the normal byteswap processing.  The special ZAP layout attribute and
119219089Spjd * attribute registration attributes define the byteswap function and the
120219089Spjd * size of the attributes, unless it is variable sized.
121219089Spjd * The normal ZFS byteswapping infrastructure assumes you don't need
122219089Spjd * to read any objects in order to do the necessary byteswapping.  Whereas
123219089Spjd * SA attributes can only be properly byteswapped if the dataset is opened
124219089Spjd * and the layout/attribute ZAP attributes are available.  Because of this
125219089Spjd * the SA attributes will be byteswapped when they are first accessed by
126219089Spjd * the SA code that will read the SA data.
127219089Spjd */
128219089Spjd
129219089Spjdtypedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
130219089Spjd    uint16_t length, int length_idx, boolean_t, void *userp);
131219089Spjd
132219089Spjdstatic int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
133219089Spjdstatic void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
134321558Smavstatic sa_idx_tab_t *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
135321558Smav    sa_hdr_phys_t *hdr);
136219089Spjdstatic void sa_idx_tab_rele(objset_t *os, void *arg);
137219089Spjdstatic void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
138219089Spjd    int buflen);
139219089Spjdstatic int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
140219089Spjd    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
141219089Spjd    uint16_t buflen, dmu_tx_t *tx);
142219089Spjd
/*
 * Byteswap routines available to system attributes.
 *
 * NOTE(review): presumably indexed by the byteswap type recorded for each
 * registered attribute (e.g. SA_UINT64_ARRAY, SA_UINT8_ARRAY as used in
 * sa_legacy_attrs[]) -- confirm the ordering against the enum declaration
 * before adding or reordering entries.
 */
arc_byteswap_func_t *sa_bswap_table[] = {
	byteswap_uint64_array,
	byteswap_uint32_array,
	byteswap_uint16_array,
	byteswap_uint8_array,
	zfs_acl_byteswap,
};
150219089Spjd
/*
 * Copy an attribute value of l bytes from s to t.
 *
 * f is an optional data locator.  When it is NULL the copy is done inline:
 * lengths of exactly 8 and 16 bytes are moved as one or two 64-bit stores
 * and every other length goes through bcopy().  When f is non-NULL the
 * work is handed to sa_copy_data(), which drives the locator.
 *
 * NOTE(review): the 8/16 byte fast paths access s and t through uint64_t
 * pointers, so callers are presumably expected to pass suitably aligned
 * buffers -- confirm before using with arbitrary addresses.
 *
 * Every argument is parenthesized and the body is wrapped in
 * do { } while (0) so the macro expands to exactly one statement and is
 * safe in an unbraced if/else.  Arguments may be evaluated more than once;
 * do not pass expressions with side effects.
 */
#define	SA_COPY_DATA(f, s, t, l) \
	do { \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else { \
			sa_copy_data((f), (s), (t), (l)); \
		} \
	} while (0)
166219089Spjd
167219089Spjd/*
168219089Spjd * This table is fixed and cannot be changed.  Its purpose is to
169219089Spjd * allow the SA code to work with both old/new ZPL file systems.
170219089Spjd * It contains the list of legacy attributes.  These attributes aren't
171219089Spjd * stored in the "attribute" registry zap objects, since older ZPL file systems
172219089Spjd * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
173219089Spjd * use this static table.
174219089Spjd */
175219089Spjdsa_attr_reg_t sa_legacy_attrs[] = {
176219089Spjd	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
177219089Spjd	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
178219089Spjd	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
179219089Spjd	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
180219089Spjd	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
181219089Spjd	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
182219089Spjd	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
183219089Spjd	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
184219089Spjd	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
185219089Spjd	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
186219089Spjd	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
187219089Spjd	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
188219089Spjd	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
189219089Spjd	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
190219089Spjd	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
191219089Spjd	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
192219089Spjd};
193219089Spjd
194219089Spjd/*
195219089Spjd * This is only used for objects of type DMU_OT_ZNODE
196219089Spjd */
197219089Spjdsa_attr_type_t sa_legacy_zpl_layout[] = {
198219089Spjd    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
199219089Spjd};
200219089Spjd
/*
 * Special dummy layout used for buffers with no attributes.
 * The array holds a single placeholder element with value 0.
 */
sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
205219089Spjd
206219089Spjdstatic int sa_legacy_attr_count = 16;
207219089Spjdstatic kmem_cache_t *sa_cache = NULL;
208219089Spjd
209219089Spjd/*ARGSUSED*/
210219089Spjdstatic int
211219089Spjdsa_cache_constructor(void *buf, void *unused, int kmflag)
212219089Spjd{
213219089Spjd	sa_handle_t *hdl = buf;
214219089Spjd
215219089Spjd	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
216219089Spjd	return (0);
217219089Spjd}
218219089Spjd
219219089Spjd/*ARGSUSED*/
220219089Spjdstatic void
221219089Spjdsa_cache_destructor(void *buf, void *unused)
222219089Spjd{
223219089Spjd	sa_handle_t *hdl = buf;
224219089Spjd	mutex_destroy(&hdl->sa_lock);
225219089Spjd}
226219089Spjd
227219089Spjdvoid
228219089Spjdsa_cache_init(void)
229219089Spjd{
230219089Spjd	sa_cache = kmem_cache_create("sa_cache",
231219089Spjd	    sizeof (sa_handle_t), 0, sa_cache_constructor,
232219089Spjd	    sa_cache_destructor, NULL, NULL, NULL, 0);
233219089Spjd}
234219089Spjd
235219089Spjdvoid
236219089Spjdsa_cache_fini(void)
237219089Spjd{
238219089Spjd	if (sa_cache)
239219089Spjd		kmem_cache_destroy(sa_cache);
240219089Spjd}
241219089Spjd
242219089Spjdstatic int
243219089Spjdlayout_num_compare(const void *arg1, const void *arg2)
244219089Spjd{
245339158Smav	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
246339158Smav	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
247219089Spjd
248339158Smav	return (AVL_CMP(node1->lot_num, node2->lot_num));
249219089Spjd}
250219089Spjd
251219089Spjdstatic int
252219089Spjdlayout_hash_compare(const void *arg1, const void *arg2)
253219089Spjd{
254339158Smav	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
255339158Smav	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
256219089Spjd
257339158Smav	int cmp = AVL_CMP(node1->lot_hash, node2->lot_hash);
258339158Smav	if (likely(cmp))
259339158Smav		return (cmp);
260339158Smav
261339158Smav	return (AVL_CMP(node1->lot_instance, node2->lot_instance));
262219089Spjd}
263219089Spjd
264219089Spjdboolean_t
265219089Spjdsa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
266219089Spjd{
267219089Spjd	int i;
268219089Spjd
269219089Spjd	if (count != tbf->lot_attr_count)
270219089Spjd		return (1);
271219089Spjd
272219089Spjd	for (i = 0; i != count; i++) {
273219089Spjd		if (attrs[i] != tbf->lot_attrs[i])
274219089Spjd			return (1);
275219089Spjd	}
276219089Spjd	return (0);
277219089Spjd}
278219089Spjd
/*
 * Per-attribute hash contribution: the zfs_crc64_table entry selected by
 * the low 8 bits of the bitwise complement of the attribute number.
 * The argument is parenthesized so that compound expressions (for example
 * 'a | b') are complemented as a whole.
 */
#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ (attr)) & 0xFF])
280219089Spjd
281219089Spjdstatic uint64_t
282219089Spjdsa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
283219089Spjd{
284219089Spjd	int i;
285219089Spjd	uint64_t crc = -1ULL;
286219089Spjd
287219089Spjd	for (i = 0; i != attr_count; i++)
288219089Spjd		crc ^= SA_ATTR_HASH(attrs[i]);
289219089Spjd
290219089Spjd	return (crc);
291219089Spjd}
292219089Spjd
293219089Spjdstatic int
294219089Spjdsa_get_spill(sa_handle_t *hdl)
295219089Spjd{
296219089Spjd	int rc;
297219089Spjd	if (hdl->sa_spill == NULL) {
298219089Spjd		if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
299219089Spjd		    &hdl->sa_spill)) == 0)
300219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
301219089Spjd	} else {
302219089Spjd		rc = 0;
303219089Spjd	}
304219089Spjd
305219089Spjd	return (rc);
306219089Spjd}
307219089Spjd
308219089Spjd/*
309219089Spjd * Main attribute lookup/update function
310219089Spjd * returns 0 for success or non zero for failures
311219089Spjd *
312219089Spjd * Operates on bulk array, first failure will abort further processing
313219089Spjd */
314219089Spjdint
315219089Spjdsa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
316219089Spjd    sa_data_op_t data_op, dmu_tx_t *tx)
317219089Spjd{
318219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
319219089Spjd	int i;
320219089Spjd	int error = 0;
321219089Spjd	sa_buf_type_t buftypes;
322219089Spjd
323219089Spjd	buftypes = 0;
324219089Spjd
325219089Spjd	ASSERT(count > 0);
326219089Spjd	for (i = 0; i != count; i++) {
327219089Spjd		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
328219089Spjd
329219089Spjd		bulk[i].sa_addr = NULL;
330219089Spjd		/* First check the bonus buffer */
331219089Spjd
332219089Spjd		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
333219089Spjd		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
334219089Spjd			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
335219089Spjd			    SA_GET_HDR(hdl, SA_BONUS),
336219089Spjd			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
337219089Spjd			if (tx && !(buftypes & SA_BONUS)) {
338219089Spjd				dmu_buf_will_dirty(hdl->sa_bonus, tx);
339219089Spjd				buftypes |= SA_BONUS;
340219089Spjd			}
341219089Spjd		}
342219089Spjd		if (bulk[i].sa_addr == NULL &&
343219089Spjd		    ((error = sa_get_spill(hdl)) == 0)) {
344219089Spjd			if (TOC_ATTR_PRESENT(
345219089Spjd			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
346219089Spjd				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
347219089Spjd				    SA_GET_HDR(hdl, SA_SPILL),
348219089Spjd				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
349219089Spjd				if (tx && !(buftypes & SA_SPILL) &&
350219089Spjd				    bulk[i].sa_size == bulk[i].sa_length) {
351219089Spjd					dmu_buf_will_dirty(hdl->sa_spill, tx);
352219089Spjd					buftypes |= SA_SPILL;
353219089Spjd				}
354219089Spjd			}
355219089Spjd		}
356219089Spjd		if (error && error != ENOENT) {
357219089Spjd			return ((error == ECKSUM) ? EIO : error);
358219089Spjd		}
359219089Spjd
360219089Spjd		switch (data_op) {
361219089Spjd		case SA_LOOKUP:
362219089Spjd			if (bulk[i].sa_addr == NULL)
363249195Smm				return (SET_ERROR(ENOENT));
364219089Spjd			if (bulk[i].sa_data) {
365219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
366219089Spjd				    bulk[i].sa_addr, bulk[i].sa_data,
367219089Spjd				    bulk[i].sa_size);
368219089Spjd			}
369219089Spjd			continue;
370219089Spjd
371219089Spjd		case SA_UPDATE:
372219089Spjd			/* existing rewrite of attr */
373219089Spjd			if (bulk[i].sa_addr &&
374219089Spjd			    bulk[i].sa_size == bulk[i].sa_length) {
375219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
376219089Spjd				    bulk[i].sa_data, bulk[i].sa_addr,
377219089Spjd				    bulk[i].sa_length);
378219089Spjd				continue;
379219089Spjd			} else if (bulk[i].sa_addr) { /* attr size change */
380219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
381219089Spjd				    SA_REPLACE, bulk[i].sa_data_func,
382219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
383219089Spjd			} else { /* adding new attribute */
384219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
385219089Spjd				    SA_ADD, bulk[i].sa_data_func,
386219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
387219089Spjd			}
388219089Spjd			if (error)
389219089Spjd				return (error);
390219089Spjd			break;
391219089Spjd		}
392219089Spjd	}
393219089Spjd	return (error);
394219089Spjd}
395219089Spjd
396219089Spjdstatic sa_lot_t *
397219089Spjdsa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
398219089Spjd    uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
399219089Spjd{
400219089Spjd	sa_os_t *sa = os->os_sa;
401219089Spjd	sa_lot_t *tb, *findtb;
402219089Spjd	int i;
403219089Spjd	avl_index_t loc;
404219089Spjd
405219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
406219089Spjd	tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
407219089Spjd	tb->lot_attr_count = attr_count;
408219089Spjd	tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
409219089Spjd	    KM_SLEEP);
410219089Spjd	bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
411219089Spjd	tb->lot_num = lot_num;
412219089Spjd	tb->lot_hash = hash;
413219089Spjd	tb->lot_instance = 0;
414219089Spjd
415219089Spjd	if (zapadd) {
416219089Spjd		char attr_name[8];
417219089Spjd
418219089Spjd		if (sa->sa_layout_attr_obj == 0) {
419236884Smm			sa->sa_layout_attr_obj = zap_create_link(os,
420236884Smm			    DMU_OT_SA_ATTR_LAYOUTS,
421236884Smm			    sa->sa_master_obj, SA_LAYOUTS, tx);
422219089Spjd		}
423219089Spjd
424219089Spjd		(void) snprintf(attr_name, sizeof (attr_name),
425219089Spjd		    "%d", (int)lot_num);
426219089Spjd		VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
427219089Spjd		    attr_name, 2, attr_count, attrs, tx));
428219089Spjd	}
429219089Spjd
430219089Spjd	list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
431219089Spjd	    offsetof(sa_idx_tab_t, sa_next));
432219089Spjd
433219089Spjd	for (i = 0; i != attr_count; i++) {
434219089Spjd		if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
435219089Spjd			tb->lot_var_sizes++;
436219089Spjd	}
437219089Spjd
438219089Spjd	avl_add(&sa->sa_layout_num_tree, tb);
439219089Spjd
440219089Spjd	/* verify we don't have a hash collision */
441219089Spjd	if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
442219089Spjd		for (; findtb && findtb->lot_hash == hash;
443219089Spjd		    findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
444219089Spjd			if (findtb->lot_instance != tb->lot_instance)
445219089Spjd				break;
446219089Spjd			tb->lot_instance++;
447219089Spjd		}
448219089Spjd	}
449219089Spjd	avl_add(&sa->sa_layout_hash_tree, tb);
450219089Spjd	return (tb);
451219089Spjd}
452219089Spjd
453219089Spjdstatic void
454219089Spjdsa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
455219089Spjd    int count, dmu_tx_t *tx, sa_lot_t **lot)
456219089Spjd{
457219089Spjd	sa_lot_t *tb, tbsearch;
458219089Spjd	avl_index_t loc;
459219089Spjd	sa_os_t *sa = os->os_sa;
460219089Spjd	boolean_t found = B_FALSE;
461219089Spjd
462219089Spjd	mutex_enter(&sa->sa_lock);
463219089Spjd	tbsearch.lot_hash = hash;
464219089Spjd	tbsearch.lot_instance = 0;
465219089Spjd	tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
466219089Spjd	if (tb) {
467219089Spjd		for (; tb && tb->lot_hash == hash;
468219089Spjd		    tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
469219089Spjd			if (sa_layout_equal(tb, attrs, count) == 0) {
470219089Spjd				found = B_TRUE;
471219089Spjd				break;
472219089Spjd			}
473219089Spjd		}
474219089Spjd	}
475219089Spjd	if (!found) {
476219089Spjd		tb = sa_add_layout_entry(os, attrs, count,
477219089Spjd		    avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
478219089Spjd	}
479219089Spjd	mutex_exit(&sa->sa_lock);
480219089Spjd	*lot = tb;
481219089Spjd}
482219089Spjd
483219089Spjdstatic int
484219089Spjdsa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
485219089Spjd{
486219089Spjd	int error;
487219089Spjd	uint32_t blocksize;
488219089Spjd
489219089Spjd	if (size == 0) {
490219089Spjd		blocksize = SPA_MINBLOCKSIZE;
491274337Sdelphij	} else if (size > SPA_OLD_MAXBLOCKSIZE) {
492219089Spjd		ASSERT(0);
493249195Smm		return (SET_ERROR(EFBIG));
494219089Spjd	} else {
495219089Spjd		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
496219089Spjd	}
497219089Spjd
498219089Spjd	error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
499219089Spjd	ASSERT(error == 0);
500219089Spjd	return (error);
501219089Spjd}
502219089Spjd
503219089Spjdstatic void
504219089Spjdsa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
505219089Spjd{
506219089Spjd	if (func == NULL) {
507219089Spjd		bcopy(datastart, target, buflen);
508219089Spjd	} else {
509219089Spjd		boolean_t start;
510219089Spjd		int bytes;
511219089Spjd		void *dataptr;
512219089Spjd		void *saptr = target;
513219089Spjd		uint32_t length;
514219089Spjd
515219089Spjd		start = B_TRUE;
516219089Spjd		bytes = 0;
517219089Spjd		while (bytes < buflen) {
518219089Spjd			func(&dataptr, &length, buflen, start, datastart);
519219089Spjd			bcopy(dataptr, saptr, length);
520219089Spjd			saptr = (void *)((caddr_t)saptr + length);
521219089Spjd			bytes += length;
522219089Spjd			start = B_FALSE;
523219089Spjd		}
524219089Spjd	}
525219089Spjd}
526219089Spjd
527219089Spjd/*
528219089Spjd * Determine several different sizes
529219089Spjd * first the sa header size
530219089Spjd * the number of bytes to be stored
531219089Spjd * if spill would occur the index in the attribute array is returned
532219089Spjd *
533219089Spjd * the boolean will_spill will be set when spilling is necessary.  It
534219089Spjd * is only set when the buftype is SA_BONUS
535219089Spjd */
536219089Spjdstatic int
537219089Spjdsa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
538219089Spjd    dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
539219089Spjd    boolean_t *will_spill)
540219089Spjd{
541219089Spjd	int var_size = 0;
542219089Spjd	int i;
543219089Spjd	int full_space;
544219089Spjd	int hdrsize;
545294813Smav	int extra_hdrsize;
546219089Spjd
547219089Spjd	if (buftype == SA_BONUS && sa->sa_force_spill) {
548219089Spjd		*total = 0;
549219089Spjd		*index = 0;
550219089Spjd		*will_spill = B_TRUE;
551219089Spjd		return (0);
552219089Spjd	}
553219089Spjd
554219089Spjd	*index = -1;
555219089Spjd	*total = 0;
556294813Smav	*will_spill = B_FALSE;
557219089Spjd
558294813Smav	extra_hdrsize = 0;
559219089Spjd	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
560219089Spjd	    sizeof (sa_hdr_phys_t);
561219089Spjd
562219089Spjd	full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
563246678Smm	ASSERT(IS_P2ALIGNED(full_space, 8));
564219089Spjd
565219089Spjd	for (i = 0; i != attr_count; i++) {
566219089Spjd		boolean_t is_var_sz;
567219089Spjd
568246678Smm		*total = P2ROUNDUP(*total, 8);
569246678Smm		*total += attr_desc[i].sa_length;
570294813Smav		if (*will_spill)
571294813Smav			continue;
572219089Spjd
573219089Spjd		is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
574219089Spjd		if (is_var_sz) {
575219089Spjd			var_size++;
576219089Spjd		}
577219089Spjd
578219089Spjd		if (is_var_sz && var_size > 1) {
579294813Smav			/*
580294813Smav			 * Don't worry that the spill block might overflow.
581294813Smav			 * It will be resized if needed in sa_build_layouts().
582294813Smav			 */
583294813Smav			if (buftype == SA_SPILL ||
584294813Smav			    P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
585219089Spjd			    *total < full_space) {
586246678Smm				/*
587246678Smm				 * Account for header space used by array of
588246678Smm				 * optional sizes of variable-length attributes.
589294813Smav				 * Record the extra header size in case this
590294813Smav				 * increase needs to be reversed due to
591294813Smav				 * spill-over.
592246678Smm				 */
593219089Spjd				hdrsize += sizeof (uint16_t);
594294813Smav				if (*index != -1)
595294813Smav					extra_hdrsize += sizeof (uint16_t);
596219089Spjd			} else {
597294813Smav				ASSERT(buftype == SA_BONUS);
598294813Smav				if (*index == -1)
599294813Smav					*index = i;
600294813Smav				*will_spill = B_TRUE;
601219089Spjd				continue;
602219089Spjd			}
603219089Spjd		}
604219089Spjd
605219089Spjd		/*
606219089Spjd		 * find index of where spill *could* occur.
607219089Spjd		 * Then continue to count of remainder attribute
608219089Spjd		 * space.  The sum is used later for sizing bonus
609219089Spjd		 * and spill buffer.
610219089Spjd		 */
611219089Spjd		if (buftype == SA_BONUS && *index == -1 &&
612226483Sdelphij		    (*total + P2ROUNDUP(hdrsize, 8)) >
613219089Spjd		    (full_space - sizeof (blkptr_t))) {
614219089Spjd			*index = i;
615219089Spjd		}
616219089Spjd
617226483Sdelphij		if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space &&
618219089Spjd		    buftype == SA_BONUS)
619219089Spjd			*will_spill = B_TRUE;
620219089Spjd	}
621219089Spjd
622294813Smav	if (*will_spill)
623294813Smav		hdrsize -= extra_hdrsize;
624246678Smm
625219089Spjd	hdrsize = P2ROUNDUP(hdrsize, 8);
626219089Spjd	return (hdrsize);
627219089Spjd}
628219089Spjd
/*
 * Bytes a buffer must provide to hold 'total' bytes of attribute data plus
 * a header of 'header' bytes.  Both arguments are parenthesized so that
 * compound expressions are summed as intended.
 */
#define	BUF_SPACE_NEEDED(total, header) ((total) + (header))
630219089Spjd
631219089Spjd/*
632219089Spjd * Find layout that corresponds to ordering of attributes
633219089Spjd * If not found a new layout number is created and added to
634219089Spjd * persistent layout tables.
635219089Spjd */
636219089Spjdstatic int
637219089Spjdsa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
638219089Spjd    dmu_tx_t *tx)
639219089Spjd{
640219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
641219089Spjd	uint64_t hash;
642219089Spjd	sa_buf_type_t buftype;
643219089Spjd	sa_hdr_phys_t *sahdr;
644219089Spjd	void *data_start;
645219089Spjd	int buf_space;
646219089Spjd	sa_attr_type_t *attrs, *attrs_start;
647219089Spjd	int i, lot_count;
648247187Smm	int hdrsize;
649247187Smm	int spillhdrsize = 0;
650219089Spjd	int used;
651219089Spjd	dmu_object_type_t bonustype;
652219089Spjd	sa_lot_t *lot;
653219089Spjd	int len_idx;
654219089Spjd	int spill_used;
655219089Spjd	boolean_t spilling;
656219089Spjd
657219089Spjd	dmu_buf_will_dirty(hdl->sa_bonus, tx);
658219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
659219089Spjd
660219089Spjd	/* first determine bonus header size and sum of all attributes */
661219089Spjd	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
662219089Spjd	    SA_BONUS, &i, &used, &spilling);
663219089Spjd
664274337Sdelphij	if (used > SPA_OLD_MAXBLOCKSIZE)
665249195Smm		return (SET_ERROR(EFBIG));
666219089Spjd
667219089Spjd	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
668219089Spjd	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
669219089Spjd	    used + hdrsize, tx));
670219089Spjd
671219089Spjd	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
672219089Spjd	    bonustype == DMU_OT_SA);
673219089Spjd
674219089Spjd	/* setup and size spill buffer when needed */
675219089Spjd	if (spilling) {
676219089Spjd		boolean_t dummy;
677219089Spjd
678219089Spjd		if (hdl->sa_spill == NULL) {
679219089Spjd			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
680219089Spjd			    &hdl->sa_spill) == 0);
681219089Spjd		}
682219089Spjd		dmu_buf_will_dirty(hdl->sa_spill, tx);
683219089Spjd
684219089Spjd		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
685219089Spjd		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
686219089Spjd		    &spill_used, &dummy);
687219089Spjd
688274337Sdelphij		if (spill_used > SPA_OLD_MAXBLOCKSIZE)
689249195Smm			return (SET_ERROR(EFBIG));
690219089Spjd
691219089Spjd		buf_space = hdl->sa_spill->db_size - spillhdrsize;
692219089Spjd		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
693219089Spjd		    hdl->sa_spill->db_size)
694219089Spjd			VERIFY(0 == sa_resize_spill(hdl,
695219089Spjd			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
696219089Spjd	}
697219089Spjd
698219089Spjd	/* setup starting pointers to lay down data */
699219089Spjd	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
700219089Spjd	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
701219089Spjd	buftype = SA_BONUS;
702219089Spjd
703219089Spjd	if (spilling)
704219089Spjd		buf_space = (sa->sa_force_spill) ?
705219089Spjd		    0 : SA_BLKPTR_SPACE - hdrsize;
706219089Spjd	else
707219089Spjd		buf_space = hdl->sa_bonus->db_size - hdrsize;
708219089Spjd
709219089Spjd	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
710219089Spjd	    KM_SLEEP);
711219089Spjd	lot_count = 0;
712219089Spjd
713219089Spjd	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
714219089Spjd		uint16_t length;
715219089Spjd
716246678Smm		ASSERT(IS_P2ALIGNED(data_start, 8));
717246678Smm		ASSERT(IS_P2ALIGNED(buf_space, 8));
718219089Spjd		attrs[i] = attr_desc[i].sa_attr;
719219089Spjd		length = SA_REGISTERED_LEN(sa, attrs[i]);
720219089Spjd		if (length == 0)
721219089Spjd			length = attr_desc[i].sa_length;
722240632Savg		else
723240632Savg			VERIFY(length == attr_desc[i].sa_length);
724219089Spjd
725219089Spjd		if (buf_space < length) {  /* switch to spill buffer */
726246678Smm			VERIFY(spilling);
727219089Spjd			VERIFY(bonustype == DMU_OT_SA);
728219089Spjd			if (buftype == SA_BONUS && !sa->sa_force_spill) {
729219089Spjd				sa_find_layout(hdl->sa_os, hash, attrs_start,
730219089Spjd				    lot_count, tx, &lot);
731219089Spjd				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
732219089Spjd			}
733219089Spjd
734219089Spjd			buftype = SA_SPILL;
735219089Spjd			hash = -1ULL;
736219089Spjd			len_idx = 0;
737219089Spjd
738219089Spjd			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
739219089Spjd			sahdr->sa_magic = SA_MAGIC;
740219089Spjd			data_start = (void *)((uintptr_t)sahdr +
741219089Spjd			    spillhdrsize);
742219089Spjd			attrs_start = &attrs[i];
743219089Spjd			buf_space = hdl->sa_spill->db_size - spillhdrsize;
744219089Spjd			lot_count = 0;
745219089Spjd		}
746219089Spjd		hash ^= SA_ATTR_HASH(attrs[i]);
747219089Spjd		attr_desc[i].sa_addr = data_start;
748219089Spjd		attr_desc[i].sa_size = length;
749219089Spjd		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
750219089Spjd		    data_start, length);
751219089Spjd		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
752219089Spjd			sahdr->sa_lengths[len_idx++] = length;
753219089Spjd		}
754240632Savg		VERIFY((uintptr_t)data_start % 8 == 0);
755219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
756219089Spjd		    length), 8);
757219089Spjd		buf_space -= P2ROUNDUP(length, 8);
758219089Spjd		lot_count++;
759219089Spjd	}
760219089Spjd
761219089Spjd	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
762219089Spjd
763219089Spjd	/*
764219089Spjd	 * Verify that old znodes always have layout number 0.
765219089Spjd	 * Must be DMU_OT_SA for arbitrary layouts
766219089Spjd	 */
767219089Spjd	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
768219089Spjd	    (bonustype == DMU_OT_SA && lot->lot_num > 1));
769219089Spjd
770219089Spjd	if (bonustype == DMU_OT_SA) {
771219089Spjd		SA_SET_HDR(sahdr, lot->lot_num,
772219089Spjd		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
773219089Spjd	}
774219089Spjd
775219089Spjd	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
776219089Spjd	if (hdl->sa_bonus_tab) {
777219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
778219089Spjd		hdl->sa_bonus_tab = NULL;
779219089Spjd	}
780219089Spjd	if (!sa->sa_force_spill)
781219089Spjd		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
782219089Spjd	if (hdl->sa_spill) {
783219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
784219089Spjd		if (!spilling) {
785219089Spjd			/*
786219089Spjd			 * remove spill block that is no longer needed.
787219089Spjd			 */
788219089Spjd			dmu_buf_rele(hdl->sa_spill, NULL);
789219089Spjd			hdl->sa_spill = NULL;
790219089Spjd			hdl->sa_spill_tab = NULL;
791219089Spjd			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
792219089Spjd			    sa_handle_object(hdl), tx));
793219089Spjd		} else {
794219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
795219089Spjd		}
796219089Spjd	}
797219089Spjd
798219089Spjd	return (0);
799219089Spjd}
800219089Spjd
801219089Spjdstatic void
802219089Spjdsa_free_attr_table(sa_os_t *sa)
803219089Spjd{
804219089Spjd	int i;
805219089Spjd
806219089Spjd	if (sa->sa_attr_table == NULL)
807219089Spjd		return;
808219089Spjd
809219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
810219089Spjd		if (sa->sa_attr_table[i].sa_name)
811219089Spjd			kmem_free(sa->sa_attr_table[i].sa_name,
812219089Spjd			    strlen(sa->sa_attr_table[i].sa_name) + 1);
813219089Spjd	}
814219089Spjd
815219089Spjd	kmem_free(sa->sa_attr_table,
816219089Spjd	    sizeof (sa_attr_table_t) * sa->sa_num_attrs);
817219089Spjd
818219089Spjd	sa->sa_attr_table = NULL;
819219089Spjd}
820219089Spjd
821219089Spjdstatic int
822219089Spjdsa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
823219089Spjd{
824219089Spjd	sa_os_t *sa = os->os_sa;
825219089Spjd	uint64_t sa_attr_count = 0;
826247187Smm	uint64_t sa_reg_count = 0;
827219089Spjd	int error = 0;
828219089Spjd	uint64_t attr_value;
829219089Spjd	sa_attr_table_t *tb;
830219089Spjd	zap_cursor_t zc;
831219089Spjd	zap_attribute_t za;
832219089Spjd	int registered_count = 0;
833219089Spjd	int i;
834219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
835219089Spjd
836219089Spjd	sa->sa_user_table =
837219089Spjd	    kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
838219089Spjd	sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
839219089Spjd
840219089Spjd	if (sa->sa_reg_attr_obj != 0) {
841219089Spjd		error = zap_count(os, sa->sa_reg_attr_obj,
842219089Spjd		    &sa_attr_count);
843219089Spjd
844219089Spjd		/*
845219089Spjd		 * Make sure we retrieved a count and that it isn't zero
846219089Spjd		 */
847219089Spjd		if (error || (error == 0 && sa_attr_count == 0)) {
848219089Spjd			if (error == 0)
849249195Smm				error = SET_ERROR(EINVAL);
850219089Spjd			goto bail;
851219089Spjd		}
852219089Spjd		sa_reg_count = sa_attr_count;
853219089Spjd	}
854219089Spjd
855219089Spjd	if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
856219089Spjd		sa_attr_count += sa_legacy_attr_count;
857219089Spjd
858219089Spjd	/* Allocate attribute numbers for attributes that aren't registered */
859219089Spjd	for (i = 0; i != count; i++) {
860219089Spjd		boolean_t found = B_FALSE;
861219089Spjd		int j;
862219089Spjd
863219089Spjd		if (ostype == DMU_OST_ZFS) {
864219089Spjd			for (j = 0; j != sa_legacy_attr_count; j++) {
865219089Spjd				if (strcmp(reg_attrs[i].sa_name,
866219089Spjd				    sa_legacy_attrs[j].sa_name) == 0) {
867219089Spjd					sa->sa_user_table[i] =
868219089Spjd					    sa_legacy_attrs[j].sa_attr;
869219089Spjd					found = B_TRUE;
870219089Spjd				}
871219089Spjd			}
872219089Spjd		}
873219089Spjd		if (found)
874219089Spjd			continue;
875219089Spjd
876219089Spjd		if (sa->sa_reg_attr_obj)
877219089Spjd			error = zap_lookup(os, sa->sa_reg_attr_obj,
878219089Spjd			    reg_attrs[i].sa_name, 8, 1, &attr_value);
879219089Spjd		else
880249195Smm			error = SET_ERROR(ENOENT);
881219089Spjd		switch (error) {
882219089Spjd		case ENOENT:
883219089Spjd			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
884219089Spjd			sa_attr_count++;
885219089Spjd			break;
886219089Spjd		case 0:
887219089Spjd			sa->sa_user_table[i] = ATTR_NUM(attr_value);
888219089Spjd			break;
889219089Spjd		default:
890219089Spjd			goto bail;
891219089Spjd		}
892219089Spjd	}
893219089Spjd
894219089Spjd	sa->sa_num_attrs = sa_attr_count;
895219089Spjd	tb = sa->sa_attr_table =
896219089Spjd	    kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
897219089Spjd
898219089Spjd	/*
899219089Spjd	 * Attribute table is constructed from requested attribute list,
900219089Spjd	 * previously foreign registered attributes, and also the legacy
901219089Spjd	 * ZPL set of attributes.
902219089Spjd	 */
903219089Spjd
904219089Spjd	if (sa->sa_reg_attr_obj) {
905219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
906219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
907219089Spjd		    zap_cursor_advance(&zc)) {
908219089Spjd			uint64_t value;
909219089Spjd			value  = za.za_first_integer;
910219089Spjd
911219089Spjd			registered_count++;
912219089Spjd			tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
913219089Spjd			tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
914219089Spjd			tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
915219089Spjd			tb[ATTR_NUM(value)].sa_registered = B_TRUE;
916219089Spjd
917219089Spjd			if (tb[ATTR_NUM(value)].sa_name) {
918219089Spjd				continue;
919219089Spjd			}
920219089Spjd			tb[ATTR_NUM(value)].sa_name =
921219089Spjd			    kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
922219089Spjd			(void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
923219089Spjd			    strlen(za.za_name) +1);
924219089Spjd		}
925219089Spjd		zap_cursor_fini(&zc);
926219089Spjd		/*
927219089Spjd		 * Make sure we processed the correct number of registered
928219089Spjd		 * attributes
929219089Spjd		 */
930219089Spjd		if (registered_count != sa_reg_count) {
931219089Spjd			ASSERT(error != 0);
932219089Spjd			goto bail;
933219089Spjd		}
934219089Spjd
935219089Spjd	}
936219089Spjd
937219089Spjd	if (ostype == DMU_OST_ZFS) {
938219089Spjd		for (i = 0; i != sa_legacy_attr_count; i++) {
939219089Spjd			if (tb[i].sa_name)
940219089Spjd				continue;
941219089Spjd			tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
942219089Spjd			tb[i].sa_length = sa_legacy_attrs[i].sa_length;
943219089Spjd			tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
944219089Spjd			tb[i].sa_registered = B_FALSE;
945219089Spjd			tb[i].sa_name =
946219089Spjd			    kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
947219089Spjd			    KM_SLEEP);
948219089Spjd			(void) strlcpy(tb[i].sa_name,
949219089Spjd			    sa_legacy_attrs[i].sa_name,
950219089Spjd			    strlen(sa_legacy_attrs[i].sa_name) + 1);
951219089Spjd		}
952219089Spjd	}
953219089Spjd
954219089Spjd	for (i = 0; i != count; i++) {
955219089Spjd		sa_attr_type_t attr_id;
956219089Spjd
957219089Spjd		attr_id = sa->sa_user_table[i];
958219089Spjd		if (tb[attr_id].sa_name)
959219089Spjd			continue;
960219089Spjd
961219089Spjd		tb[attr_id].sa_length = reg_attrs[i].sa_length;
962219089Spjd		tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
963219089Spjd		tb[attr_id].sa_attr = attr_id;
964219089Spjd		tb[attr_id].sa_name =
965219089Spjd		    kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
966219089Spjd		(void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
967219089Spjd		    strlen(reg_attrs[i].sa_name) + 1);
968219089Spjd	}
969219089Spjd
970219089Spjd	sa->sa_need_attr_registration =
971219089Spjd	    (sa_attr_count != registered_count);
972219089Spjd
973219089Spjd	return (0);
974219089Spjdbail:
975219089Spjd	kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t));
976219089Spjd	sa->sa_user_table = NULL;
977219089Spjd	sa_free_attr_table(sa);
978219089Spjd	return ((error != 0) ? error : EINVAL);
979219089Spjd}
980219089Spjd
981219089Spjdint
982219089Spjdsa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
983219089Spjd    sa_attr_type_t **user_table)
984219089Spjd{
985219089Spjd	zap_cursor_t zc;
986219089Spjd	zap_attribute_t za;
987219089Spjd	sa_os_t *sa;
988219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
989219089Spjd	sa_attr_type_t *tb;
990219089Spjd	int error;
991219089Spjd
992248571Smm	mutex_enter(&os->os_user_ptr_lock);
993219089Spjd	if (os->os_sa) {
994219089Spjd		mutex_enter(&os->os_sa->sa_lock);
995248571Smm		mutex_exit(&os->os_user_ptr_lock);
996219089Spjd		tb = os->os_sa->sa_user_table;
997219089Spjd		mutex_exit(&os->os_sa->sa_lock);
998219089Spjd		*user_table = tb;
999219089Spjd		return (0);
1000219089Spjd	}
1001219089Spjd
1002219089Spjd	sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
1003219089Spjd	mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
1004219089Spjd	sa->sa_master_obj = sa_obj;
1005219089Spjd
1006219089Spjd	os->os_sa = sa;
1007219089Spjd	mutex_enter(&sa->sa_lock);
1008248571Smm	mutex_exit(&os->os_user_ptr_lock);
1009219089Spjd	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
1010219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
1011219089Spjd	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
1012219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
1013219089Spjd
1014219089Spjd	if (sa_obj) {
1015219089Spjd		error = zap_lookup(os, sa_obj, SA_LAYOUTS,
1016219089Spjd		    8, 1, &sa->sa_layout_attr_obj);
1017219089Spjd		if (error != 0 && error != ENOENT)
1018219089Spjd			goto fail;
1019219089Spjd		error = zap_lookup(os, sa_obj, SA_REGISTRY,
1020219089Spjd		    8, 1, &sa->sa_reg_attr_obj);
1021219089Spjd		if (error != 0 && error != ENOENT)
1022219089Spjd			goto fail;
1023219089Spjd	}
1024219089Spjd
1025219089Spjd	if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0)
1026219089Spjd		goto fail;
1027219089Spjd
1028219089Spjd	if (sa->sa_layout_attr_obj != 0) {
1029219089Spjd		uint64_t layout_count;
1030219089Spjd
1031219089Spjd		error = zap_count(os, sa->sa_layout_attr_obj,
1032219089Spjd		    &layout_count);
1033219089Spjd
1034219089Spjd		/*
1035219089Spjd		 * Layout number count should be > 0
1036219089Spjd		 */
1037219089Spjd		if (error || (error == 0 && layout_count == 0)) {
1038219089Spjd			if (error == 0)
1039249195Smm				error = SET_ERROR(EINVAL);
1040219089Spjd			goto fail;
1041219089Spjd		}
1042219089Spjd
1043219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
1044219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
1045219089Spjd		    zap_cursor_advance(&zc)) {
1046219089Spjd			sa_attr_type_t *lot_attrs;
1047219089Spjd			uint64_t lot_num;
1048219089Spjd
1049219089Spjd			lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
1050219089Spjd			    za.za_num_integers, KM_SLEEP);
1051219089Spjd
1052219089Spjd			if ((error = (zap_lookup(os, sa->sa_layout_attr_obj,
1053219089Spjd			    za.za_name, 2, za.za_num_integers,
1054219089Spjd			    lot_attrs))) != 0) {
1055219089Spjd				kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1056219089Spjd				    za.za_num_integers);
1057219089Spjd				break;
1058219089Spjd			}
1059219089Spjd			VERIFY(ddi_strtoull(za.za_name, NULL, 10,
1060219089Spjd			    (unsigned long long *)&lot_num) == 0);
1061219089Spjd
1062219089Spjd			(void) sa_add_layout_entry(os, lot_attrs,
1063219089Spjd			    za.za_num_integers, lot_num,
1064219089Spjd			    sa_layout_info_hash(lot_attrs,
1065219089Spjd			    za.za_num_integers), B_FALSE, NULL);
1066219089Spjd			kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1067219089Spjd			    za.za_num_integers);
1068219089Spjd		}
1069219089Spjd		zap_cursor_fini(&zc);
1070219089Spjd
1071219089Spjd		/*
1072219089Spjd		 * Make sure layout count matches number of entries added
1073219089Spjd		 * to AVL tree
1074219089Spjd		 */
1075219089Spjd		if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) {
1076219089Spjd			ASSERT(error != 0);
1077219089Spjd			goto fail;
1078219089Spjd		}
1079219089Spjd	}
1080219089Spjd
1081219089Spjd	/* Add special layout number for old ZNODES */
1082219089Spjd	if (ostype == DMU_OST_ZFS) {
1083219089Spjd		(void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
1084219089Spjd		    sa_legacy_attr_count, 0,
1085219089Spjd		    sa_layout_info_hash(sa_legacy_zpl_layout,
1086219089Spjd		    sa_legacy_attr_count), B_FALSE, NULL);
1087219089Spjd
1088219089Spjd		(void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
1089219089Spjd		    0, B_FALSE, NULL);
1090219089Spjd	}
1091219089Spjd	*user_table = os->os_sa->sa_user_table;
1092219089Spjd	mutex_exit(&sa->sa_lock);
1093219089Spjd	return (0);
1094219089Spjdfail:
1095219089Spjd	os->os_sa = NULL;
1096219089Spjd	sa_free_attr_table(sa);
1097219089Spjd	if (sa->sa_user_table)
1098219089Spjd		kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1099219089Spjd	mutex_exit(&sa->sa_lock);
1100268713Sdelphij	avl_destroy(&sa->sa_layout_hash_tree);
1101268713Sdelphij	avl_destroy(&sa->sa_layout_num_tree);
1102268713Sdelphij	mutex_destroy(&sa->sa_lock);
1103219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1104219089Spjd	return ((error == ECKSUM) ? EIO : error);
1105219089Spjd}
1106219089Spjd
1107219089Spjdvoid
1108219089Spjdsa_tear_down(objset_t *os)
1109219089Spjd{
1110219089Spjd	sa_os_t *sa = os->os_sa;
1111219089Spjd	sa_lot_t *layout;
1112219089Spjd	void *cookie;
1113219089Spjd
1114219089Spjd	kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1115219089Spjd
1116219089Spjd	/* Free up attr table */
1117219089Spjd
1118219089Spjd	sa_free_attr_table(sa);
1119219089Spjd
1120219089Spjd	cookie = NULL;
1121219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
1122219089Spjd		sa_idx_tab_t *tab;
1123219089Spjd		while (tab = list_head(&layout->lot_idx_tab)) {
1124219089Spjd			ASSERT(refcount_count(&tab->sa_refcount));
1125219089Spjd			sa_idx_tab_rele(os, tab);
1126219089Spjd		}
1127219089Spjd	}
1128219089Spjd
1129219089Spjd	cookie = NULL;
1130219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
1131219089Spjd		kmem_free(layout->lot_attrs,
1132219089Spjd		    sizeof (sa_attr_type_t) * layout->lot_attr_count);
1133219089Spjd		kmem_free(layout, sizeof (sa_lot_t));
1134219089Spjd	}
1135219089Spjd
1136219089Spjd	avl_destroy(&sa->sa_layout_hash_tree);
1137219089Spjd	avl_destroy(&sa->sa_layout_num_tree);
1138268713Sdelphij	mutex_destroy(&sa->sa_lock);
1139219089Spjd
1140219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1141219089Spjd	os->os_sa = NULL;
1142219089Spjd}
1143219089Spjd
1144219089Spjdvoid
1145219089Spjdsa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
1146219089Spjd    uint16_t length, int length_idx, boolean_t var_length, void *userp)
1147219089Spjd{
1148219089Spjd	sa_idx_tab_t *idx_tab = userp;
1149219089Spjd
1150219089Spjd	if (var_length) {
1151219089Spjd		ASSERT(idx_tab->sa_variable_lengths);
1152219089Spjd		idx_tab->sa_variable_lengths[length_idx] = length;
1153219089Spjd	}
1154219089Spjd	TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx,
1155219089Spjd	    (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr));
1156219089Spjd}
1157219089Spjd
1158219089Spjdstatic void
1159219089Spjdsa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
1160219089Spjd    sa_iterfunc_t func, sa_lot_t *tab, void *userp)
1161219089Spjd{
1162219089Spjd	void *data_start;
1163219089Spjd	sa_lot_t *tb = tab;
1164219089Spjd	sa_lot_t search;
1165219089Spjd	avl_index_t loc;
1166219089Spjd	sa_os_t *sa = os->os_sa;
1167219089Spjd	int i;
1168219089Spjd	uint16_t *length_start = NULL;
1169219089Spjd	uint8_t length_idx = 0;
1170219089Spjd
1171219089Spjd	if (tab == NULL) {
1172219089Spjd		search.lot_num = SA_LAYOUT_NUM(hdr, type);
1173219089Spjd		tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1174219089Spjd		ASSERT(tb);
1175219089Spjd	}
1176219089Spjd
1177219089Spjd	if (IS_SA_BONUSTYPE(type)) {
1178219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
1179219089Spjd		    offsetof(sa_hdr_phys_t, sa_lengths) +
1180219089Spjd		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
1181219089Spjd		length_start = hdr->sa_lengths;
1182219089Spjd	} else {
1183219089Spjd		data_start = hdr;
1184219089Spjd	}
1185219089Spjd
1186219089Spjd	for (i = 0; i != tb->lot_attr_count; i++) {
1187219089Spjd		int attr_length, reg_length;
1188219089Spjd		uint8_t idx_len;
1189219089Spjd
1190219089Spjd		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
1191219089Spjd		if (reg_length) {
1192219089Spjd			attr_length = reg_length;
1193219089Spjd			idx_len = 0;
1194219089Spjd		} else {
1195219089Spjd			attr_length = length_start[length_idx];
1196219089Spjd			idx_len = length_idx++;
1197219089Spjd		}
1198219089Spjd
1199219089Spjd		func(hdr, data_start, tb->lot_attrs[i], attr_length,
1200219089Spjd		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);
1201219089Spjd
1202219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
1203219089Spjd		    attr_length), 8);
1204219089Spjd	}
1205219089Spjd}
1206219089Spjd
1207219089Spjd/*ARGSUSED*/
1208219089Spjdvoid
1209219089Spjdsa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
1210219089Spjd    uint16_t length, int length_idx, boolean_t variable_length, void *userp)
1211219089Spjd{
1212219089Spjd	sa_handle_t *hdl = userp;
1213219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1214219089Spjd
1215219089Spjd	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
1216219089Spjd}
1217219089Spjd
1218219089Spjdvoid
1219219089Spjdsa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
1220219089Spjd{
1221219089Spjd	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1222219089Spjd	dmu_buf_impl_t *db;
1223219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1224219089Spjd	int num_lengths = 1;
1225219089Spjd	int i;
1226219089Spjd
1227219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1228219089Spjd	if (sa_hdr_phys->sa_magic == SA_MAGIC)
1229219089Spjd		return;
1230219089Spjd
1231219089Spjd	db = SA_GET_DB(hdl, buftype);
1232219089Spjd
1233219089Spjd	if (buftype == SA_SPILL) {
1234219089Spjd		arc_release(db->db_buf, NULL);
1235219089Spjd		arc_buf_thaw(db->db_buf);
1236219089Spjd	}
1237219089Spjd
1238219089Spjd	sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
1239219089Spjd	sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
1240219089Spjd
1241219089Spjd	/*
1242219089Spjd	 * Determine number of variable lenghts in header
1243219089Spjd	 * The standard 8 byte header has one for free and a
1244219089Spjd	 * 16 byte header would have 4 + 1;
1245219089Spjd	 */
1246219089Spjd	if (SA_HDR_SIZE(sa_hdr_phys) > 8)
1247219089Spjd		num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
1248219089Spjd	for (i = 0; i != num_lengths; i++)
1249219089Spjd		sa_hdr_phys->sa_lengths[i] =
1250219089Spjd		    BSWAP_16(sa_hdr_phys->sa_lengths[i]);
1251219089Spjd
1252219089Spjd	sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
1253219089Spjd	    sa_byteswap_cb, NULL, hdl);
1254219089Spjd
1255219089Spjd	if (buftype == SA_SPILL)
1256219089Spjd		arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
1257219089Spjd}
1258219089Spjd
1259219089Spjdstatic int
1260219089Spjdsa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
1261219089Spjd{
1262219089Spjd	sa_hdr_phys_t *sa_hdr_phys;
1263219089Spjd	dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
1264219089Spjd	dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
1265219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1266219089Spjd	sa_idx_tab_t *idx_tab;
1267219089Spjd
1268219089Spjd	sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1269219089Spjd
1270219089Spjd	mutex_enter(&sa->sa_lock);
1271219089Spjd
1272219089Spjd	/* Do we need to byteswap? */
1273219089Spjd
1274219089Spjd	/* only check if not old znode */
1275219089Spjd	if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
1276219089Spjd	    sa_hdr_phys->sa_magic != 0) {
1277219089Spjd		VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
1278219089Spjd		sa_byteswap(hdl, buftype);
1279219089Spjd	}
1280219089Spjd
1281219089Spjd	idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);
1282219089Spjd
1283219089Spjd	if (buftype == SA_BONUS)
1284219089Spjd		hdl->sa_bonus_tab = idx_tab;
1285219089Spjd	else
1286219089Spjd		hdl->sa_spill_tab = idx_tab;
1287219089Spjd
1288219089Spjd	mutex_exit(&sa->sa_lock);
1289219089Spjd	return (0);
1290219089Spjd}
1291219089Spjd
/*
 * Synchronous eviction callback installed on shared SA handles.  It only
 * panics, so reaching it means an SA dbuf was evicted while a handle was
 * still registered on it.
 */
/*ARGSUSED*/
static void
sa_evict_sync(void *dbu)
{
	panic("evicting sa dbuf\n");
}
1298219089Spjd
1299219089Spjdstatic void
1300219089Spjdsa_idx_tab_rele(objset_t *os, void *arg)
1301219089Spjd{
1302219089Spjd	sa_os_t *sa = os->os_sa;
1303219089Spjd	sa_idx_tab_t *idx_tab = arg;
1304219089Spjd
1305219089Spjd	if (idx_tab == NULL)
1306219089Spjd		return;
1307219089Spjd
1308219089Spjd	mutex_enter(&sa->sa_lock);
1309219089Spjd	if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
1310219089Spjd		list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
1311219089Spjd		if (idx_tab->sa_variable_lengths)
1312219089Spjd			kmem_free(idx_tab->sa_variable_lengths,
1313219089Spjd			    sizeof (uint16_t) *
1314219089Spjd			    idx_tab->sa_layout->lot_var_sizes);
1315219089Spjd		refcount_destroy(&idx_tab->sa_refcount);
1316219089Spjd		kmem_free(idx_tab->sa_idx_tab,
1317219089Spjd		    sizeof (uint32_t) * sa->sa_num_attrs);
1318219089Spjd		kmem_free(idx_tab, sizeof (sa_idx_tab_t));
1319219089Spjd	}
1320219089Spjd	mutex_exit(&sa->sa_lock);
1321219089Spjd}
1322219089Spjd
1323219089Spjdstatic void
1324219089Spjdsa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
1325219089Spjd{
1326219089Spjd	sa_os_t *sa = os->os_sa;
1327219089Spjd
1328219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1329219089Spjd	(void) refcount_add(&idx_tab->sa_refcount, NULL);
1330219089Spjd}
1331219089Spjd
1332219089Spjdvoid
1333219089Spjdsa_handle_destroy(sa_handle_t *hdl)
1334219089Spjd{
1335286575Smav	dmu_buf_t *db = hdl->sa_bonus;
1336286575Smav
1337219089Spjd	mutex_enter(&hdl->sa_lock);
1338286575Smav	(void) dmu_buf_remove_user(db, &hdl->sa_dbu);
1339219089Spjd
1340286539Smav	if (hdl->sa_bonus_tab)
1341219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
1342286539Smav
1343286539Smav	if (hdl->sa_spill_tab)
1344219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
1345219089Spjd
1346219089Spjd	dmu_buf_rele(hdl->sa_bonus, NULL);
1347219089Spjd
1348219089Spjd	if (hdl->sa_spill)
1349219089Spjd		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
1350219089Spjd	mutex_exit(&hdl->sa_lock);
1351219089Spjd
1352219089Spjd	kmem_cache_free(sa_cache, hdl);
1353219089Spjd}
1354219089Spjd
1355219089Spjdint
1356219089Spjdsa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
1357219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1358219089Spjd{
1359219089Spjd	int error = 0;
1360219089Spjd	dmu_object_info_t doi;
1361286575Smav	sa_handle_t *handle = NULL;
1362219089Spjd
1363219089Spjd#ifdef ZFS_DEBUG
1364219089Spjd	dmu_object_info_from_db(db, &doi);
1365219089Spjd	ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
1366219089Spjd	    doi.doi_bonus_type == DMU_OT_ZNODE);
1367219089Spjd#endif
1368219089Spjd	/* find handle, if it exists */
1369219089Spjd	/* if one doesn't exist then create a new one, and initialize it */
1370219089Spjd
1371286575Smav	if (hdl_type == SA_HDL_SHARED)
1372286575Smav		handle = dmu_buf_get_user(db);
1373286575Smav
1374219089Spjd	if (handle == NULL) {
1375286575Smav		sa_handle_t *winner = NULL;
1376286575Smav
1377219089Spjd		handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
1378321527Smav		handle->sa_dbu.dbu_evict_func_sync = NULL;
1379321527Smav		handle->sa_dbu.dbu_evict_func_async = NULL;
1380219089Spjd		handle->sa_userp = userp;
1381219089Spjd		handle->sa_bonus = db;
1382219089Spjd		handle->sa_os = os;
1383219089Spjd		handle->sa_spill = NULL;
1384286539Smav		handle->sa_bonus_tab = NULL;
1385286539Smav		handle->sa_spill_tab = NULL;
1386219089Spjd
1387219089Spjd		error = sa_build_index(handle, SA_BONUS);
1388219089Spjd
1389286575Smav		if (hdl_type == SA_HDL_SHARED) {
1390321527Smav			dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL,
1391321527Smav			    NULL);
1392286575Smav			winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
1393286575Smav		}
1394286575Smav
1395286575Smav		if (winner != NULL) {
1396219089Spjd			kmem_cache_free(sa_cache, handle);
1397286575Smav			handle = winner;
1398219089Spjd		}
1399219089Spjd	}
1400219089Spjd	*handlepp = handle;
1401219089Spjd
1402219089Spjd	return (error);
1403219089Spjd}
1404219089Spjd
1405219089Spjdint
1406219089Spjdsa_handle_get(objset_t *objset, uint64_t objid, void *userp,
1407219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1408219089Spjd{
1409219089Spjd	dmu_buf_t *db;
1410219089Spjd	int error;
1411219089Spjd
1412219089Spjd	if (error = dmu_bonus_hold(objset, objid, NULL, &db))
1413219089Spjd		return (error);
1414219089Spjd
1415219089Spjd	return (sa_handle_get_from_db(objset, db, userp, hdl_type,
1416219089Spjd	    handlepp));
1417219089Spjd}
1418219089Spjd
1419219089Spjdint
1420219089Spjdsa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
1421219089Spjd{
1422219089Spjd	return (dmu_bonus_hold(objset, obj_num, tag, db));
1423219089Spjd}
1424219089Spjd
1425219089Spjdvoid
1426219089Spjdsa_buf_rele(dmu_buf_t *db, void *tag)
1427219089Spjd{
1428219089Spjd	dmu_buf_rele(db, tag);
1429219089Spjd}
1430219089Spjd
1431219089Spjdint
1432219089Spjdsa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
1433219089Spjd{
1434219089Spjd	ASSERT(hdl);
1435219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1436219089Spjd	return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
1437219089Spjd}
1438219089Spjd
1439219089Spjdint
1440219089Spjdsa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
1441219089Spjd{
1442219089Spjd	int error;
1443219089Spjd	sa_bulk_attr_t bulk;
1444219089Spjd
1445219089Spjd	bulk.sa_attr = attr;
1446219089Spjd	bulk.sa_data = buf;
1447219089Spjd	bulk.sa_length = buflen;
1448219089Spjd	bulk.sa_data_func = NULL;
1449219089Spjd
1450219089Spjd	ASSERT(hdl);
1451219089Spjd	mutex_enter(&hdl->sa_lock);
1452219089Spjd	error = sa_lookup_impl(hdl, &bulk, 1);
1453219089Spjd	mutex_exit(&hdl->sa_lock);
1454219089Spjd	return (error);
1455219089Spjd}
1456219089Spjd
#ifdef _KERNEL
/*
 * Look up a single attribute and copy it out through "uio".  No more than
 * the attribute's size, and no more than uio->uio_resid bytes, are moved.
 * Returns the lookup error, or the result of uiomove().
 */
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
	sa_bulk_attr_t bulk;
	int error;

	/* No destination buffer: the lookup only locates the attribute. */
	bulk.sa_attr = attr;
	bulk.sa_data = NULL;
	bulk.sa_data_func = NULL;

	ASSERT(hdl);

	mutex_enter(&hdl->sa_lock);
	error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL);
	if (error == 0) {
		error = uiomove((void *)bulk.sa_addr,
		    MIN(bulk.sa_size, uio->uio_resid), UIO_READ, uio);
	}
	mutex_exit(&hdl->sa_lock);

	return (error);
}
#endif
1480219089Spjd
1481321558Smavstatic sa_idx_tab_t *
1482321558Smavsa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, sa_hdr_phys_t *hdr)
1483219089Spjd{
1484219089Spjd	sa_idx_tab_t *idx_tab;
1485219089Spjd	sa_os_t *sa = os->os_sa;
1486219089Spjd	sa_lot_t *tb, search;
1487219089Spjd	avl_index_t loc;
1488219089Spjd
1489219089Spjd	/*
1490219089Spjd	 * Deterimine layout number.  If SA node and header == 0 then
1491219089Spjd	 * force the index table to the dummy "1" empty layout.
1492219089Spjd	 *
1493219089Spjd	 * The layout number would only be zero for a newly created file
1494219089Spjd	 * that has not added any attributes yet, or with crypto enabled which
1495219089Spjd	 * doesn't write any attributes to the bonus buffer.
1496219089Spjd	 */
1497219089Spjd
1498219089Spjd	search.lot_num = SA_LAYOUT_NUM(hdr, bonustype);
1499219089Spjd
1500219089Spjd	tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1501219089Spjd
1502219089Spjd	/* Verify header size is consistent with layout information */
1503219089Spjd	ASSERT(tb);
1504219089Spjd	ASSERT(IS_SA_BONUSTYPE(bonustype) &&
1505219089Spjd	    SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) ||
1506219089Spjd	    (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0));
1507219089Spjd
1508219089Spjd	/*
1509219089Spjd	 * See if any of the already existing TOC entries can be reused?
1510219089Spjd	 */
1511219089Spjd
1512219089Spjd	for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab;
1513219089Spjd	    idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) {
1514219089Spjd		boolean_t valid_idx = B_TRUE;
1515219089Spjd		int i;
1516219089Spjd
1517219089Spjd		if (tb->lot_var_sizes != 0 &&
1518219089Spjd		    idx_tab->sa_variable_lengths != NULL) {
1519219089Spjd			for (i = 0; i != tb->lot_var_sizes; i++) {
1520219089Spjd				if (hdr->sa_lengths[i] !=
1521219089Spjd				    idx_tab->sa_variable_lengths[i]) {
1522219089Spjd					valid_idx = B_FALSE;
1523219089Spjd					break;
1524219089Spjd				}
1525219089Spjd			}
1526219089Spjd		}
1527219089Spjd		if (valid_idx) {
1528219089Spjd			sa_idx_tab_hold(os, idx_tab);
1529219089Spjd			return (idx_tab);
1530219089Spjd		}
1531219089Spjd	}
1532219089Spjd
1533219089Spjd	/* No such luck, create a new entry */
1534219089Spjd	idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP);
1535219089Spjd	idx_tab->sa_idx_tab =
1536219089Spjd	    kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP);
1537219089Spjd	idx_tab->sa_layout = tb;
1538219089Spjd	refcount_create(&idx_tab->sa_refcount);
1539219089Spjd	if (tb->lot_var_sizes)
1540219089Spjd		idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) *
1541219089Spjd		    tb->lot_var_sizes, KM_SLEEP);
1542219089Spjd
1543219089Spjd	sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab,
1544219089Spjd	    tb, idx_tab);
1545219089Spjd	sa_idx_tab_hold(os, idx_tab);   /* one hold for consumer */
1546219089Spjd	sa_idx_tab_hold(os, idx_tab);	/* one for layout */
1547219089Spjd	list_insert_tail(&tb->lot_idx_tab, idx_tab);
1548219089Spjd	return (idx_tab);
1549219089Spjd}
1550219089Spjd
1551219089Spjdvoid
1552219089Spjdsa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len,
1553219089Spjd    boolean_t start, void *userdata)
1554219089Spjd{
1555219089Spjd	ASSERT(start);
1556219089Spjd
1557219089Spjd	*dataptr = userdata;
1558219089Spjd	*len = total_len;
1559219089Spjd}
1560219089Spjd
1561219089Spjdstatic void
1562219089Spjdsa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
1563219089Spjd{
1564219089Spjd	uint64_t attr_value = 0;
1565219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1566219089Spjd	sa_attr_table_t *tb = sa->sa_attr_table;
1567219089Spjd	int i;
1568219089Spjd
1569219089Spjd	mutex_enter(&sa->sa_lock);
1570219089Spjd
1571219089Spjd	if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) {
1572219089Spjd		mutex_exit(&sa->sa_lock);
1573219089Spjd		return;
1574219089Spjd	}
1575219089Spjd
1576219089Spjd	if (sa->sa_reg_attr_obj == 0) {
1577236884Smm		sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
1578236884Smm		    DMU_OT_SA_ATTR_REGISTRATION,
1579236884Smm		    sa->sa_master_obj, SA_REGISTRY, tx);
1580219089Spjd	}
1581219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
1582219089Spjd		if (sa->sa_attr_table[i].sa_registered)
1583219089Spjd			continue;
1584219089Spjd		ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
1585219089Spjd		    tb[i].sa_byteswap);
1586219089Spjd		VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
1587219089Spjd		    tb[i].sa_name, 8, 1, &attr_value, tx));
1588219089Spjd		tb[i].sa_registered = B_TRUE;
1589219089Spjd	}
1590219089Spjd	sa->sa_need_attr_registration = B_FALSE;
1591219089Spjd	mutex_exit(&sa->sa_lock);
1592219089Spjd}
1593219089Spjd
1594219089Spjd/*
1595219089Spjd * Replace all attributes with attributes specified in template.
1596219089Spjd * If dnode had a spill buffer then those attributes will be
1597219089Spjd * also be replaced, possibly with just an empty spill block
1598219089Spjd *
1599219089Spjd * This interface is intended to only be used for bulk adding of
1600219089Spjd * attributes for a new file.  It will also be used by the ZPL
1601219089Spjd * when converting and old formatted znode to native SA support.
1602219089Spjd */
1603219089Spjdint
1604219089Spjdsa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1605219089Spjd    int attr_count, dmu_tx_t *tx)
1606219089Spjd{
1607219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1608219089Spjd
1609219089Spjd	if (sa->sa_need_attr_registration)
1610219089Spjd		sa_attr_register_sync(hdl, tx);
1611219089Spjd	return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
1612219089Spjd}
1613219089Spjd
1614219089Spjdint
1615219089Spjdsa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1616219089Spjd    int attr_count, dmu_tx_t *tx)
1617219089Spjd{
1618219089Spjd	int error;
1619219089Spjd
1620219089Spjd	mutex_enter(&hdl->sa_lock);
1621219089Spjd	error = sa_replace_all_by_template_locked(hdl, attr_desc,
1622219089Spjd	    attr_count, tx);
1623219089Spjd	mutex_exit(&hdl->sa_lock);
1624219089Spjd	return (error);
1625219089Spjd}
1626219089Spjd
1627219089Spjd/*
1628240345Savg * Add/remove a single attribute or replace a variable-sized attribute value
1629240345Savg * with a value of a different size, and then rewrite the entire set
1630219089Spjd * of attributes.
1631240345Savg * Same-length attribute value replacement (including fixed-length attributes)
1632240345Savg * is handled more efficiently by the upper layers.
1633219089Spjd */
1634219089Spjdstatic int
1635219089Spjdsa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
1636219089Spjd    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
1637219089Spjd    uint16_t buflen, dmu_tx_t *tx)
1638219089Spjd{
1639219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1640219089Spjd	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
1641219089Spjd	dnode_t *dn;
1642219089Spjd	sa_bulk_attr_t *attr_desc;
1643219089Spjd	void *old_data[2];
1644219089Spjd	int bonus_attr_count = 0;
1645247187Smm	int bonus_data_size = 0;
1646247187Smm	int spill_data_size = 0;
1647219089Spjd	int spill_attr_count = 0;
1648219089Spjd	int error;
1649295125Savg	uint16_t length, reg_length;
1650219089Spjd	int i, j, k, length_idx;
1651219089Spjd	sa_hdr_phys_t *hdr;
1652219089Spjd	sa_idx_tab_t *idx_tab;
1653219089Spjd	int attr_count;
1654219089Spjd	int count;
1655219089Spjd
1656219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1657219089Spjd
1658219089Spjd	/* First make of copy of the old data */
1659219089Spjd
1660219089Spjd	DB_DNODE_ENTER(db);
1661219089Spjd	dn = DB_DNODE(db);
1662219089Spjd	if (dn->dn_bonuslen != 0) {
1663219089Spjd		bonus_data_size = hdl->sa_bonus->db_size;
1664219089Spjd		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
1665219089Spjd		bcopy(hdl->sa_bonus->db_data, old_data[0],
1666219089Spjd		    hdl->sa_bonus->db_size);
1667219089Spjd		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
1668219089Spjd	} else {
1669219089Spjd		old_data[0] = NULL;
1670219089Spjd	}
1671219089Spjd	DB_DNODE_EXIT(db);
1672219089Spjd
1673219089Spjd	/* Bring spill buffer online if it isn't currently */
1674219089Spjd
1675219089Spjd	if ((error = sa_get_spill(hdl)) == 0) {
1676219089Spjd		spill_data_size = hdl->sa_spill->db_size;
1677219089Spjd		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
1678219089Spjd		bcopy(hdl->sa_spill->db_data, old_data[1],
1679219089Spjd		    hdl->sa_spill->db_size);
1680219089Spjd		spill_attr_count =
1681219089Spjd		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
1682219089Spjd	} else if (error && error != ENOENT) {
1683219089Spjd		if (old_data[0])
1684219089Spjd			kmem_free(old_data[0], bonus_data_size);
1685219089Spjd		return (error);
1686219089Spjd	} else {
1687219089Spjd		old_data[1] = NULL;
1688219089Spjd	}
1689219089Spjd
1690219089Spjd	/* build descriptor of all attributes */
1691219089Spjd
1692219089Spjd	attr_count = bonus_attr_count + spill_attr_count;
1693219089Spjd	if (action == SA_ADD)
1694219089Spjd		attr_count++;
1695219089Spjd	else if (action == SA_REMOVE)
1696219089Spjd		attr_count--;
1697219089Spjd
1698219089Spjd	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);
1699219089Spjd
1700219089Spjd	/*
1701219089Spjd	 * loop through bonus and spill buffer if it exists, and
1702219089Spjd	 * build up new attr_descriptor to reset the attributes
1703219089Spjd	 */
1704219089Spjd	k = j = 0;
1705219089Spjd	count = bonus_attr_count;
1706219089Spjd	hdr = SA_GET_HDR(hdl, SA_BONUS);
1707219089Spjd	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
1708219089Spjd	for (; k != 2; k++) {
1709295125Savg		/*
1710295125Savg		 * Iterate over each attribute in layout.  Fetch the
1711295125Savg		 * size of variable-length attributes needing rewrite
1712295125Savg		 * from sa_lengths[].
1713295125Savg		 */
1714219089Spjd		for (i = 0, length_idx = 0; i != count; i++) {
1715219089Spjd			sa_attr_type_t attr;
1716219089Spjd
1717219089Spjd			attr = idx_tab->sa_layout->lot_attrs[i];
1718295125Savg			reg_length = SA_REGISTERED_LEN(sa, attr);
1719295125Savg			if (reg_length == 0) {
1720295125Savg				length = hdr->sa_lengths[length_idx];
1721295125Savg				length_idx++;
1722295125Savg			} else {
1723295125Savg				length = reg_length;
1724295125Savg			}
1725219089Spjd			if (attr == newattr) {
1726295125Savg				/*
1727295125Savg				 * There is nothing to do for SA_REMOVE,
1728295125Savg				 * so it is just skipped.
1729295125Savg				 */
1730295125Savg				if (action == SA_REMOVE)
1731295125Savg					continue;
1732219089Spjd
1733295125Savg				/*
1734295125Savg				 * Duplicate attributes are not allowed, so the
1735295125Savg				 * action can not be SA_ADD here.
1736295125Savg				 */
1737295125Savg				ASSERT3S(action, ==, SA_REPLACE);
1738295125Savg
1739295125Savg				/*
1740295125Savg				 * Only a variable-sized attribute can be
1741295125Savg				 * replaced here, and its size must be changing.
1742295125Savg				 */
1743295125Savg				ASSERT3U(reg_length, ==, 0);
1744295125Savg				ASSERT3U(length, !=, buflen);
1745219089Spjd				SA_ADD_BULK_ATTR(attr_desc, j, attr,
1746295125Savg				    locator, datastart, buflen);
1747295125Savg			} else {
1748295125Savg				SA_ADD_BULK_ATTR(attr_desc, j, attr,
1749219089Spjd				    NULL, (void *)
1750219089Spjd				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
1751219089Spjd				    (uintptr_t)old_data[k]), length);
1752219089Spjd			}
1753219089Spjd		}
1754219089Spjd		if (k == 0 && hdl->sa_spill) {
1755219089Spjd			hdr = SA_GET_HDR(hdl, SA_SPILL);
1756219089Spjd			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
1757219089Spjd			count = spill_attr_count;
1758219089Spjd		} else {
1759219089Spjd			break;
1760219089Spjd		}
1761219089Spjd	}
1762219089Spjd	if (action == SA_ADD) {
1763295125Savg		reg_length = SA_REGISTERED_LEN(sa, newattr);
1764295125Savg		IMPLY(reg_length != 0, reg_length == buflen);
1765219089Spjd		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
1766219089Spjd		    datastart, buflen);
1767219089Spjd	}
1768240345Savg	ASSERT3U(j, ==, attr_count);
1769219089Spjd
1770219089Spjd	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);
1771219089Spjd
1772219089Spjd	if (old_data[0])
1773219089Spjd		kmem_free(old_data[0], bonus_data_size);
1774219089Spjd	if (old_data[1])
1775219089Spjd		kmem_free(old_data[1], spill_data_size);
1776219089Spjd	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);
1777219089Spjd
1778219089Spjd	return (error);
1779219089Spjd}
1780219089Spjd
1781219089Spjdstatic int
1782219089Spjdsa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
1783219089Spjd    dmu_tx_t *tx)
1784219089Spjd{
1785219089Spjd	int error;
1786219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1787219089Spjd	dmu_object_type_t bonustype;
1788219089Spjd
1789219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS));
1790219089Spjd
1791219089Spjd	ASSERT(hdl);
1792219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1793219089Spjd
1794219089Spjd	/* sync out registration table if necessary */
1795219089Spjd	if (sa->sa_need_attr_registration)
1796219089Spjd		sa_attr_register_sync(hdl, tx);
1797219089Spjd
1798219089Spjd	error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
1799219089Spjd	if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
1800219089Spjd		sa->sa_update_cb(hdl, tx);
1801219089Spjd
1802219089Spjd	return (error);
1803219089Spjd}
1804219089Spjd
1805219089Spjd/*
1806219089Spjd * update or add new attribute
1807219089Spjd */
1808219089Spjdint
1809219089Spjdsa_update(sa_handle_t *hdl, sa_attr_type_t type,
1810219089Spjd    void *buf, uint32_t buflen, dmu_tx_t *tx)
1811219089Spjd{
1812219089Spjd	int error;
1813219089Spjd	sa_bulk_attr_t bulk;
1814219089Spjd
1815219089Spjd	bulk.sa_attr = type;
1816219089Spjd	bulk.sa_data_func = NULL;
1817219089Spjd	bulk.sa_length = buflen;
1818219089Spjd	bulk.sa_data = buf;
1819219089Spjd
1820219089Spjd	mutex_enter(&hdl->sa_lock);
1821219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1822219089Spjd	mutex_exit(&hdl->sa_lock);
1823219089Spjd	return (error);
1824219089Spjd}
1825219089Spjd
1826219089Spjdint
1827219089Spjdsa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
1828219089Spjd    uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
1829219089Spjd{
1830219089Spjd	int error;
1831219089Spjd	sa_bulk_attr_t bulk;
1832219089Spjd
1833219089Spjd	bulk.sa_attr = attr;
1834219089Spjd	bulk.sa_data = userdata;
1835219089Spjd	bulk.sa_data_func = locator;
1836219089Spjd	bulk.sa_length = buflen;
1837219089Spjd
1838219089Spjd	mutex_enter(&hdl->sa_lock);
1839219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1840219089Spjd	mutex_exit(&hdl->sa_lock);
1841219089Spjd	return (error);
1842219089Spjd}
1843219089Spjd
1844219089Spjd/*
1845219089Spjd * Return size of an attribute
1846219089Spjd */
1847219089Spjd
1848219089Spjdint
1849219089Spjdsa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
1850219089Spjd{
1851219089Spjd	sa_bulk_attr_t bulk;
1852219089Spjd	int error;
1853219089Spjd
1854219089Spjd	bulk.sa_data = NULL;
1855219089Spjd	bulk.sa_attr = attr;
1856219089Spjd	bulk.sa_data_func = NULL;
1857219089Spjd
1858219089Spjd	ASSERT(hdl);
1859219089Spjd	mutex_enter(&hdl->sa_lock);
1860219089Spjd	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) {
1861219089Spjd		mutex_exit(&hdl->sa_lock);
1862219089Spjd		return (error);
1863219089Spjd	}
1864219089Spjd	*size = bulk.sa_size;
1865219089Spjd
1866219089Spjd	mutex_exit(&hdl->sa_lock);
1867219089Spjd	return (0);
1868219089Spjd}
1869219089Spjd
1870219089Spjdint
1871219089Spjdsa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1872219089Spjd{
1873219089Spjd	ASSERT(hdl);
1874219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1875219089Spjd	return (sa_lookup_impl(hdl, attrs, count));
1876219089Spjd}
1877219089Spjd
1878219089Spjdint
1879219089Spjdsa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1880219089Spjd{
1881219089Spjd	int error;
1882219089Spjd
1883219089Spjd	ASSERT(hdl);
1884219089Spjd	mutex_enter(&hdl->sa_lock);
1885219089Spjd	error = sa_bulk_lookup_locked(hdl, attrs, count);
1886219089Spjd	mutex_exit(&hdl->sa_lock);
1887219089Spjd	return (error);
1888219089Spjd}
1889219089Spjd
1890219089Spjdint
1891219089Spjdsa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
1892219089Spjd{
1893219089Spjd	int error;
1894219089Spjd
1895219089Spjd	ASSERT(hdl);
1896219089Spjd	mutex_enter(&hdl->sa_lock);
1897219089Spjd	error = sa_bulk_update_impl(hdl, attrs, count, tx);
1898219089Spjd	mutex_exit(&hdl->sa_lock);
1899219089Spjd	return (error);
1900219089Spjd}
1901219089Spjd
1902219089Spjdint
1903219089Spjdsa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
1904219089Spjd{
1905219089Spjd	int error;
1906219089Spjd
1907219089Spjd	mutex_enter(&hdl->sa_lock);
1908219089Spjd	error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
1909219089Spjd	    NULL, 0, tx);
1910219089Spjd	mutex_exit(&hdl->sa_lock);
1911219089Spjd	return (error);
1912219089Spjd}
1913219089Spjd
1914219089Spjdvoid
1915219089Spjdsa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
1916219089Spjd{
1917219089Spjd	dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
1918219089Spjd}
1919219089Spjd
1920219089Spjdvoid
1921219089Spjdsa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
1922219089Spjd{
1923219089Spjd	dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
1924219089Spjd	    blksize, nblocks);
1925219089Spjd}
1926219089Spjd
1927219089Spjdvoid
1928219089Spjdsa_set_userp(sa_handle_t *hdl, void *ptr)
1929219089Spjd{
1930219089Spjd	hdl->sa_userp = ptr;
1931219089Spjd}
1932219089Spjd
1933219089Spjddmu_buf_t *
1934219089Spjdsa_get_db(sa_handle_t *hdl)
1935219089Spjd{
1936219089Spjd	return ((dmu_buf_t *)hdl->sa_bonus);
1937219089Spjd}
1938219089Spjd
1939219089Spjdvoid *
1940219089Spjdsa_get_userdata(sa_handle_t *hdl)
1941219089Spjd{
1942219089Spjd	return (hdl->sa_userp);
1943219089Spjd}
1944219089Spjd
1945219089Spjdvoid
1946219089Spjdsa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
1947219089Spjd{
1948219089Spjd	ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
1949219089Spjd	os->os_sa->sa_update_cb = func;
1950219089Spjd}
1951219089Spjd
1952219089Spjdvoid
1953219089Spjdsa_register_update_callback(objset_t *os, sa_update_cb_t *func)
1954219089Spjd{
1955219089Spjd
1956219089Spjd	mutex_enter(&os->os_sa->sa_lock);
1957219089Spjd	sa_register_update_callback_locked(os, func);
1958219089Spjd	mutex_exit(&os->os_sa->sa_lock);
1959219089Spjd}
1960219089Spjd
1961219089Spjduint64_t
1962219089Spjdsa_handle_object(sa_handle_t *hdl)
1963219089Spjd{
1964219089Spjd	return (hdl->sa_bonus->db_object);
1965219089Spjd}
1966219089Spjd
1967219089Spjdboolean_t
1968219089Spjdsa_enabled(objset_t *os)
1969219089Spjd{
1970219089Spjd	return (os->os_sa == NULL);
1971219089Spjd}
1972219089Spjd
1973219089Spjdint
1974219089Spjdsa_set_sa_object(objset_t *os, uint64_t sa_object)
1975219089Spjd{
1976219089Spjd	sa_os_t *sa = os->os_sa;
1977219089Spjd
1978219089Spjd	if (sa->sa_master_obj)
1979219089Spjd		return (1);
1980219089Spjd
1981219089Spjd	sa->sa_master_obj = sa_object;
1982219089Spjd
1983219089Spjd	return (0);
1984219089Spjd}
1985219089Spjd
1986219089Spjdint
1987219089Spjdsa_hdrsize(void *arg)
1988219089Spjd{
1989219089Spjd	sa_hdr_phys_t *hdr = arg;
1990219089Spjd
1991219089Spjd	return (SA_HDR_SIZE(hdr));
1992219089Spjd}
1993219089Spjd
1994219089Spjdvoid
1995219089Spjdsa_handle_lock(sa_handle_t *hdl)
1996219089Spjd{
1997219089Spjd	ASSERT(hdl);
1998219089Spjd	mutex_enter(&hdl->sa_lock);
1999219089Spjd}
2000219089Spjd
2001219089Spjdvoid
2002219089Spjdsa_handle_unlock(sa_handle_t *hdl)
2003219089Spjd{
2004219089Spjd	ASSERT(hdl);
2005219089Spjd	mutex_exit(&hdl->sa_lock);
2006219089Spjd}
2007