1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21236884Smm
22219089Spjd/*
23219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24226724Smm * Portions Copyright 2011 iXsystems, Inc
25249195Smm * Copyright (c) 2013 by Delphix. All rights reserved.
26288549Smav * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27297112Smav * Copyright (c) 2014 Integros [integros.com]
28219089Spjd */
29219089Spjd
30219089Spjd#include <sys/zfs_context.h>
31219089Spjd#include <sys/types.h>
32219089Spjd#include <sys/param.h>
33219089Spjd#include <sys/systm.h>
34219089Spjd#include <sys/sysmacros.h>
35219089Spjd#include <sys/dmu.h>
36219089Spjd#include <sys/dmu_impl.h>
37219089Spjd#include <sys/dmu_objset.h>
38219089Spjd#include <sys/dbuf.h>
39219089Spjd#include <sys/dnode.h>
40219089Spjd#include <sys/zap.h>
41219089Spjd#include <sys/sa.h>
42219089Spjd#include <sys/sunddi.h>
43219089Spjd#include <sys/sa_impl.h>
44219089Spjd#include <sys/dnode.h>
45219089Spjd#include <sys/errno.h>
46219089Spjd#include <sys/zfs_context.h>
47219089Spjd
48219089Spjd/*
49219089Spjd * ZFS System attributes:
50219089Spjd *
51219089Spjd * A generic mechanism to allow for arbitrary attributes
52219089Spjd * to be stored in a dnode.  The data will be stored in the bonus buffer of
53219089Spjd * the dnode and if necessary a special "spill" block will be used to handle
54219089Spjd * overflow situations.  The spill block will be sized to fit the data
55219089Spjd * from 512 - 128K.  When a spill block is used the BP (blkptr_t) for the
56219089Spjd * spill block is stored at the end of the current bonus buffer.  Any
57219089Spjd * attributes that would be in the way of the blkptr_t will be relocated
58219089Spjd * into the spill block.
59219089Spjd *
60219089Spjd * Attribute registration:
61219089Spjd *
62219089Spjd * Stored persistently on a per dataset basis
63219089Spjd * a mapping between attribute "string" names and their actual attribute
64219089Spjd * numeric values, length, and byteswap function.  The names are only used
65219089Spjd * during registration.  All  attributes are known by their unique attribute
66219089Spjd * id value.  If an attribute can have a variable size then the value
67219089Spjd * 0 will be used to indicate this.
68219089Spjd *
69219089Spjd * Attribute Layout:
70219089Spjd *
71219089Spjd * Attribute layouts are a way to compactly store multiple attributes, but
72219089Spjd * without taking the overhead associated with managing each attribute
73219089Spjd * individually.  Since you will typically have the same set of attributes
74219089Spjd * stored in the same order a single table will be used to represent that
75219089Spjd * layout.  The ZPL for example will usually have only about 10 different
76219089Spjd * layouts (regular files, device files, symlinks,
77219089Spjd * regular files + scanstamp, files/dir with extended attributes, and then
78219089Spjd * you have the possibility of all of those minus ACL, because it would
79219089Spjd * be kicked out into the spill block)
80219089Spjd *
81219089Spjd * Layouts are simply an array of the attributes and their
82219089Spjd * ordering i.e. [0, 1, 4, 5, 2]
83219089Spjd *
 * Each distinct layout is given a unique layout number and that is what is
 * stored in the header at the beginning of the SA data buffer.
86219089Spjd *
87219089Spjd * A layout only covers a single dbuf (bonus or spill).  If a set of
88219089Spjd * attributes is split up between the bonus buffer and a spill buffer then
89219089Spjd * two different layouts will be used.  This allows us to byteswap the
90219089Spjd * spill without looking at the bonus buffer and keeps the on disk format of
91219089Spjd * the bonus and spill buffer the same.
92219089Spjd *
 * Adding a single attribute will cause the entire set of attributes to
 * be rewritten and could result in a new layout number being constructed
 * as part of the rewrite if no such layout exists for the new set of
 * attributes.  The new attribute will be appended to the end of the already
 * existing attributes.
98219089Spjd *
 * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes.  There should be a small number of
 * known layouts and the set of attributes is assumed to typically be quite
 * small.
103219089Spjd *
104219089Spjd * The registered attributes and layout "table" information is maintained
105219089Spjd * in core and a special "sa_os_t" is attached to the objset_t.
106219089Spjd *
107219089Spjd * A special interface is provided to allow for quickly applying
108219089Spjd * a large set of attributes at once.  sa_replace_all_by_template() is
109219089Spjd * used to set an array of attributes.  This is used by the ZPL when
110219089Spjd * creating a brand new file.  The template that is passed into the function
111219089Spjd * specifies the attribute, size for variable length attributes, location of
112219089Spjd * data and special "data locator" function if the data isn't in a contiguous
113219089Spjd * location.
114219089Spjd *
115219089Spjd * Byteswap implications:
116251631Sdelphij *
117219089Spjd * Since the SA attributes are not entirely self describing we can't do
118219089Spjd * the normal byteswap processing.  The special ZAP layout attribute and
119219089Spjd * attribute registration attributes define the byteswap function and the
120219089Spjd * size of the attributes, unless it is variable sized.
121219089Spjd * The normal ZFS byteswapping infrastructure assumes you don't need
122219089Spjd * to read any objects in order to do the necessary byteswapping.  Whereas
123219089Spjd * SA attributes can only be properly byteswapped if the dataset is opened
124219089Spjd * and the layout/attribute ZAP attributes are available.  Because of this
125219089Spjd * the SA attributes will be byteswapped when they are first accessed by
126219089Spjd * the SA code that will read the SA data.
127219089Spjd */
128219089Spjd
/*
 * Function type used for per-attribute callbacks.  The callback receives
 * the buffer header, the address of the attribute data, the attribute type,
 * its length and length index, a boolean flag and an opaque user pointer.
 * NOTE(review): the meaning of the unnamed boolean_t is not visible in this
 * part of the file; confirm against the code that invokes the callback.
 */
typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
    uint16_t length, int length_idx, boolean_t, void *userp);

/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions.
 */
static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
    void *data);
static void sa_idx_tab_rele(objset_t *os, void *arg);
static void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
    int buflen);
static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
    uint16_t buflen, dmu_tx_t *tx);
142219089Spjd
/*
 * Table of byteswap routines: 64-, 32-, 16- and 8-bit integer arrays,
 * followed by ZFS ACLs.
 * NOTE(review): presumably indexed by the byteswap type recorded for each
 * registered attribute (e.g. SA_UINT64_ARRAY, SA_UINT8_ARRAY); confirm the
 * ordering against that enum before adding entries.
 */
arc_byteswap_func_t *sa_bswap_table[] = {
	byteswap_uint64_array,
	byteswap_uint32_array,
	byteswap_uint16_array,
	byteswap_uint8_array,
	zfs_acl_byteswap,
};
150219089Spjd
/*
 * Copy l bytes of attribute data from s to t.
 *
 * When no data locator (f) is supplied, 8- and 16-byte values are copied
 * with direct 64-bit loads/stores and every other length goes through
 * bcopy().  When a locator is supplied the copy is delegated to
 * sa_copy_data().
 *
 * The expansion is a single statement (do { } while (0)) so the macro can
 * safely be used as the body of an unbraced if/else; callers must supply
 * the trailing semicolon.  Arguments are parenthesized but may still be
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define	SA_COPY_DATA(f, s, t, l) \
	do { \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else { \
			sa_copy_data((f), (s), (t), (l)); \
		} \
	} while (0)
166219089Spjd
/*
 * This table is fixed and cannot be changed.  Its purpose is to
 * allow the SA code to work with both old/new ZPL file systems.
 * It contains the list of legacy attributes.  These attributes aren't
 * stored in the "attribute" registry zap objects, since older ZPL file systems
 * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
 * use this static table.
 *
 * Each entry gives, in order: the attribute name, its size in bytes, its
 * byteswap type and a number that runs from 0 to 15 (presumably the
 * attribute id; see sa_attr_reg_t for the authoritative field names).
 */
sa_attr_reg_t sa_legacy_attrs[] = {
	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
};
193219089Spjd
/*
 * This is only used for objects of type DMU_OT_ZNODE.  It lists the sixteen
 * legacy attribute numbers (0 through 15) in ascending order.
 */
sa_attr_type_t sa_legacy_zpl_layout[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
200219089Spjd
/*
 * Special dummy layout used for buffers with no attributes.  The array
 * holds a single placeholder element with value 0.
 */
sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
205219089Spjd
/* Number of entries in sa_legacy_attrs[]; keep in sync with that table. */
static int sa_legacy_attr_count = 16;
/* kmem cache for sa_handle_t objects; created by sa_cache_init(). */
static kmem_cache_t *sa_cache = NULL;
208219089Spjd
209219089Spjd/*ARGSUSED*/
210219089Spjdstatic int
211219089Spjdsa_cache_constructor(void *buf, void *unused, int kmflag)
212219089Spjd{
213219089Spjd	sa_handle_t *hdl = buf;
214219089Spjd
215219089Spjd	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
216219089Spjd	return (0);
217219089Spjd}
218219089Spjd
219219089Spjd/*ARGSUSED*/
220219089Spjdstatic void
221219089Spjdsa_cache_destructor(void *buf, void *unused)
222219089Spjd{
223219089Spjd	sa_handle_t *hdl = buf;
224219089Spjd	mutex_destroy(&hdl->sa_lock);
225219089Spjd}
226219089Spjd
227219089Spjdvoid
228219089Spjdsa_cache_init(void)
229219089Spjd{
230219089Spjd	sa_cache = kmem_cache_create("sa_cache",
231219089Spjd	    sizeof (sa_handle_t), 0, sa_cache_constructor,
232219089Spjd	    sa_cache_destructor, NULL, NULL, NULL, 0);
233219089Spjd}
234219089Spjd
235219089Spjdvoid
236219089Spjdsa_cache_fini(void)
237219089Spjd{
238219089Spjd	if (sa_cache)
239219089Spjd		kmem_cache_destroy(sa_cache);
240219089Spjd}
241219089Spjd
242219089Spjdstatic int
243219089Spjdlayout_num_compare(const void *arg1, const void *arg2)
244219089Spjd{
245219089Spjd	const sa_lot_t *node1 = arg1;
246219089Spjd	const sa_lot_t *node2 = arg2;
247219089Spjd
248219089Spjd	if (node1->lot_num > node2->lot_num)
249219089Spjd		return (1);
250219089Spjd	else if (node1->lot_num < node2->lot_num)
251219089Spjd		return (-1);
252219089Spjd	return (0);
253219089Spjd}
254219089Spjd
255219089Spjdstatic int
256219089Spjdlayout_hash_compare(const void *arg1, const void *arg2)
257219089Spjd{
258219089Spjd	const sa_lot_t *node1 = arg1;
259219089Spjd	const sa_lot_t *node2 = arg2;
260219089Spjd
261219089Spjd	if (node1->lot_hash > node2->lot_hash)
262219089Spjd		return (1);
263219089Spjd	if (node1->lot_hash < node2->lot_hash)
264219089Spjd		return (-1);
265219089Spjd	if (node1->lot_instance > node2->lot_instance)
266219089Spjd		return (1);
267219089Spjd	if (node1->lot_instance < node2->lot_instance)
268219089Spjd		return (-1);
269219089Spjd	return (0);
270219089Spjd}
271219089Spjd
272219089Spjdboolean_t
273219089Spjdsa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
274219089Spjd{
275219089Spjd	int i;
276219089Spjd
277219089Spjd	if (count != tbf->lot_attr_count)
278219089Spjd		return (1);
279219089Spjd
280219089Spjd	for (i = 0; i != count; i++) {
281219089Spjd		if (attrs[i] != tbf->lot_attrs[i])
282219089Spjd			return (1);
283219089Spjd	}
284219089Spjd	return (0);
285219089Spjd}
286219089Spjd
/*
 * Per-attribute hash contribution: the CRC64 table entry selected by the
 * low byte of the complemented attribute number.  The argument is
 * parenthesized so that compound expressions hash as a single value.
 */
#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ (attr)) & 0xFF])
288219089Spjd
289219089Spjdstatic uint64_t
290219089Spjdsa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
291219089Spjd{
292219089Spjd	int i;
293219089Spjd	uint64_t crc = -1ULL;
294219089Spjd
295219089Spjd	for (i = 0; i != attr_count; i++)
296219089Spjd		crc ^= SA_ATTR_HASH(attrs[i]);
297219089Spjd
298219089Spjd	return (crc);
299219089Spjd}
300219089Spjd
301219089Spjdstatic int
302219089Spjdsa_get_spill(sa_handle_t *hdl)
303219089Spjd{
304219089Spjd	int rc;
305219089Spjd	if (hdl->sa_spill == NULL) {
306219089Spjd		if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
307219089Spjd		    &hdl->sa_spill)) == 0)
308219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
309219089Spjd	} else {
310219089Spjd		rc = 0;
311219089Spjd	}
312219089Spjd
313219089Spjd	return (rc);
314219089Spjd}
315219089Spjd
316219089Spjd/*
317219089Spjd * Main attribute lookup/update function
318219089Spjd * returns 0 for success or non zero for failures
319219089Spjd *
320219089Spjd * Operates on bulk array, first failure will abort further processing
321219089Spjd */
322219089Spjdint
323219089Spjdsa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
324219089Spjd    sa_data_op_t data_op, dmu_tx_t *tx)
325219089Spjd{
326219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
327219089Spjd	int i;
328219089Spjd	int error = 0;
329219089Spjd	sa_buf_type_t buftypes;
330219089Spjd
331219089Spjd	buftypes = 0;
332219089Spjd
333219089Spjd	ASSERT(count > 0);
334219089Spjd	for (i = 0; i != count; i++) {
335219089Spjd		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
336219089Spjd
337219089Spjd		bulk[i].sa_addr = NULL;
338219089Spjd		/* First check the bonus buffer */
339219089Spjd
340219089Spjd		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
341219089Spjd		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
342219089Spjd			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
343219089Spjd			    SA_GET_HDR(hdl, SA_BONUS),
344219089Spjd			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
345219089Spjd			if (tx && !(buftypes & SA_BONUS)) {
346219089Spjd				dmu_buf_will_dirty(hdl->sa_bonus, tx);
347219089Spjd				buftypes |= SA_BONUS;
348219089Spjd			}
349219089Spjd		}
350219089Spjd		if (bulk[i].sa_addr == NULL &&
351219089Spjd		    ((error = sa_get_spill(hdl)) == 0)) {
352219089Spjd			if (TOC_ATTR_PRESENT(
353219089Spjd			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
354219089Spjd				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
355219089Spjd				    SA_GET_HDR(hdl, SA_SPILL),
356219089Spjd				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
357219089Spjd				if (tx && !(buftypes & SA_SPILL) &&
358219089Spjd				    bulk[i].sa_size == bulk[i].sa_length) {
359219089Spjd					dmu_buf_will_dirty(hdl->sa_spill, tx);
360219089Spjd					buftypes |= SA_SPILL;
361219089Spjd				}
362219089Spjd			}
363219089Spjd		}
364219089Spjd		if (error && error != ENOENT) {
365219089Spjd			return ((error == ECKSUM) ? EIO : error);
366219089Spjd		}
367219089Spjd
368219089Spjd		switch (data_op) {
369219089Spjd		case SA_LOOKUP:
370219089Spjd			if (bulk[i].sa_addr == NULL)
371249195Smm				return (SET_ERROR(ENOENT));
372219089Spjd			if (bulk[i].sa_data) {
373219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
374219089Spjd				    bulk[i].sa_addr, bulk[i].sa_data,
375219089Spjd				    bulk[i].sa_size);
376219089Spjd			}
377219089Spjd			continue;
378219089Spjd
379219089Spjd		case SA_UPDATE:
380219089Spjd			/* existing rewrite of attr */
381219089Spjd			if (bulk[i].sa_addr &&
382219089Spjd			    bulk[i].sa_size == bulk[i].sa_length) {
383219089Spjd				SA_COPY_DATA(bulk[i].sa_data_func,
384219089Spjd				    bulk[i].sa_data, bulk[i].sa_addr,
385219089Spjd				    bulk[i].sa_length);
386219089Spjd				continue;
387219089Spjd			} else if (bulk[i].sa_addr) { /* attr size change */
388219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
389219089Spjd				    SA_REPLACE, bulk[i].sa_data_func,
390219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
391219089Spjd			} else { /* adding new attribute */
392219089Spjd				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
393219089Spjd				    SA_ADD, bulk[i].sa_data_func,
394219089Spjd				    bulk[i].sa_data, bulk[i].sa_length, tx);
395219089Spjd			}
396219089Spjd			if (error)
397219089Spjd				return (error);
398219089Spjd			break;
399219089Spjd		}
400219089Spjd	}
401219089Spjd	return (error);
402219089Spjd}
403219089Spjd
404219089Spjdstatic sa_lot_t *
405219089Spjdsa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
406219089Spjd    uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
407219089Spjd{
408219089Spjd	sa_os_t *sa = os->os_sa;
409219089Spjd	sa_lot_t *tb, *findtb;
410219089Spjd	int i;
411219089Spjd	avl_index_t loc;
412219089Spjd
413219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
414219089Spjd	tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
415219089Spjd	tb->lot_attr_count = attr_count;
416219089Spjd	tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
417219089Spjd	    KM_SLEEP);
418219089Spjd	bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
419219089Spjd	tb->lot_num = lot_num;
420219089Spjd	tb->lot_hash = hash;
421219089Spjd	tb->lot_instance = 0;
422219089Spjd
423219089Spjd	if (zapadd) {
424219089Spjd		char attr_name[8];
425219089Spjd
426219089Spjd		if (sa->sa_layout_attr_obj == 0) {
427236884Smm			sa->sa_layout_attr_obj = zap_create_link(os,
428236884Smm			    DMU_OT_SA_ATTR_LAYOUTS,
429236884Smm			    sa->sa_master_obj, SA_LAYOUTS, tx);
430219089Spjd		}
431219089Spjd
432219089Spjd		(void) snprintf(attr_name, sizeof (attr_name),
433219089Spjd		    "%d", (int)lot_num);
434219089Spjd		VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
435219089Spjd		    attr_name, 2, attr_count, attrs, tx));
436219089Spjd	}
437219089Spjd
438219089Spjd	list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
439219089Spjd	    offsetof(sa_idx_tab_t, sa_next));
440219089Spjd
441219089Spjd	for (i = 0; i != attr_count; i++) {
442219089Spjd		if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
443219089Spjd			tb->lot_var_sizes++;
444219089Spjd	}
445219089Spjd
446219089Spjd	avl_add(&sa->sa_layout_num_tree, tb);
447219089Spjd
448219089Spjd	/* verify we don't have a hash collision */
449219089Spjd	if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
450219089Spjd		for (; findtb && findtb->lot_hash == hash;
451219089Spjd		    findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
452219089Spjd			if (findtb->lot_instance != tb->lot_instance)
453219089Spjd				break;
454219089Spjd			tb->lot_instance++;
455219089Spjd		}
456219089Spjd	}
457219089Spjd	avl_add(&sa->sa_layout_hash_tree, tb);
458219089Spjd	return (tb);
459219089Spjd}
460219089Spjd
461219089Spjdstatic void
462219089Spjdsa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
463219089Spjd    int count, dmu_tx_t *tx, sa_lot_t **lot)
464219089Spjd{
465219089Spjd	sa_lot_t *tb, tbsearch;
466219089Spjd	avl_index_t loc;
467219089Spjd	sa_os_t *sa = os->os_sa;
468219089Spjd	boolean_t found = B_FALSE;
469219089Spjd
470219089Spjd	mutex_enter(&sa->sa_lock);
471219089Spjd	tbsearch.lot_hash = hash;
472219089Spjd	tbsearch.lot_instance = 0;
473219089Spjd	tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
474219089Spjd	if (tb) {
475219089Spjd		for (; tb && tb->lot_hash == hash;
476219089Spjd		    tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
477219089Spjd			if (sa_layout_equal(tb, attrs, count) == 0) {
478219089Spjd				found = B_TRUE;
479219089Spjd				break;
480219089Spjd			}
481219089Spjd		}
482219089Spjd	}
483219089Spjd	if (!found) {
484219089Spjd		tb = sa_add_layout_entry(os, attrs, count,
485219089Spjd		    avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
486219089Spjd	}
487219089Spjd	mutex_exit(&sa->sa_lock);
488219089Spjd	*lot = tb;
489219089Spjd}
490219089Spjd
491219089Spjdstatic int
492219089Spjdsa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
493219089Spjd{
494219089Spjd	int error;
495219089Spjd	uint32_t blocksize;
496219089Spjd
497219089Spjd	if (size == 0) {
498219089Spjd		blocksize = SPA_MINBLOCKSIZE;
499276081Sdelphij	} else if (size > SPA_OLD_MAXBLOCKSIZE) {
500219089Spjd		ASSERT(0);
501249195Smm		return (SET_ERROR(EFBIG));
502219089Spjd	} else {
503219089Spjd		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
504219089Spjd	}
505219089Spjd
506219089Spjd	error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
507219089Spjd	ASSERT(error == 0);
508219089Spjd	return (error);
509219089Spjd}
510219089Spjd
511219089Spjdstatic void
512219089Spjdsa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
513219089Spjd{
514219089Spjd	if (func == NULL) {
515219089Spjd		bcopy(datastart, target, buflen);
516219089Spjd	} else {
517219089Spjd		boolean_t start;
518219089Spjd		int bytes;
519219089Spjd		void *dataptr;
520219089Spjd		void *saptr = target;
521219089Spjd		uint32_t length;
522219089Spjd
523219089Spjd		start = B_TRUE;
524219089Spjd		bytes = 0;
525219089Spjd		while (bytes < buflen) {
526219089Spjd			func(&dataptr, &length, buflen, start, datastart);
527219089Spjd			bcopy(dataptr, saptr, length);
528219089Spjd			saptr = (void *)((caddr_t)saptr + length);
529219089Spjd			bytes += length;
530219089Spjd			start = B_FALSE;
531219089Spjd		}
532219089Spjd	}
533219089Spjd}
534219089Spjd
535219089Spjd/*
536219089Spjd * Determine several different sizes
537219089Spjd * first the sa header size
538219089Spjd * the number of bytes to be stored
539219089Spjd * if spill would occur the index in the attribute array is returned
540219089Spjd *
541219089Spjd * the boolean will_spill will be set when spilling is necessary.  It
542219089Spjd * is only set when the buftype is SA_BONUS
543219089Spjd */
544219089Spjdstatic int
545219089Spjdsa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
546219089Spjd    dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
547219089Spjd    boolean_t *will_spill)
548219089Spjd{
549219089Spjd	int var_size = 0;
550219089Spjd	int i;
551219089Spjd	int full_space;
552219089Spjd	int hdrsize;
553297101Smav	int extra_hdrsize;
554219089Spjd
555219089Spjd	if (buftype == SA_BONUS && sa->sa_force_spill) {
556219089Spjd		*total = 0;
557219089Spjd		*index = 0;
558219089Spjd		*will_spill = B_TRUE;
559219089Spjd		return (0);
560219089Spjd	}
561219089Spjd
562219089Spjd	*index = -1;
563219089Spjd	*total = 0;
564297101Smav	*will_spill = B_FALSE;
565219089Spjd
566297101Smav	extra_hdrsize = 0;
567219089Spjd	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
568219089Spjd	    sizeof (sa_hdr_phys_t);
569219089Spjd
570219089Spjd	full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
571246678Smm	ASSERT(IS_P2ALIGNED(full_space, 8));
572219089Spjd
573219089Spjd	for (i = 0; i != attr_count; i++) {
574219089Spjd		boolean_t is_var_sz;
575219089Spjd
576246678Smm		*total = P2ROUNDUP(*total, 8);
577246678Smm		*total += attr_desc[i].sa_length;
578297101Smav		if (*will_spill)
579297101Smav			continue;
580219089Spjd
581219089Spjd		is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
582219089Spjd		if (is_var_sz) {
583219089Spjd			var_size++;
584219089Spjd		}
585219089Spjd
586219089Spjd		if (is_var_sz && var_size > 1) {
587297101Smav			/*
588297101Smav			 * Don't worry that the spill block might overflow.
589297101Smav			 * It will be resized if needed in sa_build_layouts().
590297101Smav			 */
591297101Smav			if (buftype == SA_SPILL ||
592297101Smav			    P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
593219089Spjd			    *total < full_space) {
594246678Smm				/*
595246678Smm				 * Account for header space used by array of
596246678Smm				 * optional sizes of variable-length attributes.
597297101Smav				 * Record the extra header size in case this
598297101Smav				 * increase needs to be reversed due to
599297101Smav				 * spill-over.
600246678Smm				 */
601219089Spjd				hdrsize += sizeof (uint16_t);
602297101Smav				if (*index != -1)
603297101Smav					extra_hdrsize += sizeof (uint16_t);
604219089Spjd			} else {
605297101Smav				ASSERT(buftype == SA_BONUS);
606297101Smav				if (*index == -1)
607297101Smav					*index = i;
608297101Smav				*will_spill = B_TRUE;
609219089Spjd				continue;
610219089Spjd			}
611219089Spjd		}
612219089Spjd
613219089Spjd		/*
614219089Spjd		 * find index of where spill *could* occur.
615219089Spjd		 * Then continue to count of remainder attribute
616219089Spjd		 * space.  The sum is used later for sizing bonus
617219089Spjd		 * and spill buffer.
618219089Spjd		 */
619219089Spjd		if (buftype == SA_BONUS && *index == -1 &&
620226483Sdelphij		    (*total + P2ROUNDUP(hdrsize, 8)) >
621219089Spjd		    (full_space - sizeof (blkptr_t))) {
622219089Spjd			*index = i;
623219089Spjd		}
624219089Spjd
625226483Sdelphij		if ((*total + P2ROUNDUP(hdrsize, 8)) > full_space &&
626219089Spjd		    buftype == SA_BONUS)
627219089Spjd			*will_spill = B_TRUE;
628219089Spjd	}
629219089Spjd
630297101Smav	if (*will_spill)
631297101Smav		hdrsize -= extra_hdrsize;
632246678Smm
633219089Spjd	hdrsize = P2ROUNDUP(hdrsize, 8);
634219089Spjd	return (hdrsize);
635219089Spjd}
636219089Spjd
/*
 * Bytes needed in a buffer: attribute data plus the SA header.  Arguments
 * are parenthesized so that compound expressions are summed as intended.
 */
#define	BUF_SPACE_NEEDED(total, header) ((total) + (header))
638219089Spjd
639219089Spjd/*
640219089Spjd * Find layout that corresponds to ordering of attributes
641219089Spjd * If not found a new layout number is created and added to
642219089Spjd * persistent layout tables.
643219089Spjd */
644219089Spjdstatic int
645219089Spjdsa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
646219089Spjd    dmu_tx_t *tx)
647219089Spjd{
648219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
649219089Spjd	uint64_t hash;
650219089Spjd	sa_buf_type_t buftype;
651219089Spjd	sa_hdr_phys_t *sahdr;
652219089Spjd	void *data_start;
653219089Spjd	int buf_space;
654219089Spjd	sa_attr_type_t *attrs, *attrs_start;
655219089Spjd	int i, lot_count;
656247187Smm	int hdrsize;
657247187Smm	int spillhdrsize = 0;
658219089Spjd	int used;
659219089Spjd	dmu_object_type_t bonustype;
660219089Spjd	sa_lot_t *lot;
661219089Spjd	int len_idx;
662219089Spjd	int spill_used;
663219089Spjd	boolean_t spilling;
664219089Spjd
665219089Spjd	dmu_buf_will_dirty(hdl->sa_bonus, tx);
666219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
667219089Spjd
668219089Spjd	/* first determine bonus header size and sum of all attributes */
669219089Spjd	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
670219089Spjd	    SA_BONUS, &i, &used, &spilling);
671219089Spjd
672276081Sdelphij	if (used > SPA_OLD_MAXBLOCKSIZE)
673249195Smm		return (SET_ERROR(EFBIG));
674219089Spjd
675219089Spjd	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
676219089Spjd	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
677219089Spjd	    used + hdrsize, tx));
678219089Spjd
679219089Spjd	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
680219089Spjd	    bonustype == DMU_OT_SA);
681219089Spjd
682219089Spjd	/* setup and size spill buffer when needed */
683219089Spjd	if (spilling) {
684219089Spjd		boolean_t dummy;
685219089Spjd
686219089Spjd		if (hdl->sa_spill == NULL) {
687219089Spjd			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
688219089Spjd			    &hdl->sa_spill) == 0);
689219089Spjd		}
690219089Spjd		dmu_buf_will_dirty(hdl->sa_spill, tx);
691219089Spjd
692219089Spjd		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
693219089Spjd		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
694219089Spjd		    &spill_used, &dummy);
695219089Spjd
696276081Sdelphij		if (spill_used > SPA_OLD_MAXBLOCKSIZE)
697249195Smm			return (SET_ERROR(EFBIG));
698219089Spjd
699219089Spjd		buf_space = hdl->sa_spill->db_size - spillhdrsize;
700219089Spjd		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
701219089Spjd		    hdl->sa_spill->db_size)
702219089Spjd			VERIFY(0 == sa_resize_spill(hdl,
703219089Spjd			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
704219089Spjd	}
705219089Spjd
706219089Spjd	/* setup starting pointers to lay down data */
707219089Spjd	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
708219089Spjd	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
709219089Spjd	buftype = SA_BONUS;
710219089Spjd
711219089Spjd	if (spilling)
712219089Spjd		buf_space = (sa->sa_force_spill) ?
713219089Spjd		    0 : SA_BLKPTR_SPACE - hdrsize;
714219089Spjd	else
715219089Spjd		buf_space = hdl->sa_bonus->db_size - hdrsize;
716219089Spjd
717219089Spjd	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
718219089Spjd	    KM_SLEEP);
719219089Spjd	lot_count = 0;
720219089Spjd
721219089Spjd	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
722219089Spjd		uint16_t length;
723219089Spjd
724246678Smm		ASSERT(IS_P2ALIGNED(data_start, 8));
725246678Smm		ASSERT(IS_P2ALIGNED(buf_space, 8));
726219089Spjd		attrs[i] = attr_desc[i].sa_attr;
727219089Spjd		length = SA_REGISTERED_LEN(sa, attrs[i]);
728219089Spjd		if (length == 0)
729219089Spjd			length = attr_desc[i].sa_length;
730240632Savg		else
731240632Savg			VERIFY(length == attr_desc[i].sa_length);
732219089Spjd
733219089Spjd		if (buf_space < length) {  /* switch to spill buffer */
734246678Smm			VERIFY(spilling);
735219089Spjd			VERIFY(bonustype == DMU_OT_SA);
736219089Spjd			if (buftype == SA_BONUS && !sa->sa_force_spill) {
737219089Spjd				sa_find_layout(hdl->sa_os, hash, attrs_start,
738219089Spjd				    lot_count, tx, &lot);
739219089Spjd				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
740219089Spjd			}
741219089Spjd
742219089Spjd			buftype = SA_SPILL;
743219089Spjd			hash = -1ULL;
744219089Spjd			len_idx = 0;
745219089Spjd
746219089Spjd			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
747219089Spjd			sahdr->sa_magic = SA_MAGIC;
748219089Spjd			data_start = (void *)((uintptr_t)sahdr +
749219089Spjd			    spillhdrsize);
750219089Spjd			attrs_start = &attrs[i];
751219089Spjd			buf_space = hdl->sa_spill->db_size - spillhdrsize;
752219089Spjd			lot_count = 0;
753219089Spjd		}
754219089Spjd		hash ^= SA_ATTR_HASH(attrs[i]);
755219089Spjd		attr_desc[i].sa_addr = data_start;
756219089Spjd		attr_desc[i].sa_size = length;
757219089Spjd		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
758219089Spjd		    data_start, length);
759219089Spjd		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
760219089Spjd			sahdr->sa_lengths[len_idx++] = length;
761219089Spjd		}
762240632Savg		VERIFY((uintptr_t)data_start % 8 == 0);
763219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
764219089Spjd		    length), 8);
765219089Spjd		buf_space -= P2ROUNDUP(length, 8);
766219089Spjd		lot_count++;
767219089Spjd	}
768219089Spjd
769219089Spjd	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
770219089Spjd
771219089Spjd	/*
772219089Spjd	 * Verify that old znodes always have layout number 0.
773219089Spjd	 * Must be DMU_OT_SA for arbitrary layouts
774219089Spjd	 */
775219089Spjd	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
776219089Spjd	    (bonustype == DMU_OT_SA && lot->lot_num > 1));
777219089Spjd
778219089Spjd	if (bonustype == DMU_OT_SA) {
779219089Spjd		SA_SET_HDR(sahdr, lot->lot_num,
780219089Spjd		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
781219089Spjd	}
782219089Spjd
783219089Spjd	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
784219089Spjd	if (hdl->sa_bonus_tab) {
785219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
786219089Spjd		hdl->sa_bonus_tab = NULL;
787219089Spjd	}
788219089Spjd	if (!sa->sa_force_spill)
789219089Spjd		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
790219089Spjd	if (hdl->sa_spill) {
791219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
792219089Spjd		if (!spilling) {
793219089Spjd			/*
794219089Spjd			 * remove spill block that is no longer needed.
795219089Spjd			 */
796219089Spjd			dmu_buf_rele(hdl->sa_spill, NULL);
797219089Spjd			hdl->sa_spill = NULL;
798219089Spjd			hdl->sa_spill_tab = NULL;
799219089Spjd			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
800219089Spjd			    sa_handle_object(hdl), tx));
801219089Spjd		} else {
802219089Spjd			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
803219089Spjd		}
804219089Spjd	}
805219089Spjd
806219089Spjd	return (0);
807219089Spjd}
808219089Spjd
809219089Spjdstatic void
810219089Spjdsa_free_attr_table(sa_os_t *sa)
811219089Spjd{
812219089Spjd	int i;
813219089Spjd
814219089Spjd	if (sa->sa_attr_table == NULL)
815219089Spjd		return;
816219089Spjd
817219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
818219089Spjd		if (sa->sa_attr_table[i].sa_name)
819219089Spjd			kmem_free(sa->sa_attr_table[i].sa_name,
820219089Spjd			    strlen(sa->sa_attr_table[i].sa_name) + 1);
821219089Spjd	}
822219089Spjd
823219089Spjd	kmem_free(sa->sa_attr_table,
824219089Spjd	    sizeof (sa_attr_table_t) * sa->sa_num_attrs);
825219089Spjd
826219089Spjd	sa->sa_attr_table = NULL;
827219089Spjd}
828219089Spjd
829219089Spjdstatic int
830219089Spjdsa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
831219089Spjd{
832219089Spjd	sa_os_t *sa = os->os_sa;
833219089Spjd	uint64_t sa_attr_count = 0;
834247187Smm	uint64_t sa_reg_count = 0;
835219089Spjd	int error = 0;
836219089Spjd	uint64_t attr_value;
837219089Spjd	sa_attr_table_t *tb;
838219089Spjd	zap_cursor_t zc;
839219089Spjd	zap_attribute_t za;
840219089Spjd	int registered_count = 0;
841219089Spjd	int i;
842219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
843219089Spjd
844219089Spjd	sa->sa_user_table =
845219089Spjd	    kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
846219089Spjd	sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
847219089Spjd
848219089Spjd	if (sa->sa_reg_attr_obj != 0) {
849219089Spjd		error = zap_count(os, sa->sa_reg_attr_obj,
850219089Spjd		    &sa_attr_count);
851219089Spjd
852219089Spjd		/*
853219089Spjd		 * Make sure we retrieved a count and that it isn't zero
854219089Spjd		 */
855219089Spjd		if (error || (error == 0 && sa_attr_count == 0)) {
856219089Spjd			if (error == 0)
857249195Smm				error = SET_ERROR(EINVAL);
858219089Spjd			goto bail;
859219089Spjd		}
860219089Spjd		sa_reg_count = sa_attr_count;
861219089Spjd	}
862219089Spjd
863219089Spjd	if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
864219089Spjd		sa_attr_count += sa_legacy_attr_count;
865219089Spjd
866219089Spjd	/* Allocate attribute numbers for attributes that aren't registered */
867219089Spjd	for (i = 0; i != count; i++) {
868219089Spjd		boolean_t found = B_FALSE;
869219089Spjd		int j;
870219089Spjd
871219089Spjd		if (ostype == DMU_OST_ZFS) {
872219089Spjd			for (j = 0; j != sa_legacy_attr_count; j++) {
873219089Spjd				if (strcmp(reg_attrs[i].sa_name,
874219089Spjd				    sa_legacy_attrs[j].sa_name) == 0) {
875219089Spjd					sa->sa_user_table[i] =
876219089Spjd					    sa_legacy_attrs[j].sa_attr;
877219089Spjd					found = B_TRUE;
878219089Spjd				}
879219089Spjd			}
880219089Spjd		}
881219089Spjd		if (found)
882219089Spjd			continue;
883219089Spjd
884219089Spjd		if (sa->sa_reg_attr_obj)
885219089Spjd			error = zap_lookup(os, sa->sa_reg_attr_obj,
886219089Spjd			    reg_attrs[i].sa_name, 8, 1, &attr_value);
887219089Spjd		else
888249195Smm			error = SET_ERROR(ENOENT);
889219089Spjd		switch (error) {
890219089Spjd		case ENOENT:
891219089Spjd			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
892219089Spjd			sa_attr_count++;
893219089Spjd			break;
894219089Spjd		case 0:
895219089Spjd			sa->sa_user_table[i] = ATTR_NUM(attr_value);
896219089Spjd			break;
897219089Spjd		default:
898219089Spjd			goto bail;
899219089Spjd		}
900219089Spjd	}
901219089Spjd
902219089Spjd	sa->sa_num_attrs = sa_attr_count;
903219089Spjd	tb = sa->sa_attr_table =
904219089Spjd	    kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
905219089Spjd
906219089Spjd	/*
907219089Spjd	 * Attribute table is constructed from requested attribute list,
908219089Spjd	 * previously foreign registered attributes, and also the legacy
909219089Spjd	 * ZPL set of attributes.
910219089Spjd	 */
911219089Spjd
912219089Spjd	if (sa->sa_reg_attr_obj) {
913219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
914219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
915219089Spjd		    zap_cursor_advance(&zc)) {
916219089Spjd			uint64_t value;
917219089Spjd			value  = za.za_first_integer;
918219089Spjd
919219089Spjd			registered_count++;
920219089Spjd			tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
921219089Spjd			tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
922219089Spjd			tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
923219089Spjd			tb[ATTR_NUM(value)].sa_registered = B_TRUE;
924219089Spjd
925219089Spjd			if (tb[ATTR_NUM(value)].sa_name) {
926219089Spjd				continue;
927219089Spjd			}
928219089Spjd			tb[ATTR_NUM(value)].sa_name =
929219089Spjd			    kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
930219089Spjd			(void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
931219089Spjd			    strlen(za.za_name) +1);
932219089Spjd		}
933219089Spjd		zap_cursor_fini(&zc);
934219089Spjd		/*
935219089Spjd		 * Make sure we processed the correct number of registered
936219089Spjd		 * attributes
937219089Spjd		 */
938219089Spjd		if (registered_count != sa_reg_count) {
939219089Spjd			ASSERT(error != 0);
940219089Spjd			goto bail;
941219089Spjd		}
942219089Spjd
943219089Spjd	}
944219089Spjd
945219089Spjd	if (ostype == DMU_OST_ZFS) {
946219089Spjd		for (i = 0; i != sa_legacy_attr_count; i++) {
947219089Spjd			if (tb[i].sa_name)
948219089Spjd				continue;
949219089Spjd			tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
950219089Spjd			tb[i].sa_length = sa_legacy_attrs[i].sa_length;
951219089Spjd			tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
952219089Spjd			tb[i].sa_registered = B_FALSE;
953219089Spjd			tb[i].sa_name =
954219089Spjd			    kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
955219089Spjd			    KM_SLEEP);
956219089Spjd			(void) strlcpy(tb[i].sa_name,
957219089Spjd			    sa_legacy_attrs[i].sa_name,
958219089Spjd			    strlen(sa_legacy_attrs[i].sa_name) + 1);
959219089Spjd		}
960219089Spjd	}
961219089Spjd
962219089Spjd	for (i = 0; i != count; i++) {
963219089Spjd		sa_attr_type_t attr_id;
964219089Spjd
965219089Spjd		attr_id = sa->sa_user_table[i];
966219089Spjd		if (tb[attr_id].sa_name)
967219089Spjd			continue;
968219089Spjd
969219089Spjd		tb[attr_id].sa_length = reg_attrs[i].sa_length;
970219089Spjd		tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
971219089Spjd		tb[attr_id].sa_attr = attr_id;
972219089Spjd		tb[attr_id].sa_name =
973219089Spjd		    kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
974219089Spjd		(void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
975219089Spjd		    strlen(reg_attrs[i].sa_name) + 1);
976219089Spjd	}
977219089Spjd
978219089Spjd	sa->sa_need_attr_registration =
979219089Spjd	    (sa_attr_count != registered_count);
980219089Spjd
981219089Spjd	return (0);
982219089Spjdbail:
983219089Spjd	kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t));
984219089Spjd	sa->sa_user_table = NULL;
985219089Spjd	sa_free_attr_table(sa);
986219089Spjd	return ((error != 0) ? error : EINVAL);
987219089Spjd}
988219089Spjd
989219089Spjdint
990219089Spjdsa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
991219089Spjd    sa_attr_type_t **user_table)
992219089Spjd{
993219089Spjd	zap_cursor_t zc;
994219089Spjd	zap_attribute_t za;
995219089Spjd	sa_os_t *sa;
996219089Spjd	dmu_objset_type_t ostype = dmu_objset_type(os);
997219089Spjd	sa_attr_type_t *tb;
998219089Spjd	int error;
999219089Spjd
1000248571Smm	mutex_enter(&os->os_user_ptr_lock);
1001219089Spjd	if (os->os_sa) {
1002219089Spjd		mutex_enter(&os->os_sa->sa_lock);
1003248571Smm		mutex_exit(&os->os_user_ptr_lock);
1004219089Spjd		tb = os->os_sa->sa_user_table;
1005219089Spjd		mutex_exit(&os->os_sa->sa_lock);
1006219089Spjd		*user_table = tb;
1007219089Spjd		return (0);
1008219089Spjd	}
1009219089Spjd
1010219089Spjd	sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
1011219089Spjd	mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
1012219089Spjd	sa->sa_master_obj = sa_obj;
1013219089Spjd
1014219089Spjd	os->os_sa = sa;
1015219089Spjd	mutex_enter(&sa->sa_lock);
1016248571Smm	mutex_exit(&os->os_user_ptr_lock);
1017219089Spjd	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
1018219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
1019219089Spjd	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
1020219089Spjd	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
1021219089Spjd
1022219089Spjd	if (sa_obj) {
1023219089Spjd		error = zap_lookup(os, sa_obj, SA_LAYOUTS,
1024219089Spjd		    8, 1, &sa->sa_layout_attr_obj);
1025219089Spjd		if (error != 0 && error != ENOENT)
1026219089Spjd			goto fail;
1027219089Spjd		error = zap_lookup(os, sa_obj, SA_REGISTRY,
1028219089Spjd		    8, 1, &sa->sa_reg_attr_obj);
1029219089Spjd		if (error != 0 && error != ENOENT)
1030219089Spjd			goto fail;
1031219089Spjd	}
1032219089Spjd
1033219089Spjd	if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0)
1034219089Spjd		goto fail;
1035219089Spjd
1036219089Spjd	if (sa->sa_layout_attr_obj != 0) {
1037219089Spjd		uint64_t layout_count;
1038219089Spjd
1039219089Spjd		error = zap_count(os, sa->sa_layout_attr_obj,
1040219089Spjd		    &layout_count);
1041219089Spjd
1042219089Spjd		/*
1043219089Spjd		 * Layout number count should be > 0
1044219089Spjd		 */
1045219089Spjd		if (error || (error == 0 && layout_count == 0)) {
1046219089Spjd			if (error == 0)
1047249195Smm				error = SET_ERROR(EINVAL);
1048219089Spjd			goto fail;
1049219089Spjd		}
1050219089Spjd
1051219089Spjd		for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
1052219089Spjd		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
1053219089Spjd		    zap_cursor_advance(&zc)) {
1054219089Spjd			sa_attr_type_t *lot_attrs;
1055219089Spjd			uint64_t lot_num;
1056219089Spjd
1057219089Spjd			lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
1058219089Spjd			    za.za_num_integers, KM_SLEEP);
1059219089Spjd
1060219089Spjd			if ((error = (zap_lookup(os, sa->sa_layout_attr_obj,
1061219089Spjd			    za.za_name, 2, za.za_num_integers,
1062219089Spjd			    lot_attrs))) != 0) {
1063219089Spjd				kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1064219089Spjd				    za.za_num_integers);
1065219089Spjd				break;
1066219089Spjd			}
1067219089Spjd			VERIFY(ddi_strtoull(za.za_name, NULL, 10,
1068219089Spjd			    (unsigned long long *)&lot_num) == 0);
1069219089Spjd
1070219089Spjd			(void) sa_add_layout_entry(os, lot_attrs,
1071219089Spjd			    za.za_num_integers, lot_num,
1072219089Spjd			    sa_layout_info_hash(lot_attrs,
1073219089Spjd			    za.za_num_integers), B_FALSE, NULL);
1074219089Spjd			kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
1075219089Spjd			    za.za_num_integers);
1076219089Spjd		}
1077219089Spjd		zap_cursor_fini(&zc);
1078219089Spjd
1079219089Spjd		/*
1080219089Spjd		 * Make sure layout count matches number of entries added
1081219089Spjd		 * to AVL tree
1082219089Spjd		 */
1083219089Spjd		if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) {
1084219089Spjd			ASSERT(error != 0);
1085219089Spjd			goto fail;
1086219089Spjd		}
1087219089Spjd	}
1088219089Spjd
1089219089Spjd	/* Add special layout number for old ZNODES */
1090219089Spjd	if (ostype == DMU_OST_ZFS) {
1091219089Spjd		(void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
1092219089Spjd		    sa_legacy_attr_count, 0,
1093219089Spjd		    sa_layout_info_hash(sa_legacy_zpl_layout,
1094219089Spjd		    sa_legacy_attr_count), B_FALSE, NULL);
1095219089Spjd
1096219089Spjd		(void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
1097219089Spjd		    0, B_FALSE, NULL);
1098219089Spjd	}
1099219089Spjd	*user_table = os->os_sa->sa_user_table;
1100219089Spjd	mutex_exit(&sa->sa_lock);
1101219089Spjd	return (0);
1102219089Spjdfail:
1103219089Spjd	os->os_sa = NULL;
1104219089Spjd	sa_free_attr_table(sa);
1105219089Spjd	if (sa->sa_user_table)
1106219089Spjd		kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1107219089Spjd	mutex_exit(&sa->sa_lock);
1108269218Sdelphij	avl_destroy(&sa->sa_layout_hash_tree);
1109269218Sdelphij	avl_destroy(&sa->sa_layout_num_tree);
1110269218Sdelphij	mutex_destroy(&sa->sa_lock);
1111219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1112219089Spjd	return ((error == ECKSUM) ? EIO : error);
1113219089Spjd}
1114219089Spjd
1115219089Spjdvoid
1116219089Spjdsa_tear_down(objset_t *os)
1117219089Spjd{
1118219089Spjd	sa_os_t *sa = os->os_sa;
1119219089Spjd	sa_lot_t *layout;
1120219089Spjd	void *cookie;
1121219089Spjd
1122219089Spjd	kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
1123219089Spjd
1124219089Spjd	/* Free up attr table */
1125219089Spjd
1126219089Spjd	sa_free_attr_table(sa);
1127219089Spjd
1128219089Spjd	cookie = NULL;
1129219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
1130219089Spjd		sa_idx_tab_t *tab;
1131219089Spjd		while (tab = list_head(&layout->lot_idx_tab)) {
1132219089Spjd			ASSERT(refcount_count(&tab->sa_refcount));
1133219089Spjd			sa_idx_tab_rele(os, tab);
1134219089Spjd		}
1135219089Spjd	}
1136219089Spjd
1137219089Spjd	cookie = NULL;
1138219089Spjd	while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
1139219089Spjd		kmem_free(layout->lot_attrs,
1140219089Spjd		    sizeof (sa_attr_type_t) * layout->lot_attr_count);
1141219089Spjd		kmem_free(layout, sizeof (sa_lot_t));
1142219089Spjd	}
1143219089Spjd
1144219089Spjd	avl_destroy(&sa->sa_layout_hash_tree);
1145219089Spjd	avl_destroy(&sa->sa_layout_num_tree);
1146269218Sdelphij	mutex_destroy(&sa->sa_lock);
1147219089Spjd
1148219089Spjd	kmem_free(sa, sizeof (sa_os_t));
1149219089Spjd	os->os_sa = NULL;
1150219089Spjd}
1151219089Spjd
1152219089Spjdvoid
1153219089Spjdsa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
1154219089Spjd    uint16_t length, int length_idx, boolean_t var_length, void *userp)
1155219089Spjd{
1156219089Spjd	sa_idx_tab_t *idx_tab = userp;
1157219089Spjd
1158219089Spjd	if (var_length) {
1159219089Spjd		ASSERT(idx_tab->sa_variable_lengths);
1160219089Spjd		idx_tab->sa_variable_lengths[length_idx] = length;
1161219089Spjd	}
1162219089Spjd	TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx,
1163219089Spjd	    (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr));
1164219089Spjd}
1165219089Spjd
1166219089Spjdstatic void
1167219089Spjdsa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
1168219089Spjd    sa_iterfunc_t func, sa_lot_t *tab, void *userp)
1169219089Spjd{
1170219089Spjd	void *data_start;
1171219089Spjd	sa_lot_t *tb = tab;
1172219089Spjd	sa_lot_t search;
1173219089Spjd	avl_index_t loc;
1174219089Spjd	sa_os_t *sa = os->os_sa;
1175219089Spjd	int i;
1176219089Spjd	uint16_t *length_start = NULL;
1177219089Spjd	uint8_t length_idx = 0;
1178219089Spjd
1179219089Spjd	if (tab == NULL) {
1180219089Spjd		search.lot_num = SA_LAYOUT_NUM(hdr, type);
1181219089Spjd		tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1182219089Spjd		ASSERT(tb);
1183219089Spjd	}
1184219089Spjd
1185219089Spjd	if (IS_SA_BONUSTYPE(type)) {
1186219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
1187219089Spjd		    offsetof(sa_hdr_phys_t, sa_lengths) +
1188219089Spjd		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
1189219089Spjd		length_start = hdr->sa_lengths;
1190219089Spjd	} else {
1191219089Spjd		data_start = hdr;
1192219089Spjd	}
1193219089Spjd
1194219089Spjd	for (i = 0; i != tb->lot_attr_count; i++) {
1195219089Spjd		int attr_length, reg_length;
1196219089Spjd		uint8_t idx_len;
1197219089Spjd
1198219089Spjd		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
1199219089Spjd		if (reg_length) {
1200219089Spjd			attr_length = reg_length;
1201219089Spjd			idx_len = 0;
1202219089Spjd		} else {
1203219089Spjd			attr_length = length_start[length_idx];
1204219089Spjd			idx_len = length_idx++;
1205219089Spjd		}
1206219089Spjd
1207219089Spjd		func(hdr, data_start, tb->lot_attrs[i], attr_length,
1208219089Spjd		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);
1209219089Spjd
1210219089Spjd		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
1211219089Spjd		    attr_length), 8);
1212219089Spjd	}
1213219089Spjd}
1214219089Spjd
1215219089Spjd/*ARGSUSED*/
1216219089Spjdvoid
1217219089Spjdsa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
1218219089Spjd    uint16_t length, int length_idx, boolean_t variable_length, void *userp)
1219219089Spjd{
1220219089Spjd	sa_handle_t *hdl = userp;
1221219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1222219089Spjd
1223219089Spjd	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
1224219089Spjd}
1225219089Spjd
1226219089Spjdvoid
1227219089Spjdsa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
1228219089Spjd{
1229219089Spjd	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1230219089Spjd	dmu_buf_impl_t *db;
1231219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1232219089Spjd	int num_lengths = 1;
1233219089Spjd	int i;
1234219089Spjd
1235219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1236219089Spjd	if (sa_hdr_phys->sa_magic == SA_MAGIC)
1237219089Spjd		return;
1238219089Spjd
1239219089Spjd	db = SA_GET_DB(hdl, buftype);
1240219089Spjd
1241219089Spjd	if (buftype == SA_SPILL) {
1242219089Spjd		arc_release(db->db_buf, NULL);
1243219089Spjd		arc_buf_thaw(db->db_buf);
1244219089Spjd	}
1245219089Spjd
1246219089Spjd	sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
1247219089Spjd	sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
1248219089Spjd
1249219089Spjd	/*
1250219089Spjd	 * Determine number of variable lenghts in header
1251219089Spjd	 * The standard 8 byte header has one for free and a
1252219089Spjd	 * 16 byte header would have 4 + 1;
1253219089Spjd	 */
1254219089Spjd	if (SA_HDR_SIZE(sa_hdr_phys) > 8)
1255219089Spjd		num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
1256219089Spjd	for (i = 0; i != num_lengths; i++)
1257219089Spjd		sa_hdr_phys->sa_lengths[i] =
1258219089Spjd		    BSWAP_16(sa_hdr_phys->sa_lengths[i]);
1259219089Spjd
1260219089Spjd	sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
1261219089Spjd	    sa_byteswap_cb, NULL, hdl);
1262219089Spjd
1263219089Spjd	if (buftype == SA_SPILL)
1264219089Spjd		arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
1265219089Spjd}
1266219089Spjd
1267219089Spjdstatic int
1268219089Spjdsa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
1269219089Spjd{
1270219089Spjd	sa_hdr_phys_t *sa_hdr_phys;
1271219089Spjd	dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
1272219089Spjd	dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
1273219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1274219089Spjd	sa_idx_tab_t *idx_tab;
1275219089Spjd
1276219089Spjd	sa_hdr_phys = SA_GET_HDR(hdl, buftype);
1277219089Spjd
1278219089Spjd	mutex_enter(&sa->sa_lock);
1279219089Spjd
1280219089Spjd	/* Do we need to byteswap? */
1281219089Spjd
1282219089Spjd	/* only check if not old znode */
1283219089Spjd	if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
1284219089Spjd	    sa_hdr_phys->sa_magic != 0) {
1285219089Spjd		VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
1286219089Spjd		sa_byteswap(hdl, buftype);
1287219089Spjd	}
1288219089Spjd
1289219089Spjd	idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);
1290219089Spjd
1291219089Spjd	if (buftype == SA_BONUS)
1292219089Spjd		hdl->sa_bonus_tab = idx_tab;
1293219089Spjd	else
1294219089Spjd		hdl->sa_spill_tab = idx_tab;
1295219089Spjd
1296219089Spjd	mutex_exit(&sa->sa_lock);
1297219089Spjd	return (0);
1298219089Spjd}
1299219089Spjd
/*
 * Eviction callback installed on the dbuf user of shared handles (see
 * sa_handle_get_from_db()).  It panics unconditionally.
 */
/*ARGSUSED*/
static void
sa_evict(void *dbu)
{
	panic("evicting sa dbuf\n");
}
1306219089Spjd
1307219089Spjdstatic void
1308219089Spjdsa_idx_tab_rele(objset_t *os, void *arg)
1309219089Spjd{
1310219089Spjd	sa_os_t *sa = os->os_sa;
1311219089Spjd	sa_idx_tab_t *idx_tab = arg;
1312219089Spjd
1313219089Spjd	if (idx_tab == NULL)
1314219089Spjd		return;
1315219089Spjd
1316219089Spjd	mutex_enter(&sa->sa_lock);
1317219089Spjd	if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
1318219089Spjd		list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
1319219089Spjd		if (idx_tab->sa_variable_lengths)
1320219089Spjd			kmem_free(idx_tab->sa_variable_lengths,
1321219089Spjd			    sizeof (uint16_t) *
1322219089Spjd			    idx_tab->sa_layout->lot_var_sizes);
1323219089Spjd		refcount_destroy(&idx_tab->sa_refcount);
1324219089Spjd		kmem_free(idx_tab->sa_idx_tab,
1325219089Spjd		    sizeof (uint32_t) * sa->sa_num_attrs);
1326219089Spjd		kmem_free(idx_tab, sizeof (sa_idx_tab_t));
1327219089Spjd	}
1328219089Spjd	mutex_exit(&sa->sa_lock);
1329219089Spjd}
1330219089Spjd
1331219089Spjdstatic void
1332219089Spjdsa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
1333219089Spjd{
1334219089Spjd	sa_os_t *sa = os->os_sa;
1335219089Spjd
1336219089Spjd	ASSERT(MUTEX_HELD(&sa->sa_lock));
1337219089Spjd	(void) refcount_add(&idx_tab->sa_refcount, NULL);
1338219089Spjd}
1339219089Spjd
1340219089Spjdvoid
1341219089Spjdsa_handle_destroy(sa_handle_t *hdl)
1342219089Spjd{
1343288549Smav	dmu_buf_t *db = hdl->sa_bonus;
1344288549Smav
1345219089Spjd	mutex_enter(&hdl->sa_lock);
1346288549Smav	(void) dmu_buf_remove_user(db, &hdl->sa_dbu);
1347219089Spjd
1348288537Smav	if (hdl->sa_bonus_tab)
1349219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
1350288537Smav
1351288537Smav	if (hdl->sa_spill_tab)
1352219089Spjd		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
1353219089Spjd
1354219089Spjd	dmu_buf_rele(hdl->sa_bonus, NULL);
1355219089Spjd
1356219089Spjd	if (hdl->sa_spill)
1357219089Spjd		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
1358219089Spjd	mutex_exit(&hdl->sa_lock);
1359219089Spjd
1360219089Spjd	kmem_cache_free(sa_cache, hdl);
1361219089Spjd}
1362219089Spjd
1363219089Spjdint
1364219089Spjdsa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
1365219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1366219089Spjd{
1367219089Spjd	int error = 0;
1368219089Spjd	dmu_object_info_t doi;
1369288549Smav	sa_handle_t *handle = NULL;
1370219089Spjd
1371219089Spjd#ifdef ZFS_DEBUG
1372219089Spjd	dmu_object_info_from_db(db, &doi);
1373219089Spjd	ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
1374219089Spjd	    doi.doi_bonus_type == DMU_OT_ZNODE);
1375219089Spjd#endif
1376219089Spjd	/* find handle, if it exists */
1377219089Spjd	/* if one doesn't exist then create a new one, and initialize it */
1378219089Spjd
1379288549Smav	if (hdl_type == SA_HDL_SHARED)
1380288549Smav		handle = dmu_buf_get_user(db);
1381288549Smav
1382219089Spjd	if (handle == NULL) {
1383288549Smav		sa_handle_t *winner = NULL;
1384288549Smav
1385219089Spjd		handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
1386288589Smav		handle->sa_dbu.dbu_evict_func = NULL;
1387219089Spjd		handle->sa_userp = userp;
1388219089Spjd		handle->sa_bonus = db;
1389219089Spjd		handle->sa_os = os;
1390219089Spjd		handle->sa_spill = NULL;
1391288537Smav		handle->sa_bonus_tab = NULL;
1392288537Smav		handle->sa_spill_tab = NULL;
1393219089Spjd
1394219089Spjd		error = sa_build_index(handle, SA_BONUS);
1395219089Spjd
1396288549Smav		if (hdl_type == SA_HDL_SHARED) {
1397288549Smav			dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
1398288549Smav			winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
1399288549Smav		}
1400288549Smav
1401288549Smav		if (winner != NULL) {
1402219089Spjd			kmem_cache_free(sa_cache, handle);
1403288549Smav			handle = winner;
1404219089Spjd		}
1405219089Spjd	}
1406219089Spjd	*handlepp = handle;
1407219089Spjd
1408219089Spjd	return (error);
1409219089Spjd}
1410219089Spjd
1411219089Spjdint
1412219089Spjdsa_handle_get(objset_t *objset, uint64_t objid, void *userp,
1413219089Spjd    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
1414219089Spjd{
1415219089Spjd	dmu_buf_t *db;
1416219089Spjd	int error;
1417219089Spjd
1418219089Spjd	if (error = dmu_bonus_hold(objset, objid, NULL, &db))
1419219089Spjd		return (error);
1420219089Spjd
1421219089Spjd	return (sa_handle_get_from_db(objset, db, userp, hdl_type,
1422219089Spjd	    handlepp));
1423219089Spjd}
1424219089Spjd
1425219089Spjdint
1426219089Spjdsa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
1427219089Spjd{
1428219089Spjd	return (dmu_bonus_hold(objset, obj_num, tag, db));
1429219089Spjd}
1430219089Spjd
1431219089Spjdvoid
1432219089Spjdsa_buf_rele(dmu_buf_t *db, void *tag)
1433219089Spjd{
1434219089Spjd	dmu_buf_rele(db, tag);
1435219089Spjd}
1436219089Spjd
1437219089Spjdint
1438219089Spjdsa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
1439219089Spjd{
1440219089Spjd	ASSERT(hdl);
1441219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1442219089Spjd	return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
1443219089Spjd}
1444219089Spjd
1445219089Spjdint
1446219089Spjdsa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
1447219089Spjd{
1448219089Spjd	int error;
1449219089Spjd	sa_bulk_attr_t bulk;
1450219089Spjd
1451219089Spjd	bulk.sa_attr = attr;
1452219089Spjd	bulk.sa_data = buf;
1453219089Spjd	bulk.sa_length = buflen;
1454219089Spjd	bulk.sa_data_func = NULL;
1455219089Spjd
1456219089Spjd	ASSERT(hdl);
1457219089Spjd	mutex_enter(&hdl->sa_lock);
1458219089Spjd	error = sa_lookup_impl(hdl, &bulk, 1);
1459219089Spjd	mutex_exit(&hdl->sa_lock);
1460219089Spjd	return (error);
1461219089Spjd}
1462219089Spjd
#ifdef _KERNEL
/*
 * Look up a single attribute and move its value out through uio.  No
 * more than uio->uio_resid bytes are moved.  Returns the lookup error if
 * the attribute cannot be found, otherwise the result of uiomove().
 */
int
sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
{
	sa_bulk_attr_t request;
	int error;

	request.sa_data = NULL;
	request.sa_attr = attr;
	request.sa_data_func = NULL;

	ASSERT(hdl);

	mutex_enter(&hdl->sa_lock);
	error = sa_attr_op(hdl, &request, 1, SA_LOOKUP, NULL);
	if (error == 0) {
		error = uiomove((void *)request.sa_addr,
		    MIN(request.sa_size, uio->uio_resid), UIO_READ, uio);
	}
	mutex_exit(&hdl->sa_lock);

	return (error);
}
#endif
1486219089Spjd
1487219089Spjdvoid *
1488219089Spjdsa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data)
1489219089Spjd{
1490219089Spjd	sa_idx_tab_t *idx_tab;
1491219089Spjd	sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data;
1492219089Spjd	sa_os_t *sa = os->os_sa;
1493219089Spjd	sa_lot_t *tb, search;
1494219089Spjd	avl_index_t loc;
1495219089Spjd
1496219089Spjd	/*
1497219089Spjd	 * Deterimine layout number.  If SA node and header == 0 then
1498219089Spjd	 * force the index table to the dummy "1" empty layout.
1499219089Spjd	 *
1500219089Spjd	 * The layout number would only be zero for a newly created file
1501219089Spjd	 * that has not added any attributes yet, or with crypto enabled which
1502219089Spjd	 * doesn't write any attributes to the bonus buffer.
1503219089Spjd	 */
1504219089Spjd
1505219089Spjd	search.lot_num = SA_LAYOUT_NUM(hdr, bonustype);
1506219089Spjd
1507219089Spjd	tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
1508219089Spjd
1509219089Spjd	/* Verify header size is consistent with layout information */
1510219089Spjd	ASSERT(tb);
1511219089Spjd	ASSERT(IS_SA_BONUSTYPE(bonustype) &&
1512219089Spjd	    SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) ||
1513219089Spjd	    (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0));
1514219089Spjd
1515219089Spjd	/*
1516219089Spjd	 * See if any of the already existing TOC entries can be reused?
1517219089Spjd	 */
1518219089Spjd
1519219089Spjd	for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab;
1520219089Spjd	    idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) {
1521219089Spjd		boolean_t valid_idx = B_TRUE;
1522219089Spjd		int i;
1523219089Spjd
1524219089Spjd		if (tb->lot_var_sizes != 0 &&
1525219089Spjd		    idx_tab->sa_variable_lengths != NULL) {
1526219089Spjd			for (i = 0; i != tb->lot_var_sizes; i++) {
1527219089Spjd				if (hdr->sa_lengths[i] !=
1528219089Spjd				    idx_tab->sa_variable_lengths[i]) {
1529219089Spjd					valid_idx = B_FALSE;
1530219089Spjd					break;
1531219089Spjd				}
1532219089Spjd			}
1533219089Spjd		}
1534219089Spjd		if (valid_idx) {
1535219089Spjd			sa_idx_tab_hold(os, idx_tab);
1536219089Spjd			return (idx_tab);
1537219089Spjd		}
1538219089Spjd	}
1539219089Spjd
1540219089Spjd	/* No such luck, create a new entry */
1541219089Spjd	idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP);
1542219089Spjd	idx_tab->sa_idx_tab =
1543219089Spjd	    kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP);
1544219089Spjd	idx_tab->sa_layout = tb;
1545219089Spjd	refcount_create(&idx_tab->sa_refcount);
1546219089Spjd	if (tb->lot_var_sizes)
1547219089Spjd		idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) *
1548219089Spjd		    tb->lot_var_sizes, KM_SLEEP);
1549219089Spjd
1550219089Spjd	sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab,
1551219089Spjd	    tb, idx_tab);
1552219089Spjd	sa_idx_tab_hold(os, idx_tab);   /* one hold for consumer */
1553219089Spjd	sa_idx_tab_hold(os, idx_tab);	/* one for layout */
1554219089Spjd	list_insert_tail(&tb->lot_idx_tab, idx_tab);
1555219089Spjd	return (idx_tab);
1556219089Spjd}
1557219089Spjd
1558219089Spjdvoid
1559219089Spjdsa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len,
1560219089Spjd    boolean_t start, void *userdata)
1561219089Spjd{
1562219089Spjd	ASSERT(start);
1563219089Spjd
1564219089Spjd	*dataptr = userdata;
1565219089Spjd	*len = total_len;
1566219089Spjd}
1567219089Spjd
1568219089Spjdstatic void
1569219089Spjdsa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
1570219089Spjd{
1571219089Spjd	uint64_t attr_value = 0;
1572219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1573219089Spjd	sa_attr_table_t *tb = sa->sa_attr_table;
1574219089Spjd	int i;
1575219089Spjd
1576219089Spjd	mutex_enter(&sa->sa_lock);
1577219089Spjd
1578219089Spjd	if (!sa->sa_need_attr_registration || sa->sa_master_obj == 0) {
1579219089Spjd		mutex_exit(&sa->sa_lock);
1580219089Spjd		return;
1581219089Spjd	}
1582219089Spjd
1583219089Spjd	if (sa->sa_reg_attr_obj == 0) {
1584236884Smm		sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
1585236884Smm		    DMU_OT_SA_ATTR_REGISTRATION,
1586236884Smm		    sa->sa_master_obj, SA_REGISTRY, tx);
1587219089Spjd	}
1588219089Spjd	for (i = 0; i != sa->sa_num_attrs; i++) {
1589219089Spjd		if (sa->sa_attr_table[i].sa_registered)
1590219089Spjd			continue;
1591219089Spjd		ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
1592219089Spjd		    tb[i].sa_byteswap);
1593219089Spjd		VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
1594219089Spjd		    tb[i].sa_name, 8, 1, &attr_value, tx));
1595219089Spjd		tb[i].sa_registered = B_TRUE;
1596219089Spjd	}
1597219089Spjd	sa->sa_need_attr_registration = B_FALSE;
1598219089Spjd	mutex_exit(&sa->sa_lock);
1599219089Spjd}
1600219089Spjd
1601219089Spjd/*
1602219089Spjd * Replace all attributes with attributes specified in template.
1603219089Spjd * If dnode had a spill buffer then those attributes will be
1604219089Spjd * also be replaced, possibly with just an empty spill block
1605219089Spjd *
1606219089Spjd * This interface is intended to only be used for bulk adding of
1607219089Spjd * attributes for a new file.  It will also be used by the ZPL
1608219089Spjd * when converting and old formatted znode to native SA support.
1609219089Spjd */
1610219089Spjdint
1611219089Spjdsa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1612219089Spjd    int attr_count, dmu_tx_t *tx)
1613219089Spjd{
1614219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1615219089Spjd
1616219089Spjd	if (sa->sa_need_attr_registration)
1617219089Spjd		sa_attr_register_sync(hdl, tx);
1618219089Spjd	return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
1619219089Spjd}
1620219089Spjd
1621219089Spjdint
1622219089Spjdsa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
1623219089Spjd    int attr_count, dmu_tx_t *tx)
1624219089Spjd{
1625219089Spjd	int error;
1626219089Spjd
1627219089Spjd	mutex_enter(&hdl->sa_lock);
1628219089Spjd	error = sa_replace_all_by_template_locked(hdl, attr_desc,
1629219089Spjd	    attr_count, tx);
1630219089Spjd	mutex_exit(&hdl->sa_lock);
1631219089Spjd	return (error);
1632219089Spjd}
1633219089Spjd
/*
 * Add/remove a single attribute or replace a variable-sized attribute value
 * with a value of a different size, and then rewrite the entire set
 * of attributes.
 * Same-length attribute value replacement (including fixed-length attributes)
 * is handled more efficiently by the upper layers.
 *
 * hdl       - SA handle; hdl->sa_lock must be held by the caller (asserted).
 * newattr   - attribute being added, removed or replaced.
 * action    - SA_ADD, SA_REMOVE or SA_REPLACE.
 * locator   - data locator recorded for the new value (not used for
 *             SA_REMOVE).
 * datastart - data pointer recorded for the new value (not used for
 *             SA_REMOVE).
 * buflen    - length of the new value.
 * tx        - transaction handed to sa_build_layouts().
 *
 * Returns the error from sa_get_spill() when it fails with anything other
 * than ENOENT; otherwise returns the result of sa_build_layouts().
 */
static int
sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
    uint16_t buflen, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
	dnode_t *dn;
	sa_bulk_attr_t *attr_desc;
	void *old_data[2];	/* [0] = bonus copy, [1] = spill copy */
	int bonus_attr_count = 0;
	int bonus_data_size = 0;
	int spill_data_size = 0;
	int spill_attr_count = 0;
	int error;
	uint16_t length, reg_length;
	int i, j, k, length_idx;
	sa_hdr_phys_t *hdr;
	sa_idx_tab_t *idx_tab;
	int attr_count;
	int count;

	ASSERT(MUTEX_HELD(&hdl->sa_lock));

	/* First make a copy of the old data */

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	if (dn->dn_bonuslen != 0) {
		bonus_data_size = hdl->sa_bonus->db_size;
		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
		bcopy(hdl->sa_bonus->db_data, old_data[0],
		    hdl->sa_bonus->db_size);
		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
	} else {
		old_data[0] = NULL;
	}
	DB_DNODE_EXIT(db);

	/* Bring spill buffer online if it isn't currently */

	if ((error = sa_get_spill(hdl)) == 0) {
		spill_data_size = hdl->sa_spill->db_size;
		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
		bcopy(hdl->sa_spill->db_data, old_data[1],
		    hdl->sa_spill->db_size);
		spill_attr_count =
		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
	} else if (error && error != ENOENT) {
		/* Hard failure: release the bonus copy and bail out. */
		if (old_data[0])
			kmem_free(old_data[0], bonus_data_size);
		return (error);
	} else {
		/* ENOENT is treated as "no spill data to copy". */
		old_data[1] = NULL;
	}

	/* build descriptor of all attributes */

	/*
	 * NOTE(review): for SA_REMOVE the count is reduced before newattr is
	 * known to be present in either layout; presumably callers only
	 * remove attributes that exist -- confirm.
	 */
	attr_count = bonus_attr_count + spill_attr_count;
	if (action == SA_ADD)
		attr_count++;
	else if (action == SA_REMOVE)
		attr_count--;

	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);

	/*
	 * loop through bonus and spill buffer if it exists, and
	 * build up new attr_descriptor to reset the attributes
	 */
	k = j = 0;
	count = bonus_attr_count;
	hdr = SA_GET_HDR(hdl, SA_BONUS);
	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
	for (; k != 2; k++) {
		/*
		 * Iterate over each attribute in layout.  Fetch the
		 * size of variable-length attributes needing rewrite
		 * from sa_lengths[].
		 */
		for (i = 0, length_idx = 0; i != count; i++) {
			sa_attr_type_t attr;

			attr = idx_tab->sa_layout->lot_attrs[i];
			/*
			 * A registered length of zero marks a variable-sized
			 * attribute; its length comes from the next
			 * sa_lengths[] slot.
			 */
			reg_length = SA_REGISTERED_LEN(sa, attr);
			if (reg_length == 0) {
				length = hdr->sa_lengths[length_idx];
				length_idx++;
			} else {
				length = reg_length;
			}
			if (attr == newattr) {
				/*
				 * There is nothing to do for SA_REMOVE,
				 * so it is just skipped.
				 */
				if (action == SA_REMOVE)
					continue;

				/*
				 * Duplicate attributes are not allowed, so the
				 * action can not be SA_ADD here.
				 */
				ASSERT3S(action, ==, SA_REPLACE);

				/*
				 * Only a variable-sized attribute can be
				 * replaced here, and its size must be changing.
				 */
				ASSERT3U(reg_length, ==, 0);
				ASSERT3U(length, !=, buflen);
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    locator, datastart, buflen);
			} else {
				/*
				 * Unchanged attribute: point the descriptor at
				 * its bytes inside the saved copy of the
				 * buffer it lives in.
				 */
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    NULL, (void *)
				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
				    (uintptr_t)old_data[k]), length);
			}
		}
		/* After the bonus pass, make one more pass over the spill. */
		if (k == 0 && hdl->sa_spill) {
			hdr = SA_GET_HDR(hdl, SA_SPILL);
			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
			count = spill_attr_count;
		} else {
			break;
		}
	}
	if (action == SA_ADD) {
		/* A fixed-size attribute must be added at its registered size. */
		reg_length = SA_REGISTERED_LEN(sa, newattr);
		IMPLY(reg_length != 0, reg_length == buflen);
		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
		    datastart, buflen);
	}
	/* Every descriptor slot allocated above must have been filled. */
	ASSERT3U(j, ==, attr_count);

	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);

	if (old_data[0])
		kmem_free(old_data[0], bonus_data_size);
	if (old_data[1])
		kmem_free(old_data[1], spill_data_size);
	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);

	return (error);
}
1787219089Spjd
1788219089Spjdstatic int
1789219089Spjdsa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
1790219089Spjd    dmu_tx_t *tx)
1791219089Spjd{
1792219089Spjd	int error;
1793219089Spjd	sa_os_t *sa = hdl->sa_os->os_sa;
1794219089Spjd	dmu_object_type_t bonustype;
1795219089Spjd
1796219089Spjd	bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS));
1797219089Spjd
1798219089Spjd	ASSERT(hdl);
1799219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1800219089Spjd
1801219089Spjd	/* sync out registration table if necessary */
1802219089Spjd	if (sa->sa_need_attr_registration)
1803219089Spjd		sa_attr_register_sync(hdl, tx);
1804219089Spjd
1805219089Spjd	error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
1806219089Spjd	if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
1807219089Spjd		sa->sa_update_cb(hdl, tx);
1808219089Spjd
1809219089Spjd	return (error);
1810219089Spjd}
1811219089Spjd
1812219089Spjd/*
1813219089Spjd * update or add new attribute
1814219089Spjd */
1815219089Spjdint
1816219089Spjdsa_update(sa_handle_t *hdl, sa_attr_type_t type,
1817219089Spjd    void *buf, uint32_t buflen, dmu_tx_t *tx)
1818219089Spjd{
1819219089Spjd	int error;
1820219089Spjd	sa_bulk_attr_t bulk;
1821219089Spjd
1822219089Spjd	bulk.sa_attr = type;
1823219089Spjd	bulk.sa_data_func = NULL;
1824219089Spjd	bulk.sa_length = buflen;
1825219089Spjd	bulk.sa_data = buf;
1826219089Spjd
1827219089Spjd	mutex_enter(&hdl->sa_lock);
1828219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1829219089Spjd	mutex_exit(&hdl->sa_lock);
1830219089Spjd	return (error);
1831219089Spjd}
1832219089Spjd
1833219089Spjdint
1834219089Spjdsa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
1835219089Spjd    uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
1836219089Spjd{
1837219089Spjd	int error;
1838219089Spjd	sa_bulk_attr_t bulk;
1839219089Spjd
1840219089Spjd	bulk.sa_attr = attr;
1841219089Spjd	bulk.sa_data = userdata;
1842219089Spjd	bulk.sa_data_func = locator;
1843219089Spjd	bulk.sa_length = buflen;
1844219089Spjd
1845219089Spjd	mutex_enter(&hdl->sa_lock);
1846219089Spjd	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
1847219089Spjd	mutex_exit(&hdl->sa_lock);
1848219089Spjd	return (error);
1849219089Spjd}
1850219089Spjd
1851219089Spjd/*
1852219089Spjd * Return size of an attribute
1853219089Spjd */
1854219089Spjd
1855219089Spjdint
1856219089Spjdsa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
1857219089Spjd{
1858219089Spjd	sa_bulk_attr_t bulk;
1859219089Spjd	int error;
1860219089Spjd
1861219089Spjd	bulk.sa_data = NULL;
1862219089Spjd	bulk.sa_attr = attr;
1863219089Spjd	bulk.sa_data_func = NULL;
1864219089Spjd
1865219089Spjd	ASSERT(hdl);
1866219089Spjd	mutex_enter(&hdl->sa_lock);
1867219089Spjd	if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) != 0) {
1868219089Spjd		mutex_exit(&hdl->sa_lock);
1869219089Spjd		return (error);
1870219089Spjd	}
1871219089Spjd	*size = bulk.sa_size;
1872219089Spjd
1873219089Spjd	mutex_exit(&hdl->sa_lock);
1874219089Spjd	return (0);
1875219089Spjd}
1876219089Spjd
1877219089Spjdint
1878219089Spjdsa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1879219089Spjd{
1880219089Spjd	ASSERT(hdl);
1881219089Spjd	ASSERT(MUTEX_HELD(&hdl->sa_lock));
1882219089Spjd	return (sa_lookup_impl(hdl, attrs, count));
1883219089Spjd}
1884219089Spjd
1885219089Spjdint
1886219089Spjdsa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
1887219089Spjd{
1888219089Spjd	int error;
1889219089Spjd
1890219089Spjd	ASSERT(hdl);
1891219089Spjd	mutex_enter(&hdl->sa_lock);
1892219089Spjd	error = sa_bulk_lookup_locked(hdl, attrs, count);
1893219089Spjd	mutex_exit(&hdl->sa_lock);
1894219089Spjd	return (error);
1895219089Spjd}
1896219089Spjd
1897219089Spjdint
1898219089Spjdsa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
1899219089Spjd{
1900219089Spjd	int error;
1901219089Spjd
1902219089Spjd	ASSERT(hdl);
1903219089Spjd	mutex_enter(&hdl->sa_lock);
1904219089Spjd	error = sa_bulk_update_impl(hdl, attrs, count, tx);
1905219089Spjd	mutex_exit(&hdl->sa_lock);
1906219089Spjd	return (error);
1907219089Spjd}
1908219089Spjd
1909219089Spjdint
1910219089Spjdsa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
1911219089Spjd{
1912219089Spjd	int error;
1913219089Spjd
1914219089Spjd	mutex_enter(&hdl->sa_lock);
1915219089Spjd	error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
1916219089Spjd	    NULL, 0, tx);
1917219089Spjd	mutex_exit(&hdl->sa_lock);
1918219089Spjd	return (error);
1919219089Spjd}
1920219089Spjd
1921219089Spjdvoid
1922219089Spjdsa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
1923219089Spjd{
1924219089Spjd	dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
1925219089Spjd}
1926219089Spjd
1927219089Spjdvoid
1928219089Spjdsa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
1929219089Spjd{
1930219089Spjd	dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
1931219089Spjd	    blksize, nblocks);
1932219089Spjd}
1933219089Spjd
1934219089Spjdvoid
1935219089Spjdsa_set_userp(sa_handle_t *hdl, void *ptr)
1936219089Spjd{
1937219089Spjd	hdl->sa_userp = ptr;
1938219089Spjd}
1939219089Spjd
1940219089Spjddmu_buf_t *
1941219089Spjdsa_get_db(sa_handle_t *hdl)
1942219089Spjd{
1943219089Spjd	return ((dmu_buf_t *)hdl->sa_bonus);
1944219089Spjd}
1945219089Spjd
1946219089Spjdvoid *
1947219089Spjdsa_get_userdata(sa_handle_t *hdl)
1948219089Spjd{
1949219089Spjd	return (hdl->sa_userp);
1950219089Spjd}
1951219089Spjd
1952219089Spjdvoid
1953219089Spjdsa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
1954219089Spjd{
1955219089Spjd	ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
1956219089Spjd	os->os_sa->sa_update_cb = func;
1957219089Spjd}
1958219089Spjd
1959219089Spjdvoid
1960219089Spjdsa_register_update_callback(objset_t *os, sa_update_cb_t *func)
1961219089Spjd{
1962219089Spjd
1963219089Spjd	mutex_enter(&os->os_sa->sa_lock);
1964219089Spjd	sa_register_update_callback_locked(os, func);
1965219089Spjd	mutex_exit(&os->os_sa->sa_lock);
1966219089Spjd}
1967219089Spjd
1968219089Spjduint64_t
1969219089Spjdsa_handle_object(sa_handle_t *hdl)
1970219089Spjd{
1971219089Spjd	return (hdl->sa_bonus->db_object);
1972219089Spjd}
1973219089Spjd
1974219089Spjdboolean_t
1975219089Spjdsa_enabled(objset_t *os)
1976219089Spjd{
1977219089Spjd	return (os->os_sa == NULL);
1978219089Spjd}
1979219089Spjd
1980219089Spjdint
1981219089Spjdsa_set_sa_object(objset_t *os, uint64_t sa_object)
1982219089Spjd{
1983219089Spjd	sa_os_t *sa = os->os_sa;
1984219089Spjd
1985219089Spjd	if (sa->sa_master_obj)
1986219089Spjd		return (1);
1987219089Spjd
1988219089Spjd	sa->sa_master_obj = sa_object;
1989219089Spjd
1990219089Spjd	return (0);
1991219089Spjd}
1992219089Spjd
1993219089Spjdint
1994219089Spjdsa_hdrsize(void *arg)
1995219089Spjd{
1996219089Spjd	sa_hdr_phys_t *hdr = arg;
1997219089Spjd
1998219089Spjd	return (SA_HDR_SIZE(hdr));
1999219089Spjd}
2000219089Spjd
2001219089Spjdvoid
2002219089Spjdsa_handle_lock(sa_handle_t *hdl)
2003219089Spjd{
2004219089Spjd	ASSERT(hdl);
2005219089Spjd	mutex_enter(&hdl->sa_lock);
2006219089Spjd}
2007219089Spjd
2008219089Spjdvoid
2009219089Spjdsa_handle_unlock(sa_handle_t *hdl)
2010219089Spjd{
2011219089Spjd	ASSERT(hdl);
2012219089Spjd	mutex_exit(&hdl->sa_lock);
2013219089Spjd}
2014