sa_impl.h revision 260764
1178151Srpaulo/*
2189769Srpaulo * CDDL HEADER START
3189769Srpaulo *
4178151Srpaulo * The contents of this file are subject to the terms of the
5178151Srpaulo * Common Development and Distribution License (the "License").
6178151Srpaulo * You may not use this file except in compliance with the License.
7178151Srpaulo *
8178151Srpaulo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178151Srpaulo * or http://www.opensolaris.org/os/licensing.
10178151Srpaulo * See the License for the specific language governing permissions
11178151Srpaulo * and limitations under the License.
12178151Srpaulo *
13178151Srpaulo * When distributing Covered Code, include this CDDL HEADER in each
14178151Srpaulo * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178151Srpaulo * If applicable, add the following below this CDDL HEADER, with the
16178151Srpaulo * fields enclosed by brackets "[]" replaced with your own identifying
17178151Srpaulo * information: Portions Copyright [yyyy] [name of copyright owner]
18178151Srpaulo *
19178151Srpaulo * CDDL HEADER END
20178151Srpaulo */
21178151Srpaulo/*
22178151Srpaulo * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23178151Srpaulo * Copyright (c) 2013 by Delphix. All rights reserved.
24178151Srpaulo */
25178151Srpaulo
26178151Srpaulo#ifndef	_SYS_SA_IMPL_H
27178151Srpaulo#define	_SYS_SA_IMPL_H
28178151Srpaulo
29189769Srpaulo#include <sys/dmu.h>
30189769Srpaulo#include <sys/refcount.h>
31178151Srpaulo#include <sys/list.h>
32178151Srpaulo
33178151Srpaulo/*
34178151Srpaulo * Array of known attributes and their
35178151Srpaulo * various characteristics.
36178151Srpaulo */
37178151Srpaulotypedef struct sa_attr_table {
38178151Srpaulo	sa_attr_type_t	sa_attr;
39178151Srpaulo	uint8_t sa_registered;
40178151Srpaulo	uint16_t sa_length;
41178151Srpaulo	sa_bswap_type_t sa_byteswap;
42178151Srpaulo	char *sa_name;
43178151Srpaulo} sa_attr_table_t;
44178151Srpaulo
/*
 * Zap attribute format for attribute registration
 *
 * 64      56      48      40      32      24      16      8       0
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 * |        unused         |      len      | bswap |   attr num    |
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Zap attribute format for layout information.
 *
 * Layout information is stored as an array of attribute numbers.
 * The name of the attribute is the layout number (0, 1, 2, ...)
 *
 * 16       0
 * +--------+
 * | attr # |
 * +--------+
 * | attr # |
 * +--------+
 *  ......
 *
 */
67180312Srpaulo
/*
 * Field extractors for an attribute registration value: attribute
 * number in bits 0-15, byteswap type in bits 16-23, length in bits 24-39.
 *
 * NOTE(review): these read with BF32_GET although ATTR_ENCODE writes with
 * BF64_SET and the length field extends past bit 31 -- confirm BF32_GET
 * is safe on 64-bit operands.
 */
#define	ATTR_NUM(x)	BF32_GET(x, 0, 16)
#define	ATTR_BSWAP(x)	BF32_GET(x, 16, 8)
#define	ATTR_LENGTH(x)	BF32_GET(x, 24, 16)
/*
 * Store an attribute registration into x: length in bits 24-39,
 * byteswap type in bits 16-23 and attribute number in bits 0-15.
 *
 * Wrapped in do/while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define	ATTR_ENCODE(x, attr, length, bswap) \
do { \
	BF64_SET(x, 24, 16, length); \
	BF64_SET(x, 16, 8, bswap); \
	BF64_SET(x, 0, 16, attr); \
} while (0)
77189769Srpaulo
/*
 * Field extractors for a 32-bit table-of-contents entry; bit 31 is
 * the "attribute present" flag.
 *
 * NOTE(review): TOC_OFF reads 23 bits and TOC_LEN_IDX reads 4 bits,
 * while TOC_ATTR_ENCODE stores 24 and 7 bits in those positions --
 * confirm the intended field widths.
 */
#define	TOC_ATTR_PRESENT(x)	BF32_GET(x, 31, 1)
#define	TOC_LEN_IDX(x)		BF32_GET(x, 24, 4)
#define	TOC_OFF(x)		BF32_GET(x, 0, 23)
/*
 * Fill in table-of-contents entry x: set the present flag (bit 31),
 * store len_idx in the 7 bits starting at bit 24 and offset in the
 * low 24 bits.
 *
 * Wrapped in do/while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define	TOC_ATTR_ENCODE(x, len_idx, offset) \
do { \
	BF32_SET(x, 31, 1, 1); \
	BF32_SET(x, 24, 7, len_idx); \
	BF32_SET(x, 0, 24, offset); \
} while (0)
87189769Srpaulo
/*
 * Fixed name strings.
 * NOTE(review): presumably the entry names under which the layout and
 * registration objects are looked up -- confirm against the users.
 */
#define	SA_REGISTRY	"REGISTRY"
#define	SA_LAYOUTS	"LAYOUTS"
90189769Srpaulo
91189769Srpaulo/*
92178151Srpaulo * Each unique layout will have their own table
93189769Srpaulo * sa_lot (layout_table)
94189769Srpaulo */
95189769Srpaulotypedef struct sa_lot {
96189769Srpaulo	avl_node_t lot_num_node;
97178151Srpaulo	avl_node_t lot_hash_node;
98189769Srpaulo	uint64_t lot_num;
99178151Srpaulo	uint64_t lot_hash;
100189769Srpaulo	sa_attr_type_t *lot_attrs;	/* array of attr #'s */
101189769Srpaulo	uint32_t lot_var_sizes;	/* how many aren't fixed size */
102189769Srpaulo	uint32_t lot_attr_count;	/* total attr count */
103189769Srpaulo	list_t 	lot_idx_tab;	/* should be only a couple of entries */
104178151Srpaulo	int	lot_instance;	/* used with lot_hash to identify entry */
105178151Srpaulo} sa_lot_t;
106178151Srpaulo
107189769Srpaulo/* index table of offsets */
108189769Srpaulotypedef struct sa_idx_tab {
109189769Srpaulo	list_node_t	sa_next;
110189769Srpaulo	sa_lot_t	*sa_layout;
111189769Srpaulo	uint16_t	*sa_variable_lengths;
112189769Srpaulo	refcount_t	sa_refcount;
113189769Srpaulo	uint32_t	*sa_idx_tab;	/* array of offsets */
114189769Srpaulo} sa_idx_tab_t;
115189769Srpaulo
116178151Srpaulo/*
117189769Srpaulo * Since the offset/index information into the actual data
118178151Srpaulo * will usually be identical we can share that information with
119189769Srpaulo * all handles that have the exact same offsets.
120189769Srpaulo *
121189769Srpaulo * You would typically only have a large number of different table of
122189769Srpaulo * contents if you had a several variable sized attributes.
123178151Srpaulo *
124178151Srpaulo * Two AVL trees are used to track the attribute layout numbers.
125178151Srpaulo * one is keyed by number and will be consulted when a DMU_OT_SA
126178151Srpaulo * object is first read.  The second tree is keyed by the hash signature
127189769Srpaulo * of the attributes and will be consulted when an attribute is added
128189769Srpaulo * to determine if we already have an instance of that layout.  Both
129189769Srpaulo * of these tree's are interconnected.  The only difference is that
130189769Srpaulo * when an entry is found in the "hash" tree the list of attributes will
131178151Srpaulo * need to be compared against the list of attributes you have in hand.
132178151Srpaulo * The assumption is that typically attributes will just be updated and
133189769Srpaulo * adding a completely new attribute is a very rare operation.
134189769Srpaulo */
135178151Srpaulostruct sa_os {
136178151Srpaulo	kmutex_t 	sa_lock;
137189769Srpaulo	boolean_t	sa_need_attr_registration;
138178151Srpaulo	boolean_t	sa_force_spill;
139178151Srpaulo	uint64_t	sa_master_obj;
140178151Srpaulo	uint64_t	sa_reg_attr_obj;
141178151Srpaulo	uint64_t	sa_layout_attr_obj;
142178151Srpaulo	int		sa_num_attrs;
143178151Srpaulo	sa_attr_table_t *sa_attr_table;	 /* private attr table */
144178151Srpaulo	sa_update_cb_t	*sa_update_cb;
145189769Srpaulo	avl_tree_t	sa_layout_num_tree;  /* keyed by layout number */
146189769Srpaulo	avl_tree_t	sa_layout_hash_tree; /* keyed by layout hash value */
147189769Srpaulo	int		sa_user_table_sz;
148178151Srpaulo	sa_attr_type_t	*sa_user_table; /* user name->attr mapping table */
149178151Srpaulo};
150178151Srpaulo
/*
 * Header for all bonus and spill buffers.
 *
 * The header has a fixed portion followed by a variable number of
 * "lengths"; how many depends on the number of variable sized
 * attributes, which is determined by the "layout number".
 */

#define	SA_MAGIC	0x2F505A  /* ZFS SA */
typedef struct sa_hdr_phys {
	uint32_t sa_magic;
	/* BEGIN CSTYLED */
	/*
	 * Encoded with hdrsize and layout number as follows:
	 * 16      10       0
	 * +--------+-------+
	 * | hdrsz  |layout |
	 * +--------+-------+
	 *
	 * Bits 0-9 are the layout number.
	 * Bits 10-15 are the size of the header.
	 * The hdrsize is the stored number * 8.
	 *
	 * For example:
	 * hdrsz of 1 ==> 8 byte header
	 *          2 ==> 16 byte header
	 *
	 */
	/* END CSTYLED */
	uint16_t sa_layout_info;
	/*
	 * The first length slot is part of the struct itself, so
	 * sizeof (sa_hdr_phys_t) already accounts for one length.
	 */
	uint16_t sa_lengths[1];	/* optional sizes for variable length attrs */
	/* ... Data follows the lengths.  */
} sa_hdr_phys_t;
184178151Srpaulo
/*
 * Decode sa_layout_info of the header pointed to by hdr:
 * SA_HDR_LAYOUT_NUM extracts the 10-bit layout number from bits 0-9;
 * SA_HDR_SIZE extracts the 6-bit field at bits 10-15 through
 * BF32_GET_SB with a shift of 3 and a bias of 0.
 *
 * hdr is parenthesized so that any pointer expression (for example
 * "p + 1") can be passed.
 */
#define	SA_HDR_LAYOUT_NUM(hdr) BF32_GET((hdr)->sa_layout_info, 0, 10)
#define	SA_HDR_SIZE(hdr) BF32_GET_SB((hdr)->sa_layout_info, 10, 6, 3, 0)
/*
 * Encode a header size and a layout number into x: size goes through
 * BF32_SET_SB (shift 3, bias 0) into the 6-bit field at bits 10-15 and
 * num is stored in bits 0-9.
 *
 * Wrapped in do/while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define	SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
do { \
	BF32_SET_SB(x, 10, 6, 3, 0, size); \
	BF32_SET(x, 0, 10, num); \
} while (0)
192189769Srpaulo
/* Selects which of a handle's two buffers is meant: bonus or spill. */
typedef enum sa_buf_type {
	SA_BONUS	= 1,
	SA_SPILL	= 2
} sa_buf_type_t;
197178151Srpaulo
/* Kinds of operation on attribute data; values are assigned from 0. */
typedef enum sa_data_op {
	SA_LOOKUP,	/* 0 */
	SA_UPDATE,	/* 1 */
	SA_ADD,		/* 2 */
	SA_REPLACE,	/* 3 */
	SA_REMOVE	/* 4 */
} sa_data_op_t;
205180312Srpaulo
206185434Srpaulo/*
207185434Srpaulo * Opaque handle used for most sa functions
208185434Srpaulo *
209185434Srpaulo * This needs to be kept as small as possible.
210185434Srpaulo */
211180312Srpaulo
212189769Srpaulostruct sa_handle {
213189769Srpaulo	kmutex_t	sa_lock;
214189769Srpaulo	dmu_buf_t	*sa_bonus;
215189769Srpaulo	dmu_buf_t	*sa_spill;
216189769Srpaulo	objset_t	*sa_os;
217189769Srpaulo	void 		*sa_userp;
218189769Srpaulo	sa_idx_tab_t	*sa_bonus_tab;	 /* idx of bonus */
219178151Srpaulo	sa_idx_tab_t	*sa_spill_tab; /* only present if spill activated */
220189769Srpaulo};
221178151Srpaulo
/*
 * Return the handle's bonus buffer when type is SA_BONUS, otherwise its
 * spill buffer, cast to dmu_buf_impl_t *.
 *
 * Arguments and the whole expansion are parenthesized so the result can
 * be used directly in a larger expression, e.g. SA_GET_DB(h, t)->db.
 */
#define	SA_GET_DB(hdl, type)	\
	((dmu_buf_impl_t *)(((type) == SA_BONUS) ? \
	(hdl)->sa_bonus : (hdl)->sa_spill))
224178151Srpaulo
/*
 * Return the db.db_data pointer of the buffer selected by
 * SA_GET_DB(hdl, type), viewed as an sa_hdr_phys_t *.
 *
 * The former intermediate (dmu_buf_impl_t *) cast was dropped: because
 * "->" binds tighter than a cast it applied to db_data, not to the
 * buffer, and was immediately overridden by the outer cast.
 */
#define	SA_GET_HDR(hdl, type) \
	((sa_hdr_phys_t *)((SA_GET_DB(hdl, type))->db.db_data))
228178151Srpaulo
/*
 * Return the handle's bonus index table when type is SA_BONUS,
 * otherwise its spill index table.  Arguments are parenthesized so
 * arbitrary expressions can be passed.
 */
#define	SA_IDX_TAB_GET(hdl, type) \
	((type) == SA_BONUS ? (hdl)->sa_bonus_tab : (hdl)->sa_spill_tab)
231178151Srpaulo
/*
 * B_TRUE when a equals DMU_OT_SA, B_FALSE otherwise.  The argument is
 * parenthesized so that an expression such as "t & mask" is compared as
 * a whole.
 */
#define	IS_SA_BONUSTYPE(a)	\
	(((a) == DMU_OT_SA) ? B_TRUE : B_FALSE)
234178151Srpaulo
/*
 * Call dmu_get_bonustype() on db viewed as a dmu_buf_t *.  The argument
 * is parenthesized so the cast applies to the whole expression.
 */
#define	SA_BONUSTYPE_FROM_DB(db) \
	(dmu_get_bonustype((dmu_buf_t *)(db)))
237178151Srpaulo
/* DN_MAX_BONUSLEN less the size of one blkptr_t. */
#define	SA_BLKPTR_SPACE	\
	(DN_MAX_BONUSLEN - sizeof (blkptr_t))
239178151Srpaulo
/*
 * Layout number to use for header x given bonus type "type":
 *   - 0 when type is not an SA bonus type;
 *   - 1 when it is and the header's stored layout number is 0;
 *   - the header's stored layout number otherwise.
 *
 * The original nested conditional re-tested IS_SA_BONUSTYPE(type) in a
 * branch where it was already known to hold; that test is removed.
 */
#define	SA_LAYOUT_NUM(x, type) \
	(IS_SA_BONUSTYPE(type) ? \
	((SA_HDR_LAYOUT_NUM(x) == 0) ? 1 : SA_HDR_LAYOUT_NUM(x)) : 0)
243178151Srpaulo
244178151Srpaulo
/*
 * Registered length of attribute attr, read from sa's attribute table.
 * Fully parenthesized so it is safe inside larger expressions and with
 * pointer expressions as sa.
 */
#define	SA_REGISTERED_LEN(sa, attr) ((sa)->sa_attr_table[attr].sa_length)
246189769Srpaulo
/*
 * Length of attribute attr.  When its registered length is 0 the length
 * is taken from hdr->sa_lengths[], at the slot given by the length index
 * of the attribute's entry in idx->sa_idx_tab[]; otherwise the
 * registered length is used.
 *
 * idx and hdr are parenthesized so pointer expressions can be passed.
 * sa and attr are evaluated more than once; avoid side effects in them.
 */
#define	SA_ATTR_LEN(sa, idx, attr, hdr) \
	((SA_REGISTERED_LEN(sa, attr) == 0) ? \
	(hdr)->sa_lengths[TOC_LEN_IDX((idx)->sa_idx_tab[attr])] : \
	SA_REGISTERED_LEN(sa, attr))
250178151Srpaulo
/*
 * Initialize the header pointed to by hdr: store SA_MAGIC in sa_magic
 * and encode the layout number and header size into sa_layout_info.
 *
 * Wrapped in do/while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else; invoke it with a trailing
 * semicolon.  hdr is parenthesized so pointer expressions can be passed.
 */
#define	SA_SET_HDR(hdr, num, size) \
	do { \
		(hdr)->sa_magic = SA_MAGIC; \
		SA_HDR_LAYOUT_INFO_ENCODE((hdr)->sa_layout_info, num, size); \
	} while (0)
256178151Srpaulo
/*
 * Fill in bulk for attribute attr:
 *   sa_size    - the attribute length from SA_ATTR_LEN();
 *   sa_buftype - type;
 *   sa_addr    - hdr plus the offset stored in the attribute's
 *                idx->sa_idx_tab[] entry.
 *
 * The hdl parameter is accepted but not used by the expansion.
 *
 * Wrapped in do/while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else; invoke it with a trailing
 * semicolon.  Arguments used directly are parenthesized.
 */
#define	SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
	do { \
		(bulk).sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
		(bulk).sa_buftype = (type); \
		(bulk).sa_addr = \
		    (void *)((uintptr_t)TOC_OFF((idx)->sa_idx_tab[attr]) + \
		    (uintptr_t)(hdr)); \
	} while (0)
265180312Srpaulo
/*
 * True when the size encoded in hdr equals the size expected for layout
 * tb: sizeof (sa_hdr_phys_t), plus -- when the layout has more than one
 * variable-sized attribute -- the space for the additional uint16_t
 * lengths rounded up to a multiple of 8.  One length slot is already
 * part of sa_hdr_phys_t, hence the "- 1".
 *
 * tb is parenthesized so pointer expressions can be passed.
 */
#define	SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
	(SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \
	((tb)->lot_var_sizes > 1 ? P2ROUNDUP(((tb)->lot_var_sizes - 1) * \
	sizeof (uint16_t), 8) : 0)))
270180312Srpaulo
271180312Srpauloint sa_add_impl(sa_handle_t *, sa_attr_type_t,
272180312Srpaulo    uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
273180312Srpaulo
274180312Srpaulovoid sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
275180312Srpauloint sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
276180312Srpaulo
277180312Srpaulovoid sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
278180312Srpauloint sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
279180312Srpaulo    uint16_t *, sa_hdr_phys_t *);
280180312Srpaulo
281180312Srpaulo#ifdef	__cplusplus
282180312Srpauloextern "C" {
283180312Srpaulo#endif
284189769Srpaulo
285180312Srpaulo#ifdef	__cplusplus
286180312Srpaulo}
287180312Srpaulo#endif
288180312Srpaulo
289180312Srpaulo#endif	/* _SYS_SA_IMPL_H */
290180312Srpaulo