1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21219089Spjd/*
22219089Spjd * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23219089Spjd */
24219089Spjd
25286575Smav#include <sys/zfs_context.h>
26219089Spjd#include <sys/vnode.h>
27219089Spjd#include <sys/sa.h>
28219089Spjd#include <sys/zfs_acl.h>
29219089Spjd#include <sys/zfs_sa.h>
30219089Spjd
31219089Spjd/*
32219089Spjd * ZPL attribute registration table.
33219089Spjd * Order of attributes doesn't matter
34219089Spjd * a unique value will be assigned for each
35219089Spjd * attribute that is file system specific
36219089Spjd *
37219089Spjd * This is just the set of ZPL attributes that this
38219089Spjd * version of ZFS deals with natively.  The file system
39219089Spjd * could have other attributes stored in files, but they will be
40219089Spjd * ignored.  The SA framework will preserve them, just that
41219089Spjd * this version of ZFS won't change or delete them.
42219089Spjd */
43219089Spjd
44219089Spjdsa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
45219089Spjd	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
46219089Spjd	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
47219089Spjd	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
48219089Spjd	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
49219089Spjd	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
50219089Spjd	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
51219089Spjd	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
52219089Spjd	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
53219089Spjd	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
54219089Spjd	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
55219089Spjd	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
56219089Spjd	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
57219089Spjd	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
58219089Spjd	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
59219089Spjd	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
60219089Spjd	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
61219089Spjd	{"ZPL_DACL_COUNT", sizeof (uint64_t), SA_UINT64_ARRAY, 0},
62219089Spjd	{"ZPL_SYMLINK", 0, SA_UINT8_ARRAY, 0},
63219089Spjd	{"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0},
64219089Spjd	{"ZPL_DACL_ACES", 0, SA_ACL, 0},
65219089Spjd	{NULL, 0, 0, 0}
66219089Spjd};
67219089Spjd
68219089Spjd#ifdef _KERNEL
69219089Spjd
70219089Spjdint
71219089Spjdzfs_sa_readlink(znode_t *zp, uio_t *uio)
72219089Spjd{
73219089Spjd	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
74219089Spjd	size_t bufsz;
75219089Spjd	int error;
76219089Spjd
77219089Spjd	bufsz = zp->z_size;
78219089Spjd	if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) {
79219089Spjd		error = uiomove((caddr_t)db->db_data +
80219089Spjd		    ZFS_OLD_ZNODE_PHYS_SIZE,
81219089Spjd		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
82219089Spjd	} else {
83219089Spjd		dmu_buf_t *dbp;
84219089Spjd		if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id,
85219089Spjd		    0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
86219089Spjd			error = uiomove(dbp->db_data,
87219089Spjd			    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
88219089Spjd			dmu_buf_rele(dbp, FTAG);
89219089Spjd		}
90219089Spjd	}
91219089Spjd	return (error);
92219089Spjd}
93219089Spjd
94219089Spjdvoid
95219089Spjdzfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
96219089Spjd{
97219089Spjd	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
98219089Spjd
99219089Spjd	if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) {
100219089Spjd		VERIFY(dmu_set_bonus(db,
101219089Spjd		    len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0);
102219089Spjd		if (len) {
103219089Spjd			bcopy(link, (caddr_t)db->db_data +
104219089Spjd			    ZFS_OLD_ZNODE_PHYS_SIZE, len);
105219089Spjd		}
106219089Spjd	} else {
107219089Spjd		dmu_buf_t *dbp;
108219089Spjd
109219089Spjd		zfs_grow_blocksize(zp, len, tx);
110219089Spjd		VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os,
111219089Spjd		    zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH));
112219089Spjd
113219089Spjd		dmu_buf_will_dirty(dbp, tx);
114219089Spjd
115219089Spjd		ASSERT3U(len, <=, dbp->db_size);
116219089Spjd		bcopy(link, dbp->db_data, len);
117219089Spjd		dmu_buf_rele(dbp, FTAG);
118219089Spjd	}
119219089Spjd}
120219089Spjd
121219089Spjdvoid
122219089Spjdzfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
123219089Spjd{
124219089Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
125219089Spjd	xoptattr_t *xoap;
126219089Spjd
127303970Savg	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
128219089Spjd	VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
129219089Spjd	if (zp->z_is_sa) {
130219089Spjd		if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
131219089Spjd		    &xoap->xoa_av_scanstamp,
132219089Spjd		    sizeof (xoap->xoa_av_scanstamp)) != 0)
133219089Spjd			return;
134219089Spjd	} else {
135219089Spjd		dmu_object_info_t doi;
136219089Spjd		dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
137219089Spjd		int len;
138219089Spjd
139219089Spjd		if (!(zp->z_pflags & ZFS_BONUS_SCANSTAMP))
140219089Spjd			return;
141219089Spjd
142219089Spjd		sa_object_info(zp->z_sa_hdl, &doi);
143219089Spjd		len = sizeof (xoap->xoa_av_scanstamp) +
144219089Spjd		    ZFS_OLD_ZNODE_PHYS_SIZE;
145219089Spjd
146219089Spjd		if (len <= doi.doi_bonus_size) {
147219089Spjd			(void) memcpy(xoap->xoa_av_scanstamp,
148219089Spjd			    (caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
149219089Spjd			    sizeof (xoap->xoa_av_scanstamp));
150219089Spjd		}
151219089Spjd	}
152219089Spjd	XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
153219089Spjd}
154219089Spjd
155219089Spjdvoid
156219089Spjdzfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
157219089Spjd{
158219089Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
159219089Spjd	xoptattr_t *xoap;
160219089Spjd
161303970Savg	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
162219089Spjd	VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
163219089Spjd	if (zp->z_is_sa)
164219089Spjd		VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
165219089Spjd		    &xoap->xoa_av_scanstamp,
166219089Spjd		    sizeof (xoap->xoa_av_scanstamp), tx));
167219089Spjd	else {
168219089Spjd		dmu_object_info_t doi;
169219089Spjd		dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
170219089Spjd		int len;
171219089Spjd
172219089Spjd		sa_object_info(zp->z_sa_hdl, &doi);
173219089Spjd		len = sizeof (xoap->xoa_av_scanstamp) +
174219089Spjd		    ZFS_OLD_ZNODE_PHYS_SIZE;
175219089Spjd		if (len > doi.doi_bonus_size)
176219089Spjd			VERIFY(dmu_set_bonus(db, len, tx) == 0);
177219089Spjd		(void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
178219089Spjd		    xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
179219089Spjd
180219089Spjd		zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
181219089Spjd		VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
182219089Spjd		    &zp->z_pflags, sizeof (uint64_t), tx));
183219089Spjd	}
184219089Spjd}
185219089Spjd
186219089Spjd/*
187219089Spjd * I'm not convinced we should do any of this upgrade.
188219089Spjd * since the SA code can read both old/new znode formats
189251631Sdelphij * with probably little to no performance difference.
190219089Spjd *
191219089Spjd * All new files will be created with the new format.
192219089Spjd */
193219089Spjd
194219089Spjdvoid
195219089Spjdzfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
196219089Spjd{
197219089Spjd	dmu_buf_t *db = sa_get_db(hdl);
198219089Spjd	znode_t *zp = sa_get_userdata(hdl);
199219089Spjd	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
200219089Spjd	sa_bulk_attr_t bulk[20];
201219089Spjd	int count = 0;
202219089Spjd	sa_bulk_attr_t sa_attrs[20] = { 0 };
203219089Spjd	zfs_acl_locator_cb_t locate = { 0 };
204219089Spjd	uint64_t uid, gid, mode, rdev, xattr, parent;
205219089Spjd	uint64_t crtime[2], mtime[2], ctime[2];
206219089Spjd	zfs_acl_phys_t znode_acl;
207219089Spjd	char scanstamp[AV_SCANSTAMP_SZ];
208219089Spjd
209219089Spjd	/*
210219089Spjd	 * No upgrade if ACL isn't cached
211219089Spjd	 * since we won't know which locks are held
212219089Spjd	 * and ready the ACL would require special "locked"
213219089Spjd	 * interfaces that would be messy
214219089Spjd	 */
215219089Spjd	if (zp->z_acl_cached == NULL || ZTOV(zp)->v_type == VLNK)
216219089Spjd		return;
217219089Spjd
218219089Spjd	/*
219303970Savg	 * If the vnode lock is held and we aren't the owner
220303970Savg	 * then just return since we don't want to deadlock
221219089Spjd	 * trying to update the status of z_is_sa.  This
222219089Spjd	 * file can then be upgraded at a later time.
223219089Spjd	 *
224219089Spjd	 * Otherwise, we know we are doing the
225219089Spjd	 * sa_update() that caused us to enter this function.
226219089Spjd	 */
227303970Savg	if (vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_NOWAIT) != 0)
228219089Spjd			return;
229219089Spjd
230219089Spjd	/* First do a bulk query of the attributes that aren't cached */
231219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
232219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
233219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
234219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
235219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
236219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
237219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
238219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
239219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
240219089Spjd	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
241219089Spjd	    &znode_acl, 88);
242219089Spjd
243219089Spjd	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
244219089Spjd		goto done;
245219089Spjd
246219089Spjd
247219089Spjd	/*
248219089Spjd	 * While the order here doesn't matter its best to try and organize
249219089Spjd	 * it is such a way to pick up an already existing layout number
250219089Spjd	 */
251219089Spjd	count = 0;
252219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
253219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
254219089Spjd	    &zp->z_size, 8);
255219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
256219089Spjd	    NULL, &zp->z_gen, 8);
257219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
258219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
259219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
260219089Spjd	    NULL, &parent, 8);
261219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
262219089Spjd	    &zp->z_pflags, 8);
263219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
264219089Spjd	    zp->z_atime, 16);
265219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
266219089Spjd	    &mtime, 16);
267219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
268219089Spjd	    &ctime, 16);
269219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
270219089Spjd	    &crtime, 16);
271219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
272219089Spjd	    &zp->z_links, 8);
273219089Spjd	if (zp->z_vnode->v_type == VBLK || zp->z_vnode->v_type == VCHR)
274219089Spjd		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
275219089Spjd		    &rdev, 8);
276219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
277219089Spjd	    &zp->z_acl_cached->z_acl_count, 8);
278219089Spjd
279219089Spjd	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
280219089Spjd		zfs_acl_xform(zp, zp->z_acl_cached, CRED());
281219089Spjd
282219089Spjd	locate.cb_aclp = zp->z_acl_cached;
283219089Spjd	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
284219089Spjd	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);
285219089Spjd
286219089Spjd	if (xattr)
287219089Spjd		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
288219089Spjd		    NULL, &xattr, 8);
289219089Spjd
290219089Spjd	/* if scanstamp then add scanstamp */
291219089Spjd
292219089Spjd	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
293219089Spjd		bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
294219089Spjd		    scanstamp, AV_SCANSTAMP_SZ);
295219089Spjd		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
296219089Spjd		    NULL, scanstamp, AV_SCANSTAMP_SZ);
297219089Spjd		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
298219089Spjd	}
299219089Spjd
300219089Spjd	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
301219089Spjd	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
302219089Spjd	    count, tx) == 0);
303219089Spjd	if (znode_acl.z_acl_extern_obj)
304219089Spjd		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
305219089Spjd		    znode_acl.z_acl_extern_obj, tx));
306219089Spjd
307219089Spjd	zp->z_is_sa = B_TRUE;
308219089Spjddone:
309303970Savg	VOP_UNLOCK(ZTOV(zp), 0);
310219089Spjd}
311219089Spjd
312219089Spjdvoid
313219089Spjdzfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
314219089Spjd{
315219089Spjd	if (!zp->z_zfsvfs->z_use_sa || zp->z_is_sa)
316219089Spjd		return;
317219089Spjd
318219089Spjd
319219089Spjd	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
320219089Spjd
321219089Spjd	if (zfs_external_acl(zp)) {
322219089Spjd		dmu_tx_hold_free(tx, zfs_external_acl(zp), 0,
323219089Spjd		    DMU_OBJECT_END);
324219089Spjd	}
325219089Spjd}
326219089Spjd
327219089Spjd#endif
328