zfs_znode.c revision 168404
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22168404Spjd * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 27168404Spjd 28168404Spjd#ifdef _KERNEL 29168404Spjd#include <sys/types.h> 30168404Spjd#include <sys/param.h> 31168404Spjd#include <sys/time.h> 32168404Spjd#include <sys/systm.h> 33168404Spjd#include <sys/sysmacros.h> 34168404Spjd#include <sys/resource.h> 35168404Spjd#include <sys/mntent.h> 36168404Spjd#include <sys/vfs.h> 37168404Spjd#include <sys/vnode.h> 38168404Spjd#include <sys/file.h> 39168404Spjd#include <sys/kmem.h> 40168404Spjd#include <sys/cmn_err.h> 41168404Spjd#include <sys/errno.h> 42168404Spjd#include <sys/unistd.h> 43168404Spjd#include <sys/atomic.h> 44168404Spjd#include <sys/zfs_dir.h> 45168404Spjd#include <sys/zfs_acl.h> 46168404Spjd#include <sys/zfs_ioctl.h> 47168404Spjd#include <sys/zfs_rlock.h> 48168404Spjd#include <sys/fs/zfs.h> 49168404Spjd#endif /* _KERNEL */ 50168404Spjd 51168404Spjd#include <sys/dmu.h> 52168404Spjd#include <sys/refcount.h> 53168404Spjd#include <sys/stat.h> 54168404Spjd#include <sys/zap.h> 55168404Spjd#include <sys/zfs_znode.h> 56168404Spjd#include <sys/refcount.h> 57168404Spjd 58168404Spjd/* 59168404Spjd * Functions needed for userland (ie: libzpool) are not put under 60168404Spjd * #ifdef_KERNEL; the rest of the functions have dependencies 61168404Spjd * (such as VFS logic) that will not compile easily in userland. 62168404Spjd */ 63168404Spjd#ifdef _KERNEL 64168404Spjdstruct kmem_cache *znode_cache = NULL; 65168404Spjd 66168404Spjd/*ARGSUSED*/ 67168404Spjdstatic void 68168404Spjdznode_pageout_func(dmu_buf_t *dbuf, void *user_ptr) 69168404Spjd{ 70168404Spjd znode_t *zp = user_ptr; 71168404Spjd vnode_t *vp = ZTOV(zp); 72168404Spjd 73168404Spjd mutex_enter(&zp->z_lock); 74168404Spjd if (vp == NULL) { 75168404Spjd mutex_exit(&zp->z_lock); 76168404Spjd zfs_znode_free(zp); 77168404Spjd } else if (vp->v_count == 0) { 78168404Spjd ZTOV(zp) = NULL; 79168404Spjd mutex_exit(&zp->z_lock); 80168404Spjd vhold(vp); 81168404Spjd vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); 82168404Spjd vrecycle(vp, curthread); 83168404Spjd VOP_UNLOCK(vp, 0, curthread); 84168404Spjd vdrop(vp); 85168404Spjd zfs_znode_free(zp); 86168404Spjd } else { 87168404Spjd /* signal force unmount that this znode can be freed */ 88168404Spjd zp->z_dbuf = NULL; 89168404Spjd mutex_exit(&zp->z_lock); 90168404Spjd } 91168404Spjd} 92168404Spjd 93168404Spjdextern struct vop_vector zfs_vnodeops; 94168404Spjdextern struct vop_vector zfs_fifoops; 95168404Spjd 96168404Spjd/* 97168404Spjd * XXX: We cannot use this function as a cache constructor, because 98168404Spjd * there is one global cache for all file systems and we need 99168404Spjd * to pass vfsp here, which is not possible, because argument 100168404Spjd * 'cdrarg' is defined at kmem_cache_create() time. 101168404Spjd */ 102168404Spjdstatic int 103168404Spjdzfs_znode_cache_constructor(void *buf, void *cdrarg, int kmflags) 104168404Spjd{ 105168404Spjd znode_t *zp = buf; 106168404Spjd vfs_t *vfsp = cdrarg; 107168404Spjd int error; 108168404Spjd 109168404Spjd if (cdrarg != NULL) { 110168404Spjd error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &zp->z_vnode); 111168404Spjd ASSERT(error == 0); 112168404Spjd zp->z_vnode->v_data = (caddr_t)zp; 113168404Spjd vhold(zp->z_vnode); 114168404Spjd } else { 115168404Spjd zp->z_vnode = NULL; 116168404Spjd } 117168404Spjd mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 118168404Spjd rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL); 119168404Spjd rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 120168404Spjd rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); 121168404Spjd mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 122168404Spjd 123168404Spjd mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); 124168404Spjd avl_create(&zp->z_range_avl, zfs_range_compare, 125168404Spjd sizeof (rl_t), offsetof(rl_t, r_node)); 126168404Spjd 127168404Spjd zp->z_dbuf_held = 0; 128168404Spjd zp->z_dirlocks = 0; 129168404Spjd zp->z_lockf = NULL; 130168404Spjd return (0); 131168404Spjd} 132168404Spjd 133168404Spjd/*ARGSUSED*/ 134168404Spjdstatic void 135168404Spjdzfs_znode_cache_destructor(void *buf, void *cdarg) 136168404Spjd{ 137168404Spjd znode_t *zp = buf; 138168404Spjd 139168404Spjd ASSERT(zp->z_dirlocks == 0); 140168404Spjd mutex_destroy(&zp->z_lock); 141168404Spjd rw_destroy(&zp->z_map_lock); 142168404Spjd rw_destroy(&zp->z_parent_lock); 143168404Spjd rw_destroy(&zp->z_name_lock); 144168404Spjd mutex_destroy(&zp->z_acl_lock); 145168404Spjd mutex_destroy(&zp->z_range_lock); 146168404Spjd avl_destroy(&zp->z_range_avl); 147168404Spjd 148168404Spjd ASSERT(zp->z_dbuf_held == 0); 149168404Spjd} 150168404Spjd 151168404Spjdvoid 152168404Spjdzfs_znode_init(void) 153168404Spjd{ 154168404Spjd /* 155168404Spjd * Initialize zcache 156168404Spjd */ 157168404Spjd ASSERT(znode_cache == NULL); 158168404Spjd znode_cache = kmem_cache_create("zfs_znode_cache", 159168404Spjd sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL, 160168404Spjd zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 161168404Spjd} 162168404Spjd 163168404Spjdvoid 164168404Spjdzfs_znode_fini(void) 165168404Spjd{ 166168404Spjd /* 167168404Spjd * Cleanup zcache 168168404Spjd */ 169168404Spjd if (znode_cache) 170168404Spjd kmem_cache_destroy(znode_cache); 171168404Spjd znode_cache = NULL; 172168404Spjd} 173168404Spjd 174168404Spjd/* 175168404Spjd * zfs_init_fs - Initialize the zfsvfs struct and the file system 176168404Spjd * incore "master" object. Verify version compatibility. 177168404Spjd */ 178168404Spjdint 179168404Spjdzfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr) 180168404Spjd{ 181168404Spjd objset_t *os = zfsvfs->z_os; 182168404Spjd uint64_t version = ZPL_VERSION; 183168404Spjd int i, error; 184168404Spjd dmu_object_info_t doi; 185168404Spjd uint64_t fsid_guid; 186168404Spjd 187168404Spjd *zpp = NULL; 188168404Spjd 189168404Spjd /* 190168404Spjd * XXX - hack to auto-create the pool root filesystem at 191168404Spjd * the first attempted mount. 192168404Spjd */ 193168404Spjd if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) { 194168404Spjd dmu_tx_t *tx = dmu_tx_create(os); 195168404Spjd 196168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */ 197168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */ 198168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */ 199168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 200168404Spjd ASSERT3U(error, ==, 0); 201168404Spjd zfs_create_fs(os, cr, tx); 202168404Spjd dmu_tx_commit(tx); 203168404Spjd } 204168404Spjd 205168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_OBJ, 8, 1, 206168404Spjd &version); 207168404Spjd if (error) { 208168404Spjd return (error); 209168404Spjd } else if (version != ZPL_VERSION) { 210168404Spjd (void) printf("Mismatched versions: File system " 211168404Spjd "is version %lld on-disk format, which is " 212168404Spjd "incompatible with this software version %lld!", 213168404Spjd (u_longlong_t)version, ZPL_VERSION); 214168404Spjd return (ENOTSUP); 215168404Spjd } 216168404Spjd 217168404Spjd /* 218168404Spjd * The fsid is 64 bits, composed of an 8-bit fs type, which 219168404Spjd * separates our fsid from any other filesystem types, and a 220168404Spjd * 56-bit objset unique ID. The objset unique ID is unique to 221168404Spjd * all objsets open on this system, provided by unique_create(). 222168404Spjd * The 8-bit fs type must be put in the low bits of fsid[1] 223168404Spjd * because that's where other Solaris filesystems put it. 224168404Spjd */ 225168404Spjd fsid_guid = dmu_objset_fsid_guid(os); 226168404Spjd ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 227168404Spjd zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid; 228168404Spjd zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 229168404Spjd zfsvfs->z_vfs->mnt_vfc->vfc_typenum & 0xFF; 230168404Spjd 231168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 232168404Spjd &zfsvfs->z_root); 233168404Spjd if (error) 234168404Spjd return (error); 235168404Spjd ASSERT(zfsvfs->z_root != 0); 236168404Spjd 237168404Spjd /* 238168404Spjd * Create the per mount vop tables. 239168404Spjd */ 240168404Spjd 241168404Spjd /* 242168404Spjd * Initialize zget mutex's 243168404Spjd */ 244168404Spjd for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 245168404Spjd mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 246168404Spjd 247168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp); 248168404Spjd if (error) 249168404Spjd return (error); 250168404Spjd ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root); 251168404Spjd 252168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 253168404Spjd &zfsvfs->z_unlinkedobj); 254168404Spjd if (error) 255168404Spjd return (error); 256168404Spjd 257168404Spjd return (0); 258168404Spjd} 259168404Spjd 260168404Spjd/* 261168404Spjd * define a couple of values we need available 262168404Spjd * for both 64 and 32 bit environments. 263168404Spjd */ 264168404Spjd#ifndef NBITSMINOR64 265168404Spjd#define NBITSMINOR64 32 266168404Spjd#endif 267168404Spjd#ifndef MAXMAJ64 268168404Spjd#define MAXMAJ64 0xffffffffUL 269168404Spjd#endif 270168404Spjd#ifndef MAXMIN64 271168404Spjd#define MAXMIN64 0xffffffffUL 272168404Spjd#endif 273168404Spjd 274168404Spjd/* 275168404Spjd * Create special expldev for ZFS private use. 276168404Spjd * Can't use standard expldev since it doesn't do 277168404Spjd * what we want. The standard expldev() takes a 278168404Spjd * dev32_t in LP64 and expands it to a long dev_t. 279168404Spjd * We need an interface that takes a dev32_t in ILP32 280168404Spjd * and expands it to a long dev_t. 281168404Spjd */ 282168404Spjdstatic uint64_t 283168404Spjdzfs_expldev(dev_t dev) 284168404Spjd{ 285168404Spjd return ((uint64_t)0); 286168404Spjd} 287168404Spjd/* 288168404Spjd * Special cmpldev for ZFS private use. 289168404Spjd * Can't use standard cmpldev since it takes 290168404Spjd * a long dev_t and compresses it to dev32_t in 291168404Spjd * LP64. We need to do a compaction of a long dev_t 292168404Spjd * to a dev32_t in ILP32. 293168404Spjd */ 294168404Spjddev_t 295168404Spjdzfs_cmpldev(uint64_t dev) 296168404Spjd{ 297168404Spjd return ((dev_t)0); 298168404Spjd} 299168404Spjd 300168404Spjd/* 301168404Spjd * Construct a new znode/vnode and intialize. 302168404Spjd * 303168404Spjd * This does not do a call to dmu_set_user() that is 304168404Spjd * up to the caller to do, in case you don't want to 305168404Spjd * return the znode 306168404Spjd */ 307168404Spjdstatic znode_t * 308168404Spjdzfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz) 309168404Spjd{ 310168404Spjd znode_t *zp; 311168404Spjd vnode_t *vp; 312168404Spjd int error; 313168404Spjd 314168404Spjd zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 315168404Spjd zfs_znode_cache_constructor(zp, zfsvfs->z_vfs, 0); 316168404Spjd 317168404Spjd ASSERT(zp->z_dirlocks == NULL); 318168404Spjd 319168404Spjd zp->z_phys = db->db_data; 320168404Spjd zp->z_zfsvfs = zfsvfs; 321168404Spjd zp->z_unlinked = 0; 322168404Spjd zp->z_atime_dirty = 0; 323168404Spjd zp->z_dbuf_held = 0; 324168404Spjd zp->z_mapcnt = 0; 325168404Spjd zp->z_last_itx = 0; 326168404Spjd zp->z_dbuf = db; 327168404Spjd zp->z_id = obj_num; 328168404Spjd zp->z_blksz = blksz; 329168404Spjd zp->z_seq = 0x7A4653; 330168404Spjd zp->z_sync_cnt = 0; 331168404Spjd 332168404Spjd mutex_enter(&zfsvfs->z_znodes_lock); 333168404Spjd list_insert_tail(&zfsvfs->z_all_znodes, zp); 334168404Spjd mutex_exit(&zfsvfs->z_znodes_lock); 335168404Spjd 336168404Spjd vp = ZTOV(zp); 337168404Spjd if (vp == NULL) 338168404Spjd return (zp); 339168404Spjd 340168404Spjd error = insmntque(vp, zfsvfs->z_vfs); 341168404Spjd KASSERT(error == 0, ("insmntque() failed: error %d", error)); 342168404Spjd 343168404Spjd vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 344168404Spjd switch (vp->v_type) { 345168404Spjd case VDIR: 346168404Spjd zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 347168404Spjd break; 348168404Spjd case VFIFO: 349168404Spjd vp->v_op = &zfs_fifoops; 350168404Spjd break; 351168404Spjd } 352168404Spjd 353168404Spjd return (zp); 354168404Spjd} 355168404Spjd 356168404Spjdstatic void 357168404Spjdzfs_znode_dmu_init(znode_t *zp) 358168404Spjd{ 359168404Spjd znode_t *nzp; 360168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 361168404Spjd dmu_buf_t *db = zp->z_dbuf; 362168404Spjd 363168404Spjd mutex_enter(&zp->z_lock); 364168404Spjd 365168404Spjd nzp = dmu_buf_set_user(db, zp, &zp->z_phys, znode_pageout_func); 366168404Spjd 367168404Spjd /* 368168404Spjd * there should be no 369168404Spjd * concurrent zgets on this object. 370168404Spjd */ 371168404Spjd ASSERT3P(nzp, ==, NULL); 372168404Spjd 373168404Spjd /* 374168404Spjd * Slap on VROOT if we are the root znode 375168404Spjd */ 376168404Spjd if (zp->z_id == zfsvfs->z_root) { 377168404Spjd ZTOV(zp)->v_flag |= VROOT; 378168404Spjd } 379168404Spjd 380168404Spjd ASSERT(zp->z_dbuf_held == 0); 381168404Spjd zp->z_dbuf_held = 1; 382168404Spjd VFS_HOLD(zfsvfs->z_vfs); 383168404Spjd mutex_exit(&zp->z_lock); 384168404Spjd} 385168404Spjd 386168404Spjd/* 387168404Spjd * Create a new DMU object to hold a zfs znode. 388168404Spjd * 389168404Spjd * IN: dzp - parent directory for new znode 390168404Spjd * vap - file attributes for new znode 391168404Spjd * tx - dmu transaction id for zap operations 392168404Spjd * cr - credentials of caller 393168404Spjd * flag - flags: 394168404Spjd * IS_ROOT_NODE - new object will be root 395168404Spjd * IS_XATTR - new object is an attribute 396168404Spjd * IS_REPLAY - intent log replay 397168404Spjd * 398168404Spjd * OUT: oid - ID of created object 399168404Spjd * 400168404Spjd */ 401168404Spjdvoid 402168404Spjdzfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr, 403168404Spjd uint_t flag, znode_t **zpp, int bonuslen) 404168404Spjd{ 405168404Spjd dmu_buf_t *dbp; 406168404Spjd znode_phys_t *pzp; 407168404Spjd znode_t *zp; 408168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 409168404Spjd timestruc_t now; 410168404Spjd uint64_t gen; 411168404Spjd int err; 412168404Spjd 413168404Spjd ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 414168404Spjd 415168404Spjd if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ 416168404Spjd *oid = vap->va_nodeid; 417168404Spjd flag |= IS_REPLAY; 418168404Spjd now = vap->va_ctime; /* see zfs_replay_create() */ 419168404Spjd gen = vap->va_nblocks; /* ditto */ 420168404Spjd } else { 421168404Spjd *oid = 0; 422168404Spjd gethrestime(&now); 423168404Spjd gen = dmu_tx_get_txg(tx); 424168404Spjd } 425168404Spjd 426168404Spjd /* 427168404Spjd * Create a new DMU object. 428168404Spjd */ 429168404Spjd /* 430168404Spjd * There's currently no mechanism for pre-reading the blocks that will 431168404Spjd * be to needed allocate a new object, so we accept the small chance 432168404Spjd * that there will be an i/o error and we will fail one of the 433168404Spjd * assertions below. 434168404Spjd */ 435168404Spjd if (vap->va_type == VDIR) { 436168404Spjd if (flag & IS_REPLAY) { 437168404Spjd err = zap_create_claim(zfsvfs->z_os, *oid, 438168404Spjd DMU_OT_DIRECTORY_CONTENTS, 439168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 440168404Spjd ASSERT3U(err, ==, 0); 441168404Spjd } else { 442168404Spjd *oid = zap_create(zfsvfs->z_os, 443168404Spjd DMU_OT_DIRECTORY_CONTENTS, 444168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 445168404Spjd } 446168404Spjd } else { 447168404Spjd if (flag & IS_REPLAY) { 448168404Spjd err = dmu_object_claim(zfsvfs->z_os, *oid, 449168404Spjd DMU_OT_PLAIN_FILE_CONTENTS, 0, 450168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 451168404Spjd ASSERT3U(err, ==, 0); 452168404Spjd } else { 453168404Spjd *oid = dmu_object_alloc(zfsvfs->z_os, 454168404Spjd DMU_OT_PLAIN_FILE_CONTENTS, 0, 455168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 456168404Spjd } 457168404Spjd } 458168404Spjd VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, *oid, NULL, &dbp)); 459168404Spjd dmu_buf_will_dirty(dbp, tx); 460168404Spjd 461168404Spjd /* 462168404Spjd * Initialize the znode physical data to zero. 463168404Spjd */ 464168404Spjd ASSERT(dbp->db_size >= sizeof (znode_phys_t)); 465168404Spjd bzero(dbp->db_data, dbp->db_size); 466168404Spjd pzp = dbp->db_data; 467168404Spjd 468168404Spjd /* 469168404Spjd * If this is the root, fix up the half-initialized parent pointer 470168404Spjd * to reference the just-allocated physical data area. 471168404Spjd */ 472168404Spjd if (flag & IS_ROOT_NODE) { 473168404Spjd dzp->z_phys = pzp; 474168404Spjd dzp->z_id = *oid; 475168404Spjd } 476168404Spjd 477168404Spjd /* 478168404Spjd * If parent is an xattr, so am I. 479168404Spjd */ 480168404Spjd if (dzp->z_phys->zp_flags & ZFS_XATTR) 481168404Spjd flag |= IS_XATTR; 482168404Spjd 483168404Spjd if (vap->va_type == VBLK || vap->va_type == VCHR) { 484168404Spjd pzp->zp_rdev = zfs_expldev(vap->va_rdev); 485168404Spjd } 486168404Spjd 487168404Spjd if (vap->va_type == VDIR) { 488168404Spjd pzp->zp_size = 2; /* contents ("." and "..") */ 489168404Spjd pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 490168404Spjd } 491168404Spjd 492168404Spjd pzp->zp_parent = dzp->z_id; 493168404Spjd if (flag & IS_XATTR) 494168404Spjd pzp->zp_flags |= ZFS_XATTR; 495168404Spjd 496168404Spjd pzp->zp_gen = gen; 497168404Spjd 498168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_crtime); 499168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_ctime); 500168404Spjd 501168404Spjd if (vap->va_mask & AT_ATIME) { 502168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 503168404Spjd } else { 504168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_atime); 505168404Spjd } 506168404Spjd 507168404Spjd if (vap->va_mask & AT_MTIME) { 508168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 509168404Spjd } else { 510168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_mtime); 511168404Spjd } 512168404Spjd 513168404Spjd pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode); 514168404Spjd zp = zfs_znode_alloc(zfsvfs, dbp, *oid, 0); 515168404Spjd 516168404Spjd zfs_perm_init(zp, dzp, flag, vap, tx, cr); 517168404Spjd 518168404Spjd if (zpp) { 519168404Spjd kmutex_t *hash_mtx = ZFS_OBJ_MUTEX(zp); 520168404Spjd 521168404Spjd mutex_enter(hash_mtx); 522168404Spjd zfs_znode_dmu_init(zp); 523168404Spjd mutex_exit(hash_mtx); 524168404Spjd 525168404Spjd *zpp = zp; 526168404Spjd } else { 527168404Spjd if (ZTOV(zp) != NULL) 528168404Spjd ZTOV(zp)->v_count = 0; 529168404Spjd dmu_buf_rele(dbp, NULL); 530168404Spjd zfs_znode_free(zp); 531168404Spjd } 532168404Spjd} 533168404Spjd 534168404Spjdint 535168404Spjdzfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 536168404Spjd{ 537168404Spjd dmu_object_info_t doi; 538168404Spjd dmu_buf_t *db; 539168404Spjd znode_t *zp; 540168404Spjd vnode_t *vp; 541168404Spjd int err; 542168404Spjd 543168404Spjd *zpp = NULL; 544168404Spjd 545168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 546168404Spjd 547168404Spjd err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); 548168404Spjd if (err) { 549168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 550168404Spjd return (err); 551168404Spjd } 552168404Spjd 553168404Spjd dmu_object_info_from_db(db, &doi); 554168404Spjd if (doi.doi_bonus_type != DMU_OT_ZNODE || 555168404Spjd doi.doi_bonus_size < sizeof (znode_phys_t)) { 556168404Spjd dmu_buf_rele(db, NULL); 557168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 558168404Spjd return (EINVAL); 559168404Spjd } 560168404Spjd 561168404Spjd ASSERT(db->db_object == obj_num); 562168404Spjd ASSERT(db->db_offset == -1); 563168404Spjd ASSERT(db->db_data != NULL); 564168404Spjd 565168404Spjd zp = dmu_buf_get_user(db); 566168404Spjd 567168404Spjd if (zp != NULL) { 568168404Spjd mutex_enter(&zp->z_lock); 569168404Spjd 570168404Spjd ASSERT3U(zp->z_id, ==, obj_num); 571168404Spjd if (zp->z_unlinked) { 572168404Spjd dmu_buf_rele(db, NULL); 573168404Spjd mutex_exit(&zp->z_lock); 574168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 575168404Spjd return (ENOENT); 576168404Spjd } else if (zp->z_dbuf_held) { 577168404Spjd dmu_buf_rele(db, NULL); 578168404Spjd } else { 579168404Spjd zp->z_dbuf_held = 1; 580168404Spjd VFS_HOLD(zfsvfs->z_vfs); 581168404Spjd } 582168404Spjd 583168404Spjd if (ZTOV(zp) != NULL) 584168404Spjd VN_HOLD(ZTOV(zp)); 585168404Spjd else { 586168404Spjd err = getnewvnode("zfs", zfsvfs->z_vfs, &zfs_vnodeops, 587168404Spjd &zp->z_vnode); 588168404Spjd ASSERT(err == 0); 589168404Spjd vp = ZTOV(zp); 590168404Spjd vp->v_data = (caddr_t)zp; 591168404Spjd vhold(vp); 592168404Spjd vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 593168404Spjd if (vp->v_type == VDIR) 594168404Spjd zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 595168404Spjd err = insmntque(vp, zfsvfs->z_vfs); 596168404Spjd KASSERT(err == 0, ("insmntque() failed: error %d", err)); 597168404Spjd } 598168404Spjd mutex_exit(&zp->z_lock); 599168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 600168404Spjd *zpp = zp; 601168404Spjd return (0); 602168404Spjd } 603168404Spjd 604168404Spjd /* 605168404Spjd * Not found create new znode/vnode 606168404Spjd */ 607168404Spjd zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size); 608168404Spjd ASSERT3U(zp->z_id, ==, obj_num); 609168404Spjd zfs_znode_dmu_init(zp); 610168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 611168404Spjd *zpp = zp; 612168404Spjd return (0); 613168404Spjd} 614168404Spjd 615168404Spjdvoid 616168404Spjdzfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 617168404Spjd{ 618168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 619168404Spjd int error; 620168404Spjd 621168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 622168404Spjd if (zp->z_phys->zp_acl.z_acl_extern_obj) { 623168404Spjd error = dmu_object_free(zfsvfs->z_os, 624168404Spjd zp->z_phys->zp_acl.z_acl_extern_obj, tx); 625168404Spjd ASSERT3U(error, ==, 0); 626168404Spjd } 627168404Spjd error = dmu_object_free(zfsvfs->z_os, zp->z_id, tx); 628168404Spjd ASSERT3U(error, ==, 0); 629168404Spjd zp->z_dbuf_held = 0; 630168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 631168404Spjd dmu_buf_rele(zp->z_dbuf, NULL); 632168404Spjd} 633168404Spjd 634168404Spjdvoid 635168404Spjdzfs_zinactive(znode_t *zp) 636168404Spjd{ 637168404Spjd vnode_t *vp = ZTOV(zp); 638168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 639168404Spjd uint64_t z_id = zp->z_id; 640168404Spjd 641168404Spjd ASSERT(zp->z_dbuf_held && zp->z_phys); 642168404Spjd 643168404Spjd /* 644168404Spjd * Don't allow a zfs_zget() while were trying to release this znode 645168404Spjd */ 646168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 647168404Spjd 648168404Spjd mutex_enter(&zp->z_lock); 649168404Spjd VI_LOCK(vp); 650168404Spjd if (vp->v_count > 0) { 651168404Spjd /* 652168404Spjd * If the hold count is greater than zero, somebody has 653168404Spjd * obtained a new reference on this znode while we were 654168404Spjd * processing it here, so we are done. 655168404Spjd */ 656168404Spjd VI_UNLOCK(vp); 657168404Spjd mutex_exit(&zp->z_lock); 658168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 659168404Spjd return; 660168404Spjd } 661168404Spjd VI_UNLOCK(vp); 662168404Spjd 663168404Spjd /* 664168404Spjd * If this was the last reference to a file with no links, 665168404Spjd * remove the file from the file system. 666168404Spjd */ 667168404Spjd if (zp->z_unlinked) { 668168404Spjd ZTOV(zp) = NULL; 669168404Spjd mutex_exit(&zp->z_lock); 670168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 671168404Spjd ASSERT(vp->v_count == 0); 672168404Spjd vrecycle(vp, curthread); 673168404Spjd zfs_rmnode(zp); 674168404Spjd VFS_RELE(zfsvfs->z_vfs); 675168404Spjd return; 676168404Spjd } 677168404Spjd ASSERT(zp->z_phys); 678168404Spjd ASSERT(zp->z_dbuf_held); 679168404Spjd 680168404Spjd zp->z_dbuf_held = 0; 681168404Spjd mutex_exit(&zp->z_lock); 682168404Spjd dmu_buf_rele(zp->z_dbuf, NULL); 683168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 684168404Spjd VFS_RELE(zfsvfs->z_vfs); 685168404Spjd} 686168404Spjd 687168404Spjd/* 688168404Spjd * FreeBSD: Should be called from ->vop_reclaim(). 689168404Spjd */ 690168404Spjdvoid 691168404Spjdzfs_znode_free(znode_t *zp) 692168404Spjd{ 693168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 694168404Spjd 695168404Spjd mutex_enter(&zfsvfs->z_znodes_lock); 696168404Spjd list_remove(&zfsvfs->z_all_znodes, zp); 697168404Spjd mutex_exit(&zfsvfs->z_znodes_lock); 698168404Spjd 699168404Spjd kmem_cache_free(znode_cache, zp); 700168404Spjd} 701168404Spjd 702168404Spjdvoid 703168404Spjdzfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) 704168404Spjd{ 705168404Spjd timestruc_t now; 706168404Spjd 707168404Spjd ASSERT(MUTEX_HELD(&zp->z_lock)); 708168404Spjd 709168404Spjd gethrestime(&now); 710168404Spjd 711168404Spjd if (tx) { 712168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 713168404Spjd zp->z_atime_dirty = 0; 714168404Spjd zp->z_seq++; 715168404Spjd } else { 716168404Spjd zp->z_atime_dirty = 1; 717168404Spjd } 718168404Spjd 719168404Spjd if (flag & AT_ATIME) 720168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); 721168404Spjd 722168404Spjd if (flag & AT_MTIME) 723168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); 724168404Spjd 725168404Spjd if (flag & AT_CTIME) 726168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); 727168404Spjd} 728168404Spjd 729168404Spjd/* 730168404Spjd * Update the requested znode timestamps with the current time. 731168404Spjd * If we are in a transaction, then go ahead and mark the znode 732168404Spjd * dirty in the transaction so the timestamps will go to disk. 733168404Spjd * Otherwise, we will get pushed next time the znode is updated 734168404Spjd * in a transaction, or when this znode eventually goes inactive. 735168404Spjd * 736168404Spjd * Why is this OK? 737168404Spjd * 1 - Only the ACCESS time is ever updated outside of a transaction. 738168404Spjd * 2 - Multiple consecutive updates will be collapsed into a single 739168404Spjd * znode update by the transaction grouping semantics of the DMU. 740168404Spjd */ 741168404Spjdvoid 742168404Spjdzfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) 743168404Spjd{ 744168404Spjd mutex_enter(&zp->z_lock); 745168404Spjd zfs_time_stamper_locked(zp, flag, tx); 746168404Spjd mutex_exit(&zp->z_lock); 747168404Spjd} 748168404Spjd 749168404Spjd/* 750168404Spjd * Grow the block size for a file. 751168404Spjd * 752168404Spjd * IN: zp - znode of file to free data in. 753168404Spjd * size - requested block size 754168404Spjd * tx - open transaction. 755168404Spjd * 756168404Spjd * NOTE: this function assumes that the znode is write locked. 757168404Spjd */ 758168404Spjdvoid 759168404Spjdzfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 760168404Spjd{ 761168404Spjd int error; 762168404Spjd u_longlong_t dummy; 763168404Spjd 764168404Spjd if (size <= zp->z_blksz) 765168404Spjd return; 766168404Spjd /* 767168404Spjd * If the file size is already greater than the current blocksize, 768168404Spjd * we will not grow. If there is more than one block in a file, 769168404Spjd * the blocksize cannot change. 770168404Spjd */ 771168404Spjd if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) 772168404Spjd return; 773168404Spjd 774168404Spjd error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 775168404Spjd size, 0, tx); 776168404Spjd if (error == ENOTSUP) 777168404Spjd return; 778168404Spjd ASSERT3U(error, ==, 0); 779168404Spjd 780168404Spjd /* What blocksize did we actually get? */ 781168404Spjd dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); 782168404Spjd} 783168404Spjd 784168404Spjd/* 785168404Spjd * Free space in a file. 786168404Spjd * 787168404Spjd * IN: zp - znode of file to free data in. 788168404Spjd * off - start of section to free. 789168404Spjd * len - length of section to free (0 => to EOF). 790168404Spjd * flag - current file open mode flags. 791168404Spjd * 792168404Spjd * RETURN: 0 if success 793168404Spjd * error code if failure 794168404Spjd */ 795168404Spjdint 796168404Spjdzfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 797168404Spjd{ 798168404Spjd vnode_t *vp = ZTOV(zp); 799168404Spjd dmu_tx_t *tx; 800168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 801168404Spjd zilog_t *zilog = zfsvfs->z_log; 802168404Spjd rl_t *rl; 803168404Spjd uint64_t end = off + len; 804168404Spjd uint64_t size, new_blksz; 805168404Spjd int error; 806168404Spjd 807168404Spjd if (ZTOV(zp)->v_type == VFIFO) 808168404Spjd return (0); 809168404Spjd 810168404Spjd /* 811168404Spjd * If we will change zp_size then lock the whole file, 812168404Spjd * otherwise just lock the range being freed. 813168404Spjd */ 814168404Spjd if (len == 0 || off + len > zp->z_phys->zp_size) { 815168404Spjd rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 816168404Spjd } else { 817168404Spjd rl = zfs_range_lock(zp, off, len, RL_WRITER); 818168404Spjd /* recheck, in case zp_size changed */ 819168404Spjd if (off + len > zp->z_phys->zp_size) { 820168404Spjd /* lost race: file size changed, lock whole file */ 821168404Spjd zfs_range_unlock(rl); 822168404Spjd rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 823168404Spjd } 824168404Spjd } 825168404Spjd 826168404Spjd /* 827168404Spjd * Nothing to do if file already at desired length. 828168404Spjd */ 829168404Spjd size = zp->z_phys->zp_size; 830168404Spjd if (len == 0 && size == off) { 831168404Spjd zfs_range_unlock(rl); 832168404Spjd return (0); 833168404Spjd } 834168404Spjd 835168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 836168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 837168404Spjd new_blksz = 0; 838168404Spjd if (end > size && 839168404Spjd (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 840168404Spjd /* 841168404Spjd * We are growing the file past the current block size. 842168404Spjd */ 843168404Spjd if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 844168404Spjd ASSERT(!ISP2(zp->z_blksz)); 845168404Spjd new_blksz = MIN(end, SPA_MAXBLOCKSIZE); 846168404Spjd } else { 847168404Spjd new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 848168404Spjd } 849168404Spjd dmu_tx_hold_write(tx, zp->z_id, 0, MIN(end, new_blksz)); 850168404Spjd } else if (off < size) { 851168404Spjd /* 852168404Spjd * If len == 0, we are truncating the file. 853168404Spjd */ 854168404Spjd dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END); 855168404Spjd } 856168404Spjd 857168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 858168404Spjd if (error) { 859168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) 860168404Spjd dmu_tx_wait(tx); 861168404Spjd dmu_tx_abort(tx); 862168404Spjd zfs_range_unlock(rl); 863168404Spjd return (error); 864168404Spjd } 865168404Spjd 866168404Spjd if (new_blksz) 867168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 868168404Spjd 869168404Spjd if (end > size || len == 0) 870168404Spjd zp->z_phys->zp_size = end; 871168404Spjd 872168404Spjd if (off < size) { 873168404Spjd objset_t *os = zfsvfs->z_os; 874168404Spjd uint64_t rlen = len; 875168404Spjd 876168404Spjd if (len == 0) 877168404Spjd rlen = -1; 878168404Spjd else if (end > size) 879168404Spjd rlen = size - off; 880168404Spjd VERIFY(0 == dmu_free_range(os, zp->z_id, off, rlen, tx)); 881168404Spjd } 882168404Spjd 883168404Spjd if (log) { 884168404Spjd zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 885168404Spjd zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 886168404Spjd } 887168404Spjd 888168404Spjd zfs_range_unlock(rl); 889168404Spjd 890168404Spjd dmu_tx_commit(tx); 891168404Spjd 892168404Spjd /* 893168404Spjd * Clear any mapped pages in the truncated region. This has to 894168404Spjd * happen outside of the transaction to avoid the possibility of 895168404Spjd * a deadlock with someone trying to push a page that we are 896168404Spjd * about to invalidate. 897168404Spjd */ 898168404Spjd rw_enter(&zp->z_map_lock, RW_WRITER); 899168404Spjd if (end > size) 900168404Spjd vnode_pager_setsize(vp, end); 901168404Spjd else if (len == 0) { 902168404Spjd#if 0 903168404Spjd error = vtruncbuf(vp, curthread->td_ucred, curthread, end, PAGE_SIZE); 904168404Spjd#else 905168404Spjd error = vinvalbuf(vp, V_SAVE, curthread, 0, 0); 906168404Spjd vnode_pager_setsize(vp, end); 907168404Spjd#endif 908168404Spjd } 909168404Spjd rw_exit(&zp->z_map_lock); 910168404Spjd 911168404Spjd return (0); 912168404Spjd} 913168404Spjd 914168404Spjdvoid 915168404Spjdzfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx) 916168404Spjd{ 917168404Spjd zfsvfs_t zfsvfs; 918168404Spjd uint64_t moid, doid, roid = 0; 919168404Spjd uint64_t version = ZPL_VERSION; 920168404Spjd int error; 921168404Spjd znode_t *rootzp = NULL; 922168404Spjd vattr_t vattr; 923168404Spjd 924168404Spjd /* 925168404Spjd * First attempt to create master node. 926168404Spjd */ 927168404Spjd /* 928168404Spjd * In an empty objset, there are no blocks to read and thus 929168404Spjd * there can be no i/o errors (which we assert below). 930168404Spjd */ 931168404Spjd moid = MASTER_NODE_OBJ; 932168404Spjd error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 933168404Spjd DMU_OT_NONE, 0, tx); 934168404Spjd ASSERT(error == 0); 935168404Spjd 936168404Spjd /* 937168404Spjd * Set starting attributes. 938168404Spjd */ 939168404Spjd 940168404Spjd error = zap_update(os, moid, ZPL_VERSION_OBJ, 8, 1, &version, tx); 941168404Spjd ASSERT(error == 0); 942168404Spjd 943168404Spjd /* 944168404Spjd * Create a delete queue. 945168404Spjd */ 946168404Spjd doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 947168404Spjd 948168404Spjd error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx); 949168404Spjd ASSERT(error == 0); 950168404Spjd 951168404Spjd /* 952168404Spjd * Create root znode. Create minimal znode/vnode/zfsvfs 953168404Spjd * to allow zfs_mknode to work. 954168404Spjd */ 955168404Spjd vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 956168404Spjd vattr.va_type = VDIR; 957168404Spjd vattr.va_mode = S_IFDIR|0755; 958168404Spjd vattr.va_uid = UID_ROOT; 959168404Spjd vattr.va_gid = GID_WHEEL; 960168404Spjd 961168404Spjd rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 962168404Spjd zfs_znode_cache_constructor(rootzp, NULL, 0); 963168404Spjd rootzp->z_zfsvfs = &zfsvfs; 964168404Spjd rootzp->z_unlinked = 0; 965168404Spjd rootzp->z_atime_dirty = 0; 966168404Spjd rootzp->z_dbuf_held = 0; 967168404Spjd 968168404Spjd bzero(&zfsvfs, sizeof (zfsvfs_t)); 969168404Spjd 970168404Spjd zfsvfs.z_os = os; 971168404Spjd zfsvfs.z_assign = TXG_NOWAIT; 972168404Spjd zfsvfs.z_parent = &zfsvfs; 973168404Spjd 974168404Spjd mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 975168404Spjd list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 976168404Spjd offsetof(znode_t, z_link_node)); 977168404Spjd 978168404Spjd zfs_mknode(rootzp, &vattr, &roid, tx, cr, IS_ROOT_NODE, NULL, 0); 979168404Spjd ASSERT3U(rootzp->z_id, ==, roid); 980168404Spjd error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &roid, tx); 981168404Spjd ASSERT(error == 0); 982168404Spjd 983168404Spjd kmem_cache_free(znode_cache, rootzp); 984168404Spjd} 985168404Spjd#endif /* _KERNEL */ 986168404Spjd 987168404Spjd/* 988168404Spjd * Given an object number, return its parent object number and whether 989168404Spjd * or not the object is an extended attribute directory. 990168404Spjd */ 991168404Spjdstatic int 992168404Spjdzfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir) 993168404Spjd{ 994168404Spjd dmu_buf_t *db; 995168404Spjd dmu_object_info_t doi; 996168404Spjd znode_phys_t *zp; 997168404Spjd int error; 998168404Spjd 999168404Spjd if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0) 1000168404Spjd return (error); 1001168404Spjd 1002168404Spjd dmu_object_info_from_db(db, &doi); 1003168404Spjd if (doi.doi_bonus_type != DMU_OT_ZNODE || 1004168404Spjd doi.doi_bonus_size < sizeof (znode_phys_t)) { 1005168404Spjd dmu_buf_rele(db, FTAG); 1006168404Spjd return (EINVAL); 1007168404Spjd } 1008168404Spjd 1009168404Spjd zp = db->db_data; 1010168404Spjd *pobjp = zp->zp_parent; 1011168404Spjd *is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) && 1012168404Spjd S_ISDIR(zp->zp_mode); 1013168404Spjd dmu_buf_rele(db, FTAG); 1014168404Spjd 1015168404Spjd return (0); 1016168404Spjd} 1017168404Spjd 1018168404Spjdint 1019168404Spjdzfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 1020168404Spjd{ 1021168404Spjd char *path = buf + len - 1; 1022168404Spjd int error; 1023168404Spjd 1024168404Spjd *path = '\0'; 1025168404Spjd 1026168404Spjd for (;;) { 1027168404Spjd uint64_t pobj; 1028168404Spjd char component[MAXNAMELEN + 2]; 1029168404Spjd size_t complen; 1030168404Spjd int is_xattrdir; 1031168404Spjd 1032168404Spjd if ((error = zfs_obj_to_pobj(osp, obj, &pobj, 1033168404Spjd &is_xattrdir)) != 0) 1034168404Spjd break; 1035168404Spjd 1036168404Spjd if (pobj == obj) { 1037168404Spjd if (path[0] != '/') 1038168404Spjd *--path = '/'; 1039168404Spjd break; 1040168404Spjd } 1041168404Spjd 1042168404Spjd component[0] = '/'; 1043168404Spjd if (is_xattrdir) { 1044168404Spjd (void) sprintf(component + 1, "<xattrdir>"); 1045168404Spjd } else { 1046168404Spjd error = zap_value_search(osp, pobj, obj, component + 1); 1047168404Spjd if (error != 0) 1048168404Spjd break; 1049168404Spjd } 1050168404Spjd 1051168404Spjd complen = strlen(component); 1052168404Spjd path -= complen; 1053168404Spjd ASSERT(path >= buf); 1054168404Spjd bcopy(component, path, complen); 1055168404Spjd obj = pobj; 1056168404Spjd } 1057168404Spjd 1058168404Spjd if (error == 0) 1059168404Spjd (void) memmove(buf, path, buf + len - path); 1060168404Spjd return (error); 1061168404Spjd} 1062