zfs_znode.c revision 168958
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22168404Spjd * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 27168404Spjd 28168404Spjd#ifdef _KERNEL 29168404Spjd#include <sys/types.h> 30168404Spjd#include <sys/param.h> 31168404Spjd#include <sys/time.h> 32168404Spjd#include <sys/systm.h> 33168404Spjd#include <sys/sysmacros.h> 34168404Spjd#include <sys/resource.h> 35168404Spjd#include <sys/mntent.h> 36168404Spjd#include <sys/vfs.h> 37168404Spjd#include <sys/vnode.h> 38168404Spjd#include <sys/file.h> 39168404Spjd#include <sys/kmem.h> 40168404Spjd#include <sys/cmn_err.h> 41168404Spjd#include <sys/errno.h> 42168404Spjd#include <sys/unistd.h> 43168404Spjd#include <sys/atomic.h> 44168404Spjd#include <sys/zfs_dir.h> 45168404Spjd#include <sys/zfs_acl.h> 46168404Spjd#include <sys/zfs_ioctl.h> 47168404Spjd#include <sys/zfs_rlock.h> 48168404Spjd#include <sys/fs/zfs.h> 49168404Spjd#endif /* _KERNEL */ 50168404Spjd 51168404Spjd#include <sys/dmu.h> 52168404Spjd#include <sys/refcount.h> 53168404Spjd#include <sys/stat.h> 54168404Spjd#include <sys/zap.h> 55168404Spjd#include <sys/zfs_znode.h> 56168404Spjd#include <sys/refcount.h> 57168404Spjd 58168404Spjd/* 59168404Spjd * Functions needed for userland (ie: libzpool) are not put under 60168404Spjd * #ifdef_KERNEL; the rest of the functions have dependencies 61168404Spjd * (such as VFS logic) that will not compile easily in userland. 62168404Spjd */ 63168404Spjd#ifdef _KERNEL 64168404Spjdstruct kmem_cache *znode_cache = NULL; 65168404Spjd 66168404Spjd/*ARGSUSED*/ 67168404Spjdstatic void 68168404Spjdznode_pageout_func(dmu_buf_t *dbuf, void *user_ptr) 69168404Spjd{ 70168404Spjd znode_t *zp = user_ptr; 71168488Spjd vnode_t *vp; 72168404Spjd 73168404Spjd mutex_enter(&zp->z_lock); 74168488Spjd vp = ZTOV(zp); 75168404Spjd if (vp == NULL) { 76168404Spjd mutex_exit(&zp->z_lock); 77168404Spjd zfs_znode_free(zp); 78168404Spjd } else if (vp->v_count == 0) { 79168404Spjd ZTOV(zp) = NULL; 80168488Spjd vhold(vp); 81168404Spjd mutex_exit(&zp->z_lock); 82168404Spjd vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); 83168404Spjd vrecycle(vp, curthread); 84168404Spjd VOP_UNLOCK(vp, 0, curthread); 85168404Spjd vdrop(vp); 86168404Spjd zfs_znode_free(zp); 87168404Spjd } else { 88168404Spjd /* signal force unmount that this znode can be freed */ 89168404Spjd zp->z_dbuf = NULL; 90168404Spjd mutex_exit(&zp->z_lock); 91168404Spjd } 92168404Spjd} 93168404Spjd 94168404Spjdextern struct vop_vector zfs_vnodeops; 95168404Spjdextern struct vop_vector zfs_fifoops; 96168404Spjd 97168404Spjd/* 98168404Spjd * XXX: We cannot use this function as a cache constructor, because 99168404Spjd * there is one global cache for all file systems and we need 100168404Spjd * to pass vfsp here, which is not possible, because argument 101168404Spjd * 'cdrarg' is defined at kmem_cache_create() time. 102168404Spjd */ 103168404Spjdstatic int 104168404Spjdzfs_znode_cache_constructor(void *buf, void *cdrarg, int kmflags) 105168404Spjd{ 106168404Spjd znode_t *zp = buf; 107168404Spjd vfs_t *vfsp = cdrarg; 108168404Spjd int error; 109168404Spjd 110168404Spjd if (cdrarg != NULL) { 111168404Spjd error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &zp->z_vnode); 112168404Spjd ASSERT(error == 0); 113168404Spjd zp->z_vnode->v_data = (caddr_t)zp; 114168404Spjd vhold(zp->z_vnode); 115168404Spjd } else { 116168404Spjd zp->z_vnode = NULL; 117168404Spjd } 118168404Spjd mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 119168404Spjd rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL); 120168404Spjd rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 121168404Spjd rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); 122168404Spjd mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 123168404Spjd 124168404Spjd mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); 125168404Spjd avl_create(&zp->z_range_avl, zfs_range_compare, 126168404Spjd sizeof (rl_t), offsetof(rl_t, r_node)); 127168404Spjd 128168404Spjd zp->z_dbuf_held = 0; 129168404Spjd zp->z_dirlocks = 0; 130168404Spjd zp->z_lockf = NULL; 131168404Spjd return (0); 132168404Spjd} 133168404Spjd 134168404Spjd/*ARGSUSED*/ 135168404Spjdstatic void 136168404Spjdzfs_znode_cache_destructor(void *buf, void *cdarg) 137168404Spjd{ 138168404Spjd znode_t *zp = buf; 139168404Spjd 140168404Spjd ASSERT(zp->z_dirlocks == 0); 141168404Spjd mutex_destroy(&zp->z_lock); 142168404Spjd rw_destroy(&zp->z_map_lock); 143168404Spjd rw_destroy(&zp->z_parent_lock); 144168404Spjd rw_destroy(&zp->z_name_lock); 145168404Spjd mutex_destroy(&zp->z_acl_lock); 146168404Spjd mutex_destroy(&zp->z_range_lock); 147168404Spjd avl_destroy(&zp->z_range_avl); 148168404Spjd 149168404Spjd ASSERT(zp->z_dbuf_held == 0); 150168404Spjd} 151168404Spjd 152168404Spjdvoid 153168404Spjdzfs_znode_init(void) 154168404Spjd{ 155168404Spjd /* 156168404Spjd * Initialize zcache 157168404Spjd */ 158168404Spjd ASSERT(znode_cache == NULL); 159168404Spjd znode_cache = kmem_cache_create("zfs_znode_cache", 160168404Spjd sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL, 161168404Spjd zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 162168404Spjd} 163168404Spjd 164168404Spjdvoid 165168404Spjdzfs_znode_fini(void) 166168404Spjd{ 167168404Spjd /* 168168404Spjd * Cleanup zcache 169168404Spjd */ 170168404Spjd if (znode_cache) 171168404Spjd kmem_cache_destroy(znode_cache); 172168404Spjd znode_cache = NULL; 173168404Spjd} 174168404Spjd 175168404Spjd/* 176168404Spjd * zfs_init_fs - Initialize the zfsvfs struct and the file system 177168404Spjd * incore "master" object. Verify version compatibility. 178168404Spjd */ 179168404Spjdint 180168404Spjdzfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr) 181168404Spjd{ 182168404Spjd objset_t *os = zfsvfs->z_os; 183168404Spjd uint64_t version = ZPL_VERSION; 184168404Spjd int i, error; 185168404Spjd dmu_object_info_t doi; 186168404Spjd uint64_t fsid_guid; 187168404Spjd 188168404Spjd *zpp = NULL; 189168404Spjd 190168404Spjd /* 191168404Spjd * XXX - hack to auto-create the pool root filesystem at 192168404Spjd * the first attempted mount. 193168404Spjd */ 194168404Spjd if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) { 195168404Spjd dmu_tx_t *tx = dmu_tx_create(os); 196168404Spjd 197168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */ 198168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */ 199168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */ 200168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 201168404Spjd ASSERT3U(error, ==, 0); 202168404Spjd zfs_create_fs(os, cr, tx); 203168404Spjd dmu_tx_commit(tx); 204168404Spjd } 205168404Spjd 206168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_OBJ, 8, 1, 207168404Spjd &version); 208168404Spjd if (error) { 209168404Spjd return (error); 210168404Spjd } else if (version != ZPL_VERSION) { 211168404Spjd (void) printf("Mismatched versions: File system " 212168404Spjd "is version %lld on-disk format, which is " 213168404Spjd "incompatible with this software version %lld!", 214168404Spjd (u_longlong_t)version, ZPL_VERSION); 215168404Spjd return (ENOTSUP); 216168404Spjd } 217168404Spjd 218168404Spjd /* 219168404Spjd * The fsid is 64 bits, composed of an 8-bit fs type, which 220168404Spjd * separates our fsid from any other filesystem types, and a 221168404Spjd * 56-bit objset unique ID. The objset unique ID is unique to 222168404Spjd * all objsets open on this system, provided by unique_create(). 223168404Spjd * The 8-bit fs type must be put in the low bits of fsid[1] 224168404Spjd * because that's where other Solaris filesystems put it. 225168404Spjd */ 226168404Spjd fsid_guid = dmu_objset_fsid_guid(os); 227168404Spjd ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 228168404Spjd zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid; 229168404Spjd zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 230168404Spjd zfsvfs->z_vfs->mnt_vfc->vfc_typenum & 0xFF; 231168404Spjd 232168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 233168404Spjd &zfsvfs->z_root); 234168404Spjd if (error) 235168404Spjd return (error); 236168404Spjd ASSERT(zfsvfs->z_root != 0); 237168404Spjd 238168404Spjd /* 239168404Spjd * Create the per mount vop tables. 240168404Spjd */ 241168404Spjd 242168404Spjd /* 243168404Spjd * Initialize zget mutex's 244168404Spjd */ 245168404Spjd for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 246168404Spjd mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 247168404Spjd 248168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp); 249168404Spjd if (error) 250168404Spjd return (error); 251168404Spjd ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root); 252168404Spjd 253168404Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 254168404Spjd &zfsvfs->z_unlinkedobj); 255168404Spjd if (error) 256168404Spjd return (error); 257168404Spjd 258168404Spjd return (0); 259168404Spjd} 260168404Spjd 261168404Spjd/* 262168404Spjd * define a couple of values we need available 263168404Spjd * for both 64 and 32 bit environments. 264168404Spjd */ 265168404Spjd#ifndef NBITSMINOR64 266168404Spjd#define NBITSMINOR64 32 267168404Spjd#endif 268168404Spjd#ifndef MAXMAJ64 269168404Spjd#define MAXMAJ64 0xffffffffUL 270168404Spjd#endif 271168404Spjd#ifndef MAXMIN64 272168404Spjd#define MAXMIN64 0xffffffffUL 273168404Spjd#endif 274168958Spjd#ifndef major 275168958Spjd#define major(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number */ 276168958Spjd#endif 277168958Spjd#ifndef minor 278168958Spjd#define minor(x) ((int)((x)&0xffff00ff)) /* minor number */ 279168958Spjd#endif 280168404Spjd 281168404Spjd/* 282168404Spjd * Create special expldev for ZFS private use. 283168404Spjd * Can't use standard expldev since it doesn't do 284168404Spjd * what we want. The standard expldev() takes a 285168404Spjd * dev32_t in LP64 and expands it to a long dev_t. 286168404Spjd * We need an interface that takes a dev32_t in ILP32 287168404Spjd * and expands it to a long dev_t. 288168404Spjd */ 289168404Spjdstatic uint64_t 290168404Spjdzfs_expldev(dev_t dev) 291168404Spjd{ 292168958Spjd return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); 293168404Spjd} 294168404Spjd/* 295168404Spjd * Special cmpldev for ZFS private use. 296168404Spjd * Can't use standard cmpldev since it takes 297168404Spjd * a long dev_t and compresses it to dev32_t in 298168404Spjd * LP64. We need to do a compaction of a long dev_t 299168404Spjd * to a dev32_t in ILP32. 300168404Spjd */ 301168404Spjddev_t 302168404Spjdzfs_cmpldev(uint64_t dev) 303168404Spjd{ 304168958Spjd return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); 305168404Spjd} 306168404Spjd 307168404Spjd/* 308168404Spjd * Construct a new znode/vnode and intialize. 309168404Spjd * 310168404Spjd * This does not do a call to dmu_set_user() that is 311168404Spjd * up to the caller to do, in case you don't want to 312168404Spjd * return the znode 313168404Spjd */ 314168404Spjdstatic znode_t * 315168404Spjdzfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz) 316168404Spjd{ 317168404Spjd znode_t *zp; 318168404Spjd vnode_t *vp; 319168404Spjd int error; 320168404Spjd 321168404Spjd zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 322168404Spjd zfs_znode_cache_constructor(zp, zfsvfs->z_vfs, 0); 323168404Spjd 324168404Spjd ASSERT(zp->z_dirlocks == NULL); 325168404Spjd 326168404Spjd zp->z_phys = db->db_data; 327168404Spjd zp->z_zfsvfs = zfsvfs; 328168404Spjd zp->z_unlinked = 0; 329168404Spjd zp->z_atime_dirty = 0; 330168404Spjd zp->z_dbuf_held = 0; 331168404Spjd zp->z_mapcnt = 0; 332168404Spjd zp->z_last_itx = 0; 333168404Spjd zp->z_dbuf = db; 334168404Spjd zp->z_id = obj_num; 335168404Spjd zp->z_blksz = blksz; 336168404Spjd zp->z_seq = 0x7A4653; 337168404Spjd zp->z_sync_cnt = 0; 338168404Spjd 339168404Spjd mutex_enter(&zfsvfs->z_znodes_lock); 340168404Spjd list_insert_tail(&zfsvfs->z_all_znodes, zp); 341168404Spjd mutex_exit(&zfsvfs->z_znodes_lock); 342168404Spjd 343168404Spjd vp = ZTOV(zp); 344168404Spjd if (vp == NULL) 345168404Spjd return (zp); 346168404Spjd 347168404Spjd error = insmntque(vp, zfsvfs->z_vfs); 348168404Spjd KASSERT(error == 0, ("insmntque() failed: error %d", error)); 349168404Spjd 350168404Spjd vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 351168404Spjd switch (vp->v_type) { 352168404Spjd case VDIR: 353168404Spjd zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 354168404Spjd break; 355168404Spjd case VFIFO: 356168404Spjd vp->v_op = &zfs_fifoops; 357168404Spjd break; 358168404Spjd } 359168404Spjd 360168404Spjd return (zp); 361168404Spjd} 362168404Spjd 363168404Spjdstatic void 364168404Spjdzfs_znode_dmu_init(znode_t *zp) 365168404Spjd{ 366168404Spjd znode_t *nzp; 367168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 368168404Spjd dmu_buf_t *db = zp->z_dbuf; 369168404Spjd 370168404Spjd mutex_enter(&zp->z_lock); 371168404Spjd 372168404Spjd nzp = dmu_buf_set_user(db, zp, &zp->z_phys, znode_pageout_func); 373168404Spjd 374168404Spjd /* 375168404Spjd * there should be no 376168404Spjd * concurrent zgets on this object. 377168404Spjd */ 378168404Spjd ASSERT3P(nzp, ==, NULL); 379168404Spjd 380168404Spjd /* 381168404Spjd * Slap on VROOT if we are the root znode 382168404Spjd */ 383168404Spjd if (zp->z_id == zfsvfs->z_root) { 384168404Spjd ZTOV(zp)->v_flag |= VROOT; 385168404Spjd } 386168404Spjd 387168404Spjd ASSERT(zp->z_dbuf_held == 0); 388168404Spjd zp->z_dbuf_held = 1; 389168404Spjd VFS_HOLD(zfsvfs->z_vfs); 390168404Spjd mutex_exit(&zp->z_lock); 391168404Spjd} 392168404Spjd 393168404Spjd/* 394168404Spjd * Create a new DMU object to hold a zfs znode. 395168404Spjd * 396168404Spjd * IN: dzp - parent directory for new znode 397168404Spjd * vap - file attributes for new znode 398168404Spjd * tx - dmu transaction id for zap operations 399168404Spjd * cr - credentials of caller 400168404Spjd * flag - flags: 401168404Spjd * IS_ROOT_NODE - new object will be root 402168404Spjd * IS_XATTR - new object is an attribute 403168404Spjd * IS_REPLAY - intent log replay 404168404Spjd * 405168404Spjd * OUT: oid - ID of created object 406168404Spjd * 407168404Spjd */ 408168404Spjdvoid 409168404Spjdzfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr, 410168404Spjd uint_t flag, znode_t **zpp, int bonuslen) 411168404Spjd{ 412168404Spjd dmu_buf_t *dbp; 413168404Spjd znode_phys_t *pzp; 414168404Spjd znode_t *zp; 415168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 416168404Spjd timestruc_t now; 417168404Spjd uint64_t gen; 418168404Spjd int err; 419168404Spjd 420168404Spjd ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 421168404Spjd 422168404Spjd if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ 423168404Spjd *oid = vap->va_nodeid; 424168404Spjd flag |= IS_REPLAY; 425168404Spjd now = vap->va_ctime; /* see zfs_replay_create() */ 426168404Spjd gen = vap->va_nblocks; /* ditto */ 427168404Spjd } else { 428168404Spjd *oid = 0; 429168404Spjd gethrestime(&now); 430168404Spjd gen = dmu_tx_get_txg(tx); 431168404Spjd } 432168404Spjd 433168404Spjd /* 434168404Spjd * Create a new DMU object. 435168404Spjd */ 436168404Spjd /* 437168404Spjd * There's currently no mechanism for pre-reading the blocks that will 438168404Spjd * be to needed allocate a new object, so we accept the small chance 439168404Spjd * that there will be an i/o error and we will fail one of the 440168404Spjd * assertions below. 441168404Spjd */ 442168404Spjd if (vap->va_type == VDIR) { 443168404Spjd if (flag & IS_REPLAY) { 444168404Spjd err = zap_create_claim(zfsvfs->z_os, *oid, 445168404Spjd DMU_OT_DIRECTORY_CONTENTS, 446168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 447168404Spjd ASSERT3U(err, ==, 0); 448168404Spjd } else { 449168404Spjd *oid = zap_create(zfsvfs->z_os, 450168404Spjd DMU_OT_DIRECTORY_CONTENTS, 451168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 452168404Spjd } 453168404Spjd } else { 454168404Spjd if (flag & IS_REPLAY) { 455168404Spjd err = dmu_object_claim(zfsvfs->z_os, *oid, 456168404Spjd DMU_OT_PLAIN_FILE_CONTENTS, 0, 457168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 458168404Spjd ASSERT3U(err, ==, 0); 459168404Spjd } else { 460168404Spjd *oid = dmu_object_alloc(zfsvfs->z_os, 461168404Spjd DMU_OT_PLAIN_FILE_CONTENTS, 0, 462168404Spjd DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 463168404Spjd } 464168404Spjd } 465168404Spjd VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, *oid, NULL, &dbp)); 466168404Spjd dmu_buf_will_dirty(dbp, tx); 467168404Spjd 468168404Spjd /* 469168404Spjd * Initialize the znode physical data to zero. 470168404Spjd */ 471168404Spjd ASSERT(dbp->db_size >= sizeof (znode_phys_t)); 472168404Spjd bzero(dbp->db_data, dbp->db_size); 473168404Spjd pzp = dbp->db_data; 474168404Spjd 475168404Spjd /* 476168404Spjd * If this is the root, fix up the half-initialized parent pointer 477168404Spjd * to reference the just-allocated physical data area. 478168404Spjd */ 479168404Spjd if (flag & IS_ROOT_NODE) { 480168404Spjd dzp->z_phys = pzp; 481168404Spjd dzp->z_id = *oid; 482168404Spjd } 483168404Spjd 484168404Spjd /* 485168404Spjd * If parent is an xattr, so am I. 486168404Spjd */ 487168404Spjd if (dzp->z_phys->zp_flags & ZFS_XATTR) 488168404Spjd flag |= IS_XATTR; 489168404Spjd 490168404Spjd if (vap->va_type == VBLK || vap->va_type == VCHR) { 491168404Spjd pzp->zp_rdev = zfs_expldev(vap->va_rdev); 492168404Spjd } 493168404Spjd 494168404Spjd if (vap->va_type == VDIR) { 495168404Spjd pzp->zp_size = 2; /* contents ("." and "..") */ 496168404Spjd pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 497168404Spjd } 498168404Spjd 499168404Spjd pzp->zp_parent = dzp->z_id; 500168404Spjd if (flag & IS_XATTR) 501168404Spjd pzp->zp_flags |= ZFS_XATTR; 502168404Spjd 503168404Spjd pzp->zp_gen = gen; 504168404Spjd 505168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_crtime); 506168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_ctime); 507168404Spjd 508168404Spjd if (vap->va_mask & AT_ATIME) { 509168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 510168404Spjd } else { 511168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_atime); 512168404Spjd } 513168404Spjd 514168404Spjd if (vap->va_mask & AT_MTIME) { 515168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 516168404Spjd } else { 517168404Spjd ZFS_TIME_ENCODE(&now, pzp->zp_mtime); 518168404Spjd } 519168404Spjd 520168404Spjd pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode); 521168404Spjd zp = zfs_znode_alloc(zfsvfs, dbp, *oid, 0); 522168404Spjd 523168404Spjd zfs_perm_init(zp, dzp, flag, vap, tx, cr); 524168404Spjd 525168404Spjd if (zpp) { 526168404Spjd kmutex_t *hash_mtx = ZFS_OBJ_MUTEX(zp); 527168404Spjd 528168404Spjd mutex_enter(hash_mtx); 529168404Spjd zfs_znode_dmu_init(zp); 530168404Spjd mutex_exit(hash_mtx); 531168404Spjd 532168404Spjd *zpp = zp; 533168404Spjd } else { 534168404Spjd if (ZTOV(zp) != NULL) 535168404Spjd ZTOV(zp)->v_count = 0; 536168404Spjd dmu_buf_rele(dbp, NULL); 537168404Spjd zfs_znode_free(zp); 538168404Spjd } 539168404Spjd} 540168404Spjd 541168404Spjdint 542168404Spjdzfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 543168404Spjd{ 544168404Spjd dmu_object_info_t doi; 545168404Spjd dmu_buf_t *db; 546168404Spjd znode_t *zp; 547168404Spjd vnode_t *vp; 548168404Spjd int err; 549168404Spjd 550168404Spjd *zpp = NULL; 551168404Spjd 552168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 553168404Spjd 554168404Spjd err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); 555168404Spjd if (err) { 556168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 557168404Spjd return (err); 558168404Spjd } 559168404Spjd 560168404Spjd dmu_object_info_from_db(db, &doi); 561168404Spjd if (doi.doi_bonus_type != DMU_OT_ZNODE || 562168404Spjd doi.doi_bonus_size < sizeof (znode_phys_t)) { 563168404Spjd dmu_buf_rele(db, NULL); 564168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 565168404Spjd return (EINVAL); 566168404Spjd } 567168404Spjd 568168404Spjd ASSERT(db->db_object == obj_num); 569168404Spjd ASSERT(db->db_offset == -1); 570168404Spjd ASSERT(db->db_data != NULL); 571168404Spjd 572168404Spjd zp = dmu_buf_get_user(db); 573168404Spjd 574168404Spjd if (zp != NULL) { 575168404Spjd mutex_enter(&zp->z_lock); 576168404Spjd 577168404Spjd ASSERT3U(zp->z_id, ==, obj_num); 578168404Spjd if (zp->z_unlinked) { 579168404Spjd dmu_buf_rele(db, NULL); 580168404Spjd mutex_exit(&zp->z_lock); 581168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 582168404Spjd return (ENOENT); 583168404Spjd } else if (zp->z_dbuf_held) { 584168404Spjd dmu_buf_rele(db, NULL); 585168404Spjd } else { 586168404Spjd zp->z_dbuf_held = 1; 587168404Spjd VFS_HOLD(zfsvfs->z_vfs); 588168404Spjd } 589168404Spjd 590168404Spjd if (ZTOV(zp) != NULL) 591168404Spjd VN_HOLD(ZTOV(zp)); 592168404Spjd else { 593168404Spjd err = getnewvnode("zfs", zfsvfs->z_vfs, &zfs_vnodeops, 594168404Spjd &zp->z_vnode); 595168404Spjd ASSERT(err == 0); 596168404Spjd vp = ZTOV(zp); 597168404Spjd vp->v_data = (caddr_t)zp; 598168404Spjd vhold(vp); 599168404Spjd vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 600168404Spjd if (vp->v_type == VDIR) 601168404Spjd zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 602168404Spjd err = insmntque(vp, zfsvfs->z_vfs); 603168404Spjd KASSERT(err == 0, ("insmntque() failed: error %d", err)); 604168404Spjd } 605168404Spjd mutex_exit(&zp->z_lock); 606168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 607168404Spjd *zpp = zp; 608168404Spjd return (0); 609168404Spjd } 610168404Spjd 611168404Spjd /* 612168404Spjd * Not found create new znode/vnode 613168404Spjd */ 614168404Spjd zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size); 615168404Spjd ASSERT3U(zp->z_id, ==, obj_num); 616168404Spjd zfs_znode_dmu_init(zp); 617168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 618168404Spjd *zpp = zp; 619168404Spjd return (0); 620168404Spjd} 621168404Spjd 622168404Spjdvoid 623168404Spjdzfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 624168404Spjd{ 625168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 626168404Spjd int error; 627168404Spjd 628168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 629168404Spjd if (zp->z_phys->zp_acl.z_acl_extern_obj) { 630168404Spjd error = dmu_object_free(zfsvfs->z_os, 631168404Spjd zp->z_phys->zp_acl.z_acl_extern_obj, tx); 632168404Spjd ASSERT3U(error, ==, 0); 633168404Spjd } 634168404Spjd error = dmu_object_free(zfsvfs->z_os, zp->z_id, tx); 635168404Spjd ASSERT3U(error, ==, 0); 636168404Spjd zp->z_dbuf_held = 0; 637168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 638168404Spjd dmu_buf_rele(zp->z_dbuf, NULL); 639168404Spjd} 640168404Spjd 641168404Spjdvoid 642168404Spjdzfs_zinactive(znode_t *zp) 643168404Spjd{ 644168404Spjd vnode_t *vp = ZTOV(zp); 645168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 646168404Spjd uint64_t z_id = zp->z_id; 647168404Spjd 648168404Spjd ASSERT(zp->z_dbuf_held && zp->z_phys); 649168404Spjd 650168404Spjd /* 651168404Spjd * Don't allow a zfs_zget() while were trying to release this znode 652168404Spjd */ 653168404Spjd ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 654168404Spjd 655168404Spjd mutex_enter(&zp->z_lock); 656168404Spjd VI_LOCK(vp); 657168404Spjd if (vp->v_count > 0) { 658168404Spjd /* 659168404Spjd * If the hold count is greater than zero, somebody has 660168404Spjd * obtained a new reference on this znode while we were 661168404Spjd * processing it here, so we are done. 662168404Spjd */ 663168404Spjd VI_UNLOCK(vp); 664168404Spjd mutex_exit(&zp->z_lock); 665168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 666168404Spjd return; 667168404Spjd } 668168404Spjd VI_UNLOCK(vp); 669168404Spjd 670168404Spjd /* 671168404Spjd * If this was the last reference to a file with no links, 672168404Spjd * remove the file from the file system. 673168404Spjd */ 674168404Spjd if (zp->z_unlinked) { 675168404Spjd ZTOV(zp) = NULL; 676168404Spjd mutex_exit(&zp->z_lock); 677168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 678168404Spjd ASSERT(vp->v_count == 0); 679168404Spjd vrecycle(vp, curthread); 680168404Spjd zfs_rmnode(zp); 681168404Spjd VFS_RELE(zfsvfs->z_vfs); 682168404Spjd return; 683168404Spjd } 684168404Spjd ASSERT(zp->z_phys); 685168404Spjd ASSERT(zp->z_dbuf_held); 686168404Spjd 687168404Spjd zp->z_dbuf_held = 0; 688168404Spjd mutex_exit(&zp->z_lock); 689168404Spjd dmu_buf_rele(zp->z_dbuf, NULL); 690168404Spjd ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 691168404Spjd VFS_RELE(zfsvfs->z_vfs); 692168404Spjd} 693168404Spjd 694168404Spjd/* 695168404Spjd * FreeBSD: Should be called from ->vop_reclaim(). 696168404Spjd */ 697168404Spjdvoid 698168404Spjdzfs_znode_free(znode_t *zp) 699168404Spjd{ 700168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 701168404Spjd 702168404Spjd mutex_enter(&zfsvfs->z_znodes_lock); 703168404Spjd list_remove(&zfsvfs->z_all_znodes, zp); 704168404Spjd mutex_exit(&zfsvfs->z_znodes_lock); 705168404Spjd 706168404Spjd kmem_cache_free(znode_cache, zp); 707168404Spjd} 708168404Spjd 709168404Spjdvoid 710168404Spjdzfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) 711168404Spjd{ 712168404Spjd timestruc_t now; 713168404Spjd 714168404Spjd ASSERT(MUTEX_HELD(&zp->z_lock)); 715168404Spjd 716168404Spjd gethrestime(&now); 717168404Spjd 718168404Spjd if (tx) { 719168404Spjd dmu_buf_will_dirty(zp->z_dbuf, tx); 720168404Spjd zp->z_atime_dirty = 0; 721168404Spjd zp->z_seq++; 722168404Spjd } else { 723168404Spjd zp->z_atime_dirty = 1; 724168404Spjd } 725168404Spjd 726168404Spjd if (flag & AT_ATIME) 727168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); 728168404Spjd 729168404Spjd if (flag & AT_MTIME) 730168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); 731168404Spjd 732168404Spjd if (flag & AT_CTIME) 733168404Spjd ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); 734168404Spjd} 735168404Spjd 736168404Spjd/* 737168404Spjd * Update the requested znode timestamps with the current time. 738168404Spjd * If we are in a transaction, then go ahead and mark the znode 739168404Spjd * dirty in the transaction so the timestamps will go to disk. 740168404Spjd * Otherwise, we will get pushed next time the znode is updated 741168404Spjd * in a transaction, or when this znode eventually goes inactive. 742168404Spjd * 743168404Spjd * Why is this OK? 744168404Spjd * 1 - Only the ACCESS time is ever updated outside of a transaction. 745168404Spjd * 2 - Multiple consecutive updates will be collapsed into a single 746168404Spjd * znode update by the transaction grouping semantics of the DMU. 747168404Spjd */ 748168404Spjdvoid 749168404Spjdzfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) 750168404Spjd{ 751168404Spjd mutex_enter(&zp->z_lock); 752168404Spjd zfs_time_stamper_locked(zp, flag, tx); 753168404Spjd mutex_exit(&zp->z_lock); 754168404Spjd} 755168404Spjd 756168404Spjd/* 757168404Spjd * Grow the block size for a file. 758168404Spjd * 759168404Spjd * IN: zp - znode of file to free data in. 760168404Spjd * size - requested block size 761168404Spjd * tx - open transaction. 762168404Spjd * 763168404Spjd * NOTE: this function assumes that the znode is write locked. 764168404Spjd */ 765168404Spjdvoid 766168404Spjdzfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 767168404Spjd{ 768168404Spjd int error; 769168404Spjd u_longlong_t dummy; 770168404Spjd 771168404Spjd if (size <= zp->z_blksz) 772168404Spjd return; 773168404Spjd /* 774168404Spjd * If the file size is already greater than the current blocksize, 775168404Spjd * we will not grow. If there is more than one block in a file, 776168404Spjd * the blocksize cannot change. 777168404Spjd */ 778168404Spjd if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) 779168404Spjd return; 780168404Spjd 781168404Spjd error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 782168404Spjd size, 0, tx); 783168404Spjd if (error == ENOTSUP) 784168404Spjd return; 785168404Spjd ASSERT3U(error, ==, 0); 786168404Spjd 787168404Spjd /* What blocksize did we actually get? */ 788168404Spjd dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); 789168404Spjd} 790168404Spjd 791168404Spjd/* 792168404Spjd * Free space in a file. 793168404Spjd * 794168404Spjd * IN: zp - znode of file to free data in. 795168404Spjd * off - start of section to free. 796168404Spjd * len - length of section to free (0 => to EOF). 797168404Spjd * flag - current file open mode flags. 798168404Spjd * 799168404Spjd * RETURN: 0 if success 800168404Spjd * error code if failure 801168404Spjd */ 802168404Spjdint 803168404Spjdzfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 804168404Spjd{ 805168404Spjd vnode_t *vp = ZTOV(zp); 806168404Spjd dmu_tx_t *tx; 807168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 808168404Spjd zilog_t *zilog = zfsvfs->z_log; 809168404Spjd rl_t *rl; 810168404Spjd uint64_t end = off + len; 811168404Spjd uint64_t size, new_blksz; 812168404Spjd int error; 813168404Spjd 814168404Spjd if (ZTOV(zp)->v_type == VFIFO) 815168404Spjd return (0); 816168404Spjd 817168404Spjd /* 818168404Spjd * If we will change zp_size then lock the whole file, 819168404Spjd * otherwise just lock the range being freed. 820168404Spjd */ 821168404Spjd if (len == 0 || off + len > zp->z_phys->zp_size) { 822168404Spjd rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 823168404Spjd } else { 824168404Spjd rl = zfs_range_lock(zp, off, len, RL_WRITER); 825168404Spjd /* recheck, in case zp_size changed */ 826168404Spjd if (off + len > zp->z_phys->zp_size) { 827168404Spjd /* lost race: file size changed, lock whole file */ 828168404Spjd zfs_range_unlock(rl); 829168404Spjd rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 830168404Spjd } 831168404Spjd } 832168404Spjd 833168404Spjd /* 834168404Spjd * Nothing to do if file already at desired length. 835168404Spjd */ 836168404Spjd size = zp->z_phys->zp_size; 837168404Spjd if (len == 0 && size == off) { 838168404Spjd zfs_range_unlock(rl); 839168404Spjd return (0); 840168404Spjd } 841168404Spjd 842168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 843168404Spjd dmu_tx_hold_bonus(tx, zp->z_id); 844168404Spjd new_blksz = 0; 845168404Spjd if (end > size && 846168404Spjd (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 847168404Spjd /* 848168404Spjd * We are growing the file past the current block size. 849168404Spjd */ 850168404Spjd if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 851168404Spjd ASSERT(!ISP2(zp->z_blksz)); 852168404Spjd new_blksz = MIN(end, SPA_MAXBLOCKSIZE); 853168404Spjd } else { 854168404Spjd new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 855168404Spjd } 856168404Spjd dmu_tx_hold_write(tx, zp->z_id, 0, MIN(end, new_blksz)); 857168404Spjd } else if (off < size) { 858168404Spjd /* 859168404Spjd * If len == 0, we are truncating the file. 860168404Spjd */ 861168404Spjd dmu_tx_hold_free(tx, zp->z_id, off, len ? len : DMU_OBJECT_END); 862168404Spjd } 863168404Spjd 864168404Spjd error = dmu_tx_assign(tx, zfsvfs->z_assign); 865168404Spjd if (error) { 866168404Spjd if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) 867168404Spjd dmu_tx_wait(tx); 868168404Spjd dmu_tx_abort(tx); 869168404Spjd zfs_range_unlock(rl); 870168404Spjd return (error); 871168404Spjd } 872168404Spjd 873168404Spjd if (new_blksz) 874168404Spjd zfs_grow_blocksize(zp, new_blksz, tx); 875168404Spjd 876168404Spjd if (end > size || len == 0) 877168404Spjd zp->z_phys->zp_size = end; 878168404Spjd 879168404Spjd if (off < size) { 880168404Spjd objset_t *os = zfsvfs->z_os; 881168404Spjd uint64_t rlen = len; 882168404Spjd 883168404Spjd if (len == 0) 884168404Spjd rlen = -1; 885168404Spjd else if (end > size) 886168404Spjd rlen = size - off; 887168404Spjd VERIFY(0 == dmu_free_range(os, zp->z_id, off, rlen, tx)); 888168404Spjd } 889168404Spjd 890168404Spjd if (log) { 891168404Spjd zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 892168404Spjd zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 893168404Spjd } 894168404Spjd 895168404Spjd zfs_range_unlock(rl); 896168404Spjd 897168404Spjd dmu_tx_commit(tx); 898168404Spjd 899168404Spjd /* 900168404Spjd * Clear any mapped pages in the truncated region. This has to 901168404Spjd * happen outside of the transaction to avoid the possibility of 902168404Spjd * a deadlock with someone trying to push a page that we are 903168404Spjd * about to invalidate. 904168404Spjd */ 905168404Spjd rw_enter(&zp->z_map_lock, RW_WRITER); 906168404Spjd if (end > size) 907168404Spjd vnode_pager_setsize(vp, end); 908168404Spjd else if (len == 0) { 909168404Spjd#if 0 910168404Spjd error = vtruncbuf(vp, curthread->td_ucred, curthread, end, PAGE_SIZE); 911168404Spjd#else 912168404Spjd error = vinvalbuf(vp, V_SAVE, curthread, 0, 0); 913168404Spjd vnode_pager_setsize(vp, end); 914168404Spjd#endif 915168404Spjd } 916168404Spjd rw_exit(&zp->z_map_lock); 917168404Spjd 918168404Spjd return (0); 919168404Spjd} 920168404Spjd 921168404Spjdvoid 922168404Spjdzfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx) 923168404Spjd{ 924168404Spjd zfsvfs_t zfsvfs; 925168404Spjd uint64_t moid, doid, roid = 0; 926168404Spjd uint64_t version = ZPL_VERSION; 927168404Spjd int error; 928168404Spjd znode_t *rootzp = NULL; 929168404Spjd vattr_t vattr; 930168404Spjd 931168404Spjd /* 932168404Spjd * First attempt to create master node. 933168404Spjd */ 934168404Spjd /* 935168404Spjd * In an empty objset, there are no blocks to read and thus 936168404Spjd * there can be no i/o errors (which we assert below). 937168404Spjd */ 938168404Spjd moid = MASTER_NODE_OBJ; 939168404Spjd error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 940168404Spjd DMU_OT_NONE, 0, tx); 941168404Spjd ASSERT(error == 0); 942168404Spjd 943168404Spjd /* 944168404Spjd * Set starting attributes. 945168404Spjd */ 946168404Spjd 947168404Spjd error = zap_update(os, moid, ZPL_VERSION_OBJ, 8, 1, &version, tx); 948168404Spjd ASSERT(error == 0); 949168404Spjd 950168404Spjd /* 951168404Spjd * Create a delete queue. 952168404Spjd */ 953168404Spjd doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 954168404Spjd 955168404Spjd error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx); 956168404Spjd ASSERT(error == 0); 957168404Spjd 958168404Spjd /* 959168404Spjd * Create root znode. Create minimal znode/vnode/zfsvfs 960168404Spjd * to allow zfs_mknode to work. 961168404Spjd */ 962168404Spjd vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 963168404Spjd vattr.va_type = VDIR; 964168404Spjd vattr.va_mode = S_IFDIR|0755; 965168404Spjd vattr.va_uid = UID_ROOT; 966168404Spjd vattr.va_gid = GID_WHEEL; 967168404Spjd 968168404Spjd rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 969168404Spjd zfs_znode_cache_constructor(rootzp, NULL, 0); 970168404Spjd rootzp->z_zfsvfs = &zfsvfs; 971168404Spjd rootzp->z_unlinked = 0; 972168404Spjd rootzp->z_atime_dirty = 0; 973168404Spjd rootzp->z_dbuf_held = 0; 974168404Spjd 975168404Spjd bzero(&zfsvfs, sizeof (zfsvfs_t)); 976168404Spjd 977168404Spjd zfsvfs.z_os = os; 978168404Spjd zfsvfs.z_assign = TXG_NOWAIT; 979168404Spjd zfsvfs.z_parent = &zfsvfs; 980168404Spjd 981168404Spjd mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 982168404Spjd list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 983168404Spjd offsetof(znode_t, z_link_node)); 984168404Spjd 985168404Spjd zfs_mknode(rootzp, &vattr, &roid, tx, cr, IS_ROOT_NODE, NULL, 0); 986168404Spjd ASSERT3U(rootzp->z_id, ==, roid); 987168404Spjd error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &roid, tx); 988168404Spjd ASSERT(error == 0); 989168404Spjd 990168404Spjd kmem_cache_free(znode_cache, rootzp); 991168404Spjd} 992168404Spjd#endif /* _KERNEL */ 993168404Spjd 994168404Spjd/* 995168404Spjd * Given an object number, return its parent object number and whether 996168404Spjd * or not the object is an extended attribute directory. 997168404Spjd */ 998168404Spjdstatic int 999168404Spjdzfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir) 1000168404Spjd{ 1001168404Spjd dmu_buf_t *db; 1002168404Spjd dmu_object_info_t doi; 1003168404Spjd znode_phys_t *zp; 1004168404Spjd int error; 1005168404Spjd 1006168404Spjd if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0) 1007168404Spjd return (error); 1008168404Spjd 1009168404Spjd dmu_object_info_from_db(db, &doi); 1010168404Spjd if (doi.doi_bonus_type != DMU_OT_ZNODE || 1011168404Spjd doi.doi_bonus_size < sizeof (znode_phys_t)) { 1012168404Spjd dmu_buf_rele(db, FTAG); 1013168404Spjd return (EINVAL); 1014168404Spjd } 1015168404Spjd 1016168404Spjd zp = db->db_data; 1017168404Spjd *pobjp = zp->zp_parent; 1018168404Spjd *is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) && 1019168404Spjd S_ISDIR(zp->zp_mode); 1020168404Spjd dmu_buf_rele(db, FTAG); 1021168404Spjd 1022168404Spjd return (0); 1023168404Spjd} 1024168404Spjd 1025168404Spjdint 1026168404Spjdzfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 1027168404Spjd{ 1028168404Spjd char *path = buf + len - 1; 1029168404Spjd int error; 1030168404Spjd 1031168404Spjd *path = '\0'; 1032168404Spjd 1033168404Spjd for (;;) { 1034168404Spjd uint64_t pobj; 1035168404Spjd char component[MAXNAMELEN + 2]; 1036168404Spjd size_t complen; 1037168404Spjd int is_xattrdir; 1038168404Spjd 1039168404Spjd if ((error = zfs_obj_to_pobj(osp, obj, &pobj, 1040168404Spjd &is_xattrdir)) != 0) 1041168404Spjd break; 1042168404Spjd 1043168404Spjd if (pobj == obj) { 1044168404Spjd if (path[0] != '/') 1045168404Spjd *--path = '/'; 1046168404Spjd break; 1047168404Spjd } 1048168404Spjd 1049168404Spjd component[0] = '/'; 1050168404Spjd if (is_xattrdir) { 1051168404Spjd (void) sprintf(component + 1, "<xattrdir>"); 1052168404Spjd } else { 1053168404Spjd error = zap_value_search(osp, pobj, obj, component + 1); 1054168404Spjd if (error != 0) 1055168404Spjd break; 1056168404Spjd } 1057168404Spjd 1058168404Spjd complen = strlen(component); 1059168404Spjd path -= complen; 1060168404Spjd ASSERT(path >= buf); 1061168404Spjd bcopy(component, path, complen); 1062168404Spjd obj = pobj; 1063168404Spjd } 1064168404Spjd 1065168404Spjd if (error == 0) 1066168404Spjd (void) memmove(buf, path, buf + len - path); 1067168404Spjd return (error); 1068168404Spjd} 1069