1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249643Smm * Copyright (c) 2013 by Delphix. All rights reserved. 
24168404Spjd */ 25168404Spjd 26168404Spjd#include <sys/types.h> 27168404Spjd#include <sys/param.h> 28168404Spjd#include <sys/time.h> 29168404Spjd#include <sys/systm.h> 30168404Spjd#include <sys/sysmacros.h> 31168404Spjd#include <sys/resource.h> 32168404Spjd#include <sys/vfs.h> 33168404Spjd#include <sys/vnode.h> 34168404Spjd#include <sys/file.h> 35168404Spjd#include <sys/kmem.h> 36168404Spjd#include <sys/uio.h> 37168404Spjd#include <sys/cmn_err.h> 38168404Spjd#include <sys/errno.h> 39168404Spjd#include <sys/stat.h> 40168404Spjd#include <sys/unistd.h> 41185029Spjd#include <sys/sunddi.h> 42168404Spjd#include <sys/random.h> 43169023Spjd#include <sys/policy.h> 44168404Spjd#include <sys/kcondvar.h> 45168404Spjd#include <sys/callb.h> 46168404Spjd#include <sys/smp.h> 47168404Spjd#include <sys/zfs_dir.h> 48168404Spjd#include <sys/zfs_acl.h> 49168404Spjd#include <sys/fs/zfs.h> 50168404Spjd#include <sys/zap.h> 51168404Spjd#include <sys/dmu.h> 52168404Spjd#include <sys/atomic.h> 53168404Spjd#include <sys/zfs_ctldir.h> 54185029Spjd#include <sys/zfs_fuid.h> 55219089Spjd#include <sys/sa.h> 56219089Spjd#include <sys/zfs_sa.h> 57168404Spjd#include <sys/dnlc.h> 58185029Spjd#include <sys/extdirent.h> 59168404Spjd 60168404Spjd/* 61185029Spjd * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups 62185029Spjd * of names after deciding which is the appropriate lookup interface. 
63185029Spjd */ 64185029Spjdstatic int 65185029Spjdzfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, 66185029Spjd boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) 67185029Spjd{ 68185029Spjd int error; 69185029Spjd 70185029Spjd if (zfsvfs->z_norm) { 71185029Spjd matchtype_t mt = MT_FIRST; 72185029Spjd boolean_t conflict = B_FALSE; 73185029Spjd size_t bufsz = 0; 74185029Spjd char *buf = NULL; 75185029Spjd 76185029Spjd if (rpnp) { 77185029Spjd buf = rpnp->pn_buf; 78185029Spjd bufsz = rpnp->pn_bufsize; 79185029Spjd } 80185029Spjd if (exact) 81185029Spjd mt = MT_EXACT; 82185029Spjd /* 83185029Spjd * In the non-mixed case we only expect there would ever 84185029Spjd * be one match, but we need to use the normalizing lookup. 85185029Spjd */ 86185029Spjd error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, 87185029Spjd zoid, mt, buf, bufsz, &conflict); 88185029Spjd if (!error && deflags) 89185029Spjd *deflags = conflict ? ED_CASE_CONFLICT : 0; 90185029Spjd } else { 91185029Spjd error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); 92185029Spjd } 93185029Spjd *zoid = ZFS_DIRENT_OBJ(*zoid); 94185029Spjd 95185029Spjd if (error == ENOENT && update) 96185029Spjd dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); 97185029Spjd 98185029Spjd return (error); 99185029Spjd} 100185029Spjd 101185029Spjd/* 102168404Spjd * Lock a directory entry. A dirlock on <dzp, name> protects that name 103168404Spjd * in dzp's directory zap object. As long as you hold a dirlock, you can 104168404Spjd * assume two things: (1) dzp cannot be reaped, and (2) no other thread 105168404Spjd * can change the zap entry for (i.e. link or unlink) this name. 106168404Spjd * 107168404Spjd * Input arguments: 108168404Spjd * dzp - znode for directory 109168404Spjd * name - name of entry to lock 110168404Spjd * flag - ZNEW: if the entry already exists, fail with EEXIST. 111168404Spjd * ZEXISTS: if the entry does not exist, fail with ENOENT. 
*		  ZSHARED: allow concurrent access with other ZSHARED callers.
 *		  ZXATTR: we want dzp's xattr directory
 *		  ZCILOOK: On a mixed sensitivity file system,
 *			   this lookup should be case-insensitive.
 *		  ZCIEXACT: On a purely case-insensitive file system,
 *			    this lookup should be case-sensitive.
 *		  ZRENAMING: we are locking for renaming, force narrow locks
 *		  ZHAVELOCK: Don't grab the z_name_lock for this call.  The
 *			     current thread already holds it.
 *
 * Output arguments:
 *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
 *	dlpp	- pointer to the dirlock for this entry (NULL on error)
 *	direntflags - (case-insensitive lookup only)
 *		flags if multiple case-sensitive matches exist in directory
 *	realpnp     - (case-insensitive lookup only)
 *		actual name matched within the directory
 *
 * Return value: 0 on success or errno on failure.
 *
 * NOTE: Always checks for, and rejects, '.' and '..' (and '.zfs' in a
 *	 directory that has a control directory).
 * NOTE: For case-insensitive file systems we take wide locks (see below),
 *	 but return znode pointers to a single match.
135168404Spjd */ 136168404Spjdint 137168404Spjdzfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, 138185029Spjd int flag, int *direntflags, pathname_t *realpnp) 139168404Spjd{ 140168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 141168404Spjd zfs_dirlock_t *dl; 142185029Spjd boolean_t update; 143185029Spjd boolean_t exact; 144168404Spjd uint64_t zoid; 145185029Spjd vnode_t *vp = NULL; 146185029Spjd int error = 0; 147185029Spjd int cmpflags; 148168404Spjd 149168404Spjd *zpp = NULL; 150168404Spjd *dlpp = NULL; 151168404Spjd 152168404Spjd /* 153168404Spjd * Verify that we are not trying to lock '.', '..', or '.zfs' 154168404Spjd */ 155168404Spjd if (name[0] == '.' && 156168404Spjd (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || 157168404Spjd zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) 158249643Smm return (SET_ERROR(EEXIST)); 159168404Spjd 160168404Spjd /* 161185029Spjd * Case sensitivity and normalization preferences are set when 162185029Spjd * the file system is created. These are stored in the 163185029Spjd * zfsvfs->z_case and zfsvfs->z_norm fields. These choices 164185029Spjd * affect what vnodes can be cached in the DNLC, how we 165185029Spjd * perform zap lookups, and the "width" of our dirlocks. 166185029Spjd * 167185029Spjd * A normal dirlock locks a single name. Note that with 168185029Spjd * normalization a name can be composed multiple ways, but 169185029Spjd * when normalized, these names all compare equal. A wide 170185029Spjd * dirlock locks multiple names. We need these when the file 171185029Spjd * system is supporting mixed-mode access. It is sometimes 172185029Spjd * necessary to lock all case permutations of file name at 173185029Spjd * once so that simultaneous case-insensitive/case-sensitive 174185029Spjd * behaves as rationally as possible. 
175185029Spjd */ 176185029Spjd 177185029Spjd /* 178185029Spjd * Decide if exact matches should be requested when performing 179185029Spjd * a zap lookup on file systems supporting case-insensitive 180185029Spjd * access. 181185029Spjd */ 182185029Spjd exact = 183185029Spjd ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || 184185029Spjd ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); 185185029Spjd 186185029Spjd /* 187185029Spjd * Only look in or update the DNLC if we are looking for the 188185029Spjd * name on a file system that does not require normalization 189185029Spjd * or case folding. We can also look there if we happen to be 190185029Spjd * on a non-normalizing, mixed sensitivity file system IF we 191185029Spjd * are looking for the exact name. 192185029Spjd * 193185029Spjd * Maybe can add TO-UPPERed version of name to dnlc in ci-only 194185029Spjd * case for performance improvement? 195185029Spjd */ 196185029Spjd update = !zfsvfs->z_norm || 197185029Spjd ((zfsvfs->z_case == ZFS_CASE_MIXED) && 198185029Spjd !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); 199185029Spjd 200185029Spjd /* 201185029Spjd * ZRENAMING indicates we are in a situation where we should 202185029Spjd * take narrow locks regardless of the file system's 203185029Spjd * preferences for normalizing and case folding. This will 204185029Spjd * prevent us deadlocking trying to grab the same wide lock 205185029Spjd * twice if the two names happen to be case-insensitive 206185029Spjd * matches. 207185029Spjd */ 208185029Spjd if (flag & ZRENAMING) 209185029Spjd cmpflags = 0; 210185029Spjd else 211185029Spjd cmpflags = zfsvfs->z_norm; 212185029Spjd 213185029Spjd /* 214168404Spjd * Wait until there are no locks on this name. 215208131Smm * 216208131Smm * Don't grab the the lock if it is already held. However, cannot 217208131Smm * have both ZSHARED and ZHAVELOCK together. 
218168404Spjd */ 219208131Smm ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); 220208131Smm if (!(flag & ZHAVELOCK)) 221208131Smm rw_enter(&dzp->z_name_lock, RW_READER); 222208131Smm 223168404Spjd mutex_enter(&dzp->z_lock); 224168404Spjd for (;;) { 225168404Spjd if (dzp->z_unlinked) { 226168404Spjd mutex_exit(&dzp->z_lock); 227208131Smm if (!(flag & ZHAVELOCK)) 228208131Smm rw_exit(&dzp->z_name_lock); 229249643Smm return (SET_ERROR(ENOENT)); 230168404Spjd } 231185029Spjd for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { 232185029Spjd if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, 233185029Spjd U8_UNICODE_LATEST, &error) == 0) || error != 0) 234168404Spjd break; 235185029Spjd } 236185029Spjd if (error != 0) { 237185029Spjd mutex_exit(&dzp->z_lock); 238208131Smm if (!(flag & ZHAVELOCK)) 239208131Smm rw_exit(&dzp->z_name_lock); 240249643Smm return (SET_ERROR(ENOENT)); 241185029Spjd } 242168404Spjd if (dl == NULL) { 243222268Spjd size_t namesize; 244222268Spjd 245168404Spjd /* 246168404Spjd * Allocate a new dirlock and add it to the list. 247168404Spjd */ 248222268Spjd namesize = strlen(name) + 1; 249222268Spjd dl = kmem_alloc(sizeof (zfs_dirlock_t) + namesize, 250222268Spjd KM_SLEEP); 251168404Spjd cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); 252222268Spjd dl->dl_name = (char *)(dl + 1); 253222268Spjd bcopy(name, dl->dl_name, namesize); 254168404Spjd dl->dl_sharecnt = 0; 255208131Smm dl->dl_namelock = 0; 256222268Spjd dl->dl_namesize = namesize; 257168404Spjd dl->dl_dzp = dzp; 258168404Spjd dl->dl_next = dzp->z_dirlocks; 259168404Spjd dzp->z_dirlocks = dl; 260168404Spjd break; 261168404Spjd } 262168404Spjd if ((flag & ZSHARED) && dl->dl_sharecnt != 0) 263168404Spjd break; 264168404Spjd cv_wait(&dl->dl_cv, &dzp->z_lock); 265168404Spjd } 266168404Spjd 267208131Smm /* 268208131Smm * If the z_name_lock was NOT held for this dirlock record it. 
269208131Smm */ 270208131Smm if (flag & ZHAVELOCK) 271208131Smm dl->dl_namelock = 1; 272208131Smm 273222268Spjd if (flag & ZSHARED) 274222268Spjd dl->dl_sharecnt++; 275168404Spjd 276168404Spjd mutex_exit(&dzp->z_lock); 277168404Spjd 278168404Spjd /* 279168404Spjd * We have a dirlock on the name. (Note that it is the dirlock, 280168404Spjd * not the dzp's z_lock, that protects the name in the zap object.) 281168404Spjd * See if there's an object by this name; if so, put a hold on it. 282168404Spjd */ 283168404Spjd if (flag & ZXATTR) { 284219089Spjd error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, 285219089Spjd sizeof (zoid)); 286219089Spjd if (error == 0) 287219089Spjd error = (zoid == 0 ? ENOENT : 0); 288168404Spjd } else { 289185029Spjd if (update) 290185029Spjd vp = dnlc_lookup(ZTOV(dzp), name); 291168404Spjd if (vp == DNLC_NO_VNODE) { 292168404Spjd VN_RELE(vp); 293249643Smm error = SET_ERROR(ENOENT); 294168404Spjd } else if (vp) { 295168404Spjd if (flag & ZNEW) { 296168404Spjd zfs_dirent_unlock(dl); 297168404Spjd VN_RELE(vp); 298249643Smm return (SET_ERROR(EEXIST)); 299168404Spjd } 300168404Spjd *dlpp = dl; 301168404Spjd *zpp = VTOZ(vp); 302168404Spjd return (0); 303168404Spjd } else { 304185029Spjd error = zfs_match_find(zfsvfs, dzp, name, exact, 305185029Spjd update, direntflags, realpnp, &zoid); 306168404Spjd } 307168404Spjd } 308168404Spjd if (error) { 309168404Spjd if (error != ENOENT || (flag & ZEXISTS)) { 310168404Spjd zfs_dirent_unlock(dl); 311168404Spjd return (error); 312168404Spjd } 313168404Spjd } else { 314168404Spjd if (flag & ZNEW) { 315168404Spjd zfs_dirent_unlock(dl); 316249643Smm return (SET_ERROR(EEXIST)); 317168404Spjd } 318168404Spjd error = zfs_zget(zfsvfs, zoid, zpp); 319168404Spjd if (error) { 320168404Spjd zfs_dirent_unlock(dl); 321168404Spjd return (error); 322168404Spjd } 323185029Spjd if (!(flag & ZXATTR) && update) 324168404Spjd dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); 325168404Spjd } 326168404Spjd 327168404Spjd *dlpp = 
dl; 328168404Spjd 329168404Spjd return (0); 330168404Spjd} 331168404Spjd 332168404Spjd/* 333168404Spjd * Unlock this directory entry and wake anyone who was waiting for it. 334168404Spjd */ 335168404Spjdvoid 336168404Spjdzfs_dirent_unlock(zfs_dirlock_t *dl) 337168404Spjd{ 338168404Spjd znode_t *dzp = dl->dl_dzp; 339168404Spjd zfs_dirlock_t **prev_dl, *cur_dl; 340168404Spjd 341168404Spjd mutex_enter(&dzp->z_lock); 342208131Smm 343208131Smm if (!dl->dl_namelock) 344208131Smm rw_exit(&dzp->z_name_lock); 345208131Smm 346168404Spjd if (dl->dl_sharecnt > 1) { 347168404Spjd dl->dl_sharecnt--; 348168404Spjd mutex_exit(&dzp->z_lock); 349168404Spjd return; 350168404Spjd } 351168404Spjd prev_dl = &dzp->z_dirlocks; 352168404Spjd while ((cur_dl = *prev_dl) != dl) 353168404Spjd prev_dl = &cur_dl->dl_next; 354168404Spjd *prev_dl = dl->dl_next; 355168404Spjd cv_broadcast(&dl->dl_cv); 356168404Spjd mutex_exit(&dzp->z_lock); 357168404Spjd 358168404Spjd cv_destroy(&dl->dl_cv); 359222268Spjd kmem_free(dl, sizeof (*dl) + dl->dl_namesize); 360168404Spjd} 361168404Spjd 362168404Spjd/* 363168404Spjd * Look up an entry in a directory. 364168404Spjd * 365168404Spjd * NOTE: '.' and '..' are handled as special cases because 366168404Spjd * no directory entries are actually stored for them. If this is 367168404Spjd * the root of a filesystem, then '.zfs' is also treated as a 368168404Spjd * special pseudo-directory. 369168404Spjd */ 370168404Spjdint 371185029Spjdzfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, 372185029Spjd int *deflg, pathname_t *rpnp) 373168404Spjd{ 374168404Spjd zfs_dirlock_t *dl; 375168404Spjd znode_t *zp; 376168404Spjd int error = 0; 377219089Spjd uint64_t parent; 378243767Savg int unlinked; 379168404Spjd 380168404Spjd if (name[0] == 0 || (name[0] == '.' 
&& name[1] == 0)) { 381243767Savg mutex_enter(&dzp->z_lock); 382243767Savg unlinked = dzp->z_unlinked; 383243767Savg mutex_exit(&dzp->z_lock); 384243767Savg if (unlinked) 385243767Savg return (ENOENT); 386243767Savg 387168404Spjd *vpp = ZTOV(dzp); 388168404Spjd VN_HOLD(*vpp); 389168404Spjd } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 390168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 391219089Spjd 392168404Spjd /* 393168404Spjd * If we are a snapshot mounted under .zfs, return 394168404Spjd * the vp for the snapshot directory. 395168404Spjd */ 396219089Spjd if ((error = sa_lookup(dzp->z_sa_hdl, 397219089Spjd SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 398219089Spjd return (error); 399219089Spjd if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { 400168404Spjd error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, 401185029Spjd "snapshot", vpp, NULL, 0, NULL, kcred, 402185029Spjd NULL, NULL, NULL); 403168404Spjd return (error); 404168404Spjd } 405243767Savg 406243767Savg mutex_enter(&dzp->z_lock); 407243767Savg unlinked = dzp->z_unlinked; 408243767Savg mutex_exit(&dzp->z_lock); 409243767Savg if (unlinked) 410243767Savg return (ENOENT); 411243767Savg 412168404Spjd rw_enter(&dzp->z_parent_lock, RW_READER); 413219089Spjd error = zfs_zget(zfsvfs, parent, &zp); 414168404Spjd if (error == 0) 415168404Spjd *vpp = ZTOV(zp); 416168404Spjd rw_exit(&dzp->z_parent_lock); 417168404Spjd } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { 418168404Spjd *vpp = zfsctl_root(dzp); 419168404Spjd } else { 420185029Spjd int zf; 421185029Spjd 422185029Spjd zf = ZEXISTS | ZSHARED; 423185029Spjd if (flags & FIGNORECASE) 424185029Spjd zf |= ZCILOOK; 425185029Spjd 426185029Spjd error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); 427168404Spjd if (error == 0) { 428168404Spjd *vpp = ZTOV(zp); 429168404Spjd zfs_dirent_unlock(dl); 430168404Spjd dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ 431168404Spjd } 432185029Spjd rpnp = 
NULL; 433168404Spjd } 434168404Spjd 435185029Spjd if ((flags & FIGNORECASE) && rpnp && !error) 436185029Spjd (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); 437185029Spjd 438168404Spjd return (error); 439168404Spjd} 440168404Spjd 441168404Spjd/* 442168404Spjd * unlinked Set (formerly known as the "delete queue") Error Handling 443168404Spjd * 444168404Spjd * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we 445168404Spjd * don't specify the name of the entry that we will be manipulating. We 446168404Spjd * also fib and say that we won't be adding any new entries to the 447168404Spjd * unlinked set, even though we might (this is to lower the minimum file 448168404Spjd * size that can be deleted in a full filesystem). So on the small 449168404Spjd * chance that the nlink list is using a fat zap (ie. has more than 450168404Spjd * 2000 entries), we *may* not pre-read a block that's needed. 451168404Spjd * Therefore it is remotely possible for some of the assertions 452168404Spjd * regarding the unlinked set below to fail due to i/o error. On a 453168404Spjd * nondebug system, this will result in the space being leaked. 454168404Spjd */ 455168404Spjdvoid 456168404Spjdzfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) 457168404Spjd{ 458168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 459168404Spjd 460168404Spjd ASSERT(zp->z_unlinked); 461219089Spjd ASSERT(zp->z_links == 0); 462168404Spjd 463185029Spjd VERIFY3U(0, ==, 464185029Spjd zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 465168404Spjd} 466168404Spjd 467168404Spjd/* 468168404Spjd * Clean up any znodes that had no links when we either crashed or 469168404Spjd * (force) umounted the file system. 
470168404Spjd */ 471168404Spjdvoid 472168404Spjdzfs_unlinked_drain(zfsvfs_t *zfsvfs) 473168404Spjd{ 474168404Spjd zap_cursor_t zc; 475168404Spjd zap_attribute_t zap; 476168404Spjd dmu_object_info_t doi; 477168404Spjd znode_t *zp; 478168404Spjd int error; 479168404Spjd 480168404Spjd /* 481168404Spjd * Interate over the contents of the unlinked set. 482168404Spjd */ 483168404Spjd for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); 484168404Spjd zap_cursor_retrieve(&zc, &zap) == 0; 485168404Spjd zap_cursor_advance(&zc)) { 486168404Spjd 487168404Spjd /* 488168404Spjd * See what kind of object we have in list 489168404Spjd */ 490168404Spjd 491168404Spjd error = dmu_object_info(zfsvfs->z_os, 492168404Spjd zap.za_first_integer, &doi); 493168404Spjd if (error != 0) 494168404Spjd continue; 495168404Spjd 496168404Spjd ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || 497168404Spjd (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); 498168404Spjd /* 499168404Spjd * We need to re-mark these list entries for deletion, 500168404Spjd * so we pull them back into core and set zp->z_unlinked. 501168404Spjd */ 502168404Spjd error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); 503168404Spjd 504168404Spjd /* 505168404Spjd * We may pick up znodes that are already marked for deletion. 506168404Spjd * This could happen during the purge of an extended attribute 507168404Spjd * directory. All we need to do is skip over them, since they 508168404Spjd * are already in the system marked z_unlinked. 509168404Spjd */ 510168404Spjd if (error != 0) 511168404Spjd continue; 512168404Spjd 513168404Spjd zp->z_unlinked = B_TRUE; 514168404Spjd VN_RELE(ZTOV(zp)); 515168404Spjd } 516168404Spjd zap_cursor_fini(&zc); 517168404Spjd} 518168404Spjd 519168404Spjd/* 520168404Spjd * Delete the entire contents of a directory. Return a count 521185029Spjd * of the number of entries that could not be deleted. 
If we encounter 522185029Spjd * an error, return a count of at least one so that the directory stays 523185029Spjd * in the unlinked set. 524168404Spjd * 525168404Spjd * NOTE: this function assumes that the directory is inactive, 526168404Spjd * so there is no need to lock its entries before deletion. 527168404Spjd * Also, it assumes the directory contents is *only* regular 528168404Spjd * files. 529168404Spjd */ 530168404Spjdstatic int 531168404Spjdzfs_purgedir(znode_t *dzp) 532168404Spjd{ 533168404Spjd zap_cursor_t zc; 534168404Spjd zap_attribute_t zap; 535168404Spjd znode_t *xzp; 536168404Spjd dmu_tx_t *tx; 537168404Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 538168404Spjd zfs_dirlock_t dl; 539168404Spjd int skipped = 0; 540168404Spjd int error; 541168404Spjd 542168404Spjd for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); 543168404Spjd (error = zap_cursor_retrieve(&zc, &zap)) == 0; 544168404Spjd zap_cursor_advance(&zc)) { 545168404Spjd error = zfs_zget(zfsvfs, 546168404Spjd ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); 547185029Spjd if (error) { 548185029Spjd skipped += 1; 549185029Spjd continue; 550185029Spjd } 551168404Spjd 552168404Spjd ASSERT((ZTOV(xzp)->v_type == VREG) || 553168404Spjd (ZTOV(xzp)->v_type == VLNK)); 554168404Spjd 555168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 556219089Spjd dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 557168404Spjd dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); 558219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 559168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 560219089Spjd /* Is this really needed ? 
*/ 561219089Spjd zfs_sa_upgrade_txholds(tx, xzp); 562168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 563168404Spjd if (error) { 564168404Spjd dmu_tx_abort(tx); 565168404Spjd VN_RELE(ZTOV(xzp)); 566168404Spjd skipped += 1; 567168404Spjd continue; 568168404Spjd } 569168404Spjd bzero(&dl, sizeof (dl)); 570168404Spjd dl.dl_dzp = dzp; 571168404Spjd dl.dl_name = zap.za_name; 572168404Spjd 573168404Spjd error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); 574185029Spjd if (error) 575185029Spjd skipped += 1; 576168404Spjd dmu_tx_commit(tx); 577168404Spjd 578168404Spjd VN_RELE(ZTOV(xzp)); 579168404Spjd } 580168404Spjd zap_cursor_fini(&zc); 581185029Spjd if (error != ENOENT) 582185029Spjd skipped += 1; 583168404Spjd return (skipped); 584168404Spjd} 585168404Spjd 586168404Spjdvoid 587168404Spjdzfs_rmnode(znode_t *zp) 588168404Spjd{ 589168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 590168404Spjd objset_t *os = zfsvfs->z_os; 591168404Spjd znode_t *xzp = NULL; 592168404Spjd dmu_tx_t *tx; 593168404Spjd uint64_t acl_obj; 594219089Spjd uint64_t xattr_obj; 595168404Spjd int error; 596168404Spjd 597219089Spjd ASSERT(zp->z_links == 0); 598168404Spjd 599168404Spjd /* 600168404Spjd * If this is an attribute directory, purge its contents. 601168404Spjd */ 602168404Spjd if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && 603219089Spjd (zp->z_pflags & ZFS_XATTR)) { 604168404Spjd if (zfs_purgedir(zp) != 0) { 605168404Spjd /* 606168404Spjd * Not enough space to delete some xattrs. 607185029Spjd * Leave it in the unlinked set. 608168404Spjd */ 609185029Spjd zfs_znode_dmu_fini(zp); 610185029Spjd zfs_znode_free(zp); 611168404Spjd return; 612168404Spjd } 613168404Spjd } 614168404Spjd 615168404Spjd /* 616185029Spjd * Free up all the data in the file. 617185029Spjd */ 618185029Spjd error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); 619185029Spjd if (error) { 620185029Spjd /* 621185029Spjd * Not enough space. Leave the file in the unlinked set. 
622185029Spjd */ 623185029Spjd zfs_znode_dmu_fini(zp); 624185029Spjd zfs_znode_free(zp); 625185029Spjd return; 626185029Spjd } 627185029Spjd 628185029Spjd /* 629168404Spjd * If the file has extended attributes, we're going to unlink 630168404Spjd * the xattr dir. 631168404Spjd */ 632219089Spjd error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 633219089Spjd &xattr_obj, sizeof (xattr_obj)); 634219089Spjd if (error == 0 && xattr_obj) { 635219089Spjd error = zfs_zget(zfsvfs, xattr_obj, &xzp); 636168404Spjd ASSERT(error == 0); 637168404Spjd } 638168404Spjd 639219089Spjd acl_obj = zfs_external_acl(zp); 640168404Spjd 641168404Spjd /* 642185029Spjd * Set up the final transaction. 643168404Spjd */ 644168404Spjd tx = dmu_tx_create(os); 645168404Spjd dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 646168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 647168404Spjd if (xzp) { 648168404Spjd dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); 649219089Spjd dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 650168404Spjd } 651168404Spjd if (acl_obj) 652168404Spjd dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 653219089Spjd 654219089Spjd zfs_sa_upgrade_txholds(tx, zp); 655168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 656168404Spjd if (error) { 657168404Spjd /* 658168404Spjd * Not enough space to delete the file. Leave it in the 659168404Spjd * unlinked set, leaking it until the fs is remounted (at 660168404Spjd * which point we'll call zfs_unlinked_drain() to process it). 
661168404Spjd */ 662168404Spjd dmu_tx_abort(tx); 663185029Spjd zfs_znode_dmu_fini(zp); 664185029Spjd zfs_znode_free(zp); 665185029Spjd goto out; 666168404Spjd } 667168404Spjd 668168404Spjd if (xzp) { 669219089Spjd ASSERT(error == 0); 670168404Spjd mutex_enter(&xzp->z_lock); 671168404Spjd xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ 672219089Spjd xzp->z_links = 0; /* no more links to it */ 673219089Spjd VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 674219089Spjd &xzp->z_links, sizeof (xzp->z_links), tx)); 675168404Spjd mutex_exit(&xzp->z_lock); 676168404Spjd zfs_unlinked_add(xzp, tx); 677168404Spjd } 678168404Spjd 679168404Spjd /* Remove this znode from the unlinked set */ 680185029Spjd VERIFY3U(0, ==, 681185029Spjd zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 682168404Spjd 683168404Spjd zfs_znode_delete(zp, tx); 684168404Spjd 685168404Spjd dmu_tx_commit(tx); 686185029Spjdout: 687168404Spjd if (xzp) 688168404Spjd VN_RELE(ZTOV(xzp)); 689168404Spjd} 690168404Spjd 691185029Spjdstatic uint64_t 692219089Spjdzfs_dirent(znode_t *zp, uint64_t mode) 693185029Spjd{ 694185029Spjd uint64_t de = zp->z_id; 695219089Spjd 696185029Spjd if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) 697219089Spjd de |= IFTODT(mode) << 60; 698185029Spjd return (de); 699185029Spjd} 700185029Spjd 701168404Spjd/* 702168404Spjd * Link zp into dl. Can only fail if zp has been unlinked. 
703168404Spjd */ 704168404Spjdint 705168404Spjdzfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) 706168404Spjd{ 707168404Spjd znode_t *dzp = dl->dl_dzp; 708219089Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 709168404Spjd vnode_t *vp = ZTOV(zp); 710168404Spjd uint64_t value; 711168404Spjd int zp_is_dir = (vp->v_type == VDIR); 712219089Spjd sa_bulk_attr_t bulk[5]; 713219089Spjd uint64_t mtime[2], ctime[2]; 714219089Spjd int count = 0; 715168404Spjd int error; 716168404Spjd 717168404Spjd mutex_enter(&zp->z_lock); 718168404Spjd 719168404Spjd if (!(flag & ZRENAMING)) { 720168404Spjd if (zp->z_unlinked) { /* no new links to unlinked zp */ 721168404Spjd ASSERT(!(flag & (ZNEW | ZEXISTS))); 722168404Spjd mutex_exit(&zp->z_lock); 723249643Smm return (SET_ERROR(ENOENT)); 724168404Spjd } 725219089Spjd zp->z_links++; 726219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 727219089Spjd &zp->z_links, sizeof (zp->z_links)); 728219089Spjd 729168404Spjd } 730219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 731219089Spjd &dzp->z_id, sizeof (dzp->z_id)); 732219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 733219089Spjd &zp->z_pflags, sizeof (zp->z_pflags)); 734168404Spjd 735219089Spjd if (!(flag & ZNEW)) { 736219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 737219089Spjd ctime, sizeof (ctime)); 738219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 739219089Spjd ctime, B_TRUE); 740219089Spjd } 741219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 742219089Spjd ASSERT(error == 0); 743219089Spjd 744168404Spjd mutex_exit(&zp->z_lock); 745168404Spjd 746168404Spjd mutex_enter(&dzp->z_lock); 747219089Spjd dzp->z_size++; 748219089Spjd dzp->z_links += zp_is_dir; 749219089Spjd count = 0; 750219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 751219089Spjd &dzp->z_size, sizeof (dzp->z_size)); 752219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 
753219089Spjd &dzp->z_links, sizeof (dzp->z_links)); 754219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 755219089Spjd mtime, sizeof (mtime)); 756219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 757219089Spjd ctime, sizeof (ctime)); 758219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 759219089Spjd &dzp->z_pflags, sizeof (dzp->z_pflags)); 760219089Spjd zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 761219089Spjd error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 762219089Spjd ASSERT(error == 0); 763168404Spjd mutex_exit(&dzp->z_lock); 764168404Spjd 765219089Spjd value = zfs_dirent(zp, zp->z_mode); 766168404Spjd error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, 767168404Spjd 8, 1, &value, tx); 768168404Spjd ASSERT(error == 0); 769168404Spjd 770168404Spjd dnlc_update(ZTOV(dzp), dl->dl_name, vp); 771168404Spjd 772168404Spjd return (0); 773168404Spjd} 774168404Spjd 775219089Spjdstatic int 776219089Spjdzfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, 777219089Spjd int flag) 778219089Spjd{ 779219089Spjd int error; 780219089Spjd 781219089Spjd if (zp->z_zfsvfs->z_norm) { 782219089Spjd if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && 783219089Spjd (flag & ZCIEXACT)) || 784219089Spjd ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && 785219089Spjd !(flag & ZCILOOK))) 786219089Spjd error = zap_remove_norm(zp->z_zfsvfs->z_os, 787219089Spjd dzp->z_id, dl->dl_name, MT_EXACT, tx); 788219089Spjd else 789219089Spjd error = zap_remove_norm(zp->z_zfsvfs->z_os, 790219089Spjd dzp->z_id, dl->dl_name, MT_FIRST, tx); 791219089Spjd } else { 792219089Spjd error = zap_remove(zp->z_zfsvfs->z_os, 793219089Spjd dzp->z_id, dl->dl_name, tx); 794219089Spjd } 795219089Spjd 796219089Spjd return (error); 797219089Spjd} 798219089Spjd 799168404Spjd/* 800168404Spjd * Unlink zp from dl, and mark zp for deletion if this was the last link. 
801168404Spjd * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). 802168404Spjd * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. 803168404Spjd * If it's non-NULL, we use it to indicate whether the znode needs deletion, 804168404Spjd * and it's the caller's job to do it. 805168404Spjd */ 806168404Spjdint 807168404Spjdzfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, 808168404Spjd boolean_t *unlinkedp) 809168404Spjd{ 810168404Spjd znode_t *dzp = dl->dl_dzp; 811219089Spjd zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 812168404Spjd vnode_t *vp = ZTOV(zp); 813168404Spjd int zp_is_dir = (vp->v_type == VDIR); 814168404Spjd boolean_t unlinked = B_FALSE; 815219089Spjd sa_bulk_attr_t bulk[5]; 816219089Spjd uint64_t mtime[2], ctime[2]; 817219089Spjd int count = 0; 818168404Spjd int error; 819168404Spjd 820168404Spjd dnlc_remove(ZTOV(dzp), dl->dl_name); 821168404Spjd 822168404Spjd if (!(flag & ZRENAMING)) { 823168404Spjd if (vn_vfswlock(vp)) /* prevent new mounts on zp */ 824249643Smm return (SET_ERROR(EBUSY)); 825168404Spjd 826168404Spjd if (vn_ismntpt(vp)) { /* don't remove mount point */ 827168404Spjd vn_vfsunlock(vp); 828249643Smm return (SET_ERROR(EBUSY)); 829168404Spjd } 830168404Spjd 831168404Spjd mutex_enter(&zp->z_lock); 832219089Spjd 833219089Spjd if (zp_is_dir && !zfs_dirempty(zp)) { 834168404Spjd mutex_exit(&zp->z_lock); 835168404Spjd vn_vfsunlock(vp); 836249643Smm#ifdef illumos 837249643Smm return (SET_ERROR(EEXIST)); 838249643Smm#else 839249643Smm return (SET_ERROR(ENOTEMPTY)); 840249643Smm#endif 841168404Spjd } 842219089Spjd 843219089Spjd /* 844219089Spjd * If we get here, we are going to try to remove the object. 845219089Spjd * First try removing the name from the directory; if that 846219089Spjd * fails, return the error. 
847219089Spjd */ 848219089Spjd error = zfs_dropname(dl, zp, dzp, tx, flag); 849219089Spjd if (error != 0) { 850219089Spjd mutex_exit(&zp->z_lock); 851219089Spjd vn_vfsunlock(vp); 852219089Spjd return (error); 853219089Spjd } 854219089Spjd 855219089Spjd if (zp->z_links <= zp_is_dir) { 856168404Spjd zfs_panic_recover("zfs: link count on vnode %p is %u, " 857168404Spjd "should be at least %u", zp->z_vnode, 858219089Spjd (int)zp->z_links, 859168404Spjd zp_is_dir + 1); 860219089Spjd zp->z_links = zp_is_dir + 1; 861168404Spjd } 862219089Spjd if (--zp->z_links == zp_is_dir) { 863168404Spjd zp->z_unlinked = B_TRUE; 864219089Spjd zp->z_links = 0; 865168404Spjd unlinked = B_TRUE; 866168404Spjd } else { 867219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 868219089Spjd NULL, &ctime, sizeof (ctime)); 869219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 870219089Spjd NULL, &zp->z_pflags, sizeof (zp->z_pflags)); 871219089Spjd zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 872219089Spjd B_TRUE); 873168404Spjd } 874219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 875219089Spjd NULL, &zp->z_links, sizeof (zp->z_links)); 876219089Spjd error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 877219089Spjd count = 0; 878219089Spjd ASSERT(error == 0); 879168404Spjd mutex_exit(&zp->z_lock); 880168404Spjd vn_vfsunlock(vp); 881219089Spjd } else { 882219089Spjd error = zfs_dropname(dl, zp, dzp, tx, flag); 883219089Spjd if (error != 0) 884219089Spjd return (error); 885168404Spjd } 886168404Spjd 887168404Spjd mutex_enter(&dzp->z_lock); 888219089Spjd dzp->z_size--; /* one dirent removed */ 889219089Spjd dzp->z_links -= zp_is_dir; /* ".." 
link from zp */ 890219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 891219089Spjd NULL, &dzp->z_links, sizeof (dzp->z_links)); 892219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 893219089Spjd NULL, &dzp->z_size, sizeof (dzp->z_size)); 894219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 895219089Spjd NULL, ctime, sizeof (ctime)); 896219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 897219089Spjd NULL, mtime, sizeof (mtime)); 898219089Spjd SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 899219089Spjd NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); 900219089Spjd zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 901219089Spjd error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 902219089Spjd ASSERT(error == 0); 903168404Spjd mutex_exit(&dzp->z_lock); 904168404Spjd 905168404Spjd if (unlinkedp != NULL) 906168404Spjd *unlinkedp = unlinked; 907168404Spjd else if (unlinked) 908168404Spjd zfs_unlinked_add(zp, tx); 909168404Spjd 910168404Spjd return (0); 911168404Spjd} 912168404Spjd 913168404Spjd/* 914168404Spjd * Indicate whether the directory is empty. Works with or without z_lock 915168404Spjd * held, but can only be consider a hint in the latter case. Returns true 916168404Spjd * if only "." and ".." remain and there's no work in progress. 
917168404Spjd */ 918168404Spjdboolean_t 919168404Spjdzfs_dirempty(znode_t *dzp) 920168404Spjd{ 921219089Spjd return (dzp->z_size == 2 && dzp->z_dirlocks == 0); 922168404Spjd} 923168404Spjd 924168404Spjdint 925168404Spjdzfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) 926168404Spjd{ 927168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 928168404Spjd znode_t *xzp; 929168404Spjd dmu_tx_t *tx; 930168404Spjd int error; 931209962Smm zfs_acl_ids_t acl_ids; 932209962Smm boolean_t fuid_dirtied; 933219089Spjd uint64_t parent; 934168404Spjd 935168404Spjd *xvpp = NULL; 936168404Spjd 937195785Strasz /* 938195785Strasz * In FreeBSD, access checking for creating an EA is being done 939195785Strasz * in zfs_setextattr(), 940195785Strasz */ 941195785Strasz#ifndef __FreeBSD__ 942185029Spjd if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) 943168404Spjd return (error); 944195785Strasz#endif 945168404Spjd 946209962Smm if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, 947209962Smm &acl_ids)) != 0) 948209962Smm return (error); 949209962Smm if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 950209962Smm zfs_acl_ids_free(&acl_ids); 951249643Smm return (SET_ERROR(EDQUOT)); 952209962Smm } 953209962Smm 954262111Savg getnewvnode_reserve(1); 955262111Savg 956168404Spjd tx = dmu_tx_create(zfsvfs->z_os); 957219089Spjd dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 958219089Spjd ZFS_SA_BASE_ATTR_SIZE); 959219089Spjd dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 960168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 961209962Smm fuid_dirtied = zfsvfs->z_fuid_dirty; 962209962Smm if (fuid_dirtied) 963209962Smm zfs_fuid_txhold(zfsvfs, tx); 964260777Savg error = dmu_tx_assign(tx, TXG_WAIT); 965168404Spjd if (error) { 966209962Smm zfs_acl_ids_free(&acl_ids); 967168404Spjd dmu_tx_abort(tx); 968168404Spjd return (error); 969168404Spjd } 970219089Spjd zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); 971209962Smm 972209962Smm if (fuid_dirtied) 
973209962Smm zfs_fuid_sync(zfsvfs, tx); 974209962Smm 975219089Spjd#ifdef DEBUG 976219089Spjd error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 977219089Spjd &parent, sizeof (parent)); 978219089Spjd ASSERT(error == 0 && parent == zp->z_id); 979219089Spjd#endif 980168404Spjd 981219089Spjd VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, 982219089Spjd sizeof (xzp->z_id), tx)); 983219089Spjd 984185029Spjd (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, 985209962Smm xzp, "", NULL, acl_ids.z_fuidp, vap); 986209962Smm 987209962Smm zfs_acl_ids_free(&acl_ids); 988168404Spjd dmu_tx_commit(tx); 989168404Spjd 990262111Savg getnewvnode_drop_reserve(); 991262111Savg 992168404Spjd *xvpp = ZTOV(xzp); 993168404Spjd 994168404Spjd return (0); 995168404Spjd} 996168404Spjd 997168404Spjd/* 998168404Spjd * Return a znode for the extended attribute directory for zp. 999168404Spjd * ** If the directory does not already exist, it is created ** 1000168404Spjd * 1001168404Spjd * IN: zp - znode to obtain attribute directory from 1002168404Spjd * cr - credentials of caller 1003168404Spjd * flags - flags from the VOP_LOOKUP call 1004168404Spjd * 1005168404Spjd * OUT: xzpp - pointer to extended attribute znode 1006168404Spjd * 1007168404Spjd * RETURN: 0 on success 1008168404Spjd * error number on failure 1009168404Spjd */ 1010168404Spjdint 1011168404Spjdzfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) 1012168404Spjd{ 1013168404Spjd zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1014168404Spjd znode_t *xzp; 1015168404Spjd zfs_dirlock_t *dl; 1016168404Spjd vattr_t va; 1017168404Spjd int error; 1018168404Spjdtop: 1019185029Spjd error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); 1020168404Spjd if (error) 1021168404Spjd return (error); 1022168404Spjd 1023168404Spjd if (xzp != NULL) { 1024168404Spjd *xvpp = ZTOV(xzp); 1025168404Spjd zfs_dirent_unlock(dl); 1026168404Spjd return (0); 1027168404Spjd } 1028168404Spjd 1029168404Spjd 1030168404Spjd if 
(!(flags & CREATE_XATTR_DIR)) { 1031168404Spjd zfs_dirent_unlock(dl); 1032249643Smm#ifdef illumos 1033249643Smm return (SET_ERROR(ENOENT)); 1034195785Strasz#else 1035249643Smm return (SET_ERROR(ENOATTR)); 1036195785Strasz#endif 1037168404Spjd } 1038168404Spjd 1039168404Spjd if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 1040168404Spjd zfs_dirent_unlock(dl); 1041249643Smm return (SET_ERROR(EROFS)); 1042168404Spjd } 1043168404Spjd 1044168404Spjd /* 1045168404Spjd * The ability to 'create' files in an attribute 1046168404Spjd * directory comes from the write_xattr permission on the base file. 1047168404Spjd * 1048168404Spjd * The ability to 'search' an attribute directory requires 1049168404Spjd * read_xattr permission on the base file. 1050168404Spjd * 1051168404Spjd * Once in a directory the ability to read/write attributes 1052168404Spjd * is controlled by the permissions on the attribute file. 1053168404Spjd */ 1054168404Spjd va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; 1055168404Spjd va.va_type = VDIR; 1056168404Spjd va.va_mode = S_IFDIR | S_ISVTX | 0777; 1057185029Spjd zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); 1058168404Spjd 1059168404Spjd error = zfs_make_xattrdir(zp, &va, xvpp, cr); 1060168404Spjd zfs_dirent_unlock(dl); 1061168404Spjd 1062209962Smm if (error == ERESTART) { 1063168404Spjd /* NB: we already did dmu_tx_wait() if necessary */ 1064168404Spjd goto top; 1065168404Spjd } 1066189967Sjhb if (error == 0) 1067189967Sjhb VOP_UNLOCK(*xvpp, 0); 1068168404Spjd 1069168404Spjd return (error); 1070168404Spjd} 1071168404Spjd 1072168404Spjd/* 1073168404Spjd * Decide whether it is okay to remove within a sticky directory. 
1074168404Spjd * 1075168404Spjd * In sticky directories, write access is not sufficient; 1076168404Spjd * you can remove entries from a directory only if: 1077168404Spjd * 1078168404Spjd * you own the directory, 1079168404Spjd * you own the entry, 1080168404Spjd * the entry is a plain file and you have write access, 1081168404Spjd * or you are privileged (checked in secpolicy...). 1082168404Spjd * 1083168404Spjd * The function returns 0 if remove access is granted. 1084168404Spjd */ 1085168404Spjdint 1086168404Spjdzfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) 1087168404Spjd{ 1088168404Spjd uid_t uid; 1089185029Spjd uid_t downer; 1090185029Spjd uid_t fowner; 1091185029Spjd zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1092168404Spjd 1093209962Smm if (zdp->z_zfsvfs->z_replay) 1094168404Spjd return (0); 1095168404Spjd 1096219089Spjd if ((zdp->z_mode & S_ISVTX) == 0) 1097185029Spjd return (0); 1098185029Spjd 1099219089Spjd downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); 1100219089Spjd fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); 1101185029Spjd 1102185029Spjd if ((uid = crgetuid(cr)) == downer || uid == fowner || 1103168404Spjd (ZTOV(zp)->v_type == VREG && 1104185029Spjd zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) 1105168404Spjd return (0); 1106168404Spjd else 1107185029Spjd return (secpolicy_vnode_remove(ZTOV(zp), cr)); 1108168404Spjd} 1109