1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 24 */ 25 26#include <sys/types.h> 27#include <sys/param.h> 28#include <sys/time.h> 29#include <sys/systm.h> 30#include <sys/sysmacros.h> 31#include <sys/resource.h> 32#include <sys/vfs.h> 33#include <sys/vnode.h> 34#include <sys/file.h> 35#include <sys/kmem.h> 36#include <sys/uio.h> 37#include <sys/cmn_err.h> 38#include <sys/errno.h> 39#include <sys/stat.h> 40#include <sys/unistd.h> 41#include <sys/sunddi.h> 42#include <sys/random.h> 43#include <sys/policy.h> 44#ifdef __FreeBSD__ 45#include <sys/kcondvar.h> 46#include <sys/callb.h> 47#include <sys/smp.h> 48#endif 49#include <sys/zfs_dir.h> 50#include <sys/zfs_acl.h> 51#include <sys/fs/zfs.h> 52#include <sys/zap.h> 53#include <sys/dmu.h> 54#include <sys/atomic.h> 55#include <sys/zfs_ctldir.h> 56#include <sys/zfs_fuid.h> 57#include <sys/sa.h> 58#include <sys/zfs_sa.h> 59#include <sys/dnlc.h> 60#include <sys/extdirent.h> 61 62/* 63 * zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups 64 * of names after deciding which is the appropriate lookup interface. 65 */ 66static int 67zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name, 68 boolean_t exact, uint64_t *zoid) 69{ 70 int error; 71 72 if (zfsvfs->z_norm) { 73 matchtype_t mt = exact? MT_EXACT : MT_FIRST; 74 75 /* 76 * In the non-mixed case we only expect there would ever 77 * be one match, but we need to use the normalizing lookup. 78 */ 79 error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, 80 zoid, mt, NULL, 0, NULL); 81 } else { 82 error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); 83 } 84 *zoid = ZFS_DIRENT_OBJ(*zoid); 85 86 return (error); 87} 88 89/* 90 * Look up a directory entry under a locked vnode. 91 * dvp being locked gives us a guarantee that there are no concurrent 92 * modification of the directory and, thus, if a node can be found in 93 * the directory, then it must not be unlinked. 94 * 95 * Input arguments: 96 * dzp - znode for directory 97 * name - name of entry to lock 98 * flag - ZNEW: if the entry already exists, fail with EEXIST. 99 * ZEXISTS: if the entry does not exist, fail with ENOENT. 100 * ZXATTR: we want dzp's xattr directory 101 * 102 * Output arguments: 103 * zpp - pointer to the znode for the entry (NULL if there isn't one) 104 * 105 * Return value: 0 on success or errno on failure. 106 * 107 * NOTE: Always checks for, and rejects, '.' and '..'. 108 */ 109int 110zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag) 111{ 112 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 113 boolean_t exact; 114 uint64_t zoid; 115 vnode_t *vp = NULL; 116 int error = 0; 117 118 ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 119 120 *zpp = NULL; 121 122 /* 123 * Verify that we are not trying to lock '.', '..', or '.zfs' 124 */ 125 if (name[0] == '.' && 126 (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || 127 zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) 128 return (SET_ERROR(EEXIST)); 129 130 /* 131 * Case sensitivity and normalization preferences are set when 132 * the file system is created. These are stored in the 133 * zfsvfs->z_case and zfsvfs->z_norm fields. These choices 134 * affect how we perform zap lookups. 135 * 136 * Decide if exact matches should be requested when performing 137 * a zap lookup on file systems supporting case-insensitive 138 * access. 139 * 140 * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE 141 * because in that case MT_EXACT and MT_FIRST should produce exactly 142 * the same result. 143 */ 144 exact = zfsvfs->z_case == ZFS_CASE_MIXED; 145 146 if (dzp->z_unlinked && !(flag & ZXATTR)) 147 return (ENOENT); 148 if (flag & ZXATTR) { 149 error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, 150 sizeof (zoid)); 151 if (error == 0) 152 error = (zoid == 0 ? ENOENT : 0); 153 } else { 154 error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid); 155 } 156 if (error) { 157 if (error != ENOENT || (flag & ZEXISTS)) { 158 return (error); 159 } 160 } else { 161 if (flag & ZNEW) { 162 return (SET_ERROR(EEXIST)); 163 } 164 error = zfs_zget(zfsvfs, zoid, zpp); 165 if (error) 166 return (error); 167 ASSERT(!(*zpp)->z_unlinked); 168 } 169 170 return (0); 171} 172 173static int 174zfs_dd_lookup(znode_t *dzp, znode_t **zpp) 175{ 176 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 177 znode_t *zp; 178 uint64_t parent; 179 int error; 180 181 ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 182 ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock)); 183 184 if (dzp->z_unlinked) 185 return (ENOENT); 186 187 if ((error = sa_lookup(dzp->z_sa_hdl, 188 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 189 return (error); 190 191 error = zfs_zget(zfsvfs, parent, &zp); 192 if (error == 0) 193 *zpp = zp; 194 return (error); 195} 196 197int 198zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp) 199{ 200 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 201 znode_t *zp; 202 int error = 0; 203 204 ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); 205 ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock)); 206 207 if (dzp->z_unlinked) 208 return (SET_ERROR(ENOENT)); 209 210 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 211 *zpp = dzp; 212 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 213 error = zfs_dd_lookup(dzp, zpp); 214 } else { 215 error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS); 216 if (error == 0) { 217 dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ 218 *zpp = zp; 219 } 220 } 221 return (error); 222} 223 224/* 225 * unlinked Set (formerly known as the "delete queue") Error Handling 226 * 227 * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we 228 * don't specify the name of the entry that we will be manipulating. We 229 * also fib and say that we won't be adding any new entries to the 230 * unlinked set, even though we might (this is to lower the minimum file 231 * size that can be deleted in a full filesystem). So on the small 232 * chance that the nlink list is using a fat zap (ie. has more than 233 * 2000 entries), we *may* not pre-read a block that's needed. 234 * Therefore it is remotely possible for some of the assertions 235 * regarding the unlinked set below to fail due to i/o error. On a 236 * nondebug system, this will result in the space being leaked. 237 */ 238void 239zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) 240{ 241 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 242 243 ASSERT(zp->z_unlinked); 244 ASSERT(zp->z_links == 0); 245 246 VERIFY3U(0, ==, 247 zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 248} 249 250/* 251 * Clean up any znodes that had no links when we either crashed or 252 * (force) umounted the file system. 253 */ 254void 255zfs_unlinked_drain(zfsvfs_t *zfsvfs) 256{ 257 zap_cursor_t zc; 258 zap_attribute_t zap; 259 dmu_object_info_t doi; 260 znode_t *zp; 261 int error; 262 263 /* 264 * Interate over the contents of the unlinked set. 265 */ 266 for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); 267 zap_cursor_retrieve(&zc, &zap) == 0; 268 zap_cursor_advance(&zc)) { 269 270 /* 271 * See what kind of object we have in list 272 */ 273 274 error = dmu_object_info(zfsvfs->z_os, 275 zap.za_first_integer, &doi); 276 if (error != 0) 277 continue; 278 279 ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || 280 (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); 281 /* 282 * We need to re-mark these list entries for deletion, 283 * so we pull them back into core and set zp->z_unlinked. 284 */ 285 error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); 286 287 /* 288 * We may pick up znodes that are already marked for deletion. 289 * This could happen during the purge of an extended attribute 290 * directory. All we need to do is skip over them, since they 291 * are already in the system marked z_unlinked. 292 */ 293 if (error != 0) 294 continue; 295 296 vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY); 297 zp->z_unlinked = B_TRUE; 298 vput(ZTOV(zp)); 299 } 300 zap_cursor_fini(&zc); 301} 302 303/* 304 * Delete the entire contents of a directory. Return a count 305 * of the number of entries that could not be deleted. If we encounter 306 * an error, return a count of at least one so that the directory stays 307 * in the unlinked set. 308 * 309 * NOTE: this function assumes that the directory is inactive, 310 * so there is no need to lock its entries before deletion. 311 * Also, it assumes the directory contents is *only* regular 312 * files. 313 */ 314static int 315zfs_purgedir(znode_t *dzp) 316{ 317 zap_cursor_t zc; 318 zap_attribute_t zap; 319 znode_t *xzp; 320 dmu_tx_t *tx; 321 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 322 int skipped = 0; 323 int error; 324 325 for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); 326 (error = zap_cursor_retrieve(&zc, &zap)) == 0; 327 zap_cursor_advance(&zc)) { 328 error = zfs_zget(zfsvfs, 329 ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); 330 if (error) { 331 skipped += 1; 332 continue; 333 } 334 335 vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); 336 ASSERT((ZTOV(xzp)->v_type == VREG) || 337 (ZTOV(xzp)->v_type == VLNK)); 338 339 tx = dmu_tx_create(zfsvfs->z_os); 340 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 341 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); 342 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 343 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 344 /* Is this really needed ? */ 345 zfs_sa_upgrade_txholds(tx, xzp); 346 dmu_tx_mark_netfree(tx); 347 error = dmu_tx_assign(tx, TXG_WAIT); 348 if (error) { 349 dmu_tx_abort(tx); 350 vput(ZTOV(xzp)); 351 skipped += 1; 352 continue; 353 } 354 355 error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL); 356 if (error) 357 skipped += 1; 358 dmu_tx_commit(tx); 359 360 vput(ZTOV(xzp)); 361 } 362 zap_cursor_fini(&zc); 363 if (error != ENOENT) 364 skipped += 1; 365 return (skipped); 366} 367 368void 369zfs_rmnode(znode_t *zp) 370{ 371 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 372 objset_t *os = zfsvfs->z_os; 373 znode_t *xzp = NULL; 374 dmu_tx_t *tx; 375 uint64_t acl_obj; 376 uint64_t xattr_obj; 377 int error; 378 379 ASSERT(zp->z_links == 0); 380#ifndef __NetBSD__ 381 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 382#endif 383 384 /* 385 * If this is an attribute directory, purge its contents. 386 */ 387 if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && 388 (zp->z_pflags & ZFS_XATTR)) { 389 if (zfs_purgedir(zp) != 0) { 390 /* 391 * Not enough space to delete some xattrs. 392 * Leave it in the unlinked set. 393 */ 394 zfs_znode_dmu_fini(zp); 395 zfs_znode_free(zp); 396 return; 397 } 398 } else { 399 /* 400 * Free up all the data in the file. We don't do this for 401 * XATTR directories because we need truncate and remove to be 402 * in the same tx, like in zfs_znode_delete(). Otherwise, if 403 * we crash here we'll end up with an inconsistent truncated 404 * zap object in the delete queue. Note a truncated file is 405 * harmless since it only contains user data. 406 */ 407 error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); 408 if (error) { 409 /* 410 * Not enough space. Leave the file in the unlinked 411 * set. 412 */ 413 zfs_znode_dmu_fini(zp); 414 zfs_znode_free(zp); 415 return; 416 } 417 } 418 419 /* 420 * If the file has extended attributes, we're going to unlink 421 * the xattr dir. 422 */ 423 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 424 &xattr_obj, sizeof (xattr_obj)); 425 if (error == 0 && xattr_obj) { 426 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 427 ASSERT3S(error, ==, 0); 428 vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); 429 } 430 431 acl_obj = zfs_external_acl(zp); 432 433 /* 434 * Set up the final transaction. 435 */ 436 tx = dmu_tx_create(os); 437 dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 438 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 439 if (xzp) { 440 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); 441 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 442 } 443 if (acl_obj) 444 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 445 446 zfs_sa_upgrade_txholds(tx, zp); 447 error = dmu_tx_assign(tx, TXG_WAIT); 448 if (error) { 449 /* 450 * Not enough space to delete the file. Leave it in the 451 * unlinked set, leaking it until the fs is remounted (at 452 * which point we'll call zfs_unlinked_drain() to process it). 453 */ 454 dmu_tx_abort(tx); 455 zfs_znode_dmu_fini(zp); 456 zfs_znode_free(zp); 457 goto out; 458 } 459 460 if (xzp) { 461 ASSERT(error == 0); 462 xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ 463 xzp->z_links = 0; /* no more links to it */ 464 VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 465 &xzp->z_links, sizeof (xzp->z_links), tx)); 466 zfs_unlinked_add(xzp, tx); 467 } 468 469 /* Remove this znode from the unlinked set */ 470 VERIFY3U(0, ==, 471 zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 472 473 zfs_znode_delete(zp, tx); 474 475 dmu_tx_commit(tx); 476out: 477 if (xzp) 478 vput(ZTOV(xzp)); 479} 480 481static uint64_t 482zfs_dirent(znode_t *zp, uint64_t mode) 483{ 484 uint64_t de = zp->z_id; 485 486 if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) 487 de |= IFTODT(mode) << 60; 488 return (de); 489} 490 491/* 492 * Link zp into dzp. Can only fail if zp has been unlinked. 493 */ 494int 495zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 496 int flag) 497{ 498 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 499 vnode_t *vp = ZTOV(zp); 500 uint64_t value; 501 int zp_is_dir = (vp->v_type == VDIR); 502 sa_bulk_attr_t bulk[5]; 503 uint64_t mtime[2], ctime[2]; 504 int count = 0; 505 int error; 506 507 ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); 508 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 509#if 0 510 if (zp_is_dir) { 511 error = 0; 512 if (dzp->z_links >= LINK_MAX) 513 error = SET_ERROR(EMLINK); 514 return (error); 515 } 516#endif 517 if (!(flag & ZRENAMING)) { 518 if (zp->z_unlinked) { /* no new links to unlinked zp */ 519 ASSERT(!(flag & (ZNEW | ZEXISTS))); 520 return (SET_ERROR(ENOENT)); 521 } 522#if 0 523 if (zp->z_links >= LINK_MAX) { 524 return (SET_ERROR(EMLINK)); 525 } 526#endif 527 zp->z_links++; 528 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 529 &zp->z_links, sizeof (zp->z_links)); 530 531 } else { 532 ASSERT(zp->z_unlinked == 0); 533 } 534 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 535 &dzp->z_id, sizeof (dzp->z_id)); 536 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 537 &zp->z_pflags, sizeof (zp->z_pflags)); 538 539 if (!(flag & ZNEW)) { 540 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 541 ctime, sizeof (ctime)); 542 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 543 ctime, B_TRUE); 544 } 545 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 546 ASSERT0(error); 547 548 dzp->z_size++; 549 dzp->z_links += zp_is_dir; 550 count = 0; 551 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 552 &dzp->z_size, sizeof (dzp->z_size)); 553 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 554 &dzp->z_links, sizeof (dzp->z_links)); 555 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 556 mtime, sizeof (mtime)); 557 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 558 ctime, sizeof (ctime)); 559 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 560 &dzp->z_pflags, sizeof (dzp->z_pflags)); 561 zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 562 error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 563 ASSERT0(error); 564 565 value = zfs_dirent(zp, zp->z_mode); 566 error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name, 567 8, 1, &value, tx); 568 VERIFY0(error); 569 570 return (0); 571} 572 573static int 574zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 575 int flag) 576{ 577 int error; 578 579 if (zp->z_zfsvfs->z_norm) { 580 if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) 581 error = zap_remove_norm(zp->z_zfsvfs->z_os, 582 dzp->z_id, name, MT_EXACT, tx); 583 else 584 error = zap_remove_norm(zp->z_zfsvfs->z_os, 585 dzp->z_id, name, MT_FIRST, tx); 586 } else { 587 error = zap_remove(zp->z_zfsvfs->z_os, 588 dzp->z_id, name, tx); 589 } 590 591 return (error); 592} 593 594/* 595 * Unlink zp from dzp, and mark zp for deletion if this was the last link. 596 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). 597 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. 598 * If it's non-NULL, we use it to indicate whether the znode needs deletion, 599 * and it's the caller's job to do it. 600 */ 601int 602zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, 603 int flag, boolean_t *unlinkedp) 604{ 605 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 606 vnode_t *vp = ZTOV(zp); 607 int zp_is_dir = (vp->v_type == VDIR); 608 boolean_t unlinked = B_FALSE; 609 sa_bulk_attr_t bulk[5]; 610 uint64_t mtime[2], ctime[2]; 611 int count = 0; 612 int error; 613 614 ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); 615 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 616 617 if (!(flag & ZRENAMING)) { 618 619 if (zp_is_dir && !zfs_dirempty(zp)) { 620#ifdef illumos 621 return (SET_ERROR(EEXIST)); 622#else 623 return (SET_ERROR(ENOTEMPTY)); 624#endif 625 } 626 627 /* 628 * If we get here, we are going to try to remove the object. 629 * First try removing the name from the directory; if that 630 * fails, return the error. 631 */ 632 error = zfs_dropname(dzp, name, zp, tx, flag); 633 if (error != 0) { 634 return (error); 635 } 636 637 if (zp->z_links <= zp_is_dir) { 638 zfs_panic_recover("zfs: link count on vnode %p is %u, " 639 "should be at least %u", zp->z_vnode, 640 (int)zp->z_links, 641 zp_is_dir + 1); 642 zp->z_links = zp_is_dir + 1; 643 } 644 if (--zp->z_links == zp_is_dir) { 645 zp->z_unlinked = B_TRUE; 646 zp->z_links = 0; 647 unlinked = B_TRUE; 648 } else { 649 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 650 NULL, &ctime, sizeof (ctime)); 651 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 652 NULL, &zp->z_pflags, sizeof (zp->z_pflags)); 653 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 654 B_TRUE); 655 } 656 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 657 NULL, &zp->z_links, sizeof (zp->z_links)); 658 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 659 count = 0; 660 ASSERT0(error); 661 } else { 662 ASSERT(zp->z_unlinked == 0); 663 error = zfs_dropname(dzp, name, zp, tx, flag); 664 if (error != 0) 665 return (error); 666 } 667 668 dzp->z_size--; /* one dirent removed */ 669 dzp->z_links -= zp_is_dir; /* ".." link from zp */ 670 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 671 NULL, &dzp->z_links, sizeof (dzp->z_links)); 672 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 673 NULL, &dzp->z_size, sizeof (dzp->z_size)); 674 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 675 NULL, ctime, sizeof (ctime)); 676 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 677 NULL, mtime, sizeof (mtime)); 678 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 679 NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); 680 zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 681 error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 682 ASSERT0(error); 683 684 if (unlinkedp != NULL) 685 *unlinkedp = unlinked; 686 else if (unlinked) 687 zfs_unlinked_add(zp, tx); 688 689 return (0); 690} 691 692/* 693 * Indicate whether the directory is empty. 694 */ 695boolean_t 696zfs_dirempty(znode_t *dzp) 697{ 698 return (dzp->z_size == 2); 699} 700 701int 702zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) 703{ 704 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 705 znode_t *xzp; 706 dmu_tx_t *tx; 707 int error; 708 zfs_acl_ids_t acl_ids; 709 boolean_t fuid_dirtied; 710 uint64_t parent; 711 712 *xvpp = NULL; 713 714 /* 715 * In FreeBSD, access checking for creating an EA is being done 716 * in zfs_setextattr(), 717 */ 718#ifndef __FreeBSD_kernel__ 719 if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) 720 return (error); 721#endif 722 723 if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, 724 &acl_ids)) != 0) 725 return (error); 726 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 727 zfs_acl_ids_free(&acl_ids); 728 return (SET_ERROR(EDQUOT)); 729 } 730 731 getnewvnode_reserve(1); 732 733 tx = dmu_tx_create(zfsvfs->z_os); 734 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 735 ZFS_SA_BASE_ATTR_SIZE); 736 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 737 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 738 fuid_dirtied = zfsvfs->z_fuid_dirty; 739 if (fuid_dirtied) 740 zfs_fuid_txhold(zfsvfs, tx); 741 error = dmu_tx_assign(tx, TXG_WAIT); 742 if (error) { 743 zfs_acl_ids_free(&acl_ids); 744 dmu_tx_abort(tx); 745 return (error); 746 } 747 zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); 748 749 if (fuid_dirtied) 750 zfs_fuid_sync(zfsvfs, tx); 751 752#ifdef DEBUG 753 error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 754 &parent, sizeof (parent)); 755 ASSERT(error == 0 && parent == zp->z_id); 756#endif 757 758 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, 759 sizeof (xzp->z_id), tx)); 760 761 (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, 762 xzp, "", NULL, acl_ids.z_fuidp, vap); 763 764 zfs_acl_ids_free(&acl_ids); 765 dmu_tx_commit(tx); 766 767 getnewvnode_drop_reserve(); 768 769 *xvpp = ZTOV(xzp); 770 771 return (0); 772} 773 774/* 775 * Return a znode for the extended attribute directory for zp. 776 * ** If the directory does not already exist, it is created ** 777 * 778 * IN: zp - znode to obtain attribute directory from 779 * cr - credentials of caller 780 * flags - flags from the VOP_LOOKUP call 781 * 782 * OUT: xzpp - pointer to extended attribute znode 783 * 784 * RETURN: 0 on success 785 * error number on failure 786 */ 787int 788zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) 789{ 790 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 791 znode_t *xzp; 792 vattr_t va; 793 int error; 794top: 795 error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR); 796 if (error) 797 return (error); 798 799 if (xzp != NULL) { 800 *xvpp = ZTOV(xzp); 801 return (0); 802 } 803 804 805 if (!(flags & CREATE_XATTR_DIR)) { 806#ifdef illumos 807 return (SET_ERROR(ENOENT)); 808#else 809 return (SET_ERROR(ENOATTR)); 810#endif 811 } 812 813 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 814 return (SET_ERROR(EROFS)); 815 } 816 817 /* 818 * The ability to 'create' files in an attribute 819 * directory comes from the write_xattr permission on the base file. 820 * 821 * The ability to 'search' an attribute directory requires 822 * read_xattr permission on the base file. 823 * 824 * Once in a directory the ability to read/write attributes 825 * is controlled by the permissions on the attribute file. 826 */ 827 va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; 828 va.va_type = VDIR; 829 va.va_mode = S_IFDIR | S_ISVTX | 0777; 830 zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); 831 832 error = zfs_make_xattrdir(zp, &va, xvpp, cr); 833 834 if (error == ERESTART) { 835 /* NB: we already did dmu_tx_wait() if necessary */ 836 goto top; 837 } 838 if (error == 0) 839 VOP_UNLOCK(*xvpp, 0); 840 841 return (error); 842} 843 844/* 845 * Decide whether it is okay to remove within a sticky directory. 846 * 847 * In sticky directories, write access is not sufficient; 848 * you can remove entries from a directory only if: 849 * 850 * you own the directory, 851 * you own the entry, 852 * the entry is a plain file and you have write access, 853 * or you are privileged (checked in secpolicy...). 854 * 855 * The function returns 0 if remove access is granted. 856 */ 857int 858zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) 859{ 860 uid_t uid; 861 uid_t downer; 862 uid_t fowner; 863 zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 864 865 if (zdp->z_zfsvfs->z_replay) 866 return (0); 867 868 if ((zdp->z_mode & S_ISVTX) == 0) 869 return (0); 870 871 downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); 872 fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); 873 874 if ((uid = crgetuid(cr)) == downer || uid == fowner || 875 (ZTOV(zp)->v_type == VREG && 876 zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) 877 return (0); 878 else 879 return (secpolicy_vnode_remove(ZTOV(zp), cr)); 880} 881