1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23229565Smm * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24229565Smm * All rights reserved. 25249643Smm * Copyright (c) 2013 by Delphix. All rights reserved. 26265754Sdelphij * Copyright (c) 2014 Joyent, Inc. All rights reserved. 
27168404Spjd */ 28168404Spjd 29168404Spjd#include <sys/dmu.h> 30185029Spjd#include <sys/dmu_objset.h> 31168404Spjd#include <sys/dmu_tx.h> 32168404Spjd#include <sys/dsl_dataset.h> 33168404Spjd#include <sys/dsl_dir.h> 34168404Spjd#include <sys/dsl_prop.h> 35168404Spjd#include <sys/dsl_synctask.h> 36185029Spjd#include <sys/dsl_deleg.h> 37263391Sdelphij#include <sys/dmu_impl.h> 38168404Spjd#include <sys/spa.h> 39219089Spjd#include <sys/metaslab.h> 40168404Spjd#include <sys/zap.h> 41168404Spjd#include <sys/zio.h> 42168404Spjd#include <sys/arc.h> 43185029Spjd#include <sys/sunddi.h> 44219317Spjd#include <sys/zvol.h> 45229565Smm#ifdef _KERNEL 46229565Smm#include <sys/zfs_vfsops.h> 47229565Smm#endif 48265754Sdelphij#include <sys/zfeature.h> 49265754Sdelphij#include <sys/policy.h> 50265754Sdelphij#include <sys/zfs_znode.h> 51168404Spjd#include "zfs_namecheck.h" 52265754Sdelphij#include "zfs_prop.h" 53168404Spjd 54265754Sdelphij/* 55265754Sdelphij * Filesystem and Snapshot Limits 56265754Sdelphij * ------------------------------ 57265754Sdelphij * 58265754Sdelphij * These limits are used to restrict the number of filesystems and/or snapshots 59265754Sdelphij * that can be created at a given level in the tree or below. A typical 60265754Sdelphij * use-case is with a delegated dataset where the administrator wants to ensure 61265754Sdelphij * that a user within the zone is not creating too many additional filesystems 62265754Sdelphij * or snapshots, even though they're not exceeding their space quota. 63265754Sdelphij * 64265754Sdelphij * The filesystem and snapshot counts are stored as extensible properties. This 65265754Sdelphij * capability is controlled by a feature flag and must be enabled to be used. 66265754Sdelphij * Once enabled, the feature is not active until the first limit is set. At 67265754Sdelphij * that point, future operations to create/destroy filesystems or snapshots 68265754Sdelphij * will validate and update the counts. 
69265754Sdelphij * 70265754Sdelphij * Because the count properties will not exist before the feature is active, 71265754Sdelphij * the counts are updated when a limit is first set on an uninitialized 72265754Sdelphij * dsl_dir node in the tree (The filesystem/snapshot count on a node includes 73265754Sdelphij * all of the nested filesystems/snapshots. Thus, a new leaf node has a 74265754Sdelphij * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and 75265754Sdelphij * snapshot count properties on a node indicate uninitialized counts on that 76265754Sdelphij * node.) When first setting a limit on an uninitialized node, the code starts 77265754Sdelphij * at the filesystem with the new limit and descends into all sub-filesystems 78265754Sdelphij * to add the count properties. 79265754Sdelphij * 80265754Sdelphij * In practice this is lightweight since a limit is typically set when the 81265754Sdelphij * filesystem is created and thus has no children. Once valid, changing the 82265754Sdelphij * limit value won't require a re-traversal since the counts are already valid. 83265754Sdelphij * When recursively fixing the counts, if a node with a limit is encountered 84265754Sdelphij * during the descent, the counts are known to be valid and there is no need to 85265754Sdelphij * descend into that filesystem's children. The counts on filesystems above the 86265754Sdelphij * one with the new limit will still be uninitialized, unless a limit is 87265754Sdelphij * eventually set on one of those filesystems. The counts are always recursively 88265754Sdelphij * updated when a limit is set on a dataset, unless there is already a limit. 89265754Sdelphij * When a new limit value is set on a filesystem with an existing limit, it is 90265754Sdelphij * possible for the new limit to be less than the current count at that level 91265754Sdelphij * since a user who can change the limit is also allowed to exceed the limit. 
92265754Sdelphij * 93265754Sdelphij * Once the feature is active, then whenever a filesystem or snapshot is 94265754Sdelphij * created, the code recurses up the tree, validating the new count against the 95265754Sdelphij * limit at each initialized level. In practice, most levels will not have a 96265754Sdelphij * limit set. If there is a limit at any initialized level up the tree, the 97265754Sdelphij * check must pass or the creation will fail. Likewise, when a filesystem or 98265754Sdelphij * snapshot is destroyed, the counts are recursively adjusted all the way up 99265754Sdelphij * the initialized nodes in the tree. Renaming a filesystem to a different point 100265754Sdelphij * in the tree will first validate, then update the counts on each branch up to 101265754Sdelphij * the common ancestor. A receive will also validate the counts and then update 102265754Sdelphij * them. 103265754Sdelphij * 104265754Sdelphij * An exception to the above behavior is that the limit is not enforced if the 105265754Sdelphij * user has permission to modify the limit. This is primarily so that 106265754Sdelphij * recursive snapshots in the global zone always work. We want to prevent a 107265754Sdelphij * denial-of-service in which a lower level delegated dataset could max out its 108265754Sdelphij * limit and thus block recursive snapshots from being taken in the global zone. 109265754Sdelphij * Because of this, it is possible for the snapshot count to be over the limit 110265754Sdelphij * and snapshots taken in the global zone could cause a lower level dataset to 111265754Sdelphij * hit or exceed its limit. The administrator taking the global zone recursive 112265754Sdelphij * snapshot should be aware of this side-effect and behave accordingly. 113265754Sdelphij * For consistency, the filesystem limit is also not enforced if the user can 114265754Sdelphij * modify the limit. 
115265754Sdelphij * 116265754Sdelphij * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check() 117265754Sdelphij * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in 118265754Sdelphij * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by 119265754Sdelphij * dsl_dir_init_fs_ss_count(). 120265754Sdelphij * 121265754Sdelphij * There is a special case when we receive a filesystem that already exists. In 122265754Sdelphij * this case a temporary clone name of %X is created (see dmu_recv_begin). We 123265754Sdelphij * never update the filesystem counts for temporary clones. 124265754Sdelphij * 125265754Sdelphij * Likewise, we do not update the snapshot counts for temporary snapshots, 126265754Sdelphij * such as those created by zfs diff. 127265754Sdelphij */ 128265754Sdelphij 129185029Spjdstatic uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); 130168404Spjd 131168404Spjd/* ARGSUSED */ 132168404Spjdstatic void 133168404Spjddsl_dir_evict(dmu_buf_t *db, void *arg) 134168404Spjd{ 135168404Spjd dsl_dir_t *dd = arg; 136168404Spjd dsl_pool_t *dp = dd->dd_pool; 137168404Spjd int t; 138168404Spjd 139168404Spjd for (t = 0; t < TXG_SIZE; t++) { 140168404Spjd ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 141168404Spjd ASSERT(dd->dd_tempreserved[t] == 0); 142168404Spjd ASSERT(dd->dd_space_towrite[t] == 0); 143168404Spjd } 144168404Spjd 145168404Spjd if (dd->dd_parent) 146249643Smm dsl_dir_rele(dd->dd_parent, dd); 147168404Spjd 148168404Spjd spa_close(dd->dd_pool->dp_spa, dd); 149168404Spjd 150168404Spjd /* 151219089Spjd * The props callback list should have been cleaned up by 152219089Spjd * objset_evict(). 
153168404Spjd */ 154168404Spjd list_destroy(&dd->dd_prop_cbs); 155168404Spjd mutex_destroy(&dd->dd_lock); 156168404Spjd kmem_free(dd, sizeof (dsl_dir_t)); 157168404Spjd} 158168404Spjd 159168404Spjdint 160249643Smmdsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, 161168404Spjd const char *tail, void *tag, dsl_dir_t **ddp) 162168404Spjd{ 163168404Spjd dmu_buf_t *dbuf; 164168404Spjd dsl_dir_t *dd; 165168404Spjd int err; 166168404Spjd 167249643Smm ASSERT(dsl_pool_config_held(dp)); 168168404Spjd 169168404Spjd err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); 170249643Smm if (err != 0) 171168404Spjd return (err); 172168404Spjd dd = dmu_buf_get_user(dbuf); 173168404Spjd#ifdef ZFS_DEBUG 174168404Spjd { 175168404Spjd dmu_object_info_t doi; 176168404Spjd dmu_object_info_from_db(dbuf, &doi); 177263391Sdelphij ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); 178185029Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); 179168404Spjd } 180168404Spjd#endif 181168404Spjd if (dd == NULL) { 182168404Spjd dsl_dir_t *winner; 183168404Spjd 184168404Spjd dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 185168404Spjd dd->dd_object = ddobj; 186168404Spjd dd->dd_dbuf = dbuf; 187168404Spjd dd->dd_pool = dp; 188168404Spjd dd->dd_phys = dbuf->db_data; 189168404Spjd mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); 190168404Spjd 191168404Spjd list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 192168404Spjd offsetof(dsl_prop_cb_record_t, cbr_node)); 193168404Spjd 194219089Spjd dsl_dir_snap_cmtime_update(dd); 195219089Spjd 196168404Spjd if (dd->dd_phys->dd_parent_obj) { 197249643Smm err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj, 198168404Spjd NULL, dd, &dd->dd_parent); 199249643Smm if (err != 0) 200185029Spjd goto errout; 201168404Spjd if (tail) { 202168404Spjd#ifdef ZFS_DEBUG 203168404Spjd uint64_t foundobj; 204168404Spjd 205168404Spjd err = zap_lookup(dp->dp_meta_objset, 206185029Spjd dd->dd_parent->dd_phys->dd_child_dir_zapobj, 207168404Spjd tail, 
sizeof (foundobj), 1, &foundobj); 208168404Spjd ASSERT(err || foundobj == ddobj); 209168404Spjd#endif 210168404Spjd (void) strcpy(dd->dd_myname, tail); 211168404Spjd } else { 212168404Spjd err = zap_value_search(dp->dp_meta_objset, 213185029Spjd dd->dd_parent->dd_phys->dd_child_dir_zapobj, 214185029Spjd ddobj, 0, dd->dd_myname); 215168404Spjd } 216249643Smm if (err != 0) 217185029Spjd goto errout; 218168404Spjd } else { 219168404Spjd (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 220168404Spjd } 221168404Spjd 222219089Spjd if (dsl_dir_is_clone(dd)) { 223219089Spjd dmu_buf_t *origin_bonus; 224219089Spjd dsl_dataset_phys_t *origin_phys; 225219089Spjd 226219089Spjd /* 227219089Spjd * We can't open the origin dataset, because 228219089Spjd * that would require opening this dsl_dir. 229219089Spjd * Just look at its phys directly instead. 230219089Spjd */ 231219089Spjd err = dmu_bonus_hold(dp->dp_meta_objset, 232219089Spjd dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); 233249643Smm if (err != 0) 234219089Spjd goto errout; 235219089Spjd origin_phys = origin_bonus->db_data; 236219089Spjd dd->dd_origin_txg = 237219089Spjd origin_phys->ds_creation_txg; 238219089Spjd dmu_buf_rele(origin_bonus, FTAG); 239219089Spjd } 240219089Spjd 241168404Spjd winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 242168404Spjd dsl_dir_evict); 243168404Spjd if (winner) { 244168404Spjd if (dd->dd_parent) 245249643Smm dsl_dir_rele(dd->dd_parent, dd); 246168404Spjd mutex_destroy(&dd->dd_lock); 247168404Spjd kmem_free(dd, sizeof (dsl_dir_t)); 248168404Spjd dd = winner; 249168404Spjd } else { 250168404Spjd spa_open_ref(dp->dp_spa, dd); 251168404Spjd } 252168404Spjd } 253168404Spjd 254168404Spjd /* 255168404Spjd * The dsl_dir_t has both open-to-close and instantiate-to-evict 256168404Spjd * holds on the spa. 
We need the open-to-close holds because 257168404Spjd * otherwise the spa_refcnt wouldn't change when we open a 258168404Spjd * dir which the spa also has open, so we could incorrectly 259168404Spjd * think it was OK to unload/export/destroy the pool. We need 260168404Spjd * the instantiate-to-evict hold because the dsl_dir_t has a 261168404Spjd * pointer to the dd_pool, which has a pointer to the spa_t. 262168404Spjd */ 263168404Spjd spa_open_ref(dp->dp_spa, tag); 264168404Spjd ASSERT3P(dd->dd_pool, ==, dp); 265168404Spjd ASSERT3U(dd->dd_object, ==, ddobj); 266168404Spjd ASSERT3P(dd->dd_dbuf, ==, dbuf); 267168404Spjd *ddp = dd; 268168404Spjd return (0); 269185029Spjd 270185029Spjderrout: 271185029Spjd if (dd->dd_parent) 272249643Smm dsl_dir_rele(dd->dd_parent, dd); 273185029Spjd mutex_destroy(&dd->dd_lock); 274185029Spjd kmem_free(dd, sizeof (dsl_dir_t)); 275185029Spjd dmu_buf_rele(dbuf, tag); 276185029Spjd return (err); 277168404Spjd} 278168404Spjd 279168404Spjdvoid 280249643Smmdsl_dir_rele(dsl_dir_t *dd, void *tag) 281168404Spjd{ 282168404Spjd dprintf_dd(dd, "%s\n", ""); 283168404Spjd spa_close(dd->dd_pool->dp_spa, tag); 284168404Spjd dmu_buf_rele(dd->dd_dbuf, tag); 285168404Spjd} 286168404Spjd 287168404Spjd/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ 288168404Spjdvoid 289168404Spjddsl_dir_name(dsl_dir_t *dd, char *buf) 290168404Spjd{ 291168404Spjd if (dd->dd_parent) { 292168404Spjd dsl_dir_name(dd->dd_parent, buf); 293168404Spjd (void) strcat(buf, "/"); 294168404Spjd } else { 295168404Spjd buf[0] = '\0'; 296168404Spjd } 297168404Spjd if (!MUTEX_HELD(&dd->dd_lock)) { 298168404Spjd /* 299168404Spjd * recursive mutex so that we can use 300168404Spjd * dprintf_dd() with dd_lock held 301168404Spjd */ 302168404Spjd mutex_enter(&dd->dd_lock); 303168404Spjd (void) strcat(buf, dd->dd_myname); 304168404Spjd mutex_exit(&dd->dd_lock); 305168404Spjd } else { 306168404Spjd (void) strcat(buf, dd->dd_myname); 307168404Spjd } 308168404Spjd} 
309168404Spjd 310243674Smm/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ 311168404Spjdint 312168498Spjddsl_dir_namelen(dsl_dir_t *dd) 313168498Spjd{ 314168498Spjd int result = 0; 315168498Spjd 316168498Spjd if (dd->dd_parent) { 317168498Spjd /* parent's name + 1 for the "/" */ 318168498Spjd result = dsl_dir_namelen(dd->dd_parent) + 1; 319168498Spjd } 320168498Spjd 321168498Spjd if (!MUTEX_HELD(&dd->dd_lock)) { 322168498Spjd /* see dsl_dir_name */ 323168498Spjd mutex_enter(&dd->dd_lock); 324168498Spjd result += strlen(dd->dd_myname); 325168498Spjd mutex_exit(&dd->dd_lock); 326168498Spjd } else { 327168498Spjd result += strlen(dd->dd_myname); 328168498Spjd } 329168498Spjd 330168498Spjd return (result); 331168498Spjd} 332168498Spjd 333168404Spjdstatic int 334168404Spjdgetcomponent(const char *path, char *component, const char **nextp) 335168404Spjd{ 336168404Spjd char *p; 337249643Smm 338209962Smm if ((path == NULL) || (path[0] == '\0')) 339249643Smm return (SET_ERROR(ENOENT)); 340168404Spjd /* This would be a good place to reserve some namespace... */ 341168404Spjd p = strpbrk(path, "/@"); 342168404Spjd if (p && (p[1] == '/' || p[1] == '@')) { 343168404Spjd /* two separators in a row */ 344249643Smm return (SET_ERROR(EINVAL)); 345168404Spjd } 346168404Spjd if (p == NULL || p == path) { 347168404Spjd /* 348168404Spjd * if the first thing is an @ or /, it had better be an 349168404Spjd * @ and it had better not have any more ats or slashes, 350168404Spjd * and it had better have something after the @. 
351168404Spjd */ 352168404Spjd if (p != NULL && 353168404Spjd (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 354249643Smm return (SET_ERROR(EINVAL)); 355168404Spjd if (strlen(path) >= MAXNAMELEN) 356249643Smm return (SET_ERROR(ENAMETOOLONG)); 357168404Spjd (void) strcpy(component, path); 358168404Spjd p = NULL; 359168404Spjd } else if (p[0] == '/') { 360249643Smm if (p - path >= MAXNAMELEN) 361249643Smm return (SET_ERROR(ENAMETOOLONG)); 362168404Spjd (void) strncpy(component, path, p - path); 363249643Smm component[p - path] = '\0'; 364168404Spjd p++; 365168404Spjd } else if (p[0] == '@') { 366168404Spjd /* 367168404Spjd * if the next separator is an @, there better not be 368168404Spjd * any more slashes. 369168404Spjd */ 370168404Spjd if (strchr(path, '/')) 371249643Smm return (SET_ERROR(EINVAL)); 372249643Smm if (p - path >= MAXNAMELEN) 373249643Smm return (SET_ERROR(ENAMETOOLONG)); 374168404Spjd (void) strncpy(component, path, p - path); 375249643Smm component[p - path] = '\0'; 376168404Spjd } else { 377249643Smm panic("invalid p=%p", (void *)p); 378168404Spjd } 379168404Spjd *nextp = p; 380168404Spjd return (0); 381168404Spjd} 382168404Spjd 383168404Spjd/* 384249643Smm * Return the dsl_dir_t, and possibly the last component which couldn't 385249643Smm * be found in *tail. The name must be in the specified dsl_pool_t. This 386249643Smm * thread must hold the dp_config_rwlock for the pool. Returns NULL if the 387249643Smm * path is bogus, or if tail==NULL and we couldn't parse the whole name. 388249643Smm * (*tail)[0] == '@' means that the last component is a snapshot. 
389168404Spjd */ 390168404Spjdint 391249643Smmdsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, 392168404Spjd dsl_dir_t **ddp, const char **tailp) 393168404Spjd{ 394168404Spjd char buf[MAXNAMELEN]; 395249643Smm const char *spaname, *next, *nextnext = NULL; 396168404Spjd int err; 397168404Spjd dsl_dir_t *dd; 398168404Spjd uint64_t ddobj; 399168404Spjd 400168404Spjd err = getcomponent(name, buf, &next); 401249643Smm if (err != 0) 402168404Spjd return (err); 403168404Spjd 404249643Smm /* Make sure the name is in the specified pool. */ 405249643Smm spaname = spa_name(dp->dp_spa); 406249643Smm if (strcmp(buf, spaname) != 0) 407249643Smm return (SET_ERROR(EINVAL)); 408168404Spjd 409249643Smm ASSERT(dsl_pool_config_held(dp)); 410168404Spjd 411249643Smm err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); 412249643Smm if (err != 0) { 413168404Spjd return (err); 414168404Spjd } 415168404Spjd 416168404Spjd while (next != NULL) { 417168404Spjd dsl_dir_t *child_ds; 418168404Spjd err = getcomponent(next, buf, &nextnext); 419249643Smm if (err != 0) 420168404Spjd break; 421168404Spjd ASSERT(next[0] != '\0'); 422168404Spjd if (next[0] == '@') 423168404Spjd break; 424168404Spjd dprintf("looking up %s in obj%lld\n", 425168404Spjd buf, dd->dd_phys->dd_child_dir_zapobj); 426168404Spjd 427168404Spjd err = zap_lookup(dp->dp_meta_objset, 428168404Spjd dd->dd_phys->dd_child_dir_zapobj, 429168404Spjd buf, sizeof (ddobj), 1, &ddobj); 430249643Smm if (err != 0) { 431168404Spjd if (err == ENOENT) 432168404Spjd err = 0; 433168404Spjd break; 434168404Spjd } 435168404Spjd 436249643Smm err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds); 437249643Smm if (err != 0) 438168404Spjd break; 439249643Smm dsl_dir_rele(dd, tag); 440168404Spjd dd = child_ds; 441168404Spjd next = nextnext; 442168404Spjd } 443168404Spjd 444249643Smm if (err != 0) { 445249643Smm dsl_dir_rele(dd, tag); 446168404Spjd return (err); 447168404Spjd } 448168404Spjd 449168404Spjd /* 450168404Spjd * It's an 
error if there's more than one component left, or 451168404Spjd * tailp==NULL and there's any component left. 452168404Spjd */ 453168404Spjd if (next != NULL && 454168404Spjd (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 455168404Spjd /* bad path name */ 456249643Smm dsl_dir_rele(dd, tag); 457168404Spjd dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 458249643Smm err = SET_ERROR(ENOENT); 459168404Spjd } 460249643Smm if (tailp != NULL) 461168404Spjd *tailp = next; 462168404Spjd *ddp = dd; 463168404Spjd return (err); 464168404Spjd} 465168404Spjd 466265754Sdelphij/* 467265754Sdelphij * If the counts are already initialized for this filesystem and its 468265754Sdelphij * descendants then do nothing, otherwise initialize the counts. 469265754Sdelphij * 470265754Sdelphij * The counts on this filesystem, and those below, may be uninitialized due to 471265754Sdelphij * either the use of a pre-existing pool which did not support the 472265754Sdelphij * filesystem/snapshot limit feature, or one in which the feature had not yet 473265754Sdelphij * been enabled. 474265754Sdelphij * 475265754Sdelphij * Recursively descend the filesystem tree and update the filesystem/snapshot 476265754Sdelphij * counts on each filesystem below, then update the cumulative count on the 477265754Sdelphij * current filesystem. If the filesystem already has a count set on it, 478265754Sdelphij * then we know that its counts, and the counts on the filesystems below it, 479265754Sdelphij * are already correct, so we don't have to update this filesystem. 
480265754Sdelphij */ 481265754Sdelphijstatic void 482265754Sdelphijdsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) 483265754Sdelphij{ 484265754Sdelphij uint64_t my_fs_cnt = 0; 485265754Sdelphij uint64_t my_ss_cnt = 0; 486265754Sdelphij dsl_pool_t *dp = dd->dd_pool; 487265754Sdelphij objset_t *os = dp->dp_meta_objset; 488265754Sdelphij zap_cursor_t *zc; 489265754Sdelphij zap_attribute_t *za; 490265754Sdelphij dsl_dataset_t *ds; 491265754Sdelphij 492267139Sdelphij ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)); 493265754Sdelphij ASSERT(dsl_pool_config_held(dp)); 494265754Sdelphij ASSERT(dmu_tx_is_syncing(tx)); 495265754Sdelphij 496265754Sdelphij dsl_dir_zapify(dd, tx); 497265754Sdelphij 498265754Sdelphij /* 499265754Sdelphij * If the filesystem count has already been initialized then we 500265754Sdelphij * don't need to recurse down any further. 501265754Sdelphij */ 502265754Sdelphij if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) 503265754Sdelphij return; 504265754Sdelphij 505265754Sdelphij zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); 506265754Sdelphij za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 507265754Sdelphij 508265754Sdelphij /* Iterate my child dirs */ 509265754Sdelphij for (zap_cursor_init(zc, os, dd->dd_phys->dd_child_dir_zapobj); 510265754Sdelphij zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { 511265754Sdelphij dsl_dir_t *chld_dd; 512265754Sdelphij uint64_t count; 513265754Sdelphij 514265754Sdelphij VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG, 515265754Sdelphij &chld_dd)); 516265754Sdelphij 517265754Sdelphij /* 518265754Sdelphij * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and 519265754Sdelphij * temporary datasets. 
520265754Sdelphij */ 521265754Sdelphij if (chld_dd->dd_myname[0] == '$' || 522265754Sdelphij chld_dd->dd_myname[0] == '%') { 523265754Sdelphij dsl_dir_rele(chld_dd, FTAG); 524265754Sdelphij continue; 525265754Sdelphij } 526265754Sdelphij 527265754Sdelphij my_fs_cnt++; /* count this child */ 528265754Sdelphij 529265754Sdelphij dsl_dir_init_fs_ss_count(chld_dd, tx); 530265754Sdelphij 531265754Sdelphij VERIFY0(zap_lookup(os, chld_dd->dd_object, 532265754Sdelphij DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count)); 533265754Sdelphij my_fs_cnt += count; 534265754Sdelphij VERIFY0(zap_lookup(os, chld_dd->dd_object, 535265754Sdelphij DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count)); 536265754Sdelphij my_ss_cnt += count; 537265754Sdelphij 538265754Sdelphij dsl_dir_rele(chld_dd, FTAG); 539265754Sdelphij } 540265754Sdelphij zap_cursor_fini(zc); 541265754Sdelphij /* Count my snapshots (we counted children's snapshots above) */ 542265754Sdelphij VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, 543265754Sdelphij dd->dd_phys->dd_head_dataset_obj, FTAG, &ds)); 544265754Sdelphij 545265754Sdelphij for (zap_cursor_init(zc, os, ds->ds_phys->ds_snapnames_zapobj); 546265754Sdelphij zap_cursor_retrieve(zc, za) == 0; 547265754Sdelphij zap_cursor_advance(zc)) { 548265754Sdelphij /* Don't count temporary snapshots */ 549265754Sdelphij if (za->za_name[0] != '%') 550265754Sdelphij my_ss_cnt++; 551265754Sdelphij } 552267139Sdelphij zap_cursor_fini(zc); 553265754Sdelphij 554265754Sdelphij dsl_dataset_rele(ds, FTAG); 555265754Sdelphij 556265754Sdelphij kmem_free(zc, sizeof (zap_cursor_t)); 557265754Sdelphij kmem_free(za, sizeof (zap_attribute_t)); 558265754Sdelphij 559265754Sdelphij /* we're in a sync task, update counts */ 560265754Sdelphij dmu_buf_will_dirty(dd->dd_dbuf, tx); 561265754Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 562265754Sdelphij sizeof (my_fs_cnt), 1, &my_fs_cnt, tx)); 563265754Sdelphij VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 
564265754Sdelphij sizeof (my_ss_cnt), 1, &my_ss_cnt, tx)); 565265754Sdelphij} 566265754Sdelphij 567265754Sdelphijstatic int 568265754Sdelphijdsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx) 569265754Sdelphij{ 570265754Sdelphij char *ddname = (char *)arg; 571265754Sdelphij dsl_pool_t *dp = dmu_tx_pool(tx); 572265754Sdelphij dsl_dataset_t *ds; 573265754Sdelphij dsl_dir_t *dd; 574265754Sdelphij int error; 575265754Sdelphij 576265754Sdelphij error = dsl_dataset_hold(dp, ddname, FTAG, &ds); 577265754Sdelphij if (error != 0) 578265754Sdelphij return (error); 579265754Sdelphij 580265754Sdelphij if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 581265754Sdelphij dsl_dataset_rele(ds, FTAG); 582265754Sdelphij return (SET_ERROR(ENOTSUP)); 583265754Sdelphij } 584265754Sdelphij 585265754Sdelphij dd = ds->ds_dir; 586265754Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) && 587265754Sdelphij dsl_dir_is_zapified(dd) && 588265754Sdelphij zap_contains(dp->dp_meta_objset, dd->dd_object, 589265754Sdelphij DD_FIELD_FILESYSTEM_COUNT) == 0) { 590265754Sdelphij dsl_dataset_rele(ds, FTAG); 591265754Sdelphij return (SET_ERROR(EALREADY)); 592265754Sdelphij } 593265754Sdelphij 594265754Sdelphij dsl_dataset_rele(ds, FTAG); 595265754Sdelphij return (0); 596265754Sdelphij} 597265754Sdelphij 598265754Sdelphijstatic void 599265754Sdelphijdsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx) 600265754Sdelphij{ 601265754Sdelphij char *ddname = (char *)arg; 602265754Sdelphij dsl_pool_t *dp = dmu_tx_pool(tx); 603265754Sdelphij dsl_dataset_t *ds; 604265754Sdelphij spa_t *spa; 605265754Sdelphij 606265754Sdelphij VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds)); 607265754Sdelphij 608265754Sdelphij spa = dsl_dataset_get_spa(ds); 609265754Sdelphij 610265754Sdelphij if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) { 611265754Sdelphij /* 612265754Sdelphij * Since the feature was not active and we're now setting a 613265754Sdelphij * limit, increment 
the feature-active counter so that the 614265754Sdelphij * feature becomes active for the first time. 615265754Sdelphij * 616265754Sdelphij * We are already in a sync task so we can update the MOS. 617265754Sdelphij */ 618265754Sdelphij spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx); 619265754Sdelphij } 620265754Sdelphij 621265754Sdelphij /* 622265754Sdelphij * Since we are now setting a non-UINT64_MAX limit on the filesystem, 623265754Sdelphij * we need to ensure the counts are correct. Descend down the tree from 624265754Sdelphij * this point and update all of the counts to be accurate. 625265754Sdelphij */ 626265754Sdelphij dsl_dir_init_fs_ss_count(ds->ds_dir, tx); 627265754Sdelphij 628265754Sdelphij dsl_dataset_rele(ds, FTAG); 629265754Sdelphij} 630265754Sdelphij 631265754Sdelphij/* 632265754Sdelphij * Make sure the feature is enabled and activate it if necessary. 633265754Sdelphij * Since we're setting a limit, ensure the on-disk counts are valid. 634265754Sdelphij * This is only called by the ioctl path when setting a limit value. 635265754Sdelphij * 636265754Sdelphij * We do not need to validate the new limit, since users who can change the 637265754Sdelphij * limit are also allowed to exceed the limit. 638265754Sdelphij */ 639265754Sdelphijint 640265754Sdelphijdsl_dir_activate_fs_ss_limit(const char *ddname) 641265754Sdelphij{ 642265754Sdelphij int error; 643265754Sdelphij 644265754Sdelphij error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check, 645265754Sdelphij dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0); 646265754Sdelphij 647265754Sdelphij if (error == EALREADY) 648265754Sdelphij error = 0; 649265754Sdelphij 650265754Sdelphij return (error); 651265754Sdelphij} 652265754Sdelphij 653265754Sdelphij/* 654265754Sdelphij * Used to determine if the filesystem_limit or snapshot_limit should be 655265754Sdelphij * enforced. We allow the limit to be exceeded if the user has permission to 656265754Sdelphij * write the property value. 
We pass in the creds that we got in the open 657265754Sdelphij * context since we will always be the GZ root in syncing context. We also have 658265754Sdelphij * to handle the case where we are allowed to change the limit on the current 659265754Sdelphij * dataset, but there may be another limit in the tree above. 660265754Sdelphij * 661265754Sdelphij * We can never modify these two properties within a non-global zone. In 662265754Sdelphij * addition, the other checks are modeled on zfs_secpolicy_write_perms. We 663265754Sdelphij * can't use that function since we are already holding the dp_config_rwlock. 664265754Sdelphij * In addition, we already have the dd and dealing with snapshots is simplified 665265754Sdelphij * in this code. 666265754Sdelphij */ 667265754Sdelphij 668265754Sdelphijtypedef enum { 669265754Sdelphij ENFORCE_ALWAYS, 670265754Sdelphij ENFORCE_NEVER, 671265754Sdelphij ENFORCE_ABOVE 672265754Sdelphij} enforce_res_t; 673265754Sdelphij 674265754Sdelphijstatic enforce_res_t 675265754Sdelphijdsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr) 676265754Sdelphij{ 677265754Sdelphij enforce_res_t enforce = ENFORCE_ALWAYS; 678265754Sdelphij uint64_t obj; 679265754Sdelphij dsl_dataset_t *ds; 680265754Sdelphij uint64_t zoned; 681265754Sdelphij 682265754Sdelphij ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || 683265754Sdelphij prop == ZFS_PROP_SNAPSHOT_LIMIT); 684265754Sdelphij 685265754Sdelphij#ifdef _KERNEL 686265754Sdelphij#ifdef __FreeBSD__ 687265754Sdelphij if (jailed(cr)) 688265754Sdelphij#else 689265754Sdelphij if (crgetzoneid(cr) != GLOBAL_ZONEID) 690265754Sdelphij#endif 691265754Sdelphij return (ENFORCE_ALWAYS); 692265754Sdelphij 693265754Sdelphij if (secpolicy_zfs(cr) == 0) 694265754Sdelphij return (ENFORCE_NEVER); 695265754Sdelphij#endif 696265754Sdelphij 697265754Sdelphij if ((obj = dd->dd_phys->dd_head_dataset_obj) == 0) 698265754Sdelphij return (ENFORCE_ALWAYS); 699265754Sdelphij 700265754Sdelphij 
ASSERT(dsl_pool_config_held(dd->dd_pool)); 701265754Sdelphij 702265754Sdelphij if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0) 703265754Sdelphij return (ENFORCE_ALWAYS); 704265754Sdelphij 705265754Sdelphij if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) { 706265754Sdelphij /* Only root can access zoned fs's from the GZ */ 707265754Sdelphij enforce = ENFORCE_ALWAYS; 708265754Sdelphij } else { 709265754Sdelphij if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0) 710265754Sdelphij enforce = ENFORCE_ABOVE; 711265754Sdelphij } 712265754Sdelphij 713265754Sdelphij dsl_dataset_rele(ds, FTAG); 714265754Sdelphij return (enforce); 715265754Sdelphij} 716265754Sdelphij 717265754Sdelphij/* 718265754Sdelphij * Check if adding additional child filesystem(s) would exceed any filesystem 719265754Sdelphij * limits or adding additional snapshot(s) would exceed any snapshot limits. 720265754Sdelphij * The prop argument indicates which limit to check. 721265754Sdelphij * 722265754Sdelphij * Note that all filesystem limits up to the root (or the highest 723265754Sdelphij * initialized) filesystem or the given ancestor must be satisfied. 724265754Sdelphij */ 725265754Sdelphijint 726265754Sdelphijdsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, 727265754Sdelphij dsl_dir_t *ancestor, cred_t *cr) 728265754Sdelphij{ 729265754Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 730265754Sdelphij uint64_t limit, count; 731265754Sdelphij char *count_prop; 732265754Sdelphij enforce_res_t enforce; 733265754Sdelphij int err = 0; 734265754Sdelphij 735265754Sdelphij ASSERT(dsl_pool_config_held(dd->dd_pool)); 736265754Sdelphij ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || 737265754Sdelphij prop == ZFS_PROP_SNAPSHOT_LIMIT); 738265754Sdelphij 739265754Sdelphij /* 740265754Sdelphij * If we're allowed to change the limit, don't enforce the limit 741265754Sdelphij * e.g. 
this can happen if a snapshot is taken by an administrative 742265754Sdelphij * user in the global zone (i.e. a recursive snapshot by root). 743265754Sdelphij * However, we must handle the case of delegated permissions where we 744265754Sdelphij * are allowed to change the limit on the current dataset, but there 745265754Sdelphij * is another limit in the tree above. 746265754Sdelphij */ 747265754Sdelphij enforce = dsl_enforce_ds_ss_limits(dd, prop, cr); 748265754Sdelphij if (enforce == ENFORCE_NEVER) 749265754Sdelphij return (0); 750265754Sdelphij 751265754Sdelphij /* 752265754Sdelphij * e.g. if renaming a dataset with no snapshots, count adjustment 753265754Sdelphij * is 0. 754265754Sdelphij */ 755265754Sdelphij if (delta == 0) 756265754Sdelphij return (0); 757265754Sdelphij 758265754Sdelphij if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { 759265754Sdelphij /* 760265754Sdelphij * We don't enforce the limit for temporary snapshots. This is 761265754Sdelphij * indicated by a NULL cred_t argument. 762265754Sdelphij */ 763265754Sdelphij if (cr == NULL) 764265754Sdelphij return (0); 765265754Sdelphij 766265754Sdelphij count_prop = DD_FIELD_SNAPSHOT_COUNT; 767265754Sdelphij } else { 768265754Sdelphij count_prop = DD_FIELD_FILESYSTEM_COUNT; 769265754Sdelphij } 770265754Sdelphij 771265754Sdelphij /* 772265754Sdelphij * If an ancestor has been provided, stop checking the limit once we 773265754Sdelphij * hit that dir. We need this during rename so that we don't overcount 774265754Sdelphij * the check once we recurse up to the common ancestor. 775265754Sdelphij */ 776265754Sdelphij if (ancestor == dd) 777265754Sdelphij return (0); 778265754Sdelphij 779265754Sdelphij /* 780265754Sdelphij * If we hit an uninitialized node while recursing up the tree, we can 781265754Sdelphij * stop since we know there is no limit here (or above). The counts are 782265754Sdelphij * not valid on this node and we know we won't touch this node's counts. 
783265754Sdelphij */ 784265754Sdelphij if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object, 785265754Sdelphij count_prop, sizeof (count), 1, &count) == ENOENT) 786265754Sdelphij return (0); 787265754Sdelphij 788265754Sdelphij err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL, 789265754Sdelphij B_FALSE); 790265754Sdelphij if (err != 0) 791265754Sdelphij return (err); 792265754Sdelphij 793265754Sdelphij /* Is there a limit which we've hit? */ 794265754Sdelphij if (enforce == ENFORCE_ALWAYS && (count + delta) > limit) 795265754Sdelphij return (SET_ERROR(EDQUOT)); 796265754Sdelphij 797265754Sdelphij if (dd->dd_parent != NULL) 798265754Sdelphij err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop, 799265754Sdelphij ancestor, cr); 800265754Sdelphij 801265754Sdelphij return (err); 802265754Sdelphij} 803265754Sdelphij 804265754Sdelphij/* 805265754Sdelphij * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all 806265754Sdelphij * parents. When a new filesystem/snapshot is created, increment the count on 807265754Sdelphij * all parents, and when a filesystem/snapshot is destroyed, decrement the 808265754Sdelphij * count. 809265754Sdelphij */ 810265754Sdelphijvoid 811265754Sdelphijdsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop, 812265754Sdelphij dmu_tx_t *tx) 813265754Sdelphij{ 814265754Sdelphij int err; 815265754Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 816265754Sdelphij uint64_t count; 817265754Sdelphij 818265754Sdelphij ASSERT(dsl_pool_config_held(dd->dd_pool)); 819265754Sdelphij ASSERT(dmu_tx_is_syncing(tx)); 820265754Sdelphij ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 || 821265754Sdelphij strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0); 822265754Sdelphij 823265754Sdelphij /* 824265754Sdelphij * When we receive an incremental stream into a filesystem that already 825265754Sdelphij * exists, a temporary clone is created. 
We don't count this temporary 826265754Sdelphij * clone, whose name begins with a '%'. We also ignore hidden ($FREE, 827265754Sdelphij * $MOS & $ORIGIN) objsets. 828265754Sdelphij */ 829265754Sdelphij if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') && 830265754Sdelphij strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0) 831265754Sdelphij return; 832265754Sdelphij 833265754Sdelphij /* 834265754Sdelphij * e.g. if renaming a dataset with no snapshots, count adjustment is 0 835265754Sdelphij */ 836265754Sdelphij if (delta == 0) 837265754Sdelphij return; 838265754Sdelphij 839265754Sdelphij /* 840265754Sdelphij * If we hit an uninitialized node while recursing up the tree, we can 841265754Sdelphij * stop since we know the counts are not valid on this node and we 842265754Sdelphij * know we shouldn't touch this node's counts. An uninitialized count 843265754Sdelphij * on the node indicates that either the feature has not yet been 844265754Sdelphij * activated or there are no limits on this part of the tree. 845265754Sdelphij */ 846265754Sdelphij if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object, 847265754Sdelphij prop, sizeof (count), 1, &count)) == ENOENT) 848265754Sdelphij return; 849265754Sdelphij VERIFY0(err); 850265754Sdelphij 851265754Sdelphij count += delta; 852265754Sdelphij /* Use a signed verify to make sure we're not neg. 
*/ 853265754Sdelphij VERIFY3S(count, >=, 0); 854265754Sdelphij 855265754Sdelphij VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count, 856265754Sdelphij tx)); 857265754Sdelphij 858265754Sdelphij /* Roll up this additional count into our ancestors */ 859265754Sdelphij if (dd->dd_parent != NULL) 860265754Sdelphij dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx); 861265754Sdelphij} 862265754Sdelphij 863168404Spjduint64_t 864185029Spjddsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, 865185029Spjd dmu_tx_t *tx) 866168404Spjd{ 867185029Spjd objset_t *mos = dp->dp_meta_objset; 868168404Spjd uint64_t ddobj; 869219089Spjd dsl_dir_phys_t *ddphys; 870168404Spjd dmu_buf_t *dbuf; 871168404Spjd 872168404Spjd ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 873168404Spjd DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 874185029Spjd if (pds) { 875185029Spjd VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 876185029Spjd name, sizeof (uint64_t), 1, &ddobj, tx)); 877185029Spjd } else { 878185029Spjd /* it's the root dir */ 879185029Spjd VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 880185029Spjd DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); 881185029Spjd } 882168404Spjd VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); 883168404Spjd dmu_buf_will_dirty(dbuf, tx); 884219089Spjd ddphys = dbuf->db_data; 885168404Spjd 886219089Spjd ddphys->dd_creation_time = gethrestime_sec(); 887265754Sdelphij if (pds) { 888219089Spjd ddphys->dd_parent_obj = pds->dd_object; 889265754Sdelphij 890265754Sdelphij /* update the filesystem counts */ 891265754Sdelphij dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx); 892265754Sdelphij } 893219089Spjd ddphys->dd_props_zapobj = zap_create(mos, 894168404Spjd DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 895219089Spjd ddphys->dd_child_dir_zapobj = zap_create(mos, 896168404Spjd DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 897185029Spjd if (spa_version(dp->dp_spa) >= 
SPA_VERSION_USED_BREAKDOWN) 898219089Spjd ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; 899168404Spjd dmu_buf_rele(dbuf, FTAG); 900168404Spjd 901168404Spjd return (ddobj); 902168404Spjd} 903168404Spjd 904185029Spjdboolean_t 905185029Spjddsl_dir_is_clone(dsl_dir_t *dd) 906168404Spjd{ 907185029Spjd return (dd->dd_phys->dd_origin_obj && 908185029Spjd (dd->dd_pool->dp_origin_snap == NULL || 909185029Spjd dd->dd_phys->dd_origin_obj != 910185029Spjd dd->dd_pool->dp_origin_snap->ds_object)); 911168404Spjd} 912168404Spjd 913168404Spjdvoid 914168404Spjddsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) 915168404Spjd{ 916168404Spjd mutex_enter(&dd->dd_lock); 917185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 918185029Spjd dd->dd_phys->dd_used_bytes); 919185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); 920168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, 921168404Spjd dd->dd_phys->dd_reserved); 922168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 923168404Spjd dd->dd_phys->dd_compressed_bytes == 0 ? 
100 : 924168404Spjd (dd->dd_phys->dd_uncompressed_bytes * 100 / 925168404Spjd dd->dd_phys->dd_compressed_bytes)); 926248369Smm dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, 927248369Smm dd->dd_phys->dd_uncompressed_bytes); 928185029Spjd if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 929185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, 930185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); 931185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, 932185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); 933185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, 934185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); 935185029Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, 936185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + 937185029Spjd dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); 938185029Spjd } 939168404Spjd mutex_exit(&dd->dd_lock); 940168404Spjd 941265754Sdelphij if (dsl_dir_is_zapified(dd)) { 942265754Sdelphij uint64_t count; 943265754Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 944265754Sdelphij 945265754Sdelphij if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 946265754Sdelphij sizeof (count), 1, &count) == 0) { 947265754Sdelphij dsl_prop_nvlist_add_uint64(nv, 948265754Sdelphij ZFS_PROP_FILESYSTEM_COUNT, count); 949265754Sdelphij } 950265754Sdelphij if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 951265754Sdelphij sizeof (count), 1, &count) == 0) { 952265754Sdelphij dsl_prop_nvlist_add_uint64(nv, 953265754Sdelphij ZFS_PROP_SNAPSHOT_COUNT, count); 954265754Sdelphij } 955265754Sdelphij } 956265754Sdelphij 957185029Spjd if (dsl_dir_is_clone(dd)) { 958168404Spjd dsl_dataset_t *ds; 959168404Spjd char buf[MAXNAMELEN]; 960168404Spjd 961249643Smm VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, 962185029Spjd dd->dd_phys->dd_origin_obj, FTAG, &ds)); 963168404Spjd dsl_dataset_name(ds, buf); 964185029Spjd dsl_dataset_rele(ds, FTAG); 965168404Spjd 
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); 966168404Spjd } 967168404Spjd} 968168404Spjd 969168404Spjdvoid 970168404Spjddsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 971168404Spjd{ 972168404Spjd dsl_pool_t *dp = dd->dd_pool; 973168404Spjd 974168404Spjd ASSERT(dd->dd_phys); 975168404Spjd 976249643Smm if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { 977168404Spjd /* up the hold count until we can be written out */ 978168404Spjd dmu_buf_add_ref(dd->dd_dbuf, dd); 979168404Spjd } 980168404Spjd} 981168404Spjd 982168404Spjdstatic int64_t 983168404Spjdparent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 984168404Spjd{ 985168404Spjd uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 986168404Spjd uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 987168404Spjd return (new_accounted - old_accounted); 988168404Spjd} 989168404Spjd 990168404Spjdvoid 991168404Spjddsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 992168404Spjd{ 993168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 994168404Spjd 995168404Spjd mutex_enter(&dd->dd_lock); 996243674Smm ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]); 997168404Spjd dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 998168404Spjd dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 999168404Spjd dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 1000168404Spjd mutex_exit(&dd->dd_lock); 1001168404Spjd 1002168404Spjd /* release the hold from dsl_dir_dirty */ 1003168404Spjd dmu_buf_rele(dd->dd_dbuf, dd); 1004168404Spjd} 1005168404Spjd 1006168404Spjdstatic uint64_t 1007185029Spjddsl_dir_space_towrite(dsl_dir_t *dd) 1008168404Spjd{ 1009185029Spjd uint64_t space = 0; 1010168404Spjd int i; 1011168404Spjd 1012168404Spjd ASSERT(MUTEX_HELD(&dd->dd_lock)); 1013168404Spjd 1014168404Spjd for (i = 0; i < TXG_SIZE; i++) { 1015168404Spjd space += dd->dd_space_towrite[i&TXG_MASK]; 1016168404Spjd ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 1017168404Spjd } 1018168404Spjd return (space); 1019168404Spjd} 
1020168404Spjd 1021168404Spjd/* 1022168404Spjd * How much space would dd have available if ancestor had delta applied 1023168404Spjd * to it? If ondiskonly is set, we're only interested in what's 1024168404Spjd * on-disk, not estimated pending changes. 1025168404Spjd */ 1026168404Spjduint64_t 1027168404Spjddsl_dir_space_available(dsl_dir_t *dd, 1028168404Spjd dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 1029168404Spjd{ 1030168404Spjd uint64_t parentspace, myspace, quota, used; 1031168404Spjd 1032168404Spjd /* 1033168404Spjd * If there are no restrictions otherwise, assume we have 1034168404Spjd * unlimited space available. 1035168404Spjd */ 1036168404Spjd quota = UINT64_MAX; 1037168404Spjd parentspace = UINT64_MAX; 1038168404Spjd 1039168404Spjd if (dd->dd_parent != NULL) { 1040168404Spjd parentspace = dsl_dir_space_available(dd->dd_parent, 1041168404Spjd ancestor, delta, ondiskonly); 1042168404Spjd } 1043168404Spjd 1044168404Spjd mutex_enter(&dd->dd_lock); 1045168404Spjd if (dd->dd_phys->dd_quota != 0) 1046168404Spjd quota = dd->dd_phys->dd_quota; 1047185029Spjd used = dd->dd_phys->dd_used_bytes; 1048185029Spjd if (!ondiskonly) 1049185029Spjd used += dsl_dir_space_towrite(dd); 1050168404Spjd 1051168404Spjd if (dd->dd_parent == NULL) { 1052168404Spjd uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); 1053168404Spjd quota = MIN(quota, poolsize); 1054168404Spjd } 1055168404Spjd 1056168404Spjd if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 1057168404Spjd /* 1058168404Spjd * We have some space reserved, in addition to what our 1059168404Spjd * parent gave us. 
1060168404Spjd */ 1061168404Spjd parentspace += dd->dd_phys->dd_reserved - used; 1062168404Spjd } 1063168404Spjd 1064185029Spjd if (dd == ancestor) { 1065185029Spjd ASSERT(delta <= 0); 1066185029Spjd ASSERT(used >= -delta); 1067185029Spjd used += delta; 1068185029Spjd if (parentspace != UINT64_MAX) 1069185029Spjd parentspace -= delta; 1070185029Spjd } 1071185029Spjd 1072168404Spjd if (used > quota) { 1073168404Spjd /* over quota */ 1074168404Spjd myspace = 0; 1075168404Spjd } else { 1076168404Spjd /* 1077168404Spjd * the lesser of the space provided by our parent and 1078168404Spjd * the space left in our quota 1079168404Spjd */ 1080168404Spjd myspace = MIN(parentspace, quota - used); 1081168404Spjd } 1082168404Spjd 1083168404Spjd mutex_exit(&dd->dd_lock); 1084168404Spjd 1085168404Spjd return (myspace); 1086168404Spjd} 1087168404Spjd 1088168404Spjdstruct tempreserve { 1089168404Spjd list_node_t tr_node; 1090168404Spjd dsl_dir_t *tr_ds; 1091168404Spjd uint64_t tr_size; 1092168404Spjd}; 1093168404Spjd 1094168404Spjdstatic int 1095185029Spjddsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, 1096185029Spjd boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list, 1097185029Spjd dmu_tx_t *tx, boolean_t first) 1098168404Spjd{ 1099168404Spjd uint64_t txg = tx->tx_txg; 1100185029Spjd uint64_t est_inflight, used_on_disk, quota, parent_rsrv; 1101219089Spjd uint64_t deferred = 0; 1102185029Spjd struct tempreserve *tr; 1103219089Spjd int retval = EDQUOT; 1104168404Spjd int txgidx = txg & TXG_MASK; 1105168404Spjd int i; 1106185029Spjd uint64_t ref_rsrv = 0; 1107168404Spjd 1108168404Spjd ASSERT3U(txg, !=, 0); 1109185029Spjd ASSERT3S(asize, >, 0); 1110168404Spjd 1111168404Spjd mutex_enter(&dd->dd_lock); 1112185029Spjd 1113168404Spjd /* 1114168404Spjd * Check against the dsl_dir's quota. We don't add in the delta 1115168404Spjd * when checking for over-quota because they get one free hit. 
1116168404Spjd */ 1117185029Spjd est_inflight = dsl_dir_space_towrite(dd); 1118168404Spjd for (i = 0; i < TXG_SIZE; i++) 1119185029Spjd est_inflight += dd->dd_tempreserved[i]; 1120185029Spjd used_on_disk = dd->dd_phys->dd_used_bytes; 1121168404Spjd 1122185029Spjd /* 1123185029Spjd * On the first iteration, fetch the dataset's used-on-disk and 1124185029Spjd * refreservation values. Also, if checkrefquota is set, test if 1125185029Spjd * allocating this space would exceed the dataset's refquota. 1126185029Spjd */ 1127185029Spjd if (first && tx->tx_objset) { 1128185029Spjd int error; 1129219089Spjd dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; 1130168404Spjd 1131185029Spjd error = dsl_dataset_check_quota(ds, checkrefquota, 1132185029Spjd asize, est_inflight, &used_on_disk, &ref_rsrv); 1133185029Spjd if (error) { 1134185029Spjd mutex_exit(&dd->dd_lock); 1135185029Spjd return (error); 1136185029Spjd } 1137185029Spjd } 1138185029Spjd 1139185029Spjd /* 1140185029Spjd * If this transaction will result in a net free of space, 1141185029Spjd * we want to let it through. 1142185029Spjd */ 1143185029Spjd if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) 1144185029Spjd quota = UINT64_MAX; 1145185029Spjd else 1146168404Spjd quota = dd->dd_phys->dd_quota; 1147168404Spjd 1148168404Spjd /* 1149219089Spjd * Adjust the quota against the actual pool size at the root 1150219089Spjd * minus any outstanding deferred frees. 1151185029Spjd * To ensure that it's possible to remove files from a full 1152185029Spjd * pool without inducing transient overcommits, we throttle 1153168404Spjd * netfree transactions against a quota that is slightly larger, 1154168404Spjd * but still within the pool's allocation slop. In cases where 1155168404Spjd * we're very close to full, this will allow a steady trickle of 1156168404Spjd * removes to get through. 
1157168404Spjd */ 1158168404Spjd if (dd->dd_parent == NULL) { 1159219089Spjd spa_t *spa = dd->dd_pool->dp_spa; 1160168404Spjd uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 1161219089Spjd deferred = metaslab_class_get_deferred(spa_normal_class(spa)); 1162219089Spjd if (poolsize - deferred < quota) { 1163219089Spjd quota = poolsize - deferred; 1164219089Spjd retval = ENOSPC; 1165168404Spjd } 1166168404Spjd } 1167168404Spjd 1168168404Spjd /* 1169168404Spjd * If they are requesting more space, and our current estimate 1170185029Spjd * is over quota, they get to try again unless the actual 1171168404Spjd * on-disk is over quota and there are no pending changes (which 1172168404Spjd * may free up space for us). 1173168404Spjd */ 1174219089Spjd if (used_on_disk + est_inflight >= quota) { 1175219089Spjd if (est_inflight > 0 || used_on_disk < quota || 1176219089Spjd (retval == ENOSPC && used_on_disk < quota + deferred)) 1177219089Spjd retval = ERESTART; 1178185029Spjd dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " 1179168404Spjd "quota=%lluK tr=%lluK err=%d\n", 1180185029Spjd used_on_disk>>10, est_inflight>>10, 1181219089Spjd quota>>10, asize>>10, retval); 1182168404Spjd mutex_exit(&dd->dd_lock); 1183249643Smm return (SET_ERROR(retval)); 1184168404Spjd } 1185168404Spjd 1186168404Spjd /* We need to up our estimated delta before dropping dd_lock */ 1187168404Spjd dd->dd_tempreserved[txgidx] += asize; 1188168404Spjd 1189185029Spjd parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, 1190185029Spjd asize - ref_rsrv); 1191168404Spjd mutex_exit(&dd->dd_lock); 1192168404Spjd 1193185029Spjd tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 1194168404Spjd tr->tr_ds = dd; 1195168404Spjd tr->tr_size = asize; 1196168404Spjd list_insert_tail(tr_list, tr); 1197168404Spjd 1198168404Spjd /* see if it's OK with our parent */ 1199168404Spjd if (dd->dd_parent && parent_rsrv) { 1200185029Spjd boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); 
1201185029Spjd 1202168404Spjd return (dsl_dir_tempreserve_impl(dd->dd_parent, 1203185029Spjd parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); 1204168404Spjd } else { 1205168404Spjd return (0); 1206168404Spjd } 1207168404Spjd} 1208168404Spjd 1209168404Spjd/* 1210168404Spjd * Reserve space in this dsl_dir, to be used in this tx's txg. 1211185029Spjd * After the space has been dirtied (and dsl_dir_willuse_space() 1212185029Spjd * has been called), the reservation should be canceled, using 1213185029Spjd * dsl_dir_tempreserve_clear(). 1214168404Spjd */ 1215168404Spjdint 1216185029Spjddsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, 1217185029Spjd uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) 1218168404Spjd{ 1219185029Spjd int err; 1220168404Spjd list_t *tr_list; 1221168404Spjd 1222185029Spjd if (asize == 0) { 1223185029Spjd *tr_cookiep = NULL; 1224185029Spjd return (0); 1225185029Spjd } 1226185029Spjd 1227168404Spjd tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 1228168404Spjd list_create(tr_list, sizeof (struct tempreserve), 1229168404Spjd offsetof(struct tempreserve, tr_node)); 1230185029Spjd ASSERT3S(asize, >, 0); 1231168404Spjd ASSERT3S(fsize, >=, 0); 1232168404Spjd 1233185029Spjd err = arc_tempreserve_space(lsize, tx->tx_txg); 1234168404Spjd if (err == 0) { 1235168404Spjd struct tempreserve *tr; 1236168404Spjd 1237185029Spjd tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 1238185029Spjd tr->tr_size = lsize; 1239185029Spjd list_insert_tail(tr_list, tr); 1240185029Spjd } else { 1241185029Spjd if (err == EAGAIN) { 1242260764Savg /* 1243260764Savg * If arc_memory_throttle() detected that pageout 1244260764Savg * is running and we are low on memory, we delay new 1245260764Savg * non-pageout transactions to give pageout an 1246260764Savg * advantage. 1247260764Savg * 1248260764Savg * It is unfortunate to be delaying while the caller's 1249260764Savg * locks are held. 
1250260764Savg */ 1251260754Savg txg_delay(dd->dd_pool, tx->tx_txg, 1252260754Savg MSEC2NSEC(10), MSEC2NSEC(10)); 1253249643Smm err = SET_ERROR(ERESTART); 1254168404Spjd } 1255168404Spjd } 1256168404Spjd 1257185029Spjd if (err == 0) { 1258185029Spjd err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 1259185029Spjd FALSE, asize > usize, tr_list, tx, TRUE); 1260185029Spjd } 1261185029Spjd 1262249643Smm if (err != 0) 1263168404Spjd dsl_dir_tempreserve_clear(tr_list, tx); 1264168404Spjd else 1265168404Spjd *tr_cookiep = tr_list; 1266185029Spjd 1267168404Spjd return (err); 1268168404Spjd} 1269168404Spjd 1270168404Spjd/* 1271168404Spjd * Clear a temporary reservation that we previously made with 1272168404Spjd * dsl_dir_tempreserve_space(). 1273168404Spjd */ 1274168404Spjdvoid 1275168404Spjddsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 1276168404Spjd{ 1277168404Spjd int txgidx = tx->tx_txg & TXG_MASK; 1278168404Spjd list_t *tr_list = tr_cookie; 1279168404Spjd struct tempreserve *tr; 1280168404Spjd 1281168404Spjd ASSERT3U(tx->tx_txg, !=, 0); 1282168404Spjd 1283185029Spjd if (tr_cookie == NULL) 1284185029Spjd return; 1285185029Spjd 1286260764Savg while ((tr = list_head(tr_list)) != NULL) { 1287260764Savg if (tr->tr_ds) { 1288168404Spjd mutex_enter(&tr->tr_ds->dd_lock); 1289168404Spjd ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 1290168404Spjd tr->tr_size); 1291168404Spjd tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 1292168404Spjd mutex_exit(&tr->tr_ds->dd_lock); 1293185029Spjd } else { 1294185029Spjd arc_tempreserve_clear(tr->tr_size); 1295168404Spjd } 1296168404Spjd list_remove(tr_list, tr); 1297168404Spjd kmem_free(tr, sizeof (struct tempreserve)); 1298168404Spjd } 1299168404Spjd 1300168404Spjd kmem_free(tr_list, sizeof (list_t)); 1301168404Spjd} 1302168404Spjd 1303260764Savg/* 1304260764Savg * This should be called from open context when we think we're going to write 1305260764Savg * or free space, for example when dirtying data. 
Be conservative; it's okay 1306260764Savg * to write less space or free more, but we don't want to write more or free 1307260764Savg * less than the amount specified. 1308260764Savg */ 1309260764Savgvoid 1310260764Savgdsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 1311168404Spjd{ 1312168404Spjd int64_t parent_space; 1313168404Spjd uint64_t est_used; 1314168404Spjd 1315168404Spjd mutex_enter(&dd->dd_lock); 1316168404Spjd if (space > 0) 1317168404Spjd dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 1318168404Spjd 1319185029Spjd est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; 1320168404Spjd parent_space = parent_delta(dd, est_used, space); 1321168404Spjd mutex_exit(&dd->dd_lock); 1322168404Spjd 1323168404Spjd /* Make sure that we clean up dd_space_to* */ 1324168404Spjd dsl_dir_dirty(dd, tx); 1325168404Spjd 1326168404Spjd /* XXX this is potentially expensive and unnecessary... */ 1327168404Spjd if (parent_space && dd->dd_parent) 1328260764Savg dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); 1329168404Spjd} 1330168404Spjd 1331168404Spjd/* call from syncing context when we actually write/free space for this dd */ 1332168404Spjdvoid 1333185029Spjddsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, 1334168404Spjd int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 1335168404Spjd{ 1336168404Spjd int64_t accounted_delta; 1337262171Savg 1338262171Savg /* 1339262171Savg * dsl_dataset_set_refreservation_sync_impl() calls this with 1340262171Savg * dd_lock held, so that it can atomically update 1341262171Savg * ds->ds_reserved and the dsl_dir accounting, so that 1342262171Savg * dsl_dataset_check_quota() can see dataset and dir accounting 1343262171Savg * consistently. 
1344262171Savg */ 1345185029Spjd boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); 1346168404Spjd 1347168404Spjd ASSERT(dmu_tx_is_syncing(tx)); 1348185029Spjd ASSERT(type < DD_USED_NUM); 1349168404Spjd 1350262171Savg dmu_buf_will_dirty(dd->dd_dbuf, tx); 1351262171Savg 1352185029Spjd if (needlock) 1353185029Spjd mutex_enter(&dd->dd_lock); 1354185029Spjd accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); 1355185029Spjd ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); 1356168404Spjd ASSERT(compressed >= 0 || 1357168404Spjd dd->dd_phys->dd_compressed_bytes >= -compressed); 1358168404Spjd ASSERT(uncompressed >= 0 || 1359168404Spjd dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 1360185029Spjd dd->dd_phys->dd_used_bytes += used; 1361168404Spjd dd->dd_phys->dd_uncompressed_bytes += uncompressed; 1362168404Spjd dd->dd_phys->dd_compressed_bytes += compressed; 1363168404Spjd 1364185029Spjd if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 1365185029Spjd ASSERT(used > 0 || 1366185029Spjd dd->dd_phys->dd_used_breakdown[type] >= -used); 1367185029Spjd dd->dd_phys->dd_used_breakdown[type] += used; 1368185029Spjd#ifdef DEBUG 1369185029Spjd dd_used_t t; 1370185029Spjd uint64_t u = 0; 1371185029Spjd for (t = 0; t < DD_USED_NUM; t++) 1372185029Spjd u += dd->dd_phys->dd_used_breakdown[t]; 1373185029Spjd ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); 1374185029Spjd#endif 1375185029Spjd } 1376185029Spjd if (needlock) 1377185029Spjd mutex_exit(&dd->dd_lock); 1378185029Spjd 1379168404Spjd if (dd->dd_parent != NULL) { 1380185029Spjd dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 1381168404Spjd accounted_delta, compressed, uncompressed, tx); 1382185029Spjd dsl_dir_transfer_space(dd->dd_parent, 1383185029Spjd used - accounted_delta, 1384185029Spjd DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); 1385168404Spjd } 1386168404Spjd} 1387168404Spjd 1388185029Spjdvoid 1389185029Spjddsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, 1390185029Spjd dd_used_t oldtype, 
dd_used_t newtype, dmu_tx_t *tx) 1391185029Spjd{ 1392185029Spjd ASSERT(dmu_tx_is_syncing(tx)); 1393185029Spjd ASSERT(oldtype < DD_USED_NUM); 1394185029Spjd ASSERT(newtype < DD_USED_NUM); 1395185029Spjd 1396185029Spjd if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) 1397185029Spjd return; 1398185029Spjd 1399262171Savg dmu_buf_will_dirty(dd->dd_dbuf, tx); 1400262171Savg mutex_enter(&dd->dd_lock); 1401185029Spjd ASSERT(delta > 0 ? 1402185029Spjd dd->dd_phys->dd_used_breakdown[oldtype] >= delta : 1403185029Spjd dd->dd_phys->dd_used_breakdown[newtype] >= -delta); 1404185029Spjd ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); 1405185029Spjd dd->dd_phys->dd_used_breakdown[oldtype] -= delta; 1406185029Spjd dd->dd_phys->dd_used_breakdown[newtype] += delta; 1407262171Savg mutex_exit(&dd->dd_lock); 1408185029Spjd} 1409185029Spjd 1410249643Smmtypedef struct dsl_dir_set_qr_arg { 1411249643Smm const char *ddsqra_name; 1412249643Smm zprop_source_t ddsqra_source; 1413249643Smm uint64_t ddsqra_value; 1414249643Smm} dsl_dir_set_qr_arg_t; 1415249643Smm 1416168404Spjdstatic int 1417249643Smmdsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) 1418168404Spjd{ 1419249643Smm dsl_dir_set_qr_arg_t *ddsqra = arg; 1420249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1421249643Smm dsl_dataset_t *ds; 1422249643Smm int error; 1423249643Smm uint64_t towrite, newval; 1424168404Spjd 1425249643Smm error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 1426249643Smm if (error != 0) 1427249643Smm return (error); 1428219089Spjd 1429249643Smm error = dsl_prop_predict(ds->ds_dir, "quota", 1430249643Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 1431249643Smm if (error != 0) { 1432249643Smm dsl_dataset_rele(ds, FTAG); 1433249643Smm return (error); 1434249643Smm } 1435249643Smm 1436249643Smm if (newval == 0) { 1437249643Smm dsl_dataset_rele(ds, FTAG); 1438168404Spjd return (0); 1439249643Smm } 1440168404Spjd 1441249643Smm mutex_enter(&ds->ds_dir->dd_lock); 1442168404Spjd /* 
1443168404Spjd * If we are doing the preliminary check in open context, and 1444168404Spjd * there are pending changes, then don't fail it, since the 1445185029Spjd * pending changes could under-estimate the amount of space to be 1446168404Spjd * freed up. 1447168404Spjd */ 1448249643Smm towrite = dsl_dir_space_towrite(ds->ds_dir); 1449168404Spjd if ((dmu_tx_is_syncing(tx) || towrite == 0) && 1450249643Smm (newval < ds->ds_dir->dd_phys->dd_reserved || 1451249643Smm newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) { 1452249643Smm error = SET_ERROR(ENOSPC); 1453168404Spjd } 1454249643Smm mutex_exit(&ds->ds_dir->dd_lock); 1455249643Smm dsl_dataset_rele(ds, FTAG); 1456249643Smm return (error); 1457168404Spjd} 1458168404Spjd 1459168404Spjdstatic void 1460249643Smmdsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) 1461168404Spjd{ 1462249643Smm dsl_dir_set_qr_arg_t *ddsqra = arg; 1463249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1464249643Smm dsl_dataset_t *ds; 1465249643Smm uint64_t newval; 1466168404Spjd 1467249643Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 1468219089Spjd 1469249920Smm if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { 1470249920Smm dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), 1471249920Smm ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 1472249920Smm &ddsqra->ddsqra_value, tx); 1473168404Spjd 1474249920Smm VERIFY0(dsl_prop_get_int_ds(ds, 1475249920Smm zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); 1476249920Smm } else { 1477249920Smm newval = ddsqra->ddsqra_value; 1478249920Smm spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", 1479249920Smm zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval); 1480249920Smm } 1481249643Smm 1482249643Smm dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1483249643Smm mutex_enter(&ds->ds_dir->dd_lock); 1484249643Smm ds->ds_dir->dd_phys->dd_quota = newval; 1485249643Smm mutex_exit(&ds->ds_dir->dd_lock); 1486249643Smm dsl_dataset_rele(ds, FTAG); 1487168404Spjd} 
1488168404Spjd 1489168404Spjdint 1490219089Spjddsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) 1491168404Spjd{ 1492249643Smm dsl_dir_set_qr_arg_t ddsqra; 1493168404Spjd 1494249643Smm ddsqra.ddsqra_name = ddname; 1495249643Smm ddsqra.ddsqra_source = source; 1496249643Smm ddsqra.ddsqra_value = quota; 1497219089Spjd 1498249643Smm return (dsl_sync_task(ddname, dsl_dir_set_quota_check, 1499249643Smm dsl_dir_set_quota_sync, &ddsqra, 0)); 1500168404Spjd} 1501168404Spjd 1502185029Spjdint 1503249643Smmdsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) 1504168404Spjd{ 1505249643Smm dsl_dir_set_qr_arg_t *ddsqra = arg; 1506249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1507249643Smm dsl_dataset_t *ds; 1508249643Smm dsl_dir_t *dd; 1509249643Smm uint64_t newval, used, avail; 1510249643Smm int error; 1511168404Spjd 1512249643Smm error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 1513249643Smm if (error != 0) 1514249643Smm return (error); 1515249643Smm dd = ds->ds_dir; 1516219089Spjd 1517168404Spjd /* 1518168404Spjd * If we are doing the preliminary check in open context, the 1519168404Spjd * space estimates may be inaccurate. 
1520168404Spjd */ 1521249643Smm if (!dmu_tx_is_syncing(tx)) { 1522249643Smm dsl_dataset_rele(ds, FTAG); 1523168404Spjd return (0); 1524249643Smm } 1525168404Spjd 1526249643Smm error = dsl_prop_predict(ds->ds_dir, 1527249643Smm zfs_prop_to_name(ZFS_PROP_RESERVATION), 1528249643Smm ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 1529249643Smm if (error != 0) { 1530249643Smm dsl_dataset_rele(ds, FTAG); 1531249643Smm return (error); 1532249643Smm } 1533249643Smm 1534168404Spjd mutex_enter(&dd->dd_lock); 1535185029Spjd used = dd->dd_phys->dd_used_bytes; 1536168404Spjd mutex_exit(&dd->dd_lock); 1537168404Spjd 1538168404Spjd if (dd->dd_parent) { 1539168404Spjd avail = dsl_dir_space_available(dd->dd_parent, 1540168404Spjd NULL, 0, FALSE); 1541168404Spjd } else { 1542168404Spjd avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 1543168404Spjd } 1544168404Spjd 1545249643Smm if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) { 1546249643Smm uint64_t delta = MAX(used, newval) - 1547209962Smm MAX(used, dd->dd_phys->dd_reserved); 1548209962Smm 1549249643Smm if (delta > avail || 1550249643Smm (dd->dd_phys->dd_quota > 0 && 1551249643Smm newval > dd->dd_phys->dd_quota)) 1552249643Smm error = SET_ERROR(ENOSPC); 1553209962Smm } 1554209962Smm 1555249643Smm dsl_dataset_rele(ds, FTAG); 1556249643Smm return (error); 1557168404Spjd} 1558168404Spjd 1559249643Smmvoid 1560249643Smmdsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) 1561168404Spjd{ 1562168404Spjd uint64_t used; 1563168404Spjd int64_t delta; 1564168404Spjd 1565185029Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 1566185029Spjd 1567168404Spjd mutex_enter(&dd->dd_lock); 1568185029Spjd used = dd->dd_phys->dd_used_bytes; 1569249643Smm delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); 1570249643Smm dd->dd_phys->dd_reserved = value; 1571168404Spjd 1572168404Spjd if (dd->dd_parent != NULL) { 1573168404Spjd /* Roll up this additional usage into our ancestors */ 
1574185029Spjd dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 1575185029Spjd delta, 0, 0, tx); 1576168404Spjd } 1577185029Spjd mutex_exit(&dd->dd_lock); 1578168404Spjd} 1579168404Spjd 1580249643Smmstatic void 1581249643Smmdsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) 1582168404Spjd{ 1583249643Smm dsl_dir_set_qr_arg_t *ddsqra = arg; 1584249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1585219089Spjd dsl_dataset_t *ds; 1586249643Smm uint64_t newval; 1587168404Spjd 1588249643Smm VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 1589219089Spjd 1590249920Smm if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { 1591249920Smm dsl_prop_set_sync_impl(ds, 1592249920Smm zfs_prop_to_name(ZFS_PROP_RESERVATION), 1593249920Smm ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 1594249920Smm &ddsqra->ddsqra_value, tx); 1595219089Spjd 1596249920Smm VERIFY0(dsl_prop_get_int_ds(ds, 1597249920Smm zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); 1598249920Smm } else { 1599249920Smm newval = ddsqra->ddsqra_value; 1600249920Smm spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", 1601249920Smm zfs_prop_to_name(ZFS_PROP_RESERVATION), 1602249920Smm (longlong_t)newval); 1603249920Smm } 1604219089Spjd 1605249643Smm dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); 1606249643Smm dsl_dataset_rele(ds, FTAG); 1607249643Smm} 1608219089Spjd 1609249643Smmint 1610249643Smmdsl_dir_set_reservation(const char *ddname, zprop_source_t source, 1611249643Smm uint64_t reservation) 1612249643Smm{ 1613249643Smm dsl_dir_set_qr_arg_t ddsqra; 1614219089Spjd 1615249643Smm ddsqra.ddsqra_name = ddname; 1616249643Smm ddsqra.ddsqra_source = source; 1617249643Smm ddsqra.ddsqra_value = reservation; 1618249643Smm 1619249643Smm return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, 1620249643Smm dsl_dir_set_reservation_sync, &ddsqra, 0)); 1621168404Spjd} 1622168404Spjd 1623168404Spjdstatic dsl_dir_t * 1624168404Spjdclosest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t 
*ds2) 1625168404Spjd{ 1626168404Spjd for (; ds1; ds1 = ds1->dd_parent) { 1627168404Spjd dsl_dir_t *dd; 1628168404Spjd for (dd = ds2; dd; dd = dd->dd_parent) { 1629168404Spjd if (ds1 == dd) 1630168404Spjd return (dd); 1631168404Spjd } 1632168404Spjd } 1633168404Spjd return (NULL); 1634168404Spjd} 1635168404Spjd 1636168404Spjd/* 1637168404Spjd * If delta is applied to dd, how much of that delta would be applied to 1638168404Spjd * ancestor? Syncing context only. 1639168404Spjd */ 1640168404Spjdstatic int64_t 1641168404Spjdwould_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1642168404Spjd{ 1643168404Spjd if (dd == ancestor) 1644168404Spjd return (delta); 1645168404Spjd 1646168404Spjd mutex_enter(&dd->dd_lock); 1647185029Spjd delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); 1648168404Spjd mutex_exit(&dd->dd_lock); 1649168404Spjd return (would_change(dd->dd_parent, delta, ancestor)); 1650168404Spjd} 1651168404Spjd 1652249643Smmtypedef struct dsl_dir_rename_arg { 1653249643Smm const char *ddra_oldname; 1654249643Smm const char *ddra_newname; 1655265754Sdelphij cred_t *ddra_cred; 1656249643Smm} dsl_dir_rename_arg_t; 1657168404Spjd 1658249643Smm/* ARGSUSED */ 1659168404Spjdstatic int 1660249643Smmdsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) 1661168404Spjd{ 1662249643Smm int *deltap = arg; 1663249643Smm char namebuf[MAXNAMELEN]; 1664168404Spjd 1665249643Smm dsl_dataset_name(ds, namebuf); 1666249643Smm 1667249643Smm if (strlen(namebuf) + *deltap >= MAXNAMELEN) 1668249643Smm return (SET_ERROR(ENAMETOOLONG)); 1669249643Smm return (0); 1670249643Smm} 1671249643Smm 1672249643Smmstatic int 1673249643Smmdsl_dir_rename_check(void *arg, dmu_tx_t *tx) 1674249643Smm{ 1675249643Smm dsl_dir_rename_arg_t *ddra = arg; 1676249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1677249643Smm dsl_dir_t *dd, *newparent; 1678249643Smm const char *mynewname; 1679249643Smm int error; 1680249643Smm int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname); 
1681249643Smm 1682249643Smm /* target dir should exist */ 1683249643Smm error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); 1684249643Smm if (error != 0) 1685249643Smm return (error); 1686249643Smm 1687249643Smm /* new parent should exist */ 1688249643Smm error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, 1689249643Smm &newparent, &mynewname); 1690249643Smm if (error != 0) { 1691249643Smm dsl_dir_rele(dd, FTAG); 1692249643Smm return (error); 1693229565Smm } 1694168404Spjd 1695249643Smm /* can't rename to different pool */ 1696249643Smm if (dd->dd_pool != newparent->dd_pool) { 1697249643Smm dsl_dir_rele(newparent, FTAG); 1698249643Smm dsl_dir_rele(dd, FTAG); 1699249643Smm return (SET_ERROR(ENXIO)); 1700249643Smm } 1701168404Spjd 1702249643Smm /* new name should not already exist */ 1703249643Smm if (mynewname == NULL) { 1704249643Smm dsl_dir_rele(newparent, FTAG); 1705249643Smm dsl_dir_rele(dd, FTAG); 1706249643Smm return (SET_ERROR(EEXIST)); 1707249643Smm } 1708249643Smm 1709249643Smm /* if the name length is growing, validate child name lengths */ 1710249643Smm if (delta > 0) { 1711249643Smm error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, 1712249643Smm &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 1713249643Smm if (error != 0) { 1714249643Smm dsl_dir_rele(newparent, FTAG); 1715249643Smm dsl_dir_rele(dd, FTAG); 1716249643Smm return (error); 1717249643Smm } 1718249643Smm } 1719249643Smm 1720265754Sdelphij if (dmu_tx_is_syncing(tx)) { 1721267139Sdelphij if (spa_feature_is_active(dp->dp_spa, 1722265754Sdelphij SPA_FEATURE_FS_SS_LIMIT)) { 1723265754Sdelphij /* 1724265754Sdelphij * Although this is the check function and we don't 1725265754Sdelphij * normally make on-disk changes in check functions, 1726265754Sdelphij * we need to do that here. 1727265754Sdelphij * 1728265754Sdelphij * Ensure this portion of the tree's counts have been 1729265754Sdelphij * initialized in case the new parent has limits set. 
1730265754Sdelphij */ 1731265754Sdelphij dsl_dir_init_fs_ss_count(dd, tx); 1732265754Sdelphij } 1733265754Sdelphij } 1734265754Sdelphij 1735249643Smm if (newparent != dd->dd_parent) { 1736168404Spjd /* is there enough space? */ 1737168404Spjd uint64_t myspace = 1738185029Spjd MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); 1739265754Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 1740265754Sdelphij uint64_t fs_cnt = 0; 1741265754Sdelphij uint64_t ss_cnt = 0; 1742168404Spjd 1743265754Sdelphij if (dsl_dir_is_zapified(dd)) { 1744265754Sdelphij int err; 1745265754Sdelphij 1746265754Sdelphij err = zap_lookup(os, dd->dd_object, 1747265754Sdelphij DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, 1748265754Sdelphij &fs_cnt); 1749267139Sdelphij if (err != ENOENT && err != 0) { 1750267139Sdelphij dsl_dir_rele(newparent, FTAG); 1751267139Sdelphij dsl_dir_rele(dd, FTAG); 1752265754Sdelphij return (err); 1753267139Sdelphij } 1754265754Sdelphij 1755265754Sdelphij /* 1756265754Sdelphij * have to add 1 for the filesystem itself that we're 1757265754Sdelphij * moving 1758265754Sdelphij */ 1759265754Sdelphij fs_cnt++; 1760265754Sdelphij 1761265754Sdelphij err = zap_lookup(os, dd->dd_object, 1762265754Sdelphij DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, 1763265754Sdelphij &ss_cnt); 1764267139Sdelphij if (err != ENOENT && err != 0) { 1765267139Sdelphij dsl_dir_rele(newparent, FTAG); 1766267139Sdelphij dsl_dir_rele(dd, FTAG); 1767265754Sdelphij return (err); 1768267139Sdelphij } 1769265754Sdelphij } 1770265754Sdelphij 1771168404Spjd /* no rename into our descendant */ 1772249643Smm if (closest_common_ancestor(dd, newparent) == dd) { 1773249643Smm dsl_dir_rele(newparent, FTAG); 1774249643Smm dsl_dir_rele(dd, FTAG); 1775249643Smm return (SET_ERROR(EINVAL)); 1776249643Smm } 1777168404Spjd 1778249643Smm error = dsl_dir_transfer_possible(dd->dd_parent, 1779265754Sdelphij newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred); 1780249643Smm if (error != 0) { 1781249643Smm 
dsl_dir_rele(newparent, FTAG); 1782249643Smm dsl_dir_rele(dd, FTAG); 1783249643Smm return (error); 1784249643Smm } 1785168404Spjd } 1786168404Spjd 1787249643Smm dsl_dir_rele(newparent, FTAG); 1788249643Smm dsl_dir_rele(dd, FTAG); 1789168404Spjd return (0); 1790168404Spjd} 1791168404Spjd 1792168404Spjdstatic void 1793249643Smmdsl_dir_rename_sync(void *arg, dmu_tx_t *tx) 1794168404Spjd{ 1795249643Smm dsl_dir_rename_arg_t *ddra = arg; 1796249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1797249643Smm dsl_dir_t *dd, *newparent; 1798249643Smm const char *mynewname; 1799249643Smm int error; 1800168404Spjd objset_t *mos = dp->dp_meta_objset; 1801168404Spjd 1802249643Smm VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); 1803249643Smm VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, 1804249643Smm &mynewname)); 1805249643Smm 1806249643Smm /* Log this before we change the name. */ 1807249643Smm spa_history_log_internal_dd(dd, "rename", tx, 1808249643Smm "-> %s", ddra->ddra_newname); 1809249643Smm 1810249643Smm if (newparent != dd->dd_parent) { 1811265754Sdelphij objset_t *os = dd->dd_pool->dp_meta_objset; 1812265754Sdelphij uint64_t fs_cnt = 0; 1813265754Sdelphij uint64_t ss_cnt = 0; 1814265754Sdelphij 1815265754Sdelphij /* 1816265754Sdelphij * We already made sure the dd counts were initialized in the 1817265754Sdelphij * check function. 
1818265754Sdelphij */ 1819267139Sdelphij if (spa_feature_is_active(dp->dp_spa, 1820265754Sdelphij SPA_FEATURE_FS_SS_LIMIT)) { 1821265754Sdelphij VERIFY0(zap_lookup(os, dd->dd_object, 1822265754Sdelphij DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, 1823265754Sdelphij &fs_cnt)); 1824265754Sdelphij /* add 1 for the filesystem itself that we're moving */ 1825265754Sdelphij fs_cnt++; 1826265754Sdelphij 1827265754Sdelphij VERIFY0(zap_lookup(os, dd->dd_object, 1828265754Sdelphij DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, 1829265754Sdelphij &ss_cnt)); 1830265754Sdelphij } 1831265754Sdelphij 1832265754Sdelphij dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt, 1833265754Sdelphij DD_FIELD_FILESYSTEM_COUNT, tx); 1834265754Sdelphij dsl_fs_ss_count_adjust(newparent, fs_cnt, 1835265754Sdelphij DD_FIELD_FILESYSTEM_COUNT, tx); 1836265754Sdelphij 1837265754Sdelphij dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt, 1838265754Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 1839265754Sdelphij dsl_fs_ss_count_adjust(newparent, ss_cnt, 1840265754Sdelphij DD_FIELD_SNAPSHOT_COUNT, tx); 1841265754Sdelphij 1842185029Spjd dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 1843185029Spjd -dd->dd_phys->dd_used_bytes, 1844168404Spjd -dd->dd_phys->dd_compressed_bytes, 1845168404Spjd -dd->dd_phys->dd_uncompressed_bytes, tx); 1846249643Smm dsl_dir_diduse_space(newparent, DD_USED_CHILD, 1847185029Spjd dd->dd_phys->dd_used_bytes, 1848168404Spjd dd->dd_phys->dd_compressed_bytes, 1849168404Spjd dd->dd_phys->dd_uncompressed_bytes, tx); 1850185029Spjd 1851185029Spjd if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { 1852185029Spjd uint64_t unused_rsrv = dd->dd_phys->dd_reserved - 1853185029Spjd dd->dd_phys->dd_used_bytes; 1854185029Spjd 1855185029Spjd dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 1856185029Spjd -unused_rsrv, 0, 0, tx); 1857249643Smm dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, 1858185029Spjd unused_rsrv, 0, 0, tx); 1859185029Spjd } 1860168404Spjd } 1861168404Spjd 
1862168404Spjd dmu_buf_will_dirty(dd->dd_dbuf, tx); 1863168404Spjd 1864168404Spjd /* remove from old parent zapobj */ 1865249643Smm error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 1866168404Spjd dd->dd_myname, tx); 1867249643Smm ASSERT0(error); 1868168404Spjd 1869249643Smm (void) strcpy(dd->dd_myname, mynewname); 1870249643Smm dsl_dir_rele(dd->dd_parent, dd); 1871249643Smm dd->dd_phys->dd_parent_obj = newparent->dd_object; 1872249643Smm VERIFY0(dsl_dir_hold_obj(dp, 1873249643Smm newparent->dd_object, NULL, dd, &dd->dd_parent)); 1874168404Spjd 1875168404Spjd /* add to new parent zapobj */ 1876249643Smm VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj, 1877249643Smm dd->dd_myname, 8, 1, &dd->dd_object, tx)); 1878249643Smm 1879249643Smm#ifdef __FreeBSD__ 1880219320Spjd#ifdef _KERNEL 1881249643Smm zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname); 1882249643Smm zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname); 1883219320Spjd#endif 1884249643Smm#endif 1885185029Spjd 1886249643Smm dsl_prop_notify_all(dd); 1887249643Smm 1888249643Smm dsl_dir_rele(newparent, FTAG); 1889249643Smm dsl_dir_rele(dd, FTAG); 1890168404Spjd} 1891168404Spjd 1892168404Spjdint 1893249643Smmdsl_dir_rename(const char *oldname, const char *newname) 1894168404Spjd{ 1895249643Smm dsl_dir_rename_arg_t ddra; 1896168404Spjd 1897249643Smm ddra.ddra_oldname = oldname; 1898249643Smm ddra.ddra_newname = newname; 1899265754Sdelphij ddra.ddra_cred = CRED(); 1900168404Spjd 1901249643Smm return (dsl_sync_task(oldname, 1902249643Smm dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3)); 1903168404Spjd} 1904168404Spjd 1905168404Spjdint 1906265754Sdelphijdsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, 1907265754Sdelphij uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr) 1908168404Spjd{ 1909168404Spjd dsl_dir_t *ancestor; 1910168404Spjd int64_t adelta; 1911168404Spjd uint64_t avail; 1912265754Sdelphij int err; 1913168404Spjd 1914168404Spjd 
ancestor = closest_common_ancestor(sdd, tdd); 1915168404Spjd adelta = would_change(sdd, -space, ancestor); 1916168404Spjd avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); 1917168404Spjd if (avail < space) 1918249643Smm return (SET_ERROR(ENOSPC)); 1919168404Spjd 1920265754Sdelphij err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT, 1921265754Sdelphij ancestor, cr); 1922265754Sdelphij if (err != 0) 1923265754Sdelphij return (err); 1924265754Sdelphij err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT, 1925265754Sdelphij ancestor, cr); 1926265754Sdelphij if (err != 0) 1927265754Sdelphij return (err); 1928265754Sdelphij 1929168404Spjd return (0); 1930168404Spjd} 1931219089Spjd 1932219089Spjdtimestruc_t 1933219089Spjddsl_dir_snap_cmtime(dsl_dir_t *dd) 1934219089Spjd{ 1935219089Spjd timestruc_t t; 1936219089Spjd 1937219089Spjd mutex_enter(&dd->dd_lock); 1938219089Spjd t = dd->dd_snap_cmtime; 1939219089Spjd mutex_exit(&dd->dd_lock); 1940219089Spjd 1941219089Spjd return (t); 1942219089Spjd} 1943219089Spjd 1944219089Spjdvoid 1945219089Spjddsl_dir_snap_cmtime_update(dsl_dir_t *dd) 1946219089Spjd{ 1947219089Spjd timestruc_t t; 1948219089Spjd 1949219089Spjd gethrestime(&t); 1950219089Spjd mutex_enter(&dd->dd_lock); 1951219089Spjd dd->dd_snap_cmtime = t; 1952219089Spjd mutex_exit(&dd->dd_lock); 1953219089Spjd} 1954263391Sdelphij 1955263391Sdelphijvoid 1956263391Sdelphijdsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx) 1957263391Sdelphij{ 1958263391Sdelphij objset_t *mos = dd->dd_pool->dp_meta_objset; 1959263391Sdelphij dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx); 1960263391Sdelphij} 1961265754Sdelphij 1962265754Sdelphijboolean_t 1963265754Sdelphijdsl_dir_is_zapified(dsl_dir_t *dd) 1964265754Sdelphij{ 1965265754Sdelphij dmu_object_info_t doi; 1966265754Sdelphij 1967265754Sdelphij dmu_object_info_from_db(dd->dd_dbuf, &doi); 1968265754Sdelphij return (doi.doi_type == DMU_OTN_ZAP_METADATA); 1969265754Sdelphij} 1970