/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zvol.h>
#include <sys/zcp.h>
#include <sys/dsl_deadlist.h>
#include <sys/zthr.h>
#include <sys/spa_impl.h>

extern int zfs_snapshot_history_enabled;

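/*
 * Check that the given snapshot can be destroyed.  Returns 0 if the
 * destroy (or, with defer set, the deferral) can proceed; otherwise
 * EINVAL if ds is not a snapshot, EBUSY if it is long-held or has user
 * holds, ENOTSUP if the pool version does not support deferred destroy,
 * or EEXIST if the snapshot is a branch point (i.e. it has clones).
 */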
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (dsl_dataset_phys(ds)->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}

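/*
 * Sync-task "check" function for destroying a single snapshot by name.
 * A missing snapshot is not an error: it is treated as already
 * destroyed, and the sync function becomes a no-op.
 */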
int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_snapshot_arg_t *ddsa = arg;
	const char *dsname = ddsa->ddsa_name;
	boolean_t defer = ddsa->ddsa_defer;

	dsl_pool_t *dp = dmu_tx_pool(tx);
	int error = 0;
	dsl_dataset_t *ds;

	error = dsl_dataset_hold(dp, dsname, FTAG, &ds);

	/*
	 * If the snapshot does not exist, silently ignore it, and
	 * dsl_destroy_snapshot_sync() will be a no-op
	 * (it's "already destroyed").
	 */
	if (error == ENOENT)
		return (0);

	if (error == 0) {
		error = dsl_destroy_snapshot_check_impl(ds, defer);
		dsl_dataset_rele(ds, FTAG);
	}

	return (error);
}

struct process_old_arg {
	dsl_dataset_t *ds;
	dsl_dataset_t *ds_prev;
	boolean_t after_branch_point;
	zio_t *pio;
	uint64_t used, comp, uncomp;
};

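/*
 * Callback invoked for each entry in the next snapshot's old-format
 * deadlist.  Blocks born at or before our previous snapshot are still
 * dead and move to our deadlist (crediting prev's unique bytes where
 * appropriate); younger blocks were referenced only by the snapshot
 * being destroyed, so account for them and free them now.
 */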
static int
process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (BP_GET_LOGICAL_BIRTH(bp) <=
	    dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    BP_GET_LOGICAL_BIRTH(bp) >
		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
			dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}

static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t deadlist_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj;
	dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
	dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}

typedef struct remaining_clones_key {
	dsl_dataset_t *rck_clone;
	list_node_t rck_node;
} remaining_clones_key_t;

static remaining_clones_key_t *
rck_alloc(dsl_dataset_t *clone)
{
	remaining_clones_key_t *rck = kmem_alloc(sizeof (*rck), KM_SLEEP);
	rck->rck_clone = clone;
	return (rck);
}

static void
dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx,
    list_t *stack, const void *tag)
{
	objset_t *mos = dd->dd_pool->dp_meta_objset;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (dsl_dir_phys(dd)->dd_clones == 0)
		return;

	zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
	zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
	    zap_cursor_retrieve(zc, za) == 0;
	    zap_cursor_advance(zc)) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
		    za->za_first_integer, tag, &clone));

		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);

			if (dsl_dataset_remap_deadlist_exists(clone)) {
				dsl_deadlist_remove_key(
				    &clone->ds_remap_deadlist, mintxg, tx);
			}

			list_insert_head(stack, rck_alloc(clone));
		} else {
			dsl_dataset_rele(clone, tag);
		}
	}
	zap_cursor_fini(zc);

	kmem_free(za, sizeof (zap_attribute_t));
	kmem_free(zc, sizeof (zap_cursor_t));
}

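/*
 * Remove the key (a snapshot's creation txg) from the deadlists of all
 * clones descended from this dsl_dir.  An explicit stack replaces
 * recursion here so that deeply nested clone hierarchies cannot
 * overflow the kernel stack.
 */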
void
dsl_dir_remove_clones_key(dsl_dir_t *top_dd, uint64_t mintxg, dmu_tx_t *tx)
{
	list_t stack;

	list_create(&stack, sizeof (remaining_clones_key_t),
	    offsetof(remaining_clones_key_t, rck_node));

	dsl_dir_remove_clones_key_impl(top_dd, mintxg, tx, &stack, FTAG);
	for (remaining_clones_key_t *rck = list_remove_head(&stack);
	    rck != NULL; rck = list_remove_head(&stack)) {
		dsl_dataset_t *clone = rck->rck_clone;
		dsl_dir_t *clone_dir = clone->ds_dir;

		kmem_free(rck, sizeof (*rck));

		dsl_dir_remove_clones_key_impl(clone_dir, mintxg, tx,
		    &stack, FTAG);
		dsl_dataset_rele(clone, FTAG);
	}

	list_destroy(&stack);
}

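/*
 * Hand off the dying snapshot's remap deadlist state: entries in next's
 * remap deadlist born at or before our previous snapshot move to the
 * pool-wide obsolete bpobj, and our own remap deadlist (if any) is
 * merged into next's and then destroyed.
 */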
static void
dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next,
    dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Move blocks to be obsoleted to pool's obsolete list. */
	if (dsl_dataset_remap_deadlist_exists(ds_next)) {
		if (!bpobj_is_open(&dp->dp_obsolete_bpobj))
			dsl_pool_create_obsolete_bpobj(dp, tx);

		dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist,
		    &dp->dp_obsolete_bpobj,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
	}

	/* Merge our deadlist into next's and free it. */
	if (dsl_dataset_remap_deadlist_exists(ds)) {
		uint64_t remap_deadlist_object =
		    dsl_dataset_get_remap_deadlist_object(ds);
		ASSERT(remap_deadlist_object != 0);

		mutex_enter(&ds_next->ds_remap_deadlist_lock);
		if (!dsl_dataset_remap_deadlist_exists(ds_next))
			dsl_dataset_create_remap_deadlist(ds_next, tx);
		mutex_exit(&ds_next->ds_remap_deadlist_lock);

		dsl_deadlist_merge(&ds_next->ds_remap_deadlist,
		    remap_deadlist_object, tx);
		dsl_dataset_destroy_remap_deadlist(ds, tx);
	}
}

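/*
 * Sync-task work of destroying a snapshot.  If defer is set and the
 * snapshot still has user holds or clones, only mark it
 * DS_FLAG_DEFER_DESTROY and return.  Otherwise fold its deadlist into
 * the next snapshot's, fix up space accounting and clone linkage, and
 * remove the dataset from the snapshot namespace and the MOS.
 */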
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
	    tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	ASSERT(zfs_refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 ||
	    dsl_dataset_phys(ds)->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
		if (zfs_snapshot_history_enabled) {
			spa_history_log_internal_ds(ds, "defer_destroy", tx,
			    " ");
		}
		return;
	}

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);

	if (zfs_snapshot_history_enabled) {
		/* We need to log before removing it from the namespace. */
		spa_history_log_internal_ds(ds, "destroy", tx, " ");
	}

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx);

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (dsl_dataset_feature_is_active(ds, f))
			dsl_dataset_deactivate_feature(ds, f, tx);
	}
	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    dsl_dataset_phys(ds_prev)->
				    ds_next_clones_obj,
				    dsl_dataset_phys(ds)->ds_next_snap_obj,
				    tx));
			}
		}
		if (!after_branch_point) {
			dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
			    dsl_dataset_phys(ds)->ds_next_snap_obj;
		}
	}

	dsl_dataset_t *ds_next;
	uint64_t old_unique;
	uint64_t used = 0, comp = 0, uncomp = 0;

	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
	ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);

	old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;

	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
	dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
	    dsl_dataset_phys(ds)->ds_prev_snap_obj;
	dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
	    ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);

	if (ds_next->ds_deadlist.dl_oldfmt) {
		process_old_deadlist(ds, ds_prev, ds_next,
		    after_branch_point, tx);
	} else {
		/* Adjust prev's unique space. */
		if (ds_prev && !after_branch_point) {
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
			    dsl_dataset_phys(ds)->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);

		/*
		 * We are done with the deadlist tree (generated/used
		 * by dsl_deadlist_move_bpobj() and dsl_deadlist_merge()).
		 * Discard it to save memory.
		 */
		dsl_deadlist_discard_tree(&ds_next->ds_deadlist);
	}

	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);

	if (!book_exists) {
		/* Collapse range in clone heads */
		dsl_dir_remove_clones_key(ds->ds_dir,
		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
	}

	if (ds_next->ds_is_snapshot) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks will be born after the
		 * prev snap and before this snap, and will have
		 * died after the next snap and before the one
		 * after that (i.e. be on the snap after next's
		 * deadlist).
		 */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds_next)->ds_next_snap_obj,
		    FTAG, &ds_nextnext));
		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    dsl_dataset_phys(ds)->ds_creation_txg,
		    &used, &comp, &uncomp);
		dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
		dsl_dataset_rele(ds_nextnext, FTAG);
		ASSERT3P(ds_next->ds_prev, ==, NULL);

		/* Collapse range in this head. */
		dsl_dataset_t *hds;
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
		    FTAG, &hds));
		if (!book_exists) {
			/* Collapse range in this head. */
			dsl_deadlist_remove_key(&hds->ds_deadlist,
			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
		}
		if (dsl_dataset_remap_deadlist_exists(hds)) {
			dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
		}
		dsl_dataset_rele(hds, FTAG);

	} else {
		ASSERT3P(ds_next->ds_prev, ==, ds);
		dsl_dataset_rele(ds_next->ds_prev, ds_next);
		ds_next->ds_prev = NULL;
		if (ds_prev) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    dsl_dataset_phys(ds)->ds_prev_snap_obj,
			    ds_next, &ds_next->ds_prev));
		}

		dsl_dataset_recalc_head_uniq(ds_next);

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    dsl_dataset_phys(ds_next)->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);

	/*
	 * This must be done after the traverse_dataset() call, because
	 * traversal will re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* remove from snapshot namespace */
	dsl_dataset_t *ds_head;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
	VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
	{
		uint64_t val;
		int err;

		err = dsl_dataset_snap_lookup(ds_head,
		    ds->ds_snapname, &val);
		ASSERT0(err);
		ASSERT3U(val, ==, obj);
	}
#endif
	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
	dsl_dataset_rele(ds_head, FTAG);

	if (ds_prev != NULL)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
		uint64_t count __maybe_unused;
		ASSERT0(zap_count(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
		    count == 0);
		VERIFY0(dmu_object_free(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
	}
	if (dsl_dataset_phys(ds)->ds_props_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
		    tx));
	if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
		    tx));
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);
}

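/*
 * Sync-task counterpart of dsl_destroy_snapshot_check().  As in the
 * check function, a snapshot that has disappeared is silently ignored.
 */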
void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_snapshot_arg_t *ddsa = arg;
	const char *dsname = ddsa->ddsa_name;
	boolean_t defer = ddsa->ddsa_defer;

	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
	if (error == ENOENT)
		return;
	ASSERT0(error);
	dsl_destroy_snapshot_sync_impl(ds, defer, tx);
	zvol_remove_minors(dp->dp_spa, dsname, B_TRUE);
	dsl_dataset_rele(ds, FTAG);
}

/*
 * The semantics of this function are described in the comment above
 * lzc_destroy_snaps().  To summarize:
 *
 * The snapshots must all be in the same pool.
 *
 * Snapshots that don't exist will be silently ignored (considered to be
 * "already deleted").
 *
 * On success, all snaps will be destroyed and this will return 0.
 * On failure, no snaps will be destroyed, the errlist will be filled in,
 * and this will return an errno.
 */
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
	if (nvlist_next_nvpair(snaps, NULL) == NULL)
		return (0);

	/*
	 * lzc_destroy_snaps() is documented to take an nvlist whose
	 * values "don't matter".  We need to convert that nvlist to
	 * one that we know can be converted to Lua.
	 */
	nvlist_t *snaps_normalized = fnvlist_alloc();
	for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
		fnvlist_add_boolean_value(snaps_normalized,
		    nvpair_name(pair), B_TRUE);
	}

	nvlist_t *arg = fnvlist_alloc();
	fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
	fnvlist_free(snaps_normalized);
	fnvlist_add_boolean_value(arg, "defer", defer);

	nvlist_t *wrapper = fnvlist_alloc();
	fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
	fnvlist_free(arg);

	const char *program =
	    "arg = ...\n"
	    "snaps = arg['snaps']\n"
	    "defer = arg['defer']\n"
	    "errors = { }\n"
	    "has_errors = false\n"
	    "for snap, v in pairs(snaps) do\n"
	    "    errno = zfs.check.destroy{snap, defer=defer}\n"
	    "    zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n"
	    "    if errno == ENOENT then\n"
	    "        snaps[snap] = nil\n"
	    "    elseif errno ~= 0 then\n"
	    "        errors[snap] = errno\n"
	    "        has_errors = true\n"
	    "    end\n"
	    "end\n"
	    "if has_errors then\n"
	    "    return errors\n"
	    "end\n"
	    "for snap, v in pairs(snaps) do\n"
	    "    errno = zfs.sync.destroy{snap, defer=defer}\n"
	    "    assert(errno == 0)\n"
	    "end\n"
	    "return { }\n";

	nvlist_t *result = fnvlist_alloc();
	int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)),
	    program,
	    B_TRUE,
	    0,
	    zfs_lua_max_memlimit,
	    fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result);
	if (error != 0) {
		const char *errorstr = NULL;
		(void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
		if (errorstr != NULL) {
			zfs_dbgmsg("%s", errorstr);
		}
		fnvlist_free(wrapper);
		fnvlist_free(result);
		return (error);
	}
	fnvlist_free(wrapper);

	/*
	 * lzc_destroy_snaps() is documented to fill the errlist with
	 * int32 values, so we need to convert the int64 values that are
	 * returned from Lua.
	 */
	int rv = 0;
	nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN);
	for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) {
		int32_t val = (int32_t)fnvpair_value_int64(pair);
		if (rv == 0)
			rv = val;
		fnvlist_add_int32(errlist, nvpair_name(pair), val);
	}
	fnvlist_free(result);
	return (rv);
}

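/*
 * Convenience wrapper: destroy a single snapshot by packing it into a
 * one-entry nvlist for dsl_destroy_snapshots_nvl().
 */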
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
	int error;
	nvlist_t *nvl = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();

	fnvlist_add_boolean(nvl, name);
	error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
	fnvlist_free(errlist);
	fnvlist_free(nvl);
	return (error);
}

struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

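/*
 * traverse_dataset() callback that frees each block pointer the dying
 * dataset was the last to reference.  Intent log blocks carry no space
 * accounting and are freed directly; everything else goes through
 * dsl_dataset_block_kill().
 */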
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	(void) spa, (void) dnp;
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
	    BP_IS_EMBEDDED(bp))
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		ASSERT3U(BP_GET_LOGICAL_BIRTH(bp), >,
		    dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}

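/*
 * Destroy a head dataset synchronously (pools without async_destroy):
 * traverse everything born after the previous snapshot and free it in
 * this txg.
 */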
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(synchronous, mintxg=%llu)",
	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	VERIFY0(traverse_dataset(ds,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
	    TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}

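/*
 * Check that the given head dataset can be destroyed: it must not be a
 * snapshot, must not have more long holds than expected, and must have
 * neither snapshots of its own nor child filesystems.  If destroying it
 * would also destroy a deferred-destroy origin snapshot, that snapshot
 * must not be long-held either.
 */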
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	int error;
	uint64_t count;
	objset_t *mos;

	ASSERT(!ds->ds_is_snapshot);
	if (ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
		return (SET_ERROR(EBUSY));

	ASSERT0(ds->ds_dir->dd_activity_waiters);

	mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete if there are children of this fs.
	 */
	error = zap_count(mos,
	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
	if (error != 0)
		return (error);
	if (count != 0)
		return (SET_ERROR(EEXIST));

	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0) {
		/* We need to remove the origin snapshot as well. */
		if (!zfs_refcount_is_zero(&ds->ds_prev->ds_longholds))
			return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
	if (error != 0)
		return (error);

	error = dsl_destroy_head_check_impl(ds, 0);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}

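/*
 * Free a (now empty) dsl_dir: drop its reservation, verify that its
 * space accounting has drained to zero, destroy its child, props,
 * clones, and delegation objects, and unlink it from its parent.
 */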
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dd_used_t t;

	ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

	/* Decrement the filesystem count for all parent filesystems. */
	if (dd->dd_parent != NULL)
		dsl_fs_ss_count_adjust(dd->dd_parent, -1,
		    DD_FIELD_FILESYSTEM_COUNT, tx);

	/*
	 * Remove our reservation. The impl() routine avoids setting the
	 * actual property, which would require the (already destroyed) ds.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
	ASSERT0(dsl_dir_phys(dd)->dd_reserved);
	for (t = 0; t < DD_USED_NUM; t++)
		ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);

	if (dd->dd_crypto_obj != 0) {
		dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
		(void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
	}

	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
	if (dsl_dir_phys(dd)->dd_clones != 0)
		VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_clones, tx));
	VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
	VERIFY0(zap_remove(mos,
	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
	    dd->dd_myname, tx));

	dsl_dir_rele(dd, FTAG);
	dmu_object_free_zapified(mos, ddobj, tx);
}

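/*
 * Sanity-check a clone's livelist against its dsl_dir's space
 * accounting before the livelist is handed to the pool-wide deletion
 * queue.
 */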
static void
dsl_clone_destroy_assert(dsl_dir_t *dd)
{
	uint64_t used, comp, uncomp;

	ASSERT(dsl_dir_is_clone(dd));
	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);

	ASSERT3U(dsl_dir_phys(dd)->dd_used_bytes, ==, used);
	ASSERT3U(dsl_dir_phys(dd)->dd_compressed_bytes, ==, comp);
	/*
	 * Greater than or equal because we do not track embedded block
	 * pointers in the livelist
	 */
	ASSERT3U(dsl_dir_phys(dd)->dd_uncompressed_bytes, >=, uncomp);

	ASSERT(list_is_empty(&dd->dd_pending_allocs.bpl_list));
	ASSERT(list_is_empty(&dd->dd_pending_frees.bpl_list));
}

/*
 * Start the delete process for a clone. Free its zil, verify the space usage
 * and queue the blkptrs for deletion by adding the livelist to the pool-wide
 * delete queue.
 */
static void
dsl_async_clone_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t zap_obj, to_delete, used, comp, uncomp;
	objset_t *os;
	dsl_dir_t *dd = ds->ds_dir;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	VERIFY0(dmu_objset_from_ds(ds, &os));

	uint64_t mintxg = 0;
	dsl_deadlist_entry_t *dle = dsl_deadlist_first(&dd->dd_livelist);
	if (dle != NULL)
		mintxg = dle->dle_mintxg;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(livelist, mintxg=%llu)", (long long)mintxg);

	/* Check that the clone is in a correct state to be deleted */
	dsl_clone_destroy_assert(dd);

	/* Destroy the zil */
	zil_destroy_sync(dmu_objset_zil(os), tx);

	VERIFY0(zap_lookup(mos, dd->dd_object,
	    DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &to_delete));
	/* Initialize deleted_clones entry to track livelists to cleanup */
	int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
	if (error == ENOENT) {
		zap_obj = zap_create(mos, DMU_OTN_ZAP_METADATA,
		    DMU_OT_NONE, 0, tx);
		VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1,
		    &(zap_obj), tx));
		spa->spa_livelists_to_delete = zap_obj;
	} else if (error != 0) {
		zfs_panic_recover("zfs: error %d was returned while looking "
		    "up DMU_POOL_DELETED_CLONES in the zap", error);
		return;
	}
	VERIFY0(zap_add_int(mos, zap_obj, to_delete, tx));

	/* Clone is no longer using space, now tracked by dp_free_dir */
	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    -used, -comp, -dsl_dir_phys(dd)->dd_uncompressed_bytes,
	    tx);
	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
	    used, comp, uncomp, tx);
	dsl_dir_remove_livelist(dd, tx, B_FALSE);
	zthr_wakeup(spa->spa_livelist_delete_zthr);
}

/*
 * Move the bptree into the pool's list of trees to clean up, update space
 * accounting information and destroy the zil.
 */
static void
dsl_async_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t used, comp, uncomp;
	objset_t *os;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(bptree, mintxg=%llu)",
	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);

	zil_destroy_sync(dmu_objset_zil(os), tx);

	if (!spa_feature_is_active(dp->dp_spa,
	    SPA_FEATURE_ASYNC_DESTROY)) {
		dsl_scan_t *scn = dp->dp_scan;
		spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
		    tx);
		dp->dp_bptree_obj = bptree_alloc(mos, tx);
		VERIFY0(zap_add(mos,
		    DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
		    &dp->dp_bptree_obj, tx));
		ASSERT(!scn->scn_async_destroying);
		scn->scn_async_destroying = B_TRUE;
	}

	used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
	comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
	uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;

	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == used);

	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	bptree_add(mos, dp->dp_bptree_obj,
	    &dsl_dataset_phys(ds)->ds_bp,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg,
	    used, comp, uncomp, tx);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
	    -used, -comp, -uncomp, tx);
	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
	    used, comp, uncomp, tx);
}

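/*
 * Sync-task work of destroying a head dataset: detach it from its
 * origin, free its blocks (directly, or by queueing a livelist or
 * bptree for asynchronous destruction), destroy its bookmarks and
 * snapshot-name ZAP, and free the dataset and its dsl_dir.  If this was
 * the last clone of a deferred-destroy origin, destroy that origin
 * snapshot as well.
 */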
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	uint64_t obj, ddobj, prevobj = 0;
	boolean_t rmorigin;

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	ASSERT3U(BP_GET_LOGICAL_BIRTH(&dsl_dataset_phys(ds)->ds_bp), <=,
	    tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_dir_cancel_waiters(ds->ds_dir);

	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0);

	/* Remove our reservation. */
	if (ds->ds_reserved != 0) {
		dsl_dataset_set_refreservation_sync_impl(ds,
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    0, tx);
		ASSERT0(ds->ds_reserved);
	}

	obj = ds->ds_object;

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (dsl_dataset_feature_is_active(ds, f))
			dsl_dataset_deactivate_feature(ds, f, tx);
	}

	dsl_scan_ds_destroyed(ds, tx);

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		/* This is a clone */
		ASSERT(ds->ds_prev != NULL);
		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
		    obj);
		ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    obj, tx);
		}

		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
		dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
	}

	/*
	 * Destroy the deadlist. Unless it's a clone, the
	 * deadlist should be empty since the dataset has no snapshots.
	 * (If it's a clone, it's safe to ignore the deadlist contents
	 * since they are still referenced by the origin snapshot.)
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	if (dsl_dataset_remap_deadlist_exists(ds))
		dsl_dataset_destroy_remap_deadlist(ds, tx);

	/*
	 * Each destroy is responsible for both destroying (enqueuing
	 * to be destroyed) the blkptrs comprising the dataset as well as
	 * those belonging to the zil.
	 */
	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {
		dsl_async_clone_destroy(ds, tx);
	} else if (spa_feature_is_enabled(dp->dp_spa,
	    SPA_FEATURE_ASYNC_DESTROY)) {
		dsl_async_dataset_destroy(ds, tx);
	} else {
		old_synchronous_dataset_destroy(ds, tx);
	}

	if (ds->ds_prev != NULL) {
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			VERIFY0(zap_remove_int(mos,
			    dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
			    ds->ds_object, tx));
		}
		prevobj = ds->ds_prev->ds_object;
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	/*
	 * This must be done after the traverse_dataset() call, because
	 * traversal will re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* Erase the link in the dir */
	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
	ddobj = ds->ds_dir->dd_object;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
	VERIFY0(zap_destroy(mos,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));

	if (ds->ds_bookmarks_obj != 0) {
		void *cookie = NULL;
		dsl_bookmark_node_t *dbn;

		while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
		    NULL) {
			if (dbn->dbn_phys.zbm_redaction_obj != 0) {
				dnode_t *rl;
				VERIFY0(dnode_hold(mos,
				    dbn->dbn_phys.zbm_redaction_obj, FTAG,
				    &rl));
				if (rl->dn_have_spill) {
					spa_feature_decr(dmu_objset_spa(mos),
					    SPA_FEATURE_REDACTION_LIST_SPILL,
					    tx);
				}
				dnode_rele(rl, FTAG);
				VERIFY0(dmu_object_free(mos,
				    dbn->dbn_phys.zbm_redaction_obj, tx));
				spa_feature_decr(dmu_objset_spa(mos),
				    SPA_FEATURE_REDACTION_BOOKMARKS, tx);
			}
			if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
				spa_feature_decr(dmu_objset_spa(mos),
				    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
			}
			spa_strfree(dbn->dbn_name);
			mutex_destroy(&dbn->dbn_lock);
			kmem_free(dbn, sizeof (*dbn));
		}
		avl_destroy(&ds->ds_bookmarks);
		VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
	}

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);

	dsl_dir_destroy_sync(ddobj, tx);

	if (rmorigin) {
		dsl_dataset_t *prev;
		VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
		dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
		dsl_dataset_rele(prev, FTAG);
	}
	/* Delete errlog. */
	if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG))
		spa_delete_dataset_errlog(dp->dp_spa, ds->ds_object, tx);
}

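/*
 * Sync-task counterpart of dsl_destroy_head_check(); also removes any
 * zvol minors belonging to the destroyed dataset.
 */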
void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
	dsl_destroy_head_sync_impl(ds, tx);
	zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
	dsl_dataset_rele(ds, FTAG);
}

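/*
 * Used on pools without async_destroy: mark the dataset inconsistent
 * on disk before its objects are freed from open context, so that a
 * crash mid-destroy leaves a dataset that dsl_destroy_inconsistent()
 * will finish off on the next import.
 */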
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal_ds(ds, "destroy begin", tx, " ");
	dsl_dataset_rele(ds, FTAG);
}

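/*
 * Destroy a head dataset by name.  On pools without async_destroy the
 * dataset is first marked inconsistent and its objects are freed from
 * open context, keeping the final sync task short.
 */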
int
dsl_destroy_head(const char *name)
{
	dsl_destroy_head_arg_t ddha;
	int error;
	spa_t *spa;
	boolean_t isenabled;

#ifdef _KERNEL
	zfs_destroy_unmount_origin(name);
#endif

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
	spa_close(spa, FTAG);

	ddha.ddha_name = name;

	if (!isenabled) {
		objset_t *os;

		error = dsl_sync_task(name, dsl_destroy_head_check,
		    dsl_destroy_head_begin_sync, &ddha,
		    0, ZFS_SPACE_CHECK_DESTROY);
		if (error != 0)
			return (error);

		/*
		 * Head deletion is processed in one txg on old pools;
		 * remove the objects from open context so that the txg sync
		 * is not too long. This optimization can only work for
		 * encrypted datasets if the wrapping key is loaded.
		 */
		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE,
		    FTAG, &os);
		if (error == 0) {
			uint64_t prev_snap_txg =
			    dsl_dataset_phys(dmu_objset_ds(os))->
			    ds_prev_snap_txg;
			for (uint64_t obj = 0; error == 0;
			    error = dmu_object_next(os, &obj, FALSE,
			    prev_snap_txg))
				(void) dmu_free_long_object(os, obj);
			/* sync out all frees */
			txg_wait_synced(dmu_objset_pool(os), 0);
			dmu_objset_disown(os, B_TRUE, FTAG);
		}
	}

	return (dsl_sync_task(name, dsl_destroy_head_check,
	    dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_DESTROY));
}

/*
 * Note, this function is used as the callback for dmu_objset_find().  We
 * always return 0 so that we will continue to find and process
 * inconsistent datasets, even if we encounter an error trying to
 * process one of them.
 */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
	(void) arg;
	objset_t *os;

	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
		boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));

		/*
		 * If the dataset is inconsistent because a resumable receive
		 * has failed, then do not destroy it.
		 */
		if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
			need_destroy = B_FALSE;

		dmu_objset_rele(os, FTAG);
		if (need_destroy)
			(void) dsl_destroy_head(dsname);
	}
	return (0);
}


#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_destroy_head);
EXPORT_SYMBOL(dsl_destroy_head_sync_impl);
EXPORT_SYMBOL(dsl_dataset_user_hold_check_one);
EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl);
EXPORT_SYMBOL(dsl_destroy_inconsistent);
EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
EXPORT_SYMBOL(dsl_destroy_head_check_impl);
#endif
