1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2019 Datto Inc.
33 */
34
35#include <assert.h>
36#include <ctype.h>
37#include <errno.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <strings.h>
42#include <unistd.h>
43#include <stddef.h>
44#include <fcntl.h>
45#include <sys/mount.h>
46#include <sys/mntent.h>
47#include <sys/mnttab.h>
48#include <sys/avl.h>
49#include <sys/debug.h>
50#include <sys/stat.h>
51#include <pthread.h>
52#include <umem.h>
53#include <time.h>
54
55#include <libzfs.h>
56#include <libzfs_core.h>
57#include <libzutil.h>
58
59#include "zfs_namecheck.h"
60#include "zfs_prop.h"
61#include "zfs_fletcher.h"
62#include "libzfs_impl.h"
63#include <cityhash.h>
64#include <zlib.h>
65#include <sys/zio_checksum.h>
66#include <sys/dsl_crypt.h>
67#include <sys/ddt.h>
68#include <sys/socket.h>
69#include <sys/sha2.h>
70
/*
 * Forward declarations of static helpers that are referenced before their
 * definitions elsewhere in this file.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **,
    const char *, nvlist_t *);
static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
    uint64_t num_redact_snaps, char *name);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);
79
/*
 * Arguments handed to send_progress_thread(), which polls
 * zfs_send_progress() once per second and prints the result to stderr.
 */
typedef struct progress_arg {
	/* dataset whose name is printed and whose send is polled */
	zfs_handle_t *pa_zhp;
	/* file descriptor passed to zfs_send_progress() */
	int pa_fd;
	/* print tab-separated raw numbers and no header line */
	boolean_t pa_parsable;
	/* header column reads "BYTES" instead of " SENT" */
	boolean_t pa_estimate;
	/* values >= 2 add a blocks-visited column */
	int pa_verbosity;
} progress_arg_t;
87
88static int
89dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
90    zio_cksum_t *zc, int outfd)
91{
92	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
93	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
94	fletcher_4_incremental_native(drr,
95	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
96	if (drr->drr_type != DRR_BEGIN) {
97		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
98		    drr_checksum.drr_checksum));
99		drr->drr_u.drr_checksum.drr_checksum = *zc;
100	}
101	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
102	    sizeof (zio_cksum_t), zc);
103	if (write(outfd, drr, sizeof (*drr)) == -1)
104		return (errno);
105	if (payload_len != 0) {
106		fletcher_4_incremental_native(payload, payload_len, zc);
107		if (write(outfd, payload, payload_len) == -1)
108			return (errno);
109	}
110	return (0);
111}
112
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 *
 * Each node maps one snapshot GUID to the nvlist of the filesystem that
 * contains it and to the snapshot's name.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage (offset given to avl_create) */
	nvlist_t *fn_nvfs;	/* nvlist of the containing filesystem */
	char *fn_snapname;	/* name taken from the "snaps" nvpair */
	uint64_t fn_guid;	/* snapshot GUID; the tree's sort key */
} fsavl_node_t;
122
123static int
124fsavl_compare(const void *arg1, const void *arg2)
125{
126	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
127	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
128
129	return (TREE_CMP(fn1->fn_guid, fn2->fn_guid));
130}
131
132/*
133 * Given the GUID of a snapshot, find its containing filesystem and
134 * (optionally) name.
135 */
136static nvlist_t *
137fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
138{
139	fsavl_node_t fn_find;
140	fsavl_node_t *fn;
141
142	fn_find.fn_guid = snapguid;
143
144	fn = avl_find(avl, &fn_find, NULL);
145	if (fn) {
146		if (snapname)
147			*snapname = fn->fn_snapname;
148		return (fn->fn_nvfs);
149	}
150	return (NULL);
151}
152
153static void
154fsavl_destroy(avl_tree_t *avl)
155{
156	fsavl_node_t *fn;
157	void *cookie;
158
159	if (avl == NULL)
160		return;
161
162	cookie = NULL;
163	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
164		free(fn);
165	avl_destroy(avl);
166	free(avl);
167}
168
169/*
170 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
171 */
172static avl_tree_t *
173fsavl_create(nvlist_t *fss)
174{
175	avl_tree_t *fsavl;
176	nvpair_t *fselem = NULL;
177
178	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
179		return (NULL);
180
181	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
182	    offsetof(fsavl_node_t, fn_node));
183
184	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
185		nvlist_t *nvfs, *snaps;
186		nvpair_t *snapelem = NULL;
187
188		nvfs = fnvpair_value_nvlist(fselem);
189		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
190
191		while ((snapelem =
192		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
193			fsavl_node_t *fn;
194			uint64_t guid;
195
196			guid = fnvpair_value_uint64(snapelem);
197			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
198				fsavl_destroy(fsavl);
199				return (NULL);
200			}
201			fn->fn_nvfs = nvfs;
202			fn->fn_snapname = nvpair_name(snapelem);
203			fn->fn_guid = guid;
204
205			/*
206			 * Note: if there are multiple snaps with the
207			 * same GUID, we ignore all but one.
208			 */
209			if (avl_find(fsavl, fn, NULL) == NULL)
210				avl_add(fsavl, fn);
211			else
212				free(fn);
213		}
214	}
215
216	return (fsavl);
217}
218
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 *
 * send_data_t is the state threaded through send_iterate_fs() and
 * send_iterate_snap() while the header nvlist is being gathered.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;
	nvlist_t *snapholds;	/* user holds */

	/*
	 * send-receive configuration, does not change during traversal
	 * (except seenfrom and seento, which send_iterate_snap() updates
	 * as it walks the snapshots)
	 */
	const char *fsname;
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;	/* also descend into child filesystems */
	boolean_t raw;
	boolean_t doall;
	boolean_t replicate;
	boolean_t skipmissing;	/* warn instead of failing on missing tosnap */
	boolean_t verbose;
	boolean_t backup;	/* gather received properties only */
	boolean_t seenfrom;
	boolean_t seento;
	boolean_t holds;	/* were holds requested with send -h */
	boolean_t props;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "is_encroot" -> boolean
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
283
/* Forward declaration: send_iterate_snap() calls this before its definition. */
static void
send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
286
287static int
288send_iterate_snap(zfs_handle_t *zhp, void *arg)
289{
290	send_data_t *sd = arg;
291	uint64_t guid = zhp->zfs_dmustats.dds_guid;
292	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
293	char *snapname;
294	nvlist_t *nv;
295	boolean_t isfromsnap, istosnap, istosnapwithnofrom;
296
297	snapname = strrchr(zhp->zfs_name, '@')+1;
298	isfromsnap = (sd->fromsnap != NULL &&
299	    strcmp(sd->fromsnap, snapname) == 0);
300	istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
301	istosnapwithnofrom = (istosnap && sd->fromsnap == NULL);
302
303	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
304		if (sd->verbose) {
305			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
306			    "skipping snapshot %s because it was created "
307			    "after the destination snapshot (%s)\n"),
308			    zhp->zfs_name, sd->tosnap);
309		}
310		zfs_close(zhp);
311		return (0);
312	}
313
314	fnvlist_add_uint64(sd->parent_snaps, snapname, guid);
315	/*
316	 * NB: if there is no fromsnap here (it's a newly created fs in
317	 * an incremental replication), we will substitute the tosnap.
318	 */
319	if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
320		sd->parent_fromsnap_guid = guid;
321	}
322
323	if (!sd->recursive) {
324
325		/*
326		 * To allow a doall stream to work properly
327		 * with a NULL fromsnap
328		 */
329		if (sd->doall && sd->fromsnap == NULL && !sd->seenfrom) {
330			sd->seenfrom = B_TRUE;
331		}
332
333		if (!sd->seenfrom && isfromsnap) {
334			sd->seenfrom = B_TRUE;
335			zfs_close(zhp);
336			return (0);
337		}
338
339		if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
340			zfs_close(zhp);
341			return (0);
342		}
343
344		if (istosnap)
345			sd->seento = B_TRUE;
346	}
347
348	nv = fnvlist_alloc();
349	send_iterate_prop(zhp, sd->backup, nv);
350	fnvlist_add_nvlist(sd->snapprops, snapname, nv);
351	fnvlist_free(nv);
352	if (sd->holds) {
353		nvlist_t *holds = fnvlist_alloc();
354		int err = lzc_get_holds(zhp->zfs_name, &holds);
355		if (err == 0) {
356			fnvlist_add_nvlist(sd->snapholds, snapname, holds);
357		}
358		fnvlist_free(holds);
359	}
360
361	zfs_close(zhp);
362	return (0);
363}
364
365static void
366send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
367{
368	nvlist_t *props = NULL;
369	nvpair_t *elem = NULL;
370
371	if (received_only)
372		props = zfs_get_recvd_props(zhp);
373	else
374		props = zhp->zfs_props;
375
376	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
377		char *propname = nvpair_name(elem);
378		zfs_prop_t prop = zfs_name_to_prop(propname);
379		nvlist_t *propnv;
380
381		if (!zfs_prop_user(propname)) {
382			/*
383			 * Realistically, this should never happen.  However,
384			 * we want the ability to add DSL properties without
385			 * needing to make incompatible version changes.  We
386			 * need to ignore unknown properties to allow older
387			 * software to still send datasets containing these
388			 * properties, with the unknown properties elided.
389			 */
390			if (prop == ZPROP_INVAL)
391				continue;
392
393			if (zfs_prop_readonly(prop))
394				continue;
395		}
396
397		verify(nvpair_value_nvlist(elem, &propnv) == 0);
398		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
399		    prop == ZFS_PROP_REFQUOTA ||
400		    prop == ZFS_PROP_REFRESERVATION) {
401			char *source;
402			uint64_t value;
403			verify(nvlist_lookup_uint64(propnv,
404			    ZPROP_VALUE, &value) == 0);
405			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
406				continue;
407			/*
408			 * May have no source before SPA_VERSION_RECVD_PROPS,
409			 * but is still modifiable.
410			 */
411			if (nvlist_lookup_string(propnv,
412			    ZPROP_SOURCE, &source) == 0) {
413				if ((strcmp(source, zhp->zfs_name) != 0) &&
414				    (strcmp(source,
415				    ZPROP_SOURCE_VAL_RECVD) != 0))
416					continue;
417			}
418		} else {
419			char *source;
420			if (nvlist_lookup_string(propnv,
421			    ZPROP_SOURCE, &source) != 0)
422				continue;
423			if ((strcmp(source, zhp->zfs_name) != 0) &&
424			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
425				continue;
426		}
427
428		if (zfs_prop_user(propname) ||
429		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
430			char *value;
431			value = fnvlist_lookup_string(propnv, ZPROP_VALUE);
432			fnvlist_add_string(nv, propname, value);
433		} else {
434			uint64_t value;
435			value = fnvlist_lookup_uint64(propnv, ZPROP_VALUE);
436			fnvlist_add_uint64(nv, propname, value);
437		}
438	}
439}
440
441/*
442 * returns snapshot creation txg
443 * and returns 0 if the snapshot does not exist
444 */
445static uint64_t
446get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
447{
448	char name[ZFS_MAX_DATASET_NAME_LEN];
449	uint64_t txg = 0;
450
451	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
452		return (txg);
453
454	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
455	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
456		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
457		if (zhp != NULL) {
458			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
459			zfs_close(zhp);
460		}
461	}
462
463	return (txg);
464}
465
/*
 * recursively generate nvlists describing datasets.  See comment
 * for the data structure send_data_t above for description of contents
 * of the nvlist.
 *
 * For the dataset zhp this builds one fs-nvlist ("name", "parentfromsnap",
 * optionally "origin", "is_encroot" and "props", plus "snaps", "snapprops"
 * and optionally "snapholds") and adds it to sd->fss under the dataset's
 * guid formatted as a hex string.  When sd->recursive is set the child
 * filesystems are visited with the same callback.
 *
 * The per-dataset fields of sd (parent_fromsnap_guid, fromsnap_txg,
 * tosnap_txg) are saved on entry and restored before returning.  zhp is
 * closed on every path.  Returns 0 on success or a nonzero error value.
 */
static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
	send_data_t *sd = arg;
	nvlist_t *nvfs = NULL, *nv = NULL;
	int rv = 0;
	uint64_t min_txg = 0, max_txg = 0;
	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
	uint64_t tosnap_txg_save = sd->tosnap_txg;
	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
	uint64_t guid = zhp->zfs_dmustats.dds_guid;
	uint64_t fromsnap_txg, tosnap_txg;
	char guidstring[64];

	/*
	 * If this dataset has its own fromsnap/tosnap, their txgs override
	 * the values inherited from the parent; otherwise the parent's
	 * values stay in sd.
	 */
	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
	if (fromsnap_txg != 0)
		sd->fromsnap_txg = fromsnap_txg;

	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
	if (tosnap_txg != 0)
		sd->tosnap_txg = tosnap_txg;

	/*
	 * on the send side, if the current dataset does not have tosnap,
	 * perform two additional checks:
	 *
	 * - skip sending the current dataset if it was created later than
	 *   the parent tosnap
	 * - return error if the current dataset was created earlier than
	 *   the parent tosnap, unless --skip-missing specified. Then
	 *   just print a warning
	 */
	if (sd->tosnap != NULL && tosnap_txg == 0) {
		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
			if (sd->verbose) {
				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
				    "skipping dataset %s: snapshot %s does "
				    "not exist\n"), zhp->zfs_name, sd->tosnap);
			}
		} else if (sd->skipmissing) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "WARNING: skipping dataset %s and its children:"
			    " snapshot %s does not exist\n"),
			    zhp->zfs_name, sd->tosnap);
		} else {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s%s: snapshot %s@%s does not "
			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
			    dgettext(TEXT_DOMAIN, " recursively") : "",
			    zhp->zfs_name, sd->tosnap);
			rv = EZFS_NOENT;
		}
		/* In all three cases this dataset is not added to sd->fss. */
		goto out;
	}

	nvfs = fnvlist_alloc();
	fnvlist_add_string(nvfs, "name", zhp->zfs_name);
	fnvlist_add_uint64(nvfs, "parentfromsnap",
	    sd->parent_fromsnap_guid);

	/* A non-empty origin means this dataset is a clone. */
	if (zhp->zfs_dmustats.dds_origin[0]) {
		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
		if (origin == NULL) {
			rv = -1;
			goto out;
		}
		fnvlist_add_uint64(nvfs, "origin",
		    origin->zfs_dmustats.dds_guid);

		zfs_close(origin);
	}

	/* iterate over props */
	if (sd->props || sd->backup || sd->recursive) {
		nv = fnvlist_alloc();
		send_iterate_prop(zhp, sd->backup, nv);
	}
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		boolean_t encroot;

		/* determine if this dataset is an encryption root */
		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
			rv = -1;
			goto out;
		}

		if (encroot)
			fnvlist_add_boolean(nvfs, "is_encroot");

		/*
		 * Encrypted datasets can only be sent with properties if
		 * the raw flag is specified because the receive side doesn't
		 * currently have a mechanism for recursively asking the user
		 * for new encryption parameters.
		 */
		if (!sd->raw) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s: encrypted dataset %s may not "
			    "be sent with properties without the raw flag\n"),
			    sd->fsname, sd->tosnap, zhp->zfs_name);
			rv = -1;
			goto out;
		}

	}

	if (nv != NULL)
		fnvlist_add_nvlist(nvfs, "props", nv);

	/* iterate over snaps, and set sd->parent_fromsnap_guid */
	sd->parent_fromsnap_guid = 0;
	sd->parent_snaps = fnvlist_alloc();
	sd->snapprops = fnvlist_alloc();
	if (sd->holds)
		sd->snapholds = fnvlist_alloc();

	/*
	 * If this is a "doall" send, a replicate send or we're just trying
	 * to gather a list of previous snapshots, iterate through all the
	 * snaps in the txg range. Otherwise just look at the one we're
	 * interested in.
	 */
	if (sd->doall || sd->replicate || sd->tosnap == NULL) {
		if (!sd->replicate && fromsnap_txg != 0)
			min_txg = fromsnap_txg;
		if (!sd->replicate && tosnap_txg != 0)
			max_txg = tosnap_txg;
		(void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd,
		    min_txg, max_txg);
	} else {
		char snapname[MAXPATHLEN] = { 0 };
		zfs_handle_t *snap;

		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
		    zhp->zfs_name, sd->tosnap);
		if (sd->fromsnap != NULL)
			sd->seenfrom = B_TRUE;
		snap = zfs_open(zhp->zfs_hdl, snapname,
		    ZFS_TYPE_SNAPSHOT);
		/* send_iterate_snap() closes the handle it is given. */
		if (snap != NULL)
			(void) send_iterate_snap(snap, sd);
	}

	/*
	 * The accumulated lists are copied into nvfs, so the originals are
	 * freed right away.
	 */
	fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
	fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
	if (sd->holds)
		fnvlist_add_nvlist(nvfs, "snapholds", sd->snapholds);
	fnvlist_free(sd->parent_snaps);
	fnvlist_free(sd->snapprops);
	fnvlist_free(sd->snapholds);

	/* Do not allow the size of the properties list to exceed the limit */
	if ((fnvlist_size(nvfs) + fnvlist_size(sd->fss)) >
	    zhp->zfs_hdl->libzfs_max_nvlist) {
		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
		    "warning: cannot send %s@%s: the size of the list of "
		    "snapshots and properties is too large to be received "
		    "successfully.\n"
		    "Select a smaller number of snapshots to send.\n"),
		    zhp->zfs_name, sd->tosnap);
		rv = EZFS_NOSPC;
		goto out;
	}
	/* add this fs to nvlist */
	(void) snprintf(guidstring, sizeof (guidstring),
	    "0x%llx", (longlong_t)guid);
	fnvlist_add_nvlist(sd->fss, guidstring, nvfs);

	/* iterate over children */
	if (sd->recursive)
		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);

out:
	/* Restore the caller's view of the per-dataset state. */
	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
	sd->fromsnap_txg = fromsnap_txg_save;
	sd->tosnap_txg = tosnap_txg_save;
	fnvlist_free(nv);
	fnvlist_free(nvfs);

	zfs_close(zhp);
	return (rv);
}
655
656static int
657gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
658    const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
659    boolean_t replicate, boolean_t skipmissing, boolean_t verbose,
660    boolean_t backup, boolean_t holds, boolean_t props, nvlist_t **nvlp,
661    avl_tree_t **avlp)
662{
663	zfs_handle_t *zhp;
664	send_data_t sd = { 0 };
665	int error;
666
667	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
668	if (zhp == NULL)
669		return (EZFS_BADTYPE);
670
671	sd.fss = fnvlist_alloc();
672	sd.fsname = fsname;
673	sd.fromsnap = fromsnap;
674	sd.tosnap = tosnap;
675	sd.recursive = recursive;
676	sd.raw = raw;
677	sd.doall = doall;
678	sd.replicate = replicate;
679	sd.skipmissing = skipmissing;
680	sd.verbose = verbose;
681	sd.backup = backup;
682	sd.holds = holds;
683	sd.props = props;
684
685	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
686		fnvlist_free(sd.fss);
687		if (avlp != NULL)
688			*avlp = NULL;
689		*nvlp = NULL;
690		return (error);
691	}
692
693	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
694		fnvlist_free(sd.fss);
695		*nvlp = NULL;
696		return (EZFS_NOMEM);
697	}
698
699	*nvlp = sd.fss;
700	return (0);
701}
702
/*
 * Routines specific to "zfs send"
 *
 * send_dump_data_t is the state passed to dump_snapshot() as its callback
 * argument while snapshots are being sent.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* last snapshot accepted; empty while none has been seen yet */
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	/* ZFS_PROP_OBJSETID of prevsnap */
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	/*
	 * dryrun: skip the actual send; parsable: machine-readable verbose
	 * output; progress: run send_progress_thread() during the send;
	 * std_out: verbose output goes to stdout instead of stderr.
	 */
	boolean_t dryrun, parsable, progress, embed_data, std_out;
	/* large_block, compress, raw (and embed_data) map to LZC_SEND_FLAG_* */
	boolean_t large_block, compress, raw, holds;
	/* descriptor the stream is written to */
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are to be gathered */
	nvlist_t *snapholds;
	/* guid-ordered tree searched with fsavl_find() */
	avl_tree_t *fsavl;
	/* optional callback deciding whether a snapshot is sent */
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	/* optional nvlist that receives per-snapshot debug entries */
	nvlist_t *debugnv;
	/* tag recorded for each snapshot added to snapholds */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	/* nonzero enables size estimation and verbose output */
	int verbosity;
	/* running total of the estimated stream sizes */
	uint64_t size;
} send_dump_data_t;
728
729static int
730zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
731    enum lzc_send_flags flags, uint64_t *spacep)
732{
733	libzfs_handle_t *hdl = zhp->zfs_hdl;
734	int error;
735
736	assert(snapname != NULL);
737	error = lzc_send_space(snapname, from, flags, spacep);
738
739	if (error != 0) {
740		char errbuf[1024];
741		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
742		    "warning: cannot estimate space for '%s'"), snapname);
743
744		switch (error) {
745		case EXDEV:
746			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
747			    "not an earlier snapshot from the same fs"));
748			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
749
750		case ENOENT:
751			if (zfs_dataset_exists(hdl, snapname,
752			    ZFS_TYPE_SNAPSHOT)) {
753				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
754				    "incremental source (%s) does not exist"),
755				    snapname);
756			}
757			return (zfs_error(hdl, EZFS_NOENT, errbuf));
758
759		case EDQUOT:
760		case EFBIG:
761		case EIO:
762		case ENOLINK:
763		case ENOSPC:
764		case ENOSTR:
765		case ENXIO:
766		case EPIPE:
767		case ERANGE:
768		case EFAULT:
769		case EROFS:
770		case EINVAL:
771			zfs_error_aux(hdl, "%s", strerror(error));
772			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
773
774		default:
775			return (zfs_standard_error(hdl, error, errbuf));
776		}
777	}
778
779	return (0);
780}
781
782/*
783 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
784 * NULL) to the file descriptor specified by outfd.
785 */
786static int
787dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
788    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
789    nvlist_t *debugnv)
790{
791	zfs_cmd_t zc = {"\0"};
792	libzfs_handle_t *hdl = zhp->zfs_hdl;
793	nvlist_t *thisdbg;
794
795	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
796	assert(fromsnap_obj == 0 || !fromorigin);
797
798	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
799	zc.zc_cookie = outfd;
800	zc.zc_obj = fromorigin;
801	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
802	zc.zc_fromobj = fromsnap_obj;
803	zc.zc_flags = flags;
804
805	thisdbg = fnvlist_alloc();
806	if (fromsnap && fromsnap[0] != '\0') {
807		fnvlist_add_string(thisdbg, "fromsnap", fromsnap);
808	}
809
810	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
811		char errbuf[1024];
812		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
813		    "warning: cannot send '%s'"), zhp->zfs_name);
814
815		fnvlist_add_uint64(thisdbg, "error", errno);
816		if (debugnv) {
817			fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
818		}
819		fnvlist_free(thisdbg);
820
821		switch (errno) {
822		case EXDEV:
823			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
824			    "not an earlier snapshot from the same fs"));
825			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
826
827		case EACCES:
828			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
829			    "source key must be loaded"));
830			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
831
832		case ENOENT:
833			if (zfs_dataset_exists(hdl, zc.zc_name,
834			    ZFS_TYPE_SNAPSHOT)) {
835				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
836				    "incremental source (@%s) does not exist"),
837				    zc.zc_value);
838			}
839			return (zfs_error(hdl, EZFS_NOENT, errbuf));
840
841		case EDQUOT:
842		case EFBIG:
843		case EIO:
844		case ENOLINK:
845		case ENOSPC:
846		case ENOSTR:
847		case ENXIO:
848		case EPIPE:
849		case ERANGE:
850		case EFAULT:
851		case EROFS:
852		case EINVAL:
853			zfs_error_aux(hdl, "%s", strerror(errno));
854			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
855
856		default:
857			return (zfs_standard_error(hdl, errno, errbuf));
858		}
859	}
860
861	if (debugnv)
862		fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
863	fnvlist_free(thisdbg);
864
865	return (0);
866}
867
868static void
869gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
870{
871	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
872
873	/*
874	 * zfs_send() only sets snapholds for sends that need them,
875	 * e.g. replication and doall.
876	 */
877	if (sdd->snapholds == NULL)
878		return;
879
880	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
881}
882
883int
884zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
885    uint64_t *blocks_visited)
886{
887	zfs_cmd_t zc = {"\0"};
888
889	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
890	zc.zc_cookie = fd;
891	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
892		return (errno);
893	if (bytes_written != NULL)
894		*bytes_written = zc.zc_cookie;
895	if (blocks_visited != NULL)
896		*blocks_visited = zc.zc_objset_type;
897	return (0);
898}
899
900static void *
901send_progress_thread(void *arg)
902{
903	progress_arg_t *pa = arg;
904	zfs_handle_t *zhp = pa->pa_zhp;
905	uint64_t bytes;
906	uint64_t blocks;
907	char buf[16];
908	time_t t;
909	struct tm *tm;
910	boolean_t firstloop = B_TRUE;
911
912	/*
913	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
914	 */
915	for (;;) {
916		int err;
917		(void) sleep(1);
918		if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
919		    &blocks)) != 0) {
920			if (err == EINTR || err == ENOENT)
921				return ((void *)0);
922			return ((void *)(uintptr_t)err);
923		}
924
925		if (firstloop && !pa->pa_parsable) {
926			(void) fprintf(stderr,
927			    "TIME       %s   %sSNAPSHOT %s\n",
928			    pa->pa_estimate ? "BYTES" : " SENT",
929			    pa->pa_verbosity >= 2 ? "   BLOCKS    " : "",
930			    zhp->zfs_name);
931			firstloop = B_FALSE;
932		}
933
934		(void) time(&t);
935		tm = localtime(&t);
936
937		if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
938			(void) fprintf(stderr,
939			    "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
940			    tm->tm_hour, tm->tm_min, tm->tm_sec,
941			    (u_longlong_t)bytes, (u_longlong_t)blocks,
942			    zhp->zfs_name);
943		} else if (pa->pa_verbosity >= 2) {
944			zfs_nicenum(bytes, buf, sizeof (buf));
945			(void) fprintf(stderr,
946			    "%02d:%02d:%02d   %5s    %8llu    %s\n",
947			    tm->tm_hour, tm->tm_min, tm->tm_sec,
948			    buf, (u_longlong_t)blocks, zhp->zfs_name);
949		} else if (pa->pa_parsable) {
950			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
951			    tm->tm_hour, tm->tm_min, tm->tm_sec,
952			    (u_longlong_t)bytes, zhp->zfs_name);
953		} else {
954			zfs_nicebytes(bytes, buf, sizeof (buf));
955			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
956			    tm->tm_hour, tm->tm_min, tm->tm_sec,
957			    buf, zhp->zfs_name);
958		}
959	}
960}
961
962static void
963send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
964    uint64_t size, boolean_t parsable)
965{
966	if (parsable) {
967		if (fromsnap != NULL) {
968			(void) fprintf(fout, "incremental\t%s\t%s",
969			    fromsnap, tosnap);
970		} else {
971			(void) fprintf(fout, "full\t%s",
972			    tosnap);
973		}
974	} else {
975		if (fromsnap != NULL) {
976			if (strchr(fromsnap, '@') == NULL &&
977			    strchr(fromsnap, '#') == NULL) {
978				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
979				    "send from @%s to %s"),
980				    fromsnap, tosnap);
981			} else {
982				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
983				    "send from %s to %s"),
984				    fromsnap, tosnap);
985			}
986		} else {
987			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
988			    "full send of %s"),
989			    tosnap);
990		}
991	}
992
993	if (parsable) {
994		(void) fprintf(fout, "\t%llu",
995		    (longlong_t)size);
996	} else if (size != 0) {
997		char buf[16];
998		zfs_nicebytes(size, buf, sizeof (buf));
999		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1000		    " estimated size is %s"), buf);
1001	}
1002	(void) fprintf(fout, "\n");
1003}
1004
1005static int
1006dump_snapshot(zfs_handle_t *zhp, void *arg)
1007{
1008	send_dump_data_t *sdd = arg;
1009	progress_arg_t pa = { 0 };
1010	pthread_t tid;
1011	char *thissnap;
1012	enum lzc_send_flags flags = 0;
1013	int err;
1014	boolean_t isfromsnap, istosnap, fromorigin;
1015	boolean_t exclude = B_FALSE;
1016	FILE *fout = sdd->std_out ? stdout : stderr;
1017
1018	err = 0;
1019	thissnap = strchr(zhp->zfs_name, '@') + 1;
1020	isfromsnap = (sdd->fromsnap != NULL &&
1021	    strcmp(sdd->fromsnap, thissnap) == 0);
1022
1023	if (!sdd->seenfrom && isfromsnap) {
1024		gather_holds(zhp, sdd);
1025		sdd->seenfrom = B_TRUE;
1026		(void) strlcpy(sdd->prevsnap, thissnap,
1027		    sizeof (sdd->prevsnap));
1028		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1029		zfs_close(zhp);
1030		return (0);
1031	}
1032
1033	if (sdd->seento || !sdd->seenfrom) {
1034		zfs_close(zhp);
1035		return (0);
1036	}
1037
1038	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1039	if (istosnap)
1040		sdd->seento = B_TRUE;
1041
1042	if (sdd->large_block)
1043		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1044	if (sdd->embed_data)
1045		flags |= LZC_SEND_FLAG_EMBED_DATA;
1046	if (sdd->compress)
1047		flags |= LZC_SEND_FLAG_COMPRESS;
1048	if (sdd->raw)
1049		flags |= LZC_SEND_FLAG_RAW;
1050
1051	if (!sdd->doall && !isfromsnap && !istosnap) {
1052		if (sdd->replicate) {
1053			char *snapname;
1054			nvlist_t *snapprops;
1055			/*
1056			 * Filter out all intermediate snapshots except origin
1057			 * snapshots needed to replicate clones.
1058			 */
1059			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1060			    zhp->zfs_dmustats.dds_guid, &snapname);
1061
1062			snapprops = fnvlist_lookup_nvlist(nvfs, "snapprops");
1063			snapprops = fnvlist_lookup_nvlist(snapprops, thissnap);
1064			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1065		} else {
1066			exclude = B_TRUE;
1067		}
1068	}
1069
1070	/*
1071	 * If a filter function exists, call it to determine whether
1072	 * this snapshot will be sent.
1073	 */
1074	if (exclude || (sdd->filter_cb != NULL &&
1075	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1076		/*
1077		 * This snapshot is filtered out.  Don't send it, and don't
1078		 * set prevsnap_obj, so it will be as if this snapshot didn't
1079		 * exist, and the next accepted snapshot will be sent as
1080		 * an incremental from the last accepted one, or as the
1081		 * first (and full) snapshot in the case of a replication,
1082		 * non-incremental send.
1083		 */
1084		zfs_close(zhp);
1085		return (0);
1086	}
1087
1088	gather_holds(zhp, sdd);
1089	fromorigin = sdd->prevsnap[0] == '\0' &&
1090	    (sdd->fromorigin || sdd->replicate);
1091
1092	if (sdd->verbosity != 0) {
1093		uint64_t size = 0;
1094		char fromds[ZFS_MAX_DATASET_NAME_LEN];
1095
1096		if (sdd->prevsnap[0] != '\0') {
1097			(void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1098			*(strchr(fromds, '@') + 1) = '\0';
1099			(void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1100		}
1101		if (zfs_send_space(zhp, zhp->zfs_name,
1102		    sdd->prevsnap[0] ? fromds : NULL, flags, &size) != 0) {
1103			size = 0; /* cannot estimate send space */
1104		} else {
1105			send_print_verbose(fout, zhp->zfs_name,
1106			    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1107			    size, sdd->parsable);
1108		}
1109		sdd->size += size;
1110	}
1111
1112	if (!sdd->dryrun) {
1113		/*
1114		 * If progress reporting is requested, spawn a new thread to
1115		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1116		 */
1117		if (sdd->progress) {
1118			pa.pa_zhp = zhp;
1119			pa.pa_fd = sdd->outfd;
1120			pa.pa_parsable = sdd->parsable;
1121			pa.pa_estimate = B_FALSE;
1122			pa.pa_verbosity = sdd->verbosity;
1123
1124			if ((err = pthread_create(&tid, NULL,
1125			    send_progress_thread, &pa)) != 0) {
1126				zfs_close(zhp);
1127				return (err);
1128			}
1129		}
1130
1131		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1132		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1133
1134		if (sdd->progress) {
1135			void *status = NULL;
1136			(void) pthread_cancel(tid);
1137			(void) pthread_join(tid, &status);
1138			int error = (int)(uintptr_t)status;
1139			if (error != 0 && status != PTHREAD_CANCELED) {
1140				char errbuf[1024];
1141				(void) snprintf(errbuf, sizeof (errbuf),
1142				    dgettext(TEXT_DOMAIN,
1143				    "progress thread exited nonzero"));
1144				return (zfs_standard_error(zhp->zfs_hdl, error,
1145				    errbuf));
1146			}
1147		}
1148	}
1149
1150	(void) strcpy(sdd->prevsnap, thissnap);
1151	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1152	zfs_close(zhp);
1153	return (err);
1154}
1155
1156static int
1157dump_filesystem(zfs_handle_t *zhp, void *arg)
1158{
1159	int rv = 0;
1160	send_dump_data_t *sdd = arg;
1161	boolean_t missingfrom = B_FALSE;
1162	zfs_cmd_t zc = {"\0"};
1163	uint64_t min_txg = 0, max_txg = 0;
1164
1165	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1166	    zhp->zfs_name, sdd->tosnap);
1167	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1168		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1169		    "WARNING: could not send %s@%s: does not exist\n"),
1170		    zhp->zfs_name, sdd->tosnap);
1171		sdd->err = B_TRUE;
1172		return (0);
1173	}
1174
1175	if (sdd->replicate && sdd->fromsnap) {
1176		/*
1177		 * If this fs does not have fromsnap, and we're doing
1178		 * recursive, we need to send a full stream from the
1179		 * beginning (or an incremental from the origin if this
1180		 * is a clone).  If we're doing non-recursive, then let
1181		 * them get the error.
1182		 */
1183		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1184		    zhp->zfs_name, sdd->fromsnap);
1185		if (zfs_ioctl(zhp->zfs_hdl,
1186		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1187			missingfrom = B_TRUE;
1188		}
1189	}
1190
1191	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1192	sdd->prevsnap_obj = 0;
1193	if (sdd->fromsnap == NULL || missingfrom)
1194		sdd->seenfrom = B_TRUE;
1195
1196
1197
1198	/*
1199	 * Iterate through all snapshots and process the ones we will be
1200	 * sending. If we only have a "from" and "to" snapshot to deal
1201	 * with, we can avoid iterating through all the other snapshots.
1202	 */
1203	if (sdd->doall || sdd->replicate || sdd->tosnap == NULL) {
1204		if (!sdd->replicate && sdd->fromsnap != NULL)
1205			min_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1206			    sdd->fromsnap);
1207		if (!sdd->replicate && sdd->tosnap != NULL)
1208			max_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1209			    sdd->tosnap);
1210		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg,
1211		    min_txg, max_txg);
1212	} else {
1213		char snapname[MAXPATHLEN] = { 0 };
1214		zfs_handle_t *snap;
1215
1216		if (!sdd->seenfrom) {
1217			(void) snprintf(snapname, sizeof (snapname),
1218			    "%s@%s", zhp->zfs_name, sdd->fromsnap);
1219			snap = zfs_open(zhp->zfs_hdl, snapname,
1220			    ZFS_TYPE_SNAPSHOT);
1221			if (snap != NULL)
1222				rv = dump_snapshot(snap, sdd);
1223			else
1224				rv = -1;
1225		}
1226
1227		if (rv == 0) {
1228			(void) snprintf(snapname, sizeof (snapname),
1229			    "%s@%s", zhp->zfs_name, sdd->tosnap);
1230			snap = zfs_open(zhp->zfs_hdl, snapname,
1231			    ZFS_TYPE_SNAPSHOT);
1232			if (snap != NULL)
1233				rv = dump_snapshot(snap, sdd);
1234			else
1235				rv = -1;
1236		}
1237	}
1238
1239	if (!sdd->seenfrom) {
1240		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1241		    "WARNING: could not send %s@%s:\n"
1242		    "incremental source (%s@%s) does not exist\n"),
1243		    zhp->zfs_name, sdd->tosnap,
1244		    zhp->zfs_name, sdd->fromsnap);
1245		sdd->err = B_TRUE;
1246	} else if (!sdd->seento) {
1247		if (sdd->fromsnap) {
1248			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1249			    "WARNING: could not send %s@%s:\n"
1250			    "incremental source (%s@%s) "
1251			    "is not earlier than it\n"),
1252			    zhp->zfs_name, sdd->tosnap,
1253			    zhp->zfs_name, sdd->fromsnap);
1254		} else {
1255			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1256			    "WARNING: "
1257			    "could not send %s@%s: does not exist\n"),
1258			    zhp->zfs_name, sdd->tosnap);
1259		}
1260		sdd->err = B_TRUE;
1261	}
1262
1263	return (rv);
1264}
1265
1266static int
1267dump_filesystems(zfs_handle_t *rzhp, void *arg)
1268{
1269	send_dump_data_t *sdd = arg;
1270	nvpair_t *fspair;
1271	boolean_t needagain, progress;
1272
1273	if (!sdd->replicate)
1274		return (dump_filesystem(rzhp, sdd));
1275
1276	/* Mark the clone origin snapshots. */
1277	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1278	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1279		nvlist_t *nvfs;
1280		uint64_t origin_guid = 0;
1281
1282		nvfs = fnvpair_value_nvlist(fspair);
1283		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1284		if (origin_guid != 0) {
1285			char *snapname;
1286			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1287			    origin_guid, &snapname);
1288			if (origin_nv != NULL) {
1289				nvlist_t *snapprops;
1290				snapprops = fnvlist_lookup_nvlist(origin_nv,
1291				    "snapprops");
1292				snapprops = fnvlist_lookup_nvlist(snapprops,
1293				    snapname);
1294				fnvlist_add_boolean(snapprops,
1295				    "is_clone_origin");
1296			}
1297		}
1298	}
1299again:
1300	needagain = progress = B_FALSE;
1301	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1302	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1303		nvlist_t *fslist, *parent_nv;
1304		char *fsname;
1305		zfs_handle_t *zhp;
1306		int err;
1307		uint64_t origin_guid = 0;
1308		uint64_t parent_guid = 0;
1309
1310		fslist = fnvpair_value_nvlist(fspair);
1311		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1312			continue;
1313
1314		fsname = fnvlist_lookup_string(fslist, "name");
1315		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1316		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1317		    &parent_guid);
1318
1319		if (parent_guid != 0) {
1320			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1321			if (!nvlist_exists(parent_nv, "sent")) {
1322				/* parent has not been sent; skip this one */
1323				needagain = B_TRUE;
1324				continue;
1325			}
1326		}
1327
1328		if (origin_guid != 0) {
1329			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1330			    origin_guid, NULL);
1331			if (origin_nv != NULL &&
1332			    !nvlist_exists(origin_nv, "sent")) {
1333				/*
1334				 * origin has not been sent yet;
1335				 * skip this clone.
1336				 */
1337				needagain = B_TRUE;
1338				continue;
1339			}
1340		}
1341
1342		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1343		if (zhp == NULL)
1344			return (-1);
1345		err = dump_filesystem(zhp, sdd);
1346		fnvlist_add_boolean(fslist, "sent");
1347		progress = B_TRUE;
1348		zfs_close(zhp);
1349		if (err)
1350			return (err);
1351	}
1352	if (needagain) {
1353		assert(progress);
1354		goto again;
1355	}
1356
1357	/* clean out the sent flags in case we reuse this fss */
1358	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1359	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1360		nvlist_t *fslist;
1361
1362		fslist = fnvpair_value_nvlist(fspair);
1363		(void) nvlist_remove_all(fslist, "sent");
1364	}
1365
1366	return (0);
1367}
1368
1369nvlist_t *
1370zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1371{
1372	unsigned int version;
1373	int nread, i;
1374	unsigned long long checksum, packed_len;
1375
1376	/*
1377	 * Decode token header, which is:
1378	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1379	 * Note that the only supported token version is 1.
1380	 */
1381	nread = sscanf(token, "%u-%llx-%llx-",
1382	    &version, &checksum, &packed_len);
1383	if (nread != 3) {
1384		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1385		    "resume token is corrupt (invalid format)"));
1386		return (NULL);
1387	}
1388
1389	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1390		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1391		    "resume token is corrupt (invalid version %u)"),
1392		    version);
1393		return (NULL);
1394	}
1395
1396	/* convert hexadecimal representation to binary */
1397	token = strrchr(token, '-') + 1;
1398	int len = strlen(token) / 2;
1399	unsigned char *compressed = zfs_alloc(hdl, len);
1400	for (i = 0; i < len; i++) {
1401		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1402		if (nread != 1) {
1403			free(compressed);
1404			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1405			    "resume token is corrupt "
1406			    "(payload is not hex-encoded)"));
1407			return (NULL);
1408		}
1409	}
1410
1411	/* verify checksum */
1412	zio_cksum_t cksum;
1413	fletcher_4_native_varsize(compressed, len, &cksum);
1414	if (cksum.zc_word[0] != checksum) {
1415		free(compressed);
1416		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1417		    "resume token is corrupt (incorrect checksum)"));
1418		return (NULL);
1419	}
1420
1421	/* uncompress */
1422	void *packed = zfs_alloc(hdl, packed_len);
1423	uLongf packed_len_long = packed_len;
1424	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1425	    packed_len_long != packed_len) {
1426		free(packed);
1427		free(compressed);
1428		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1429		    "resume token is corrupt (decompression failed)"));
1430		return (NULL);
1431	}
1432
1433	/* unpack nvlist */
1434	nvlist_t *nv;
1435	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1436	free(packed);
1437	free(compressed);
1438	if (error != 0) {
1439		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1440		    "resume token is corrupt (nvlist_unpack failed)"));
1441		return (NULL);
1442	}
1443	return (nv);
1444}
1445static enum lzc_send_flags
1446lzc_flags_from_sendflags(const sendflags_t *flags)
1447{
1448	enum lzc_send_flags lzc_flags = 0;
1449	if (flags->largeblock)
1450		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1451	if (flags->embed_data)
1452		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1453	if (flags->compress)
1454		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1455	if (flags->raw)
1456		lzc_flags |= LZC_SEND_FLAG_RAW;
1457	if (flags->saved)
1458		lzc_flags |= LZC_SEND_FLAG_SAVED;
1459	return (lzc_flags);
1460}
1461
1462static int
1463estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
1464    uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
1465    const char *redactbook, char *errbuf)
1466{
1467	uint64_t size;
1468	FILE *fout = flags->dryrun ? stdout : stderr;
1469	progress_arg_t pa = { 0 };
1470	int err = 0;
1471	pthread_t ptid;
1472
1473	if (flags->progress) {
1474		pa.pa_zhp = zhp;
1475		pa.pa_fd = fd;
1476		pa.pa_parsable = flags->parsable;
1477		pa.pa_estimate = B_TRUE;
1478		pa.pa_verbosity = flags->verbosity;
1479
1480		err = pthread_create(&ptid, NULL,
1481		    send_progress_thread, &pa);
1482		if (err != 0) {
1483			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
1484			return (zfs_error(zhp->zfs_hdl,
1485			    EZFS_THREADCREATEFAILED, errbuf));
1486		}
1487	}
1488
1489	err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
1490	    lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
1491	    redactbook, fd, &size);
1492
1493	if (flags->progress) {
1494		void *status = NULL;
1495		(void) pthread_cancel(ptid);
1496		(void) pthread_join(ptid, &status);
1497		int error = (int)(uintptr_t)status;
1498		if (error != 0 && status != PTHREAD_CANCELED) {
1499			char errbuf[1024];
1500			(void) snprintf(errbuf, sizeof (errbuf),
1501			    dgettext(TEXT_DOMAIN, "progress thread exited "
1502			    "nonzero"));
1503			return (zfs_standard_error(zhp->zfs_hdl, error,
1504			    errbuf));
1505		}
1506	}
1507
1508	if (err != 0) {
1509		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
1510		return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
1511		    errbuf));
1512	}
1513	send_print_verbose(fout, zhp->zfs_name, from, size,
1514	    flags->parsable);
1515
1516	if (flags->parsable) {
1517		(void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
1518	} else {
1519		char buf[16];
1520		zfs_nicenum(size, buf, sizeof (buf));
1521		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1522		    "total estimated size is %s\n"), buf);
1523	}
1524	return (0);
1525}
1526
1527static boolean_t
1528redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
1529{
1530	for (int i = 0; i < num_snaps; i++) {
1531		if (snaps[i] == guid)
1532			return (B_TRUE);
1533	}
1534	return (B_FALSE);
1535}
1536
1537static boolean_t
1538redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
1539    const uint64_t *snaps2, uint64_t num_snaps2)
1540{
1541	if (num_snaps1 != num_snaps2)
1542		return (B_FALSE);
1543	for (int i = 0; i < num_snaps1; i++) {
1544		if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
1545			return (B_FALSE);
1546	}
1547	return (B_TRUE);
1548}
1549
1550/*
1551 * Check that the list of redaction snapshots in the bookmark matches the send
1552 * we're resuming, and return whether or not it's complete.
1553 *
1554 * Note that the caller needs to free the contents of *bookname with free() if
1555 * this function returns successfully.
1556 */
1557static int
1558find_redact_book(libzfs_handle_t *hdl, const char *path,
1559    const uint64_t *redact_snap_guids, int num_redact_snaps,
1560    char **bookname)
1561{
1562	char errbuf[1024];
1563	int error = 0;
1564	nvlist_t *props = fnvlist_alloc();
1565	nvlist_t *bmarks;
1566
1567	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1568	    "cannot resume send"));
1569
1570	fnvlist_add_boolean(props, "redact_complete");
1571	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1572	error = lzc_get_bookmarks(path, props, &bmarks);
1573	fnvlist_free(props);
1574	if (error != 0) {
1575		if (error == ESRCH) {
1576			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1577			    "nonexistent redaction bookmark provided"));
1578		} else if (error == ENOENT) {
1579			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1580			    "dataset to be sent no longer exists"));
1581		} else {
1582			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1583			    "unknown error: %s"), strerror(error));
1584		}
1585		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1586	}
1587	nvpair_t *pair;
1588	for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
1589	    pair = nvlist_next_nvpair(bmarks, pair)) {
1590
1591		nvlist_t *bmark = fnvpair_value_nvlist(pair);
1592		nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
1593		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1594		uint_t len = 0;
1595		uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
1596		    ZPROP_VALUE, &len);
1597		if (redact_snaps_equal(redact_snap_guids,
1598		    num_redact_snaps, bmarksnaps, len)) {
1599			break;
1600		}
1601	}
1602	if (pair == NULL)  {
1603		fnvlist_free(bmarks);
1604		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1605		    "no appropriate redaction bookmark exists"));
1606		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1607	}
1608	char *name = nvpair_name(pair);
1609	nvlist_t *bmark = fnvpair_value_nvlist(pair);
1610	nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
1611	boolean_t complete = fnvlist_lookup_boolean_value(vallist,
1612	    ZPROP_VALUE);
1613	if (!complete) {
1614		fnvlist_free(bmarks);
1615		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1616		    "incomplete redaction bookmark provided"));
1617		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1618	}
1619	*bookname = strndup(name, ZFS_MAX_DATASET_NAME_LEN);
1620	ASSERT3P(*bookname, !=, NULL);
1621	fnvlist_free(bmarks);
1622	return (0);
1623}
1624
1625static int
1626zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1627    nvlist_t *resume_nvl)
1628{
1629	char errbuf[1024];
1630	char *toname;
1631	char *fromname = NULL;
1632	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1633	zfs_handle_t *zhp;
1634	int error = 0;
1635	char name[ZFS_MAX_DATASET_NAME_LEN];
1636	enum lzc_send_flags lzc_flags = 0;
1637	FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
1638	uint64_t *redact_snap_guids = NULL;
1639	int num_redact_snaps = 0;
1640	char *redact_book = NULL;
1641
1642	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1643	    "cannot resume send"));
1644
1645	if (flags->verbosity != 0) {
1646		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1647		    "resume token contents:\n"));
1648		nvlist_print(fout, resume_nvl);
1649	}
1650
1651	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1652	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1653	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1654	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1655	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1656		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1657		    "resume token is corrupt"));
1658		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1659	}
1660	fromguid = 0;
1661	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1662
1663	if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
1664		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1665	if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1666		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1667	if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
1668		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1669	if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
1670		lzc_flags |= LZC_SEND_FLAG_RAW;
1671	if (flags->saved || nvlist_exists(resume_nvl, "savedok"))
1672		lzc_flags |= LZC_SEND_FLAG_SAVED;
1673
1674	if (flags->saved) {
1675		(void) strcpy(name, toname);
1676	} else {
1677		error = guid_to_name(hdl, toname, toguid, B_FALSE, name);
1678		if (error != 0) {
1679			if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1680				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1681				    "'%s' is no longer the same snapshot "
1682				    "used in the initial send"), toname);
1683			} else {
1684				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1685				    "'%s' used in the initial send no "
1686				    "longer exists"), toname);
1687			}
1688			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1689		}
1690	}
1691
1692	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1693	if (zhp == NULL) {
1694		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1695		    "unable to access '%s'"), name);
1696		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1697	}
1698
1699	if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
1700	    &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
1701		num_redact_snaps = -1;
1702	}
1703
1704	if (fromguid != 0) {
1705		if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
1706		    redact_snap_guids, num_redact_snaps, name) != 0) {
1707			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1708			    "incremental source %#llx no longer exists"),
1709			    (longlong_t)fromguid);
1710			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1711		}
1712		fromname = name;
1713	}
1714
1715	redact_snap_guids = NULL;
1716
1717	if (nvlist_lookup_uint64_array(resume_nvl,
1718	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
1719	    (uint_t *)&num_redact_snaps) == 0) {
1720		char path[ZFS_MAX_DATASET_NAME_LEN];
1721
1722		(void) strlcpy(path, toname, sizeof (path));
1723		char *at = strchr(path, '@');
1724		ASSERT3P(at, !=, NULL);
1725
1726		*at = '\0';
1727
1728		if ((error = find_redact_book(hdl, path, redact_snap_guids,
1729		    num_redact_snaps, &redact_book)) != 0) {
1730			return (error);
1731		}
1732	}
1733
1734	if (flags->verbosity != 0) {
1735		/*
1736		 * Some of these may have come from the resume token, set them
1737		 * here for size estimate purposes.
1738		 */
1739		sendflags_t tmpflags = *flags;
1740		if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
1741			tmpflags.largeblock = B_TRUE;
1742		if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
1743			tmpflags.compress = B_TRUE;
1744		if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
1745			tmpflags.embed_data = B_TRUE;
1746		error = estimate_size(zhp, fromname, outfd, &tmpflags,
1747		    resumeobj, resumeoff, bytes, redact_book, errbuf);
1748	}
1749
1750	if (!flags->dryrun) {
1751		progress_arg_t pa = { 0 };
1752		pthread_t tid;
1753		/*
1754		 * If progress reporting is requested, spawn a new thread to
1755		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1756		 */
1757		if (flags->progress) {
1758			pa.pa_zhp = zhp;
1759			pa.pa_fd = outfd;
1760			pa.pa_parsable = flags->parsable;
1761			pa.pa_estimate = B_FALSE;
1762			pa.pa_verbosity = flags->verbosity;
1763
1764			error = pthread_create(&tid, NULL,
1765			    send_progress_thread, &pa);
1766			if (error != 0) {
1767				if (redact_book != NULL)
1768					free(redact_book);
1769				zfs_close(zhp);
1770				return (error);
1771			}
1772		}
1773
1774		error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
1775		    lzc_flags, resumeobj, resumeoff, redact_book);
1776		if (redact_book != NULL)
1777			free(redact_book);
1778
1779		if (flags->progress) {
1780			void *status = NULL;
1781			(void) pthread_cancel(tid);
1782			(void) pthread_join(tid, &status);
1783			int error = (int)(uintptr_t)status;
1784			if (error != 0 && status != PTHREAD_CANCELED) {
1785				char errbuf[1024];
1786				(void) snprintf(errbuf, sizeof (errbuf),
1787				    dgettext(TEXT_DOMAIN,
1788				    "progress thread exited nonzero"));
1789				return (zfs_standard_error(hdl, error, errbuf));
1790			}
1791		}
1792
1793		char errbuf[1024];
1794		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1795		    "warning: cannot send '%s'"), zhp->zfs_name);
1796
1797		zfs_close(zhp);
1798
1799		switch (error) {
1800		case 0:
1801			return (0);
1802		case EACCES:
1803			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1804			    "source key must be loaded"));
1805			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1806		case ESRCH:
1807			if (lzc_exists(zhp->zfs_name)) {
1808				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1809				    "incremental source could not be found"));
1810			}
1811			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1812
1813		case EXDEV:
1814		case ENOENT:
1815		case EDQUOT:
1816		case EFBIG:
1817		case EIO:
1818		case ENOLINK:
1819		case ENOSPC:
1820		case ENOSTR:
1821		case ENXIO:
1822		case EPIPE:
1823		case ERANGE:
1824		case EFAULT:
1825		case EROFS:
1826			zfs_error_aux(hdl, "%s", strerror(errno));
1827			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1828
1829		default:
1830			return (zfs_standard_error(hdl, errno, errbuf));
1831		}
1832	} else {
1833		if (redact_book != NULL)
1834			free(redact_book);
1835	}
1836
1837	zfs_close(zhp);
1838
1839	return (error);
1840}
1841
1842int
1843zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1844    const char *resume_token)
1845{
1846	int ret;
1847	char errbuf[1024];
1848	nvlist_t *resume_nvl;
1849
1850	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1851	    "cannot resume send"));
1852
1853	resume_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token);
1854	if (resume_nvl == NULL) {
1855		/*
1856		 * zfs_error_aux has already been set by
1857		 * zfs_send_resume_token_to_nvlist()
1858		 */
1859		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1860	}
1861
1862	ret = zfs_send_resume_impl(hdl, flags, outfd, resume_nvl);
1863	fnvlist_free(resume_nvl);
1864
1865	return (ret);
1866}
1867
1868int
1869zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
1870    const char *resume_token)
1871{
1872	int ret;
1873	libzfs_handle_t *hdl = zhp->zfs_hdl;
1874	nvlist_t *saved_nvl = NULL, *resume_nvl = NULL;
1875	uint64_t saved_guid = 0, resume_guid = 0;
1876	uint64_t obj = 0, off = 0, bytes = 0;
1877	char token_buf[ZFS_MAXPROPLEN];
1878	char errbuf[1024];
1879
1880	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1881	    "saved send failed"));
1882
1883	ret = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1884	    token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE);
1885	if (ret != 0)
1886		goto out;
1887
1888	saved_nvl = zfs_send_resume_token_to_nvlist(hdl, token_buf);
1889	if (saved_nvl == NULL) {
1890		/*
1891		 * zfs_error_aux has already been set by
1892		 * zfs_send_resume_token_to_nvlist()
1893		 */
1894		ret = zfs_error(hdl, EZFS_FAULT, errbuf);
1895		goto out;
1896	}
1897
1898	/*
1899	 * If a resume token is provided we use the object and offset
1900	 * from that instead of the default, which starts from the
1901	 * beginning.
1902	 */
1903	if (resume_token != NULL) {
1904		resume_nvl = zfs_send_resume_token_to_nvlist(hdl,
1905		    resume_token);
1906		if (resume_nvl == NULL) {
1907			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
1908			goto out;
1909		}
1910
1911		if (nvlist_lookup_uint64(resume_nvl, "object", &obj) != 0 ||
1912		    nvlist_lookup_uint64(resume_nvl, "offset", &off) != 0 ||
1913		    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1914		    nvlist_lookup_uint64(resume_nvl, "toguid",
1915		    &resume_guid) != 0) {
1916			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1917			    "provided resume token is corrupt"));
1918			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
1919			goto out;
1920		}
1921
1922		if (nvlist_lookup_uint64(saved_nvl, "toguid",
1923		    &saved_guid)) {
1924			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1925			    "dataset's resume token is corrupt"));
1926			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
1927			goto out;
1928		}
1929
1930		if (resume_guid != saved_guid) {
1931			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1932			    "provided resume token does not match dataset"));
1933			ret = zfs_error(hdl, EZFS_BADBACKUP, errbuf);
1934			goto out;
1935		}
1936	}
1937
1938	(void) nvlist_remove_all(saved_nvl, "object");
1939	fnvlist_add_uint64(saved_nvl, "object", obj);
1940
1941	(void) nvlist_remove_all(saved_nvl, "offset");
1942	fnvlist_add_uint64(saved_nvl, "offset", off);
1943
1944	(void) nvlist_remove_all(saved_nvl, "bytes");
1945	fnvlist_add_uint64(saved_nvl, "bytes", bytes);
1946
1947	(void) nvlist_remove_all(saved_nvl, "toname");
1948	fnvlist_add_string(saved_nvl, "toname", zhp->zfs_name);
1949
1950	ret = zfs_send_resume_impl(hdl, flags, outfd, saved_nvl);
1951
1952out:
1953	fnvlist_free(saved_nvl);
1954	fnvlist_free(resume_nvl);
1955	return (ret);
1956}
1957
1958/*
1959 * This function informs the target system that the recursive send is complete.
1960 * The record is also expected in the case of a send -p.
1961 */
1962static int
1963send_conclusion_record(int fd, zio_cksum_t *zc)
1964{
1965	dmu_replay_record_t drr = { 0 };
1966	drr.drr_type = DRR_END;
1967	if (zc != NULL)
1968		drr.drr_u.drr_end.drr_checksum = *zc;
1969	if (write(fd, &drr, sizeof (drr)) == -1) {
1970		return (errno);
1971	}
1972	return (0);
1973}
1974
1975/*
1976 * This function is responsible for sending the records that contain the
1977 * necessary information for the target system's libzfs to be able to set the
1978 * properties of the filesystem being received, or to be able to prepare for
1979 * a recursive receive.
1980 *
1981 * The "zhp" argument is the handle of the snapshot we are sending
1982 * (the "tosnap").  The "from" argument is the short snapshot name (the part
1983 * after the @) of the incremental source.
1984 */
1985static int
1986send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
1987    boolean_t gather_props, boolean_t recursive, boolean_t verbose,
1988    boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t skipmissing,
1989    boolean_t backup, boolean_t holds, boolean_t props, boolean_t doall,
1990    nvlist_t **fssp, avl_tree_t **fsavlp)
1991{
1992	int err = 0;
1993	char *packbuf = NULL;
1994	size_t buflen = 0;
1995	zio_cksum_t zc = { {0} };
1996	int featureflags = 0;
1997	/* name of filesystem/volume that contains snapshot we are sending */
1998	char tofs[ZFS_MAX_DATASET_NAME_LEN];
1999	/* short name of snap we are sending */
2000	char *tosnap = "";
2001
2002	char errbuf[1024];
2003	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2004	    "warning: cannot send '%s'"), zhp->zfs_name);
2005	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
2006	    ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
2007		featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2008	}
2009
2010	if (holds)
2011		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2012
2013	(void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
2014	char *at = strchr(tofs, '@');
2015	if (at != NULL) {
2016		*at = '\0';
2017		tosnap = at + 1;
2018	}
2019
2020	if (gather_props) {
2021		nvlist_t *hdrnv = fnvlist_alloc();
2022		nvlist_t *fss = NULL;
2023
2024		if (from != NULL)
2025			fnvlist_add_string(hdrnv, "fromsnap", from);
2026		fnvlist_add_string(hdrnv, "tosnap", tosnap);
2027		if (!recursive)
2028			fnvlist_add_boolean(hdrnv, "not_recursive");
2029
2030		if (raw) {
2031			fnvlist_add_boolean(hdrnv, "raw");
2032		}
2033
2034		if ((err = gather_nvlist(zhp->zfs_hdl, tofs,
2035		    from, tosnap, recursive, raw, doall, replicate, skipmissing,
2036		    verbose, backup, holds, props, &fss, fsavlp)) != 0) {
2037			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2038			    errbuf));
2039		}
2040		/*
2041		 * Do not allow the size of the properties list to exceed
2042		 * the limit
2043		 */
2044		if ((fnvlist_size(fss) + fnvlist_size(hdrnv)) >
2045		    zhp->zfs_hdl->libzfs_max_nvlist) {
2046			(void) snprintf(errbuf, sizeof (errbuf),
2047			    dgettext(TEXT_DOMAIN, "warning: cannot send '%s': "
2048			    "the size of the list of snapshots and properties "
2049			    "is too large to be received successfully.\n"
2050			    "Select a smaller number of snapshots to send.\n"),
2051			    zhp->zfs_name);
2052			return (zfs_error(zhp->zfs_hdl, EZFS_NOSPC,
2053			    errbuf));
2054		}
2055		fnvlist_add_nvlist(hdrnv, "fss", fss);
2056		VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
2057		    0));
2058		if (fssp != NULL) {
2059			*fssp = fss;
2060		} else {
2061			fnvlist_free(fss);
2062		}
2063		fnvlist_free(hdrnv);
2064	}
2065
2066	if (!dryrun) {
2067		dmu_replay_record_t drr = { 0 };
2068		/* write first begin record */
2069		drr.drr_type = DRR_BEGIN;
2070		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
2071		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
2072		    drr_versioninfo, DMU_COMPOUNDSTREAM);
2073		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
2074		    drr_versioninfo, featureflags);
2075		if (snprintf(drr.drr_u.drr_begin.drr_toname,
2076		    sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
2077		    tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
2078			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2079			    errbuf));
2080		}
2081		drr.drr_payloadlen = buflen;
2082
2083		err = dump_record(&drr, packbuf, buflen, &zc, fd);
2084		free(packbuf);
2085		if (err != 0) {
2086			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
2087			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2088			    errbuf));
2089		}
2090		err = send_conclusion_record(fd, &zc);
2091		if (err != 0) {
2092			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(err));
2093			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2094			    errbuf));
2095		}
2096	}
2097	return (0);
2098}
2099
2100/*
2101 * Generate a send stream.  The "zhp" argument is the filesystem/volume
2102 * that contains the snapshot to send.  The "fromsnap" argument is the
2103 * short name (the part after the '@') of the snapshot that is the
2104 * incremental source to send from (if non-NULL).  The "tosnap" argument
2105 * is the short name of the snapshot to send.
2106 *
2107 * The content of the send stream is the snapshot identified by
2108 * 'tosnap'.  Incremental streams are requested in two ways:
2109 *     - from the snapshot identified by "fromsnap" (if non-null) or
2110 *     - from the origin of the dataset identified by zhp, which must
2111 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
2112 *	 is TRUE.
2113 *
2114 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
2115 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
2116 * if "replicate" is set.  If "doall" is set, dump all the intermediate
2117 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
2118 * case too. If "props" is set, send properties.
2119 */
2120int
2121zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2122    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2123    void *cb_arg, nvlist_t **debugnvp)
2124{
2125	char errbuf[1024];
2126	send_dump_data_t sdd = { 0 };
2127	int err = 0;
2128	nvlist_t *fss = NULL;
2129	avl_tree_t *fsavl = NULL;
2130	static uint64_t holdseq;
2131	int spa_version;
2132	int featureflags = 0;
2133	FILE *fout;
2134
2135	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2136	    "cannot send '%s'"), zhp->zfs_name);
2137
2138	if (fromsnap && fromsnap[0] == '\0') {
2139		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2140		    "zero-length incremental source"));
2141		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2142	}
2143
2144	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
2145		uint64_t version;
2146		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
2147		if (version >= ZPL_VERSION_SA) {
2148			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2149		}
2150	}
2151
2152	if (flags->holds)
2153		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2154
2155	if (flags->replicate || flags->doall || flags->props ||
2156	    flags->holds || flags->backup) {
2157		char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
2158		if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
2159		    "%s@%s", zhp->zfs_name, tosnap) >=
2160		    sizeof (full_tosnap_name)) {
2161			err = EINVAL;
2162			goto stderr_out;
2163		}
2164		zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
2165		    full_tosnap_name, ZFS_TYPE_SNAPSHOT);
2166		if (tosnap == NULL) {
2167			err = -1;
2168			goto err_out;
2169		}
2170		err = send_prelim_records(tosnap, fromsnap, outfd,
2171		    flags->replicate || flags->props || flags->holds,
2172		    flags->replicate, flags->verbosity > 0, flags->dryrun,
2173		    flags->raw, flags->replicate, flags->skipmissing,
2174		    flags->backup, flags->holds, flags->props, flags->doall,
2175		    &fss, &fsavl);
2176		zfs_close(tosnap);
2177		if (err != 0)
2178			goto err_out;
2179	}
2180
2181	/* dump each stream */
2182	sdd.fromsnap = fromsnap;
2183	sdd.tosnap = tosnap;
2184	sdd.outfd = outfd;
2185	sdd.replicate = flags->replicate;
2186	sdd.doall = flags->doall;
2187	sdd.fromorigin = flags->fromorigin;
2188	sdd.fss = fss;
2189	sdd.fsavl = fsavl;
2190	sdd.verbosity = flags->verbosity;
2191	sdd.parsable = flags->parsable;
2192	sdd.progress = flags->progress;
2193	sdd.dryrun = flags->dryrun;
2194	sdd.large_block = flags->largeblock;
2195	sdd.embed_data = flags->embed_data;
2196	sdd.compress = flags->compress;
2197	sdd.raw = flags->raw;
2198	sdd.holds = flags->holds;
2199	sdd.filter_cb = filter_func;
2200	sdd.filter_cb_arg = cb_arg;
2201	if (debugnvp)
2202		sdd.debugnv = *debugnvp;
2203	if (sdd.verbosity != 0 && sdd.dryrun)
2204		sdd.std_out = B_TRUE;
2205	fout = sdd.std_out ? stdout : stderr;
2206
2207	/*
2208	 * Some flags require that we place user holds on the datasets that are
2209	 * being sent so they don't get destroyed during the send. We can skip
2210	 * this step if the pool is imported read-only since the datasets cannot
2211	 * be destroyed.
2212	 */
2213	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2214	    ZPOOL_PROP_READONLY, NULL) &&
2215	    zfs_spa_version(zhp, &spa_version) == 0 &&
2216	    spa_version >= SPA_VERSION_USERREFS &&
2217	    (flags->doall || flags->replicate)) {
2218		++holdseq;
2219		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2220		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2221		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
2222		if (sdd.cleanup_fd < 0) {
2223			err = errno;
2224			goto stderr_out;
2225		}
2226		sdd.snapholds = fnvlist_alloc();
2227	} else {
2228		sdd.cleanup_fd = -1;
2229		sdd.snapholds = NULL;
2230	}
2231
2232	if (flags->verbosity != 0 || sdd.snapholds != NULL) {
2233		/*
2234		 * Do a verbose no-op dry run to get all the verbose output
2235		 * or to gather snapshot hold's before generating any data,
2236		 * then do a non-verbose real run to generate the streams.
2237		 */
2238		sdd.dryrun = B_TRUE;
2239		err = dump_filesystems(zhp, &sdd);
2240
2241		if (err != 0)
2242			goto stderr_out;
2243
2244		if (flags->verbosity != 0) {
2245			if (flags->parsable) {
2246				(void) fprintf(fout, "size\t%llu\n",
2247				    (longlong_t)sdd.size);
2248			} else {
2249				char buf[16];
2250				zfs_nicebytes(sdd.size, buf, sizeof (buf));
2251				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2252				    "total estimated size is %s\n"), buf);
2253			}
2254		}
2255
2256		/* Ensure no snaps found is treated as an error. */
2257		if (!sdd.seento) {
2258			err = ENOENT;
2259			goto err_out;
2260		}
2261
2262		/* Skip the second run if dryrun was requested. */
2263		if (flags->dryrun)
2264			goto err_out;
2265
2266		if (sdd.snapholds != NULL) {
2267			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2268			if (err != 0)
2269				goto stderr_out;
2270
2271			fnvlist_free(sdd.snapholds);
2272			sdd.snapholds = NULL;
2273		}
2274
2275		sdd.dryrun = B_FALSE;
2276		sdd.verbosity = 0;
2277	}
2278
2279	err = dump_filesystems(zhp, &sdd);
2280	fsavl_destroy(fsavl);
2281	fnvlist_free(fss);
2282
2283	/* Ensure no snaps found is treated as an error. */
2284	if (err == 0 && !sdd.seento)
2285		err = ENOENT;
2286
2287	if (sdd.cleanup_fd != -1) {
2288		VERIFY(0 == close(sdd.cleanup_fd));
2289		sdd.cleanup_fd = -1;
2290	}
2291
2292	if (!flags->dryrun && (flags->replicate || flags->doall ||
2293	    flags->props || flags->backup || flags->holds)) {
2294		/*
2295		 * write final end record.  NB: want to do this even if
2296		 * there was some error, because it might not be totally
2297		 * failed.
2298		 */
2299		err = send_conclusion_record(outfd, NULL);
2300		if (err != 0)
2301			return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
2302	}
2303
2304	return (err || sdd.err);
2305
2306stderr_out:
2307	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2308err_out:
2309	fsavl_destroy(fsavl);
2310	fnvlist_free(fss);
2311	fnvlist_free(sdd.snapholds);
2312
2313	if (sdd.cleanup_fd != -1)
2314		VERIFY(0 == close(sdd.cleanup_fd));
2315	return (err);
2316}
2317
2318static zfs_handle_t *
2319name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
2320{
2321	char dirname[ZFS_MAX_DATASET_NAME_LEN];
2322	(void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
2323	char *c = strchr(dirname, '@');
2324	if (c != NULL)
2325		*c = '\0';
2326	return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
2327}
2328
2329/*
2330 * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
2331 * an earlier snapshot in the same filesystem, or a snapshot before later's
2332 * origin, or it's origin's origin, etc.
2333 */
2334static boolean_t
2335snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
2336{
2337	boolean_t ret;
2338	uint64_t later_txg =
2339	    (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
2340	    later->zfs_type == ZFS_TYPE_VOLUME ?
2341	    UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
2342	uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
2343
2344	if (earlier_txg >= later_txg)
2345		return (B_FALSE);
2346
2347	zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
2348	    earlier->zfs_name);
2349	zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
2350	    later->zfs_name);
2351
2352	if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
2353		zfs_close(earlier_dir);
2354		zfs_close(later_dir);
2355		return (B_TRUE);
2356	}
2357
2358	char clonename[ZFS_MAX_DATASET_NAME_LEN];
2359	if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
2360	    ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) {
2361		zfs_close(earlier_dir);
2362		zfs_close(later_dir);
2363		return (B_FALSE);
2364	}
2365
2366	zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename,
2367	    ZFS_TYPE_DATASET);
2368	uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG);
2369
2370	/*
2371	 * If "earlier" is exactly the origin, then
2372	 * snapshot_is_before(earlier, origin) will return false (because
2373	 * they're the same).
2374	 */
2375	if (origin_txg == earlier_txg &&
2376	    strcmp(origin->zfs_name, earlier->zfs_name) == 0) {
2377		zfs_close(earlier_dir);
2378		zfs_close(later_dir);
2379		zfs_close(origin);
2380		return (B_TRUE);
2381	}
2382	zfs_close(earlier_dir);
2383	zfs_close(later_dir);
2384
2385	ret = snapshot_is_before(earlier, origin);
2386	zfs_close(origin);
2387	return (ret);
2388}
2389
2390/*
2391 * The "zhp" argument is the handle of the dataset to send (typically a
2392 * snapshot).  The "from" argument is the full name of the snapshot or
2393 * bookmark that is the incremental source.
2394 */
2395int
2396zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2397    const char *redactbook)
2398{
2399	int err;
2400	libzfs_handle_t *hdl = zhp->zfs_hdl;
2401	char *name = zhp->zfs_name;
2402	int orig_fd = fd;
2403	pthread_t ptid;
2404	progress_arg_t pa = { 0 };
2405
2406	char errbuf[1024];
2407	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2408	    "warning: cannot send '%s'"), name);
2409
2410	if (from != NULL && strchr(from, '@')) {
2411		zfs_handle_t *from_zhp = zfs_open(hdl, from,
2412		    ZFS_TYPE_DATASET);
2413		if (from_zhp == NULL)
2414			return (-1);
2415		if (!snapshot_is_before(from_zhp, zhp)) {
2416			zfs_close(from_zhp);
2417			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2418			    "not an earlier snapshot from the same fs"));
2419			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2420		}
2421		zfs_close(from_zhp);
2422	}
2423
2424	if (redactbook != NULL) {
2425		char bookname[ZFS_MAX_DATASET_NAME_LEN];
2426		nvlist_t *redact_snaps;
2427		zfs_handle_t *book_zhp;
2428		char *at, *pound;
2429		int dsnamelen;
2430
2431		pound = strchr(redactbook, '#');
2432		if (pound != NULL)
2433			redactbook = pound + 1;
2434		at = strchr(name, '@');
2435		if (at == NULL) {
2436			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2437			    "cannot do a redacted send to a filesystem"));
2438			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2439		}
2440		dsnamelen = at - name;
2441		if (snprintf(bookname, sizeof (bookname), "%.*s#%s",
2442		    dsnamelen, name, redactbook)
2443		    >= sizeof (bookname)) {
2444			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2445			    "invalid bookmark name"));
2446			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2447		}
2448		book_zhp = zfs_open(hdl, bookname, ZFS_TYPE_BOOKMARK);
2449		if (book_zhp == NULL)
2450			return (-1);
2451		if (nvlist_lookup_nvlist(book_zhp->zfs_props,
2452		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
2453		    &redact_snaps) != 0 || redact_snaps == NULL) {
2454			zfs_close(book_zhp);
2455			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2456			    "not a redaction bookmark"));
2457			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2458		}
2459		zfs_close(book_zhp);
2460	}
2461
2462	/*
2463	 * Send fs properties
2464	 */
2465	if (flags->props || flags->holds || flags->backup) {
2466		/*
2467		 * Note: the header generated by send_prelim_records()
2468		 * assumes that the incremental source is in the same
2469		 * filesystem/volume as the target (which is a requirement
2470		 * when doing "zfs send -R").  But that isn't always the
2471		 * case here (e.g. send from snap in origin, or send from
2472		 * bookmark).  We pass from=NULL, which will omit this
2473		 * information from the prelim records; it isn't used
2474		 * when receiving this type of stream.
2475		 */
2476		err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
2477		    flags->verbosity > 0, flags->dryrun, flags->raw,
2478		    flags->replicate, B_FALSE, flags->backup, flags->holds,
2479		    flags->props, flags->doall, NULL, NULL);
2480		if (err != 0)
2481			return (err);
2482	}
2483
2484	/*
2485	 * Perform size estimate if verbose was specified.
2486	 */
2487	if (flags->verbosity != 0) {
2488		err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
2489		    errbuf);
2490		if (err != 0)
2491			return (err);
2492	}
2493
2494	if (flags->dryrun)
2495		return (0);
2496
2497	/*
2498	 * If progress reporting is requested, spawn a new thread to poll
2499	 * ZFS_IOC_SEND_PROGRESS at a regular interval.
2500	 */
2501	if (flags->progress) {
2502		pa.pa_zhp = zhp;
2503		pa.pa_fd = fd;
2504		pa.pa_parsable = flags->parsable;
2505		pa.pa_estimate = B_FALSE;
2506		pa.pa_verbosity = flags->verbosity;
2507
2508		err = pthread_create(&ptid, NULL,
2509		    send_progress_thread, &pa);
2510		if (err != 0) {
2511			zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno));
2512			return (zfs_error(zhp->zfs_hdl,
2513			    EZFS_THREADCREATEFAILED, errbuf));
2514		}
2515	}
2516
2517	err = lzc_send_redacted(name, from, fd,
2518	    lzc_flags_from_sendflags(flags), redactbook);
2519
2520	if (flags->progress) {
2521		void *status = NULL;
2522		if (err != 0)
2523			(void) pthread_cancel(ptid);
2524		(void) pthread_join(ptid, &status);
2525		int error = (int)(uintptr_t)status;
2526		if (error != 0 && status != PTHREAD_CANCELED)
2527			return (zfs_standard_error_fmt(hdl, error,
2528			    dgettext(TEXT_DOMAIN,
2529			    "progress thread exited nonzero")));
2530	}
2531
2532	if (flags->props || flags->holds || flags->backup) {
2533		/* Write the final end record. */
2534		err = send_conclusion_record(orig_fd, NULL);
2535		if (err != 0)
2536			return (zfs_standard_error(hdl, err, errbuf));
2537	}
2538	if (err != 0) {
2539		switch (errno) {
2540		case EXDEV:
2541			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2542			    "not an earlier snapshot from the same fs"));
2543			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2544
2545		case ENOENT:
2546		case ESRCH:
2547			if (lzc_exists(name)) {
2548				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2549				    "incremental source (%s) does not exist"),
2550				    from);
2551			}
2552			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2553
2554		case EACCES:
2555			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2556			    "dataset key must be loaded"));
2557			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2558
2559		case EBUSY:
2560			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2561			    "target is busy; if a filesystem, "
2562			    "it must not be mounted"));
2563			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2564
2565		case EDQUOT:
2566		case EFAULT:
2567		case EFBIG:
2568		case EINVAL:
2569		case EIO:
2570		case ENOLINK:
2571		case ENOSPC:
2572		case ENOSTR:
2573		case ENXIO:
2574		case EPIPE:
2575		case ERANGE:
2576		case EROFS:
2577			zfs_error_aux(hdl, "%s", strerror(errno));
2578			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2579
2580		default:
2581			return (zfs_standard_error(hdl, errno, errbuf));
2582		}
2583	}
2584	return (err != 0);
2585}
2586
2587/*
2588 * Routines specific to "zfs recv"
2589 */
2590
2591static int
2592recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2593    boolean_t byteswap, zio_cksum_t *zc)
2594{
2595	char *cp = buf;
2596	int rv;
2597	int len = ilen;
2598
2599	do {
2600		rv = read(fd, cp, len);
2601		cp += rv;
2602		len -= rv;
2603	} while (rv > 0);
2604
2605	if (rv < 0 || len != 0) {
2606		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2607		    "failed to read from stream"));
2608		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2609		    "cannot receive")));
2610	}
2611
2612	if (zc) {
2613		if (byteswap)
2614			fletcher_4_incremental_byteswap(buf, ilen, zc);
2615		else
2616			fletcher_4_incremental_native(buf, ilen, zc);
2617	}
2618	return (0);
2619}
2620
2621static int
2622recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2623    boolean_t byteswap, zio_cksum_t *zc)
2624{
2625	char *buf;
2626	int err;
2627
2628	buf = zfs_alloc(hdl, len);
2629	if (buf == NULL)
2630		return (ENOMEM);
2631
2632	if (len > hdl->libzfs_max_nvlist) {
2633		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
2634		free(buf);
2635		return (ENOMEM);
2636	}
2637
2638	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2639	if (err != 0) {
2640		free(buf);
2641		return (err);
2642	}
2643
2644	err = nvlist_unpack(buf, len, nvp, 0);
2645	free(buf);
2646	if (err != 0) {
2647		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2648		    "stream (malformed nvlist)"));
2649		return (EINVAL);
2650	}
2651	return (0);
2652}
2653
2654/*
2655 * Returns the grand origin (origin of origin of origin...) of a given handle.
2656 * If this dataset is not a clone, it simply returns a copy of the original
2657 * handle.
2658 */
2659static zfs_handle_t *
2660recv_open_grand_origin(zfs_handle_t *zhp)
2661{
2662	char origin[ZFS_MAX_DATASET_NAME_LEN];
2663	zprop_source_t src;
2664	zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2665
2666	while (ozhp != NULL) {
2667		if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2668		    sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2669			break;
2670
2671		(void) zfs_close(ozhp);
2672		ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2673	}
2674
2675	return (ozhp);
2676}
2677
2678static int
2679recv_rename_impl(zfs_handle_t *zhp, const char *name, const char *newname)
2680{
2681	int err;
2682	zfs_handle_t *ozhp = NULL;
2683
2684	/*
2685	 * Attempt to rename the dataset. If it fails with EACCES we have
2686	 * attempted to rename the dataset outside of its encryption root.
2687	 * Force the dataset to become an encryption root and try again.
2688	 */
2689	err = lzc_rename(name, newname);
2690	if (err == EACCES) {
2691		ozhp = recv_open_grand_origin(zhp);
2692		if (ozhp == NULL) {
2693			err = ENOENT;
2694			goto out;
2695		}
2696
2697		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2698		    NULL, NULL, 0);
2699		if (err != 0)
2700			goto out;
2701
2702		err = lzc_rename(name, newname);
2703	}
2704
2705out:
2706	if (ozhp != NULL)
2707		zfs_close(ozhp);
2708	return (err);
2709}
2710
2711static int
2712recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2713    int baselen, char *newname, recvflags_t *flags)
2714{
2715	static int seq;
2716	int err;
2717	prop_changelist_t *clp = NULL;
2718	zfs_handle_t *zhp = NULL;
2719
2720	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2721	if (zhp == NULL) {
2722		err = -1;
2723		goto out;
2724	}
2725	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2726	    flags->force ? MS_FORCE : 0);
2727	if (clp == NULL) {
2728		err = -1;
2729		goto out;
2730	}
2731	err = changelist_prefix(clp);
2732	if (err)
2733		goto out;
2734
2735	if (tryname) {
2736		(void) strcpy(newname, tryname);
2737		if (flags->verbose) {
2738			(void) printf("attempting rename %s to %s\n",
2739			    name, newname);
2740		}
2741		err = recv_rename_impl(zhp, name, newname);
2742		if (err == 0)
2743			changelist_rename(clp, name, tryname);
2744	} else {
2745		err = ENOENT;
2746	}
2747
2748	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2749		seq++;
2750
2751		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2752		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2753
2754		if (flags->verbose) {
2755			(void) printf("failed - trying rename %s to %s\n",
2756			    name, newname);
2757		}
2758		err = recv_rename_impl(zhp, name, newname);
2759		if (err == 0)
2760			changelist_rename(clp, name, newname);
2761		if (err && flags->verbose) {
2762			(void) printf("failed (%u) - "
2763			    "will try again on next pass\n", errno);
2764		}
2765		err = EAGAIN;
2766	} else if (flags->verbose) {
2767		if (err == 0)
2768			(void) printf("success\n");
2769		else
2770			(void) printf("failed (%u)\n", errno);
2771	}
2772
2773	(void) changelist_postfix(clp);
2774
2775out:
2776	if (clp != NULL)
2777		changelist_free(clp);
2778	if (zhp != NULL)
2779		zfs_close(zhp);
2780
2781	return (err);
2782}
2783
2784static int
2785recv_promote(libzfs_handle_t *hdl, const char *fsname,
2786    const char *origin_fsname, recvflags_t *flags)
2787{
2788	int err;
2789	zfs_cmd_t zc = {"\0"};
2790	zfs_handle_t *zhp = NULL, *ozhp = NULL;
2791
2792	if (flags->verbose)
2793		(void) printf("promoting %s\n", fsname);
2794
2795	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
2796	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
2797
2798	/*
2799	 * Attempt to promote the dataset. If it fails with EACCES the
2800	 * promotion would cause this dataset to leave its encryption root.
2801	 * Force the origin to become an encryption root and try again.
2802	 */
2803	err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2804	if (err == EACCES) {
2805		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
2806		if (zhp == NULL) {
2807			err = -1;
2808			goto out;
2809		}
2810
2811		ozhp = recv_open_grand_origin(zhp);
2812		if (ozhp == NULL) {
2813			err = -1;
2814			goto out;
2815		}
2816
2817		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2818		    NULL, NULL, 0);
2819		if (err != 0)
2820			goto out;
2821
2822		err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2823	}
2824
2825out:
2826	if (zhp != NULL)
2827		zfs_close(zhp);
2828	if (ozhp != NULL)
2829		zfs_close(ozhp);
2830
2831	return (err);
2832}
2833
2834static int
2835recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2836    char *newname, recvflags_t *flags)
2837{
2838	int err = 0;
2839	prop_changelist_t *clp;
2840	zfs_handle_t *zhp;
2841	boolean_t defer = B_FALSE;
2842	int spa_version;
2843
2844	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2845	if (zhp == NULL)
2846		return (-1);
2847	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2848	    flags->force ? MS_FORCE : 0);
2849	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2850	    zfs_spa_version(zhp, &spa_version) == 0 &&
2851	    spa_version >= SPA_VERSION_USERREFS)
2852		defer = B_TRUE;
2853	zfs_close(zhp);
2854	if (clp == NULL)
2855		return (-1);
2856	err = changelist_prefix(clp);
2857	if (err)
2858		return (err);
2859
2860	if (flags->verbose)
2861		(void) printf("attempting destroy %s\n", name);
2862	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
2863		nvlist_t *nv = fnvlist_alloc();
2864		fnvlist_add_boolean(nv, name);
2865		err = lzc_destroy_snaps(nv, defer, NULL);
2866		fnvlist_free(nv);
2867	} else {
2868		err = lzc_destroy(name);
2869	}
2870	if (err == 0) {
2871		if (flags->verbose)
2872			(void) printf("success\n");
2873		changelist_remove(clp, name);
2874	}
2875
2876	(void) changelist_postfix(clp);
2877	changelist_free(clp);
2878
2879	/*
2880	 * Deferred destroy might destroy the snapshot or only mark it to be
2881	 * destroyed later, and it returns success in either case.
2882	 */
2883	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2884	    ZFS_TYPE_SNAPSHOT))) {
2885		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2886	}
2887
2888	return (err);
2889}
2890
/*
 * Search state shared by guid_to_name_redact_snaps() and the
 * guid_to_name_cb() iteration callback.
 */
typedef struct guid_to_name_data {
	/* guid being searched for (compared against ZFS_PROP_GUID) */
	uint64_t guid;
	/* when set, bookmarks are iterated in addition to children */
	boolean_t bookmark_ok;
	/* output buffer; receives the name of the matching dataset */
	char *name;
	/* last name component to skip while iterating, or NULL */
	char *skip;
	/* guids a matching bookmark's redaction snapshots must equal */
	uint64_t *redact_snap_guids;
	/* entries in redact_snap_guids; -1 disables the redaction check */
	uint64_t num_redact_snaps;
} guid_to_name_data_t;
2899
2900static boolean_t
2901redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
2902{
2903	uint64_t *bmark_snaps;
2904	uint_t bmark_num_snaps;
2905	nvlist_t *nvl;
2906	if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
2907		return (B_FALSE);
2908
2909	nvl = fnvlist_lookup_nvlist(zhp->zfs_props,
2910	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
2911	bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE,
2912	    &bmark_num_snaps);
2913	if (bmark_num_snaps != gtnd->num_redact_snaps)
2914		return (B_FALSE);
2915	int i = 0;
2916	for (; i < bmark_num_snaps; i++) {
2917		int j = 0;
2918		for (; j < bmark_num_snaps; j++) {
2919			if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
2920				break;
2921		}
2922		if (j == bmark_num_snaps)
2923			break;
2924	}
2925	return (i == bmark_num_snaps);
2926}
2927
2928static int
2929guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2930{
2931	guid_to_name_data_t *gtnd = arg;
2932	const char *slash;
2933	int err;
2934
2935	if (gtnd->skip != NULL &&
2936	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2937	    strcmp(slash + 1, gtnd->skip) == 0) {
2938		zfs_close(zhp);
2939		return (0);
2940	}
2941
2942	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
2943	    (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
2944		(void) strcpy(gtnd->name, zhp->zfs_name);
2945		zfs_close(zhp);
2946		return (EEXIST);
2947	}
2948
2949	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2950	if (err != EEXIST && gtnd->bookmark_ok)
2951		err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2952	zfs_close(zhp);
2953	return (err);
2954}
2955
2956/*
2957 * Attempt to find the local dataset associated with this guid.  In the case of
2958 * multiple matches, we attempt to find the "best" match by searching
2959 * progressively larger portions of the hierarchy.  This allows one to send a
2960 * tree of datasets individually and guarantee that we will find the source
2961 * guid within that hierarchy, even if there are multiple matches elsewhere.
2962 *
2963 * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
2964 * the specified number of redaction snapshots.  If num_redact_snaps isn't 0 or
2965 * -1, then redact_snap_guids will be an array of the guids of the snapshots the
2966 * redaction bookmark was created with.  If num_redact_snaps is -1, then we will
2967 * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
2968 * given guid.  Note that a redaction bookmark can be returned if
2969 * num_redact_snaps == -1.
2970 */
2971static int
2972guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
2973    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
2974    uint64_t num_redact_snaps, char *name)
2975{
2976	char pname[ZFS_MAX_DATASET_NAME_LEN];
2977	guid_to_name_data_t gtnd;
2978
2979	gtnd.guid = guid;
2980	gtnd.bookmark_ok = bookmark_ok;
2981	gtnd.name = name;
2982	gtnd.skip = NULL;
2983	gtnd.redact_snap_guids = redact_snap_guids;
2984	gtnd.num_redact_snaps = num_redact_snaps;
2985
2986	/*
2987	 * Search progressively larger portions of the hierarchy, starting
2988	 * with the filesystem specified by 'parent'.  This will
2989	 * select the "most local" version of the origin snapshot in the case
2990	 * that there are multiple matching snapshots in the system.
2991	 */
2992	(void) strlcpy(pname, parent, sizeof (pname));
2993	char *cp = strrchr(pname, '@');
2994	if (cp == NULL)
2995		cp = strchr(pname, '\0');
2996	for (; cp != NULL; cp = strrchr(pname, '/')) {
2997		/* Chop off the last component and open the parent */
2998		*cp = '\0';
2999		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
3000
3001		if (zhp == NULL)
3002			continue;
3003		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
3004		if (err != EEXIST)
3005			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
3006		if (err != EEXIST && bookmark_ok)
3007			err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
3008		zfs_close(zhp);
3009		if (err == EEXIST)
3010			return (0);
3011
3012		/*
3013		 * Remember the last portion of the dataset so we skip it next
3014		 * time through (as we've already searched that portion of the
3015		 * hierarchy).
3016		 */
3017		gtnd.skip = strrchr(pname, '/') + 1;
3018	}
3019
3020	return (ENOENT);
3021}
3022
3023static int
3024guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
3025    boolean_t bookmark_ok, char *name)
3026{
3027	return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
3028	    -1, name));
3029}
3030
3031/*
3032 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
3033 * guid1 is after guid2.
3034 */
3035static int
3036created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
3037    uint64_t guid1, uint64_t guid2)
3038{
3039	nvlist_t *nvfs;
3040	char *fsname = NULL, *snapname = NULL;
3041	char buf[ZFS_MAX_DATASET_NAME_LEN];
3042	int rv;
3043	zfs_handle_t *guid1hdl, *guid2hdl;
3044	uint64_t create1, create2;
3045
3046	if (guid2 == 0)
3047		return (0);
3048	if (guid1 == 0)
3049		return (1);
3050
3051	nvfs = fsavl_find(avl, guid1, &snapname);
3052	fsname = fnvlist_lookup_string(nvfs, "name");
3053	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3054	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3055	if (guid1hdl == NULL)
3056		return (-1);
3057
3058	nvfs = fsavl_find(avl, guid2, &snapname);
3059	fsname = fnvlist_lookup_string(nvfs, "name");
3060	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3061	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3062	if (guid2hdl == NULL) {
3063		zfs_close(guid1hdl);
3064		return (-1);
3065	}
3066
3067	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
3068	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
3069
3070	if (create1 < create2)
3071		rv = -1;
3072	else if (create1 > create2)
3073		rv = +1;
3074	else
3075		rv = 0;
3076
3077	zfs_close(guid1hdl);
3078	zfs_close(guid2hdl);
3079
3080	return (rv);
3081}
3082
3083/*
3084 * This function reestablishes the hierarchy of encryption roots after a
3085 * recursive incremental receive has completed. This must be done after the
3086 * second call to recv_incremental_replication() has renamed and promoted all
3087 * sent datasets to their final locations in the dataset hierarchy.
3088 */
3089static int
3090recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs,
3091    nvlist_t *stream_nv, avl_tree_t *stream_avl)
3092{
3093	int err;
3094	nvpair_t *fselem = NULL;
3095	nvlist_t *stream_fss;
3096
3097	stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3098
3099	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
3100		zfs_handle_t *zhp = NULL;
3101		uint64_t crypt;
3102		nvlist_t *snaps, *props, *stream_nvfs = NULL;
3103		nvpair_t *snapel = NULL;
3104		boolean_t is_encroot, is_clone, stream_encroot;
3105		char *cp;
3106		char *stream_keylocation = NULL;
3107		char keylocation[MAXNAMELEN];
3108		char fsname[ZFS_MAX_DATASET_NAME_LEN];
3109
3110		keylocation[0] = '\0';
3111		stream_nvfs = fnvpair_value_nvlist(fselem);
3112		snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps");
3113		props = fnvlist_lookup_nvlist(stream_nvfs, "props");
3114		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
3115
3116		/* find a snapshot from the stream that exists locally */
3117		err = ENOENT;
3118		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
3119			uint64_t guid;
3120
3121			guid = fnvpair_value_uint64(snapel);
3122			err = guid_to_name(hdl, top_zfs, guid, B_FALSE,
3123			    fsname);
3124			if (err == 0)
3125				break;
3126		}
3127
3128		if (err != 0)
3129			continue;
3130
3131		cp = strchr(fsname, '@');
3132		if (cp != NULL)
3133			*cp = '\0';
3134
3135		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3136		if (zhp == NULL) {
3137			err = ENOENT;
3138			goto error;
3139		}
3140
3141		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
3142		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
3143		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
3144
3145		/* we don't need to do anything for unencrypted datasets */
3146		if (crypt == ZIO_CRYPT_OFF) {
3147			zfs_close(zhp);
3148			continue;
3149		}
3150
3151		/*
3152		 * If the dataset is flagged as an encryption root, was not
3153		 * received as a clone and is not currently an encryption root,
3154		 * force it to become one. Fixup the keylocation if necessary.
3155		 */
3156		if (stream_encroot) {
3157			if (!is_clone && !is_encroot) {
3158				err = lzc_change_key(fsname,
3159				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
3160				if (err != 0) {
3161					zfs_close(zhp);
3162					goto error;
3163				}
3164			}
3165
3166			stream_keylocation = fnvlist_lookup_string(props,
3167			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3168
3169			/*
3170			 * Refresh the properties in case the call to
3171			 * lzc_change_key() changed the value.
3172			 */
3173			zfs_refresh_properties(zhp);
3174			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
3175			    keylocation, sizeof (keylocation), NULL, NULL,
3176			    0, B_TRUE);
3177			if (err != 0) {
3178				zfs_close(zhp);
3179				goto error;
3180			}
3181
3182			if (strcmp(keylocation, stream_keylocation) != 0) {
3183				err = zfs_prop_set(zhp,
3184				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3185				    stream_keylocation);
3186				if (err != 0) {
3187					zfs_close(zhp);
3188					goto error;
3189				}
3190			}
3191		}
3192
3193		/*
3194		 * If the dataset is not flagged as an encryption root and is
3195		 * currently an encryption root, force it to inherit from its
3196		 * parent. The root of a raw send should never be
3197		 * force-inherited.
3198		 */
3199		if (!stream_encroot && is_encroot &&
3200		    strcmp(top_zfs, fsname) != 0) {
3201			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
3202			    NULL, NULL, 0);
3203			if (err != 0) {
3204				zfs_close(zhp);
3205				goto error;
3206			}
3207		}
3208
3209		zfs_close(zhp);
3210	}
3211
3212	return (0);
3213
3214error:
3215	return (err);
3216}
3217
3218static int
3219recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
3220    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3221    nvlist_t *renamed)
3222{
3223	nvlist_t *local_nv, *deleted = NULL;
3224	avl_tree_t *local_avl;
3225	nvpair_t *fselem, *nextfselem;
3226	char *fromsnap;
3227	char newname[ZFS_MAX_DATASET_NAME_LEN];
3228	char guidname[32];
3229	int error;
3230	boolean_t needagain, progress, recursive;
3231	char *s1, *s2;
3232
3233	fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap");
3234
3235	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3236	    ENOENT);
3237
3238	if (flags->dryrun)
3239		return (0);
3240
3241again:
3242	needagain = progress = B_FALSE;
3243
3244	deleted = fnvlist_alloc();
3245
3246	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
3247	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE,
3248	    B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
3249		return (error);
3250
3251	/*
3252	 * Process deletes and renames
3253	 */
3254	for (fselem = nvlist_next_nvpair(local_nv, NULL);
3255	    fselem; fselem = nextfselem) {
3256		nvlist_t *nvfs, *snaps;
3257		nvlist_t *stream_nvfs = NULL;
3258		nvpair_t *snapelem, *nextsnapelem;
3259		uint64_t fromguid = 0;
3260		uint64_t originguid = 0;
3261		uint64_t stream_originguid = 0;
3262		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
3263		char *fsname, *stream_fsname;
3264
3265		nextfselem = nvlist_next_nvpair(local_nv, fselem);
3266
3267		nvfs = fnvpair_value_nvlist(fselem);
3268		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
3269		fsname = fnvlist_lookup_string(nvfs, "name");
3270		parent_fromsnap_guid = fnvlist_lookup_uint64(nvfs,
3271		    "parentfromsnap");
3272		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
3273
3274		/*
3275		 * First find the stream's fs, so we can check for
3276		 * a different origin (due to "zfs promote")
3277		 */
3278		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3279		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
3280			uint64_t thisguid;
3281
3282			thisguid = fnvpair_value_uint64(snapelem);
3283			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
3284
3285			if (stream_nvfs != NULL)
3286				break;
3287		}
3288
3289		/* check for promote */
3290		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
3291		    &stream_originguid);
3292		if (stream_nvfs && originguid != stream_originguid) {
3293			switch (created_before(hdl, local_avl,
3294			    stream_originguid, originguid)) {
3295			case 1: {
3296				/* promote it! */
3297				nvlist_t *origin_nvfs;
3298				char *origin_fsname;
3299
3300				origin_nvfs = fsavl_find(local_avl, originguid,
3301				    NULL);
3302				origin_fsname = fnvlist_lookup_string(
3303				    origin_nvfs, "name");
3304				error = recv_promote(hdl, fsname, origin_fsname,
3305				    flags);
3306				if (error == 0)
3307					progress = B_TRUE;
3308				break;
3309			}
3310			default:
3311				break;
3312			case -1:
3313				fsavl_destroy(local_avl);
3314				fnvlist_free(local_nv);
3315				return (-1);
3316			}
3317			/*
3318			 * We had/have the wrong origin, therefore our
3319			 * list of snapshots is wrong.  Need to handle
3320			 * them on the next pass.
3321			 */
3322			needagain = B_TRUE;
3323			continue;
3324		}
3325
3326		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3327		    snapelem; snapelem = nextsnapelem) {
3328			uint64_t thisguid;
3329			char *stream_snapname;
3330			nvlist_t *found, *props;
3331
3332			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
3333
3334			thisguid = fnvpair_value_uint64(snapelem);
3335			found = fsavl_find(stream_avl, thisguid,
3336			    &stream_snapname);
3337
3338			/* check for delete */
3339			if (found == NULL) {
3340				char name[ZFS_MAX_DATASET_NAME_LEN];
3341
3342				if (!flags->force)
3343					continue;
3344
3345				(void) snprintf(name, sizeof (name), "%s@%s",
3346				    fsname, nvpair_name(snapelem));
3347
3348				error = recv_destroy(hdl, name,
3349				    strlen(fsname)+1, newname, flags);
3350				if (error)
3351					needagain = B_TRUE;
3352				else
3353					progress = B_TRUE;
3354				sprintf(guidname, "%llu",
3355				    (u_longlong_t)thisguid);
3356				nvlist_add_boolean(deleted, guidname);
3357				continue;
3358			}
3359
3360			stream_nvfs = found;
3361
3362			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
3363			    &props) && 0 == nvlist_lookup_nvlist(props,
3364			    stream_snapname, &props)) {
3365				zfs_cmd_t zc = {"\0"};
3366
3367				zc.zc_cookie = B_TRUE; /* received */
3368				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
3369				    "%s@%s", fsname, nvpair_name(snapelem));
3370				if (zcmd_write_src_nvlist(hdl, &zc,
3371				    props) == 0) {
3372					(void) zfs_ioctl(hdl,
3373					    ZFS_IOC_SET_PROP, &zc);
3374					zcmd_free_nvlists(&zc);
3375				}
3376			}
3377
3378			/* check for different snapname */
3379			if (strcmp(nvpair_name(snapelem),
3380			    stream_snapname) != 0) {
3381				char name[ZFS_MAX_DATASET_NAME_LEN];
3382				char tryname[ZFS_MAX_DATASET_NAME_LEN];
3383
3384				(void) snprintf(name, sizeof (name), "%s@%s",
3385				    fsname, nvpair_name(snapelem));
3386				(void) snprintf(tryname, sizeof (name), "%s@%s",
3387				    fsname, stream_snapname);
3388
3389				error = recv_rename(hdl, name, tryname,
3390				    strlen(fsname)+1, newname, flags);
3391				if (error)
3392					needagain = B_TRUE;
3393				else
3394					progress = B_TRUE;
3395			}
3396
3397			if (strcmp(stream_snapname, fromsnap) == 0)
3398				fromguid = thisguid;
3399		}
3400
3401		/* check for delete */
3402		if (stream_nvfs == NULL) {
3403			if (!flags->force)
3404				continue;
3405
3406			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
3407			    newname, flags);
3408			if (error)
3409				needagain = B_TRUE;
3410			else
3411				progress = B_TRUE;
3412			sprintf(guidname, "%llu",
3413			    (u_longlong_t)parent_fromsnap_guid);
3414			nvlist_add_boolean(deleted, guidname);
3415			continue;
3416		}
3417
3418		if (fromguid == 0) {
3419			if (flags->verbose) {
3420				(void) printf("local fs %s does not have "
3421				    "fromsnap (%s in stream); must have "
3422				    "been deleted locally; ignoring\n",
3423				    fsname, fromsnap);
3424			}
3425			continue;
3426		}
3427
3428		stream_fsname = fnvlist_lookup_string(stream_nvfs, "name");
3429		stream_parent_fromsnap_guid = fnvlist_lookup_uint64(
3430		    stream_nvfs, "parentfromsnap");
3431
3432		s1 = strrchr(fsname, '/');
3433		s2 = strrchr(stream_fsname, '/');
3434
3435		/*
3436		 * Check if we're going to rename based on parent guid change
3437		 * and the current parent guid was also deleted. If it was then
3438		 * rename will fail and is likely unneeded, so avoid this and
3439		 * force an early retry to determine the new
3440		 * parent_fromsnap_guid.
3441		 */
3442		if (stream_parent_fromsnap_guid != 0 &&
3443		    parent_fromsnap_guid != 0 &&
3444		    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3445			sprintf(guidname, "%llu",
3446			    (u_longlong_t)parent_fromsnap_guid);
3447			if (nvlist_exists(deleted, guidname)) {
3448				progress = B_TRUE;
3449				needagain = B_TRUE;
3450				goto doagain;
3451			}
3452		}
3453
3454		/*
3455		 * Check for rename. If the exact receive path is specified, it
3456		 * does not count as a rename, but we still need to check the
3457		 * datasets beneath it.
3458		 */
3459		if ((stream_parent_fromsnap_guid != 0 &&
3460		    parent_fromsnap_guid != 0 &&
3461		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3462		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3463		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3464			nvlist_t *parent;
3465			char tryname[ZFS_MAX_DATASET_NAME_LEN];
3466
3467			parent = fsavl_find(local_avl,
3468			    stream_parent_fromsnap_guid, NULL);
3469			/*
3470			 * NB: parent might not be found if we used the
3471			 * tosnap for stream_parent_fromsnap_guid,
3472			 * because the parent is a newly-created fs;
3473			 * we'll be able to rename it after we recv the
3474			 * new fs.
3475			 */
3476			if (parent != NULL) {
3477				char *pname;
3478
3479				pname = fnvlist_lookup_string(parent, "name");
3480				(void) snprintf(tryname, sizeof (tryname),
3481				    "%s%s", pname, strrchr(stream_fsname, '/'));
3482			} else {
3483				tryname[0] = '\0';
3484				if (flags->verbose) {
3485					(void) printf("local fs %s new parent "
3486					    "not found\n", fsname);
3487				}
3488			}
3489
3490			newname[0] = '\0';
3491
3492			error = recv_rename(hdl, fsname, tryname,
3493			    strlen(tofs)+1, newname, flags);
3494
3495			if (renamed != NULL && newname[0] != '\0') {
3496				fnvlist_add_boolean(renamed, newname);
3497			}
3498
3499			if (error)
3500				needagain = B_TRUE;
3501			else
3502				progress = B_TRUE;
3503		}
3504	}
3505
3506doagain:
3507	fsavl_destroy(local_avl);
3508	fnvlist_free(local_nv);
3509	fnvlist_free(deleted);
3510
3511	if (needagain && progress) {
3512		/* do another pass to fix up temporary names */
3513		if (flags->verbose)
3514			(void) printf("another pass:\n");
3515		goto again;
3516	}
3517
3518	return (needagain || error != 0);
3519}
3520
3521static int
3522zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3523    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3524    char **top_zfs, nvlist_t *cmdprops)
3525{
3526	nvlist_t *stream_nv = NULL;
3527	avl_tree_t *stream_avl = NULL;
3528	char *fromsnap = NULL;
3529	char *sendsnap = NULL;
3530	char *cp;
3531	char tofs[ZFS_MAX_DATASET_NAME_LEN];
3532	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3533	char errbuf[1024];
3534	dmu_replay_record_t drre;
3535	int error;
3536	boolean_t anyerr = B_FALSE;
3537	boolean_t softerr = B_FALSE;
3538	boolean_t recursive, raw;
3539
3540	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3541	    "cannot receive"));
3542
3543	assert(drr->drr_type == DRR_BEGIN);
3544	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3545	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3546	    DMU_COMPOUNDSTREAM);
3547
3548	/*
3549	 * Read in the nvlist from the stream.
3550	 */
3551	if (drr->drr_payloadlen != 0) {
3552		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3553		    &stream_nv, flags->byteswap, zc);
3554		if (error) {
3555			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3556			goto out;
3557		}
3558	}
3559
3560	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3561	    ENOENT);
3562	raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3563
3564	if (recursive && strchr(destname, '@')) {
3565		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3566		    "cannot specify snapshot name for multi-snapshot stream"));
3567		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3568		goto out;
3569	}
3570
3571	/*
3572	 * Read in the end record and verify checksum.
3573	 */
3574	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3575	    flags->byteswap, NULL)))
3576		goto out;
3577	if (flags->byteswap) {
3578		drre.drr_type = BSWAP_32(drre.drr_type);
3579		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3580		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3581		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3582		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3583		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3584		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3585		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3586		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3587	}
3588	if (drre.drr_type != DRR_END) {
3589		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3590		goto out;
3591	}
3592	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3593		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3594		    "incorrect header checksum"));
3595		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3596		goto out;
3597	}
3598
3599	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3600
3601	if (drr->drr_payloadlen != 0) {
3602		nvlist_t *stream_fss;
3603
3604		stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3605		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3606			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3607			    "couldn't allocate avl tree"));
3608			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3609			goto out;
3610		}
3611
3612		if (fromsnap != NULL && recursive) {
3613			nvlist_t *renamed = NULL;
3614			nvpair_t *pair = NULL;
3615
3616			(void) strlcpy(tofs, destname, sizeof (tofs));
3617			if (flags->isprefix) {
3618				struct drr_begin *drrb = &drr->drr_u.drr_begin;
3619				int i;
3620
3621				if (flags->istail) {
3622					cp = strrchr(drrb->drr_toname, '/');
3623					if (cp == NULL) {
3624						(void) strlcat(tofs, "/",
3625						    sizeof (tofs));
3626						i = 0;
3627					} else {
3628						i = (cp - drrb->drr_toname);
3629					}
3630				} else {
3631					i = strcspn(drrb->drr_toname, "/@");
3632				}
3633				/* zfs_receive_one() will create_parents() */
3634				(void) strlcat(tofs, &drrb->drr_toname[i],
3635				    sizeof (tofs));
3636				*strchr(tofs, '@') = '\0';
3637			}
3638
3639			if (!flags->dryrun && !flags->nomount) {
3640				renamed = fnvlist_alloc();
3641			}
3642
3643			softerr = recv_incremental_replication(hdl, tofs, flags,
3644			    stream_nv, stream_avl, renamed);
3645
3646			/* Unmount renamed filesystems before receiving. */
3647			while ((pair = nvlist_next_nvpair(renamed,
3648			    pair)) != NULL) {
3649				zfs_handle_t *zhp;
3650				prop_changelist_t *clp = NULL;
3651
3652				zhp = zfs_open(hdl, nvpair_name(pair),
3653				    ZFS_TYPE_FILESYSTEM);
3654				if (zhp != NULL) {
3655					clp = changelist_gather(zhp,
3656					    ZFS_PROP_MOUNTPOINT, 0,
3657					    flags->forceunmount ? MS_FORCE : 0);
3658					zfs_close(zhp);
3659					if (clp != NULL) {
3660						softerr |=
3661						    changelist_prefix(clp);
3662						changelist_free(clp);
3663					}
3664				}
3665			}
3666
3667			fnvlist_free(renamed);
3668		}
3669	}
3670
3671	/*
3672	 * Get the fs specified by the first path in the stream (the top level
3673	 * specified by 'zfs send') and pass it to each invocation of
3674	 * zfs_receive_one().
3675	 */
3676	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3677	    sizeof (sendfs));
3678	if ((cp = strchr(sendfs, '@')) != NULL) {
3679		*cp = '\0';
3680		/*
3681		 * Find the "sendsnap", the final snapshot in a replication
3682		 * stream.  zfs_receive_one() handles certain errors
3683		 * differently, depending on if the contained stream is the
3684		 * last one or not.
3685		 */
3686		sendsnap = (cp + 1);
3687	}
3688
3689	/* Finally, receive each contained stream */
3690	do {
3691		/*
3692		 * we should figure out if it has a recoverable
3693		 * error, in which case do a recv_skip() and drive on.
3694		 * Note, if we fail due to already having this guid,
3695		 * zfs_receive_one() will take care of it (ie,
3696		 * recv_skip() and return 0).
3697		 */
3698		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3699		    sendfs, stream_nv, stream_avl, top_zfs, sendsnap, cmdprops);
3700		if (error == ENODATA) {
3701			error = 0;
3702			break;
3703		}
3704		anyerr |= error;
3705	} while (error == 0);
3706
3707	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3708		/*
3709		 * Now that we have the fs's they sent us, try the
3710		 * renames again.
3711		 */
3712		softerr = recv_incremental_replication(hdl, tofs, flags,
3713		    stream_nv, stream_avl, NULL);
3714	}
3715
3716	if (raw && softerr == 0 && *top_zfs != NULL) {
3717		softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs,
3718		    stream_nv, stream_avl);
3719	}
3720
3721out:
3722	fsavl_destroy(stream_avl);
3723	fnvlist_free(stream_nv);
3724	if (softerr)
3725		error = -2;
3726	if (anyerr)
3727		error = -1;
3728	return (error);
3729}
3730
3731static void
3732trunc_prop_errs(int truncated)
3733{
3734	ASSERT(truncated != 0);
3735
3736	if (truncated == 1)
3737		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3738		    "1 more property could not be set\n"));
3739	else
3740		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3741		    "%d more properties could not be set\n"), truncated);
3742}
3743
3744static int
3745recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
3746{
3747	dmu_replay_record_t *drr;
3748	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
3749	uint64_t payload_size;
3750	char errbuf[1024];
3751
3752	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3753	    "cannot receive"));
3754
3755	/* XXX would be great to use lseek if possible... */
3756	drr = buf;
3757
3758	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
3759	    byteswap, NULL) == 0) {
3760		if (byteswap)
3761			drr->drr_type = BSWAP_32(drr->drr_type);
3762
3763		switch (drr->drr_type) {
3764		case DRR_BEGIN:
3765			if (drr->drr_payloadlen != 0) {
3766				(void) recv_read(hdl, fd, buf,
3767				    drr->drr_payloadlen, B_FALSE, NULL);
3768			}
3769			break;
3770
3771		case DRR_END:
3772			free(buf);
3773			return (0);
3774
3775		case DRR_OBJECT:
3776			if (byteswap) {
3777				drr->drr_u.drr_object.drr_bonuslen =
3778				    BSWAP_32(drr->drr_u.drr_object.
3779				    drr_bonuslen);
3780				drr->drr_u.drr_object.drr_raw_bonuslen =
3781				    BSWAP_32(drr->drr_u.drr_object.
3782				    drr_raw_bonuslen);
3783			}
3784
3785			payload_size =
3786			    DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object);
3787			(void) recv_read(hdl, fd, buf, payload_size,
3788			    B_FALSE, NULL);
3789			break;
3790
3791		case DRR_WRITE:
3792			if (byteswap) {
3793				drr->drr_u.drr_write.drr_logical_size =
3794				    BSWAP_64(
3795				    drr->drr_u.drr_write.drr_logical_size);
3796				drr->drr_u.drr_write.drr_compressed_size =
3797				    BSWAP_64(
3798				    drr->drr_u.drr_write.drr_compressed_size);
3799			}
3800			payload_size =
3801			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
3802			assert(payload_size <= SPA_MAXBLOCKSIZE);
3803			(void) recv_read(hdl, fd, buf,
3804			    payload_size, B_FALSE, NULL);
3805			break;
3806		case DRR_SPILL:
3807			if (byteswap) {
3808				drr->drr_u.drr_spill.drr_length =
3809				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
3810				drr->drr_u.drr_spill.drr_compressed_size =
3811				    BSWAP_64(drr->drr_u.drr_spill.
3812				    drr_compressed_size);
3813			}
3814
3815			payload_size =
3816			    DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill);
3817			(void) recv_read(hdl, fd, buf, payload_size,
3818			    B_FALSE, NULL);
3819			break;
3820		case DRR_WRITE_EMBEDDED:
3821			if (byteswap) {
3822				drr->drr_u.drr_write_embedded.drr_psize =
3823				    BSWAP_32(drr->drr_u.drr_write_embedded.
3824				    drr_psize);
3825			}
3826			(void) recv_read(hdl, fd, buf,
3827			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
3828			    8), B_FALSE, NULL);
3829			break;
3830		case DRR_OBJECT_RANGE:
3831		case DRR_WRITE_BYREF:
3832		case DRR_FREEOBJECTS:
3833		case DRR_FREE:
3834			break;
3835
3836		default:
3837			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3838			    "invalid record type"));
3839			free(buf);
3840			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3841		}
3842	}
3843
3844	free(buf);
3845	return (-1);
3846}
3847
3848static void
3849recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3850    boolean_t resumable, boolean_t checksum)
3851{
3852	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3853
3854	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, (checksum ?
3855	    "checksum mismatch" : "incomplete stream")));
3856
3857	if (!resumable)
3858		return;
3859	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3860	*strchr(target_fs, '@') = '\0';
3861	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3862	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3863	if (zhp == NULL)
3864		return;
3865
3866	char token_buf[ZFS_MAXPROPLEN];
3867	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3868	    token_buf, sizeof (token_buf),
3869	    NULL, NULL, 0, B_TRUE);
3870	if (error == 0) {
3871		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3872		    "checksum mismatch or incomplete stream.\n"
3873		    "Partially received snapshot is saved.\n"
3874		    "A resuming stream can be generated on the sending "
3875		    "system by running:\n"
3876		    "    zfs send -t %s"),
3877		    token_buf);
3878	}
3879	zfs_close(zhp);
3880}
3881
3882/*
3883 * Prepare a new nvlist of properties that are to override (-o) or be excluded
3884 * (-x) from the received dataset
3885 * recvprops: received properties from the send stream
3886 * cmdprops: raw input properties from command line
3887 * origprops: properties, both locally-set and received, currently set on the
3888 *            target dataset if it exists, NULL otherwise.
3889 * oxprops: valid output override (-o) and excluded (-x) properties
3890 */
3891static int
3892zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
3893    char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
3894    boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
3895    nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
3896    uint_t *wkeylen_out, const char *errbuf)
3897{
3898	nvpair_t *nvp;
3899	nvlist_t *oprops, *voprops;
3900	zfs_handle_t *zhp = NULL;
3901	zpool_handle_t *zpool_hdl = NULL;
3902	char *cp;
3903	int ret = 0;
3904	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
3905
3906	if (nvlist_empty(cmdprops))
3907		return (0); /* No properties to override or exclude */
3908
3909	*oxprops = fnvlist_alloc();
3910	oprops = fnvlist_alloc();
3911
3912	strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
3913
3914	/*
3915	 * Get our dataset handle. The target dataset may not exist yet.
3916	 */
3917	if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
3918		zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
3919		if (zhp == NULL) {
3920			ret = -1;
3921			goto error;
3922		}
3923	}
3924
3925	/* open the zpool handle */
3926	cp = strchr(namebuf, '/');
3927	if (cp != NULL)
3928		*cp = '\0';
3929	zpool_hdl = zpool_open(hdl, namebuf);
3930	if (zpool_hdl == NULL) {
3931		ret = -1;
3932		goto error;
3933	}
3934
3935	/* restore namebuf to match fsname for later use */
3936	if (cp != NULL)
3937		*cp = '/';
3938
3939	/*
3940	 * first iteration: process excluded (-x) properties now and gather
3941	 * added (-o) properties to be later processed by zfs_valid_proplist()
3942	 */
3943	nvp = NULL;
3944	while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
3945		const char *name = nvpair_name(nvp);
3946		zfs_prop_t prop = zfs_name_to_prop(name);
3947
3948		/* "origin" is processed separately, don't handle it here */
3949		if (prop == ZFS_PROP_ORIGIN)
3950			continue;
3951
3952		/* raw streams can't override encryption properties */
3953		if ((zfs_prop_encryption_key_param(prop) ||
3954		    prop == ZFS_PROP_ENCRYPTION) && raw) {
3955			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3956			    "encryption property '%s' cannot "
3957			    "be set or excluded for raw streams."), name);
3958			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3959			goto error;
3960		}
3961
3962		/* incremental streams can only exclude encryption properties */
3963		if ((zfs_prop_encryption_key_param(prop) ||
3964		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
3965		    nvpair_type(nvp) != DATA_TYPE_BOOLEAN) {
3966			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3967			    "encryption property '%s' cannot "
3968			    "be set for incremental streams."), name);
3969			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3970			goto error;
3971		}
3972
3973		switch (nvpair_type(nvp)) {
3974		case DATA_TYPE_BOOLEAN: /* -x property */
3975			/*
3976			 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
3977			 * a property: this is done by forcing an explicit
3978			 * inherit on the destination so the effective value is
3979			 * not the one we received from the send stream.
3980			 */
3981			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
3982			    !zfs_prop_user(name)) {
3983				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3984				    "Warning: %s: property '%s' does not "
3985				    "apply to datasets of this type\n"),
3986				    fsname, name);
3987				continue;
3988			}
3989			/*
3990			 * We do this only if the property is not already
3991			 * locally-set, in which case its value will take
3992			 * priority over the received anyway.
3993			 */
3994			if (nvlist_exists(origprops, name)) {
3995				nvlist_t *attrs;
3996				char *source = NULL;
3997
3998				attrs = fnvlist_lookup_nvlist(origprops, name);
3999				if (nvlist_lookup_string(attrs,
4000				    ZPROP_SOURCE, &source) == 0 &&
4001				    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
4002					continue;
4003			}
4004			/*
4005			 * We can't force an explicit inherit on non-inheritable
4006			 * properties: if we're asked to exclude this kind of
4007			 * values we remove them from "recvprops" input nvlist.
4008			 */
4009			if (!zfs_prop_inheritable(prop) &&
4010			    !zfs_prop_user(name) && /* can be inherited too */
4011			    nvlist_exists(recvprops, name))
4012				fnvlist_remove(recvprops, name);
4013			else
4014				fnvlist_add_nvpair(*oxprops, nvp);
4015			break;
4016		case DATA_TYPE_STRING: /* -o property=value */
4017			/*
4018			 * we're trying to override a property that does not
4019			 * make sense for this type of dataset, but we don't
4020			 * want to fail if the receive is recursive: this comes
4021			 * in handy when the send stream contains, for
4022			 * instance, a child ZVOL and we're trying to receive
4023			 * it with "-o atime=on"
4024			 */
4025			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4026			    !zfs_prop_user(name)) {
4027				if (recursive)
4028					continue;
4029				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4030				    "property '%s' does not apply to datasets "
4031				    "of this type"), name);
4032				ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4033				goto error;
4034			}
4035			fnvlist_add_nvpair(oprops, nvp);
4036			break;
4037		default:
4038			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4039			    "property '%s' must be a string or boolean"), name);
4040			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4041			goto error;
4042		}
4043	}
4044
4045	if (toplevel) {
4046		/* convert override strings properties to native */
4047		if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
4048		    oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
4049			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4050			goto error;
4051		}
4052
4053		/*
4054		 * zfs_crypto_create() requires the parent name. Get it
4055		 * by truncating the fsname copy stored in namebuf.
4056		 */
4057		cp = strrchr(namebuf, '/');
4058		if (cp != NULL)
4059			*cp = '\0';
4060
4061		if (!raw && zfs_crypto_create(hdl, namebuf, voprops, NULL,
4062		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
4063			fnvlist_free(voprops);
4064			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4065			goto error;
4066		}
4067
4068		/* second pass: process "-o" properties */
4069		fnvlist_merge(*oxprops, voprops);
4070		fnvlist_free(voprops);
4071	} else {
4072		/* override props on child dataset are inherited */
4073		nvp = NULL;
4074		while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
4075			const char *name = nvpair_name(nvp);
4076			fnvlist_add_boolean(*oxprops, name);
4077		}
4078	}
4079
4080error:
4081	if (zhp != NULL)
4082		zfs_close(zhp);
4083	if (zpool_hdl != NULL)
4084		zpool_close(zpool_hdl);
4085	fnvlist_free(oprops);
4086	return (ret);
4087}
4088
4089/*
4090 * Restores a backup of tosnap from the file descriptor specified by infd.
4091 */
4092static int
4093zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
4094    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
4095    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
4096    avl_tree_t *stream_avl, char **top_zfs,
4097    const char *finalsnap, nvlist_t *cmdprops)
4098{
4099	time_t begin_time;
4100	int ioctl_err, ioctl_errno, err;
4101	char *cp;
4102	struct drr_begin *drrb = &drr->drr_u.drr_begin;
4103	char errbuf[1024];
4104	const char *chopprefix;
4105	boolean_t newfs = B_FALSE;
4106	boolean_t stream_wantsnewfs, stream_resumingnewfs;
4107	boolean_t newprops = B_FALSE;
4108	uint64_t read_bytes = 0;
4109	uint64_t errflags = 0;
4110	uint64_t parent_snapguid = 0;
4111	prop_changelist_t *clp = NULL;
4112	nvlist_t *snapprops_nvlist = NULL;
4113	nvlist_t *snapholds_nvlist = NULL;
4114	zprop_errflags_t prop_errflags;
4115	nvlist_t *prop_errors = NULL;
4116	boolean_t recursive;
4117	char *snapname = NULL;
4118	char destsnap[MAXPATHLEN * 2];
4119	char origin[MAXNAMELEN];
4120	char name[MAXPATHLEN];
4121	char tmp_keylocation[MAXNAMELEN];
4122	nvlist_t *rcvprops = NULL; /* props received from the send stream */
4123	nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
4124	nvlist_t *origprops = NULL; /* original props (if destination exists) */
4125	zfs_type_t type;
4126	boolean_t toplevel = B_FALSE;
4127	boolean_t zoned = B_FALSE;
4128	boolean_t hastoken = B_FALSE;
4129	boolean_t redacted;
4130	uint8_t *wkeydata = NULL;
4131	uint_t wkeylen = 0;
4132
4133	begin_time = time(NULL);
4134	bzero(origin, MAXNAMELEN);
4135	bzero(tmp_keylocation, MAXNAMELEN);
4136
4137	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4138	    "cannot receive"));
4139
4140	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
4141	    ENOENT);
4142
4143	/* Did the user request holds be skipped via zfs recv -k? */
4144	boolean_t holds = flags->holds && !flags->skipholds;
4145
4146	if (stream_avl != NULL) {
4147		char *keylocation = NULL;
4148		nvlist_t *lookup = NULL;
4149		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
4150		    &snapname);
4151
4152		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
4153		    &parent_snapguid);
4154		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
4155		if (err) {
4156			rcvprops = fnvlist_alloc();
4157			newprops = B_TRUE;
4158		}
4159
4160		/*
4161		 * The keylocation property may only be set on encryption roots,
4162		 * but this dataset might not become an encryption root until
4163		 * recv_fix_encryption_hierarchy() is called. That function
4164		 * will fixup the keylocation anyway, so we temporarily unset
4165		 * the keylocation for now to avoid any errors from the receive
4166		 * ioctl.
4167		 */
4168		err = nvlist_lookup_string(rcvprops,
4169		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
4170		if (err == 0) {
4171			strcpy(tmp_keylocation, keylocation);
4172			(void) nvlist_remove_all(rcvprops,
4173			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
4174		}
4175
4176		if (flags->canmountoff) {
4177			fnvlist_add_uint64(rcvprops,
4178			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0);
4179		} else if (newprops) {	/* nothing in rcvprops, eliminate it */
4180			fnvlist_free(rcvprops);
4181			rcvprops = NULL;
4182			newprops = B_FALSE;
4183		}
4184		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
4185			snapprops_nvlist = fnvlist_lookup_nvlist(lookup,
4186			    snapname);
4187		}
4188		if (holds) {
4189			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
4190			    &lookup)) {
4191				snapholds_nvlist = fnvlist_lookup_nvlist(
4192				    lookup, snapname);
4193			}
4194		}
4195	}
4196
4197	cp = NULL;
4198
4199	/*
4200	 * Determine how much of the snapshot name stored in the stream
4201	 * we are going to tack on to the name they specified on the
4202	 * command line, and how much we are going to chop off.
4203	 *
4204	 * If they specified a snapshot, chop the entire name stored in
4205	 * the stream.
4206	 */
4207	if (flags->istail) {
4208		/*
4209		 * A filesystem was specified with -e. We want to tack on only
4210		 * the tail of the sent snapshot path.
4211		 */
4212		if (strchr(tosnap, '@')) {
4213			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4214			    "argument - snapshot not allowed with -e"));
4215			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4216			goto out;
4217		}
4218
4219		chopprefix = strrchr(sendfs, '/');
4220
4221		if (chopprefix == NULL) {
4222			/*
4223			 * The tail is the poolname, so we need to
4224			 * prepend a path separator.
4225			 */
4226			int len = strlen(drrb->drr_toname);
4227			cp = malloc(len + 2);
4228			cp[0] = '/';
4229			(void) strcpy(&cp[1], drrb->drr_toname);
4230			chopprefix = cp;
4231		} else {
4232			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
4233		}
4234	} else if (flags->isprefix) {
4235		/*
4236		 * A filesystem was specified with -d. We want to tack on
4237		 * everything but the first element of the sent snapshot path
4238		 * (all but the pool name).
4239		 */
4240		if (strchr(tosnap, '@')) {
4241			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4242			    "argument - snapshot not allowed with -d"));
4243			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4244			goto out;
4245		}
4246
4247		chopprefix = strchr(drrb->drr_toname, '/');
4248		if (chopprefix == NULL)
4249			chopprefix = strchr(drrb->drr_toname, '@');
4250	} else if (strchr(tosnap, '@') == NULL) {
4251		/*
4252		 * If a filesystem was specified without -d or -e, we want to
4253		 * tack on everything after the fs specified by 'zfs send'.
4254		 */
4255		chopprefix = drrb->drr_toname + strlen(sendfs);
4256	} else {
4257		/* A snapshot was specified as an exact path (no -d or -e). */
4258		if (recursive) {
4259			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4260			    "cannot specify snapshot name for multi-snapshot "
4261			    "stream"));
4262			err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4263			goto out;
4264		}
4265		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
4266	}
4267
4268	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
4269	ASSERT(chopprefix > drrb->drr_toname || strchr(sendfs, '/') == NULL);
4270	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname) ||
4271	    strchr(sendfs, '/') == NULL);
4272	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
4273	    chopprefix[0] == '\0');
4274
4275	/*
4276	 * Determine name of destination snapshot.
4277	 */
4278	(void) strlcpy(destsnap, tosnap, sizeof (destsnap));
4279	(void) strlcat(destsnap, chopprefix, sizeof (destsnap));
4280	free(cp);
4281	if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
4282		err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4283		goto out;
4284	}
4285
4286	/*
4287	 * Determine the name of the origin snapshot.
4288	 */
4289	if (originsnap) {
4290		(void) strlcpy(origin, originsnap, sizeof (origin));
4291		if (flags->verbose)
4292			(void) printf("using provided clone origin %s\n",
4293			    origin);
4294	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
4295		if (guid_to_name(hdl, destsnap,
4296		    drrb->drr_fromguid, B_FALSE, origin) != 0) {
4297			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4298			    "local origin for clone %s does not exist"),
4299			    destsnap);
4300			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4301			goto out;
4302		}
4303		if (flags->verbose)
4304			(void) printf("found clone origin %s\n", origin);
4305	}
4306
4307	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4308	    DMU_BACKUP_FEATURE_DEDUP)) {
4309		(void) fprintf(stderr,
4310		    gettext("ERROR: \"zfs receive\" no longer supports "
4311		    "deduplicated send streams.  Use\n"
4312		    "the \"zstream redup\" command to convert this stream "
4313		    "to a regular,\n"
4314		    "non-deduplicated stream.\n"));
4315		err = zfs_error(hdl, EZFS_NOTSUP, errbuf);
4316		goto out;
4317	}
4318
4319	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4320	    DMU_BACKUP_FEATURE_RESUMING;
4321	boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4322	    DMU_BACKUP_FEATURE_RAW;
4323	boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4324	    DMU_BACKUP_FEATURE_EMBED_DATA;
4325	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
4326	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
4327	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
4328	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
4329
4330	if (stream_wantsnewfs) {
4331		/*
4332		 * if the parent fs does not exist, look for it based on
4333		 * the parent snap GUID
4334		 */
4335		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4336		    "cannot receive new filesystem stream"));
4337
4338		(void) strcpy(name, destsnap);
4339		cp = strrchr(name, '/');
4340		if (cp)
4341			*cp = '\0';
4342		if (cp &&
4343		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4344			char suffix[ZFS_MAX_DATASET_NAME_LEN];
4345			(void) strcpy(suffix, strrchr(destsnap, '/'));
4346			if (guid_to_name(hdl, name, parent_snapguid,
4347			    B_FALSE, destsnap) == 0) {
4348				*strchr(destsnap, '@') = '\0';
4349				(void) strcat(destsnap, suffix);
4350			}
4351		}
4352	} else {
4353		/*
4354		 * If the fs does not exist, look for it based on the
4355		 * fromsnap GUID.
4356		 */
4357		if (resuming) {
4358			(void) snprintf(errbuf, sizeof (errbuf),
4359			    dgettext(TEXT_DOMAIN,
4360			    "cannot receive resume stream"));
4361		} else {
4362			(void) snprintf(errbuf, sizeof (errbuf),
4363			    dgettext(TEXT_DOMAIN,
4364			    "cannot receive incremental stream"));
4365		}
4366
4367		(void) strcpy(name, destsnap);
4368		*strchr(name, '@') = '\0';
4369
4370		/*
4371		 * If the exact receive path was specified and this is the
4372		 * topmost path in the stream, then if the fs does not exist we
4373		 * should look no further.
4374		 */
4375		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
4376		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
4377		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4378			char snap[ZFS_MAX_DATASET_NAME_LEN];
4379			(void) strcpy(snap, strchr(destsnap, '@'));
4380			if (guid_to_name(hdl, name, drrb->drr_fromguid,
4381			    B_FALSE, destsnap) == 0) {
4382				*strchr(destsnap, '@') = '\0';
4383				(void) strcat(destsnap, snap);
4384			}
4385		}
4386	}
4387
4388	(void) strcpy(name, destsnap);
4389	*strchr(name, '@') = '\0';
4390
4391	redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4392	    DMU_BACKUP_FEATURE_REDACTED;
4393
4394	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4395		zfs_cmd_t zc = {"\0"};
4396		zfs_handle_t *zhp;
4397		boolean_t encrypted;
4398
4399		(void) strcpy(zc.zc_name, name);
4400
4401		/*
4402		 * Destination fs exists.  It must be one of these cases:
4403		 *  - an incremental send stream
4404		 *  - the stream specifies a new fs (full stream or clone)
4405		 *    and they want us to blow away the existing fs (and
4406		 *    have therefore specified -F and removed any snapshots)
4407		 *  - we are resuming a failed receive.
4408		 */
4409		if (stream_wantsnewfs) {
4410			boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
4411			if (!flags->force) {
4412				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4413				    "destination '%s' exists\n"
4414				    "must specify -F to overwrite it"), name);
4415				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4416				goto out;
4417			}
4418			if (zfs_ioctl(hdl, ZFS_IOC_SNAPSHOT_LIST_NEXT,
4419			    &zc) == 0) {
4420				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4421				    "destination has snapshots (eg. %s)\n"
4422				    "must destroy them to overwrite it"),
4423				    zc.zc_name);
4424				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4425				goto out;
4426			}
4427			if (is_volume && strrchr(name, '/') == NULL) {
4428				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4429				    "destination %s is the root dataset\n"
4430				    "cannot overwrite with a ZVOL"),
4431				    name);
4432				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4433				goto out;
4434			}
4435			if (is_volume &&
4436			    zfs_ioctl(hdl, ZFS_IOC_DATASET_LIST_NEXT,
4437			    &zc) == 0) {
4438				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4439				    "destination has children (eg. %s)\n"
4440				    "cannot overwrite with a ZVOL"),
4441				    zc.zc_name);
4442				err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4443				goto out;
4444			}
4445		}
4446
4447		if ((zhp = zfs_open(hdl, name,
4448		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
4449			err = -1;
4450			goto out;
4451		}
4452
4453		if (stream_wantsnewfs &&
4454		    zhp->zfs_dmustats.dds_origin[0]) {
4455			zfs_close(zhp);
4456			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4457			    "destination '%s' is a clone\n"
4458			    "must destroy it to overwrite it"), name);
4459			err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4460			goto out;
4461		}
4462
4463		/*
4464		 * Raw sends can not be performed as an incremental on top
4465		 * of existing unencrypted datasets. zfs recv -F can't be
4466		 * used to blow away an existing encrypted filesystem. This
4467		 * is because it would require the dsl dir to point to the
4468		 * new key (or lack of a key) and the old key at the same
4469		 * time. The -F flag may still be used for deleting
4470		 * intermediate snapshots that would otherwise prevent the
4471		 * receive from working.
4472		 */
4473		encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
4474		    ZIO_CRYPT_OFF;
4475		if (!stream_wantsnewfs && !encrypted && raw) {
4476			zfs_close(zhp);
4477			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4478			    "cannot perform raw receive on top of "
4479			    "existing unencrypted dataset"));
4480			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4481			goto out;
4482		}
4483
4484		if (stream_wantsnewfs && flags->force &&
4485		    ((raw && !encrypted) || encrypted)) {
4486			zfs_close(zhp);
4487			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4488			    "zfs receive -F cannot be used to destroy an "
4489			    "encrypted filesystem or overwrite an "
4490			    "unencrypted one with an encrypted one"));
4491			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4492			goto out;
4493		}
4494
4495		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
4496		    (stream_wantsnewfs || stream_resumingnewfs)) {
4497			/* We can't do online recv in this case */
4498			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
4499			    flags->forceunmount ? MS_FORCE : 0);
4500			if (clp == NULL) {
4501				zfs_close(zhp);
4502				err = -1;
4503				goto out;
4504			}
4505			if (changelist_prefix(clp) != 0) {
4506				changelist_free(clp);
4507				zfs_close(zhp);
4508				err = -1;
4509				goto out;
4510			}
4511		}
4512
4513		/*
4514		 * If we are resuming a newfs, set newfs here so that we will
4515		 * mount it if the recv succeeds this time.  We can tell
4516		 * that it was a newfs on the first recv because the fs
4517		 * itself will be inconsistent (if the fs existed when we
4518		 * did the first recv, we would have received it into
4519		 * .../%recv).
4520		 */
4521		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
4522			newfs = B_TRUE;
4523
4524		/* we want to know if we're zoned when validating -o|-x props */
4525		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
4526
4527		/* may need this info later, get it now we have zhp around */
4528		if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
4529		    NULL, NULL, 0, B_TRUE) == 0)
4530			hastoken = B_TRUE;
4531
4532		/* gather existing properties on destination */
4533		origprops = fnvlist_alloc();
4534		fnvlist_merge(origprops, zhp->zfs_props);
4535		fnvlist_merge(origprops, zhp->zfs_user_props);
4536
4537		zfs_close(zhp);
4538	} else {
4539		zfs_handle_t *zhp;
4540
4541		/*
4542		 * Destination filesystem does not exist.  Therefore we better
4543		 * be creating a new filesystem (either from a full backup, or
4544		 * a clone).  It would therefore be invalid if the user
4545		 * specified only the pool name (i.e. if the destination name
4546		 * contained no slash character).
4547		 */
4548		cp = strrchr(name, '/');
4549
4550		if (!stream_wantsnewfs || cp == NULL) {
4551			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4552			    "destination '%s' does not exist"), name);
4553			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4554			goto out;
4555		}
4556
4557		/*
4558		 * Trim off the final dataset component so we perform the
4559		 * recvbackup ioctl to the filesystems's parent.
4560		 */
4561		*cp = '\0';
4562
4563		if (flags->isprefix && !flags->istail && !flags->dryrun &&
4564		    create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4565			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4566			goto out;
4567		}
4568
4569		/* validate parent */
4570		zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4571		if (zhp == NULL) {
4572			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4573			goto out;
4574		}
4575		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4576			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4577			    "parent '%s' is not a filesystem"), name);
4578			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4579			zfs_close(zhp);
4580			goto out;
4581		}
4582
4583		zfs_close(zhp);
4584
4585		newfs = B_TRUE;
4586		*cp = '/';
4587	}
4588
4589	if (flags->verbose) {
4590		(void) printf("%s %s stream of %s into %s\n",
4591		    flags->dryrun ? "would receive" : "receiving",
4592		    drrb->drr_fromguid ? "incremental" : "full",
4593		    drrb->drr_toname, destsnap);
4594		(void) fflush(stdout);
4595	}
4596
4597	/*
4598	 * If this is the top-level dataset, record it so we can use it
4599	 * for recursive operations later.
4600	 */
4601	if (top_zfs != NULL &&
4602	    (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) {
4603		toplevel = B_TRUE;
4604		if (*top_zfs == NULL)
4605			*top_zfs = zfs_strdup(hdl, name);
4606	}
4607
4608	if (drrb->drr_type == DMU_OST_ZVOL) {
4609		type = ZFS_TYPE_VOLUME;
4610	} else if (drrb->drr_type == DMU_OST_ZFS) {
4611		type = ZFS_TYPE_FILESYSTEM;
4612	} else {
4613		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4614		    "invalid record type: 0x%d"), drrb->drr_type);
4615		err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4616		goto out;
4617	}
4618	if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4619	    stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4620	    &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4621		goto out;
4622
4623	/*
4624	 * When sending with properties (zfs send -p), the encryption property
4625	 * is not included because it is a SETONCE property and therefore
4626	 * treated as read only. However, we are always able to determine its
4627	 * value because raw sends will include it in the DRR_BDEGIN payload
4628	 * and non-raw sends with properties are not allowed for encrypted
4629	 * datasets. Therefore, if this is a non-raw properties stream, we can
4630	 * infer that the value should be ZIO_CRYPT_OFF and manually add that
4631	 * to the received properties.
4632	 */
4633	if (stream_wantsnewfs && !raw && rcvprops != NULL &&
4634	    !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
4635		if (oxprops == NULL)
4636			oxprops = fnvlist_alloc();
4637		fnvlist_add_uint64(oxprops,
4638		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
4639	}
4640
4641	if (flags->dryrun) {
4642		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
4643
4644		/*
4645		 * We have read the DRR_BEGIN record, but we have
4646		 * not yet read the payload. For non-dryrun sends
4647		 * this will be done by the kernel, so we must
4648		 * emulate that here, before attempting to read
4649		 * more records.
4650		 */
4651		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
4652		    flags->byteswap, NULL);
4653		free(buf);
4654		if (err != 0)
4655			goto out;
4656
4657		err = recv_skip(hdl, infd, flags->byteswap);
4658		goto out;
4659	}
4660
4661	err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
4662	    oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
4663	    raw, infd, drr_noswap, -1, &read_bytes, &errflags,
4664	    NULL, &prop_errors);
4665	ioctl_errno = ioctl_err;
4666	prop_errflags = errflags;
4667
4668	if (err == 0) {
4669		nvpair_t *prop_err = NULL;
4670
4671		while ((prop_err = nvlist_next_nvpair(prop_errors,
4672		    prop_err)) != NULL) {
4673			char tbuf[1024];
4674			zfs_prop_t prop;
4675			int intval;
4676
4677			prop = zfs_name_to_prop(nvpair_name(prop_err));
4678			(void) nvpair_value_int32(prop_err, &intval);
4679			if (strcmp(nvpair_name(prop_err),
4680			    ZPROP_N_MORE_ERRORS) == 0) {
4681				trunc_prop_errs(intval);
4682				break;
4683			} else if (snapname == NULL || finalsnap == NULL ||
4684			    strcmp(finalsnap, snapname) == 0 ||
4685			    strcmp(nvpair_name(prop_err),
4686			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
4687				/*
4688				 * Skip the special case of, for example,
4689				 * "refquota", errors on intermediate
4690				 * snapshots leading up to a final one.
4691				 * That's why we have all of the checks above.
4692				 *
4693				 * See zfs_ioctl.c's extract_delay_props() for
4694				 * a list of props which can fail on
4695				 * intermediate snapshots, but shouldn't
4696				 * affect the overall receive.
4697				 */
4698				(void) snprintf(tbuf, sizeof (tbuf),
4699				    dgettext(TEXT_DOMAIN,
4700				    "cannot receive %s property on %s"),
4701				    nvpair_name(prop_err), name);
4702				zfs_setprop_error(hdl, prop, intval, tbuf);
4703			}
4704		}
4705	}
4706
4707	if (err == 0 && snapprops_nvlist) {
4708		zfs_cmd_t zc = {"\0"};
4709
4710		(void) strcpy(zc.zc_name, destsnap);
4711		zc.zc_cookie = B_TRUE; /* received */
4712		if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
4713			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
4714			zcmd_free_nvlists(&zc);
4715		}
4716	}
4717	if (err == 0 && snapholds_nvlist) {
4718		nvpair_t *pair;
4719		nvlist_t *holds, *errors = NULL;
4720		int cleanup_fd = -1;
4721
4722		VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
4723		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
4724		    pair != NULL;
4725		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
4726			fnvlist_add_string(holds, destsnap, nvpair_name(pair));
4727		}
4728		(void) lzc_hold(holds, cleanup_fd, &errors);
4729		fnvlist_free(snapholds_nvlist);
4730		fnvlist_free(holds);
4731	}
4732
4733	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
4734		/*
4735		 * It may be that this snapshot already exists,
4736		 * in which case we want to consume & ignore it
4737		 * rather than failing.
4738		 */
4739		avl_tree_t *local_avl;
4740		nvlist_t *local_nv, *fs;
4741		cp = strchr(destsnap, '@');
4742
4743		/*
4744		 * XXX Do this faster by just iterating over snaps in
4745		 * this fs.  Also if zc_value does not exist, we will
4746		 * get a strange "does not exist" error message.
4747		 */
4748		*cp = '\0';
4749		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
4750		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE,
4751		    B_TRUE, &local_nv, &local_avl) == 0) {
4752			*cp = '@';
4753			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
4754			fsavl_destroy(local_avl);
4755			fnvlist_free(local_nv);
4756
4757			if (fs != NULL) {
4758				if (flags->verbose) {
4759					(void) printf("snap %s already exists; "
4760					    "ignoring\n", destsnap);
4761				}
4762				err = ioctl_err = recv_skip(hdl, infd,
4763				    flags->byteswap);
4764			}
4765		}
4766		*cp = '@';
4767	}
4768
4769	if (ioctl_err != 0) {
4770		switch (ioctl_errno) {
4771		case ENODEV:
4772			cp = strchr(destsnap, '@');
4773			*cp = '\0';
4774			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4775			    "most recent snapshot of %s does not\n"
4776			    "match incremental source"), destsnap);
4777			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4778			*cp = '@';
4779			break;
4780		case ETXTBSY:
4781			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4782			    "destination %s has been modified\n"
4783			    "since most recent snapshot"), name);
4784			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4785			break;
4786		case EACCES:
4787			if (raw && stream_wantsnewfs) {
4788				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4789				    "failed to create encryption key"));
4790			} else if (raw && !stream_wantsnewfs) {
4791				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4792				    "encryption key does not match "
4793				    "existing key"));
4794			} else {
4795				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4796				    "inherited key must be loaded"));
4797			}
4798			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4799			break;
4800		case EEXIST:
4801			cp = strchr(destsnap, '@');
4802			if (newfs) {
4803				/* it's the containing fs that exists */
4804				*cp = '\0';
4805			}
4806			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4807			    "destination already exists"));
4808			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
4809			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
4810			    destsnap);
4811			*cp = '@';
4812			break;
4813		case EINVAL:
4814			if (flags->resumable) {
4815				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4816				    "kernel modules must be upgraded to "
4817				    "receive this stream."));
4818			} else if (embedded && !raw) {
4819				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4820				    "incompatible embedded data stream "
4821				    "feature with encrypted receive."));
4822			}
4823			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4824			break;
4825		case ECKSUM:
4826		case ZFS_ERR_STREAM_TRUNCATED:
4827			recv_ecksum_set_aux(hdl, destsnap, flags->resumable,
4828			    ioctl_err == ECKSUM);
4829			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4830			break;
4831		case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
4832			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4833			    "incremental send stream requires -L "
4834			    "(--large-block), to match previous receive."));
4835			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4836			break;
4837		case ENOTSUP:
4838			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4839			    "pool must be upgraded to receive this stream."));
4840			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
4841			break;
4842		case EDQUOT:
4843			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4844			    "destination %s space quota exceeded."), name);
4845			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
4846			break;
4847		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
4848			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4849			    "IV set guid missing. See errata %u at "
4850			    "https://openzfs.github.io/openzfs-docs/msg/"
4851			    "ZFS-8000-ER."),
4852			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
4853			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4854			break;
4855		case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
4856			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4857			    "IV set guid mismatch. See the 'zfs receive' "
4858			    "man page section\n discussing the limitations "
4859			    "of raw encrypted send streams."));
4860			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4861			break;
4862		case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
4863			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4864			    "Spill block flag missing for raw send.\n"
4865			    "The zfs software on the sending system must "
4866			    "be updated."));
4867			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4868			break;
4869		case EBUSY:
4870			if (hastoken) {
4871				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4872				    "destination %s contains "
4873				    "partially-complete state from "
4874				    "\"zfs receive -s\"."), name);
4875				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
4876				break;
4877			}
4878			/* fallthru */
4879		default:
4880			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
4881		}
4882	}
4883
4884	/*
4885	 * Mount the target filesystem (if created).  Also mount any
4886	 * children of the target filesystem if we did a replication
4887	 * receive (indicated by stream_avl being non-NULL).
4888	 */
4889	if (clp) {
4890		if (!flags->nomount)
4891			err |= changelist_postfix(clp);
4892		changelist_free(clp);
4893	}
4894
4895	if ((newfs || stream_avl) && type == ZFS_TYPE_FILESYSTEM && !redacted)
4896		flags->domount = B_TRUE;
4897
4898	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
4899		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4900		    "failed to clear unreceived properties on %s"), name);
4901		(void) fprintf(stderr, "\n");
4902	}
4903	if (prop_errflags & ZPROP_ERR_NORESTORE) {
4904		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4905		    "failed to restore original properties on %s"), name);
4906		(void) fprintf(stderr, "\n");
4907	}
4908
4909	if (err || ioctl_err) {
4910		err = -1;
4911		goto out;
4912	}
4913
4914	if (flags->verbose) {
4915		char buf1[64];
4916		char buf2[64];
4917		uint64_t bytes = read_bytes;
4918		time_t delta = time(NULL) - begin_time;
4919		if (delta == 0)
4920			delta = 1;
4921		zfs_nicebytes(bytes, buf1, sizeof (buf1));
4922		zfs_nicebytes(bytes/delta, buf2, sizeof (buf1));
4923
4924		(void) printf("received %s stream in %lld seconds (%s/sec)\n",
4925		    buf1, (longlong_t)delta, buf2);
4926	}
4927
4928	err = 0;
4929out:
4930	if (prop_errors != NULL)
4931		fnvlist_free(prop_errors);
4932
4933	if (tmp_keylocation[0] != '\0') {
4934		fnvlist_add_string(rcvprops,
4935		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation);
4936	}
4937
4938	if (newprops)
4939		fnvlist_free(rcvprops);
4940
4941	fnvlist_free(oxprops);
4942	fnvlist_free(origprops);
4943
4944	return (err);
4945}
4946
4947/*
4948 * Check properties we were asked to override (both -o|-x)
4949 */
4950static boolean_t
4951zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
4952    const char *errbuf)
4953{
4954	nvpair_t *nvp;
4955	zfs_prop_t prop;
4956	const char *name;
4957
4958	nvp = NULL;
4959	while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
4960		name = nvpair_name(nvp);
4961		prop = zfs_name_to_prop(name);
4962
4963		if (prop == ZPROP_INVAL) {
4964			if (!zfs_prop_user(name)) {
4965				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4966				    "invalid property '%s'"), name);
4967				return (B_FALSE);
4968			}
4969			continue;
4970		}
4971		/*
4972		 * "origin" is readonly but is used to receive datasets as
4973		 * clones so we don't raise an error here
4974		 */
4975		if (prop == ZFS_PROP_ORIGIN)
4976			continue;
4977
4978		/* encryption params have their own verification later */
4979		if (prop == ZFS_PROP_ENCRYPTION ||
4980		    zfs_prop_encryption_key_param(prop))
4981			continue;
4982
4983		/*
4984		 * cannot override readonly, set-once and other specific
4985		 * settable properties
4986		 */
4987		if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
4988		    prop == ZFS_PROP_VOLSIZE) {
4989			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4990			    "invalid property '%s'"), name);
4991			return (B_FALSE);
4992		}
4993	}
4994
4995	return (B_TRUE);
4996}
4997
4998static int
4999zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
5000    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
5001    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs,
5002    const char *finalsnap, nvlist_t *cmdprops)
5003{
5004	int err;
5005	dmu_replay_record_t drr, drr_noswap;
5006	struct drr_begin *drrb = &drr.drr_u.drr_begin;
5007	char errbuf[1024];
5008	zio_cksum_t zcksum = { { 0 } };
5009	uint64_t featureflags;
5010	int hdrtype;
5011
5012	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
5013	    "cannot receive"));
5014
5015	/* check cmdline props, raise an error if they cannot be received */
5016	if (!zfs_receive_checkprops(hdl, cmdprops, errbuf)) {
5017		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
5018	}
5019
5020	if (flags->isprefix &&
5021	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
5022		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
5023		    "(%s) does not exist"), tosnap);
5024		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5025	}
5026	if (originsnap &&
5027	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
5028		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
5029		    "(%s) does not exist"), originsnap);
5030		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5031	}
5032
5033	/* read in the BEGIN record */
5034	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
5035	    &zcksum)))
5036		return (err);
5037
5038	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
5039		/* It's the double end record at the end of a package */
5040		return (ENODATA);
5041	}
5042
5043	/* the kernel needs the non-byteswapped begin record */
5044	drr_noswap = drr;
5045
5046	flags->byteswap = B_FALSE;
5047	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
5048		/*
5049		 * We computed the checksum in the wrong byteorder in
5050		 * recv_read() above; do it again correctly.
5051		 */
5052		bzero(&zcksum, sizeof (zio_cksum_t));
5053		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
5054		flags->byteswap = B_TRUE;
5055
5056		drr.drr_type = BSWAP_32(drr.drr_type);
5057		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
5058		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
5059		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
5060		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
5061		drrb->drr_type = BSWAP_32(drrb->drr_type);
5062		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
5063		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
5064		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
5065	}
5066
5067	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
5068		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5069		    "stream (bad magic number)"));
5070		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5071	}
5072
5073	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
5074	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
5075
5076	if (!DMU_STREAM_SUPPORTED(featureflags) ||
5077	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
5078		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5079		    "stream has unsupported feature, feature flags = %llx"),
5080		    (unsigned long long)featureflags);
5081		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5082	}
5083
5084	/* Holds feature is set once in the compound stream header. */
5085	if (featureflags & DMU_BACKUP_FEATURE_HOLDS)
5086		flags->holds = B_TRUE;
5087
5088	if (strchr(drrb->drr_toname, '@') == NULL) {
5089		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5090		    "stream (bad snapshot name)"));
5091		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5092	}
5093
5094	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
5095		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
5096		if (sendfs == NULL) {
5097			/*
5098			 * We were not called from zfs_receive_package(). Get
5099			 * the fs specified by 'zfs send'.
5100			 */
5101			char *cp;
5102			(void) strlcpy(nonpackage_sendfs,
5103			    drr.drr_u.drr_begin.drr_toname,
5104			    sizeof (nonpackage_sendfs));
5105			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
5106				*cp = '\0';
5107			sendfs = nonpackage_sendfs;
5108			VERIFY(finalsnap == NULL);
5109		}
5110		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
5111		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
5112		    finalsnap, cmdprops));
5113	} else {
5114		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
5115		    DMU_COMPOUNDSTREAM);
5116		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
5117		    &zcksum, top_zfs, cmdprops));
5118	}
5119}
5120
5121/*
5122 * Restores a backup of tosnap from the file descriptor specified by infd.
5123 * Return 0 on total success, -2 if some things couldn't be
5124 * destroyed/renamed/promoted, -1 if some things couldn't be received.
5125 * (-1 will override -2, if -1 and the resumable flag was specified the
5126 * transfer can be resumed if the sending side supports it).
5127 */
5128int
5129zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
5130    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
5131{
5132	char *top_zfs = NULL;
5133	int err;
5134	struct stat sb;
5135	char *originsnap = NULL;
5136
5137	/*
5138	 * The only way fstat can fail is if we do not have a valid file
5139	 * descriptor.
5140	 */
5141	if (fstat(infd, &sb) == -1) {
5142		perror("fstat");
5143		return (-2);
5144	}
5145
5146	/*
5147	 * It is not uncommon for gigabytes to be processed in zfs receive.
5148	 * Speculatively increase the buffer size if supported by the platform.
5149	 */
5150	if (S_ISFIFO(sb.st_mode))
5151		libzfs_set_pipe_max(infd);
5152
5153	if (props) {
5154		err = nvlist_lookup_string(props, "origin", &originsnap);
5155		if (err && err != ENOENT)
5156			return (err);
5157	}
5158
5159	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
5160	    stream_avl, &top_zfs, NULL, props);
5161
5162	if (err == 0 && !flags->nomount && flags->domount && top_zfs) {
5163		zfs_handle_t *zhp = NULL;
5164		prop_changelist_t *clp = NULL;
5165
5166		zhp = zfs_open(hdl, top_zfs,
5167		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
5168		if (zhp == NULL) {
5169			err = -1;
5170			goto out;
5171		} else {
5172			if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
5173				zfs_close(zhp);
5174				goto out;
5175			}
5176
5177			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
5178			    CL_GATHER_MOUNT_ALWAYS,
5179			    flags->forceunmount ? MS_FORCE : 0);
5180			zfs_close(zhp);
5181			if (clp == NULL) {
5182				err = -1;
5183				goto out;
5184			}
5185
5186			/* mount and share received datasets */
5187			err = changelist_postfix(clp);
5188			changelist_free(clp);
5189			if (err != 0)
5190				err = -1;
5191		}
5192	}
5193
5194out:
5195	if (top_zfs)
5196		free(top_zfs);
5197
5198	return (err);
5199}
5200