1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved.
27 * Copyright (c) 2013 Steven Hartland. All rights reserved.
28 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
29 * Copyright (c) 2014 Integros [integros.com]
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2019 Datto Inc.
33 */
34
35#include <assert.h>
36#include <ctype.h>
37#include <errno.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <strings.h>
42#include <unistd.h>
43#include <stddef.h>
44#include <fcntl.h>
45#include <sys/param.h>
46#include <sys/mount.h>
47#include <pthread.h>
48#include <umem.h>
49#include <time.h>
50
51#include <libzfs.h>
52#include <libzfs_core.h>
53
54#include "zfs_namecheck.h"
55#include "zfs_prop.h"
56#include "zfs_fletcher.h"
57#include "libzfs_impl.h"
58#include <zlib.h>
59#include <sha2.h>
60#include <sys/zio_checksum.h>
61#include <sys/ddt.h>
62
/*
 * FreeBSD builds reference an integer that is defined outside this file.
 * NOTE(review): presumably selects the ioctl ABI version -- confirm.
 */
#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif

/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
#define	ENODATA	EIDRM

/*
 * Prototypes for static functions that are used before their definitions
 * appear in this file.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
    uint64_t *, const char *);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);

/* All-zero checksum; cksummer() compares record checksums against it. */
static const zio_cksum_t zero_cksum = { 0 };
79
/*
 * Argument block consumed by the cksummer() thread.
 */
typedef struct dedup_arg {
	int	inputfd;	/* fd cksummer() reads the raw stream from */
	int	outputfd;	/* fd cksummer() writes the rewritten stream to */
	libzfs_handle_t  *dedup_hdl;	/* handle used for allocation and errors */
} dedup_arg_t;
85
/*
 * Argument block consumed by send_progress_thread().
 */
typedef struct progress_arg {
	zfs_handle_t *pa_zhp;	/* dataset whose name is queried and printed */
	int pa_fd;		/* passed as zc_cookie to ZFS_IOC_SEND_PROGRESS */
	boolean_t pa_parsable;	/* print tab-separated lines without a header */
	boolean_t pa_astitle;	/* report through setproctitle() instead */
	uint64_t pa_size;	/* total used to compute the percentage */
} progress_arg_t;
93
/*
 * Location of a block already emitted in the stream.  cksummer() fills it
 * from a DRR_WRITE record's drr_toguid, drr_object and drr_offset.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
99
/*
 * One dedup table entry; entries hashing to the same bucket are chained
 * through dde_next.
 */
typedef struct dedup_entry {
	struct dedup_entry	*dde_next;	/* next entry in this bucket */
	zio_cksum_t dde_chksum;		/* checksum the entry is keyed on */
	uint64_t dde_prop;		/* second half of the key (ddk_prop) */
	dataref_t dde_ref;		/* where the block was first seen */
} dedup_entry_t;
106
/*
 * Sizing limits applied by cksummer(): the table may use up to
 * MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but the cap is
 * never set lower than SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define	MAX_DDT_PHYSMEM_PERCENT		20
#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128

/*
 * In-memory dedup table: an array of hash buckets, each heading a chain of
 * dedup_entry_t.
 */
typedef struct dedup_table {
	dedup_entry_t	**dedup_hash_array;	/* bucket chain heads */
	umem_cache_t	*ddecache;	/* cache the entries are allocated from */
	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
	uint64_t	ddt_count;	/* bumped for every entry appended */
	int		numhashbits;	/* low bits of zc_word[0] used as index */
	boolean_t	ddt_full;	/* set once "table full" was reported */
} dedup_table_t;
119
/*
 * Return the 1-based position of the most significant set bit of n, i.e.
 * the number of right shifts needed to reduce n to zero.  Yields 0 for
 * n == 0 and 64 when the top bit is set.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}
	return (nbits);
}
129
/*
 * Read one item of exactly len bytes from stream into buf.
 *
 * Returns the fread() item count: 1 when the whole item was read, 0 on a
 * short read, end of file or error.  Note that this is not a byte count.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
140
141static void
142ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
143    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
144{
145	dedup_entry_t	*dde;
146
147	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
148		if (ddt->ddt_full == B_FALSE) {
149			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
150			    "Dedup table full.  Deduplication will continue "
151			    "with existing table entries"));
152			ddt->ddt_full = B_TRUE;
153		}
154		return;
155	}
156
157	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
158	    != NULL) {
159		assert(*ddepp == NULL);
160		dde->dde_next = NULL;
161		dde->dde_chksum = *cs;
162		dde->dde_prop = prop;
163		dde->dde_ref = *dr;
164		*ddepp = dde;
165		ddt->cur_ddt_size += sizeof (dedup_entry_t);
166		ddt->ddt_count++;
167	}
168}
169
170/*
171 * Using the specified dedup table, do a lookup for an entry with
172 * the checksum cs.  If found, return the block's reference info
173 * in *dr. Otherwise, insert a new entry in the dedup table, using
174 * the reference information specified by *dr.
175 *
176 * return value:  true - entry was found
177 *		  false - entry was not found
178 */
179static boolean_t
180ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
181    uint64_t prop, dataref_t *dr)
182{
183	uint32_t hashcode;
184	dedup_entry_t **ddepp;
185
186	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
187
188	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
189	    ddepp = &((*ddepp)->dde_next)) {
190		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
191		    (*ddepp)->dde_prop == prop) {
192			*dr = (*ddepp)->dde_ref;
193			return (B_TRUE);
194		}
195	}
196	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
197	return (B_FALSE);
198}
199
200static int
201dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
202    zio_cksum_t *zc, int outfd)
203{
204	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
205	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
206	(void) fletcher_4_incremental_native(drr,
207	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
208	if (drr->drr_type != DRR_BEGIN) {
209		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
210		    drr_checksum.drr_checksum));
211		drr->drr_u.drr_checksum.drr_checksum = *zc;
212	}
213	(void) fletcher_4_incremental_native(
214	    &drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc);
215	if (write(outfd, drr, sizeof (*drr)) == -1)
216		return (errno);
217	if (payload_len != 0) {
218		(void) fletcher_4_incremental_native(payload, payload_len, zc);
219		if (write(outfd, payload, payload_len) == -1)
220			return (errno);
221	}
222	return (0);
223}
224
225/*
226 * This function is started in a separate thread when the dedup option
227 * has been requested.  The main send thread determines the list of
228 * snapshots to be included in the send stream and makes the ioctl calls
229 * for each one.  But instead of having the ioctl send the output to the
230 * the output fd specified by the caller of zfs_send()), the
231 * ioctl is told to direct the output to a pipe, which is read by the
232 * alternate thread running THIS function.  This function does the
233 * dedup'ing by:
234 *  1. building a dedup table (the DDT)
235 *  2. doing checksums on each data block and inserting a record in the DDT
236 *  3. looking for matching checksums, and
237 *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
238 *      a duplicate block is found.
239 * The output of this function then goes to the output fd requested
240 * by the caller of zfs_send().
241 */
242static void *
243cksummer(void *arg)
244{
245	dedup_arg_t *dda = arg;
246	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
247	dmu_replay_record_t thedrr;
248	dmu_replay_record_t *drr = &thedrr;
249	FILE *ofp;
250	int outfd;
251	dedup_table_t ddt;
252	zio_cksum_t stream_cksum;
253	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
254	uint64_t numbuckets;
255
256	ddt.max_ddt_size =
257	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
258	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
259
260	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
261
262	/*
263	 * numbuckets must be a power of 2.  Increase number to
264	 * a power of 2 if necessary.
265	 */
266	if (!ISP2(numbuckets))
267		numbuckets = 1 << high_order_bit(numbuckets);
268
269	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
270	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
271	    NULL, NULL, NULL, NULL, NULL, 0);
272	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
273	ddt.numhashbits = high_order_bit(numbuckets) - 1;
274	ddt.ddt_full = B_FALSE;
275
276	outfd = dda->outputfd;
277	ofp = fdopen(dda->inputfd, "r");
278	while (ssread(drr, sizeof (*drr), ofp) != 0) {
279
280		/*
281		 * kernel filled in checksum, we are going to write same
282		 * record, but need to regenerate checksum.
283		 */
284		if (drr->drr_type != DRR_BEGIN) {
285			bzero(&drr->drr_u.drr_checksum.drr_checksum,
286			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
287		}
288
289		switch (drr->drr_type) {
290		case DRR_BEGIN:
291		{
292			struct drr_begin *drrb = &drr->drr_u.drr_begin;
293			int fflags;
294			int sz = 0;
295			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
296
297			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
298
299			/* set the DEDUP feature flag for this stream */
300			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
301			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
302			    DMU_BACKUP_FEATURE_DEDUPPROPS);
303			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
304
305			if (drr->drr_payloadlen != 0) {
306				sz = drr->drr_payloadlen;
307
308				if (sz > SPA_MAXBLOCKSIZE) {
309					buf = zfs_realloc(dda->dedup_hdl, buf,
310					    SPA_MAXBLOCKSIZE, sz);
311				}
312				(void) ssread(buf, sz, ofp);
313				if (ferror(stdin))
314					perror("fread");
315			}
316			if (dump_record(drr, buf, sz, &stream_cksum,
317			    outfd) != 0)
318				goto out;
319			break;
320		}
321
322		case DRR_END:
323		{
324			struct drr_end *drre = &drr->drr_u.drr_end;
325			/* use the recalculated checksum */
326			drre->drr_checksum = stream_cksum;
327			if (dump_record(drr, NULL, 0, &stream_cksum,
328			    outfd) != 0)
329				goto out;
330			break;
331		}
332
333		case DRR_OBJECT:
334		{
335			struct drr_object *drro = &drr->drr_u.drr_object;
336			if (drro->drr_bonuslen > 0) {
337				(void) ssread(buf,
338				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
339				    ofp);
340			}
341			if (dump_record(drr, buf,
342			    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
343			    &stream_cksum, outfd) != 0)
344				goto out;
345			break;
346		}
347
348		case DRR_SPILL:
349		{
350			struct drr_spill *drrs = &drr->drr_u.drr_spill;
351			(void) ssread(buf, drrs->drr_length, ofp);
352			if (dump_record(drr, buf, drrs->drr_length,
353			    &stream_cksum, outfd) != 0)
354				goto out;
355			break;
356		}
357
358		case DRR_FREEOBJECTS:
359		{
360			if (dump_record(drr, NULL, 0, &stream_cksum,
361			    outfd) != 0)
362				goto out;
363			break;
364		}
365
366		case DRR_WRITE:
367		{
368			struct drr_write *drrw = &drr->drr_u.drr_write;
369			dataref_t	dataref;
370			uint64_t	payload_size;
371
372			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
373			(void) ssread(buf, payload_size, ofp);
374
375			/*
376			 * Use the existing checksum if it's dedup-capable,
377			 * else calculate a SHA256 checksum for it.
378			 */
379
380			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
381			    zero_cksum) ||
382			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
383				SHA256_CTX	ctx;
384				zio_cksum_t	tmpsha256;
385
386				SHA256Init(&ctx);
387				SHA256Update(&ctx, buf, payload_size);
388				SHA256Final(&tmpsha256, &ctx);
389				drrw->drr_key.ddk_cksum.zc_word[0] =
390				    BE_64(tmpsha256.zc_word[0]);
391				drrw->drr_key.ddk_cksum.zc_word[1] =
392				    BE_64(tmpsha256.zc_word[1]);
393				drrw->drr_key.ddk_cksum.zc_word[2] =
394				    BE_64(tmpsha256.zc_word[2]);
395				drrw->drr_key.ddk_cksum.zc_word[3] =
396				    BE_64(tmpsha256.zc_word[3]);
397				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
398				drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
399			}
400
401			dataref.ref_guid = drrw->drr_toguid;
402			dataref.ref_object = drrw->drr_object;
403			dataref.ref_offset = drrw->drr_offset;
404
405			if (ddt_update(dda->dedup_hdl, &ddt,
406			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
407			    &dataref)) {
408				dmu_replay_record_t wbr_drr = {0};
409				struct drr_write_byref *wbr_drrr =
410				    &wbr_drr.drr_u.drr_write_byref;
411
412				/* block already present in stream */
413				wbr_drr.drr_type = DRR_WRITE_BYREF;
414
415				wbr_drrr->drr_object = drrw->drr_object;
416				wbr_drrr->drr_offset = drrw->drr_offset;
417				wbr_drrr->drr_length = drrw->drr_logical_size;
418				wbr_drrr->drr_toguid = drrw->drr_toguid;
419				wbr_drrr->drr_refguid = dataref.ref_guid;
420				wbr_drrr->drr_refobject =
421				    dataref.ref_object;
422				wbr_drrr->drr_refoffset =
423				    dataref.ref_offset;
424
425				wbr_drrr->drr_checksumtype =
426				    drrw->drr_checksumtype;
427				wbr_drrr->drr_checksumflags =
428				    drrw->drr_checksumtype;
429				wbr_drrr->drr_key.ddk_cksum =
430				    drrw->drr_key.ddk_cksum;
431				wbr_drrr->drr_key.ddk_prop =
432				    drrw->drr_key.ddk_prop;
433
434				if (dump_record(&wbr_drr, NULL, 0,
435				    &stream_cksum, outfd) != 0)
436					goto out;
437			} else {
438				/* block not previously seen */
439				if (dump_record(drr, buf, payload_size,
440				    &stream_cksum, outfd) != 0)
441					goto out;
442			}
443			break;
444		}
445
446		case DRR_WRITE_EMBEDDED:
447		{
448			struct drr_write_embedded *drrwe =
449			    &drr->drr_u.drr_write_embedded;
450			(void) ssread(buf,
451			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
452			if (dump_record(drr, buf,
453			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
454			    &stream_cksum, outfd) != 0)
455				goto out;
456			break;
457		}
458
459		case DRR_FREE:
460		{
461			if (dump_record(drr, NULL, 0, &stream_cksum,
462			    outfd) != 0)
463				goto out;
464			break;
465		}
466
467		default:
468			(void) fprintf(stderr, "INVALID record type 0x%x\n",
469			    drr->drr_type);
470			/* should never happen, so assert */
471			assert(B_FALSE);
472		}
473	}
474out:
475	umem_cache_destroy(ddt.ddecache);
476	free(ddt.dedup_hash_array);
477	free(buf);
478	(void) fclose(ofp);
479
480	return (NULL);
481}
482
483/*
484 * Routines for dealing with the AVL tree of fs-nvlists
485 */
/*
 * One snapshot in the guid-ordered AVL tree built by fsavl_create().
 * fn_nvfs and fn_snapname point into the nvlist the tree was built from;
 * fsavl_destroy() frees only the node itself.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage */
	nvlist_t *fn_nvfs;	/* nvlist of the containing filesystem */
	char *fn_snapname;	/* name of the snapshot's nvpair */
	uint64_t fn_guid;	/* snapshot guid; the sort key */
} fsavl_node_t;
492
493static int
494fsavl_compare(const void *arg1, const void *arg2)
495{
496	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
497	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
498
499	return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
500}
501
502/*
503 * Given the GUID of a snapshot, find its containing filesystem and
504 * (optionally) name.
505 */
506static nvlist_t *
507fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
508{
509	fsavl_node_t fn_find;
510	fsavl_node_t *fn;
511
512	fn_find.fn_guid = snapguid;
513
514	fn = avl_find(avl, &fn_find, NULL);
515	if (fn) {
516		if (snapname)
517			*snapname = fn->fn_snapname;
518		return (fn->fn_nvfs);
519	}
520	return (NULL);
521}
522
523static void
524fsavl_destroy(avl_tree_t *avl)
525{
526	fsavl_node_t *fn;
527	void *cookie;
528
529	if (avl == NULL)
530		return;
531
532	cookie = NULL;
533	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
534		free(fn);
535	avl_destroy(avl);
536	free(avl);
537}
538
539/*
540 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
541 */
542static avl_tree_t *
543fsavl_create(nvlist_t *fss)
544{
545	avl_tree_t *fsavl;
546	nvpair_t *fselem = NULL;
547
548	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
549		return (NULL);
550
551	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
552	    offsetof(fsavl_node_t, fn_node));
553
554	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
555		nvlist_t *nvfs, *snaps;
556		nvpair_t *snapelem = NULL;
557
558		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
559		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
560
561		while ((snapelem =
562		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
563			fsavl_node_t *fn;
564			uint64_t guid;
565
566			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
567			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
568				fsavl_destroy(fsavl);
569				return (NULL);
570			}
571			fn->fn_nvfs = nvfs;
572			fn->fn_snapname = nvpair_name(snapelem);
573			fn->fn_guid = guid;
574
575			/*
576			 * Note: if there are multiple snaps with the
577			 * same GUID, we ignore all but one.
578			 */
579			if (avl_find(fsavl, fn, NULL) == NULL)
580				avl_add(fsavl, fn);
581			else
582				free(fn);
583		}
584	}
585
586	return (fsavl);
587}
588
589/*
590 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
591 */
/*
 * State threaded through send_iterate_fs() and send_iterate_snap() while
 * gather_nvlist() builds the description of the datasets being sent.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;	/* snapshot name -> guid, current dataset */
	nvlist_t *fss;		/* guid string -> per-dataset nvlist */
	nvlist_t *snapprops;	/* snapshot name -> props, current dataset */

	/* send-receive configuration, does not change during traversal */
	const char *fsname;	/* top-level dataset; used in error messages */
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;	/* descend into child filesystems */
	boolean_t replicate;	/* when set, snapshots are not txg-bounded */
	boolean_t verbose;	/* report skipped datasets/snapshots */

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
642
643static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
644
645static int
646send_iterate_snap(zfs_handle_t *zhp, void *arg)
647{
648	send_data_t *sd = arg;
649	uint64_t guid = zhp->zfs_dmustats.dds_guid;
650	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
651	char *snapname;
652	nvlist_t *nv;
653
654	snapname = strrchr(zhp->zfs_name, '@')+1;
655
656	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
657		if (sd->verbose) {
658			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
659			    "skipping snapshot %s because it was created "
660			    "after the destination snapshot (%s)\n"),
661			    zhp->zfs_name, sd->tosnap);
662		}
663		zfs_close(zhp);
664		return (0);
665	}
666
667	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
668	/*
669	 * NB: if there is no fromsnap here (it's a newly created fs in
670	 * an incremental replication), we will substitute the tosnap.
671	 */
672	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
673	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
674	    strcmp(snapname, sd->tosnap) == 0)) {
675		sd->parent_fromsnap_guid = guid;
676	}
677
678	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
679	send_iterate_prop(zhp, nv);
680	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
681	nvlist_free(nv);
682
683	zfs_close(zhp);
684	return (0);
685}
686
687static void
688send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
689{
690	nvpair_t *elem = NULL;
691
692	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
693		char *propname = nvpair_name(elem);
694		zfs_prop_t prop = zfs_name_to_prop(propname);
695		nvlist_t *propnv;
696
697		if (!zfs_prop_user(propname)) {
698			/*
699			 * Realistically, this should never happen.  However,
700			 * we want the ability to add DSL properties without
701			 * needing to make incompatible version changes.  We
702			 * need to ignore unknown properties to allow older
703			 * software to still send datasets containing these
704			 * properties, with the unknown properties elided.
705			 */
706			if (prop == ZPROP_INVAL)
707				continue;
708
709			if (zfs_prop_readonly(prop))
710				continue;
711		}
712
713		verify(nvpair_value_nvlist(elem, &propnv) == 0);
714		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
715		    prop == ZFS_PROP_REFQUOTA ||
716		    prop == ZFS_PROP_REFRESERVATION) {
717			char *source;
718			uint64_t value;
719			verify(nvlist_lookup_uint64(propnv,
720			    ZPROP_VALUE, &value) == 0);
721			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
722				continue;
723			/*
724			 * May have no source before SPA_VERSION_RECVD_PROPS,
725			 * but is still modifiable.
726			 */
727			if (nvlist_lookup_string(propnv,
728			    ZPROP_SOURCE, &source) == 0) {
729				if ((strcmp(source, zhp->zfs_name) != 0) &&
730				    (strcmp(source,
731				    ZPROP_SOURCE_VAL_RECVD) != 0))
732					continue;
733			}
734		} else {
735			char *source;
736			if (nvlist_lookup_string(propnv,
737			    ZPROP_SOURCE, &source) != 0)
738				continue;
739			if ((strcmp(source, zhp->zfs_name) != 0) &&
740			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
741				continue;
742		}
743
744		if (zfs_prop_user(propname) ||
745		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
746			char *value;
747			verify(nvlist_lookup_string(propnv,
748			    ZPROP_VALUE, &value) == 0);
749			VERIFY(0 == nvlist_add_string(nv, propname, value));
750		} else {
751			uint64_t value;
752			verify(nvlist_lookup_uint64(propnv,
753			    ZPROP_VALUE, &value) == 0);
754			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
755		}
756	}
757}
758
759/*
760 * returns snapshot creation txg
761 * and returns 0 if the snapshot does not exist
762 */
763static uint64_t
764get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
765{
766	char name[ZFS_MAX_DATASET_NAME_LEN];
767	uint64_t txg = 0;
768
769	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
770		return (txg);
771
772	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
773	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
774		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
775		if (zhp != NULL) {
776			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
777			zfs_close(zhp);
778		}
779	}
780
781	return (txg);
782}
783
784/*
785 * recursively generate nvlists describing datasets.  See comment
786 * for the data structure send_data_t above for description of contents
787 * of the nvlist.
788 */
789static int
790send_iterate_fs(zfs_handle_t *zhp, void *arg)
791{
792	send_data_t *sd = arg;
793	nvlist_t *nvfs, *nv;
794	int rv = 0;
795	uint64_t min_txg = 0, max_txg = 0;
796	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
797	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
798	uint64_t tosnap_txg_save = sd->tosnap_txg;
799	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
800	uint64_t guid = zhp->zfs_dmustats.dds_guid;
801	uint64_t fromsnap_txg, tosnap_txg;
802	char guidstring[64];
803
804	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
805	if (fromsnap_txg != 0)
806		sd->fromsnap_txg = fromsnap_txg;
807
808	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
809	if (tosnap_txg != 0)
810		sd->tosnap_txg = tosnap_txg;
811
812	/*
813	 * on the send side, if the current dataset does not have tosnap,
814	 * perform two additional checks:
815	 *
816	 * - skip sending the current dataset if it was created later than
817	 *   the parent tosnap
818	 * - return error if the current dataset was created earlier than
819	 *   the parent tosnap
820	 */
821	if (sd->tosnap != NULL && tosnap_txg == 0) {
822		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
823			if (sd->verbose) {
824				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
825				    "skipping dataset %s: snapshot %s does "
826				    "not exist\n"), zhp->zfs_name, sd->tosnap);
827			}
828		} else {
829			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
830			    "cannot send %s@%s%s: snapshot %s@%s does not "
831			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
832			    dgettext(TEXT_DOMAIN, " recursively") : "",
833			    zhp->zfs_name, sd->tosnap);
834			rv = -1;
835		}
836		goto out;
837	}
838
839	nvfs = fnvlist_alloc();
840	fnvlist_add_string(nvfs, "name", zhp->zfs_name);
841	fnvlist_add_uint64(nvfs, "parentfromsnap",
842	    sd->parent_fromsnap_guid);
843
844	if (zhp->zfs_dmustats.dds_origin[0]) {
845		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
846		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
847		if (origin == NULL) {
848			rv = -1;
849			goto out;
850		}
851		fnvlist_add_uint64(nvfs, "origin",
852		    origin->zfs_dmustats.dds_guid);
853	}
854
855	/* iterate over props */
856	nv = fnvlist_alloc();
857	send_iterate_prop(zhp, nv);
858	fnvlist_add_nvlist(nvfs, "props", nv);
859	fnvlist_free(nv);
860
861	/* iterate over snaps, and set sd->parent_fromsnap_guid */
862	sd->parent_fromsnap_guid = 0;
863	sd->parent_snaps = fnvlist_alloc();
864	sd->snapprops = fnvlist_alloc();
865	if (!sd->replicate && fromsnap_txg != 0)
866		min_txg = fromsnap_txg;
867	if (!sd->replicate && tosnap_txg != 0)
868		max_txg = tosnap_txg;
869	(void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd,
870	    min_txg, max_txg);
871	fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
872	fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
873	fnvlist_free(sd->parent_snaps);
874	fnvlist_free(sd->snapprops);
875
876	/* add this fs to nvlist */
877	(void) snprintf(guidstring, sizeof (guidstring),
878	    "0x%llx", (longlong_t)guid);
879	fnvlist_add_nvlist(sd->fss, guidstring, nvfs);
880	fnvlist_free(nvfs);
881
882	/* iterate over children */
883	if (sd->recursive)
884		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
885
886out:
887	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
888	sd->fromsnap_txg = fromsnap_txg_save;
889	sd->tosnap_txg = tosnap_txg_save;
890
891	zfs_close(zhp);
892	return (rv);
893}
894
895static int
896gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
897    const char *tosnap, boolean_t recursive, boolean_t replicate,
898    boolean_t verbose, nvlist_t **nvlp, avl_tree_t **avlp)
899{
900	zfs_handle_t *zhp;
901	send_data_t sd = { 0 };
902	int error;
903
904	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
905	if (zhp == NULL)
906		return (EZFS_BADTYPE);
907
908	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
909	sd.fsname = fsname;
910	sd.fromsnap = fromsnap;
911	sd.tosnap = tosnap;
912	sd.recursive = recursive;
913	sd.replicate = replicate;
914	sd.verbose = verbose;
915
916	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
917		nvlist_free(sd.fss);
918		if (avlp != NULL)
919			*avlp = NULL;
920		*nvlp = NULL;
921		return (error);
922	}
923
924	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
925		nvlist_free(sd.fss);
926		*nvlp = NULL;
927		return (EZFS_NOMEM);
928	}
929
930	*nvlp = sd.fss;
931	return (0);
932}
933
934/*
935 * Routines specific to "zfs send"
936 */
/*
 * State shared by the routines that emit a send stream.
 * NOTE(review): most fields are consumed by code outside this excerpt;
 * only snapholds and holdtag are annotated from visible usage.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
	boolean_t progressastitle;
	boolean_t large_block, compress;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are wanted */
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	/* tag that gather_holds() records for each snapshot */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	uint64_t size;
} send_dump_data_t;
959
960static int
961zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
962    enum lzc_send_flags flags, uint64_t *spacep)
963{
964	libzfs_handle_t *hdl = zhp->zfs_hdl;
965	int error;
966
967	assert(snapname != NULL);
968	error = lzc_send_space(snapname, from, flags, spacep);
969
970	if (error != 0) {
971		char errbuf[1024];
972		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
973		    "warning: cannot estimate space for '%s'"), snapname);
974
975		switch (error) {
976		case EXDEV:
977			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
978			    "not an earlier snapshot from the same fs"));
979			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
980
981		case ENOENT:
982			if (zfs_dataset_exists(hdl, snapname,
983			    ZFS_TYPE_SNAPSHOT)) {
984				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
985				    "incremental source (%s) does not exist"),
986				    snapname);
987			}
988			return (zfs_error(hdl, EZFS_NOENT, errbuf));
989
990		case EDQUOT:
991		case EFBIG:
992		case EIO:
993		case ENOLINK:
994		case ENOSPC:
995		case ENXIO:
996		case EPIPE:
997		case ERANGE:
998		case EFAULT:
999		case EROFS:
1000		case EINVAL:
1001			zfs_error_aux(hdl, strerror(error));
1002			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1003
1004		default:
1005			return (zfs_standard_error(hdl, error, errbuf));
1006		}
1007	}
1008
1009	return (0);
1010}
1011
1012/*
1013 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1014 * NULL) to the file descriptor specified by outfd.
1015 */
1016static int
1017dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1018    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1019    nvlist_t *debugnv)
1020{
1021	zfs_cmd_t zc = { 0 };
1022	libzfs_handle_t *hdl = zhp->zfs_hdl;
1023	nvlist_t *thisdbg;
1024
1025	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1026	assert(fromsnap_obj == 0 || !fromorigin);
1027
1028	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1029	zc.zc_cookie = outfd;
1030	zc.zc_obj = fromorigin;
1031	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1032	zc.zc_fromobj = fromsnap_obj;
1033	zc.zc_flags = flags;
1034
1035	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1036	if (fromsnap && fromsnap[0] != '\0') {
1037		VERIFY(0 == nvlist_add_string(thisdbg,
1038		    "fromsnap", fromsnap));
1039	}
1040
1041	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1042		char errbuf[1024];
1043		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1044		    "warning: cannot send '%s'"), zhp->zfs_name);
1045
1046		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1047		if (debugnv) {
1048			VERIFY(0 == nvlist_add_nvlist(debugnv,
1049			    zhp->zfs_name, thisdbg));
1050		}
1051		nvlist_free(thisdbg);
1052
1053		switch (errno) {
1054		case EXDEV:
1055			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1056			    "not an earlier snapshot from the same fs"));
1057			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1058
1059		case ENOENT:
1060			if (zfs_dataset_exists(hdl, zc.zc_name,
1061			    ZFS_TYPE_SNAPSHOT)) {
1062				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1063				    "incremental source (@%s) does not exist"),
1064				    zc.zc_value);
1065			}
1066			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1067
1068		case EDQUOT:
1069		case EFBIG:
1070		case EIO:
1071		case ENOLINK:
1072		case ENOSPC:
1073#ifdef illumos
1074		case ENOSTR:
1075#endif
1076		case ENXIO:
1077		case EPIPE:
1078		case ERANGE:
1079		case EFAULT:
1080		case EROFS:
1081			zfs_error_aux(hdl, strerror(errno));
1082			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1083
1084		default:
1085			return (zfs_standard_error(hdl, errno, errbuf));
1086		}
1087	}
1088
1089	if (debugnv)
1090		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1091	nvlist_free(thisdbg);
1092
1093	return (0);
1094}
1095
1096static void
1097gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1098{
1099	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1100
1101	/*
1102	 * zfs_send() only sets snapholds for sends that need them,
1103	 * e.g. replication and doall.
1104	 */
1105	if (sdd->snapholds == NULL)
1106		return;
1107
1108	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1109}
1110
1111static void *
1112send_progress_thread(void *arg)
1113{
1114	progress_arg_t *pa = arg;
1115	zfs_cmd_t zc = { 0 };
1116	zfs_handle_t *zhp = pa->pa_zhp;
1117	libzfs_handle_t *hdl = zhp->zfs_hdl;
1118	unsigned long long bytes, total;
1119	char buf[16];
1120	time_t t;
1121	struct tm *tm;
1122
1123	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1124
1125	if (!pa->pa_parsable && !pa->pa_astitle)
1126		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1127
1128	/*
1129	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1130	 */
1131	for (;;) {
1132		(void) sleep(1);
1133
1134		zc.zc_cookie = pa->pa_fd;
1135		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1136			return ((void *)-1);
1137
1138		(void) time(&t);
1139		tm = localtime(&t);
1140		bytes = zc.zc_cookie;
1141
1142		if (pa->pa_astitle) {
1143			int pct;
1144			if (pa->pa_size > bytes)
1145				pct = 100 * bytes / pa->pa_size;
1146			else
1147				pct = 100;
1148
1149			setproctitle("sending %s (%d%%: %llu/%llu)",
1150			    zhp->zfs_name, pct, bytes, pa->pa_size);
1151		} else if (pa->pa_parsable) {
1152			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1153			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1154			    bytes, zhp->zfs_name);
1155		} else {
1156			zfs_nicenum(bytes, buf, sizeof (buf));
1157			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1158			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1159			    buf, zhp->zfs_name);
1160		}
1161	}
1162}
1163
1164static void
1165send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1166    uint64_t size, boolean_t parsable)
1167{
1168	if (parsable) {
1169		if (fromsnap != NULL) {
1170			(void) fprintf(fout, "incremental\t%s\t%s",
1171			    fromsnap, tosnap);
1172		} else {
1173			(void) fprintf(fout, "full\t%s",
1174			    tosnap);
1175		}
1176	} else {
1177		if (fromsnap != NULL) {
1178			if (strchr(fromsnap, '@') == NULL &&
1179			    strchr(fromsnap, '#') == NULL) {
1180				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1181				    "send from @%s to %s"),
1182				    fromsnap, tosnap);
1183			} else {
1184				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1185				    "send from %s to %s"),
1186				    fromsnap, tosnap);
1187			}
1188		} else {
1189			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1190			    "full send of %s"),
1191			    tosnap);
1192		}
1193	}
1194
1195	if (parsable) {
1196		(void) fprintf(fout, "\t%llu",
1197		    (longlong_t)size);
1198	} else if (size != 0) {
1199		char buf[16];
1200		zfs_nicenum(size, buf, sizeof (buf));
1201		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1202		    " estimated size is %s"), buf);
1203	}
1204	(void) fprintf(fout, "\n");
1205}
1206
1207static int
1208dump_snapshot(zfs_handle_t *zhp, void *arg)
1209{
1210	send_dump_data_t *sdd = arg;
1211	progress_arg_t pa = { 0 };
1212	pthread_t tid;
1213	char *thissnap;
1214	enum lzc_send_flags flags = 0;
1215	int err;
1216	boolean_t isfromsnap, istosnap, fromorigin;
1217	boolean_t exclude = B_FALSE;
1218	FILE *fout = sdd->std_out ? stdout : stderr;
1219
1220	err = 0;
1221	thissnap = strchr(zhp->zfs_name, '@') + 1;
1222	isfromsnap = (sdd->fromsnap != NULL &&
1223	    strcmp(sdd->fromsnap, thissnap) == 0);
1224
1225	if (!sdd->seenfrom && isfromsnap) {
1226		gather_holds(zhp, sdd);
1227		sdd->seenfrom = B_TRUE;
1228		(void) strcpy(sdd->prevsnap, thissnap);
1229		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1230		zfs_close(zhp);
1231		return (0);
1232	}
1233
1234	if (sdd->seento || !sdd->seenfrom) {
1235		zfs_close(zhp);
1236		return (0);
1237	}
1238
1239	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1240	if (istosnap)
1241		sdd->seento = B_TRUE;
1242
1243	if (sdd->large_block)
1244		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1245	if (sdd->embed_data)
1246		flags |= LZC_SEND_FLAG_EMBED_DATA;
1247	if (sdd->compress)
1248		flags |= LZC_SEND_FLAG_COMPRESS;
1249
1250	if (!sdd->doall && !isfromsnap && !istosnap) {
1251		if (sdd->replicate) {
1252			char *snapname;
1253			nvlist_t *snapprops;
1254			/*
1255			 * Filter out all intermediate snapshots except origin
1256			 * snapshots needed to replicate clones.
1257			 */
1258			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1259			    zhp->zfs_dmustats.dds_guid, &snapname);
1260
1261			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1262			    "snapprops", &snapprops));
1263			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1264			    thissnap, &snapprops));
1265			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1266		} else {
1267			exclude = B_TRUE;
1268		}
1269	}
1270
1271	/*
1272	 * If a filter function exists, call it to determine whether
1273	 * this snapshot will be sent.
1274	 */
1275	if (exclude || (sdd->filter_cb != NULL &&
1276	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1277		/*
1278		 * This snapshot is filtered out.  Don't send it, and don't
1279		 * set prevsnap_obj, so it will be as if this snapshot didn't
1280		 * exist, and the next accepted snapshot will be sent as
1281		 * an incremental from the last accepted one, or as the
1282		 * first (and full) snapshot in the case of a replication,
1283		 * non-incremental send.
1284		 */
1285		zfs_close(zhp);
1286		return (0);
1287	}
1288
1289	gather_holds(zhp, sdd);
1290	fromorigin = sdd->prevsnap[0] == '\0' &&
1291	    (sdd->fromorigin || sdd->replicate);
1292
1293	if (sdd->verbose || sdd->progress) {
1294		uint64_t size = 0;
1295		char fromds[ZFS_MAX_DATASET_NAME_LEN];
1296
1297		if (sdd->prevsnap[0] != '\0') {
1298			(void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1299			*(strchr(fromds, '@') + 1) = '\0';
1300			(void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1301		}
1302		if (zfs_send_space(zhp, zhp->zfs_name,
1303		    sdd->prevsnap[0] ? fromds : NULL, flags, &size) != 0) {
1304			size = 0; /* cannot estimate send space */
1305		} else {
1306			send_print_verbose(fout, zhp->zfs_name,
1307			    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1308			    size, sdd->parsable);
1309		}
1310		sdd->size += size;
1311	}
1312
1313	if (!sdd->dryrun) {
1314		/*
1315		 * If progress reporting is requested, spawn a new thread to
1316		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1317		 */
1318		if (sdd->progress) {
1319			pa.pa_zhp = zhp;
1320			pa.pa_fd = sdd->outfd;
1321			pa.pa_parsable = sdd->parsable;
1322			pa.pa_size = sdd->size;
1323			pa.pa_astitle = sdd->progressastitle;
1324
1325			if ((err = pthread_create(&tid, NULL,
1326			    send_progress_thread, &pa)) != 0) {
1327				zfs_close(zhp);
1328				return (err);
1329			}
1330		}
1331
1332		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1333		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1334
1335		if (sdd->progress) {
1336			(void) pthread_cancel(tid);
1337			(void) pthread_join(tid, NULL);
1338		}
1339	}
1340
1341	(void) strcpy(sdd->prevsnap, thissnap);
1342	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1343	zfs_close(zhp);
1344	return (err);
1345}
1346
1347static int
1348dump_filesystem(zfs_handle_t *zhp, void *arg)
1349{
1350	int rv = 0;
1351	send_dump_data_t *sdd = arg;
1352	boolean_t missingfrom = B_FALSE;
1353	zfs_cmd_t zc = { 0 };
1354	uint64_t min_txg = 0, max_txg = 0;
1355
1356	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1357	    zhp->zfs_name, sdd->tosnap);
1358	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1359		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1360		    "WARNING: could not send %s@%s: does not exist\n"),
1361		    zhp->zfs_name, sdd->tosnap);
1362		sdd->err = B_TRUE;
1363		return (0);
1364	}
1365
1366	if (sdd->replicate && sdd->fromsnap) {
1367		/*
1368		 * If this fs does not have fromsnap, and we're doing
1369		 * recursive, we need to send a full stream from the
1370		 * beginning (or an incremental from the origin if this
1371		 * is a clone).  If we're doing non-recursive, then let
1372		 * them get the error.
1373		 */
1374		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1375		    zhp->zfs_name, sdd->fromsnap);
1376		if (ioctl(zhp->zfs_hdl->libzfs_fd,
1377		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1378			missingfrom = B_TRUE;
1379		}
1380	}
1381
1382	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1383	sdd->prevsnap_obj = 0;
1384	if (sdd->fromsnap == NULL || missingfrom)
1385		sdd->seenfrom = B_TRUE;
1386
1387	if (!sdd->replicate && sdd->fromsnap != NULL)
1388		min_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1389		    sdd->fromsnap);
1390	if (!sdd->replicate && sdd->tosnap != NULL)
1391		max_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1392		    sdd->tosnap);
1393
1394	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg,
1395	    min_txg, max_txg);
1396	if (!sdd->seenfrom) {
1397		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1398		    "WARNING: could not send %s@%s:\n"
1399		    "incremental source (%s@%s) does not exist\n"),
1400		    zhp->zfs_name, sdd->tosnap,
1401		    zhp->zfs_name, sdd->fromsnap);
1402		sdd->err = B_TRUE;
1403	} else if (!sdd->seento) {
1404		if (sdd->fromsnap) {
1405			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1406			    "WARNING: could not send %s@%s:\n"
1407			    "incremental source (%s@%s) "
1408			    "is not earlier than it\n"),
1409			    zhp->zfs_name, sdd->tosnap,
1410			    zhp->zfs_name, sdd->fromsnap);
1411		} else {
1412			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1413			    "WARNING: "
1414			    "could not send %s@%s: does not exist\n"),
1415			    zhp->zfs_name, sdd->tosnap);
1416		}
1417		sdd->err = B_TRUE;
1418	}
1419
1420	return (rv);
1421}
1422
1423static int
1424dump_filesystems(zfs_handle_t *rzhp, void *arg)
1425{
1426	send_dump_data_t *sdd = arg;
1427	nvpair_t *fspair;
1428	boolean_t needagain, progress;
1429
1430	if (!sdd->replicate)
1431		return (dump_filesystem(rzhp, sdd));
1432
1433	/* Mark the clone origin snapshots. */
1434	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1435	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1436		nvlist_t *nvfs;
1437		uint64_t origin_guid = 0;
1438
1439		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1440		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1441		if (origin_guid != 0) {
1442			char *snapname;
1443			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1444			    origin_guid, &snapname);
1445			if (origin_nv != NULL) {
1446				nvlist_t *snapprops;
1447				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1448				    "snapprops", &snapprops));
1449				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1450				    snapname, &snapprops));
1451				VERIFY(0 == nvlist_add_boolean(
1452				    snapprops, "is_clone_origin"));
1453			}
1454		}
1455	}
1456again:
1457	needagain = progress = B_FALSE;
1458	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1459	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1460		nvlist_t *fslist, *parent_nv;
1461		char *fsname;
1462		zfs_handle_t *zhp;
1463		int err;
1464		uint64_t origin_guid = 0;
1465		uint64_t parent_guid = 0;
1466
1467		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1468		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1469			continue;
1470
1471		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1472		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1473		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1474		    &parent_guid);
1475
1476		if (parent_guid != 0) {
1477			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1478			if (!nvlist_exists(parent_nv, "sent")) {
1479				/* parent has not been sent; skip this one */
1480				needagain = B_TRUE;
1481				continue;
1482			}
1483		}
1484
1485		if (origin_guid != 0) {
1486			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1487			    origin_guid, NULL);
1488			if (origin_nv != NULL &&
1489			    !nvlist_exists(origin_nv, "sent")) {
1490				/*
1491				 * origin has not been sent yet;
1492				 * skip this clone.
1493				 */
1494				needagain = B_TRUE;
1495				continue;
1496			}
1497		}
1498
1499		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1500		if (zhp == NULL)
1501			return (-1);
1502		err = dump_filesystem(zhp, sdd);
1503		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1504		progress = B_TRUE;
1505		zfs_close(zhp);
1506		if (err)
1507			return (err);
1508	}
1509	if (needagain) {
1510		assert(progress);
1511		goto again;
1512	}
1513
1514	/* clean out the sent flags in case we reuse this fss */
1515	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1516	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1517		nvlist_t *fslist;
1518
1519		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1520		(void) nvlist_remove_all(fslist, "sent");
1521	}
1522
1523	return (0);
1524}
1525
1526nvlist_t *
1527zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1528{
1529	unsigned int version;
1530	int nread;
1531	unsigned long long checksum, packed_len;
1532
1533	/*
1534	 * Decode token header, which is:
1535	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1536	 * Note that the only supported token version is 1.
1537	 */
1538	nread = sscanf(token, "%u-%llx-%llx-",
1539	    &version, &checksum, &packed_len);
1540	if (nread != 3) {
1541		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1542		    "resume token is corrupt (invalid format)"));
1543		return (NULL);
1544	}
1545
1546	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1547		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1548		    "resume token is corrupt (invalid version %u)"),
1549		    version);
1550		return (NULL);
1551	}
1552
1553	/* convert hexadecimal representation to binary */
1554	token = strrchr(token, '-') + 1;
1555	int len = strlen(token) / 2;
1556	unsigned char *compressed = zfs_alloc(hdl, len);
1557	for (int i = 0; i < len; i++) {
1558		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1559		if (nread != 1) {
1560			free(compressed);
1561			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1562			    "resume token is corrupt "
1563			    "(payload is not hex-encoded)"));
1564			return (NULL);
1565		}
1566	}
1567
1568	/* verify checksum */
1569	zio_cksum_t cksum;
1570	fletcher_4_native(compressed, len, NULL, &cksum);
1571	if (cksum.zc_word[0] != checksum) {
1572		free(compressed);
1573		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1574		    "resume token is corrupt (incorrect checksum)"));
1575		return (NULL);
1576	}
1577
1578	/* uncompress */
1579	void *packed = zfs_alloc(hdl, packed_len);
1580	uLongf packed_len_long = packed_len;
1581	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1582	    packed_len_long != packed_len) {
1583		free(packed);
1584		free(compressed);
1585		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1586		    "resume token is corrupt (decompression failed)"));
1587		return (NULL);
1588	}
1589
1590	/* unpack nvlist */
1591	nvlist_t *nv;
1592	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1593	free(packed);
1594	free(compressed);
1595	if (error != 0) {
1596		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1597		    "resume token is corrupt (nvlist_unpack failed)"));
1598		return (NULL);
1599	}
1600	return (nv);
1601}
1602
1603int
1604zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1605    const char *resume_token)
1606{
1607	char errbuf[1024];
1608	char *toname;
1609	char *fromname = NULL;
1610	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1611	zfs_handle_t *zhp;
1612	int error = 0;
1613	char name[ZFS_MAX_DATASET_NAME_LEN];
1614	enum lzc_send_flags lzc_flags = 0;
1615	uint64_t size = 0;
1616	FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr;
1617
1618	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1619	    "cannot resume send"));
1620
1621	nvlist_t *resume_nvl =
1622	    zfs_send_resume_token_to_nvlist(hdl, resume_token);
1623	if (resume_nvl == NULL) {
1624		/*
1625		 * zfs_error_aux has already been set by
1626		 * zfs_send_resume_token_to_nvlist
1627		 */
1628		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1629	}
1630	if (flags->verbose) {
1631		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1632		    "resume token contents:\n"));
1633		nvlist_print(fout, resume_nvl);
1634	}
1635
1636	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1637	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1638	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1639	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1640	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1641		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1642		    "resume token is corrupt"));
1643		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1644	}
1645	fromguid = 0;
1646	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1647
1648	if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
1649		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1650	if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1651		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1652	if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
1653		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1654
1655	if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1656		if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1657			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1658			    "'%s' is no longer the same snapshot used in "
1659			    "the initial send"), toname);
1660		} else {
1661			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1662			    "'%s' used in the initial send no longer exists"),
1663			    toname);
1664		}
1665		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1666	}
1667	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1668	if (zhp == NULL) {
1669		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1670		    "unable to access '%s'"), name);
1671		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1672	}
1673
1674	if (fromguid != 0) {
1675		if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1676			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1677			    "incremental source %#llx no longer exists"),
1678			    (longlong_t)fromguid);
1679			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1680		}
1681		fromname = name;
1682	}
1683
1684	if (flags->progress || flags->verbose) {
1685		error = lzc_send_space(zhp->zfs_name, fromname,
1686		    lzc_flags, &size);
1687		if (error == 0)
1688			size = MAX(0, (int64_t)(size - bytes));
1689	}
1690	if (flags->verbose) {
1691		send_print_verbose(fout, zhp->zfs_name, fromname,
1692		    size, flags->parsable);
1693	}
1694
1695	if (!flags->dryrun) {
1696		progress_arg_t pa = { 0 };
1697		pthread_t tid;
1698		/*
1699		 * If progress reporting is requested, spawn a new thread to
1700		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1701		 */
1702		if (flags->progress) {
1703			pa.pa_zhp = zhp;
1704			pa.pa_fd = outfd;
1705			pa.pa_parsable = flags->parsable;
1706			pa.pa_size = size;
1707			pa.pa_astitle = flags->progressastitle;
1708
1709			error = pthread_create(&tid, NULL,
1710			    send_progress_thread, &pa);
1711			if (error != 0) {
1712				zfs_close(zhp);
1713				return (error);
1714			}
1715		}
1716
1717		error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1718		    lzc_flags, resumeobj, resumeoff);
1719
1720		if (flags->progress) {
1721			(void) pthread_cancel(tid);
1722			(void) pthread_join(tid, NULL);
1723		}
1724
1725		char errbuf[1024];
1726		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1727		    "warning: cannot send '%s'"), zhp->zfs_name);
1728
1729		zfs_close(zhp);
1730
1731		switch (error) {
1732		case 0:
1733			return (0);
1734		case EXDEV:
1735		case ENOENT:
1736		case EDQUOT:
1737		case EFBIG:
1738		case EIO:
1739		case ENOLINK:
1740		case ENOSPC:
1741#ifdef illumos
1742		case ENOSTR:
1743#endif
1744		case ENXIO:
1745		case EPIPE:
1746		case ERANGE:
1747		case EFAULT:
1748		case EROFS:
1749			zfs_error_aux(hdl, strerror(errno));
1750			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1751
1752		default:
1753			return (zfs_standard_error(hdl, errno, errbuf));
1754		}
1755	}
1756
1757
1758	zfs_close(zhp);
1759
1760	return (error);
1761}
1762
1763/*
1764 * Generate a send stream for the dataset identified by the argument zhp.
1765 *
1766 * The content of the send stream is the snapshot identified by
1767 * 'tosnap'.  Incremental streams are requested in two ways:
1768 *     - from the snapshot identified by "fromsnap" (if non-null) or
1769 *     - from the origin of the dataset identified by zhp, which must
1770 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
1771 *	 is TRUE.
1772 *
1773 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1774 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1775 * if "replicate" is set.  If "doall" is set, dump all the intermediate
1776 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1777 * case too. If "props" is set, send properties.
1778 */
1779int
1780zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1781    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1782    void *cb_arg, nvlist_t **debugnvp)
1783{
1784	char errbuf[1024];
1785	send_dump_data_t sdd = { 0 };
1786	int err = 0;
1787	nvlist_t *fss = NULL;
1788	avl_tree_t *fsavl = NULL;
1789	static uint64_t holdseq;
1790	int spa_version;
1791	pthread_t tid = 0;
1792	int pipefd[2];
1793	dedup_arg_t dda = { 0 };
1794	int featureflags = 0;
1795	FILE *fout;
1796
1797	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1798	    "cannot send '%s'"), zhp->zfs_name);
1799
1800	if (fromsnap && fromsnap[0] == '\0') {
1801		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1802		    "zero-length incremental source"));
1803		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1804	}
1805
1806	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1807		uint64_t version;
1808		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1809		if (version >= ZPL_VERSION_SA) {
1810			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1811		}
1812	}
1813
1814	if (flags->dedup && !flags->dryrun) {
1815		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1816		    DMU_BACKUP_FEATURE_DEDUPPROPS);
1817		if ((err = pipe(pipefd)) != 0) {
1818			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1819			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1820			    errbuf));
1821		}
1822		dda.outputfd = outfd;
1823		dda.inputfd = pipefd[1];
1824		dda.dedup_hdl = zhp->zfs_hdl;
1825		if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
1826			(void) close(pipefd[0]);
1827			(void) close(pipefd[1]);
1828			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1829			return (zfs_error(zhp->zfs_hdl,
1830			    EZFS_THREADCREATEFAILED, errbuf));
1831		}
1832	}
1833
1834	if (flags->replicate || flags->doall || flags->props) {
1835		dmu_replay_record_t drr = { 0 };
1836		char *packbuf = NULL;
1837		size_t buflen = 0;
1838		zio_cksum_t zc = { 0 };
1839
1840		if (flags->replicate || flags->props) {
1841			nvlist_t *hdrnv;
1842
1843			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1844			if (fromsnap) {
1845				VERIFY(0 == nvlist_add_string(hdrnv,
1846				    "fromsnap", fromsnap));
1847			}
1848			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1849			if (!flags->replicate) {
1850				VERIFY(0 == nvlist_add_boolean(hdrnv,
1851				    "not_recursive"));
1852			}
1853
1854			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1855			    fromsnap, tosnap, flags->replicate,
1856			    flags->replicate, flags->verbose, &fss, &fsavl);
1857			if (err)
1858				goto err_out;
1859			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1860			err = nvlist_pack(hdrnv, &packbuf, &buflen,
1861			    NV_ENCODE_XDR, 0);
1862			if (debugnvp)
1863				*debugnvp = hdrnv;
1864			else
1865				nvlist_free(hdrnv);
1866			if (err)
1867				goto stderr_out;
1868		}
1869
1870		if (!flags->dryrun) {
1871			/* write first begin record */
1872			drr.drr_type = DRR_BEGIN;
1873			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1874			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1875			    drr_versioninfo, DMU_COMPOUNDSTREAM);
1876			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1877			    drr_versioninfo, featureflags);
1878			(void) snprintf(drr.drr_u.drr_begin.drr_toname,
1879			    sizeof (drr.drr_u.drr_begin.drr_toname),
1880			    "%s@%s", zhp->zfs_name, tosnap);
1881			drr.drr_payloadlen = buflen;
1882
1883			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1884			free(packbuf);
1885			if (err != 0)
1886				goto stderr_out;
1887
1888			/* write end record */
1889			bzero(&drr, sizeof (drr));
1890			drr.drr_type = DRR_END;
1891			drr.drr_u.drr_end.drr_checksum = zc;
1892			err = write(outfd, &drr, sizeof (drr));
1893			if (err == -1) {
1894				err = errno;
1895				goto stderr_out;
1896			}
1897
1898			err = 0;
1899		}
1900	}
1901
1902	/* dump each stream */
1903	sdd.fromsnap = fromsnap;
1904	sdd.tosnap = tosnap;
1905	if (tid != 0)
1906		sdd.outfd = pipefd[0];
1907	else
1908		sdd.outfd = outfd;
1909	sdd.replicate = flags->replicate;
1910	sdd.doall = flags->doall;
1911	sdd.fromorigin = flags->fromorigin;
1912	sdd.fss = fss;
1913	sdd.fsavl = fsavl;
1914	sdd.verbose = flags->verbose;
1915	sdd.parsable = flags->parsable;
1916	sdd.progress = flags->progress;
1917	sdd.progressastitle = flags->progressastitle;
1918	sdd.dryrun = flags->dryrun;
1919	sdd.large_block = flags->largeblock;
1920	sdd.embed_data = flags->embed_data;
1921	sdd.compress = flags->compress;
1922	sdd.filter_cb = filter_func;
1923	sdd.filter_cb_arg = cb_arg;
1924	if (debugnvp)
1925		sdd.debugnv = *debugnvp;
1926	if (sdd.verbose && sdd.dryrun)
1927		sdd.std_out = B_TRUE;
1928	fout = sdd.std_out ? stdout : stderr;
1929
1930	/*
1931	 * Some flags require that we place user holds on the datasets that are
1932	 * being sent so they don't get destroyed during the send. We can skip
1933	 * this step if the pool is imported read-only since the datasets cannot
1934	 * be destroyed.
1935	 */
1936	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1937	    ZPOOL_PROP_READONLY, NULL) &&
1938	    zfs_spa_version(zhp, &spa_version) == 0 &&
1939	    spa_version >= SPA_VERSION_USERREFS &&
1940	    (flags->doall || flags->replicate)) {
1941		++holdseq;
1942		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1943		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1944		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1945		if (sdd.cleanup_fd < 0) {
1946			err = errno;
1947			goto stderr_out;
1948		}
1949		sdd.snapholds = fnvlist_alloc();
1950	} else {
1951		sdd.cleanup_fd = -1;
1952		sdd.snapholds = NULL;
1953	}
1954	if (flags->progress || flags->verbose || sdd.snapholds != NULL) {
1955		/*
1956		 * Do a verbose no-op dry run to get all the verbose output
1957		 * or to gather snapshot hold's before generating any data,
1958		 * then do a non-verbose real run to generate the streams.
1959		 */
1960		sdd.dryrun = B_TRUE;
1961		err = dump_filesystems(zhp, &sdd);
1962
1963		if (err != 0)
1964			goto stderr_out;
1965
1966		if (flags->verbose) {
1967			if (flags->parsable) {
1968				(void) fprintf(fout, "size\t%llu\n",
1969				    (longlong_t)sdd.size);
1970			} else {
1971				char buf[16];
1972				zfs_nicenum(sdd.size, buf, sizeof (buf));
1973				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1974				    "total estimated size is %s\n"), buf);
1975			}
1976		}
1977
1978		/* Ensure no snaps found is treated as an error. */
1979		if (!sdd.seento) {
1980			err = ENOENT;
1981			goto err_out;
1982		}
1983
1984		/* Skip the second run if dryrun was requested. */
1985		if (flags->dryrun)
1986			goto err_out;
1987
1988		if (sdd.snapholds != NULL) {
1989			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1990			if (err != 0)
1991				goto stderr_out;
1992
1993			fnvlist_free(sdd.snapholds);
1994			sdd.snapholds = NULL;
1995		}
1996
1997		sdd.dryrun = B_FALSE;
1998		sdd.verbose = B_FALSE;
1999	}
2000
2001	err = dump_filesystems(zhp, &sdd);
2002	fsavl_destroy(fsavl);
2003	nvlist_free(fss);
2004
2005	/* Ensure no snaps found is treated as an error. */
2006	if (err == 0 && !sdd.seento)
2007		err = ENOENT;
2008
2009	if (tid != 0) {
2010		if (err != 0)
2011			(void) pthread_cancel(tid);
2012		(void) close(pipefd[0]);
2013		(void) pthread_join(tid, NULL);
2014	}
2015
2016	if (sdd.cleanup_fd != -1) {
2017		VERIFY(0 == close(sdd.cleanup_fd));
2018		sdd.cleanup_fd = -1;
2019	}
2020
2021	if (!flags->dryrun && (flags->replicate || flags->doall ||
2022	    flags->props)) {
2023		/*
2024		 * write final end record.  NB: want to do this even if
2025		 * there was some error, because it might not be totally
2026		 * failed.
2027		 */
2028		dmu_replay_record_t drr = { 0 };
2029		drr.drr_type = DRR_END;
2030		if (write(outfd, &drr, sizeof (drr)) == -1) {
2031			return (zfs_standard_error(zhp->zfs_hdl,
2032			    errno, errbuf));
2033		}
2034	}
2035
2036	return (err || sdd.err);
2037
2038stderr_out:
2039	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2040err_out:
2041	fsavl_destroy(fsavl);
2042	nvlist_free(fss);
2043	fnvlist_free(sdd.snapholds);
2044
2045	if (sdd.cleanup_fd != -1)
2046		VERIFY(0 == close(sdd.cleanup_fd));
2047	if (tid != 0) {
2048		(void) pthread_cancel(tid);
2049		(void) close(pipefd[0]);
2050		(void) pthread_join(tid, NULL);
2051	}
2052	return (err);
2053}
2054
2055int
2056zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags)
2057{
2058	int err = 0;
2059	libzfs_handle_t *hdl = zhp->zfs_hdl;
2060	enum lzc_send_flags lzc_flags = 0;
2061	FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr;
2062	char errbuf[1024];
2063
2064	if (flags.largeblock)
2065		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
2066	if (flags.embed_data)
2067		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
2068	if (flags.compress)
2069		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
2070
2071	if (flags.verbose) {
2072		uint64_t size = 0;
2073		err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size);
2074		if (err == 0) {
2075			send_print_verbose(fout, zhp->zfs_name, from, size,
2076			    flags.parsable);
2077			if (flags.parsable) {
2078				(void) fprintf(fout, "size\t%llu\n",
2079				    (longlong_t)size);
2080			} else {
2081				char buf[16];
2082				zfs_nicenum(size, buf, sizeof (buf));
2083				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2084				    "total estimated size is %s\n"), buf);
2085			}
2086		} else {
2087			(void) fprintf(stderr, "Cannot estimate send size: "
2088			    "%s\n", strerror(errno));
2089		}
2090	}
2091
2092	if (flags.dryrun)
2093		return (err);
2094
2095	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2096	    "warning: cannot send '%s'"), zhp->zfs_name);
2097
2098	err = lzc_send(zhp->zfs_name, from, fd, lzc_flags);
2099	if (err != 0) {
2100		switch (errno) {
2101		case EXDEV:
2102			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2103			    "not an earlier snapshot from the same fs"));
2104			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2105
2106		case ENOENT:
2107		case ESRCH:
2108			if (lzc_exists(zhp->zfs_name)) {
2109				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2110				    "incremental source (%s) does not exist"),
2111				    from);
2112			}
2113			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2114
2115		case EBUSY:
2116			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2117			    "target is busy; if a filesystem, "
2118			    "it must not be mounted"));
2119			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2120
2121		case EDQUOT:
2122		case EFBIG:
2123		case EIO:
2124		case ENOLINK:
2125		case ENOSPC:
2126#ifdef illumos
2127		case ENOSTR:
2128#endif
2129		case ENXIO:
2130		case EPIPE:
2131		case ERANGE:
2132		case EFAULT:
2133		case EROFS:
2134			zfs_error_aux(hdl, strerror(errno));
2135			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2136
2137		default:
2138			return (zfs_standard_error(hdl, errno, errbuf));
2139		}
2140	}
2141	return (err != 0);
2142}
2143
2144/*
2145 * Routines specific to "zfs recv"
2146 */
2147
2148static int
2149recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2150    boolean_t byteswap, zio_cksum_t *zc)
2151{
2152	char *cp = buf;
2153	int rv;
2154	int len = ilen;
2155
2156	assert(ilen <= SPA_MAXBLOCKSIZE);
2157
2158	do {
2159		rv = read(fd, cp, len);
2160		cp += rv;
2161		len -= rv;
2162	} while (rv > 0);
2163
2164	if (rv < 0 || len != 0) {
2165		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2166		    "failed to read from stream"));
2167		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2168		    "cannot receive")));
2169	}
2170
2171	if (zc) {
2172		if (byteswap)
2173			(void) fletcher_4_incremental_byteswap(buf, ilen, zc);
2174		else
2175			(void) fletcher_4_incremental_native(buf, ilen, zc);
2176	}
2177	return (0);
2178}
2179
2180static int
2181recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2182    boolean_t byteswap, zio_cksum_t *zc)
2183{
2184	char *buf;
2185	int err;
2186
2187	buf = zfs_alloc(hdl, len);
2188	if (buf == NULL)
2189		return (ENOMEM);
2190
2191	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2192	if (err != 0) {
2193		free(buf);
2194		return (err);
2195	}
2196
2197	err = nvlist_unpack(buf, len, nvp, 0);
2198	free(buf);
2199	if (err != 0) {
2200		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2201		    "stream (malformed nvlist)"));
2202		return (EINVAL);
2203	}
2204	return (0);
2205}
2206
2207static int
2208recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2209    int baselen, char *newname, recvflags_t *flags)
2210{
2211	static int seq;
2212	int err;
2213	prop_changelist_t *clp;
2214	zfs_handle_t *zhp;
2215
2216	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2217	if (zhp == NULL)
2218		return (-1);
2219	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2220	    flags->force ? MS_FORCE : 0);
2221	zfs_close(zhp);
2222	if (clp == NULL)
2223		return (-1);
2224	err = changelist_prefix(clp);
2225	if (err)
2226		return (err);
2227
2228	if (tryname) {
2229		(void) strcpy(newname, tryname);
2230		if (flags->verbose) {
2231			(void) printf("attempting rename %s to %s\n",
2232			    name, newname);
2233		}
2234		err = lzc_rename(name, newname);
2235		if (err == 0)
2236			changelist_rename(clp, name, tryname);
2237	} else {
2238		err = ENOENT;
2239	}
2240
2241	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2242		seq++;
2243
2244		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2245		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2246		if (flags->verbose) {
2247			(void) printf("failed - trying rename %s to %s\n",
2248			    name, newname);
2249		}
2250		err = lzc_rename(name, newname);
2251		if (err == 0)
2252			changelist_rename(clp, name, newname);
2253		if (err && flags->verbose) {
2254			(void) printf("failed (%u) - "
2255			    "will try again on next pass\n", errno);
2256		}
2257		err = EAGAIN;
2258	} else if (flags->verbose) {
2259		if (err == 0)
2260			(void) printf("success\n");
2261		else
2262			(void) printf("failed (%u)\n", errno);
2263	}
2264
2265	(void) changelist_postfix(clp);
2266	changelist_free(clp);
2267
2268	return (err);
2269}
2270
2271static int
2272recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2273    char *newname, recvflags_t *flags)
2274{
2275	int err = 0;
2276	prop_changelist_t *clp;
2277	zfs_handle_t *zhp;
2278	boolean_t defer = B_FALSE;
2279	int spa_version;
2280
2281	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2282	if (zhp == NULL)
2283		return (-1);
2284	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2285	    flags->force ? MS_FORCE : 0);
2286	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2287	    zfs_spa_version(zhp, &spa_version) == 0 &&
2288	    spa_version >= SPA_VERSION_USERREFS)
2289		defer = B_TRUE;
2290	zfs_close(zhp);
2291	if (clp == NULL)
2292		return (-1);
2293	err = changelist_prefix(clp);
2294	if (err)
2295		return (err);
2296
2297	if (flags->verbose)
2298		(void) printf("attempting destroy %s\n", name);
2299	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
2300		nvlist_t *nv = fnvlist_alloc();
2301		fnvlist_add_boolean(nv, name);
2302		err = lzc_destroy_snaps(nv, defer, NULL);
2303		fnvlist_free(nv);
2304	} else {
2305		err = lzc_destroy(name);
2306	}
2307	if (err == 0) {
2308		if (flags->verbose)
2309			(void) printf("success\n");
2310		changelist_remove(clp, name);
2311	}
2312
2313	(void) changelist_postfix(clp);
2314	changelist_free(clp);
2315
2316	/*
2317	 * Deferred destroy might destroy the snapshot or only mark it to be
2318	 * destroyed later, and it returns success in either case.
2319	 */
2320	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2321	    ZFS_TYPE_SNAPSHOT))) {
2322		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2323	}
2324
2325	return (err);
2326}
2327
2328typedef struct guid_to_name_data {
2329	uint64_t guid;
2330	boolean_t bookmark_ok;
2331	char *name;
2332	char *skip;
2333} guid_to_name_data_t;
2334
2335static int
2336guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2337{
2338	guid_to_name_data_t *gtnd = arg;
2339	const char *slash;
2340	int err;
2341
2342	if (gtnd->skip != NULL &&
2343	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2344	    strcmp(slash + 1, gtnd->skip) == 0) {
2345		zfs_close(zhp);
2346		return (0);
2347	}
2348
2349	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2350		(void) strcpy(gtnd->name, zhp->zfs_name);
2351		zfs_close(zhp);
2352		return (EEXIST);
2353	}
2354
2355	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2356	if (err != EEXIST && gtnd->bookmark_ok)
2357		err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2358	zfs_close(zhp);
2359	return (err);
2360}
2361
2362/*
2363 * Attempt to find the local dataset associated with this guid.  In the case of
2364 * multiple matches, we attempt to find the "best" match by searching
2365 * progressively larger portions of the hierarchy.  This allows one to send a
2366 * tree of datasets individually and guarantee that we will find the source
2367 * guid within that hierarchy, even if there are multiple matches elsewhere.
2368 */
2369static int
2370guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2371    boolean_t bookmark_ok, char *name)
2372{
2373	char pname[ZFS_MAX_DATASET_NAME_LEN];
2374	guid_to_name_data_t gtnd;
2375
2376	gtnd.guid = guid;
2377	gtnd.bookmark_ok = bookmark_ok;
2378	gtnd.name = name;
2379	gtnd.skip = NULL;
2380
2381	/*
2382	 * Search progressively larger portions of the hierarchy, starting
2383	 * with the filesystem specified by 'parent'.  This will
2384	 * select the "most local" version of the origin snapshot in the case
2385	 * that there are multiple matching snapshots in the system.
2386	 */
2387	(void) strlcpy(pname, parent, sizeof (pname));
2388	char *cp = strrchr(pname, '@');
2389	if (cp == NULL)
2390		cp = strchr(pname, '\0');
2391	for (; cp != NULL; cp = strrchr(pname, '/')) {
2392		/* Chop off the last component and open the parent */
2393		*cp = '\0';
2394		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2395
2396		if (zhp == NULL)
2397			continue;
2398		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2399		if (err != EEXIST)
2400			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2401		if (err != EEXIST && bookmark_ok)
2402			err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2403		zfs_close(zhp);
2404		if (err == EEXIST)
2405			return (0);
2406
2407		/*
2408		 * Remember the last portion of the dataset so we skip it next
2409		 * time through (as we've already searched that portion of the
2410		 * hierarchy).
2411		 */
2412		gtnd.skip = strrchr(pname, '/') + 1;
2413	}
2414
2415	return (ENOENT);
2416}
2417
2418/*
2419 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2420 * guid1 is after guid2.
2421 */
2422static int
2423created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2424    uint64_t guid1, uint64_t guid2)
2425{
2426	nvlist_t *nvfs;
2427	char *fsname, *snapname;
2428	char buf[ZFS_MAX_DATASET_NAME_LEN];
2429	int rv;
2430	zfs_handle_t *guid1hdl, *guid2hdl;
2431	uint64_t create1, create2;
2432
2433	if (guid2 == 0)
2434		return (0);
2435	if (guid1 == 0)
2436		return (1);
2437
2438	nvfs = fsavl_find(avl, guid1, &snapname);
2439	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2440	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2441	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2442	if (guid1hdl == NULL)
2443		return (-1);
2444
2445	nvfs = fsavl_find(avl, guid2, &snapname);
2446	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2447	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2448	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2449	if (guid2hdl == NULL) {
2450		zfs_close(guid1hdl);
2451		return (-1);
2452	}
2453
2454	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2455	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2456
2457	if (create1 < create2)
2458		rv = -1;
2459	else if (create1 > create2)
2460		rv = +1;
2461	else
2462		rv = 0;
2463
2464	zfs_close(guid1hdl);
2465	zfs_close(guid2hdl);
2466
2467	return (rv);
2468}
2469
2470static int
2471recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2472    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2473    nvlist_t *renamed)
2474{
2475	nvlist_t *local_nv, *deleted = NULL;
2476	avl_tree_t *local_avl;
2477	nvpair_t *fselem, *nextfselem;
2478	char *fromsnap;
2479	char newname[ZFS_MAX_DATASET_NAME_LEN];
2480	char guidname[32];
2481	int error;
2482	boolean_t needagain, progress, recursive;
2483	char *s1, *s2;
2484
2485	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2486
2487	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2488	    ENOENT);
2489
2490	if (flags->dryrun)
2491		return (0);
2492
2493again:
2494	needagain = progress = B_FALSE;
2495
2496	VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2497
2498	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2499	    recursive, recursive, B_FALSE, &local_nv, &local_avl)) != 0)
2500		return (error);
2501
2502	/*
2503	 * Process deletes and renames
2504	 */
2505	for (fselem = nvlist_next_nvpair(local_nv, NULL);
2506	    fselem; fselem = nextfselem) {
2507		nvlist_t *nvfs, *snaps;
2508		nvlist_t *stream_nvfs = NULL;
2509		nvpair_t *snapelem, *nextsnapelem;
2510		uint64_t fromguid = 0;
2511		uint64_t originguid = 0;
2512		uint64_t stream_originguid = 0;
2513		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2514		char *fsname, *stream_fsname;
2515
2516		nextfselem = nvlist_next_nvpair(local_nv, fselem);
2517
2518		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2519		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2520		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2521		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2522		    &parent_fromsnap_guid));
2523		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2524
2525		/*
2526		 * First find the stream's fs, so we can check for
2527		 * a different origin (due to "zfs promote")
2528		 */
2529		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2530		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2531			uint64_t thisguid;
2532
2533			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2534			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2535
2536			if (stream_nvfs != NULL)
2537				break;
2538		}
2539
2540		/* check for promote */
2541		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
2542		    &stream_originguid);
2543		if (stream_nvfs && originguid != stream_originguid) {
2544			switch (created_before(hdl, local_avl,
2545			    stream_originguid, originguid)) {
2546			case 1: {
2547				/* promote it! */
2548				zfs_cmd_t zc = { 0 };
2549				nvlist_t *origin_nvfs;
2550				char *origin_fsname;
2551
2552				if (flags->verbose)
2553					(void) printf("promoting %s\n", fsname);
2554
2555				origin_nvfs = fsavl_find(local_avl, originguid,
2556				    NULL);
2557				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2558				    "name", &origin_fsname));
2559				(void) strlcpy(zc.zc_value, origin_fsname,
2560				    sizeof (zc.zc_value));
2561				(void) strlcpy(zc.zc_name, fsname,
2562				    sizeof (zc.zc_name));
2563				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2564				if (error == 0)
2565					progress = B_TRUE;
2566				break;
2567			}
2568			default:
2569				break;
2570			case -1:
2571				fsavl_destroy(local_avl);
2572				nvlist_free(local_nv);
2573				return (-1);
2574			}
2575			/*
2576			 * We had/have the wrong origin, therefore our
2577			 * list of snapshots is wrong.  Need to handle
2578			 * them on the next pass.
2579			 */
2580			needagain = B_TRUE;
2581			continue;
2582		}
2583
2584		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2585		    snapelem; snapelem = nextsnapelem) {
2586			uint64_t thisguid;
2587			char *stream_snapname;
2588			nvlist_t *found, *props;
2589
2590			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2591
2592			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2593			found = fsavl_find(stream_avl, thisguid,
2594			    &stream_snapname);
2595
2596			/* check for delete */
2597			if (found == NULL) {
2598				char name[ZFS_MAX_DATASET_NAME_LEN];
2599
2600				if (!flags->force)
2601					continue;
2602
2603				(void) snprintf(name, sizeof (name), "%s@%s",
2604				    fsname, nvpair_name(snapelem));
2605
2606				error = recv_destroy(hdl, name,
2607				    strlen(fsname)+1, newname, flags);
2608				if (error)
2609					needagain = B_TRUE;
2610				else
2611					progress = B_TRUE;
2612				sprintf(guidname, "%" PRIu64, thisguid);
2613				nvlist_add_boolean(deleted, guidname);
2614				continue;
2615			}
2616
2617			stream_nvfs = found;
2618
2619			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2620			    &props) && 0 == nvlist_lookup_nvlist(props,
2621			    stream_snapname, &props)) {
2622				zfs_cmd_t zc = { 0 };
2623
2624				zc.zc_cookie = B_TRUE; /* received */
2625				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2626				    "%s@%s", fsname, nvpair_name(snapelem));
2627				if (zcmd_write_src_nvlist(hdl, &zc,
2628				    props) == 0) {
2629					(void) zfs_ioctl(hdl,
2630					    ZFS_IOC_SET_PROP, &zc);
2631					zcmd_free_nvlists(&zc);
2632				}
2633			}
2634
2635			/* check for different snapname */
2636			if (strcmp(nvpair_name(snapelem),
2637			    stream_snapname) != 0) {
2638				char name[ZFS_MAX_DATASET_NAME_LEN];
2639				char tryname[ZFS_MAX_DATASET_NAME_LEN];
2640
2641				(void) snprintf(name, sizeof (name), "%s@%s",
2642				    fsname, nvpair_name(snapelem));
2643				(void) snprintf(tryname, sizeof (name), "%s@%s",
2644				    fsname, stream_snapname);
2645
2646				error = recv_rename(hdl, name, tryname,
2647				    strlen(fsname)+1, newname, flags);
2648				if (error)
2649					needagain = B_TRUE;
2650				else
2651					progress = B_TRUE;
2652			}
2653
2654			if (strcmp(stream_snapname, fromsnap) == 0)
2655				fromguid = thisguid;
2656		}
2657
2658		/* check for delete */
2659		if (stream_nvfs == NULL) {
2660			if (!flags->force)
2661				continue;
2662
2663			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2664			    newname, flags);
2665			if (error)
2666				needagain = B_TRUE;
2667			else
2668				progress = B_TRUE;
2669			sprintf(guidname, "%" PRIu64, parent_fromsnap_guid);
2670			nvlist_add_boolean(deleted, guidname);
2671			continue;
2672		}
2673
2674		if (fromguid == 0) {
2675			if (flags->verbose) {
2676				(void) printf("local fs %s does not have "
2677				    "fromsnap (%s in stream); must have "
2678				    "been deleted locally; ignoring\n",
2679				    fsname, fromsnap);
2680			}
2681			continue;
2682		}
2683
2684		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2685		    "name", &stream_fsname));
2686		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2687		    "parentfromsnap", &stream_parent_fromsnap_guid));
2688
2689		s1 = strrchr(fsname, '/');
2690		s2 = strrchr(stream_fsname, '/');
2691
2692		/*
2693		 * Check if we're going to rename based on parent guid change
2694		 * and the current parent guid was also deleted. If it was then
2695		 * rename will fail and is likely unneeded, so avoid this and
2696		 * force an early retry to determine the new
2697		 * parent_fromsnap_guid.
2698		 */
2699		if (stream_parent_fromsnap_guid != 0 &&
2700                    parent_fromsnap_guid != 0 &&
2701                    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2702			sprintf(guidname, "%" PRIu64, parent_fromsnap_guid);
2703			if (nvlist_exists(deleted, guidname)) {
2704				progress = B_TRUE;
2705				needagain = B_TRUE;
2706				goto doagain;
2707			}
2708		}
2709
2710		/*
2711		 * Check for rename. If the exact receive path is specified, it
2712		 * does not count as a rename, but we still need to check the
2713		 * datasets beneath it.
2714		 */
2715		if ((stream_parent_fromsnap_guid != 0 &&
2716		    parent_fromsnap_guid != 0 &&
2717		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2718		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2719		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2720			nvlist_t *parent;
2721			char tryname[ZFS_MAX_DATASET_NAME_LEN];
2722
2723			parent = fsavl_find(local_avl,
2724			    stream_parent_fromsnap_guid, NULL);
2725			/*
2726			 * NB: parent might not be found if we used the
2727			 * tosnap for stream_parent_fromsnap_guid,
2728			 * because the parent is a newly-created fs;
2729			 * we'll be able to rename it after we recv the
2730			 * new fs.
2731			 */
2732			if (parent != NULL) {
2733				char *pname;
2734
2735				VERIFY(0 == nvlist_lookup_string(parent, "name",
2736				    &pname));
2737				(void) snprintf(tryname, sizeof (tryname),
2738				    "%s%s", pname, strrchr(stream_fsname, '/'));
2739			} else {
2740				tryname[0] = '\0';
2741				if (flags->verbose) {
2742					(void) printf("local fs %s new parent "
2743					    "not found\n", fsname);
2744				}
2745			}
2746
2747			newname[0] = '\0';
2748
2749			error = recv_rename(hdl, fsname, tryname,
2750			    strlen(tofs)+1, newname, flags);
2751
2752			if (renamed != NULL && newname[0] != '\0') {
2753				VERIFY(0 == nvlist_add_boolean(renamed,
2754				    newname));
2755			}
2756
2757			if (error)
2758				needagain = B_TRUE;
2759			else
2760				progress = B_TRUE;
2761		}
2762	}
2763
2764doagain:
2765	fsavl_destroy(local_avl);
2766	nvlist_free(local_nv);
2767	nvlist_free(deleted);
2768
2769	if (needagain && progress) {
2770		/* do another pass to fix up temporary names */
2771		if (flags->verbose)
2772			(void) printf("another pass:\n");
2773		goto again;
2774	}
2775
2776	return (needagain);
2777}
2778
2779static int
2780zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2781    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2782    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2783{
2784	nvlist_t *stream_nv = NULL;
2785	avl_tree_t *stream_avl = NULL;
2786	char *fromsnap = NULL;
2787	char *sendsnap = NULL;
2788	char *cp;
2789	char tofs[ZFS_MAX_DATASET_NAME_LEN];
2790	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
2791	char errbuf[1024];
2792	dmu_replay_record_t drre;
2793	int error;
2794	boolean_t anyerr = B_FALSE;
2795	boolean_t softerr = B_FALSE;
2796	boolean_t recursive;
2797
2798	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2799	    "cannot receive"));
2800
2801	assert(drr->drr_type == DRR_BEGIN);
2802	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2803	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2804	    DMU_COMPOUNDSTREAM);
2805
2806	/*
2807	 * Read in the nvlist from the stream.
2808	 */
2809	if (drr->drr_payloadlen != 0) {
2810		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2811		    &stream_nv, flags->byteswap, zc);
2812		if (error) {
2813			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2814			goto out;
2815		}
2816	}
2817
2818	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2819	    ENOENT);
2820
2821	if (recursive && strchr(destname, '@')) {
2822		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2823		    "cannot specify snapshot name for multi-snapshot stream"));
2824		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2825		goto out;
2826	}
2827
2828	/*
2829	 * Read in the end record and verify checksum.
2830	 */
2831	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2832	    flags->byteswap, NULL)))
2833		goto out;
2834	if (flags->byteswap) {
2835		drre.drr_type = BSWAP_32(drre.drr_type);
2836		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2837		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2838		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2839		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2840		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2841		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2842		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2843		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2844	}
2845	if (drre.drr_type != DRR_END) {
2846		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2847		goto out;
2848	}
2849	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2850		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2851		    "incorrect header checksum"));
2852		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2853		goto out;
2854	}
2855
2856	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2857
2858	if (drr->drr_payloadlen != 0) {
2859		nvlist_t *stream_fss;
2860
2861		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2862		    &stream_fss));
2863		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2864			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2865			    "couldn't allocate avl tree"));
2866			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2867			goto out;
2868		}
2869
2870		if (fromsnap != NULL && recursive) {
2871			nvlist_t *renamed = NULL;
2872			nvpair_t *pair = NULL;
2873
2874			(void) strlcpy(tofs, destname, sizeof (tofs));
2875			if (flags->isprefix) {
2876				struct drr_begin *drrb = &drr->drr_u.drr_begin;
2877				int i;
2878
2879				if (flags->istail) {
2880					cp = strrchr(drrb->drr_toname, '/');
2881					if (cp == NULL) {
2882						(void) strlcat(tofs, "/",
2883						    sizeof (tofs));
2884						i = 0;
2885					} else {
2886						i = (cp - drrb->drr_toname);
2887					}
2888				} else {
2889					i = strcspn(drrb->drr_toname, "/@");
2890				}
2891				/* zfs_receive_one() will create_parents() */
2892				(void) strlcat(tofs, &drrb->drr_toname[i],
2893				    sizeof (tofs));
2894				*strchr(tofs, '@') = '\0';
2895			}
2896
2897			if (!flags->dryrun && !flags->nomount) {
2898				VERIFY(0 == nvlist_alloc(&renamed,
2899				    NV_UNIQUE_NAME, 0));
2900			}
2901
2902			softerr = recv_incremental_replication(hdl, tofs, flags,
2903			    stream_nv, stream_avl, renamed);
2904
2905			/* Unmount renamed filesystems before receiving. */
2906			while ((pair = nvlist_next_nvpair(renamed,
2907			    pair)) != NULL) {
2908				zfs_handle_t *zhp;
2909				prop_changelist_t *clp = NULL;
2910
2911				zhp = zfs_open(hdl, nvpair_name(pair),
2912				    ZFS_TYPE_FILESYSTEM);
2913				if (zhp != NULL) {
2914					clp = changelist_gather(zhp,
2915					    ZFS_PROP_MOUNTPOINT, 0, 0);
2916					zfs_close(zhp);
2917					if (clp != NULL) {
2918						softerr |=
2919						    changelist_prefix(clp);
2920						changelist_free(clp);
2921					}
2922				}
2923			}
2924
2925			nvlist_free(renamed);
2926		}
2927	}
2928
2929	/*
2930	 * Get the fs specified by the first path in the stream (the top level
2931	 * specified by 'zfs send') and pass it to each invocation of
2932	 * zfs_receive_one().
2933	 */
2934	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2935	    sizeof (sendfs));
2936	if ((cp = strchr(sendfs, '@')) != NULL) {
2937		*cp = '\0';
2938		/*
2939		 * Find the "sendsnap", the final snapshot in a replication
2940		 * stream.  zfs_receive_one() handles certain errors
2941		 * differently, depending on if the contained stream is the
2942		 * last one or not.
2943		 */
2944		sendsnap = (cp + 1);
2945	}
2946
2947	/* Finally, receive each contained stream */
2948	do {
2949		/*
2950		 * we should figure out if it has a recoverable
2951		 * error, in which case do a recv_skip() and drive on.
2952		 * Note, if we fail due to already having this guid,
2953		 * zfs_receive_one() will take care of it (ie,
2954		 * recv_skip() and return 0).
2955		 */
2956		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
2957		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2958		    action_handlep, sendsnap);
2959		if (error == ENODATA) {
2960			error = 0;
2961			break;
2962		}
2963		anyerr |= error;
2964	} while (error == 0);
2965
2966	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
2967		/*
2968		 * Now that we have the fs's they sent us, try the
2969		 * renames again.
2970		 */
2971		softerr = recv_incremental_replication(hdl, tofs, flags,
2972		    stream_nv, stream_avl, NULL);
2973	}
2974
2975out:
2976	fsavl_destroy(stream_avl);
2977	nvlist_free(stream_nv);
2978	if (softerr)
2979		error = -2;
2980	if (anyerr)
2981		error = -1;
2982	return (error);
2983}
2984
2985static void
2986trunc_prop_errs(int truncated)
2987{
2988	ASSERT(truncated != 0);
2989
2990	if (truncated == 1)
2991		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2992		    "1 more property could not be set\n"));
2993	else
2994		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2995		    "%d more properties could not be set\n"), truncated);
2996}
2997
2998static int
2999recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
3000{
3001	dmu_replay_record_t *drr;
3002	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
3003	char errbuf[1024];
3004
3005	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3006	    "cannot receive:"));
3007
3008	/* XXX would be great to use lseek if possible... */
3009	drr = buf;
3010
3011	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
3012	    byteswap, NULL) == 0) {
3013		if (byteswap)
3014			drr->drr_type = BSWAP_32(drr->drr_type);
3015
3016		switch (drr->drr_type) {
3017		case DRR_BEGIN:
3018			if (drr->drr_payloadlen != 0) {
3019				(void) recv_read(hdl, fd, buf,
3020				    drr->drr_payloadlen, B_FALSE, NULL);
3021			}
3022			break;
3023
3024		case DRR_END:
3025			free(buf);
3026			return (0);
3027
3028		case DRR_OBJECT:
3029			if (byteswap) {
3030				drr->drr_u.drr_object.drr_bonuslen =
3031				    BSWAP_32(drr->drr_u.drr_object.
3032				    drr_bonuslen);
3033			}
3034			(void) recv_read(hdl, fd, buf,
3035			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
3036			    B_FALSE, NULL);
3037			break;
3038
3039		case DRR_WRITE:
3040			if (byteswap) {
3041				drr->drr_u.drr_write.drr_logical_size =
3042				    BSWAP_64(
3043				    drr->drr_u.drr_write.drr_logical_size);
3044				drr->drr_u.drr_write.drr_compressed_size =
3045				    BSWAP_64(
3046				    drr->drr_u.drr_write.drr_compressed_size);
3047			}
3048			uint64_t payload_size =
3049			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
3050			(void) recv_read(hdl, fd, buf,
3051			    payload_size, B_FALSE, NULL);
3052			break;
3053		case DRR_SPILL:
3054			if (byteswap) {
3055				drr->drr_u.drr_spill.drr_length =
3056				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
3057			}
3058			(void) recv_read(hdl, fd, buf,
3059			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
3060			break;
3061		case DRR_WRITE_EMBEDDED:
3062			if (byteswap) {
3063				drr->drr_u.drr_write_embedded.drr_psize =
3064				    BSWAP_32(drr->drr_u.drr_write_embedded.
3065				    drr_psize);
3066			}
3067			(void) recv_read(hdl, fd, buf,
3068			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
3069			    8), B_FALSE, NULL);
3070			break;
3071		case DRR_WRITE_BYREF:
3072		case DRR_FREEOBJECTS:
3073		case DRR_FREE:
3074			break;
3075
3076		default:
3077			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3078			    "invalid record type"));
3079			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3080		}
3081	}
3082
3083	free(buf);
3084	return (-1);
3085}
3086
3087static void
3088recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3089    boolean_t resumable)
3090{
3091	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3092
3093	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3094	    "checksum mismatch or incomplete stream"));
3095
3096	if (!resumable)
3097		return;
3098	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3099	*strchr(target_fs, '@') = '\0';
3100	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3101	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3102	if (zhp == NULL)
3103		return;
3104
3105	char token_buf[ZFS_MAXPROPLEN];
3106	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3107	    token_buf, sizeof (token_buf),
3108	    NULL, NULL, 0, B_TRUE);
3109	if (error == 0) {
3110		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3111		    "checksum mismatch or incomplete stream.\n"
3112		    "Partially received snapshot is saved.\n"
3113		    "A resuming stream can be generated on the sending "
3114		    "system by running:\n"
3115		    "    zfs send -t %s"),
3116		    token_buf);
3117	}
3118	zfs_close(zhp);
3119}
3120
3121/*
3122 * Restores a backup of tosnap from the file descriptor specified by infd.
3123 */
3124static int
3125zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3126    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3127    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3128    avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3129    uint64_t *action_handlep, const char *finalsnap)
3130{
3131	zfs_cmd_t zc = { 0 };
3132	time_t begin_time;
3133	int ioctl_err, ioctl_errno, err;
3134	char *cp;
3135	struct drr_begin *drrb = &drr->drr_u.drr_begin;
3136	char errbuf[1024];
3137	char prop_errbuf[1024];
3138	const char *chopprefix;
3139	boolean_t newfs = B_FALSE;
3140	boolean_t stream_wantsnewfs, stream_resumingnewfs;
3141	uint64_t parent_snapguid = 0;
3142	prop_changelist_t *clp = NULL;
3143	nvlist_t *snapprops_nvlist = NULL;
3144	zprop_errflags_t prop_errflags;
3145	boolean_t recursive;
3146	char *snapname = NULL;
3147
3148	begin_time = time(NULL);
3149
3150	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3151	    "cannot receive"));
3152
3153	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3154	    ENOENT);
3155
3156	if (stream_avl != NULL) {
3157		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3158		    &snapname);
3159		nvlist_t *props;
3160		int ret;
3161
3162		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
3163		    &parent_snapguid);
3164		err = nvlist_lookup_nvlist(fs, "props", &props);
3165		if (err)
3166			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
3167
3168		if (flags->canmountoff) {
3169			VERIFY(0 == nvlist_add_uint64(props,
3170			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3171		}
3172		ret = zcmd_write_src_nvlist(hdl, &zc, props);
3173		if (err)
3174			nvlist_free(props);
3175
3176		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
3177			VERIFY(0 == nvlist_lookup_nvlist(props,
3178			    snapname, &snapprops_nvlist));
3179		}
3180
3181		if (ret != 0)
3182			return (-1);
3183	}
3184
3185	cp = NULL;
3186
3187	/*
3188	 * Determine how much of the snapshot name stored in the stream
3189	 * we are going to tack on to the name they specified on the
3190	 * command line, and how much we are going to chop off.
3191	 *
3192	 * If they specified a snapshot, chop the entire name stored in
3193	 * the stream.
3194	 */
3195	if (flags->istail) {
3196		/*
3197		 * A filesystem was specified with -e. We want to tack on only
3198		 * the tail of the sent snapshot path.
3199		 */
3200		if (strchr(tosnap, '@')) {
3201			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3202			    "argument - snapshot not allowed with -e"));
3203			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3204		}
3205
3206		chopprefix = strrchr(sendfs, '/');
3207
3208		if (chopprefix == NULL) {
3209			/*
3210			 * The tail is the poolname, so we need to
3211			 * prepend a path separator.
3212			 */
3213			int len = strlen(drrb->drr_toname);
3214			cp = malloc(len + 2);
3215			cp[0] = '/';
3216			(void) strcpy(&cp[1], drrb->drr_toname);
3217			chopprefix = cp;
3218		} else {
3219			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3220		}
3221	} else if (flags->isprefix) {
3222		/*
3223		 * A filesystem was specified with -d. We want to tack on
3224		 * everything but the first element of the sent snapshot path
3225		 * (all but the pool name).
3226		 */
3227		if (strchr(tosnap, '@')) {
3228			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3229			    "argument - snapshot not allowed with -d"));
3230			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3231		}
3232
3233		chopprefix = strchr(drrb->drr_toname, '/');
3234		if (chopprefix == NULL)
3235			chopprefix = strchr(drrb->drr_toname, '@');
3236	} else if (strchr(tosnap, '@') == NULL) {
3237		/*
3238		 * If a filesystem was specified without -d or -e, we want to
3239		 * tack on everything after the fs specified by 'zfs send'.
3240		 */
3241		chopprefix = drrb->drr_toname + strlen(sendfs);
3242	} else {
3243		/* A snapshot was specified as an exact path (no -d or -e). */
3244		if (recursive) {
3245			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3246			    "cannot specify snapshot name for multi-snapshot "
3247			    "stream"));
3248			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3249		}
3250		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3251	}
3252
3253	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3254	ASSERT(chopprefix > drrb->drr_toname);
3255	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3256	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3257	    chopprefix[0] == '\0');
3258
3259	/*
3260	 * Determine name of destination snapshot, store in zc_value.
3261	 */
3262	(void) strcpy(zc.zc_value, tosnap);
3263	(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
3264#ifdef __FreeBSD__
3265	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
3266		zfs_ioctl_version = get_zfs_ioctl_version();
3267	/*
3268	 * For forward compatibility hide tosnap in zc_value
3269	 */
3270	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
3271		(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
3272#endif
3273	free(cp);
3274	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
3275		zcmd_free_nvlists(&zc);
3276		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3277	}
3278
3279	/*
3280	 * Determine the name of the origin snapshot, store in zc_string.
3281	 */
3282	if (originsnap) {
3283		(void) strncpy(zc.zc_string, originsnap, sizeof (zc.zc_string));
3284		if (flags->verbose)
3285			(void) printf("using provided clone origin %s\n",
3286			    zc.zc_string);
3287	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3288		if (guid_to_name(hdl, zc.zc_value,
3289		    drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) {
3290			zcmd_free_nvlists(&zc);
3291			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3292			    "local origin for clone %s does not exist"),
3293			    zc.zc_value);
3294			return (zfs_error(hdl, EZFS_NOENT, errbuf));
3295		}
3296		if (flags->verbose)
3297			(void) printf("found clone origin %s\n", zc.zc_string);
3298	}
3299
3300	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3301	    DMU_BACKUP_FEATURE_RESUMING;
3302	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3303	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3304	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
3305	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
3306
3307	if (stream_wantsnewfs) {
3308		/*
3309		 * if the parent fs does not exist, look for it based on
3310		 * the parent snap GUID
3311		 */
3312		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3313		    "cannot receive new filesystem stream"));
3314
3315		(void) strcpy(zc.zc_name, zc.zc_value);
3316		cp = strrchr(zc.zc_name, '/');
3317		if (cp)
3318			*cp = '\0';
3319		if (cp &&
3320		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3321			char suffix[ZFS_MAX_DATASET_NAME_LEN];
3322			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
3323			if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
3324			    B_FALSE, zc.zc_value) == 0) {
3325				*strchr(zc.zc_value, '@') = '\0';
3326				(void) strcat(zc.zc_value, suffix);
3327			}
3328		}
3329	} else {
3330		/*
3331		 * If the fs does not exist, look for it based on the
3332		 * fromsnap GUID.
3333		 */
3334		if (resuming) {
3335			(void) snprintf(errbuf, sizeof (errbuf),
3336			    dgettext(TEXT_DOMAIN,
3337			    "cannot receive resume stream"));
3338		} else {
3339			(void) snprintf(errbuf, sizeof (errbuf),
3340			    dgettext(TEXT_DOMAIN,
3341			    "cannot receive incremental stream"));
3342		}
3343
3344		(void) strcpy(zc.zc_name, zc.zc_value);
3345		*strchr(zc.zc_name, '@') = '\0';
3346
3347		/*
3348		 * If the exact receive path was specified and this is the
3349		 * topmost path in the stream, then if the fs does not exist we
3350		 * should look no further.
3351		 */
3352		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3353		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3354		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3355			char snap[ZFS_MAX_DATASET_NAME_LEN];
3356			(void) strcpy(snap, strchr(zc.zc_value, '@'));
3357			if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
3358			    B_FALSE, zc.zc_value) == 0) {
3359				*strchr(zc.zc_value, '@') = '\0';
3360				(void) strcat(zc.zc_value, snap);
3361			}
3362		}
3363	}
3364
3365	(void) strcpy(zc.zc_name, zc.zc_value);
3366	*strchr(zc.zc_name, '@') = '\0';
3367
3368	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3369		zfs_handle_t *zhp;
3370
3371		/*
3372		 * Destination fs exists.  It must be one of these cases:
3373		 *  - an incremental send stream
3374		 *  - the stream specifies a new fs (full stream or clone)
3375		 *    and they want us to blow away the existing fs (and
3376		 *    have therefore specified -F and removed any snapshots)
3377		 *  - we are resuming a failed receive.
3378		 */
3379		if (stream_wantsnewfs) {
3380			boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
3381			if (!flags->force) {
3382				zcmd_free_nvlists(&zc);
3383				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3384				    "destination '%s' exists\n"
3385				    "must specify -F to overwrite it"),
3386				    zc.zc_name);
3387				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3388			}
3389			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3390			    &zc) == 0) {
3391				zcmd_free_nvlists(&zc);
3392				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3393				    "destination has snapshots (eg. %s)\n"
3394				    "must destroy them to overwrite it"),
3395				    zc.zc_name);
3396				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3397			}
3398			if (is_volume && strrchr(zc.zc_name, '/') == NULL) {
3399				zcmd_free_nvlists(&zc);
3400				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3401				    "destination '%s' is the root dataset\n"
3402				    "cannot overwrite with a ZVOL"),
3403				    zc.zc_name);
3404				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3405			}
3406			if (is_volume &&
3407			    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT,
3408			    &zc) == 0) {
3409				zcmd_free_nvlists(&zc);
3410				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3411				    "destination has children (eg. %s)\n"
3412				    "cannot overwrite with a ZVOL"),
3413				    zc.zc_name);
3414				return (zfs_error(hdl, EZFS_WRONG_PARENT,
3415				    errbuf));
3416			}
3417		}
3418
3419		if ((zhp = zfs_open(hdl, zc.zc_name,
3420		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3421			zcmd_free_nvlists(&zc);
3422			return (-1);
3423		}
3424
3425		if (stream_wantsnewfs &&
3426		    zhp->zfs_dmustats.dds_origin[0]) {
3427			zcmd_free_nvlists(&zc);
3428			zfs_close(zhp);
3429			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3430			    "destination '%s' is a clone\n"
3431			    "must destroy it to overwrite it"),
3432			    zc.zc_name);
3433			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3434		}
3435
3436		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3437		    (stream_wantsnewfs || stream_resumingnewfs)) {
3438			/* We can't do online recv in this case */
3439			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3440			if (clp == NULL) {
3441				zfs_close(zhp);
3442				zcmd_free_nvlists(&zc);
3443				return (-1);
3444			}
3445			if (changelist_prefix(clp) != 0) {
3446				changelist_free(clp);
3447				zfs_close(zhp);
3448				zcmd_free_nvlists(&zc);
3449				return (-1);
3450			}
3451		}
3452
3453		/*
3454		 * If we are resuming a newfs, set newfs here so that we will
3455		 * mount it if the recv succeeds this time.  We can tell
3456		 * that it was a newfs on the first recv because the fs
3457		 * itself will be inconsistent (if the fs existed when we
3458		 * did the first recv, we would have received it into
3459		 * .../%recv).
3460		 */
3461		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3462			newfs = B_TRUE;
3463
3464		zfs_close(zhp);
3465	} else {
3466		zfs_handle_t *zhp;
3467
3468		/*
3469		 * Destination filesystem does not exist.  Therefore we better
3470		 * be creating a new filesystem (either from a full backup, or
3471		 * a clone).  It would therefore be invalid if the user
3472		 * specified only the pool name (i.e. if the destination name
3473		 * contained no slash character).
3474		 */
3475		if (!stream_wantsnewfs ||
3476		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
3477			zcmd_free_nvlists(&zc);
3478			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3479			    "destination '%s' does not exist"), zc.zc_name);
3480			return (zfs_error(hdl, EZFS_NOENT, errbuf));
3481		}
3482
3483		/*
		 * Trim off the final dataset component so we perform the
		 * recvbackup ioctl to the filesystem's parent.
3486		 */
3487		*cp = '\0';
3488
3489		if (flags->isprefix && !flags->istail && !flags->dryrun &&
3490		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
3491			zcmd_free_nvlists(&zc);
3492			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3493		}
3494
3495		/* validate parent */
3496		zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_DATASET);
3497		if (zhp == NULL) {
3498			zcmd_free_nvlists(&zc);
3499			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3500		}
3501		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
3502			zcmd_free_nvlists(&zc);
3503			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3504			    "parent '%s' is not a filesystem"), zc.zc_name);
3505			zfs_close(zhp);
3506			return (zfs_error(hdl, EZFS_WRONG_PARENT, errbuf));
3507		}
3508		zfs_close(zhp);
3509
3510		newfs = B_TRUE;
3511	}
3512
3513	zc.zc_begin_record = *drr_noswap;
3514	zc.zc_cookie = infd;
3515	zc.zc_guid = flags->force;
3516	zc.zc_resumable = flags->resumable;
3517	if (flags->verbose) {
3518		(void) printf("%s %s stream of %s into %s\n",
3519		    flags->dryrun ? "would receive" : "receiving",
3520		    drrb->drr_fromguid ? "incremental" : "full",
3521		    drrb->drr_toname, zc.zc_value);
3522		(void) fflush(stdout);
3523	}
3524
3525	if (flags->dryrun) {
3526		zcmd_free_nvlists(&zc);
3527		return (recv_skip(hdl, infd, flags->byteswap));
3528	}
3529
3530	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
3531	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
3532	zc.zc_cleanup_fd = cleanup_fd;
3533	zc.zc_action_handle = *action_handlep;
3534
3535	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
3536	ioctl_errno = errno;
3537	prop_errflags = (zprop_errflags_t)zc.zc_obj;
3538
3539	if (err == 0) {
3540		nvlist_t *prop_errors;
3541		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
3542		    zc.zc_nvlist_dst_size, &prop_errors, 0));
3543
3544		nvpair_t *prop_err = NULL;
3545
3546		while ((prop_err = nvlist_next_nvpair(prop_errors,
3547		    prop_err)) != NULL) {
3548			char tbuf[1024];
3549			zfs_prop_t prop;
3550			int intval;
3551
3552			prop = zfs_name_to_prop(nvpair_name(prop_err));
3553			(void) nvpair_value_int32(prop_err, &intval);
3554			if (strcmp(nvpair_name(prop_err),
3555			    ZPROP_N_MORE_ERRORS) == 0) {
3556				trunc_prop_errs(intval);
3557				break;
3558			} else if (snapname == NULL || finalsnap == NULL ||
3559			    strcmp(finalsnap, snapname) == 0 ||
3560			    strcmp(nvpair_name(prop_err),
3561			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
3562				/*
3563				 * Skip the special case of, for example,
3564				 * "refquota", errors on intermediate
3565				 * snapshots leading up to a final one.
3566				 * That's why we have all of the checks above.
3567				 *
3568				 * See zfs_ioctl.c's extract_delay_props() for
3569				 * a list of props which can fail on
3570				 * intermediate snapshots, but shouldn't
3571				 * affect the overall receive.
3572				 */
3573				(void) snprintf(tbuf, sizeof (tbuf),
3574				    dgettext(TEXT_DOMAIN,
3575				    "cannot receive %s property on %s"),
3576				    nvpair_name(prop_err), zc.zc_name);
3577				zfs_setprop_error(hdl, prop, intval, tbuf);
3578			}
3579		}
3580		nvlist_free(prop_errors);
3581	}
3582
3583	zc.zc_nvlist_dst = 0;
3584	zc.zc_nvlist_dst_size = 0;
3585	zcmd_free_nvlists(&zc);
3586
3587	if (err == 0 && snapprops_nvlist) {
3588		zfs_cmd_t zc2 = { 0 };
3589
3590		(void) strcpy(zc2.zc_name, zc.zc_value);
3591		zc2.zc_cookie = B_TRUE; /* received */
3592		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3593			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3594			zcmd_free_nvlists(&zc2);
3595		}
3596	}
3597
3598	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3599		/*
3600		 * It may be that this snapshot already exists,
3601		 * in which case we want to consume & ignore it
3602		 * rather than failing.
3603		 */
3604		avl_tree_t *local_avl;
3605		nvlist_t *local_nv, *fs;
3606		cp = strchr(zc.zc_value, '@');
3607
3608		/*
3609		 * XXX Do this faster by just iterating over snaps in
3610		 * this fs.  Also if zc_value does not exist, we will
3611		 * get a strange "does not exist" error message.
3612		 */
3613		*cp = '\0';
3614		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3615		    B_FALSE, B_FALSE, &local_nv, &local_avl) == 0) {
3616			*cp = '@';
3617			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3618			fsavl_destroy(local_avl);
3619			nvlist_free(local_nv);
3620
3621			if (fs != NULL) {
3622				if (flags->verbose) {
3623					(void) printf("snap %s already exists; "
3624					    "ignoring\n", zc.zc_value);
3625				}
3626				err = ioctl_err = recv_skip(hdl, infd,
3627				    flags->byteswap);
3628			}
3629		}
3630		*cp = '@';
3631	}
3632
3633	if (ioctl_err != 0) {
3634		switch (ioctl_errno) {
3635		case ENODEV:
3636			cp = strchr(zc.zc_value, '@');
3637			*cp = '\0';
3638			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3639			    "most recent snapshot of %s does not\n"
3640			    "match incremental source"), zc.zc_value);
3641			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3642			*cp = '@';
3643			break;
3644		case ETXTBSY:
3645			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3646			    "destination %s has been modified\n"
3647			    "since most recent snapshot"), zc.zc_name);
3648			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3649			break;
3650		case EEXIST:
3651			cp = strchr(zc.zc_value, '@');
3652			if (newfs) {
3653				/* it's the containing fs that exists */
3654				*cp = '\0';
3655			}
3656			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3657			    "destination already exists"));
3658			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3659			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3660			    zc.zc_value);
3661			*cp = '@';
3662			break;
3663		case EINVAL:
3664			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3665			break;
3666		case ECKSUM:
3667			recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable);
3668			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3669			break;
3670		case ENOTSUP:
3671			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3672			    "pool must be upgraded to receive this stream."));
3673			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3674			break;
3675		case EDQUOT:
3676			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3677			    "destination %s space quota exceeded"), zc.zc_name);
3678			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3679			break;
3680		default:
3681			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3682		}
3683	}
3684
3685	/*
3686	 * Mount the target filesystem (if created).  Also mount any
3687	 * children of the target filesystem if we did a replication
3688	 * receive (indicated by stream_avl being non-NULL).
3689	 */
3690	cp = strchr(zc.zc_value, '@');
3691	if (cp && (ioctl_err == 0 || !newfs)) {
3692		zfs_handle_t *h;
3693
3694		*cp = '\0';
3695		h = zfs_open(hdl, zc.zc_value,
3696		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3697		if (h != NULL) {
3698			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3699				*cp = '@';
3700			} else if (newfs || stream_avl) {
3701				/*
3702				 * Track the first/top of hierarchy fs,
3703				 * for mounting and sharing later.
3704				 */
3705				if (top_zfs && *top_zfs == NULL)
3706					*top_zfs = zfs_strdup(hdl, zc.zc_value);
3707			}
3708			zfs_close(h);
3709		}
3710		*cp = '@';
3711	}
3712
3713	if (clp) {
3714		if (!flags->nomount)
3715			err |= changelist_postfix(clp);
3716		changelist_free(clp);
3717	}
3718
3719	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3720		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3721		    "failed to clear unreceived properties on %s"),
3722		    zc.zc_name);
3723		(void) fprintf(stderr, "\n");
3724	}
3725	if (prop_errflags & ZPROP_ERR_NORESTORE) {
3726		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3727		    "failed to restore original properties on %s"),
3728		    zc.zc_name);
3729		(void) fprintf(stderr, "\n");
3730	}
3731
3732	if (err || ioctl_err)
3733		return (-1);
3734
3735	*action_handlep = zc.zc_action_handle;
3736
3737	if (flags->verbose) {
3738		char buf1[64];
3739		char buf2[64];
3740		uint64_t bytes = zc.zc_cookie;
3741		time_t delta = time(NULL) - begin_time;
3742		if (delta == 0)
3743			delta = 1;
3744		zfs_nicenum(bytes, buf1, sizeof (buf1));
3745		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3746
3747		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3748		    buf1, delta, buf2);
3749	}
3750
3751	return (0);
3752}
3753
3754static int
3755zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
3756    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
3757    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3758    uint64_t *action_handlep, const char *finalsnap)
3759{
3760	int err;
3761	dmu_replay_record_t drr, drr_noswap;
3762	struct drr_begin *drrb = &drr.drr_u.drr_begin;
3763	char errbuf[1024];
3764	zio_cksum_t zcksum = { 0 };
3765	uint64_t featureflags;
3766	int hdrtype;
3767
3768	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3769	    "cannot receive"));
3770
3771	if (flags->isprefix &&
3772	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3773		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3774		    "(%s) does not exist"), tosnap);
3775		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3776	}
3777	if (originsnap &&
3778	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
3779		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
3780		    "(%s) does not exist"), originsnap);
3781		return (zfs_error(hdl, EZFS_NOENT, errbuf));
3782	}
3783
3784	/* read in the BEGIN record */
3785	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3786	    &zcksum)))
3787		return (err);
3788
3789	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3790		/* It's the double end record at the end of a package */
3791		return (ENODATA);
3792	}
3793
3794	/* the kernel needs the non-byteswapped begin record */
3795	drr_noswap = drr;
3796
3797	flags->byteswap = B_FALSE;
3798	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3799		/*
3800		 * We computed the checksum in the wrong byteorder in
3801		 * recv_read() above; do it again correctly.
3802		 */
3803		bzero(&zcksum, sizeof (zio_cksum_t));
3804		(void) fletcher_4_incremental_byteswap(&drr,
3805		    sizeof (drr), &zcksum);
3806		flags->byteswap = B_TRUE;
3807
3808		drr.drr_type = BSWAP_32(drr.drr_type);
3809		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3810		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3811		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3812		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3813		drrb->drr_type = BSWAP_32(drrb->drr_type);
3814		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3815		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3816		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3817	}
3818
3819	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3820		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3821		    "stream (bad magic number)"));
3822		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3823	}
3824
3825	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3826	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3827
3828	if (!DMU_STREAM_SUPPORTED(featureflags) ||
3829	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3830		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3831		    "stream has unsupported feature, feature flags = %lx"),
3832		    featureflags);
3833		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3834	}
3835
3836	if (strchr(drrb->drr_toname, '@') == NULL) {
3837		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3838		    "stream (bad snapshot name)"));
3839		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3840	}
3841
3842	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3843		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
3844		if (sendfs == NULL) {
3845			/*
3846			 * We were not called from zfs_receive_package(). Get
3847			 * the fs specified by 'zfs send'.
3848			 */
3849			char *cp;
3850			(void) strlcpy(nonpackage_sendfs,
3851			    drr.drr_u.drr_begin.drr_toname,
3852			    sizeof (nonpackage_sendfs));
3853			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3854				*cp = '\0';
3855			sendfs = nonpackage_sendfs;
3856			VERIFY(finalsnap == NULL);
3857		}
3858		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
3859		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
3860		    cleanup_fd, action_handlep, finalsnap));
3861	} else {
3862		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3863		    DMU_COMPOUNDSTREAM);
3864		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
3865		    &zcksum, top_zfs, cleanup_fd, action_handlep));
3866	}
3867}
3868
3869/*
3870 * Restores a backup of tosnap from the file descriptor specified by infd.
3871 * Return 0 on total success, -2 if some things couldn't be
3872 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3873 * (-1 will override -2, if -1 and the resumable flag was specified the
3874 * transfer can be resumed if the sending side supports it).
3875 */
3876int
3877zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
3878    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
3879{
3880	char *top_zfs = NULL;
3881	int err;
3882	int cleanup_fd;
3883	uint64_t action_handle = 0;
3884	char *originsnap = NULL;
3885	if (props) {
3886		err = nvlist_lookup_string(props, "origin", &originsnap);
3887		if (err && err != ENOENT)
3888			return (err);
3889	}
3890
3891	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3892	VERIFY(cleanup_fd >= 0);
3893
3894	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
3895	    stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL);
3896
3897	VERIFY(0 == close(cleanup_fd));
3898
3899	if (err == 0 && !flags->nomount && top_zfs) {
3900		zfs_handle_t *zhp;
3901		prop_changelist_t *clp;
3902
3903		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3904		if (zhp != NULL) {
3905			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3906			    CL_GATHER_MOUNT_ALWAYS, 0);
3907			zfs_close(zhp);
3908			if (clp != NULL) {
3909				/* mount and share received datasets */
3910				err = changelist_postfix(clp);
3911				changelist_free(clp);
3912			}
3913		}
3914		if (zhp == NULL || clp == NULL || err)
3915			err = -1;
3916	}
3917	if (top_zfs)
3918		free(top_zfs);
3919
3920	return (err);
3921}
3922