dmu_send.c revision 275811
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
 * Copyright 2014 HybridCluster. All rights reserved.
 */
29127668Sbms
30127668Sbms#include <sys/dmu.h>
3175115Sfenner#include <sys/dmu_impl.h>
3275115Sfenner#include <sys/dmu_tx.h>
33127668Sbms#include <sys/dbuf.h>
34127668Sbms#include <sys/dnode.h>
35127668Sbms#include <sys/zfs_context.h>
36127668Sbms#include <sys/dmu_objset.h>
37127668Sbms#include <sys/dmu_traverse.h>
3875115Sfenner#include <sys/dsl_dataset.h>
39127668Sbms#include <sys/dsl_dir.h>
40127668Sbms#include <sys/dsl_prop.h>
41127668Sbms#include <sys/dsl_pool.h>
42127668Sbms#include <sys/dsl_synctask.h>
43127668Sbms#include <sys/zfs_ioctl.h>
44127668Sbms#include <sys/zap.h>
45127668Sbms#include <sys/zio_checksum.h>
46127668Sbms#include <sys/zfs_znode.h>
47127668Sbms#include <zfs_fletcher.h>
48127668Sbms#include <sys/avl.h>
49127668Sbms#include <sys/ddt.h>
50127668Sbms#include <sys/zfs_onexit.h>
5175115Sfenner#include <sys/dmu_send.h>
52127668Sbms#include <sys/dsl_destroy.h>
53127668Sbms#include <sys/blkptr.h>
54127668Sbms#include <sys/dsl_bookmark.h>
55127668Sbms#include <sys/zfeature.h>
56127668Sbms
/*
 * On FreeBSD every use of the identifier dump_write in this file (the
 * static function defined below and its callers) is renamed to
 * dmu_dump_write.
 * NOTE(review): presumably this avoids a clash with another dump_write
 * visible in the FreeBSD kernel build -- confirm which declaration.
 */
#ifdef __FreeBSD__
#undef dump_write
#define dump_write dmu_dump_write
#endif

/*
 * Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c.
 * It is consulted by backup_cb(): when a level-0 data block cannot be
 * read and this is nonzero, a block filled with that pattern is sent
 * instead of failing the send with EIO.
 */
int zfs_send_corrupt_data = B_FALSE;

/*
 * Neither of these is referenced in the send-side code in this part of
 * the file.
 * NOTE(review): presumably the hold tag and the temporary clone name used
 * by the receive path -- confirm against the rest of the file.
 */
static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";
67127668Sbms
68127668Sbmsstatic int
69127668Sbmsdump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
70127668Sbms{
71127668Sbms	dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
72127668Sbms	struct uio auio;
73127668Sbms	struct iovec aiov;
74127668Sbms	ASSERT0(len % 8);
75127668Sbms
76127668Sbms	fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
7775115Sfenner	aiov.iov_base = buf;
7875115Sfenner	aiov.iov_len = len;
79127668Sbms	auio.uio_iov = &aiov;
80127668Sbms	auio.uio_iovcnt = 1;
81127668Sbms	auio.uio_resid = len;
82127668Sbms	auio.uio_segflg = UIO_SYSSPACE;
83127668Sbms	auio.uio_rw = UIO_WRITE;
84127668Sbms	auio.uio_offset = (off_t)-1;
85127668Sbms	auio.uio_td = dsp->dsa_td;
8675115Sfenner#ifdef _KERNEL
87127668Sbms	if (dsp->dsa_fp->f_type == DTYPE_VNODE)
88127668Sbms		bwillwrite();
89127668Sbms	dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0,
90127668Sbms	    dsp->dsa_td);
91127668Sbms#else
92127668Sbms	fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
93127668Sbms	dsp->dsa_err = EOPNOTSUPP;
94127668Sbms#endif
95127668Sbms	mutex_enter(&ds->ds_sendstream_lock);
96127668Sbms	*dsp->dsa_off += len;
97127668Sbms	mutex_exit(&ds->ds_sendstream_lock);
98127668Sbms
9975115Sfenner	return (dsp->dsa_err);
10075115Sfenner}
101127668Sbms
102127668Sbmsstatic int
103127668Sbmsdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
104127668Sbms    uint64_t length)
105127668Sbms{
106127668Sbms	struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
107127668Sbms
108127668Sbms	/*
109127668Sbms	 * When we receive a free record, dbuf_free_range() assumes
110127668Sbms	 * that the receiving system doesn't have any dbufs in the range
111127668Sbms	 * being freed.  This is always true because there is a one-record
112127668Sbms	 * constraint: we only send one WRITE record for any given
113127668Sbms	 * object+offset.  We know that the one-record constraint is
114127668Sbms	 * true because we always send data in increasing order by
115127668Sbms	 * object,offset.
116127668Sbms	 *
11775115Sfenner	 * If the increasing-order constraint ever changes, we should find
118127668Sbms	 * another way to assert that the one-record constraint is still
119127668Sbms	 * satisfied.
120127668Sbms	 */
121127668Sbms	ASSERT(object > dsp->dsa_last_data_object ||
122127668Sbms	    (object == dsp->dsa_last_data_object &&
123127668Sbms	    offset > dsp->dsa_last_data_offset));
124127668Sbms
125127668Sbms	/*
126127668Sbms	 * If we are doing a non-incremental send, then there can't
127127668Sbms	 * be any data in the dataset we're receiving into.  Therefore
128127668Sbms	 * a free record would simply be a no-op.  Save space by not
129127668Sbms	 * sending it to begin with.
130127668Sbms	 */
131127668Sbms	if (!dsp->dsa_incremental)
132127668Sbms		return (0);
133127668Sbms
134127668Sbms	if (length != -1ULL && offset + length < offset)
135127668Sbms		length = -1ULL;
136127668Sbms
137127668Sbms	/*
138127668Sbms	 * If there is a pending op, but it's not PENDING_FREE, push it out,
139127668Sbms	 * since free block aggregation can only be done for blocks of the
140127668Sbms	 * same type (i.e., DRR_FREE records can only be aggregated with
141127668Sbms	 * other DRR_FREE records.  DRR_FREEOBJECTS records can only be
142127668Sbms	 * aggregated with other DRR_FREEOBJECTS records.
143127668Sbms	 */
144127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE &&
145127668Sbms	    dsp->dsa_pending_op != PENDING_FREE) {
146127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
147127668Sbms		    sizeof (dmu_replay_record_t)) != 0)
148127668Sbms			return (SET_ERROR(EINTR));
149127668Sbms		dsp->dsa_pending_op = PENDING_NONE;
150127668Sbms	}
151127668Sbms
152127668Sbms	if (dsp->dsa_pending_op == PENDING_FREE) {
153127668Sbms		/*
154127668Sbms		 * There should never be a PENDING_FREE if length is -1
155127668Sbms		 * (because dump_dnode is the only place where this
15675115Sfenner		 * function is called with a -1, and only after flushing
157127668Sbms		 * any pending record).
158127668Sbms		 */
159127668Sbms		ASSERT(length != -1ULL);
160127668Sbms		/*
161127668Sbms		 * Check to see whether this free block can be aggregated
162127668Sbms		 * with pending one.
163127668Sbms		 */
164127668Sbms		if (drrf->drr_object == object && drrf->drr_offset +
165127668Sbms		    drrf->drr_length == offset) {
166127668Sbms			drrf->drr_length += length;
167127668Sbms			return (0);
168127668Sbms		} else {
169127668Sbms			/* not a continuation.  Push out pending record */
170127668Sbms			if (dump_bytes(dsp, dsp->dsa_drr,
171127668Sbms			    sizeof (dmu_replay_record_t)) != 0)
172127668Sbms				return (SET_ERROR(EINTR));
173127668Sbms			dsp->dsa_pending_op = PENDING_NONE;
174127668Sbms		}
175127668Sbms	}
176127668Sbms	/* create a FREE record and make it pending */
177127668Sbms	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
17875115Sfenner	dsp->dsa_drr->drr_type = DRR_FREE;
179127668Sbms	drrf->drr_object = object;
180127668Sbms	drrf->drr_offset = offset;
181127668Sbms	drrf->drr_length = length;
182127668Sbms	drrf->drr_toguid = dsp->dsa_toguid;
183127668Sbms	if (length == -1ULL) {
184127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
185127668Sbms		    sizeof (dmu_replay_record_t)) != 0)
18675115Sfenner			return (SET_ERROR(EINTR));
18775115Sfenner	} else {
188127668Sbms		dsp->dsa_pending_op = PENDING_FREE;
189127668Sbms	}
190127668Sbms
191127668Sbms	return (0);
192127668Sbms}
193127668Sbms
19475115Sfennerstatic int
195127668Sbmsdump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
196127668Sbms    uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
197127668Sbms{
198127668Sbms	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
199127668Sbms
20075115Sfenner	/*
201127668Sbms	 * We send data in increasing object, offset order.
202127668Sbms	 * See comment in dump_free() for details.
203127668Sbms	 */
204127668Sbms	ASSERT(object > dsp->dsa_last_data_object ||
205127668Sbms	    (object == dsp->dsa_last_data_object &&
206127668Sbms	    offset > dsp->dsa_last_data_offset));
207127668Sbms	dsp->dsa_last_data_object = object;
208127668Sbms	dsp->dsa_last_data_offset = offset + blksz - 1;
209127668Sbms
210127668Sbms	/*
211127668Sbms	 * If there is any kind of pending aggregation (currently either
212127668Sbms	 * a grouping of free objects or free blocks), push it out to
213127668Sbms	 * the stream, since aggregation can't be done across operations
214127668Sbms	 * of different types.
215127668Sbms	 */
216127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE) {
217127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
21875115Sfenner		    sizeof (dmu_replay_record_t)) != 0)
219127668Sbms			return (SET_ERROR(EINTR));
220127668Sbms		dsp->dsa_pending_op = PENDING_NONE;
221127668Sbms	}
222127668Sbms	/* write a DATA record */
223127668Sbms	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
22475115Sfenner	dsp->dsa_drr->drr_type = DRR_WRITE;
225127668Sbms	drrw->drr_object = object;
22675115Sfenner	drrw->drr_type = type;
227127668Sbms	drrw->drr_offset = offset;
228127668Sbms	drrw->drr_length = blksz;
22975115Sfenner	drrw->drr_toguid = dsp->dsa_toguid;
230127668Sbms	if (bp == NULL || BP_IS_EMBEDDED(bp)) {
231127668Sbms		/*
23275115Sfenner		 * There's no pre-computed checksum for partial-block
23375115Sfenner		 * writes or embedded BP's, so (like
234127668Sbms		 * fletcher4-checkummed blocks) userland will have to
235127668Sbms		 * compute a dedup-capable checksum itself.
236127668Sbms		 */
237127668Sbms		drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
238127668Sbms	} else {
23975115Sfenner		drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
240127668Sbms		if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
241127668Sbms			drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
24275115Sfenner		DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
24375115Sfenner		DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
244127668Sbms		DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
245127668Sbms		drrw->drr_key.ddk_cksum = bp->blk_cksum;
246127668Sbms	}
247127668Sbms
24875115Sfenner	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
249127668Sbms		return (SET_ERROR(EINTR));
25075115Sfenner	if (dump_bytes(dsp, data, blksz) != 0)
251127668Sbms		return (SET_ERROR(EINTR));
252127668Sbms	return (0);
253127668Sbms}
254127668Sbms
255127668Sbmsstatic int
256127668Sbmsdump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
257127668Sbms    int blksz, const blkptr_t *bp)
258127668Sbms{
259127668Sbms	char buf[BPE_PAYLOAD_SIZE];
260127668Sbms	struct drr_write_embedded *drrw =
26175115Sfenner	    &(dsp->dsa_drr->drr_u.drr_write_embedded);
262127668Sbms
263127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE) {
264127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
265127668Sbms		    sizeof (dmu_replay_record_t)) != 0)
26675115Sfenner			return (EINTR);
267127668Sbms		dsp->dsa_pending_op = PENDING_NONE;
268127668Sbms	}
269127668Sbms
270127668Sbms	ASSERT(BP_IS_EMBEDDED(bp));
271127668Sbms
272127668Sbms	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
27375115Sfenner	dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED;
274127668Sbms	drrw->drr_object = object;
275127668Sbms	drrw->drr_offset = offset;
276127668Sbms	drrw->drr_length = blksz;
277127668Sbms	drrw->drr_toguid = dsp->dsa_toguid;
278127668Sbms	drrw->drr_compression = BP_GET_COMPRESS(bp);
279127668Sbms	drrw->drr_etype = BPE_GET_ETYPE(bp);
280127668Sbms	drrw->drr_lsize = BPE_GET_LSIZE(bp);
281127668Sbms	drrw->drr_psize = BPE_GET_PSIZE(bp);
282127668Sbms
283127668Sbms	decode_embedded_bp_compressed(bp, buf);
284127668Sbms
285127668Sbms	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
286127668Sbms		return (EINTR);
287127668Sbms	if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
288127668Sbms		return (EINTR);
289127668Sbms	return (0);
290127668Sbms}
291127668Sbms
292127668Sbmsstatic int
293127668Sbmsdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
294127668Sbms{
295127668Sbms	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
296127668Sbms
297127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE) {
298127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
299127668Sbms		    sizeof (dmu_replay_record_t)) != 0)
300127668Sbms			return (SET_ERROR(EINTR));
301127668Sbms		dsp->dsa_pending_op = PENDING_NONE;
302127668Sbms	}
303127668Sbms
304127668Sbms	/* write a SPILL record */
305127668Sbms	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
306127668Sbms	dsp->dsa_drr->drr_type = DRR_SPILL;
307127668Sbms	drrs->drr_object = object;
308127668Sbms	drrs->drr_length = blksz;
309127668Sbms	drrs->drr_toguid = dsp->dsa_toguid;
310127668Sbms
31175115Sfenner	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
312127668Sbms		return (SET_ERROR(EINTR));
313127668Sbms	if (dump_bytes(dsp, data, blksz))
31475115Sfenner		return (SET_ERROR(EINTR));
31517680Spst	return (0);
316127668Sbms}
317127668Sbms
31817680Spststatic int
31917680Spstdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
320127668Sbms{
32117680Spst	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
32217680Spst
32317680Spst	/* See comment in dump_free(). */
32417680Spst	if (!dsp->dsa_incremental)
32517680Spst		return (0);
32617680Spst
32717680Spst	/*
32817680Spst	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
32917680Spst	 * push it out, since free block aggregation can only be done for
33017680Spst	 * blocks of the same type (i.e., DRR_FREE records can only be
33117680Spst	 * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
33217680Spst	 * can only be aggregated with other DRR_FREEOBJECTS records.
33317680Spst	 */
334127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE &&
335127668Sbms	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
336127668Sbms		if (dump_bytes(dsp, dsp->dsa_drr,
337127668Sbms		    sizeof (dmu_replay_record_t)) != 0)
338127668Sbms			return (SET_ERROR(EINTR));
339127668Sbms		dsp->dsa_pending_op = PENDING_NONE;
34017680Spst	}
34117680Spst	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
34217680Spst		/*
34317680Spst		 * See whether this free object array can be aggregated
34417680Spst		 * with pending one
34517680Spst		 */
34617680Spst		if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
34717680Spst			drrfo->drr_numobjs += numobjs;
34817680Spst			return (0);
34917680Spst		} else {
35017680Spst			/* can't be aggregated.  Push out pending record */
35117680Spst			if (dump_bytes(dsp, dsp->dsa_drr,
35217680Spst			    sizeof (dmu_replay_record_t)) != 0)
35317680Spst				return (SET_ERROR(EINTR));
35417680Spst			dsp->dsa_pending_op = PENDING_NONE;
35517680Spst		}
35617680Spst	}
35717680Spst
35817680Spst	/* write a FREEOBJECTS record */
35917680Spst	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
36017680Spst	dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
36117680Spst	drrfo->drr_firstobj = firstobj;
36217680Spst	drrfo->drr_numobjs = numobjs;
363127668Sbms	drrfo->drr_toguid = dsp->dsa_toguid;
36417680Spst
36517680Spst	dsp->dsa_pending_op = PENDING_FREEOBJECTS;
36617680Spst
367127668Sbms	return (0);
36817680Spst}
36917680Spst
37017680Spststatic int
37117680Spstdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
372127668Sbms{
37317680Spst	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
37417680Spst
375127668Sbms	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
37617680Spst		return (dump_freeobjects(dsp, object, 1));
377127668Sbms
37817680Spst	if (dsp->dsa_pending_op != PENDING_NONE) {
37917680Spst		if (dump_bytes(dsp, dsp->dsa_drr,
38017680Spst		    sizeof (dmu_replay_record_t)) != 0)
38117680Spst			return (SET_ERROR(EINTR));
38217680Spst		dsp->dsa_pending_op = PENDING_NONE;
38317680Spst	}
384127668Sbms
38517680Spst	/* write an OBJECT record */
386127668Sbms	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
387127668Sbms	dsp->dsa_drr->drr_type = DRR_OBJECT;
388127668Sbms	drro->drr_object = object;
38917680Spst	drro->drr_type = dnp->dn_type;
39017680Spst	drro->drr_bonustype = dnp->dn_bonustype;
39117680Spst	drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
39217680Spst	drro->drr_bonuslen = dnp->dn_bonuslen;
393127668Sbms	drro->drr_checksumtype = dnp->dn_checksum;
39417680Spst	drro->drr_compress = dnp->dn_compress;
39517680Spst	drro->drr_toguid = dsp->dsa_toguid;
396127668Sbms
39717680Spst	if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
398127668Sbms	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
399127668Sbms		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
400127668Sbms
401127668Sbms	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
402127668Sbms		return (SET_ERROR(EINTR));
40317680Spst
40417680Spst	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
405127668Sbms		return (SET_ERROR(EINTR));
40617680Spst
407127668Sbms	/* Free anything past the end of the file. */
408127668Sbms	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
409127668Sbms	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
410127668Sbms		return (SET_ERROR(EINTR));
411127668Sbms	if (dsp->dsa_err != 0)
412127668Sbms		return (SET_ERROR(EINTR));
41317680Spst	return (0);
41417680Spst}
415127668Sbms
41617680Spststatic boolean_t
41717680Spstbackup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
41817680Spst{
41917680Spst	if (!BP_IS_EMBEDDED(bp))
42017680Spst		return (B_FALSE);
42117680Spst
42217680Spst	/*
42317680Spst	 * Compression function must be legacy, or explicitly enabled.
424127668Sbms	 */
42517680Spst	if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
42617680Spst	    !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)))
42717680Spst		return (B_FALSE);
42817680Spst
42917680Spst	/*
430127668Sbms	 * Embed type must be explicitly enabled.
431127668Sbms	 */
432127668Sbms	switch (BPE_GET_ETYPE(bp)) {
433127668Sbms	case BP_EMBEDDED_TYPE_DATA:
434127668Sbms		if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
435127668Sbms			return (B_TRUE);
43617680Spst		break;
43717680Spst	default:
438127668Sbms		return (B_FALSE);
43917680Spst	}
440127668Sbms	return (B_FALSE);
44117680Spst}
44217680Spst
/*
 * Number of bytes of an object covered by one block pointer at the given
 * indirection level of dnode `dnp': the data block size (dn_datablkszsec
 * sectors of 1 << SPA_MINBLOCKSHIFT bytes) multiplied by the number of
 * block pointers per indirect block, once for each level.
 *
 * Both arguments are parenthesized so that expressions such as `dnp + 1'
 * or `&dn' expand correctly.  `dnp' is evaluated twice.
 */
#define	BP_SPAN(dnp, level) \
	(((uint64_t)(dnp)->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
	(level) * ((dnp)->dn_indblkshift - SPA_BLKPTRSHIFT)))
44617680Spst
447127668Sbms/* ARGSUSED */
44817680Spststatic int
44917680Spstbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
45017680Spst    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
45117680Spst{
452127668Sbms	dmu_sendarg_t *dsp = arg;
45317680Spst	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
45417680Spst	int err = 0;
45517680Spst
45617680Spst	if (issig(JUSTLOOKING) && issig(FORREAL))
457127668Sbms		return (SET_ERROR(EINTR));
45817680Spst
45917680Spst	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
46017680Spst	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
46117680Spst		return (0);
46217680Spst	} else if (zb->zb_level == ZB_ZIL_LEVEL) {
46317680Spst		/*
464127668Sbms		 * If we are sending a non-snapshot (which is allowed on
46517680Spst		 * read-only pools), it may have a ZIL, which must be ignored.
46617680Spst		 */
46717680Spst		return (0);
46817680Spst	} else if (BP_IS_HOLE(bp) &&
46917680Spst	    zb->zb_object == DMU_META_DNODE_OBJECT) {
47017680Spst		uint64_t span = BP_SPAN(dnp, zb->zb_level);
47117680Spst		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
47217680Spst		err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
473127668Sbms	} else if (BP_IS_HOLE(bp)) {
47417680Spst		uint64_t span = BP_SPAN(dnp, zb->zb_level);
47517680Spst		err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
47617680Spst	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
47717680Spst		return (0);
478127668Sbms	} else if (type == DMU_OT_DNODE) {
47917680Spst		dnode_phys_t *blk;
48017680Spst		int i;
48117680Spst		int blksz = BP_GET_LSIZE(bp);
48217680Spst		arc_flags_t aflags = ARC_FLAG_WAIT;
48317680Spst		arc_buf_t *abuf;
48417680Spst
485127668Sbms		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
48617680Spst		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
48717680Spst		    &aflags, zb) != 0)
48817680Spst			return (SET_ERROR(EIO));
48917680Spst
49017680Spst		blk = abuf->b_data;
49117680Spst		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
49217680Spst			uint64_t dnobj = (zb->zb_blkid <<
49317680Spst			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
49417680Spst			err = dump_dnode(dsp, dnobj, blk+i);
49517680Spst			if (err != 0)
49617680Spst				break;
49717680Spst		}
49817680Spst		(void) arc_buf_remove_ref(abuf, &abuf);
499127668Sbms	} else if (type == DMU_OT_SA) {
50017680Spst		arc_flags_t aflags = ARC_FLAG_WAIT;
50117680Spst		arc_buf_t *abuf;
50217680Spst		int blksz = BP_GET_LSIZE(bp);
50317680Spst
504127668Sbms		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
50517680Spst		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
50617680Spst		    &aflags, zb) != 0)
50717680Spst			return (SET_ERROR(EIO));
50817680Spst
50917680Spst		err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
51017680Spst		(void) arc_buf_remove_ref(abuf, &abuf);
511127668Sbms	} else if (backup_do_embed(dsp, bp)) {
51217680Spst		/* it's an embedded level-0 block of a regular object */
51317680Spst		int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
51417680Spst		err = dump_write_embedded(dsp, zb->zb_object,
51517680Spst		    zb->zb_blkid * blksz, blksz, bp);
51617680Spst	} else { /* it's a level-0 block of a regular object */
51717680Spst		arc_flags_t aflags = ARC_FLAG_WAIT;
518127668Sbms		arc_buf_t *abuf;
51917680Spst		int blksz = BP_GET_LSIZE(bp);
52017680Spst		uint64_t offset;
52117680Spst
52217680Spst		ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
52317680Spst		ASSERT0(zb->zb_level);
52417680Spst		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
52517680Spst		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
52617680Spst		    &aflags, zb) != 0) {
52717680Spst			if (zfs_send_corrupt_data) {
52817680Spst				/* Send a block filled with 0x"zfs badd bloc" */
52917680Spst				abuf = arc_buf_alloc(spa, blksz, &abuf,
53017680Spst				    ARC_BUFC_DATA);
53117680Spst				uint64_t *ptr;
53217680Spst				for (ptr = abuf->b_data;
53317680Spst				    (char *)ptr < (char *)abuf->b_data + blksz;
53417680Spst				    ptr++)
535127668Sbms					*ptr = 0x2f5baddb10c;
53617680Spst			} else {
53717680Spst				return (SET_ERROR(EIO));
53817680Spst			}
53917680Spst		}
54017680Spst
54117680Spst		offset = zb->zb_blkid * blksz;
54217680Spst
54317680Spst		if (!(dsp->dsa_featureflags &
54417680Spst		    DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
545127668Sbms		    blksz > SPA_OLD_MAXBLOCKSIZE) {
54617680Spst			char *buf = abuf->b_data;
54717680Spst			while (blksz > 0 && err == 0) {
54817680Spst				int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
54917680Spst				err = dump_write(dsp, type, zb->zb_object,
55017680Spst				    offset, n, NULL, buf);
55117680Spst				offset += n;
55217680Spst				buf += n;
55317680Spst				blksz -= n;
55417680Spst			}
55517680Spst		} else {
556127668Sbms			err = dump_write(dsp, type, zb->zb_object,
55717680Spst			    offset, blksz, bp, abuf->b_data);
55817680Spst		}
55917680Spst		(void) arc_buf_remove_ref(abuf, &abuf);
56017680Spst	}
561127668Sbms
56217680Spst	ASSERT(err == 0 || err == EINTR);
56317680Spst	return (err);
56417680Spst}
56517680Spst
566127668Sbms/*
56717680Spst * Releases dp using the specified tag.
56817680Spst */
56917680Spststatic int
57017680Spstdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
57117680Spst    zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
57217680Spst#ifdef illumos
573127668Sbms    boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
57417680Spst#else
57517680Spst    boolean_t large_block_ok, int outfd, struct file *fp, offset_t *off)
576127668Sbms#endif
57717680Spst{
578127668Sbms	objset_t *os;
57917680Spst	dmu_replay_record_t *drr;
58017680Spst	dmu_sendarg_t *dsp;
58117680Spst	int err;
58217680Spst	uint64_t fromtxg = 0;
583127668Sbms	uint64_t featureflags = 0;
584127668Sbms
58517680Spst	err = dmu_objset_from_ds(ds, &os);
58617680Spst	if (err != 0) {
587127668Sbms		dsl_pool_rele(dp, tag);
58817680Spst		return (err);
589127668Sbms	}
590127668Sbms
591127668Sbms	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
59217680Spst	drr->drr_type = DRR_BEGIN;
593127668Sbms	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
594127668Sbms	DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
59517680Spst	    DMU_SUBSTREAM);
59617680Spst
597127668Sbms#ifdef _KERNEL
59817680Spst	if (dmu_objset_type(os) == DMU_OST_ZFS) {
59917680Spst		uint64_t version;
600127668Sbms		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
60117680Spst			kmem_free(drr, sizeof (dmu_replay_record_t));
602127668Sbms			dsl_pool_rele(dp, tag);
603127668Sbms			return (SET_ERROR(EINVAL));
604127668Sbms		}
605127668Sbms		if (version >= ZPL_VERSION_SA) {
606127668Sbms			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
60717680Spst		}
60817680Spst	}
60917680Spst#endif
61017680Spst
61117680Spst	if (large_block_ok && ds->ds_large_blocks)
61217680Spst		featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
61317680Spst	if (embedok &&
61417680Spst	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
61517680Spst		featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
61617680Spst		if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
617127668Sbms			featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
61817680Spst	} else {
61917680Spst		embedok = B_FALSE;
62017680Spst	}
62117680Spst
62217680Spst	DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
62317680Spst	    featureflags);
624127668Sbms
62517680Spst	drr->drr_u.drr_begin.drr_creation_time =
626127668Sbms	    dsl_dataset_phys(ds)->ds_creation_time;
627127668Sbms	drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
628127668Sbms	if (is_clone)
62917680Spst		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
63017680Spst	drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(ds)->ds_guid;
631127668Sbms	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
632127668Sbms		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
633127668Sbms
634127668Sbms	if (fromzb != NULL) {
635127668Sbms		drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid;
636127668Sbms		fromtxg = fromzb->zbm_creation_txg;
637127668Sbms	}
638127668Sbms	dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
639127668Sbms	if (!dsl_dataset_is_snapshot(ds)) {
640127668Sbms		(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
64117680Spst		    sizeof (drr->drr_u.drr_begin.drr_toname));
642127668Sbms	}
643127668Sbms
644127668Sbms	dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
645127668Sbms
646127668Sbms	dsp->dsa_drr = drr;
64717680Spst	dsp->dsa_outfd = outfd;
64817680Spst	dsp->dsa_proc = curproc;
64917680Spst	dsp->dsa_td = curthread;
65017680Spst	dsp->dsa_fp = fp;
65117680Spst	dsp->dsa_os = os;
65217680Spst	dsp->dsa_off = off;
653127668Sbms	dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
654127668Sbms	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
655127668Sbms	dsp->dsa_pending_op = PENDING_NONE;
65617680Spst	dsp->dsa_incremental = (fromzb != NULL);
65717680Spst	dsp->dsa_featureflags = featureflags;
658127668Sbms
659127668Sbms	mutex_enter(&ds->ds_sendstream_lock);
660127668Sbms	list_insert_head(&ds->ds_sendstreams, dsp);
661127668Sbms	mutex_exit(&ds->ds_sendstream_lock);
662127668Sbms
663127668Sbms	dsl_dataset_long_hold(ds, FTAG);
664127668Sbms	dsl_pool_rele(dp, tag);
665127668Sbms
666127668Sbms	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
667127668Sbms		err = dsp->dsa_err;
66817680Spst		goto out;
669127668Sbms	}
670127668Sbms
671127668Sbms	err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
67217680Spst	    backup_cb, dsp);
673127668Sbms
674127668Sbms	if (dsp->dsa_pending_op != PENDING_NONE)
675127668Sbms		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
676127668Sbms			err = SET_ERROR(EINTR);
677127668Sbms
67817680Spst	if (err != 0) {
67917680Spst		if (err == EINTR && dsp->dsa_err != 0)
68017680Spst			err = dsp->dsa_err;
681127668Sbms		goto out;
682127668Sbms	}
683127668Sbms
684127668Sbms	bzero(drr, sizeof (dmu_replay_record_t));
685127668Sbms	drr->drr_type = DRR_END;
686127668Sbms	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
68717680Spst	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
688127668Sbms
689127668Sbms	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
690127668Sbms		err = dsp->dsa_err;
691127668Sbms		goto out;
692127668Sbms	}
693127668Sbms
694127668Sbmsout:
695127668Sbms	mutex_enter(&ds->ds_sendstream_lock);
696127668Sbms	list_remove(&ds->ds_sendstreams, dsp);
697127668Sbms	mutex_exit(&ds->ds_sendstream_lock);
69817680Spst
699127668Sbms	kmem_free(drr, sizeof (dmu_replay_record_t));
700127668Sbms	kmem_free(dsp, sizeof (dmu_sendarg_t));
70117680Spst
702127668Sbms	dsl_dataset_long_rele(ds, FTAG);
703127668Sbms
704127668Sbms	return (err);
70517680Spst}
70617680Spst
70717680Spstint
70817680Spstdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
709127668Sbms    boolean_t embedok, boolean_t large_block_ok,
710127668Sbms#ifdef illumos
711127668Sbms    int outfd, vnode_t *vp, offset_t *off)
712127668Sbms#else
713127668Sbms    int outfd, struct file *fp, offset_t *off)
714127668Sbms#endif
715127668Sbms{
716127668Sbms	dsl_pool_t *dp;
717127668Sbms	dsl_dataset_t *ds;
718127668Sbms	dsl_dataset_t *fromds = NULL;
719127668Sbms	int err;
720127668Sbms
72117680Spst	err = dsl_pool_hold(pool, FTAG, &dp);
72217680Spst	if (err != 0)
72317680Spst		return (err);
72417680Spst
72517680Spst	err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
72617680Spst	if (err != 0) {
72717680Spst		dsl_pool_rele(dp, FTAG);
72817680Spst		return (err);
72917680Spst	}
730127668Sbms
731127668Sbms	if (fromsnap != 0) {
73217680Spst		zfs_bookmark_phys_t zb;
733127668Sbms		boolean_t is_clone;
734127668Sbms
73517680Spst		err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
73617680Spst		if (err != 0) {
737127668Sbms			dsl_dataset_rele(ds, FTAG);
738127668Sbms			dsl_pool_rele(dp, FTAG);
739127668Sbms			return (err);
740127668Sbms		}
741127668Sbms		if (!dsl_dataset_is_before(ds, fromds, 0))
742127668Sbms			err = SET_ERROR(EXDEV);
743127668Sbms		zb.zbm_creation_time =
744127668Sbms		    dsl_dataset_phys(fromds)->ds_creation_time;
745127668Sbms		zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
746127668Sbms		zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
747127668Sbms		is_clone = (fromds->ds_dir != ds->ds_dir);
748127668Sbms		dsl_dataset_rele(fromds, FTAG);
749127668Sbms		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
750127668Sbms		    embedok, large_block_ok, outfd, fp, off);
751127668Sbms	} else {
752127668Sbms		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
753127668Sbms		    embedok, large_block_ok, outfd, fp, off);
754127668Sbms	}
755127668Sbms	dsl_dataset_rele(ds, FTAG);
756127668Sbms	return (err);
757127668Sbms}
758127668Sbms
759127668Sbmsint
760127668Sbmsdmu_send(const char *tosnap, const char *fromsnap,
761127668Sbms    boolean_t embedok, boolean_t large_block_ok,
762127668Sbms#ifdef illumos
763127668Sbms    int outfd, vnode_t *vp, offset_t *off)
764127668Sbms#else
765127668Sbms    int outfd, struct file *fp, offset_t *off)
766127668Sbms#endif
767127668Sbms{
768127668Sbms	dsl_pool_t *dp;
769127668Sbms	dsl_dataset_t *ds;
770127668Sbms	int err;
771127668Sbms	boolean_t owned = B_FALSE;
772127668Sbms
77317680Spst	if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
774127668Sbms		return (SET_ERROR(EINVAL));
775127668Sbms
776127668Sbms	err = dsl_pool_hold(tosnap, FTAG, &dp);
777127668Sbms	if (err != 0)
778127668Sbms		return (err);
779127668Sbms
780127668Sbms	if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) {
781127668Sbms		/*
782127668Sbms		 * We are sending a filesystem or volume.  Ensure
783127668Sbms		 * that it doesn't change by owning the dataset.
784127668Sbms		 */
785127668Sbms		err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
786127668Sbms		owned = B_TRUE;
787127668Sbms	} else {
788127668Sbms		err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
789127668Sbms	}
790127668Sbms	if (err != 0) {
791127668Sbms		dsl_pool_rele(dp, FTAG);
792127668Sbms		return (err);
793127668Sbms	}
794127668Sbms
795127668Sbms	if (fromsnap != NULL) {
796127668Sbms		zfs_bookmark_phys_t zb;
797127668Sbms		boolean_t is_clone = B_FALSE;
798127668Sbms		int fsnamelen = strchr(tosnap, '@') - tosnap;
799127668Sbms
800127668Sbms		/*
801127668Sbms		 * If the fromsnap is in a different filesystem, then
802127668Sbms		 * mark the send stream as a clone.
803127668Sbms		 */
804127668Sbms		if (strncmp(tosnap, fromsnap, fsnamelen) != 0 ||
805127668Sbms		    (fromsnap[fsnamelen] != '@' &&
806127668Sbms		    fromsnap[fsnamelen] != '#')) {
807127668Sbms			is_clone = B_TRUE;
808127668Sbms		}
809127668Sbms
810127668Sbms		if (strchr(fromsnap, '@')) {
811127668Sbms			dsl_dataset_t *fromds;
812127668Sbms			err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
813127668Sbms			if (err == 0) {
814127668Sbms				if (!dsl_dataset_is_before(ds, fromds, 0))
815127668Sbms					err = SET_ERROR(EXDEV);
816127668Sbms				zb.zbm_creation_time =
817127668Sbms				    dsl_dataset_phys(fromds)->ds_creation_time;
818127668Sbms				zb.zbm_creation_txg =
819127668Sbms				    dsl_dataset_phys(fromds)->ds_creation_txg;
820127668Sbms				zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
821127668Sbms				is_clone = (ds->ds_dir != fromds->ds_dir);
822127668Sbms				dsl_dataset_rele(fromds, FTAG);
823127668Sbms			}
824127668Sbms		} else {
825127668Sbms			err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
826127668Sbms		}
827127668Sbms		if (err != 0) {
828127668Sbms			dsl_dataset_rele(ds, FTAG);
829127668Sbms			dsl_pool_rele(dp, FTAG);
830127668Sbms			return (err);
831127668Sbms		}
832127668Sbms		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
833127668Sbms		    embedok, large_block_ok, outfd, fp, off);
834127668Sbms	} else {
835127668Sbms		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
836127668Sbms		    embedok, large_block_ok, outfd, fp, off);
837127668Sbms	}
838127668Sbms	if (owned)
839127668Sbms		dsl_dataset_disown(ds, FTAG);
840127668Sbms	else
841127668Sbms		dsl_dataset_rele(ds, FTAG);
842127668Sbms	return (err);
843127668Sbms}
844127668Sbms
845127668Sbmsint
846127668Sbmsdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
847127668Sbms{
848127668Sbms	dsl_pool_t *dp = ds->ds_dir->dd_pool;
849127668Sbms	int err;
850127668Sbms	uint64_t size;
851127668Sbms
852127668Sbms	ASSERT(dsl_pool_config_held(dp));
853127668Sbms
854127668Sbms	/* tosnap must be a snapshot */
855127668Sbms	if (!dsl_dataset_is_snapshot(ds))
856127668Sbms		return (SET_ERROR(EINVAL));
857127668Sbms
858127668Sbms	/*
859127668Sbms	 * fromsnap must be an earlier snapshot from the same fs as tosnap,
860127668Sbms	 * or the origin's fs.
861127668Sbms	 */
862127668Sbms	if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0))
863127668Sbms		return (SET_ERROR(EXDEV));
864127668Sbms
865127668Sbms	/* Get uncompressed size estimate of changed data. */
866127668Sbms	if (fromds == NULL) {
867127668Sbms		size = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
868127668Sbms	} else {
869127668Sbms		uint64_t used, comp;
870127668Sbms		err = dsl_dataset_space_written(fromds, ds,
871127668Sbms		    &used, &comp, &size);
872127668Sbms		if (err != 0)
873127668Sbms			return (err);
874127668Sbms	}
875127668Sbms
876127668Sbms	/*
877127668Sbms	 * Assume that space (both on-disk and in-stream) is dominated by
878127668Sbms	 * data.  We will adjust for indirect blocks and the copies property,
879127668Sbms	 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
880127668Sbms	 */
881127668Sbms
882127668Sbms	/*
883127668Sbms	 * Subtract out approximate space used by indirect blocks.
884127668Sbms	 * Assume most space is used by data blocks (non-indirect, non-dnode).
885127668Sbms	 * Assume all blocks are recordsize.  Assume ditto blocks and
886127668Sbms	 * internal fragmentation counter out compression.
887127668Sbms	 *
888127668Sbms	 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
889127668Sbms	 * block, which we observe in practice.
890127668Sbms	 */
891127668Sbms	uint64_t recordsize;
892127668Sbms	err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
893127668Sbms	if (err != 0)
894127668Sbms		return (err);
895127668Sbms	size -= size / recordsize * sizeof (blkptr_t);
896127668Sbms
897127668Sbms	/* Add in the space for the record associated with each block. */
898127668Sbms	size += size / recordsize * sizeof (dmu_replay_record_t);
899127668Sbms
900127668Sbms	*sizep = size;
901127668Sbms
902127668Sbms	return (0);
903127668Sbms}
904127668Sbms
905127668Sbmstypedef struct dmu_recv_begin_arg {
906127668Sbms	const char *drba_origin;
907127668Sbms	dmu_recv_cookie_t *drba_cookie;
908127668Sbms	cred_t *drba_cred;
909127668Sbms	uint64_t drba_snapobj;
910127668Sbms} dmu_recv_begin_arg_t;
911127668Sbms
912127668Sbmsstatic int
913127668Sbmsrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
914127668Sbms    uint64_t fromguid)
915127668Sbms{
916127668Sbms	uint64_t val;
917127668Sbms	int error;
918127668Sbms	dsl_pool_t *dp = ds->ds_dir->dd_pool;
919127668Sbms
920127668Sbms	/* temporary clone name must not exist */
921127668Sbms	error = zap_lookup(dp->dp_meta_objset,
922127668Sbms	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name,
923127668Sbms	    8, 1, &val);
924127668Sbms	if (error != ENOENT)
925127668Sbms		return (error == 0 ? EBUSY : error);
926127668Sbms
927127668Sbms	/* new snapshot name must not exist */
928127668Sbms	error = zap_lookup(dp->dp_meta_objset,
929127668Sbms	    dsl_dataset_phys(ds)->ds_snapnames_zapobj,
930127668Sbms	    drba->drba_cookie->drc_tosnap, 8, 1, &val);
931127668Sbms	if (error != ENOENT)
932127668Sbms		return (error == 0 ? EEXIST : error);
933127668Sbms
934127668Sbms	/*
935127668Sbms	 * Check snapshot limit before receiving. We'll recheck again at the
936127668Sbms	 * end, but might as well abort before receiving if we're already over
937127668Sbms	 * the limit.
938127668Sbms	 *
939127668Sbms	 * Note that we do not check the file system limit with
940127668Sbms	 * dsl_dir_fscount_check because the temporary %clones don't count
941127668Sbms	 * against that limit.
942127668Sbms	 */
943127668Sbms	error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT,
944127668Sbms	    NULL, drba->drba_cred);
945127668Sbms	if (error != 0)
946127668Sbms		return (error);
947127668Sbms
948127668Sbms	if (fromguid != 0) {
949127668Sbms		dsl_dataset_t *snap;
950127668Sbms		uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
951127668Sbms
952127668Sbms		/* Find snapshot in this dir that matches fromguid. */
953127668Sbms		while (obj != 0) {
954127668Sbms			error = dsl_dataset_hold_obj(dp, obj, FTAG,
955127668Sbms			    &snap);
956127668Sbms			if (error != 0)
957127668Sbms				return (SET_ERROR(ENODEV));
958127668Sbms			if (snap->ds_dir != ds->ds_dir) {
959127668Sbms				dsl_dataset_rele(snap, FTAG);
960127668Sbms				return (SET_ERROR(ENODEV));
961127668Sbms			}
962127668Sbms			if (dsl_dataset_phys(snap)->ds_guid == fromguid)
963127668Sbms				break;
964127668Sbms			obj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
965127668Sbms			dsl_dataset_rele(snap, FTAG);
966127668Sbms		}
967127668Sbms		if (obj == 0)
968127668Sbms			return (SET_ERROR(ENODEV));
969127668Sbms
970127668Sbms		if (drba->drba_cookie->drc_force) {
971127668Sbms			drba->drba_snapobj = obj;
972127668Sbms		} else {
973127668Sbms			/*
974127668Sbms			 * If we are not forcing, there must be no
975127668Sbms			 * changes since fromsnap.
976127668Sbms			 */
977127668Sbms			if (dsl_dataset_modified_since_snap(ds, snap)) {
978127668Sbms				dsl_dataset_rele(snap, FTAG);
979127668Sbms				return (SET_ERROR(ETXTBSY));
980127668Sbms			}
981127668Sbms			drba->drba_snapobj = ds->ds_prev->ds_object;
982127668Sbms		}
983127668Sbms
984127668Sbms		dsl_dataset_rele(snap, FTAG);
985127668Sbms	} else {
986127668Sbms		/* if full, most recent snapshot must be $ORIGIN */
987127668Sbms		if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= TXG_INITIAL)
988127668Sbms			return (SET_ERROR(ENODEV));
989127668Sbms		drba->drba_snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
990127668Sbms	}
991127668Sbms
992127668Sbms	return (0);
993127668Sbms
994127668Sbms}
995127668Sbms
996127668Sbmsstatic int
997127668Sbmsdmu_recv_begin_check(void *arg, dmu_tx_t *tx)
998127668Sbms{
999127668Sbms	dmu_recv_begin_arg_t *drba = arg;
1000127668Sbms	dsl_pool_t *dp = dmu_tx_pool(tx);
1001127668Sbms	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
1002127668Sbms	uint64_t fromguid = drrb->drr_fromguid;
1003127668Sbms	int flags = drrb->drr_flags;
1004127668Sbms	int error;
1005127668Sbms	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
1006127668Sbms	dsl_dataset_t *ds;
1007127668Sbms	const char *tofs = drba->drba_cookie->drc_tofs;
1008127668Sbms
1009127668Sbms	/* already checked */
1010127668Sbms	ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
1011127668Sbms
1012127668Sbms	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
1013127668Sbms	    DMU_COMPOUNDSTREAM ||
1014127668Sbms	    drrb->drr_type >= DMU_OST_NUMTYPES ||
1015127668Sbms	    ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
1016127668Sbms		return (SET_ERROR(EINVAL));
1017127668Sbms
1018127668Sbms	/* Verify pool version supports SA if SA_SPILL feature set */
1019127668Sbms	if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
1020127668Sbms	    spa_version(dp->dp_spa) < SPA_VERSION_SA)
1021127668Sbms		return (SET_ERROR(ENOTSUP));
1022127668Sbms
1023127668Sbms	/*
1024127668Sbms	 * The receiving code doesn't know how to translate a WRITE_EMBEDDED
1025127668Sbms	 * record to a plan WRITE record, so the pool must have the
1026127668Sbms	 * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
1027127668Sbms	 * records.  Same with WRITE_EMBEDDED records that use LZ4 compression.
1028127668Sbms	 */
1029127668Sbms	if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
1030127668Sbms	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
1031127668Sbms		return (SET_ERROR(ENOTSUP));
1032127668Sbms	if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
1033127668Sbms	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
1034127668Sbms		return (SET_ERROR(ENOTSUP));
1035127668Sbms
1036127668Sbms	/*
1037127668Sbms	 * The receiving code doesn't know how to translate large blocks
1038127668Sbms	 * to smaller ones, so the pool must have the LARGE_BLOCKS
1039127668Sbms	 * feature enabled if the stream has LARGE_BLOCKS.
1040127668Sbms	 */
1041127668Sbms	if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
1042127668Sbms	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
1043127668Sbms		return (SET_ERROR(ENOTSUP));
1044127668Sbms
1045127668Sbms	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
1046127668Sbms	if (error == 0) {
1047127668Sbms		/* target fs already exists; recv into temp clone */
1048127668Sbms
1049127668Sbms		/* Can't recv a clone into an existing fs */
1050127668Sbms		if (flags & DRR_FLAG_CLONE) {
1051127668Sbms			dsl_dataset_rele(ds, FTAG);
1052127668Sbms			return (SET_ERROR(EINVAL));
1053127668Sbms		}
1054127668Sbms
1055127668Sbms		error = recv_begin_check_existing_impl(drba, ds, fromguid);
1056127668Sbms		dsl_dataset_rele(ds, FTAG);
1057127668Sbms	} else if (error == ENOENT) {
1058127668Sbms		/* target fs does not exist; must be a full backup or clone */
1059127668Sbms		char buf[MAXNAMELEN];
1060127668Sbms
1061127668Sbms		/*
1062127668Sbms		 * If it's a non-clone incremental, we are missing the
1063127668Sbms		 * target fs, so fail the recv.
1064127668Sbms		 */
1065127668Sbms		if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
1066127668Sbms			return (SET_ERROR(ENOENT));
1067127668Sbms
1068127668Sbms		/* Open the parent of tofs */
1069127668Sbms		ASSERT3U(strlen(tofs), <, MAXNAMELEN);
1070127668Sbms		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
1071127668Sbms		error = dsl_dataset_hold(dp, buf, FTAG, &ds);
1072127668Sbms		if (error != 0)
1073127668Sbms			return (error);
1074127668Sbms
1075127668Sbms		/*
1076127668Sbms		 * Check filesystem and snapshot limits before receiving. We'll
1077127668Sbms		 * recheck snapshot limits again at the end (we create the
1078127668Sbms		 * filesystems and increment those counts during begin_sync).
1079127668Sbms		 */
1080127668Sbms		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
1081127668Sbms		    ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred);
1082127668Sbms		if (error != 0) {
1083127668Sbms			dsl_dataset_rele(ds, FTAG);
1084127668Sbms			return (error);
1085127668Sbms		}
1086127668Sbms
1087127668Sbms		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
1088127668Sbms		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred);
1089127668Sbms		if (error != 0) {
1090127668Sbms			dsl_dataset_rele(ds, FTAG);
1091127668Sbms			return (error);
1092127668Sbms		}
1093127668Sbms
1094127668Sbms		if (drba->drba_origin != NULL) {
1095127668Sbms			dsl_dataset_t *origin;
1096127668Sbms			error = dsl_dataset_hold(dp, drba->drba_origin,
1097127668Sbms			    FTAG, &origin);
1098127668Sbms			if (error != 0) {
1099127668Sbms				dsl_dataset_rele(ds, FTAG);
1100127668Sbms				return (error);
1101127668Sbms			}
1102127668Sbms			if (!dsl_dataset_is_snapshot(origin)) {
1103127668Sbms				dsl_dataset_rele(origin, FTAG);
1104127668Sbms				dsl_dataset_rele(ds, FTAG);
1105127668Sbms				return (SET_ERROR(EINVAL));
1106127668Sbms			}
1107127668Sbms			if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
1108127668Sbms				dsl_dataset_rele(origin, FTAG);
1109127668Sbms				dsl_dataset_rele(ds, FTAG);
1110127668Sbms				return (SET_ERROR(ENODEV));
1111127668Sbms			}
1112127668Sbms			dsl_dataset_rele(origin, FTAG);
1113127668Sbms		}
1114127668Sbms		dsl_dataset_rele(ds, FTAG);
1115127668Sbms		error = 0;
1116127668Sbms	}
1117127668Sbms	return (error);
1118127668Sbms}
1119127668Sbms
1120127668Sbmsstatic void
1121127668Sbmsdmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
1122127668Sbms{
1123127668Sbms	dmu_recv_begin_arg_t *drba = arg;
1124127668Sbms	dsl_pool_t *dp = dmu_tx_pool(tx);
1125127668Sbms	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
1126127668Sbms	const char *tofs = drba->drba_cookie->drc_tofs;
1127127668Sbms	dsl_dataset_t *ds, *newds;
1128127668Sbms	uint64_t dsobj;
1129127668Sbms	int error;
1130127668Sbms	uint64_t crflags;
1131127668Sbms
1132127668Sbms	crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
1133127668Sbms	    DS_FLAG_CI_DATASET : 0;
1134127668Sbms
1135127668Sbms	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
1136127668Sbms	if (error == 0) {
1137127668Sbms		/* create temporary clone */
1138127668Sbms		dsl_dataset_t *snap = NULL;
1139127668Sbms		if (drba->drba_snapobj != 0) {
1140127668Sbms			VERIFY0(dsl_dataset_hold_obj(dp,
1141127668Sbms			    drba->drba_snapobj, FTAG, &snap));
1142127668Sbms		}
1143127668Sbms		dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
1144127668Sbms		    snap, crflags, drba->drba_cred, tx);
1145127668Sbms		dsl_dataset_rele(snap, FTAG);
1146127668Sbms		dsl_dataset_rele(ds, FTAG);
1147127668Sbms	} else {
1148127668Sbms		dsl_dir_t *dd;
1149127668Sbms		const char *tail;
1150127668Sbms		dsl_dataset_t *origin = NULL;
1151127668Sbms
1152127668Sbms		VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail));
1153127668Sbms
1154127668Sbms		if (drba->drba_origin != NULL) {
1155127668Sbms			VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
1156127668Sbms			    FTAG, &origin));
1157127668Sbms		}
1158127668Sbms
1159127668Sbms		/* Create new dataset. */
1160127668Sbms		dsobj = dsl_dataset_create_sync(dd,
1161127668Sbms		    strrchr(tofs, '/') + 1,
1162127668Sbms		    origin, crflags, drba->drba_cred, tx);
1163127668Sbms		if (origin != NULL)
116417680Spst			dsl_dataset_rele(origin, FTAG);
116517680Spst		dsl_dir_rele(dd, FTAG);
116617680Spst		drba->drba_cookie->drc_newfs = B_TRUE;
116717680Spst	}
116817680Spst	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
116917680Spst
117017680Spst	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
117117680Spst	    DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
117217680Spst	    !newds->ds_large_blocks) {
117317680Spst		dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
1174127668Sbms		newds->ds_large_blocks = B_TRUE;
1175127668Sbms	}
1176127668Sbms
117717680Spst	dmu_buf_will_dirty(newds->ds_dbuf, tx);
117817680Spst	dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
117917680Spst
118017680Spst	/*
118117680Spst	 * If we actually created a non-clone, we need to create the
1182127668Sbms	 * objset in our new dataset.
1183127668Sbms	 */
1184127668Sbms	if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
1185127668Sbms		(void) dmu_objset_create_impl(dp->dp_spa,
1186127668Sbms		    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
1187127668Sbms	}
1188127668Sbms
1189127668Sbms	drba->drba_cookie->drc_ds = newds;
1190127668Sbms
1191127668Sbms	spa_history_log_internal_ds(newds, "receive", tx, "");
119217680Spst}
1193127668Sbms
1194127668Sbms/*
1195127668Sbms * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
119617680Spst * succeeds; otherwise we will leak the holds on the datasets.
119717680Spst */
1198127668Sbmsint
1199127668Sbmsdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
1200127668Sbms    boolean_t force, char *origin, dmu_recv_cookie_t *drc)
1201127668Sbms{
1202127668Sbms	dmu_recv_begin_arg_t drba = { 0 };
1203127668Sbms	dmu_replay_record_t *drr;
1204127668Sbms
1205127668Sbms	bzero(drc, sizeof (dmu_recv_cookie_t));
1206127668Sbms	drc->drc_drrb = drrb;
1207127668Sbms	drc->drc_tosnap = tosnap;
1208127668Sbms	drc->drc_tofs = tofs;
1209127668Sbms	drc->drc_force = force;
1210127668Sbms	drc->drc_cred = CRED();
1211127668Sbms
1212127668Sbms	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
1213127668Sbms		drc->drc_byteswap = B_TRUE;
1214127668Sbms	else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
1215127668Sbms		return (SET_ERROR(EINVAL));
1216127668Sbms
1217127668Sbms	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
1218127668Sbms	drr->drr_type = DRR_BEGIN;
1219127668Sbms	drr->drr_u.drr_begin = *drc->drc_drrb;
1220127668Sbms	if (drc->drc_byteswap) {
1221127668Sbms		fletcher_4_incremental_byteswap(drr,
1222127668Sbms		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
1223127668Sbms	} else {
1224127668Sbms		fletcher_4_incremental_native(drr,
1225127668Sbms		    sizeof (dmu_replay_record_t), &drc->drc_cksum);
1226127668Sbms	}
1227127668Sbms	kmem_free(drr, sizeof (dmu_replay_record_t));
1228127668Sbms
1229127668Sbms	if (drc->drc_byteswap) {
1230127668Sbms		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
1231127668Sbms		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
1232127668Sbms		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
1233127668Sbms		drrb->drr_type = BSWAP_32(drrb->drr_type);
1234127668Sbms		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
1235127668Sbms		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
1236127668Sbms	}
1237127668Sbms
1238127668Sbms	drba.drba_origin = origin;
1239127668Sbms	drba.drba_cookie = drc;
1240127668Sbms	drba.drba_cred = CRED();
1241127668Sbms
1242127668Sbms	return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
1243127668Sbms	    &drba, 5, ZFS_SPACE_CHECK_NORMAL));
1244127668Sbms}
1245127668Sbms
1246127668Sbmsstruct restorearg {
1247127668Sbms	int err;
1248127668Sbms	boolean_t byteswap;
124917680Spst	kthread_t *td;
125017680Spst	struct file *fp;
1251127668Sbms	char *buf;
1252127668Sbms	uint64_t voff;
1253127668Sbms	int bufsize; /* amount of memory allocated for buf */
125417680Spst	zio_cksum_t cksum;
125517680Spst	avl_tree_t *guid_to_ds_map;
125617680Spst};
125717680Spst
1258127668Sbmstypedef struct guid_map_entry {
1259127668Sbms	uint64_t	guid;
1260127668Sbms	dsl_dataset_t	*gme_ds;
1261127668Sbms	avl_node_t	avlnode;
1262127668Sbms} guid_map_entry_t;
1263127668Sbms
1264127668Sbmsstatic int
1265127668Sbmsguid_compare(const void *arg1, const void *arg2)
1266127668Sbms{
1267127668Sbms	const guid_map_entry_t *gmep1 = arg1;
1268127668Sbms	const guid_map_entry_t *gmep2 = arg2;
1269127668Sbms
1270127668Sbms	if (gmep1->guid < gmep2->guid)
1271127668Sbms		return (-1);
1272127668Sbms	else if (gmep1->guid > gmep2->guid)
1273127668Sbms		return (1);
127417680Spst	return (0);
127517680Spst}
127617680Spst
127717680Spststatic void
127817680Spstfree_guid_map_onexit(void *arg)
127917680Spst{
128017680Spst	avl_tree_t *ca = arg;
128117680Spst	void *cookie = NULL;
128217680Spst	guid_map_entry_t *gmep;
128317680Spst
1284127668Sbms	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
1285127668Sbms		dsl_dataset_long_rele(gmep->gme_ds, gmep);
1286127668Sbms		dsl_dataset_rele(gmep->gme_ds, gmep);
1287127668Sbms		kmem_free(gmep, sizeof (guid_map_entry_t));
128817680Spst	}
128917680Spst	avl_destroy(ca);
129017680Spst	kmem_free(ca, sizeof (avl_tree_t));
1291127668Sbms}
1292127668Sbms
1293127668Sbmsstatic int
129417680Spstrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid)
1295127668Sbms{
1296127668Sbms	struct uio auio;
1297127668Sbms	struct iovec aiov;
129817680Spst	int error;
1299127668Sbms
1300127668Sbms	aiov.iov_base = buf;
1301127668Sbms	aiov.iov_len = len;
1302127668Sbms	auio.uio_iov = &aiov;
1303127668Sbms	auio.uio_iovcnt = 1;
130417680Spst	auio.uio_resid = len;
1305127668Sbms	auio.uio_segflg = UIO_SYSSPACE;
1306127668Sbms	auio.uio_rw = UIO_READ;
1307127668Sbms	auio.uio_offset = off;
1308127668Sbms	auio.uio_td = ra->td;
1309127668Sbms#ifdef _KERNEL
1310127668Sbms	error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td);
1311127668Sbms#else
1312127668Sbms	fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__);
1313127668Sbms	error = EOPNOTSUPP;
1314127668Sbms#endif
1315127668Sbms	*resid = auio.uio_resid;
1316127668Sbms	return (error);
1317127668Sbms}
1318127668Sbms
1319127668Sbmsstatic void *
1320127668Sbmsrestore_read(struct restorearg *ra, int len, char *buf)
1321127668Sbms{
132217680Spst	int done = 0;
1323127668Sbms
1324127668Sbms	if (buf == NULL)
1325127668Sbms		buf = ra->buf;
1326127668Sbms
1327127668Sbms	/* some things will require 8-byte alignment, so everything must */
1328127668Sbms	ASSERT0(len % 8);
132917680Spst	ASSERT3U(len, <=, ra->bufsize);
133017680Spst
1331127668Sbms	while (done < len) {
1332127668Sbms		ssize_t resid;
1333127668Sbms
1334127668Sbms		ra->err = restore_bytes(ra, buf + done,
1335127668Sbms		    len - done, ra->voff, &resid);
1336127668Sbms
1337127668Sbms		if (resid == len - done)
1338127668Sbms			ra->err = SET_ERROR(EINVAL);
1339127668Sbms		ra->voff += len - done - resid;
1340127668Sbms		done = len - resid;
1341127668Sbms		if (ra->err != 0)
1342127668Sbms			return (NULL);
134317680Spst	}
1344127668Sbms
1345127668Sbms	ASSERT3U(done, ==, len);
1346127668Sbms	if (ra->byteswap)
1347127668Sbms		fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
1348127668Sbms	else
1349127668Sbms		fletcher_4_incremental_native(buf, len, &ra->cksum);
1350127668Sbms	return (buf);
135117680Spst}
135217680Spst
135317680Spststatic void
1354127668Sbmsbackup_byteswap(dmu_replay_record_t *drr)
1355127668Sbms{
1356127668Sbms#define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
1357127668Sbms#define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
1358127668Sbms	drr->drr_type = BSWAP_32(drr->drr_type);
1359127668Sbms	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
136039297Sfenner	switch (drr->drr_type) {
136139297Sfenner	case DRR_BEGIN:
136239297Sfenner		DO64(drr_begin.drr_magic);
1363127668Sbms		DO64(drr_begin.drr_versioninfo);
136439297Sfenner		DO64(drr_begin.drr_creation_time);
136517680Spst		DO32(drr_begin.drr_type);
136639297Sfenner		DO32(drr_begin.drr_flags);
136756893Sfenner		DO64(drr_begin.drr_toguid);
136839297Sfenner		DO64(drr_begin.drr_fromguid);
136926180Sfenner		break;
137026180Sfenner	case DRR_OBJECT:
137126180Sfenner		DO64(drr_object.drr_object);
137217680Spst		DO32(drr_object.drr_type);
137398524Sfenner		DO32(drr_object.drr_bonustype);
137417680Spst		DO32(drr_object.drr_blksz);
137517680Spst		DO32(drr_object.drr_bonuslen);
137617680Spst		DO64(drr_object.drr_toguid);
137717680Spst		break;
1378127668Sbms	case DRR_FREEOBJECTS:
1379127668Sbms		DO64(drr_freeobjects.drr_firstobj);
1380127668Sbms		DO64(drr_freeobjects.drr_numobjs);
1381127668Sbms		DO64(drr_freeobjects.drr_toguid);
138217680Spst		break;
138317680Spst	case DRR_WRITE:
138417680Spst		DO64(drr_write.drr_object);
138517680Spst		DO32(drr_write.drr_type);
1386127668Sbms		DO64(drr_write.drr_offset);
1387127668Sbms		DO64(drr_write.drr_length);
1388127668Sbms		DO64(drr_write.drr_toguid);
1389127668Sbms		DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
1390127668Sbms		DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
1391127668Sbms		DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
1392127668Sbms		DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
1393127668Sbms		DO64(drr_write.drr_key.ddk_prop);
1394127668Sbms		break;
1395127668Sbms	case DRR_WRITE_BYREF:
1396127668Sbms		DO64(drr_write_byref.drr_object);
1397127668Sbms		DO64(drr_write_byref.drr_offset);
1398127668Sbms		DO64(drr_write_byref.drr_length);
1399127668Sbms		DO64(drr_write_byref.drr_toguid);
140017680Spst		DO64(drr_write_byref.drr_refguid);
140117680Spst		DO64(drr_write_byref.drr_refobject);
140217680Spst		DO64(drr_write_byref.drr_refoffset);
1403127668Sbms		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
140417680Spst		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
1405127668Sbms		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
1406127668Sbms		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
140717680Spst		DO64(drr_write_byref.drr_key.ddk_prop);
1408127668Sbms		break;
1409127668Sbms	case DRR_WRITE_EMBEDDED:
141017680Spst		DO64(drr_write_embedded.drr_object);
141117680Spst		DO64(drr_write_embedded.drr_offset);
141217680Spst		DO64(drr_write_embedded.drr_length);
141317680Spst		DO64(drr_write_embedded.drr_toguid);
141417680Spst		DO32(drr_write_embedded.drr_lsize);
141517680Spst		DO32(drr_write_embedded.drr_psize);
141617680Spst		break;
141717680Spst	case DRR_FREE:
141817680Spst		DO64(drr_free.drr_object);
141939297Sfenner		DO64(drr_free.drr_offset);
142039297Sfenner		DO64(drr_free.drr_length);
142139297Sfenner		DO64(drr_free.drr_toguid);
142239297Sfenner		break;
1423127668Sbms	case DRR_SPILL:
1424127668Sbms		DO64(drr_spill.drr_object);
1425127668Sbms		DO64(drr_spill.drr_length);
1426127668Sbms		DO64(drr_spill.drr_toguid);
1427127668Sbms		break;
1428127668Sbms	case DRR_END:
1429127668Sbms		DO64(drr_end.drr_checksum.zc_word[0]);
1430127668Sbms		DO64(drr_end.drr_checksum.zc_word[1]);
1431127668Sbms		DO64(drr_end.drr_checksum.zc_word[2]);
1432127668Sbms		DO64(drr_end.drr_checksum.zc_word[3]);
1433127668Sbms		DO64(drr_end.drr_toguid);
1434127668Sbms		break;
1435127668Sbms	}
1436127668Sbms#undef DO64
1437127668Sbms#undef DO32
1438127668Sbms}
1439127668Sbms
1440127668Sbmsstatic inline uint8_t
1441127668Sbmsdeduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
1442127668Sbms{
1443127668Sbms	if (bonus_type == DMU_OT_SA) {
1444127668Sbms		return (1);
1445127668Sbms	} else {
1446127668Sbms		return (1 +
1447127668Sbms		    ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT));
1448127668Sbms	}
1449127668Sbms}
1450127668Sbms
1451127668Sbmsstatic int
1452127668Sbmsrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
1453127668Sbms{
1454127668Sbms	dmu_object_info_t doi;
1455127668Sbms	dmu_tx_t *tx;
1456127668Sbms	void *data = NULL;
1457127668Sbms	uint64_t object;
1458127668Sbms	int err;
1459127668Sbms
1460127668Sbms	if (drro->drr_type == DMU_OT_NONE ||
1461127668Sbms	    !DMU_OT_IS_VALID(drro->drr_type) ||
1462127668Sbms	    !DMU_OT_IS_VALID(drro->drr_bonustype) ||
1463127668Sbms	    drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
1464127668Sbms	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
1465127668Sbms	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
1466127668Sbms	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
1467127668Sbms	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
146817680Spst	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
1469127668Sbms		return (SET_ERROR(EINVAL));
1470127668Sbms	}
1471127668Sbms
1472127668Sbms	err = dmu_object_info(os, drro->drr_object, &doi);
147317680Spst
1474127668Sbms	if (err != 0 && err != ENOENT)
1475127668Sbms		return (SET_ERROR(EINVAL));
1476127668Sbms	object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
1477127668Sbms
1478127668Sbms	if (drro->drr_bonuslen) {
1479127668Sbms		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
1480127668Sbms		if (ra->err != 0)
1481127668Sbms			return (ra->err);
1482127668Sbms	}
1483127668Sbms
1484127668Sbms	/*
1485127668Sbms	 * If we are losing blkptrs or changing the block size this must
1486127668Sbms	 * be a new file instance.  We must clear out the previous file
1487127668Sbms	 * contents before we can change this type of metadata in the dnode.
1488127668Sbms	 */
1489127668Sbms	if (err == 0) {
1490127668Sbms		int nblkptr;
1491127668Sbms
1492127668Sbms		nblkptr = deduce_nblkptr(drro->drr_bonustype,
1493127668Sbms		    drro->drr_bonuslen);
1494127668Sbms
1495127668Sbms		if (drro->drr_blksz != doi.doi_data_block_size ||
1496127668Sbms		    nblkptr < doi.doi_nblkptr) {
1497127668Sbms			err = dmu_free_long_range(os, drro->drr_object,
1498127668Sbms			    0, DMU_OBJECT_END);
1499127668Sbms			if (err != 0)
1500127668Sbms				return (SET_ERROR(EINVAL));
1501127668Sbms		}
1502127668Sbms	}
1503127668Sbms
1504127668Sbms	tx = dmu_tx_create(os);
1505127668Sbms	dmu_tx_hold_bonus(tx, object);
1506127668Sbms	err = dmu_tx_assign(tx, TXG_WAIT);
1507127668Sbms	if (err != 0) {
1508127668Sbms		dmu_tx_abort(tx);
1509127668Sbms		return (err);
1510127668Sbms	}
1511127668Sbms
1512127668Sbms	if (object == DMU_NEW_OBJECT) {
1513127668Sbms		/* currently free, want to be allocated */
1514127668Sbms		err = dmu_object_claim(os, drro->drr_object,
1515127668Sbms		    drro->drr_type, drro->drr_blksz,
151617680Spst		    drro->drr_bonustype, drro->drr_bonuslen, tx);
151717680Spst	} else if (drro->drr_type != doi.doi_type ||
151817680Spst	    drro->drr_blksz != doi.doi_data_block_size ||
1519127668Sbms	    drro->drr_bonustype != doi.doi_bonus_type ||
1520127668Sbms	    drro->drr_bonuslen != doi.doi_bonus_size) {
1521127668Sbms		/* currently allocated, but with different properties */
1522127668Sbms		err = dmu_object_reclaim(os, drro->drr_object,
1523127668Sbms		    drro->drr_type, drro->drr_blksz,
1524127668Sbms		    drro->drr_bonustype, drro->drr_bonuslen, tx);
1525127668Sbms	}
1526127668Sbms	if (err != 0) {
1527127668Sbms		dmu_tx_commit(tx);
1528127668Sbms		return (SET_ERROR(EINVAL));
1529127668Sbms	}
1530127668Sbms
1531127668Sbms	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
1532127668Sbms	    tx);
153317680Spst	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
153417680Spst
1535127668Sbms	if (data != NULL) {
153617680Spst		dmu_buf_t *db;
1537127668Sbms
1538127668Sbms		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
153917680Spst		dmu_buf_will_dirty(db, tx);
1540127668Sbms
1541127668Sbms		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
154217680Spst		bcopy(data, db->db_data, drro->drr_bonuslen);
154317680Spst		if (ra->byteswap) {
1544127668Sbms			dmu_object_byteswap_t byteswap =
1545127668Sbms			    DMU_OT_BYTESWAP(drro->drr_bonustype);
1546127668Sbms			dmu_ot_byteswap[byteswap].ob_func(db->db_data,
1547127668Sbms			    drro->drr_bonuslen);
1548127668Sbms		}
1549127668Sbms		dmu_buf_rele(db, FTAG);
1550127668Sbms	}
1551127668Sbms	dmu_tx_commit(tx);
1552127668Sbms	return (0);
1553127668Sbms}
1554127668Sbms
1555127668Sbms/* ARGSUSED */
1556127668Sbmsstatic int
1557127668Sbmsrestore_freeobjects(struct restorearg *ra, objset_t *os,
1558127668Sbms    struct drr_freeobjects *drrfo)
1559127668Sbms{
1560127668Sbms	uint64_t obj;
1561127668Sbms
1562127668Sbms	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
1563127668Sbms		return (SET_ERROR(EINVAL));
1564127668Sbms
1565127668Sbms	for (obj = drrfo->drr_firstobj;
1566127668Sbms	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
1567127668Sbms	    (void) dmu_object_next(os, &obj, FALSE, 0)) {
1568127668Sbms		int err;
1569127668Sbms
1570127668Sbms		if (dmu_object_info(os, obj, NULL) != 0)
1571127668Sbms			continue;
1572127668Sbms
1573127668Sbms		err = dmu_free_long_object(os, obj);
1574127668Sbms		if (err != 0)
1575127668Sbms			return (err);
1576127668Sbms	}
1577127668Sbms	return (0);
1578127668Sbms}
1579127668Sbms
1580127668Sbmsstatic int
1581127668Sbmsrestore_write(struct restorearg *ra, objset_t *os,
1582127668Sbms    struct drr_write *drrw)
1583127668Sbms{
1584127668Sbms	dmu_tx_t *tx;
1585127668Sbms	void *data;
1586127668Sbms	int err;
1587127668Sbms
158817680Spst	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
158917680Spst	    !DMU_OT_IS_VALID(drrw->drr_type))
159017680Spst		return (SET_ERROR(EINVAL));
1591127668Sbms
1592127668Sbms	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
1593127668Sbms		return (SET_ERROR(EINVAL));
1594127668Sbms
159517680Spst	dmu_buf_t *bonus;
159617680Spst	if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
159717680Spst		return (SET_ERROR(EINVAL));
159817680Spst
159917680Spst	arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
1600127668Sbms
1601127668Sbms	data = restore_read(ra, drrw->drr_length, abuf->b_data);
1602127668Sbms	if (data == NULL) {
1603127668Sbms		dmu_return_arcbuf(abuf);
1604127668Sbms		dmu_buf_rele(bonus, FTAG);
1605127668Sbms		return (ra->err);
1606127668Sbms	}
1607127668Sbms
1608127668Sbms	tx = dmu_tx_create(os);
1609127668Sbms
1610127668Sbms	dmu_tx_hold_write(tx, drrw->drr_object,
1611127668Sbms	    drrw->drr_offset, drrw->drr_length);
1612127668Sbms	err = dmu_tx_assign(tx, TXG_WAIT);
1613127668Sbms	if (err != 0) {
1614127668Sbms		dmu_return_arcbuf(abuf);
1615127668Sbms		dmu_buf_rele(bonus, FTAG);
1616127668Sbms		dmu_tx_abort(tx);
1617127668Sbms		return (err);
161817680Spst	}
161917680Spst	if (ra->byteswap) {
162017680Spst		dmu_object_byteswap_t byteswap =
162117680Spst		    DMU_OT_BYTESWAP(drrw->drr_type);
1622127668Sbms		dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
162317680Spst	}
162417680Spst	dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
162517680Spst	dmu_tx_commit(tx);
162617680Spst	dmu_buf_rele(bonus, FTAG);
1627127668Sbms	return (0);
162817680Spst}
162917680Spst
163017680Spst/*
163117680Spst * Handle a DRR_WRITE_BYREF record.  This record is used in dedup'ed
1632127668Sbms * streams to refer to a copy of the data that is already on the
163317680Spst * system because it came in earlier in the stream.  This function
1634127668Sbms * finds the earlier copy of the data, and uses that copy instead of
1635127668Sbms * data from the stream to fulfill this write.
163617680Spst */
1637127668Sbmsstatic int
1638127668Sbmsrestore_write_byref(struct restorearg *ra, objset_t *os,
163917680Spst    struct drr_write_byref *drrwbr)
164017680Spst{
1641127668Sbms	dmu_tx_t *tx;
1642127668Sbms	int err;
1643127668Sbms	guid_map_entry_t gmesrch;
1644127668Sbms	guid_map_entry_t *gmep;
1645127668Sbms	avl_index_t where;
1646127668Sbms	objset_t *ref_os = NULL;
1647127668Sbms	dmu_buf_t *dbp;
1648127668Sbms
1649127668Sbms	if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
1650127668Sbms		return (SET_ERROR(EINVAL));
1651127668Sbms
165256893Sfenner	/*
165356893Sfenner	 * If the GUID of the referenced dataset is different from the
165456893Sfenner	 * GUID of the target dataset, find the referenced dataset.
165556893Sfenner	 */
1656127668Sbms	if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
1657127668Sbms		gmesrch.guid = drrwbr->drr_refguid;
1658127668Sbms		if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
1659127668Sbms		    &where)) == NULL) {
1660127668Sbms			return (SET_ERROR(EINVAL));
1661127668Sbms		}
1662127668Sbms		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
1663127668Sbms			return (SET_ERROR(EINVAL));
1664127668Sbms	} else {
1665127668Sbms		ref_os = os;
1666127668Sbms	}
1667127668Sbms
1668127668Sbms	err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
1669127668Sbms	    drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
167056893Sfenner	if (err != 0)
167156893Sfenner		return (err);
1672127668Sbms
167356893Sfenner	tx = dmu_tx_create(os);
1674127668Sbms
1675127668Sbms	dmu_tx_hold_write(tx, drrwbr->drr_object,
167656893Sfenner	    drrwbr->drr_offset, drrwbr->drr_length);
1677127668Sbms	err = dmu_tx_assign(tx, TXG_WAIT);
1678127668Sbms	if (err != 0) {
167956893Sfenner		dmu_tx_abort(tx);
1680127668Sbms		return (err);
1681127668Sbms	}
1682127668Sbms	dmu_write(os, drrwbr->drr_object,
1683127668Sbms	    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
1684127668Sbms	dmu_buf_rele(dbp, FTAG);
1685127668Sbms	dmu_tx_commit(tx);
1686127668Sbms	return (0);
1687127668Sbms}
1688127668Sbms
1689127668Sbmsstatic int
1690127668Sbmsrestore_write_embedded(struct restorearg *ra, objset_t *os,
1691127668Sbms    struct drr_write_embedded *drrwnp)
1692127668Sbms{
1693127668Sbms	dmu_tx_t *tx;
1694127668Sbms	int err;
1695127668Sbms	void *data;
1696127668Sbms
1697127668Sbms	if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
1698127668Sbms		return (EINVAL);
1699127668Sbms
1700127668Sbms	if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
1701127668Sbms		return (EINVAL);
1702127668Sbms
1703127668Sbms	if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
1704127668Sbms		return (EINVAL);
1705127668Sbms	if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
1706127668Sbms		return (EINVAL);
1707127668Sbms
170856893Sfenner	data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL);
1709127668Sbms	if (data == NULL)
1710127668Sbms		return (ra->err);
1711127668Sbms
171217680Spst	tx = dmu_tx_create(os);
1713127668Sbms
1714127668Sbms	dmu_tx_hold_write(tx, drrwnp->drr_object,
1715127668Sbms	    drrwnp->drr_offset, drrwnp->drr_length);
1716127668Sbms	err = dmu_tx_assign(tx, TXG_WAIT);
1717127668Sbms	if (err != 0) {
1718127668Sbms		dmu_tx_abort(tx);
1719127668Sbms		return (err);
1720127668Sbms	}
1721127668Sbms
172217680Spst	dmu_write_embedded(os, drrwnp->drr_object,
1723127668Sbms	    drrwnp->drr_offset, data, drrwnp->drr_etype,
1724127668Sbms	    drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
172526180Sfenner	    ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
1726127668Sbms
1727127668Sbms	dmu_tx_commit(tx);
172826180Sfenner	return (0);
1729127668Sbms}
173056893Sfenner
173156893Sfennerstatic int
1732127668Sbmsrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
1733127668Sbms{
1734127668Sbms	dmu_tx_t *tx;
1735127668Sbms	void *data;
1736127668Sbms	dmu_buf_t *db, *db_spill;
1737127668Sbms	int err;
1738127668Sbms
1739127668Sbms	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
1740127668Sbms	    drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os)))
1741127668Sbms		return (SET_ERROR(EINVAL));
1742127668Sbms
1743127668Sbms	data = restore_read(ra, drrs->drr_length, NULL);
1744127668Sbms	if (data == NULL)
1745127668Sbms		return (ra->err);
1746127668Sbms
1747127668Sbms	if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
1748127668Sbms		return (SET_ERROR(EINVAL));
1749127668Sbms
1750127668Sbms	VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
1751127668Sbms	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
1752127668Sbms		dmu_buf_rele(db, FTAG);
1753127668Sbms		return (err);
1754127668Sbms	}
1755127668Sbms
1756127668Sbms	tx = dmu_tx_create(os);
1757127668Sbms
1758127668Sbms	dmu_tx_hold_spill(tx, db->db_object);
1759127668Sbms
1760127668Sbms	err = dmu_tx_assign(tx, TXG_WAIT);
1761127668Sbms	if (err != 0) {
1762127668Sbms		dmu_buf_rele(db, FTAG);
1763127668Sbms		dmu_buf_rele(db_spill, FTAG);
1764127668Sbms		dmu_tx_abort(tx);
1765127668Sbms		return (err);
1766127668Sbms	}
1767127668Sbms	dmu_buf_will_dirty(db_spill, tx);
1768127668Sbms
1769127668Sbms	if (db_spill->db_size < drrs->drr_length)
1770127668Sbms		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
1771127668Sbms		    drrs->drr_length, tx));
1772127668Sbms	bcopy(data, db_spill->db_data, drrs->drr_length);
1773127668Sbms
1774127668Sbms	dmu_buf_rele(db, FTAG);
1775127668Sbms	dmu_buf_rele(db_spill, FTAG);
1776127668Sbms
1777127668Sbms	dmu_tx_commit(tx);
1778127668Sbms	return (0);
1779127668Sbms}
1780127668Sbms
1781127668Sbms/* ARGSUSED */
1782127668Sbmsstatic int
1783127668Sbmsrestore_free(struct restorearg *ra, objset_t *os,
1784127668Sbms    struct drr_free *drrf)
1785127668Sbms{
1786127668Sbms	int err;
1787127668Sbms
1788127668Sbms	if (drrf->drr_length != -1ULL &&
1789127668Sbms	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
1790127668Sbms		return (SET_ERROR(EINVAL));
1791127668Sbms
1792127668Sbms	if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
1793127668Sbms		return (SET_ERROR(EINVAL));
1794127668Sbms
1795127668Sbms	err = dmu_free_long_range(os, drrf->drr_object,
1796127668Sbms	    drrf->drr_offset, drrf->drr_length);
1797127668Sbms	return (err);
1798127668Sbms}
1799127668Sbms
1800127668Sbms/* used to destroy the drc_ds on error */
1801127668Sbmsstatic void
1802127668Sbmsdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
1803127668Sbms{
1804127668Sbms	char name[MAXNAMELEN];
1805127668Sbms	dsl_dataset_name(drc->drc_ds, name);
1806127668Sbms	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
1807127668Sbms	(void) dsl_destroy_head(name);
1808127668Sbms}
1809127668Sbms
1810127668Sbms/*
1811127668Sbms * NB: callers *must* call dmu_recv_end() if this succeeds.
1812127668Sbms */
1813127668Sbmsint
1814127668Sbmsdmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
1815127668Sbms    int cleanup_fd, uint64_t *action_handlep)
1816127668Sbms{
1817127668Sbms	struct restorearg ra = { 0 };
1818127668Sbms	dmu_replay_record_t *drr;
1819127668Sbms	objset_t *os;
1820127668Sbms	zio_cksum_t pcksum;
1821127668Sbms	int featureflags;
1822127668Sbms
1823127668Sbms	ra.byteswap = drc->drc_byteswap;
1824127668Sbms	ra.cksum = drc->drc_cksum;
1825127668Sbms	ra.td = curthread;
1826127668Sbms	ra.fp = fp;
1827127668Sbms	ra.voff = *voffp;
1828127668Sbms	ra.bufsize = SPA_MAXBLOCKSIZE;
1829127668Sbms	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
1830127668Sbms
1831127668Sbms	/* these were verified in dmu_recv_begin */
1832127668Sbms	ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
1833127668Sbms	    DMU_SUBSTREAM);
1834127668Sbms	ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);
1835127668Sbms
1836127668Sbms	/*
1837127668Sbms	 * Open the objset we are modifying.
1838127668Sbms	 */
1839127668Sbms	VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
1840127668Sbms
1841127668Sbms	ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
1842127668Sbms
1843127668Sbms	featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
1844127668Sbms
1845127668Sbms	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
1846127668Sbms	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
1847127668Sbms		minor_t minor;
184826180Sfenner
1849127668Sbms		if (cleanup_fd == -1) {
1850127668Sbms			ra.err = SET_ERROR(EBADF);
1851127668Sbms			goto out;
1852127668Sbms		}
1853127668Sbms		ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
1854127668Sbms		if (ra.err != 0) {
1855127668Sbms			cleanup_fd = -1;
1856127668Sbms			goto out;
1857127668Sbms		}
1858127668Sbms
1859127668Sbms		if (*action_handlep == 0) {
186026180Sfenner			ra.guid_to_ds_map =
186126180Sfenner			    kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1862127668Sbms			avl_create(ra.guid_to_ds_map, guid_compare,
1863127668Sbms			    sizeof (guid_map_entry_t),
186426180Sfenner			    offsetof(guid_map_entry_t, avlnode));
1865127668Sbms			ra.err = zfs_onexit_add_cb(minor,
1866127668Sbms			    free_guid_map_onexit, ra.guid_to_ds_map,
1867127668Sbms			    action_handlep);
1868127668Sbms			if (ra.err != 0)
1869127668Sbms				goto out;
1870127668Sbms		} else {
1871127668Sbms			ra.err = zfs_onexit_cb_data(minor, *action_handlep,
1872127668Sbms			    (void **)&ra.guid_to_ds_map);
1873127668Sbms			if (ra.err != 0)
1874127668Sbms				goto out;
1875127668Sbms		}
1876127668Sbms
1877127668Sbms		drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
1878127668Sbms	}
1879127668Sbms
1880127668Sbms	/*
1881127668Sbms	 * Read records and process them.
1882127668Sbms	 */
1883127668Sbms	pcksum = ra.cksum;
1884127668Sbms	while (ra.err == 0 &&
1885127668Sbms	    NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) {
1886127668Sbms		if (issig(JUSTLOOKING) && issig(FORREAL)) {
1887127668Sbms			ra.err = SET_ERROR(EINTR);
1888127668Sbms			goto out;
1889127668Sbms		}
1890127668Sbms
1891127668Sbms		if (ra.byteswap)
1892127668Sbms			backup_byteswap(drr);
1893127668Sbms
1894127668Sbms		switch (drr->drr_type) {
1895127668Sbms		case DRR_OBJECT:
1896127668Sbms		{
1897127668Sbms			/*
1898127668Sbms			 * We need to make a copy of the record header,
1899127668Sbms			 * because restore_{object,write} may need to
1900127668Sbms			 * restore_read(), which will invalidate drr.
190126180Sfenner			 */
190226180Sfenner			struct drr_object drro = drr->drr_u.drr_object;
1903127668Sbms			ra.err = restore_object(&ra, os, &drro);
1904127668Sbms			break;
1905127668Sbms		}
1906127668Sbms		case DRR_FREEOBJECTS:
1907127668Sbms		{
1908127668Sbms			struct drr_freeobjects drrfo =
1909127668Sbms			    drr->drr_u.drr_freeobjects;
1910127668Sbms			ra.err = restore_freeobjects(&ra, os, &drrfo);
1911127668Sbms			break;
1912127668Sbms		}
1913127668Sbms		case DRR_WRITE:
191417680Spst		{
1915127668Sbms			struct drr_write drrw = drr->drr_u.drr_write;
1916127668Sbms			ra.err = restore_write(&ra, os, &drrw);
1917127668Sbms			break;
1918127668Sbms		}
1919127668Sbms		case DRR_WRITE_BYREF:
1920127668Sbms		{
1921127668Sbms			struct drr_write_byref drrwbr =
1922127668Sbms			    drr->drr_u.drr_write_byref;
1923127668Sbms			ra.err = restore_write_byref(&ra, os, &drrwbr);
1924127668Sbms			break;
1925127668Sbms		}
1926127668Sbms		case DRR_WRITE_EMBEDDED:
1927127668Sbms		{
1928127668Sbms			struct drr_write_embedded drrwe =
1929127668Sbms			    drr->drr_u.drr_write_embedded;
1930127668Sbms			ra.err = restore_write_embedded(&ra, os, &drrwe);
1931127668Sbms			break;
1932127668Sbms		}
1933127668Sbms		case DRR_FREE:
1934127668Sbms		{
1935127668Sbms			struct drr_free drrf = drr->drr_u.drr_free;
1936127668Sbms			ra.err = restore_free(&ra, os, &drrf);
1937127668Sbms			break;
1938127668Sbms		}
1939127668Sbms		case DRR_END:
1940127668Sbms		{
1941127668Sbms			struct drr_end drre = drr->drr_u.drr_end;
1942127668Sbms			/*
1943127668Sbms			 * We compare against the *previous* checksum
194417680Spst			 * value, because the stored checksum is of
1945127668Sbms			 * everything before the DRR_END record.
1946127668Sbms			 */
1947127668Sbms			if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
1948127668Sbms				ra.err = SET_ERROR(ECKSUM);
1949127668Sbms			goto out;
1950127668Sbms		}
1951127668Sbms		case DRR_SPILL:
1952127668Sbms		{
195317680Spst			struct drr_spill drrs = drr->drr_u.drr_spill;
1954127668Sbms			ra.err = restore_spill(&ra, os, &drrs);
1955127668Sbms			break;
195617680Spst		}
1957127668Sbms		default:
1958127668Sbms			ra.err = SET_ERROR(EINVAL);
1959127668Sbms			goto out;
1960127668Sbms		}
1961127668Sbms		pcksum = ra.cksum;
1962127668Sbms	}
1963127668Sbms	ASSERT(ra.err != 0);
1964127668Sbms
1965127668Sbmsout:
1966127668Sbms	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
1967127668Sbms		zfs_onexit_fd_rele(cleanup_fd);
1968127668Sbms
1969127668Sbms	if (ra.err != 0) {
1970127668Sbms		/*
1971127668Sbms		 * destroy what we created, so we don't leave it in the
1972127668Sbms		 * inconsistent restoring state.
197317680Spst		 */
1974127668Sbms		dmu_recv_cleanup_ds(drc);
1975127668Sbms	}
1976127668Sbms
1977127668Sbms	kmem_free(ra.buf, ra.bufsize);
1978127668Sbms	*voffp = ra.voff;
1979127668Sbms	return (ra.err);
198026180Sfenner}
1981127668Sbms
1982127668Sbmsstatic int
1983127668Sbmsdmu_recv_end_check(void *arg, dmu_tx_t *tx)
1984127668Sbms{
1985127668Sbms	dmu_recv_cookie_t *drc = arg;
1986127668Sbms	dsl_pool_t *dp = dmu_tx_pool(tx);
1987127668Sbms	int error;
1988127668Sbms
1989127668Sbms	ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);
1990127668Sbms
1991127668Sbms	if (!drc->drc_newfs) {
1992127668Sbms		dsl_dataset_t *origin_head;
1993127668Sbms
1994127668Sbms		error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
1995127668Sbms		if (error != 0)
1996127668Sbms			return (error);
1997127668Sbms		if (drc->drc_force) {
199856893Sfenner			/*
1999127668Sbms			 * We will destroy any snapshots in tofs (i.e. before
2000127668Sbms			 * origin_head) that are after the origin (which is
2001127668Sbms			 * the snap before drc_ds, because drc_ds can not
2002127668Sbms			 * have any snaps of its own).
200356893Sfenner			 */
2004127668Sbms			uint64_t obj;
2005127668Sbms
200656893Sfenner			obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj;
2007127668Sbms			while (obj !=
2008127668Sbms			    dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) {
2009127668Sbms				dsl_dataset_t *snap;
2010127668Sbms				error = dsl_dataset_hold_obj(dp, obj, FTAG,
2011127668Sbms				    &snap);
2012127668Sbms				if (error != 0)
2013127668Sbms					return (error);
2014127668Sbms				if (snap->ds_dir != origin_head->ds_dir)
2015127668Sbms					error = SET_ERROR(EINVAL);
2016127668Sbms				if (error == 0)  {
2017127668Sbms					error = dsl_destroy_snapshot_check_impl(
201817680Spst					    snap, B_FALSE);
2019127668Sbms				}
2020127668Sbms				obj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
2021127668Sbms				dsl_dataset_rele(snap, FTAG);
2022127668Sbms				if (error != 0)
2023127668Sbms					return (error);
2024127668Sbms			}
2025127668Sbms		}
2026127668Sbms		error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
2027127668Sbms		    origin_head, drc->drc_force, drc->drc_owner, tx);
2028127668Sbms		if (error != 0) {
2029127668Sbms			dsl_dataset_rele(origin_head, FTAG);
2030127668Sbms			return (error);
2031127668Sbms		}
2032127668Sbms		error = dsl_dataset_snapshot_check_impl(origin_head,
2033127668Sbms		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
2034127668Sbms		dsl_dataset_rele(origin_head, FTAG);
2035127668Sbms		if (error != 0)
2036127668Sbms			return (error);
2037127668Sbms
2038127668Sbms		error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
2039127668Sbms	} else {
2040127668Sbms		error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
2041127668Sbms		    drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred);
2042127668Sbms	}
2043127668Sbms	return (error);
2044127668Sbms}
2045127668Sbms
2046127668Sbmsstatic void
204726180Sfennerdmu_recv_end_sync(void *arg, dmu_tx_t *tx)
204817680Spst{
2049127668Sbms	dmu_recv_cookie_t *drc = arg;
2050127668Sbms	dsl_pool_t *dp = dmu_tx_pool(tx);
205117680Spst
2052127668Sbms	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
205317680Spst	    tx, "snap=%s", drc->drc_tosnap);
2054127668Sbms
2055127668Sbms	if (!drc->drc_newfs) {
2056127668Sbms		dsl_dataset_t *origin_head;
2057127668Sbms
205856893Sfenner		VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
2059127668Sbms		    &origin_head));
206056893Sfenner
206156893Sfenner		if (drc->drc_force) {
206226180Sfenner			/*
206326180Sfenner			 * Destroy any snapshots of drc_tofs (origin_head)
206456893Sfenner			 * after the origin (the snap before drc_ds).
206556893Sfenner			 */
206656893Sfenner			uint64_t obj;
206756893Sfenner
206826180Sfenner			obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj;
206956893Sfenner			while (obj !=
207056893Sfenner			    dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) {
207117680Spst				dsl_dataset_t *snap;
207217680Spst				VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG,
2073127668Sbms				    &snap));
2074127668Sbms				ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir);
2075127668Sbms				obj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
2076127668Sbms				dsl_destroy_snapshot_sync_impl(snap,
2077127668Sbms				    B_FALSE, tx);
2078127668Sbms				dsl_dataset_rele(snap, FTAG);
2079127668Sbms			}
2080127668Sbms		}
2081127668Sbms		VERIFY3P(drc->drc_ds->ds_prev, ==,
2082127668Sbms		    origin_head->ds_prev);
2083127668Sbms
2084127668Sbms		dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
2085127668Sbms		    origin_head, tx);
2086127668Sbms		dsl_dataset_snapshot_sync_impl(origin_head,
2087127668Sbms		    drc->drc_tosnap, tx);
2088127668Sbms
2089127668Sbms		/* set snapshot's creation time and guid */
2090127668Sbms		dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx);
2091127668Sbms		dsl_dataset_phys(origin_head->ds_prev)->ds_creation_time =
2092127668Sbms		    drc->drc_drrb->drr_creation_time;
2093127668Sbms		dsl_dataset_phys(origin_head->ds_prev)->ds_guid =
2094127668Sbms		    drc->drc_drrb->drr_toguid;
2095127668Sbms		dsl_dataset_phys(origin_head->ds_prev)->ds_flags &=
2096127668Sbms		    ~DS_FLAG_INCONSISTENT;
2097127668Sbms
2098127668Sbms		dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
2099127668Sbms		dsl_dataset_phys(origin_head)->ds_flags &=
2100127668Sbms		    ~DS_FLAG_INCONSISTENT;
2101127668Sbms
2102127668Sbms		dsl_dataset_rele(origin_head, FTAG);
2103127668Sbms		dsl_destroy_head_sync_impl(drc->drc_ds, tx);
2104127668Sbms
2105127668Sbms		if (drc->drc_owner != NULL)
2106127668Sbms			VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
2107127668Sbms	} else {
2108127668Sbms		dsl_dataset_t *ds = drc->drc_ds;
2109127668Sbms
2110127668Sbms		dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx);
2111127668Sbms
2112127668Sbms		/* set snapshot's creation time and guid */
2113127668Sbms		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
2114127668Sbms		dsl_dataset_phys(ds->ds_prev)->ds_creation_time =
2115127668Sbms		    drc->drc_drrb->drr_creation_time;
2116127668Sbms		dsl_dataset_phys(ds->ds_prev)->ds_guid =
2117127668Sbms		    drc->drc_drrb->drr_toguid;
2118127668Sbms		dsl_dataset_phys(ds->ds_prev)->ds_flags &=
2119127668Sbms		    ~DS_FLAG_INCONSISTENT;
2120127668Sbms
2121127668Sbms		dmu_buf_will_dirty(ds->ds_dbuf, tx);
2122127668Sbms		dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
2123127668Sbms	}
2124127668Sbms	drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
2125127668Sbms	/*
2126127668Sbms	 * Release the hold from dmu_recv_begin.  This must be done before
2127127668Sbms	 * we return to open context, so that when we free the dataset's dnode,
2128127668Sbms	 * we can evict its bonus buffer.
2129127668Sbms	 */
2130127668Sbms	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
2131127668Sbms	drc->drc_ds = NULL;
2132127668Sbms}
2133127668Sbms
2134127668Sbmsstatic int
2135127668Sbmsadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
2136127668Sbms{
2137127668Sbms	dsl_pool_t *dp;
2138127668Sbms	dsl_dataset_t *snapds;
2139127668Sbms	guid_map_entry_t *gmep;
2140127668Sbms	int err;
2141127668Sbms
2142127668Sbms	ASSERT(guid_map != NULL);
2143127668Sbms
2144127668Sbms	err = dsl_pool_hold(name, FTAG, &dp);
2145127668Sbms	if (err != 0)
2146127668Sbms		return (err);
2147127668Sbms	gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
2148127668Sbms	err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
2149127668Sbms	if (err == 0) {
215017680Spst		gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
2151127668Sbms		gmep->gme_ds = snapds;
2152127668Sbms		avl_add(guid_map, gmep);
2153127668Sbms		dsl_dataset_long_hold(snapds, gmep);
2154127668Sbms	} else
2155127668Sbms		kmem_free(gmep, sizeof (*gmep));
2156127668Sbms
2157127668Sbms	dsl_pool_rele(dp, FTAG);
2158127668Sbms	return (err);
2159127668Sbms}
2160127668Sbms
2161127668Sbmsstatic int dmu_recv_end_modified_blocks = 3;
2162127668Sbms
2163127668Sbmsstatic int
2164127668Sbmsdmu_recv_existing_end(dmu_recv_cookie_t *drc)
2165127668Sbms{
2166127668Sbms	int error;
2167127668Sbms	char name[MAXNAMELEN];
2168127668Sbms
2169127668Sbms#ifdef _KERNEL
2170127668Sbms	/*
2171127668Sbms	 * We will be destroying the ds; make sure its origin is unmounted if
2172127668Sbms	 * necessary.
2173127668Sbms	 */
2174127668Sbms	dsl_dataset_name(drc->drc_ds, name);
2175127668Sbms	zfs_destroy_unmount_origin(name);
2176127668Sbms#endif
2177127668Sbms
2178127668Sbms	error = dsl_sync_task(drc->drc_tofs,
2179127668Sbms	    dmu_recv_end_check, dmu_recv_end_sync, drc,
2180127668Sbms	    dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);
2181127668Sbms
2182127668Sbms	if (error != 0)
2183127668Sbms		dmu_recv_cleanup_ds(drc);
2184127668Sbms	return (error);
2185127668Sbms}
2186127668Sbms
2187127668Sbmsstatic int
2188127668Sbmsdmu_recv_new_end(dmu_recv_cookie_t *drc)
2189127668Sbms{
2190127668Sbms	int error;
2191127668Sbms
2192127668Sbms	error = dsl_sync_task(drc->drc_tofs,
2193127668Sbms	    dmu_recv_end_check, dmu_recv_end_sync, drc,
2194127668Sbms	    dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);
2195127668Sbms
2196127668Sbms	if (error != 0) {
2197127668Sbms		dmu_recv_cleanup_ds(drc);
2198127668Sbms	} else if (drc->drc_guid_to_ds_map != NULL) {
2199127668Sbms		(void) add_ds_to_guidmap(drc->drc_tofs,
2200127668Sbms		    drc->drc_guid_to_ds_map,
2201127668Sbms		    drc->drc_newsnapobj);
2202127668Sbms	}
2203127668Sbms	return (error);
2204127668Sbms}
2205127668Sbms
2206127668Sbmsint
2207127668Sbmsdmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
2208127668Sbms{
2209127668Sbms	drc->drc_owner = owner;
2210127668Sbms
2211127668Sbms	if (drc->drc_newfs)
2212127668Sbms		return (dmu_recv_new_end(drc));
2213127668Sbms	else
2214127668Sbms		return (dmu_recv_existing_end(drc));
2215127668Sbms}
2216127668Sbms
2217127668Sbms/*
2218127668Sbms * Return TRUE if this objset is currently being received into.
2219127668Sbms */
2220127668Sbmsboolean_t
2221127668Sbmsdmu_objset_is_receiving(objset_t *os)
2222127668Sbms{
2223127668Sbms	return (os->os_dsl_dataset != NULL &&
2224127668Sbms	    os->os_dsl_dataset->ds_owner == dmu_recv_tag);
2225127668Sbms}
2226127668Sbms