1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24249643Smm * Copyright (c) 2013 by Delphix. All rights reserved. 25265754Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26235951Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. 27221263Smm */ 28168404Spjd 29168404Spjd#include <sys/dmu.h> 30168404Spjd#include <sys/dmu_impl.h> 31168404Spjd#include <sys/dmu_tx.h> 32168404Spjd#include <sys/dbuf.h> 33168404Spjd#include <sys/dnode.h> 34168404Spjd#include <sys/zfs_context.h> 35168404Spjd#include <sys/dmu_objset.h> 36168404Spjd#include <sys/dmu_traverse.h> 37168404Spjd#include <sys/dsl_dataset.h> 38168404Spjd#include <sys/dsl_dir.h> 39219089Spjd#include <sys/dsl_prop.h> 40168404Spjd#include <sys/dsl_pool.h> 41168404Spjd#include <sys/dsl_synctask.h> 42168404Spjd#include <sys/zfs_ioctl.h> 43168404Spjd#include <sys/zap.h> 44168404Spjd#include <sys/zio_checksum.h> 45219089Spjd#include <sys/zfs_znode.h> 46219089Spjd#include <zfs_fletcher.h> 47219089Spjd#include <sys/avl.h> 48219089Spjd#include <sys/ddt.h> 49219089Spjd#include <sys/zfs_onexit.h> 50249643Smm#include <sys/dmu_send.h> 51249643Smm#include <sys/dsl_destroy.h> 52263410Sdelphij#include <sys/dsl_bookmark.h> 53168404Spjd 54229578Smm/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ 55229578Smmint zfs_send_corrupt_data = B_FALSE; 56229578Smm 57185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 58249643Smmstatic const char *recv_clone_name = "%recv"; 59185029Spjd 60168404Spjdstatic int 61235951Smmdump_bytes(dmu_sendarg_t *dsp, void *buf, int len) 62168404Spjd{ 63235951Smm dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; 64168404Spjd struct uio auio; 65168404Spjd struct iovec aiov; 66243674Smm ASSERT0(len % 8); 67168404Spjd 68235951Smm fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); 69168404Spjd aiov.iov_base = buf; 70168404Spjd aiov.iov_len = len; 71168404Spjd auio.uio_iov = &aiov; 72168404Spjd auio.uio_iovcnt = 1; 73168404Spjd auio.uio_resid = len; 74169170Spjd auio.uio_segflg = UIO_SYSSPACE; 75168404Spjd auio.uio_rw = UIO_WRITE; 76168404Spjd auio.uio_offset = (off_t)-1; 77235951Smm auio.uio_td = dsp->dsa_td; 78168404Spjd#ifdef _KERNEL 79235951Smm if (dsp->dsa_fp->f_type == DTYPE_VNODE) 80168404Spjd bwillwrite(); 81235951Smm dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0, 82235951Smm dsp->dsa_td); 83168404Spjd#else 84168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 85235951Smm dsp->dsa_err = EOPNOTSUPP; 86168404Spjd#endif 87235951Smm mutex_enter(&ds->ds_sendstream_lock); 88235951Smm *dsp->dsa_off += len; 89235951Smm mutex_exit(&ds->ds_sendstream_lock); 90235951Smm 91235951Smm return (dsp->dsa_err); 92168404Spjd} 93168404Spjd 94168404Spjdstatic int 95235951Smmdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 96168404Spjd uint64_t length) 97168404Spjd{ 98235951Smm struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); 99219089Spjd 100260722Savg /* 101260722Savg * When we receive a free record, dbuf_free_range() assumes 102260722Savg * that the receiving system doesn't have any dbufs in the range 103260722Savg * being freed. This is always true because there is a one-record 104260722Savg * constraint: we only send one WRITE record for any given 105260722Savg * object+offset. We know that the one-record constraint is 106260722Savg * true because we always send data in increasing order by 107260722Savg * object,offset. 108260722Savg * 109260722Savg * If the increasing-order constraint ever changes, we should find 110260722Savg * another way to assert that the one-record constraint is still 111260722Savg * satisfied. 112260722Savg */ 113260722Savg ASSERT(object > dsp->dsa_last_data_object || 114260722Savg (object == dsp->dsa_last_data_object && 115260722Savg offset > dsp->dsa_last_data_offset)); 116260722Savg 117260722Savg /* 118260722Savg * If we are doing a non-incremental send, then there can't 119260722Savg * be any data in the dataset we're receiving into. Therefore 120260722Savg * a free record would simply be a no-op. Save space by not 121260722Savg * sending it to begin with. 122260722Savg */ 123260722Savg if (!dsp->dsa_incremental) 124260722Savg return (0); 125260722Savg 126237746Smm if (length != -1ULL && offset + length < offset) 127237746Smm length = -1ULL; 128237746Smm 129219089Spjd /* 130219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 131219089Spjd * since free block aggregation can only be done for blocks of the 132219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 133219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 134219089Spjd * aggregated with other DRR_FREEOBJECTS records. 135219089Spjd */ 136235951Smm if (dsp->dsa_pending_op != PENDING_NONE && 137235951Smm dsp->dsa_pending_op != PENDING_FREE) { 138235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 139235951Smm sizeof (dmu_replay_record_t)) != 0) 140249643Smm return (SET_ERROR(EINTR)); 141235951Smm dsp->dsa_pending_op = PENDING_NONE; 142219089Spjd } 143219089Spjd 144235951Smm if (dsp->dsa_pending_op == PENDING_FREE) { 145219089Spjd /* 146219089Spjd * There should never be a PENDING_FREE if length is -1 147219089Spjd * (because dump_dnode is the only place where this 148219089Spjd * function is called with a -1, and only after flushing 149219089Spjd * any pending record). 150219089Spjd */ 151219089Spjd ASSERT(length != -1ULL); 152219089Spjd /* 153219089Spjd * Check to see whether this free block can be aggregated 154219089Spjd * with pending one. 155219089Spjd */ 156219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 157219089Spjd drrf->drr_length == offset) { 158219089Spjd drrf->drr_length += length; 159219089Spjd return (0); 160219089Spjd } else { 161219089Spjd /* not a continuation. Push out pending record */ 162235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 163219089Spjd sizeof (dmu_replay_record_t)) != 0) 164249643Smm return (SET_ERROR(EINTR)); 165235951Smm dsp->dsa_pending_op = PENDING_NONE; 166219089Spjd } 167219089Spjd } 168219089Spjd /* create a FREE record and make it pending */ 169235951Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 170235951Smm dsp->dsa_drr->drr_type = DRR_FREE; 171219089Spjd drrf->drr_object = object; 172219089Spjd drrf->drr_offset = offset; 173219089Spjd drrf->drr_length = length; 174235951Smm drrf->drr_toguid = dsp->dsa_toguid; 175219089Spjd if (length == -1ULL) { 176235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 177235951Smm sizeof (dmu_replay_record_t)) != 0) 178249643Smm return (SET_ERROR(EINTR)); 179219089Spjd } else { 180235951Smm dsp->dsa_pending_op = PENDING_FREE; 181219089Spjd } 182168404Spjd 183168404Spjd return (0); 184168404Spjd} 185168404Spjd 186168404Spjdstatic int 187235951Smmdump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, 188219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 189168404Spjd{ 190235951Smm struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); 191219089Spjd 192260722Savg /* 193260722Savg * We send data in increasing object, offset order. 194260722Savg * See comment in dump_free() for details. 195260722Savg */ 196260722Savg ASSERT(object > dsp->dsa_last_data_object || 197260722Savg (object == dsp->dsa_last_data_object && 198260722Savg offset > dsp->dsa_last_data_offset)); 199260722Savg dsp->dsa_last_data_object = object; 200260722Savg dsp->dsa_last_data_offset = offset + blksz - 1; 201219089Spjd 202219089Spjd /* 203219089Spjd * If there is any kind of pending aggregation (currently either 204219089Spjd * a grouping of free objects or free blocks), push it out to 205219089Spjd * the stream, since aggregation can't be done across operations 206219089Spjd * of different types. 207219089Spjd */ 208235951Smm if (dsp->dsa_pending_op != PENDING_NONE) { 209235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 210235951Smm sizeof (dmu_replay_record_t)) != 0) 211249643Smm return (SET_ERROR(EINTR)); 212235951Smm dsp->dsa_pending_op = PENDING_NONE; 213219089Spjd } 214168404Spjd /* write a DATA record */ 215235951Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 216235951Smm dsp->dsa_drr->drr_type = DRR_WRITE; 217219089Spjd drrw->drr_object = object; 218219089Spjd drrw->drr_type = type; 219219089Spjd drrw->drr_offset = offset; 220219089Spjd drrw->drr_length = blksz; 221235951Smm drrw->drr_toguid = dsp->dsa_toguid; 222219089Spjd drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 223219089Spjd if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 224219089Spjd drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 225219089Spjd DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 226219089Spjd DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 227219089Spjd DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 228219089Spjd drrw->drr_key.ddk_cksum = bp->blk_cksum; 229168404Spjd 230235951Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 231249643Smm return (SET_ERROR(EINTR)); 232235951Smm if (dump_bytes(dsp, data, blksz) != 0) 233249643Smm return (SET_ERROR(EINTR)); 234219089Spjd return (0); 235219089Spjd} 236219089Spjd 237219089Spjdstatic int 238235951Smmdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) 239219089Spjd{ 240235951Smm struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); 241219089Spjd 242235951Smm if (dsp->dsa_pending_op != PENDING_NONE) { 243235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 244235951Smm sizeof (dmu_replay_record_t)) != 0) 245249643Smm return (SET_ERROR(EINTR)); 246235951Smm dsp->dsa_pending_op = PENDING_NONE; 247219089Spjd } 248219089Spjd 249219089Spjd /* write a SPILL record */ 250235951Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 251235951Smm dsp->dsa_drr->drr_type = DRR_SPILL; 252219089Spjd drrs->drr_object = object; 253219089Spjd drrs->drr_length = blksz; 254235951Smm drrs->drr_toguid = dsp->dsa_toguid; 255219089Spjd 256235951Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) 257249643Smm return (SET_ERROR(EINTR)); 258235951Smm if (dump_bytes(dsp, data, blksz)) 259249643Smm return (SET_ERROR(EINTR)); 260168404Spjd return (0); 261168404Spjd} 262168404Spjd 263168404Spjdstatic int 264235951Smmdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) 265168404Spjd{ 266235951Smm struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); 267219089Spjd 268260722Savg /* See comment in dump_free(). */ 269260722Savg if (!dsp->dsa_incremental) 270260722Savg return (0); 271260722Savg 272219089Spjd /* 273219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 274219089Spjd * push it out, since free block aggregation can only be done for 275219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 276219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 277219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 278219089Spjd */ 279235951Smm if (dsp->dsa_pending_op != PENDING_NONE && 280235951Smm dsp->dsa_pending_op != PENDING_FREEOBJECTS) { 281235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 282235951Smm sizeof (dmu_replay_record_t)) != 0) 283249643Smm return (SET_ERROR(EINTR)); 284235951Smm dsp->dsa_pending_op = PENDING_NONE; 285219089Spjd } 286235951Smm if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { 287219089Spjd /* 288219089Spjd * See whether this free object array can be aggregated 289219089Spjd * with pending one 290219089Spjd */ 291219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 292219089Spjd drrfo->drr_numobjs += numobjs; 293219089Spjd return (0); 294219089Spjd } else { 295219089Spjd /* can't be aggregated. Push out pending record */ 296235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 297219089Spjd sizeof (dmu_replay_record_t)) != 0) 298249643Smm return (SET_ERROR(EINTR)); 299235951Smm dsp->dsa_pending_op = PENDING_NONE; 300219089Spjd } 301219089Spjd } 302219089Spjd 303168404Spjd /* write a FREEOBJECTS record */ 304235951Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 305235951Smm dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; 306219089Spjd drrfo->drr_firstobj = firstobj; 307219089Spjd drrfo->drr_numobjs = numobjs; 308235951Smm drrfo->drr_toguid = dsp->dsa_toguid; 309168404Spjd 310235951Smm dsp->dsa_pending_op = PENDING_FREEOBJECTS; 311219089Spjd 312168404Spjd return (0); 313168404Spjd} 314168404Spjd 315168404Spjdstatic int 316235951Smmdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) 317168404Spjd{ 318235951Smm struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); 319219089Spjd 320168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 321235951Smm return (dump_freeobjects(dsp, object, 1)); 322168404Spjd 323235951Smm if (dsp->dsa_pending_op != PENDING_NONE) { 324235951Smm if (dump_bytes(dsp, dsp->dsa_drr, 325235951Smm sizeof (dmu_replay_record_t)) != 0) 326249643Smm return (SET_ERROR(EINTR)); 327235951Smm dsp->dsa_pending_op = PENDING_NONE; 328219089Spjd } 329219089Spjd 330168404Spjd /* write an OBJECT record */ 331235951Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 332235951Smm dsp->dsa_drr->drr_type = DRR_OBJECT; 333219089Spjd drro->drr_object = object; 334219089Spjd drro->drr_type = dnp->dn_type; 335219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 336219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 337219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 338219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 339219089Spjd drro->drr_compress = dnp->dn_compress; 340235951Smm drro->drr_toguid = dsp->dsa_toguid; 341168404Spjd 342235951Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 343249643Smm return (SET_ERROR(EINTR)); 344168404Spjd 345235951Smm if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 346249643Smm return (SET_ERROR(EINTR)); 347168404Spjd 348260722Savg /* Free anything past the end of the file. */ 349235951Smm if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * 350260722Savg (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) 351249643Smm return (SET_ERROR(EINTR)); 352249643Smm if (dsp->dsa_err != 0) 353249643Smm return (SET_ERROR(EINTR)); 354168404Spjd return (0); 355168404Spjd} 356168404Spjd 357168404Spjd#define BP_SPAN(dnp, level) \ 358168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 359168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 360168404Spjd 361219089Spjd/* ARGSUSED */ 362168404Spjdstatic int 363247406Smmbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 364219089Spjd const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 365168404Spjd{ 366235951Smm dmu_sendarg_t *dsp = arg; 367168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 368168404Spjd int err = 0; 369168404Spjd 370185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 371249643Smm return (SET_ERROR(EINTR)); 372168404Spjd 373219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 374219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 375209962Smm return (0); 376263410Sdelphij } else if (zb->zb_level == ZB_ZIL_LEVEL) { 377263410Sdelphij /* 378263410Sdelphij * If we are sending a non-snapshot (which is allowed on 379263410Sdelphij * read-only pools), it may have a ZIL, which must be ignored. 380263410Sdelphij */ 381263410Sdelphij return (0); 382263398Sdelphij } else if (BP_IS_HOLE(bp) && 383263398Sdelphij zb->zb_object == DMU_META_DNODE_OBJECT) { 384208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 385208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 386235951Smm err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); 387263398Sdelphij } else if (BP_IS_HOLE(bp)) { 388208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 389235951Smm err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); 390208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 391208047Smm return (0); 392208047Smm } else if (type == DMU_OT_DNODE) { 393208047Smm dnode_phys_t *blk; 394168404Spjd int i; 395168404Spjd int blksz = BP_GET_LSIZE(bp); 396208047Smm uint32_t aflags = ARC_WAIT; 397208047Smm arc_buf_t *abuf; 398168404Spjd 399247406Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 400247406Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 401247406Smm &aflags, zb) != 0) 402249643Smm return (SET_ERROR(EIO)); 403208047Smm 404208047Smm blk = abuf->b_data; 405168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 406208047Smm uint64_t dnobj = (zb->zb_blkid << 407208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 408235951Smm err = dump_dnode(dsp, dnobj, blk+i); 409249643Smm if (err != 0) 410168404Spjd break; 411168404Spjd } 412208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 413219089Spjd } else if (type == DMU_OT_SA) { 414208047Smm uint32_t aflags = ARC_WAIT; 415208047Smm arc_buf_t *abuf; 416168404Spjd int blksz = BP_GET_LSIZE(bp); 417168404Spjd 418247406Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 419247406Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 420247406Smm &aflags, zb) != 0) 421249643Smm return (SET_ERROR(EIO)); 422168404Spjd 423235951Smm err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); 424219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 425219089Spjd } else { /* it's a level-0 block of a regular object */ 426219089Spjd uint32_t aflags = ARC_WAIT; 427219089Spjd arc_buf_t *abuf; 428219089Spjd int blksz = BP_GET_LSIZE(bp); 429219089Spjd 430263410Sdelphij ASSERT0(zb->zb_level); 431247406Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 432247406Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 433247406Smm &aflags, zb) != 0) { 434229578Smm if (zfs_send_corrupt_data) { 435229578Smm /* Send a block filled with 0x"zfs badd bloc" */ 436229578Smm abuf = arc_buf_alloc(spa, blksz, &abuf, 437229578Smm ARC_BUFC_DATA); 438229578Smm uint64_t *ptr; 439229578Smm for (ptr = abuf->b_data; 440229578Smm (char *)ptr < (char *)abuf->b_data + blksz; 441229578Smm ptr++) 442229578Smm *ptr = 0x2f5baddb10c; 443229578Smm } else { 444249643Smm return (SET_ERROR(EIO)); 445229578Smm } 446229578Smm } 447219089Spjd 448235951Smm err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, 449219089Spjd blksz, bp, abuf->b_data); 450208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 451168404Spjd } 452168404Spjd 453168404Spjd ASSERT(err == 0 || err == EINTR); 454168404Spjd return (err); 455168404Spjd} 456168404Spjd 457249643Smm/* 458263410Sdelphij * Releases dp using the specified tag. 459249643Smm */ 460249643Smmstatic int 461249643Smmdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 462249643Smm#ifdef illumos 463263410Sdelphij zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, 464263410Sdelphij vnode_t *vp, offset_t *off) 465249643Smm#else 466263410Sdelphij zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd, 467263410Sdelphij struct file *fp, offset_t *off) 468249643Smm#endif 469168404Spjd{ 470249643Smm objset_t *os; 471168404Spjd dmu_replay_record_t *drr; 472235951Smm dmu_sendarg_t *dsp; 473168404Spjd int err; 474185029Spjd uint64_t fromtxg = 0; 475168404Spjd 476249643Smm err = dmu_objset_from_ds(ds, &os); 477249643Smm if (err != 0) { 478249643Smm dsl_pool_rele(dp, tag); 479249643Smm return (err); 480185029Spjd } 481185029Spjd 482168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 483168404Spjd drr->drr_type = DRR_BEGIN; 484168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 485219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 486219089Spjd DMU_SUBSTREAM); 487219089Spjd 488219089Spjd#ifdef _KERNEL 489249643Smm if (dmu_objset_type(os) == DMU_OST_ZFS) { 490219089Spjd uint64_t version; 491249643Smm if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 492235951Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 493249643Smm dsl_pool_rele(dp, tag); 494249643Smm return (SET_ERROR(EINVAL)); 495235951Smm } 496249643Smm if (version >= ZPL_VERSION_SA) { 497219089Spjd DMU_SET_FEATUREFLAGS( 498219089Spjd drr->drr_u.drr_begin.drr_versioninfo, 499219089Spjd DMU_BACKUP_FEATURE_SA_SPILL); 500219089Spjd } 501219089Spjd } 502219089Spjd#endif 503219089Spjd 504168404Spjd drr->drr_u.drr_begin.drr_creation_time = 505168404Spjd ds->ds_phys->ds_creation_time; 506249643Smm drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 507263410Sdelphij if (is_clone) 508185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 509168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 510185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 511185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 512185029Spjd 513263410Sdelphij if (fromzb != NULL) { 514263410Sdelphij drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; 515263410Sdelphij fromtxg = fromzb->zbm_creation_txg; 516263410Sdelphij } 517168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 518263410Sdelphij if (!dsl_dataset_is_snapshot(ds)) { 519263410Sdelphij (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", 520263410Sdelphij sizeof (drr->drr_u.drr_begin.drr_toname)); 521249643Smm } 522185029Spjd 523235951Smm dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 524168404Spjd 525235951Smm dsp->dsa_drr = drr; 526235951Smm dsp->dsa_outfd = outfd; 527235951Smm dsp->dsa_proc = curproc; 528235951Smm dsp->dsa_td = curthread; 529235951Smm dsp->dsa_fp = fp; 530249643Smm dsp->dsa_os = os; 531235951Smm dsp->dsa_off = off; 532235951Smm dsp->dsa_toguid = ds->ds_phys->ds_guid; 533235951Smm ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 534235951Smm dsp->dsa_pending_op = PENDING_NONE; 535263410Sdelphij dsp->dsa_incremental = (fromzb != NULL); 536235951Smm 537235951Smm mutex_enter(&ds->ds_sendstream_lock); 538235951Smm list_insert_head(&ds->ds_sendstreams, dsp); 539235951Smm mutex_exit(&ds->ds_sendstream_lock); 540235951Smm 541249643Smm dsl_dataset_long_hold(ds, FTAG); 542249643Smm dsl_pool_rele(dp, tag); 543249643Smm 544235951Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 545235951Smm err = dsp->dsa_err; 546235951Smm goto out; 547168404Spjd } 548168404Spjd 549208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 550235951Smm backup_cb, dsp); 551168404Spjd 552235951Smm if (dsp->dsa_pending_op != PENDING_NONE) 553235951Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 554249643Smm err = SET_ERROR(EINTR); 555219089Spjd 556249643Smm if (err != 0) { 557249643Smm if (err == EINTR && dsp->dsa_err != 0) 558235951Smm err = dsp->dsa_err; 559235951Smm goto out; 560168404Spjd } 561168404Spjd 562168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 563168404Spjd drr->drr_type = DRR_END; 564235951Smm drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 565235951Smm drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 566168404Spjd 567235951Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 568235951Smm err = dsp->dsa_err; 569235951Smm goto out; 570168404Spjd } 571168404Spjd 572235951Smmout: 573235951Smm mutex_enter(&ds->ds_sendstream_lock); 574235951Smm list_remove(&ds->ds_sendstreams, dsp); 575235951Smm mutex_exit(&ds->ds_sendstream_lock); 576235951Smm 577168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 578235951Smm kmem_free(dsp, sizeof (dmu_sendarg_t)); 579168404Spjd 580249643Smm dsl_dataset_long_rele(ds, FTAG); 581249643Smm 582235951Smm return (err); 583168404Spjd} 584168404Spjd 585229578Smmint 586249643Smmdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 587249643Smm#ifdef illumos 588249643Smm int outfd, vnode_t *vp, offset_t *off) 589249643Smm#else 590249643Smm int outfd, struct file *fp, offset_t *off) 591249643Smm#endif 592229578Smm{ 593249643Smm dsl_pool_t *dp; 594249643Smm dsl_dataset_t *ds; 595249643Smm dsl_dataset_t *fromds = NULL; 596229578Smm int err; 597229578Smm 598249643Smm err = dsl_pool_hold(pool, FTAG, &dp); 599249643Smm if (err != 0) 600249643Smm return (err); 601229578Smm 602249643Smm err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 603249643Smm if (err != 0) { 604249643Smm dsl_pool_rele(dp, FTAG); 605249643Smm return (err); 606249643Smm } 607229578Smm 608249643Smm if (fromsnap != 0) { 609263410Sdelphij zfs_bookmark_phys_t zb; 610263410Sdelphij boolean_t is_clone; 611263410Sdelphij 612249643Smm err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 613249643Smm if (err != 0) { 614249643Smm dsl_dataset_rele(ds, FTAG); 615249643Smm dsl_pool_rele(dp, FTAG); 616249643Smm return (err); 617249643Smm } 618263410Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 619263410Sdelphij err = SET_ERROR(EXDEV); 620263410Sdelphij zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; 621263410Sdelphij zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; 622263410Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 623263410Sdelphij is_clone = (fromds->ds_dir != ds->ds_dir); 624263410Sdelphij dsl_dataset_rele(fromds, FTAG); 625263410Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 626263410Sdelphij outfd, fp, off); 627263410Sdelphij } else { 628263410Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 629263410Sdelphij outfd, fp, off); 630249643Smm } 631263410Sdelphij dsl_dataset_rele(ds, FTAG); 632263410Sdelphij return (err); 633249643Smm} 634249643Smm 635249643Smmint 636249643Smmdmu_send(const char *tosnap, const char *fromsnap, 637249643Smm#ifdef illumos 638249643Smm int outfd, vnode_t *vp, offset_t *off) 639249643Smm#else 640249643Smm int outfd, struct file *fp, offset_t *off) 641249643Smm#endif 642249643Smm{ 643249643Smm dsl_pool_t *dp; 644249643Smm dsl_dataset_t *ds; 645249643Smm int err; 646263410Sdelphij boolean_t owned = B_FALSE; 647249643Smm 648263410Sdelphij if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) 649249643Smm return (SET_ERROR(EINVAL)); 650249643Smm 651249643Smm err = dsl_pool_hold(tosnap, FTAG, &dp); 652249643Smm if (err != 0) 653249643Smm return (err); 654249643Smm 655263410Sdelphij if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { 656263410Sdelphij /* 657263410Sdelphij * We are sending a filesystem or volume. Ensure 658263410Sdelphij * that it doesn't change by owning the dataset. 659263410Sdelphij */ 660263410Sdelphij err = dsl_dataset_own(dp, tosnap, FTAG, &ds); 661263410Sdelphij owned = B_TRUE; 662263410Sdelphij } else { 663263410Sdelphij err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 664263410Sdelphij } 665249643Smm if (err != 0) { 666249643Smm dsl_pool_rele(dp, FTAG); 667249643Smm return (err); 668249643Smm } 669249643Smm 670249643Smm if (fromsnap != NULL) { 671263410Sdelphij zfs_bookmark_phys_t zb; 672263410Sdelphij boolean_t is_clone = B_FALSE; 673263410Sdelphij int fsnamelen = strchr(tosnap, '@') - tosnap; 674263410Sdelphij 675263410Sdelphij /* 676263410Sdelphij * If the fromsnap is in a different filesystem, then 677263410Sdelphij * mark the send stream as a clone. 678263410Sdelphij */ 679263410Sdelphij if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || 680263410Sdelphij (fromsnap[fsnamelen] != '@' && 681263410Sdelphij fromsnap[fsnamelen] != '#')) { 682263410Sdelphij is_clone = B_TRUE; 683263410Sdelphij } 684263410Sdelphij 685263410Sdelphij if (strchr(fromsnap, '@')) { 686263410Sdelphij dsl_dataset_t *fromds; 687263410Sdelphij err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 688263410Sdelphij if (err == 0) { 689263410Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 690263410Sdelphij err = SET_ERROR(EXDEV); 691263410Sdelphij zb.zbm_creation_time = 692263410Sdelphij fromds->ds_phys->ds_creation_time; 693263410Sdelphij zb.zbm_creation_txg = 694263410Sdelphij fromds->ds_phys->ds_creation_txg; 695263410Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 696263410Sdelphij is_clone = (ds->ds_dir != fromds->ds_dir); 697263410Sdelphij dsl_dataset_rele(fromds, FTAG); 698263410Sdelphij } 699263410Sdelphij } else { 700263410Sdelphij err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); 701263410Sdelphij } 702249643Smm if (err != 0) { 703249643Smm dsl_dataset_rele(ds, FTAG); 704249643Smm dsl_pool_rele(dp, FTAG); 705249643Smm return (err); 706229578Smm } 707263410Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 708263410Sdelphij outfd, fp, off); 709263410Sdelphij } else { 710263410Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 711263410Sdelphij outfd, fp, off); 712229578Smm } 713263410Sdelphij if (owned) 714263410Sdelphij dsl_dataset_disown(ds, FTAG); 715263410Sdelphij else 716263410Sdelphij dsl_dataset_rele(ds, FTAG); 717263410Sdelphij return (err); 718249643Smm} 719229578Smm 720249643Smmint 721249643Smmdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 722249643Smm{ 723249643Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 724249643Smm int err; 725249643Smm uint64_t size; 726249643Smm 727249643Smm ASSERT(dsl_pool_config_held(dp)); 728249643Smm 729249643Smm /* tosnap must be a snapshot */ 730249643Smm if (!dsl_dataset_is_snapshot(ds)) 731249643Smm return (SET_ERROR(EINVAL)); 732249643Smm 733249643Smm /* 734249643Smm * fromsnap must be an earlier snapshot from the same fs as tosnap, 735249643Smm * or the origin's fs. 736249643Smm */ 737263410Sdelphij if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) 738249643Smm return (SET_ERROR(EXDEV)); 739249643Smm 740229578Smm /* Get uncompressed size estimate of changed data. */ 741229578Smm if (fromds == NULL) { 742229578Smm size = ds->ds_phys->ds_uncompressed_bytes; 743229578Smm } else { 744229578Smm uint64_t used, comp; 745229578Smm err = dsl_dataset_space_written(fromds, ds, 746229578Smm &used, &comp, &size); 747249643Smm if (err != 0) 748229578Smm return (err); 749229578Smm } 750229578Smm 751229578Smm /* 752229578Smm * Assume that space (both on-disk and in-stream) is dominated by 753229578Smm * data. We will adjust for indirect blocks and the copies property, 754229578Smm * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 755229578Smm */ 756229578Smm 757229578Smm /* 758229578Smm * Subtract out approximate space used by indirect blocks. 759229578Smm * Assume most space is used by data blocks (non-indirect, non-dnode). 760229578Smm * Assume all blocks are recordsize. Assume ditto blocks and 761229578Smm * internal fragmentation counter out compression. 762229578Smm * 763229578Smm * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 764229578Smm * block, which we observe in practice. 765229578Smm */ 766229578Smm uint64_t recordsize; 767249643Smm err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 768249643Smm if (err != 0) 769229578Smm return (err); 770229578Smm size -= size / recordsize * sizeof (blkptr_t); 771229578Smm 772229578Smm /* Add in the space for the record associated with each block. */ 773229578Smm size += size / recordsize * sizeof (dmu_replay_record_t); 774229578Smm 775229578Smm *sizep = size; 776229578Smm 777229578Smm return (0); 778229578Smm} 779229578Smm 780249643Smmtypedef struct dmu_recv_begin_arg { 781249643Smm const char *drba_origin; 782249643Smm dmu_recv_cookie_t *drba_cookie; 783249643Smm cred_t *drba_cred; 784262160Savg uint64_t drba_snapobj; 785249643Smm} dmu_recv_begin_arg_t; 786168404Spjd 787168404Spjdstatic int 788249643Smmrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 789249643Smm uint64_t fromguid) 790168404Spjd{ 791185029Spjd uint64_t val; 792249643Smm int error; 793249643Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 794185029Spjd 795249643Smm /* temporary clone name must not exist */ 796249643Smm error = zap_lookup(dp->dp_meta_objset, 797249643Smm ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, 798249643Smm 8, 1, &val); 799249643Smm if (error != ENOENT) 800249643Smm return (error == 0 ? EBUSY : error); 801185029Spjd 802219089Spjd /* new snapshot name must not exist */ 803249643Smm error = zap_lookup(dp->dp_meta_objset, 804249643Smm ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 805249643Smm 8, 1, &val); 806249643Smm if (error != ENOENT) 807249643Smm return (error == 0 ? EEXIST : error); 808168404Spjd 809265754Sdelphij /* 810265754Sdelphij * Check snapshot limit before receiving. We'll recheck again at the 811265754Sdelphij * end, but might as well abort before receiving if we're already over 812265754Sdelphij * the limit. 813265754Sdelphij * 814265754Sdelphij * Note that we do not check the file system limit with 815265754Sdelphij * dsl_dir_fscount_check because the temporary %clones don't count 816265754Sdelphij * against that limit. 817265754Sdelphij */ 818265754Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, 819265754Sdelphij NULL, drba->drba_cred); 820265754Sdelphij if (error != 0) 821265754Sdelphij return (error); 822265754Sdelphij 823249643Smm if (fromguid != 0) { 824262160Savg dsl_dataset_t *snap; 825262160Savg uint64_t obj = ds->ds_phys->ds_prev_snap_obj; 826262160Savg 827262160Savg /* Find snapshot in this dir that matches fromguid. */ 828262160Savg while (obj != 0) { 829262160Savg error = dsl_dataset_hold_obj(dp, obj, FTAG, 830262160Savg &snap); 831262160Savg if (error != 0) 832262160Savg return (SET_ERROR(ENODEV)); 833262160Savg if (snap->ds_dir != ds->ds_dir) { 834262160Savg dsl_dataset_rele(snap, FTAG); 835262160Savg return (SET_ERROR(ENODEV)); 836262160Savg } 837262160Savg if (snap->ds_phys->ds_guid == fromguid) 838262160Savg break; 839262160Savg obj = snap->ds_phys->ds_prev_snap_obj; 840262160Savg dsl_dataset_rele(snap, FTAG); 841262160Savg } 842262160Savg if (obj == 0) 843249643Smm return (SET_ERROR(ENODEV)); 844168404Spjd 845262160Savg if (drba->drba_cookie->drc_force) { 846262160Savg drba->drba_snapobj = obj; 847262160Savg } else { 848262160Savg /* 849262160Savg * If we are not forcing, there must be no 850262160Savg * changes since fromsnap. 851262160Savg */ 852262160Savg if (dsl_dataset_modified_since_snap(ds, snap)) { 853219089Spjd dsl_dataset_rele(snap, FTAG); 854262160Savg return (SET_ERROR(ETXTBSY)); 855219089Spjd } 856262160Savg drba->drba_snapobj = ds->ds_prev->ds_object; 857219089Spjd } 858262160Savg 859262160Savg dsl_dataset_rele(snap, FTAG); 860219089Spjd } else { 861219089Spjd /* if full, most recent snapshot must be $ORIGIN */ 862219089Spjd if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 863249643Smm return (SET_ERROR(ENODEV)); 864262160Savg drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj; 865219089Spjd } 866219089Spjd 867249643Smm return (0); 868168404Spjd 869168404Spjd} 870168404Spjd 871249643Smmstatic int 872249643Smmdmu_recv_begin_check(void *arg, dmu_tx_t *tx) 873249643Smm{ 874249643Smm dmu_recv_begin_arg_t *drba = arg; 875249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 876249643Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 877249643Smm uint64_t fromguid = drrb->drr_fromguid; 878249643Smm int flags = drrb->drr_flags; 879249643Smm int error; 880249643Smm dsl_dataset_t *ds; 881249643Smm const char *tofs = drba->drba_cookie->drc_tofs; 882249643Smm 883249643Smm /* already checked */ 884249643Smm ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 885249643Smm 886249643Smm if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 887249643Smm DMU_COMPOUNDSTREAM || 888249643Smm drrb->drr_type >= DMU_OST_NUMTYPES || 889249643Smm ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 890249643Smm return (SET_ERROR(EINVAL)); 891249643Smm 892249643Smm /* Verify pool version supports SA if SA_SPILL feature set */ 893249643Smm if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & 894249643Smm DMU_BACKUP_FEATURE_SA_SPILL) && 895249643Smm spa_version(dp->dp_spa) < SPA_VERSION_SA) { 896249643Smm return (SET_ERROR(ENOTSUP)); 897249643Smm } 898249643Smm 899249643Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 900249643Smm if (error == 0) { 901249643Smm /* target fs already exists; recv into temp clone */ 902249643Smm 903249643Smm /* Can't recv a clone into an existing fs */ 904249643Smm if (flags & DRR_FLAG_CLONE) { 905249643Smm dsl_dataset_rele(ds, FTAG); 906249643Smm return (SET_ERROR(EINVAL)); 907249643Smm } 908249643Smm 909249643Smm error = recv_begin_check_existing_impl(drba, ds, fromguid); 910249643Smm dsl_dataset_rele(ds, FTAG); 911249643Smm } else if (error == ENOENT) { 912249643Smm /* target fs does not exist; must be a full backup or clone */ 913249643Smm char buf[MAXNAMELEN]; 914249643Smm 915249643Smm /* 916249643Smm * If it's a non-clone incremental, we are missing the 917249643Smm * target fs, so fail the recv. 918249643Smm */ 919249643Smm if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 920249643Smm return (SET_ERROR(ENOENT)); 921249643Smm 922249643Smm /* Open the parent of tofs */ 923249643Smm ASSERT3U(strlen(tofs), <, MAXNAMELEN); 924249643Smm (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 925249643Smm error = dsl_dataset_hold(dp, buf, FTAG, &ds); 926249643Smm if (error != 0) 927249643Smm return (error); 928249643Smm 929265754Sdelphij /* 930265754Sdelphij * Check filesystem and snapshot limits before receiving. We'll 931265754Sdelphij * recheck snapshot limits again at the end (we create the 932265754Sdelphij * filesystems and increment those counts during begin_sync). 933265754Sdelphij */ 934265754Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 935265754Sdelphij ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); 936265754Sdelphij if (error != 0) { 937265754Sdelphij dsl_dataset_rele(ds, FTAG); 938265754Sdelphij return (error); 939265754Sdelphij } 940265754Sdelphij 941265754Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 942265754Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); 943265754Sdelphij if (error != 0) { 944265754Sdelphij dsl_dataset_rele(ds, FTAG); 945265754Sdelphij return (error); 946265754Sdelphij } 947265754Sdelphij 948249643Smm if (drba->drba_origin != NULL) { 949249643Smm dsl_dataset_t *origin; 950249643Smm error = dsl_dataset_hold(dp, drba->drba_origin, 951249643Smm FTAG, &origin); 952249643Smm if (error != 0) { 953249643Smm dsl_dataset_rele(ds, FTAG); 954249643Smm return (error); 955249643Smm } 956249643Smm if (!dsl_dataset_is_snapshot(origin)) { 957249643Smm dsl_dataset_rele(origin, FTAG); 958249643Smm dsl_dataset_rele(ds, FTAG); 959249643Smm return (SET_ERROR(EINVAL)); 960249643Smm } 961249643Smm if (origin->ds_phys->ds_guid != fromguid) { 962249643Smm dsl_dataset_rele(origin, FTAG); 963249643Smm dsl_dataset_rele(ds, FTAG); 964249643Smm return (SET_ERROR(ENODEV)); 965249643Smm } 966249643Smm dsl_dataset_rele(origin, FTAG); 967249643Smm } 968249643Smm dsl_dataset_rele(ds, FTAG); 969249643Smm error = 0; 970249643Smm } 971249643Smm return (error); 972249643Smm} 973249643Smm 974168404Spjdstatic void 975249643Smmdmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 976168404Spjd{ 977249643Smm dmu_recv_begin_arg_t *drba = arg; 978249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 979249643Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 980249643Smm const char *tofs = drba->drba_cookie->drc_tofs; 981249643Smm dsl_dataset_t *ds, *newds; 982185029Spjd uint64_t dsobj; 983249643Smm int error; 984249643Smm uint64_t crflags; 985168404Spjd 986249643Smm crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 987249643Smm DS_FLAG_CI_DATASET : 0; 988168404Spjd 989249643Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 990249643Smm if (error == 0) { 991249643Smm /* create temporary clone */ 992262160Savg dsl_dataset_t *snap = NULL; 993262160Savg if (drba->drba_snapobj != 0) { 994262160Savg VERIFY0(dsl_dataset_hold_obj(dp, 995262160Savg drba->drba_snapobj, FTAG, &snap)); 996262160Savg } 997249643Smm dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 998262160Savg snap, crflags, drba->drba_cred, tx); 999262160Savg dsl_dataset_rele(snap, FTAG); 1000249643Smm dsl_dataset_rele(ds, FTAG); 1001249643Smm } else { 1002249643Smm dsl_dir_t *dd; 1003249643Smm const char *tail; 1004249643Smm dsl_dataset_t *origin = NULL; 1005249643Smm 1006249643Smm VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 1007249643Smm 1008249643Smm if (drba->drba_origin != NULL) { 1009249643Smm VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 1010249643Smm FTAG, &origin)); 1011249643Smm } 1012249643Smm 1013249643Smm /* Create new dataset. */ 1014249643Smm dsobj = dsl_dataset_create_sync(dd, 1015249643Smm strrchr(tofs, '/') + 1, 1016249643Smm origin, crflags, drba->drba_cred, tx); 1017249643Smm if (origin != NULL) 1018249643Smm dsl_dataset_rele(origin, FTAG); 1019249643Smm dsl_dir_rele(dd, FTAG); 1020249643Smm drba->drba_cookie->drc_newfs = B_TRUE; 1021249643Smm } 1022249643Smm VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 1023249643Smm 1024249643Smm dmu_buf_will_dirty(newds->ds_dbuf, tx); 1025249643Smm newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1026249643Smm 1027219089Spjd /* 1028219089Spjd * If we actually created a non-clone, we need to create the 1029219089Spjd * objset in our new dataset. 1030219089Spjd */ 1031249643Smm if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 1032219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 1033249643Smm newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 1034219089Spjd } 1035168404Spjd 1036249643Smm drba->drba_cookie->drc_ds = newds; 1037185029Spjd 1038249643Smm spa_history_log_internal_ds(newds, "receive", tx, ""); 1039168404Spjd} 1040168404Spjd 1041185029Spjd/* 1042185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 1043185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 1044185029Spjd */ 1045185029Spjdint 1046249643Smmdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 1047249643Smm boolean_t force, char *origin, dmu_recv_cookie_t *drc) 1048168404Spjd{ 1049249643Smm dmu_recv_begin_arg_t drba = { 0 }; 1050249643Smm dmu_replay_record_t *drr; 1051168404Spjd 1052185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 1053185029Spjd drc->drc_drrb = drrb; 1054185029Spjd drc->drc_tosnap = tosnap; 1055249643Smm drc->drc_tofs = tofs; 1056185029Spjd drc->drc_force = force; 1057265754Sdelphij drc->drc_cred = CRED(); 1058168404Spjd 1059249643Smm if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1060249643Smm drc->drc_byteswap = B_TRUE; 1061249643Smm else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 1062249643Smm return (SET_ERROR(EINVAL)); 1063168404Spjd 1064249643Smm drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1065249643Smm drr->drr_type = DRR_BEGIN; 1066249643Smm drr->drr_u.drr_begin = *drc->drc_drrb; 1067249643Smm if (drc->drc_byteswap) { 1068249643Smm fletcher_4_incremental_byteswap(drr, 1069249643Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1070249643Smm } else { 1071249643Smm fletcher_4_incremental_native(drr, 1072249643Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1073249643Smm } 1074249643Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 1075219089Spjd 1076249643Smm if (drc->drc_byteswap) { 1077249643Smm drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1078249643Smm drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1079249643Smm drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1080249643Smm drrb->drr_type = BSWAP_32(drrb->drr_type); 1081249643Smm drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1082249643Smm drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1083249643Smm } 1084168404Spjd 1085249643Smm drba.drba_origin = origin; 1086249643Smm drba.drba_cookie = drc; 1087249643Smm drba.drba_cred = CRED(); 1088219089Spjd 1089249643Smm return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 1090249643Smm &drba, 5)); 1091168404Spjd} 1092168404Spjd 1093185029Spjdstruct restorearg { 1094185029Spjd int err; 1095249643Smm boolean_t byteswap; 1096185029Spjd kthread_t *td; 1097185029Spjd struct file *fp; 1098185029Spjd char *buf; 1099185029Spjd uint64_t voff; 1100185029Spjd int bufsize; /* amount of memory allocated for buf */ 1101185029Spjd zio_cksum_t cksum; 1102219089Spjd avl_tree_t *guid_to_ds_map; 1103185029Spjd}; 1104185029Spjd 1105219089Spjdtypedef struct guid_map_entry { 1106219089Spjd uint64_t guid; 1107219089Spjd dsl_dataset_t *gme_ds; 1108219089Spjd avl_node_t avlnode; 1109219089Spjd} guid_map_entry_t; 1110219089Spjd 1111168404Spjdstatic int 1112219089Spjdguid_compare(const void *arg1, const void *arg2) 1113168404Spjd{ 1114219089Spjd const guid_map_entry_t *gmep1 = arg1; 1115219089Spjd const guid_map_entry_t *gmep2 = arg2; 1116219089Spjd 1117219089Spjd if (gmep1->guid < gmep2->guid) 1118219089Spjd return (-1); 1119219089Spjd else if (gmep1->guid > gmep2->guid) 1120219089Spjd return (1); 1121219089Spjd return (0); 1122219089Spjd} 1123219089Spjd 1124219089Spjdstatic void 1125219089Spjdfree_guid_map_onexit(void *arg) 1126219089Spjd{ 1127219089Spjd avl_tree_t *ca = arg; 1128219089Spjd void *cookie = NULL; 1129219089Spjd guid_map_entry_t *gmep; 1130219089Spjd 1131219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 1132249643Smm dsl_dataset_long_rele(gmep->gme_ds, gmep); 1133249643Smm dsl_dataset_rele(gmep->gme_ds, gmep); 1134219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 1135219089Spjd } 1136219089Spjd avl_destroy(ca); 1137219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 1138219089Spjd} 1139219089Spjd 1140219089Spjdstatic int 1141219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1142219089Spjd{ 1143168404Spjd struct uio auio; 1144168404Spjd struct iovec aiov; 1145168404Spjd int error; 1146168404Spjd 1147168404Spjd aiov.iov_base = buf; 1148168404Spjd aiov.iov_len = len; 1149168404Spjd auio.uio_iov = &aiov; 1150168404Spjd auio.uio_iovcnt = 1; 1151168404Spjd auio.uio_resid = len; 1152169170Spjd auio.uio_segflg = UIO_SYSSPACE; 1153168404Spjd auio.uio_rw = UIO_READ; 1154168404Spjd auio.uio_offset = off; 1155168404Spjd auio.uio_td = ra->td; 1156168404Spjd#ifdef _KERNEL 1157168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1158168404Spjd#else 1159168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1160168404Spjd error = EOPNOTSUPP; 1161168404Spjd#endif 1162168404Spjd *resid = auio.uio_resid; 1163168404Spjd return (error); 1164168404Spjd} 1165168404Spjd 1166168404Spjdstatic void * 1167168404Spjdrestore_read(struct restorearg *ra, int len) 1168168404Spjd{ 1169168404Spjd void *rv; 1170185029Spjd int done = 0; 1171168404Spjd 1172168404Spjd /* some things will require 8-byte alignment, so everything must */ 1173243674Smm ASSERT0(len % 8); 1174168404Spjd 1175185029Spjd while (done < len) { 1176219089Spjd ssize_t resid; 1177168404Spjd 1178185029Spjd ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 1179185029Spjd len - done, ra->voff, &resid); 1180168404Spjd 1181185029Spjd if (resid == len - done) 1182249643Smm ra->err = SET_ERROR(EINVAL); 1183185029Spjd ra->voff += len - done - resid; 1184185029Spjd done = len - resid; 1185249643Smm if (ra->err != 0) 1186168404Spjd return (NULL); 1187168404Spjd } 1188168404Spjd 1189185029Spjd ASSERT3U(done, ==, len); 1190185029Spjd rv = ra->buf; 1191168404Spjd if (ra->byteswap) 1192185029Spjd fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 1193168404Spjd else 1194185029Spjd fletcher_4_incremental_native(rv, len, &ra->cksum); 1195168404Spjd return (rv); 1196168404Spjd} 1197168404Spjd 1198168404Spjdstatic void 1199168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 1200168404Spjd{ 1201168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1202168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1203168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 1204185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1205168404Spjd switch (drr->drr_type) { 1206168404Spjd case DRR_BEGIN: 1207168404Spjd DO64(drr_begin.drr_magic); 1208219089Spjd DO64(drr_begin.drr_versioninfo); 1209168404Spjd DO64(drr_begin.drr_creation_time); 1210168404Spjd DO32(drr_begin.drr_type); 1211185029Spjd DO32(drr_begin.drr_flags); 1212168404Spjd DO64(drr_begin.drr_toguid); 1213168404Spjd DO64(drr_begin.drr_fromguid); 1214168404Spjd break; 1215168404Spjd case DRR_OBJECT: 1216168404Spjd DO64(drr_object.drr_object); 1217168404Spjd /* DO64(drr_object.drr_allocation_txg); */ 1218168404Spjd DO32(drr_object.drr_type); 1219168404Spjd DO32(drr_object.drr_bonustype); 1220168404Spjd DO32(drr_object.drr_blksz); 1221168404Spjd DO32(drr_object.drr_bonuslen); 1222219089Spjd DO64(drr_object.drr_toguid); 1223168404Spjd break; 1224168404Spjd case DRR_FREEOBJECTS: 1225168404Spjd DO64(drr_freeobjects.drr_firstobj); 1226168404Spjd DO64(drr_freeobjects.drr_numobjs); 1227219089Spjd DO64(drr_freeobjects.drr_toguid); 1228168404Spjd break; 1229168404Spjd case DRR_WRITE: 1230168404Spjd DO64(drr_write.drr_object); 1231168404Spjd DO32(drr_write.drr_type); 1232168404Spjd DO64(drr_write.drr_offset); 1233168404Spjd DO64(drr_write.drr_length); 1234219089Spjd DO64(drr_write.drr_toguid); 1235219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 1236219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1237219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1238219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1239219089Spjd DO64(drr_write.drr_key.ddk_prop); 1240168404Spjd break; 1241219089Spjd case DRR_WRITE_BYREF: 1242219089Spjd DO64(drr_write_byref.drr_object); 1243219089Spjd DO64(drr_write_byref.drr_offset); 1244219089Spjd DO64(drr_write_byref.drr_length); 1245219089Spjd DO64(drr_write_byref.drr_toguid); 1246219089Spjd DO64(drr_write_byref.drr_refguid); 1247219089Spjd DO64(drr_write_byref.drr_refobject); 1248219089Spjd DO64(drr_write_byref.drr_refoffset); 1249219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1250219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1251219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1252219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1253219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 1254219089Spjd break; 1255168404Spjd case DRR_FREE: 1256168404Spjd DO64(drr_free.drr_object); 1257168404Spjd DO64(drr_free.drr_offset); 1258168404Spjd DO64(drr_free.drr_length); 1259219089Spjd DO64(drr_free.drr_toguid); 1260168404Spjd break; 1261219089Spjd case DRR_SPILL: 1262219089Spjd DO64(drr_spill.drr_object); 1263219089Spjd DO64(drr_spill.drr_length); 1264219089Spjd DO64(drr_spill.drr_toguid); 1265219089Spjd break; 1266168404Spjd case DRR_END: 1267168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1268168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1269168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1270168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 1271219089Spjd DO64(drr_end.drr_toguid); 1272168404Spjd break; 1273168404Spjd } 1274168404Spjd#undef DO64 1275168404Spjd#undef DO32 1276168404Spjd} 1277168404Spjd 1278168404Spjdstatic int 1279168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1280168404Spjd{ 1281168404Spjd int err; 1282168404Spjd dmu_tx_t *tx; 1283200727Sdelphij void *data = NULL; 1284168404Spjd 1285168404Spjd if (drro->drr_type == DMU_OT_NONE || 1286243674Smm !DMU_OT_IS_VALID(drro->drr_type) || 1287243674Smm !DMU_OT_IS_VALID(drro->drr_bonustype) || 1288219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1289168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1290168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1291168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1292168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 1293168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1294249643Smm return (SET_ERROR(EINVAL)); 1295168404Spjd } 1296168404Spjd 1297200726Sdelphij err = dmu_object_info(os, drro->drr_object, NULL); 1298168404Spjd 1299200726Sdelphij if (err != 0 && err != ENOENT) 1300249643Smm return (SET_ERROR(EINVAL)); 1301200726Sdelphij 1302201756Sdelphij if (drro->drr_bonuslen) { 1303201756Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 1304249643Smm if (ra->err != 0) 1305201756Sdelphij return (ra->err); 1306201756Sdelphij } 1307201756Sdelphij 1308168404Spjd if (err == ENOENT) { 1309168404Spjd /* currently free, want to be allocated */ 1310200726Sdelphij tx = dmu_tx_create(os); 1311168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1312168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1313249643Smm if (err != 0) { 1314168404Spjd dmu_tx_abort(tx); 1315168404Spjd return (err); 1316168404Spjd } 1317168404Spjd err = dmu_object_claim(os, drro->drr_object, 1318168404Spjd drro->drr_type, drro->drr_blksz, 1319168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1320200726Sdelphij dmu_tx_commit(tx); 1321168404Spjd } else { 1322168404Spjd /* currently allocated, want to be allocated */ 1323168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1324168404Spjd drro->drr_type, drro->drr_blksz, 1325200726Sdelphij drro->drr_bonustype, drro->drr_bonuslen); 1326168404Spjd } 1327249643Smm if (err != 0) { 1328249643Smm return (SET_ERROR(EINVAL)); 1329219089Spjd } 1330200726Sdelphij 1331200726Sdelphij tx = dmu_tx_create(os); 1332200726Sdelphij dmu_tx_hold_bonus(tx, drro->drr_object); 1333200726Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1334249643Smm if (err != 0) { 1335200726Sdelphij dmu_tx_abort(tx); 1336200726Sdelphij return (err); 1337168404Spjd } 1338168404Spjd 1339219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1340219089Spjd tx); 1341168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1342168404Spjd 1343200727Sdelphij if (data != NULL) { 1344168404Spjd dmu_buf_t *db; 1345200727Sdelphij 1346168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1347168404Spjd dmu_buf_will_dirty(db, tx); 1348168404Spjd 1349185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1350185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1351168404Spjd if (ra->byteswap) { 1352243674Smm dmu_object_byteswap_t byteswap = 1353243674Smm DMU_OT_BYTESWAP(drro->drr_bonustype); 1354243674Smm dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1355168404Spjd drro->drr_bonuslen); 1356168404Spjd } 1357168404Spjd dmu_buf_rele(db, FTAG); 1358168404Spjd } 1359168404Spjd dmu_tx_commit(tx); 1360168404Spjd return (0); 1361168404Spjd} 1362168404Spjd 1363168404Spjd/* ARGSUSED */ 1364168404Spjdstatic int 1365168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 1366168404Spjd struct drr_freeobjects *drrfo) 1367168404Spjd{ 1368168404Spjd uint64_t obj; 1369168404Spjd 1370168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1371249643Smm return (SET_ERROR(EINVAL)); 1372168404Spjd 1373168404Spjd for (obj = drrfo->drr_firstobj; 1374168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1375168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 1376168404Spjd int err; 1377168404Spjd 1378168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 1379168404Spjd continue; 1380168404Spjd 1381260722Savg err = dmu_free_long_object(os, obj); 1382249643Smm if (err != 0) 1383168404Spjd return (err); 1384168404Spjd } 1385168404Spjd return (0); 1386168404Spjd} 1387168404Spjd 1388168404Spjdstatic int 1389168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1390168404Spjd struct drr_write *drrw) 1391168404Spjd{ 1392168404Spjd dmu_tx_t *tx; 1393168404Spjd void *data; 1394168404Spjd int err; 1395168404Spjd 1396168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1397243674Smm !DMU_OT_IS_VALID(drrw->drr_type)) 1398249643Smm return (SET_ERROR(EINVAL)); 1399168404Spjd 1400168404Spjd data = restore_read(ra, drrw->drr_length); 1401168404Spjd if (data == NULL) 1402168404Spjd return (ra->err); 1403168404Spjd 1404168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1405249643Smm return (SET_ERROR(EINVAL)); 1406168404Spjd 1407168404Spjd tx = dmu_tx_create(os); 1408168404Spjd 1409168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1410168404Spjd drrw->drr_offset, drrw->drr_length); 1411168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1412249643Smm if (err != 0) { 1413168404Spjd dmu_tx_abort(tx); 1414168404Spjd return (err); 1415168404Spjd } 1416243674Smm if (ra->byteswap) { 1417243674Smm dmu_object_byteswap_t byteswap = 1418243674Smm DMU_OT_BYTESWAP(drrw->drr_type); 1419243674Smm dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1420243674Smm } 1421168404Spjd dmu_write(os, drrw->drr_object, 1422168404Spjd drrw->drr_offset, drrw->drr_length, data, tx); 1423168404Spjd dmu_tx_commit(tx); 1424168404Spjd return (0); 1425168404Spjd} 1426168404Spjd 1427219089Spjd/* 1428219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1429219089Spjd * streams to refer to a copy of the data that is already on the 1430219089Spjd * system because it came in earlier in the stream. This function 1431219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1432219089Spjd * data from the stream to fulfill this write. 1433219089Spjd */ 1434219089Spjdstatic int 1435219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1436219089Spjd struct drr_write_byref *drrwbr) 1437219089Spjd{ 1438219089Spjd dmu_tx_t *tx; 1439219089Spjd int err; 1440219089Spjd guid_map_entry_t gmesrch; 1441219089Spjd guid_map_entry_t *gmep; 1442219089Spjd avl_index_t where; 1443219089Spjd objset_t *ref_os = NULL; 1444219089Spjd dmu_buf_t *dbp; 1445219089Spjd 1446219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1447249643Smm return (SET_ERROR(EINVAL)); 1448219089Spjd 1449219089Spjd /* 1450219089Spjd * If the GUID of the referenced dataset is different from the 1451219089Spjd * GUID of the target dataset, find the referenced dataset. 1452219089Spjd */ 1453219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1454219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1455219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1456219089Spjd &where)) == NULL) { 1457249643Smm return (SET_ERROR(EINVAL)); 1458219089Spjd } 1459219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1460249643Smm return (SET_ERROR(EINVAL)); 1461219089Spjd } else { 1462219089Spjd ref_os = os; 1463219089Spjd } 1464219089Spjd 1465219089Spjd if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1466219089Spjd drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) 1467219089Spjd return (err); 1468219089Spjd 1469219089Spjd tx = dmu_tx_create(os); 1470219089Spjd 1471219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1472219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1473219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1474249643Smm if (err != 0) { 1475219089Spjd dmu_tx_abort(tx); 1476219089Spjd return (err); 1477219089Spjd } 1478219089Spjd dmu_write(os, drrwbr->drr_object, 1479219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1480219089Spjd dmu_buf_rele(dbp, FTAG); 1481219089Spjd dmu_tx_commit(tx); 1482219089Spjd return (0); 1483219089Spjd} 1484219089Spjd 1485219089Spjdstatic int 1486219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1487219089Spjd{ 1488219089Spjd dmu_tx_t *tx; 1489219089Spjd void *data; 1490219089Spjd dmu_buf_t *db, *db_spill; 1491219089Spjd int err; 1492219089Spjd 1493219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1494219089Spjd drrs->drr_length > SPA_MAXBLOCKSIZE) 1495249643Smm return (SET_ERROR(EINVAL)); 1496219089Spjd 1497219089Spjd data = restore_read(ra, drrs->drr_length); 1498219089Spjd if (data == NULL) 1499219089Spjd return (ra->err); 1500219089Spjd 1501219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1502249643Smm return (SET_ERROR(EINVAL)); 1503219089Spjd 1504219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1505219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1506219089Spjd dmu_buf_rele(db, FTAG); 1507219089Spjd return (err); 1508219089Spjd } 1509219089Spjd 1510219089Spjd tx = dmu_tx_create(os); 1511219089Spjd 1512219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1513219089Spjd 1514219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1515249643Smm if (err != 0) { 1516219089Spjd dmu_buf_rele(db, FTAG); 1517219089Spjd dmu_buf_rele(db_spill, FTAG); 1518219089Spjd dmu_tx_abort(tx); 1519219089Spjd return (err); 1520219089Spjd } 1521219089Spjd dmu_buf_will_dirty(db_spill, tx); 1522219089Spjd 1523219089Spjd if (db_spill->db_size < drrs->drr_length) 1524219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1525219089Spjd drrs->drr_length, tx)); 1526219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1527219089Spjd 1528219089Spjd dmu_buf_rele(db, FTAG); 1529219089Spjd dmu_buf_rele(db_spill, FTAG); 1530219089Spjd 1531219089Spjd dmu_tx_commit(tx); 1532219089Spjd return (0); 1533219089Spjd} 1534219089Spjd 1535168404Spjd/* ARGSUSED */ 1536168404Spjdstatic int 1537168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1538168404Spjd struct drr_free *drrf) 1539168404Spjd{ 1540168404Spjd int err; 1541168404Spjd 1542168404Spjd if (drrf->drr_length != -1ULL && 1543168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1544249643Smm return (SET_ERROR(EINVAL)); 1545168404Spjd 1546168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1547249643Smm return (SET_ERROR(EINVAL)); 1548168404Spjd 1549185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1550168404Spjd drrf->drr_offset, drrf->drr_length); 1551168404Spjd return (err); 1552168404Spjd} 1553168404Spjd 1554249643Smm/* used to destroy the drc_ds on error */ 1555249643Smmstatic void 1556249643Smmdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1557249643Smm{ 1558249643Smm char name[MAXNAMELEN]; 1559249643Smm dsl_dataset_name(drc->drc_ds, name); 1560249643Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1561249643Smm (void) dsl_destroy_head(name); 1562249643Smm} 1563249643Smm 1564185029Spjd/* 1565185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1566185029Spjd */ 1567168404Spjdint 1568219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1569219089Spjd int cleanup_fd, uint64_t *action_handlep) 1570168404Spjd{ 1571185029Spjd struct restorearg ra = { 0 }; 1572168404Spjd dmu_replay_record_t *drr; 1573185029Spjd objset_t *os; 1574185029Spjd zio_cksum_t pcksum; 1575219089Spjd int featureflags; 1576168404Spjd 1577249643Smm ra.byteswap = drc->drc_byteswap; 1578249643Smm ra.cksum = drc->drc_cksum; 1579219089Spjd ra.td = curthread; 1580185029Spjd ra.fp = fp; 1581185029Spjd ra.voff = *voffp; 1582185029Spjd ra.bufsize = 1<<20; 1583185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1584168404Spjd 1585185029Spjd /* these were verified in dmu_recv_begin */ 1586249643Smm ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1587219089Spjd DMU_SUBSTREAM); 1588249643Smm ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1589168404Spjd 1590168404Spjd /* 1591168404Spjd * Open the objset we are modifying. 1592168404Spjd */ 1593249643Smm VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1594168404Spjd 1595249643Smm ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1596168404Spjd 1597219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1598219089Spjd 1599219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1600219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1601219089Spjd minor_t minor; 1602219089Spjd 1603219089Spjd if (cleanup_fd == -1) { 1604249643Smm ra.err = SET_ERROR(EBADF); 1605219089Spjd goto out; 1606219089Spjd } 1607219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1608249643Smm if (ra.err != 0) { 1609219089Spjd cleanup_fd = -1; 1610219089Spjd goto out; 1611219089Spjd } 1612219089Spjd 1613219089Spjd if (*action_handlep == 0) { 1614219089Spjd ra.guid_to_ds_map = 1615219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1616219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1617219089Spjd sizeof (guid_map_entry_t), 1618219089Spjd offsetof(guid_map_entry_t, avlnode)); 1619219089Spjd ra.err = zfs_onexit_add_cb(minor, 1620219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1621219089Spjd action_handlep); 1622249643Smm if (ra.err != 0) 1623219089Spjd goto out; 1624219089Spjd } else { 1625219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1626219089Spjd (void **)&ra.guid_to_ds_map); 1627249643Smm if (ra.err != 0) 1628219089Spjd goto out; 1629219089Spjd } 1630221263Smm 1631221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1632219089Spjd } 1633219089Spjd 1634168404Spjd /* 1635168404Spjd * Read records and process them. 1636168404Spjd */ 1637185029Spjd pcksum = ra.cksum; 1638168404Spjd while (ra.err == 0 && 1639168404Spjd NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1640185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1641249643Smm ra.err = SET_ERROR(EINTR); 1642168404Spjd goto out; 1643168404Spjd } 1644168404Spjd 1645168404Spjd if (ra.byteswap) 1646168404Spjd backup_byteswap(drr); 1647168404Spjd 1648168404Spjd switch (drr->drr_type) { 1649168404Spjd case DRR_OBJECT: 1650168404Spjd { 1651168404Spjd /* 1652168404Spjd * We need to make a copy of the record header, 1653168404Spjd * because restore_{object,write} may need to 1654168404Spjd * restore_read(), which will invalidate drr. 1655168404Spjd */ 1656168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1657168404Spjd ra.err = restore_object(&ra, os, &drro); 1658168404Spjd break; 1659168404Spjd } 1660168404Spjd case DRR_FREEOBJECTS: 1661168404Spjd { 1662168404Spjd struct drr_freeobjects drrfo = 1663168404Spjd drr->drr_u.drr_freeobjects; 1664168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1665168404Spjd break; 1666168404Spjd } 1667168404Spjd case DRR_WRITE: 1668168404Spjd { 1669168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1670168404Spjd ra.err = restore_write(&ra, os, &drrw); 1671168404Spjd break; 1672168404Spjd } 1673219089Spjd case DRR_WRITE_BYREF: 1674219089Spjd { 1675219089Spjd struct drr_write_byref drrwbr = 1676219089Spjd drr->drr_u.drr_write_byref; 1677219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1678219089Spjd break; 1679219089Spjd } 1680168404Spjd case DRR_FREE: 1681168404Spjd { 1682168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1683168404Spjd ra.err = restore_free(&ra, os, &drrf); 1684168404Spjd break; 1685168404Spjd } 1686168404Spjd case DRR_END: 1687168404Spjd { 1688168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1689168404Spjd /* 1690168404Spjd * We compare against the *previous* checksum 1691168404Spjd * value, because the stored checksum is of 1692168404Spjd * everything before the DRR_END record. 1693168404Spjd */ 1694185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1695249643Smm ra.err = SET_ERROR(ECKSUM); 1696168404Spjd goto out; 1697168404Spjd } 1698219089Spjd case DRR_SPILL: 1699219089Spjd { 1700219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1701219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1702219089Spjd break; 1703219089Spjd } 1704168404Spjd default: 1705249643Smm ra.err = SET_ERROR(EINVAL); 1706168404Spjd goto out; 1707168404Spjd } 1708185029Spjd pcksum = ra.cksum; 1709168404Spjd } 1710185029Spjd ASSERT(ra.err != 0); 1711168404Spjd 1712168404Spjdout: 1713219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1714219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1715168404Spjd 1716185029Spjd if (ra.err != 0) { 1717168404Spjd /* 1718219089Spjd * destroy what we created, so we don't leave it in the 1719219089Spjd * inconsistent restoring state. 1720168404Spjd */ 1721249643Smm dmu_recv_cleanup_ds(drc); 1722168404Spjd } 1723168404Spjd 1724168404Spjd kmem_free(ra.buf, ra.bufsize); 1725185029Spjd *voffp = ra.voff; 1726168404Spjd return (ra.err); 1727168404Spjd} 1728185029Spjd 1729185029Spjdstatic int 1730249643Smmdmu_recv_end_check(void *arg, dmu_tx_t *tx) 1731185029Spjd{ 1732249643Smm dmu_recv_cookie_t *drc = arg; 1733249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1734249643Smm int error; 1735185029Spjd 1736249643Smm ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1737249643Smm 1738249643Smm if (!drc->drc_newfs) { 1739249643Smm dsl_dataset_t *origin_head; 1740249643Smm 1741249643Smm error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 1742249643Smm if (error != 0) 1743249643Smm return (error); 1744262160Savg if (drc->drc_force) { 1745262160Savg /* 1746262160Savg * We will destroy any snapshots in tofs (i.e. before 1747262160Savg * origin_head) that are after the origin (which is 1748262160Savg * the snap before drc_ds, because drc_ds can not 1749262160Savg * have any snaps of its own). 1750262160Savg */ 1751262160Savg uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1752262160Savg while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1753262160Savg dsl_dataset_t *snap; 1754262160Savg error = dsl_dataset_hold_obj(dp, obj, FTAG, 1755262160Savg &snap); 1756262160Savg if (error != 0) 1757262160Savg return (error); 1758262160Savg if (snap->ds_dir != origin_head->ds_dir) 1759262160Savg error = SET_ERROR(EINVAL); 1760262160Savg if (error == 0) { 1761262160Savg error = dsl_destroy_snapshot_check_impl( 1762262160Savg snap, B_FALSE); 1763262160Savg } 1764262160Savg obj = snap->ds_phys->ds_prev_snap_obj; 1765262160Savg dsl_dataset_rele(snap, FTAG); 1766262160Savg if (error != 0) 1767262160Savg return (error); 1768262160Savg } 1769262160Savg } 1770249643Smm error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 1771257119Sdelphij origin_head, drc->drc_force, drc->drc_owner, tx); 1772249643Smm if (error != 0) { 1773249643Smm dsl_dataset_rele(origin_head, FTAG); 1774249643Smm return (error); 1775249643Smm } 1776249643Smm error = dsl_dataset_snapshot_check_impl(origin_head, 1777265754Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1778249643Smm dsl_dataset_rele(origin_head, FTAG); 1779249643Smm if (error != 0) 1780249643Smm return (error); 1781249643Smm 1782249643Smm error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 1783249643Smm } else { 1784249643Smm error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 1785265754Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1786249643Smm } 1787249643Smm return (error); 1788185029Spjd} 1789185029Spjd 1790185029Spjdstatic void 1791249643Smmdmu_recv_end_sync(void *arg, dmu_tx_t *tx) 1792185029Spjd{ 1793249643Smm dmu_recv_cookie_t *drc = arg; 1794249643Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1795185029Spjd 1796249643Smm spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 1797249643Smm tx, "snap=%s", drc->drc_tosnap); 1798185029Spjd 1799249643Smm if (!drc->drc_newfs) { 1800249643Smm dsl_dataset_t *origin_head; 1801185029Spjd 1802249643Smm VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 1803249643Smm &origin_head)); 1804262160Savg 1805262160Savg if (drc->drc_force) { 1806262160Savg /* 1807262160Savg * Destroy any snapshots of drc_tofs (origin_head) 1808262160Savg * after the origin (the snap before drc_ds). 1809262160Savg */ 1810262160Savg uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1811262160Savg while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1812262160Savg dsl_dataset_t *snap; 1813262160Savg VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, 1814262160Savg &snap)); 1815262160Savg ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); 1816262160Savg obj = snap->ds_phys->ds_prev_snap_obj; 1817262160Savg dsl_destroy_snapshot_sync_impl(snap, 1818262160Savg B_FALSE, tx); 1819262160Savg dsl_dataset_rele(snap, FTAG); 1820262160Savg } 1821262160Savg } 1822262160Savg VERIFY3P(drc->drc_ds->ds_prev, ==, 1823262160Savg origin_head->ds_prev); 1824262160Savg 1825249643Smm dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 1826249643Smm origin_head, tx); 1827249643Smm dsl_dataset_snapshot_sync_impl(origin_head, 1828249643Smm drc->drc_tosnap, tx); 1829249643Smm 1830249643Smm /* set snapshot's creation time and guid */ 1831249643Smm dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 1832249643Smm origin_head->ds_prev->ds_phys->ds_creation_time = 1833249643Smm drc->drc_drrb->drr_creation_time; 1834249643Smm origin_head->ds_prev->ds_phys->ds_guid = 1835249643Smm drc->drc_drrb->drr_toguid; 1836249643Smm origin_head->ds_prev->ds_phys->ds_flags &= 1837249643Smm ~DS_FLAG_INCONSISTENT; 1838249643Smm 1839249643Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 1840249643Smm origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1841249643Smm 1842249643Smm dsl_dataset_rele(origin_head, FTAG); 1843249643Smm dsl_destroy_head_sync_impl(drc->drc_ds, tx); 1844257119Sdelphij 1845257119Sdelphij if (drc->drc_owner != NULL) 1846257119Sdelphij VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); 1847249643Smm } else { 1848249643Smm dsl_dataset_t *ds = drc->drc_ds; 1849249643Smm 1850249643Smm dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 1851249643Smm 1852249643Smm /* set snapshot's creation time and guid */ 1853249643Smm dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1854249643Smm ds->ds_prev->ds_phys->ds_creation_time = 1855249643Smm drc->drc_drrb->drr_creation_time; 1856249643Smm ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; 1857249643Smm ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1858249643Smm 1859249643Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 1860249643Smm ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1861249643Smm } 1862249643Smm drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; 1863249643Smm /* 1864249643Smm * Release the hold from dmu_recv_begin. This must be done before 1865249643Smm * we return to open context, so that when we free the dataset's dnode, 1866249643Smm * we can evict its bonus buffer. 1867249643Smm */ 1868249643Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1869249643Smm drc->drc_ds = NULL; 1870185029Spjd} 1871185029Spjd 1872219089Spjdstatic int 1873249643Smmadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 1874221263Smm{ 1875249643Smm dsl_pool_t *dp; 1876221263Smm dsl_dataset_t *snapds; 1877221263Smm guid_map_entry_t *gmep; 1878221263Smm int err; 1879221263Smm 1880221263Smm ASSERT(guid_map != NULL); 1881221263Smm 1882249643Smm err = dsl_pool_hold(name, FTAG, &dp); 1883249643Smm if (err != 0) 1884249643Smm return (err); 1885249643Smm gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); 1886249643Smm err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); 1887221263Smm if (err == 0) { 1888221263Smm gmep->guid = snapds->ds_phys->ds_guid; 1889221263Smm gmep->gme_ds = snapds; 1890221263Smm avl_add(guid_map, gmep); 1891249643Smm dsl_dataset_long_hold(snapds, gmep); 1892249643Smm } else 1893249643Smm kmem_free(gmep, sizeof (*gmep)); 1894221263Smm 1895249643Smm dsl_pool_rele(dp, FTAG); 1896221263Smm return (err); 1897221263Smm} 1898221263Smm 1899249643Smmstatic int dmu_recv_end_modified_blocks = 3; 1900249643Smm 1901221263Smmstatic int 1902219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 1903185029Spjd{ 1904249643Smm int error; 1905249643Smm char name[MAXNAMELEN]; 1906185029Spjd 1907249643Smm#ifdef _KERNEL 1908249643Smm /* 1909249643Smm * We will be destroying the ds; make sure its origin is unmounted if 1910249643Smm * necessary. 1911249643Smm */ 1912249643Smm dsl_dataset_name(drc->drc_ds, name); 1913249643Smm zfs_destroy_unmount_origin(name); 1914249643Smm#endif 1915185029Spjd 1916249643Smm error = dsl_sync_task(drc->drc_tofs, 1917249643Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 1918249643Smm dmu_recv_end_modified_blocks); 1919185029Spjd 1920249643Smm if (error != 0) 1921249643Smm dmu_recv_cleanup_ds(drc); 1922249643Smm return (error); 1923185029Spjd} 1924219089Spjd 1925219089Spjdstatic int 1926219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 1927219089Spjd{ 1928249643Smm int error; 1929219089Spjd 1930249643Smm error = dsl_sync_task(drc->drc_tofs, 1931249643Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 1932249643Smm dmu_recv_end_modified_blocks); 1933219089Spjd 1934249643Smm if (error != 0) { 1935249643Smm dmu_recv_cleanup_ds(drc); 1936249643Smm } else if (drc->drc_guid_to_ds_map != NULL) { 1937249643Smm (void) add_ds_to_guidmap(drc->drc_tofs, 1938249643Smm drc->drc_guid_to_ds_map, 1939249643Smm drc->drc_newsnapobj); 1940219089Spjd } 1941249643Smm return (error); 1942219089Spjd} 1943219089Spjd 1944219089Spjdint 1945257119Sdelphijdmu_recv_end(dmu_recv_cookie_t *drc, void *owner) 1946219089Spjd{ 1947257119Sdelphij drc->drc_owner = owner; 1948257119Sdelphij 1949249643Smm if (drc->drc_newfs) 1950249643Smm return (dmu_recv_new_end(drc)); 1951249643Smm else 1952219089Spjd return (dmu_recv_existing_end(drc)); 1953219089Spjd} 1954260722Savg 1955260722Savg/* 1956260722Savg * Return TRUE if this objset is currently being received into. 1957260722Savg */ 1958260722Savgboolean_t 1959260722Savgdmu_objset_is_receiving(objset_t *os) 1960260722Savg{ 1961260722Savg return (os->os_dsl_dataset != NULL && 1962260722Savg os->os_dsl_dataset->ds_owner == dmu_recv_tag); 1963260722Savg} 1964