dmu_send.c revision 272601
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24268123Sdelphij * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 25264835Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26235222Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. 
27221263Smm */ 28168404Spjd 29168404Spjd#include <sys/dmu.h> 30168404Spjd#include <sys/dmu_impl.h> 31168404Spjd#include <sys/dmu_tx.h> 32168404Spjd#include <sys/dbuf.h> 33168404Spjd#include <sys/dnode.h> 34168404Spjd#include <sys/zfs_context.h> 35168404Spjd#include <sys/dmu_objset.h> 36168404Spjd#include <sys/dmu_traverse.h> 37168404Spjd#include <sys/dsl_dataset.h> 38168404Spjd#include <sys/dsl_dir.h> 39219089Spjd#include <sys/dsl_prop.h> 40168404Spjd#include <sys/dsl_pool.h> 41168404Spjd#include <sys/dsl_synctask.h> 42168404Spjd#include <sys/zfs_ioctl.h> 43168404Spjd#include <sys/zap.h> 44168404Spjd#include <sys/zio_checksum.h> 45219089Spjd#include <sys/zfs_znode.h> 46219089Spjd#include <zfs_fletcher.h> 47219089Spjd#include <sys/avl.h> 48219089Spjd#include <sys/ddt.h> 49219089Spjd#include <sys/zfs_onexit.h> 50248571Smm#include <sys/dmu_send.h> 51248571Smm#include <sys/dsl_destroy.h> 52268075Sdelphij#include <sys/blkptr.h> 53260183Sdelphij#include <sys/dsl_bookmark.h> 54268075Sdelphij#include <sys/zfeature.h> 55168404Spjd 56268075Sdelphij#ifdef __FreeBSD__ 57268075Sdelphij#undef dump_write 58268075Sdelphij#define dump_write dmu_dump_write 59268075Sdelphij#endif 60268075Sdelphij 61228103Smm/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ 62228103Smmint zfs_send_corrupt_data = B_FALSE; 63228103Smm 64185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 65248571Smmstatic const char *recv_clone_name = "%recv"; 66185029Spjd 67168404Spjdstatic int 68235222Smmdump_bytes(dmu_sendarg_t *dsp, void *buf, int len) 69168404Spjd{ 70235222Smm dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; 71168404Spjd struct uio auio; 72168404Spjd struct iovec aiov; 73240415Smm ASSERT0(len % 8); 74168404Spjd 75235222Smm fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); 76168404Spjd aiov.iov_base = buf; 77168404Spjd aiov.iov_len = len; 78168404Spjd auio.uio_iov = &aiov; 79168404Spjd auio.uio_iovcnt = 1; 80168404Spjd auio.uio_resid = len; 81169170Spjd auio.uio_segflg 
= UIO_SYSSPACE; 82168404Spjd auio.uio_rw = UIO_WRITE; 83168404Spjd auio.uio_offset = (off_t)-1; 84235222Smm auio.uio_td = dsp->dsa_td; 85168404Spjd#ifdef _KERNEL 86235222Smm if (dsp->dsa_fp->f_type == DTYPE_VNODE) 87168404Spjd bwillwrite(); 88235222Smm dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0, 89235222Smm dsp->dsa_td); 90168404Spjd#else 91168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 92235222Smm dsp->dsa_err = EOPNOTSUPP; 93168404Spjd#endif 94235222Smm mutex_enter(&ds->ds_sendstream_lock); 95235222Smm *dsp->dsa_off += len; 96235222Smm mutex_exit(&ds->ds_sendstream_lock); 97235222Smm 98235222Smm return (dsp->dsa_err); 99168404Spjd} 100168404Spjd 101168404Spjdstatic int 102235222Smmdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 103168404Spjd uint64_t length) 104168404Spjd{ 105235222Smm struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); 106219089Spjd 107253821Sdelphij /* 108253821Sdelphij * When we receive a free record, dbuf_free_range() assumes 109253821Sdelphij * that the receiving system doesn't have any dbufs in the range 110253821Sdelphij * being freed. This is always true because there is a one-record 111253821Sdelphij * constraint: we only send one WRITE record for any given 112253821Sdelphij * object+offset. We know that the one-record constraint is 113253821Sdelphij * true because we always send data in increasing order by 114253821Sdelphij * object,offset. 115253821Sdelphij * 116253821Sdelphij * If the increasing-order constraint ever changes, we should find 117253821Sdelphij * another way to assert that the one-record constraint is still 118253821Sdelphij * satisfied. 
119253821Sdelphij */ 120253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 121253821Sdelphij (object == dsp->dsa_last_data_object && 122253821Sdelphij offset > dsp->dsa_last_data_offset)); 123253821Sdelphij 124253821Sdelphij /* 125253821Sdelphij * If we are doing a non-incremental send, then there can't 126253821Sdelphij * be any data in the dataset we're receiving into. Therefore 127253821Sdelphij * a free record would simply be a no-op. Save space by not 128253821Sdelphij * sending it to begin with. 129253821Sdelphij */ 130253821Sdelphij if (!dsp->dsa_incremental) 131253821Sdelphij return (0); 132253821Sdelphij 133237458Smm if (length != -1ULL && offset + length < offset) 134237458Smm length = -1ULL; 135237458Smm 136219089Spjd /* 137219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 138219089Spjd * since free block aggregation can only be done for blocks of the 139219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 140219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 141219089Spjd * aggregated with other DRR_FREEOBJECTS records. 142219089Spjd */ 143235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 144235222Smm dsp->dsa_pending_op != PENDING_FREE) { 145235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 146235222Smm sizeof (dmu_replay_record_t)) != 0) 147249195Smm return (SET_ERROR(EINTR)); 148235222Smm dsp->dsa_pending_op = PENDING_NONE; 149219089Spjd } 150219089Spjd 151235222Smm if (dsp->dsa_pending_op == PENDING_FREE) { 152219089Spjd /* 153219089Spjd * There should never be a PENDING_FREE if length is -1 154219089Spjd * (because dump_dnode is the only place where this 155219089Spjd * function is called with a -1, and only after flushing 156219089Spjd * any pending record). 157219089Spjd */ 158219089Spjd ASSERT(length != -1ULL); 159219089Spjd /* 160219089Spjd * Check to see whether this free block can be aggregated 161219089Spjd * with pending one. 
162219089Spjd */ 163219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 164219089Spjd drrf->drr_length == offset) { 165219089Spjd drrf->drr_length += length; 166219089Spjd return (0); 167219089Spjd } else { 168219089Spjd /* not a continuation. Push out pending record */ 169235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 170219089Spjd sizeof (dmu_replay_record_t)) != 0) 171249195Smm return (SET_ERROR(EINTR)); 172235222Smm dsp->dsa_pending_op = PENDING_NONE; 173219089Spjd } 174219089Spjd } 175219089Spjd /* create a FREE record and make it pending */ 176235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 177235222Smm dsp->dsa_drr->drr_type = DRR_FREE; 178219089Spjd drrf->drr_object = object; 179219089Spjd drrf->drr_offset = offset; 180219089Spjd drrf->drr_length = length; 181235222Smm drrf->drr_toguid = dsp->dsa_toguid; 182219089Spjd if (length == -1ULL) { 183235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 184235222Smm sizeof (dmu_replay_record_t)) != 0) 185249195Smm return (SET_ERROR(EINTR)); 186219089Spjd } else { 187235222Smm dsp->dsa_pending_op = PENDING_FREE; 188219089Spjd } 189168404Spjd 190168404Spjd return (0); 191168404Spjd} 192168404Spjd 193168404Spjdstatic int 194268075Sdelphijdump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, 195219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 196168404Spjd{ 197235222Smm struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); 198219089Spjd 199253821Sdelphij /* 200253821Sdelphij * We send data in increasing object, offset order. 201253821Sdelphij * See comment in dump_free() for details. 
202253821Sdelphij */ 203253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 204253821Sdelphij (object == dsp->dsa_last_data_object && 205253821Sdelphij offset > dsp->dsa_last_data_offset)); 206253821Sdelphij dsp->dsa_last_data_object = object; 207253821Sdelphij dsp->dsa_last_data_offset = offset + blksz - 1; 208219089Spjd 209219089Spjd /* 210219089Spjd * If there is any kind of pending aggregation (currently either 211219089Spjd * a grouping of free objects or free blocks), push it out to 212219089Spjd * the stream, since aggregation can't be done across operations 213219089Spjd * of different types. 214219089Spjd */ 215235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 216235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 217235222Smm sizeof (dmu_replay_record_t)) != 0) 218249195Smm return (SET_ERROR(EINTR)); 219235222Smm dsp->dsa_pending_op = PENDING_NONE; 220219089Spjd } 221168404Spjd /* write a DATA record */ 222235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 223235222Smm dsp->dsa_drr->drr_type = DRR_WRITE; 224219089Spjd drrw->drr_object = object; 225219089Spjd drrw->drr_type = type; 226219089Spjd drrw->drr_offset = offset; 227219089Spjd drrw->drr_length = blksz; 228235222Smm drrw->drr_toguid = dsp->dsa_toguid; 229268075Sdelphij if (BP_IS_EMBEDDED(bp)) { 230268075Sdelphij /* 231268075Sdelphij * There's no pre-computed checksum of embedded BP's, so 232268075Sdelphij * (like fletcher4-checkummed blocks) userland will have 233268075Sdelphij * to compute a dedup-capable checksum itself. 
234268075Sdelphij */ 235268075Sdelphij drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; 236268075Sdelphij } else { 237268075Sdelphij drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 238268075Sdelphij if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 239268075Sdelphij drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 240268075Sdelphij DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 241268075Sdelphij DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 242268075Sdelphij DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 243268075Sdelphij drrw->drr_key.ddk_cksum = bp->blk_cksum; 244268075Sdelphij } 245168404Spjd 246235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 247249195Smm return (SET_ERROR(EINTR)); 248235222Smm if (dump_bytes(dsp, data, blksz) != 0) 249249195Smm return (SET_ERROR(EINTR)); 250219089Spjd return (0); 251219089Spjd} 252219089Spjd 253219089Spjdstatic int 254268075Sdelphijdump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 255268075Sdelphij int blksz, const blkptr_t *bp) 256268075Sdelphij{ 257268075Sdelphij char buf[BPE_PAYLOAD_SIZE]; 258268075Sdelphij struct drr_write_embedded *drrw = 259268075Sdelphij &(dsp->dsa_drr->drr_u.drr_write_embedded); 260268075Sdelphij 261268075Sdelphij if (dsp->dsa_pending_op != PENDING_NONE) { 262268075Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, 263268075Sdelphij sizeof (dmu_replay_record_t)) != 0) 264268075Sdelphij return (EINTR); 265268075Sdelphij dsp->dsa_pending_op = PENDING_NONE; 266268075Sdelphij } 267268075Sdelphij 268268075Sdelphij ASSERT(BP_IS_EMBEDDED(bp)); 269268075Sdelphij 270268075Sdelphij bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 271268075Sdelphij dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED; 272268075Sdelphij drrw->drr_object = object; 273268075Sdelphij drrw->drr_offset = offset; 274268075Sdelphij drrw->drr_length = blksz; 275268075Sdelphij drrw->drr_toguid = dsp->dsa_toguid; 276268075Sdelphij drrw->drr_compression = BP_GET_COMPRESS(bp); 
277268075Sdelphij drrw->drr_etype = BPE_GET_ETYPE(bp); 278268075Sdelphij drrw->drr_lsize = BPE_GET_LSIZE(bp); 279268075Sdelphij drrw->drr_psize = BPE_GET_PSIZE(bp); 280268075Sdelphij 281268075Sdelphij decode_embedded_bp_compressed(bp, buf); 282268075Sdelphij 283268075Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 284268075Sdelphij return (EINTR); 285268075Sdelphij if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) 286268075Sdelphij return (EINTR); 287268075Sdelphij return (0); 288268075Sdelphij} 289268075Sdelphij 290268075Sdelphijstatic int 291235222Smmdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) 292219089Spjd{ 293235222Smm struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); 294219089Spjd 295235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 296235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 297235222Smm sizeof (dmu_replay_record_t)) != 0) 298249195Smm return (SET_ERROR(EINTR)); 299235222Smm dsp->dsa_pending_op = PENDING_NONE; 300219089Spjd } 301219089Spjd 302219089Spjd /* write a SPILL record */ 303235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 304235222Smm dsp->dsa_drr->drr_type = DRR_SPILL; 305219089Spjd drrs->drr_object = object; 306219089Spjd drrs->drr_length = blksz; 307235222Smm drrs->drr_toguid = dsp->dsa_toguid; 308219089Spjd 309235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) 310249195Smm return (SET_ERROR(EINTR)); 311235222Smm if (dump_bytes(dsp, data, blksz)) 312249195Smm return (SET_ERROR(EINTR)); 313168404Spjd return (0); 314168404Spjd} 315168404Spjd 316168404Spjdstatic int 317235222Smmdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) 318168404Spjd{ 319235222Smm struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); 320219089Spjd 321253821Sdelphij /* See comment in dump_free(). 
*/ 322253821Sdelphij if (!dsp->dsa_incremental) 323253821Sdelphij return (0); 324253821Sdelphij 325219089Spjd /* 326219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 327219089Spjd * push it out, since free block aggregation can only be done for 328219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 329219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 330219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 331219089Spjd */ 332235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 333235222Smm dsp->dsa_pending_op != PENDING_FREEOBJECTS) { 334235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 335235222Smm sizeof (dmu_replay_record_t)) != 0) 336249195Smm return (SET_ERROR(EINTR)); 337235222Smm dsp->dsa_pending_op = PENDING_NONE; 338219089Spjd } 339235222Smm if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { 340219089Spjd /* 341219089Spjd * See whether this free object array can be aggregated 342219089Spjd * with pending one 343219089Spjd */ 344219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 345219089Spjd drrfo->drr_numobjs += numobjs; 346219089Spjd return (0); 347219089Spjd } else { 348219089Spjd /* can't be aggregated. 
Push out pending record */ 349235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 350219089Spjd sizeof (dmu_replay_record_t)) != 0) 351249195Smm return (SET_ERROR(EINTR)); 352235222Smm dsp->dsa_pending_op = PENDING_NONE; 353219089Spjd } 354219089Spjd } 355219089Spjd 356168404Spjd /* write a FREEOBJECTS record */ 357235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 358235222Smm dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; 359219089Spjd drrfo->drr_firstobj = firstobj; 360219089Spjd drrfo->drr_numobjs = numobjs; 361235222Smm drrfo->drr_toguid = dsp->dsa_toguid; 362168404Spjd 363235222Smm dsp->dsa_pending_op = PENDING_FREEOBJECTS; 364219089Spjd 365168404Spjd return (0); 366168404Spjd} 367168404Spjd 368168404Spjdstatic int 369235222Smmdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) 370168404Spjd{ 371235222Smm struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); 372219089Spjd 373168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 374235222Smm return (dump_freeobjects(dsp, object, 1)); 375168404Spjd 376235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 377235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 378235222Smm sizeof (dmu_replay_record_t)) != 0) 379249195Smm return (SET_ERROR(EINTR)); 380235222Smm dsp->dsa_pending_op = PENDING_NONE; 381219089Spjd } 382219089Spjd 383168404Spjd /* write an OBJECT record */ 384235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 385235222Smm dsp->dsa_drr->drr_type = DRR_OBJECT; 386219089Spjd drro->drr_object = object; 387219089Spjd drro->drr_type = dnp->dn_type; 388219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 389219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 390219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 391219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 392219089Spjd drro->drr_compress = dnp->dn_compress; 393235222Smm drro->drr_toguid = dsp->dsa_toguid; 394168404Spjd 395235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 
396249195Smm return (SET_ERROR(EINTR)); 397168404Spjd 398235222Smm if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 399249195Smm return (SET_ERROR(EINTR)); 400168404Spjd 401253821Sdelphij /* Free anything past the end of the file. */ 402235222Smm if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * 403253821Sdelphij (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) 404249195Smm return (SET_ERROR(EINTR)); 405248571Smm if (dsp->dsa_err != 0) 406249195Smm return (SET_ERROR(EINTR)); 407168404Spjd return (0); 408168404Spjd} 409168404Spjd 410268075Sdelphijstatic boolean_t 411268075Sdelphijbackup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) 412268075Sdelphij{ 413268075Sdelphij if (!BP_IS_EMBEDDED(bp)) 414268075Sdelphij return (B_FALSE); 415268075Sdelphij 416268075Sdelphij /* 417268075Sdelphij * Compression function must be legacy, or explicitly enabled. 418268075Sdelphij */ 419268075Sdelphij if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS && 420268075Sdelphij !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4))) 421268075Sdelphij return (B_FALSE); 422268075Sdelphij 423268075Sdelphij /* 424268075Sdelphij * Embed type must be explicitly enabled. 
425268075Sdelphij */ 426268075Sdelphij switch (BPE_GET_ETYPE(bp)) { 427268075Sdelphij case BP_EMBEDDED_TYPE_DATA: 428268075Sdelphij if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) 429268075Sdelphij return (B_TRUE); 430268075Sdelphij break; 431268075Sdelphij default: 432268075Sdelphij return (B_FALSE); 433268075Sdelphij } 434268075Sdelphij return (B_FALSE); 435268075Sdelphij} 436268075Sdelphij 437168404Spjd#define BP_SPAN(dnp, level) \ 438168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 439168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 440168404Spjd 441219089Spjd/* ARGSUSED */ 442168404Spjdstatic int 443246666Smmbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 444268123Sdelphij const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 445168404Spjd{ 446235222Smm dmu_sendarg_t *dsp = arg; 447168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 448168404Spjd int err = 0; 449168404Spjd 450185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 451249195Smm return (SET_ERROR(EINTR)); 452168404Spjd 453219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 454219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 455209962Smm return (0); 456260183Sdelphij } else if (zb->zb_level == ZB_ZIL_LEVEL) { 457260183Sdelphij /* 458260183Sdelphij * If we are sending a non-snapshot (which is allowed on 459260183Sdelphij * read-only pools), it may have a ZIL, which must be ignored. 
460260183Sdelphij */ 461260183Sdelphij return (0); 462260150Sdelphij } else if (BP_IS_HOLE(bp) && 463260150Sdelphij zb->zb_object == DMU_META_DNODE_OBJECT) { 464208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 465208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 466235222Smm err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); 467260150Sdelphij } else if (BP_IS_HOLE(bp)) { 468208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 469235222Smm err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); 470208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 471208047Smm return (0); 472208047Smm } else if (type == DMU_OT_DNODE) { 473208047Smm dnode_phys_t *blk; 474168404Spjd int i; 475168404Spjd int blksz = BP_GET_LSIZE(bp); 476208047Smm uint32_t aflags = ARC_WAIT; 477208047Smm arc_buf_t *abuf; 478168404Spjd 479246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 480246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 481246666Smm &aflags, zb) != 0) 482249195Smm return (SET_ERROR(EIO)); 483208047Smm 484208047Smm blk = abuf->b_data; 485168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 486208047Smm uint64_t dnobj = (zb->zb_blkid << 487208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 488235222Smm err = dump_dnode(dsp, dnobj, blk+i); 489248571Smm if (err != 0) 490168404Spjd break; 491168404Spjd } 492208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 493219089Spjd } else if (type == DMU_OT_SA) { 494208047Smm uint32_t aflags = ARC_WAIT; 495208047Smm arc_buf_t *abuf; 496168404Spjd int blksz = BP_GET_LSIZE(bp); 497168404Spjd 498246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 499246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 500246666Smm &aflags, zb) != 0) 501249195Smm return (SET_ERROR(EIO)); 502168404Spjd 503235222Smm err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); 504219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 505268075Sdelphij } else if (backup_do_embed(dsp, bp)) { 
506268075Sdelphij /* it's an embedded level-0 block of a regular object */ 507268075Sdelphij int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 508268075Sdelphij err = dump_write_embedded(dsp, zb->zb_object, 509268075Sdelphij zb->zb_blkid * blksz, blksz, bp); 510219089Spjd } else { /* it's a level-0 block of a regular object */ 511219089Spjd uint32_t aflags = ARC_WAIT; 512219089Spjd arc_buf_t *abuf; 513219089Spjd int blksz = BP_GET_LSIZE(bp); 514219089Spjd 515268075Sdelphij ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 516260183Sdelphij ASSERT0(zb->zb_level); 517246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 518246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 519246666Smm &aflags, zb) != 0) { 520228103Smm if (zfs_send_corrupt_data) { 521228103Smm /* Send a block filled with 0x"zfs badd bloc" */ 522228103Smm abuf = arc_buf_alloc(spa, blksz, &abuf, 523228103Smm ARC_BUFC_DATA); 524228103Smm uint64_t *ptr; 525228103Smm for (ptr = abuf->b_data; 526228103Smm (char *)ptr < (char *)abuf->b_data + blksz; 527228103Smm ptr++) 528228103Smm *ptr = 0x2f5baddb10c; 529228103Smm } else { 530249195Smm return (SET_ERROR(EIO)); 531228103Smm } 532228103Smm } 533219089Spjd 534268075Sdelphij err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz, 535219089Spjd blksz, bp, abuf->b_data); 536208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 537168404Spjd } 538168404Spjd 539168404Spjd ASSERT(err == 0 || err == EINTR); 540168404Spjd return (err); 541168404Spjd} 542168404Spjd 543248571Smm/* 544260183Sdelphij * Releases dp using the specified tag. 
545248571Smm */ 546248571Smmstatic int 547248571Smmdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 548268075Sdelphij zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok, 549248571Smm#ifdef illumos 550268075Sdelphij int outfd, vnode_t *vp, offset_t *off) 551248571Smm#else 552268075Sdelphij int outfd, struct file *fp, offset_t *off) 553248571Smm#endif 554168404Spjd{ 555248571Smm objset_t *os; 556168404Spjd dmu_replay_record_t *drr; 557235222Smm dmu_sendarg_t *dsp; 558168404Spjd int err; 559185029Spjd uint64_t fromtxg = 0; 560268075Sdelphij uint64_t featureflags = 0; 561168404Spjd 562248571Smm err = dmu_objset_from_ds(ds, &os); 563248571Smm if (err != 0) { 564248571Smm dsl_pool_rele(dp, tag); 565248571Smm return (err); 566185029Spjd } 567185029Spjd 568168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 569168404Spjd drr->drr_type = DRR_BEGIN; 570168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 571219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 572219089Spjd DMU_SUBSTREAM); 573219089Spjd 574219089Spjd#ifdef _KERNEL 575248571Smm if (dmu_objset_type(os) == DMU_OST_ZFS) { 576219089Spjd uint64_t version; 577248571Smm if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 578235222Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 579248571Smm dsl_pool_rele(dp, tag); 580249195Smm return (SET_ERROR(EINVAL)); 581235222Smm } 582248571Smm if (version >= ZPL_VERSION_SA) { 583268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; 584219089Spjd } 585219089Spjd } 586219089Spjd#endif 587219089Spjd 588268075Sdelphij if (embedok && 589268075Sdelphij spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { 590268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; 591268075Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 592268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; 593268075Sdelphij } else { 594268075Sdelphij embedok = B_FALSE; 
595268075Sdelphij } 596268075Sdelphij 597268075Sdelphij DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, 598268075Sdelphij featureflags); 599268075Sdelphij 600168404Spjd drr->drr_u.drr_begin.drr_creation_time = 601168404Spjd ds->ds_phys->ds_creation_time; 602248571Smm drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 603260183Sdelphij if (is_clone) 604185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 605168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 606185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 607185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 608185029Spjd 609260183Sdelphij if (fromzb != NULL) { 610260183Sdelphij drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; 611260183Sdelphij fromtxg = fromzb->zbm_creation_txg; 612260183Sdelphij } 613168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 614260183Sdelphij if (!dsl_dataset_is_snapshot(ds)) { 615260183Sdelphij (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", 616260183Sdelphij sizeof (drr->drr_u.drr_begin.drr_toname)); 617248571Smm } 618185029Spjd 619235222Smm dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 620168404Spjd 621235222Smm dsp->dsa_drr = drr; 622235222Smm dsp->dsa_outfd = outfd; 623235222Smm dsp->dsa_proc = curproc; 624235222Smm dsp->dsa_td = curthread; 625235222Smm dsp->dsa_fp = fp; 626248571Smm dsp->dsa_os = os; 627235222Smm dsp->dsa_off = off; 628235222Smm dsp->dsa_toguid = ds->ds_phys->ds_guid; 629235222Smm ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 630235222Smm dsp->dsa_pending_op = PENDING_NONE; 631260183Sdelphij dsp->dsa_incremental = (fromzb != NULL); 632268075Sdelphij dsp->dsa_featureflags = featureflags; 633235222Smm 634235222Smm mutex_enter(&ds->ds_sendstream_lock); 635235222Smm list_insert_head(&ds->ds_sendstreams, dsp); 636235222Smm mutex_exit(&ds->ds_sendstream_lock); 637235222Smm 638249042Smm dsl_dataset_long_hold(ds, FTAG); 639249042Smm dsl_pool_rele(dp, tag); 640249042Smm 
641235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 642235222Smm err = dsp->dsa_err; 643235222Smm goto out; 644168404Spjd } 645168404Spjd 646208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 647235222Smm backup_cb, dsp); 648168404Spjd 649235222Smm if (dsp->dsa_pending_op != PENDING_NONE) 650235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 651249195Smm err = SET_ERROR(EINTR); 652219089Spjd 653248571Smm if (err != 0) { 654248571Smm if (err == EINTR && dsp->dsa_err != 0) 655235222Smm err = dsp->dsa_err; 656235222Smm goto out; 657168404Spjd } 658168404Spjd 659168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 660168404Spjd drr->drr_type = DRR_END; 661235222Smm drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 662235222Smm drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 663168404Spjd 664235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 665235222Smm err = dsp->dsa_err; 666235222Smm goto out; 667168404Spjd } 668168404Spjd 669235222Smmout: 670235222Smm mutex_enter(&ds->ds_sendstream_lock); 671235222Smm list_remove(&ds->ds_sendstreams, dsp); 672235222Smm mutex_exit(&ds->ds_sendstream_lock); 673235222Smm 674168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 675235222Smm kmem_free(dsp, sizeof (dmu_sendarg_t)); 676168404Spjd 677248571Smm dsl_dataset_long_rele(ds, FTAG); 678248571Smm 679235222Smm return (err); 680168404Spjd} 681168404Spjd 682228103Smmint 683248571Smmdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 684248571Smm#ifdef illumos 685268075Sdelphij boolean_t embedok, int outfd, vnode_t *vp, offset_t *off) 686248571Smm#else 687268075Sdelphij boolean_t embedok, int outfd, struct file *fp, offset_t *off) 688248571Smm#endif 689228103Smm{ 690248571Smm dsl_pool_t *dp; 691248571Smm dsl_dataset_t *ds; 692248571Smm dsl_dataset_t *fromds = NULL; 693248571Smm int err; 694248571Smm 695248571Smm err = dsl_pool_hold(pool, FTAG, &dp); 696248571Smm if (err != 0) 
697248571Smm return (err); 698248571Smm 699248571Smm err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 700248571Smm if (err != 0) { 701248571Smm dsl_pool_rele(dp, FTAG); 702248571Smm return (err); 703248571Smm } 704248571Smm 705248571Smm if (fromsnap != 0) { 706260183Sdelphij zfs_bookmark_phys_t zb; 707260183Sdelphij boolean_t is_clone; 708260183Sdelphij 709248571Smm err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 710248571Smm if (err != 0) { 711248571Smm dsl_dataset_rele(ds, FTAG); 712248571Smm dsl_pool_rele(dp, FTAG); 713248571Smm return (err); 714248571Smm } 715260183Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 716260183Sdelphij err = SET_ERROR(EXDEV); 717260183Sdelphij zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; 718260183Sdelphij zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; 719260183Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 720260183Sdelphij is_clone = (fromds->ds_dir != ds->ds_dir); 721260183Sdelphij dsl_dataset_rele(fromds, FTAG); 722268075Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, 723260183Sdelphij outfd, fp, off); 724260183Sdelphij } else { 725268075Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, 726260183Sdelphij outfd, fp, off); 727248571Smm } 728260183Sdelphij dsl_dataset_rele(ds, FTAG); 729260183Sdelphij return (err); 730248571Smm} 731248571Smm 732248571Smmint 733268075Sdelphijdmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, 734248571Smm#ifdef illumos 735248571Smm int outfd, vnode_t *vp, offset_t *off) 736248571Smm#else 737248571Smm int outfd, struct file *fp, offset_t *off) 738248571Smm#endif 739248571Smm{ 740248571Smm dsl_pool_t *dp; 741248571Smm dsl_dataset_t *ds; 742248571Smm int err; 743260183Sdelphij boolean_t owned = B_FALSE; 744248571Smm 745260183Sdelphij if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) 746249195Smm return (SET_ERROR(EINVAL)); 747248571Smm 748248571Smm err = dsl_pool_hold(tosnap, FTAG, &dp); 749248571Smm if 
(err != 0) 750248571Smm return (err); 751248571Smm 752260183Sdelphij if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { 753260183Sdelphij /* 754260183Sdelphij * We are sending a filesystem or volume. Ensure 755260183Sdelphij * that it doesn't change by owning the dataset. 756260183Sdelphij */ 757260183Sdelphij err = dsl_dataset_own(dp, tosnap, FTAG, &ds); 758260183Sdelphij owned = B_TRUE; 759260183Sdelphij } else { 760260183Sdelphij err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 761260183Sdelphij } 762248571Smm if (err != 0) { 763248571Smm dsl_pool_rele(dp, FTAG); 764248571Smm return (err); 765248571Smm } 766248571Smm 767248571Smm if (fromsnap != NULL) { 768260183Sdelphij zfs_bookmark_phys_t zb; 769260183Sdelphij boolean_t is_clone = B_FALSE; 770260183Sdelphij int fsnamelen = strchr(tosnap, '@') - tosnap; 771260183Sdelphij 772260183Sdelphij /* 773260183Sdelphij * If the fromsnap is in a different filesystem, then 774260183Sdelphij * mark the send stream as a clone. 775260183Sdelphij */ 776260183Sdelphij if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || 777260183Sdelphij (fromsnap[fsnamelen] != '@' && 778260183Sdelphij fromsnap[fsnamelen] != '#')) { 779260183Sdelphij is_clone = B_TRUE; 780260183Sdelphij } 781260183Sdelphij 782260183Sdelphij if (strchr(fromsnap, '@')) { 783260183Sdelphij dsl_dataset_t *fromds; 784260183Sdelphij err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 785260183Sdelphij if (err == 0) { 786260183Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 787260183Sdelphij err = SET_ERROR(EXDEV); 788260183Sdelphij zb.zbm_creation_time = 789260183Sdelphij fromds->ds_phys->ds_creation_time; 790260183Sdelphij zb.zbm_creation_txg = 791260183Sdelphij fromds->ds_phys->ds_creation_txg; 792260183Sdelphij zb.zbm_guid = fromds->ds_phys->ds_guid; 793260183Sdelphij is_clone = (ds->ds_dir != fromds->ds_dir); 794260183Sdelphij dsl_dataset_rele(fromds, FTAG); 795260183Sdelphij } 796260183Sdelphij } else { 797260183Sdelphij err = dsl_bookmark_lookup(dp, 
fromsnap, ds, &zb); 798260183Sdelphij } 799248571Smm if (err != 0) { 800248571Smm dsl_dataset_rele(ds, FTAG); 801248571Smm dsl_pool_rele(dp, FTAG); 802248571Smm return (err); 803248571Smm } 804268075Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, 805260183Sdelphij outfd, fp, off); 806260183Sdelphij } else { 807268075Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, 808260183Sdelphij outfd, fp, off); 809248571Smm } 810260183Sdelphij if (owned) 811260183Sdelphij dsl_dataset_disown(ds, FTAG); 812260183Sdelphij else 813260183Sdelphij dsl_dataset_rele(ds, FTAG); 814260183Sdelphij return (err); 815248571Smm} 816248571Smm 817248571Smmint 818248571Smmdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 819248571Smm{ 820228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 821228103Smm int err; 822228103Smm uint64_t size; 823228103Smm 824248571Smm ASSERT(dsl_pool_config_held(dp)); 825248571Smm 826228103Smm /* tosnap must be a snapshot */ 827248571Smm if (!dsl_dataset_is_snapshot(ds)) 828249195Smm return (SET_ERROR(EINVAL)); 829228103Smm 830248571Smm /* 831248571Smm * fromsnap must be an earlier snapshot from the same fs as tosnap, 832248571Smm * or the origin's fs. 833248571Smm */ 834260183Sdelphij if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) 835249195Smm return (SET_ERROR(EXDEV)); 836228103Smm 837228103Smm /* Get uncompressed size estimate of changed data. */ 838228103Smm if (fromds == NULL) { 839228103Smm size = ds->ds_phys->ds_uncompressed_bytes; 840228103Smm } else { 841228103Smm uint64_t used, comp; 842228103Smm err = dsl_dataset_space_written(fromds, ds, 843228103Smm &used, &comp, &size); 844248571Smm if (err != 0) 845228103Smm return (err); 846228103Smm } 847228103Smm 848228103Smm /* 849228103Smm * Assume that space (both on-disk and in-stream) is dominated by 850228103Smm * data. 
We will adjust for indirect blocks and the copies property, 851228103Smm * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 852228103Smm */ 853228103Smm 854228103Smm /* 855228103Smm * Subtract out approximate space used by indirect blocks. 856228103Smm * Assume most space is used by data blocks (non-indirect, non-dnode). 857228103Smm * Assume all blocks are recordsize. Assume ditto blocks and 858228103Smm * internal fragmentation counter out compression. 859228103Smm * 860228103Smm * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 861228103Smm * block, which we observe in practice. 862228103Smm */ 863228103Smm uint64_t recordsize; 864248571Smm err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 865248571Smm if (err != 0) 866228103Smm return (err); 867228103Smm size -= size / recordsize * sizeof (blkptr_t); 868228103Smm 869228103Smm /* Add in the space for the record associated with each block. */ 870228103Smm size += size / recordsize * sizeof (dmu_replay_record_t); 871228103Smm 872228103Smm *sizep = size; 873228103Smm 874228103Smm return (0); 875228103Smm} 876228103Smm 877248571Smmtypedef struct dmu_recv_begin_arg { 878248571Smm const char *drba_origin; 879248571Smm dmu_recv_cookie_t *drba_cookie; 880248571Smm cred_t *drba_cred; 881253820Sdelphij uint64_t drba_snapobj; 882248571Smm} dmu_recv_begin_arg_t; 883168404Spjd 884168404Spjdstatic int 885248571Smmrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 886248571Smm uint64_t fromguid) 887168404Spjd{ 888185029Spjd uint64_t val; 889248571Smm int error; 890248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 891185029Spjd 892248571Smm /* temporary clone name must not exist */ 893248571Smm error = zap_lookup(dp->dp_meta_objset, 894248571Smm ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, 895248571Smm 8, 1, &val); 896248571Smm if (error != ENOENT) 897248571Smm return (error == 0 ? 
EBUSY : error); 898248571Smm 899219089Spjd /* new snapshot name must not exist */ 900248571Smm error = zap_lookup(dp->dp_meta_objset, 901248571Smm ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 902248571Smm 8, 1, &val); 903248571Smm if (error != ENOENT) 904248571Smm return (error == 0 ? EEXIST : error); 905168404Spjd 906264835Sdelphij /* 907264835Sdelphij * Check snapshot limit before receiving. We'll recheck again at the 908264835Sdelphij * end, but might as well abort before receiving if we're already over 909264835Sdelphij * the limit. 910264835Sdelphij * 911264835Sdelphij * Note that we do not check the file system limit with 912264835Sdelphij * dsl_dir_fscount_check because the temporary %clones don't count 913264835Sdelphij * against that limit. 914264835Sdelphij */ 915264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, 916264835Sdelphij NULL, drba->drba_cred); 917264835Sdelphij if (error != 0) 918264835Sdelphij return (error); 919264835Sdelphij 920248571Smm if (fromguid != 0) { 921253820Sdelphij dsl_dataset_t *snap; 922253820Sdelphij uint64_t obj = ds->ds_phys->ds_prev_snap_obj; 923253820Sdelphij 924253820Sdelphij /* Find snapshot in this dir that matches fromguid. 
*/ 925253820Sdelphij while (obj != 0) { 926253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 927253820Sdelphij &snap); 928253820Sdelphij if (error != 0) 929253820Sdelphij return (SET_ERROR(ENODEV)); 930253820Sdelphij if (snap->ds_dir != ds->ds_dir) { 931253820Sdelphij dsl_dataset_rele(snap, FTAG); 932253820Sdelphij return (SET_ERROR(ENODEV)); 933253820Sdelphij } 934253820Sdelphij if (snap->ds_phys->ds_guid == fromguid) 935253820Sdelphij break; 936253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 937253820Sdelphij dsl_dataset_rele(snap, FTAG); 938253820Sdelphij } 939253820Sdelphij if (obj == 0) 940249195Smm return (SET_ERROR(ENODEV)); 941168404Spjd 942253820Sdelphij if (drba->drba_cookie->drc_force) { 943253820Sdelphij drba->drba_snapobj = obj; 944253820Sdelphij } else { 945253820Sdelphij /* 946253820Sdelphij * If we are not forcing, there must be no 947253820Sdelphij * changes since fromsnap. 948253820Sdelphij */ 949253820Sdelphij if (dsl_dataset_modified_since_snap(ds, snap)) { 950219089Spjd dsl_dataset_rele(snap, FTAG); 951253820Sdelphij return (SET_ERROR(ETXTBSY)); 952219089Spjd } 953253820Sdelphij drba->drba_snapobj = ds->ds_prev->ds_object; 954219089Spjd } 955253820Sdelphij 956253820Sdelphij dsl_dataset_rele(snap, FTAG); 957219089Spjd } else { 958219089Spjd /* if full, most recent snapshot must be $ORIGIN */ 959219089Spjd if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 960249195Smm return (SET_ERROR(ENODEV)); 961253820Sdelphij drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj; 962219089Spjd } 963219089Spjd 964248571Smm return (0); 965168404Spjd 966168404Spjd} 967168404Spjd 968248571Smmstatic int 969248571Smmdmu_recv_begin_check(void *arg, dmu_tx_t *tx) 970248571Smm{ 971248571Smm dmu_recv_begin_arg_t *drba = arg; 972248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 973248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 974248571Smm uint64_t fromguid = drrb->drr_fromguid; 975248571Smm int flags = drrb->drr_flags; 976248571Smm int error; 
977268075Sdelphij uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); 978248571Smm dsl_dataset_t *ds; 979248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 980248571Smm 981248571Smm /* already checked */ 982248571Smm ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 983248571Smm 984248571Smm if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 985248571Smm DMU_COMPOUNDSTREAM || 986248571Smm drrb->drr_type >= DMU_OST_NUMTYPES || 987248571Smm ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 988249195Smm return (SET_ERROR(EINVAL)); 989248571Smm 990248571Smm /* Verify pool version supports SA if SA_SPILL feature set */ 991268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && 992268075Sdelphij spa_version(dp->dp_spa) < SPA_VERSION_SA) 993249195Smm return (SET_ERROR(ENOTSUP)); 994248571Smm 995268075Sdelphij /* 996268075Sdelphij * The receiving code doesn't know how to translate a WRITE_EMBEDDED 997268075Sdelphij * record to a plan WRITE record, so the pool must have the 998268075Sdelphij * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED 999268075Sdelphij * records. Same with WRITE_EMBEDDED records that use LZ4 compression. 
1000268075Sdelphij */ 1001268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && 1002268075Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) 1003268075Sdelphij return (SET_ERROR(ENOTSUP)); 1004268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && 1005268075Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 1006268075Sdelphij return (SET_ERROR(ENOTSUP)); 1007268075Sdelphij 1008248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1009248571Smm if (error == 0) { 1010248571Smm /* target fs already exists; recv into temp clone */ 1011248571Smm 1012248571Smm /* Can't recv a clone into an existing fs */ 1013248571Smm if (flags & DRR_FLAG_CLONE) { 1014248571Smm dsl_dataset_rele(ds, FTAG); 1015249195Smm return (SET_ERROR(EINVAL)); 1016248571Smm } 1017248571Smm 1018248571Smm error = recv_begin_check_existing_impl(drba, ds, fromguid); 1019248571Smm dsl_dataset_rele(ds, FTAG); 1020248571Smm } else if (error == ENOENT) { 1021248571Smm /* target fs does not exist; must be a full backup or clone */ 1022248571Smm char buf[MAXNAMELEN]; 1023248571Smm 1024248571Smm /* 1025248571Smm * If it's a non-clone incremental, we are missing the 1026248571Smm * target fs, so fail the recv. 1027248571Smm */ 1028248571Smm if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 1029249195Smm return (SET_ERROR(ENOENT)); 1030248571Smm 1031248571Smm /* Open the parent of tofs */ 1032248571Smm ASSERT3U(strlen(tofs), <, MAXNAMELEN); 1033248571Smm (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 1034248571Smm error = dsl_dataset_hold(dp, buf, FTAG, &ds); 1035248571Smm if (error != 0) 1036248571Smm return (error); 1037248571Smm 1038264835Sdelphij /* 1039264835Sdelphij * Check filesystem and snapshot limits before receiving. We'll 1040264835Sdelphij * recheck snapshot limits again at the end (we create the 1041264835Sdelphij * filesystems and increment those counts during begin_sync). 
1042264835Sdelphij */ 1043264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1044264835Sdelphij ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); 1045264835Sdelphij if (error != 0) { 1046264835Sdelphij dsl_dataset_rele(ds, FTAG); 1047264835Sdelphij return (error); 1048264835Sdelphij } 1049264835Sdelphij 1050264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1051264835Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); 1052264835Sdelphij if (error != 0) { 1053264835Sdelphij dsl_dataset_rele(ds, FTAG); 1054264835Sdelphij return (error); 1055264835Sdelphij } 1056264835Sdelphij 1057248571Smm if (drba->drba_origin != NULL) { 1058248571Smm dsl_dataset_t *origin; 1059248571Smm error = dsl_dataset_hold(dp, drba->drba_origin, 1060248571Smm FTAG, &origin); 1061248571Smm if (error != 0) { 1062248571Smm dsl_dataset_rele(ds, FTAG); 1063248571Smm return (error); 1064248571Smm } 1065248571Smm if (!dsl_dataset_is_snapshot(origin)) { 1066248571Smm dsl_dataset_rele(origin, FTAG); 1067248571Smm dsl_dataset_rele(ds, FTAG); 1068249195Smm return (SET_ERROR(EINVAL)); 1069248571Smm } 1070248571Smm if (origin->ds_phys->ds_guid != fromguid) { 1071248571Smm dsl_dataset_rele(origin, FTAG); 1072248571Smm dsl_dataset_rele(ds, FTAG); 1073249195Smm return (SET_ERROR(ENODEV)); 1074248571Smm } 1075248571Smm dsl_dataset_rele(origin, FTAG); 1076248571Smm } 1077248571Smm dsl_dataset_rele(ds, FTAG); 1078248571Smm error = 0; 1079248571Smm } 1080248571Smm return (error); 1081248571Smm} 1082248571Smm 1083168404Spjdstatic void 1084248571Smmdmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 1085168404Spjd{ 1086248571Smm dmu_recv_begin_arg_t *drba = arg; 1087248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1088248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 1089248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 1090248571Smm dsl_dataset_t *ds, *newds; 1091185029Spjd uint64_t dsobj; 1092248571Smm int error; 1093248571Smm uint64_t crflags; 1094168404Spjd 1095248571Smm 
crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 1096248571Smm DS_FLAG_CI_DATASET : 0; 1097168404Spjd 1098248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1099248571Smm if (error == 0) { 1100248571Smm /* create temporary clone */ 1101253820Sdelphij dsl_dataset_t *snap = NULL; 1102253820Sdelphij if (drba->drba_snapobj != 0) { 1103253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, 1104253820Sdelphij drba->drba_snapobj, FTAG, &snap)); 1105253820Sdelphij } 1106248571Smm dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 1107253820Sdelphij snap, crflags, drba->drba_cred, tx); 1108253820Sdelphij dsl_dataset_rele(snap, FTAG); 1109248571Smm dsl_dataset_rele(ds, FTAG); 1110248571Smm } else { 1111248571Smm dsl_dir_t *dd; 1112248571Smm const char *tail; 1113248571Smm dsl_dataset_t *origin = NULL; 1114248571Smm 1115248571Smm VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 1116248571Smm 1117248571Smm if (drba->drba_origin != NULL) { 1118248571Smm VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 1119248571Smm FTAG, &origin)); 1120248571Smm } 1121248571Smm 1122248571Smm /* Create new dataset. */ 1123248571Smm dsobj = dsl_dataset_create_sync(dd, 1124248571Smm strrchr(tofs, '/') + 1, 1125248571Smm origin, crflags, drba->drba_cred, tx); 1126248571Smm if (origin != NULL) 1127248571Smm dsl_dataset_rele(origin, FTAG); 1128248571Smm dsl_dir_rele(dd, FTAG); 1129248571Smm drba->drba_cookie->drc_newfs = B_TRUE; 1130248571Smm } 1131248571Smm VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 1132248571Smm 1133248571Smm dmu_buf_will_dirty(newds->ds_dbuf, tx); 1134248571Smm newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1135248571Smm 1136219089Spjd /* 1137219089Spjd * If we actually created a non-clone, we need to create the 1138219089Spjd * objset in our new dataset. 
1139219089Spjd */ 1140248571Smm if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 1141219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 1142248571Smm newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 1143219089Spjd } 1144168404Spjd 1145248571Smm drba->drba_cookie->drc_ds = newds; 1146185029Spjd 1147248571Smm spa_history_log_internal_ds(newds, "receive", tx, ""); 1148168404Spjd} 1149168404Spjd 1150185029Spjd/* 1151185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 1152185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 1153185029Spjd */ 1154185029Spjdint 1155248571Smmdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 1156248571Smm boolean_t force, char *origin, dmu_recv_cookie_t *drc) 1157168404Spjd{ 1158248571Smm dmu_recv_begin_arg_t drba = { 0 }; 1159248571Smm dmu_replay_record_t *drr; 1160168404Spjd 1161185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 1162185029Spjd drc->drc_drrb = drrb; 1163185029Spjd drc->drc_tosnap = tosnap; 1164248571Smm drc->drc_tofs = tofs; 1165185029Spjd drc->drc_force = force; 1166264835Sdelphij drc->drc_cred = CRED(); 1167168404Spjd 1168248571Smm if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1169248571Smm drc->drc_byteswap = B_TRUE; 1170248571Smm else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 1171249195Smm return (SET_ERROR(EINVAL)); 1172168404Spjd 1173248571Smm drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1174248571Smm drr->drr_type = DRR_BEGIN; 1175248571Smm drr->drr_u.drr_begin = *drc->drc_drrb; 1176248571Smm if (drc->drc_byteswap) { 1177248571Smm fletcher_4_incremental_byteswap(drr, 1178248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1179248571Smm } else { 1180248571Smm fletcher_4_incremental_native(drr, 1181248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1182248571Smm } 1183248571Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 1184219089Spjd 1185248571Smm if (drc->drc_byteswap) { 1186248571Smm drrb->drr_magic = 
BSWAP_64(drrb->drr_magic); 1187248571Smm drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1188248571Smm drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1189248571Smm drrb->drr_type = BSWAP_32(drrb->drr_type); 1190248571Smm drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1191248571Smm drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1192248571Smm } 1193168404Spjd 1194248571Smm drba.drba_origin = origin; 1195248571Smm drba.drba_cookie = drc; 1196248571Smm drba.drba_cred = CRED(); 1197219089Spjd 1198248571Smm return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 1199268473Sdelphij &drba, 5, ZFS_SPACE_CHECK_NORMAL)); 1200168404Spjd} 1201168404Spjd 1202185029Spjdstruct restorearg { 1203185029Spjd int err; 1204248571Smm boolean_t byteswap; 1205185029Spjd kthread_t *td; 1206185029Spjd struct file *fp; 1207185029Spjd char *buf; 1208185029Spjd uint64_t voff; 1209185029Spjd int bufsize; /* amount of memory allocated for buf */ 1210185029Spjd zio_cksum_t cksum; 1211219089Spjd avl_tree_t *guid_to_ds_map; 1212185029Spjd}; 1213185029Spjd 1214219089Spjdtypedef struct guid_map_entry { 1215219089Spjd uint64_t guid; 1216219089Spjd dsl_dataset_t *gme_ds; 1217219089Spjd avl_node_t avlnode; 1218219089Spjd} guid_map_entry_t; 1219219089Spjd
/*
 * Ordering function for guid_map_entry_t nodes, keyed on the "guid"
 * field.  Returns -1, 0 or 1 when the first entry's guid is less than,
 * equal to, or greater than the second's.
 */
static int
guid_compare(const void *arg1, const void *arg2)
{
	const guid_map_entry_t *lhs = arg1;
	const guid_map_entry_t *rhs = arg2;

	if (lhs->guid == rhs->guid)
		return (0);

	return ((lhs->guid < rhs->guid) ? -1 : 1);
}
1232219089Spjd 1233219089Spjdstatic void 1234219089Spjdfree_guid_map_onexit(void *arg) 1235219089Spjd{ 1236219089Spjd avl_tree_t *ca = arg; 1237219089Spjd void *cookie = NULL; 1238219089Spjd guid_map_entry_t *gmep; 1239219089Spjd 1240219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
1241248571Smm dsl_dataset_long_rele(gmep->gme_ds, gmep); 1242249196Smm dsl_dataset_rele(gmep->gme_ds, gmep); 1243219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 1244219089Spjd } 1245219089Spjd avl_destroy(ca); 1246219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 1247219089Spjd} 1248219089Spjd 1249219089Spjdstatic int 1250219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1251219089Spjd{ 1252168404Spjd struct uio auio; 1253168404Spjd struct iovec aiov; 1254168404Spjd int error; 1255168404Spjd 1256168404Spjd aiov.iov_base = buf; 1257168404Spjd aiov.iov_len = len; 1258168404Spjd auio.uio_iov = &aiov; 1259168404Spjd auio.uio_iovcnt = 1; 1260168404Spjd auio.uio_resid = len; 1261169170Spjd auio.uio_segflg = UIO_SYSSPACE; 1262168404Spjd auio.uio_rw = UIO_READ; 1263168404Spjd auio.uio_offset = off; 1264168404Spjd auio.uio_td = ra->td; 1265168404Spjd#ifdef _KERNEL 1266168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1267168404Spjd#else 1268168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1269168404Spjd error = EOPNOTSUPP; 1270168404Spjd#endif 1271168404Spjd *resid = auio.uio_resid; 1272168404Spjd return (error); 1273168404Spjd} 1274168404Spjd 1275168404Spjdstatic void * 1276272601Sdelphijrestore_read(struct restorearg *ra, int len, char *buf) 1277168404Spjd{ 1278185029Spjd int done = 0; 1279168404Spjd 1280272601Sdelphij if (buf == NULL) 1281272601Sdelphij buf = ra->buf; 1282272601Sdelphij 1283168404Spjd /* some things will require 8-byte alignment, so everything must */ 1284240415Smm ASSERT0(len % 8); 1285168404Spjd 1286185029Spjd while (done < len) { 1287219089Spjd ssize_t resid; 1288168404Spjd 1289272601Sdelphij ra->err = restore_bytes(ra, buf + done, 1290185029Spjd len - done, ra->voff, &resid); 1291168404Spjd 1292185029Spjd if (resid == len - done) 1293249195Smm ra->err = SET_ERROR(EINVAL); 1294185029Spjd ra->voff += len - done - resid; 1295185029Spjd done = len - resid; 
1296248571Smm if (ra->err != 0) 1297168404Spjd return (NULL); 1298168404Spjd } 1299168404Spjd 1300185029Spjd ASSERT3U(done, ==, len); 1301168404Spjd if (ra->byteswap) 1302272601Sdelphij fletcher_4_incremental_byteswap(buf, len, &ra->cksum); 1303168404Spjd else 1304272601Sdelphij fletcher_4_incremental_native(buf, len, &ra->cksum); 1305272601Sdelphij return (buf); 1306168404Spjd} 1307168404Spjd 1308168404Spjdstatic void 1309168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 1310168404Spjd{ 1311168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1312168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1313168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 1314185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1315168404Spjd switch (drr->drr_type) { 1316168404Spjd case DRR_BEGIN: 1317168404Spjd DO64(drr_begin.drr_magic); 1318219089Spjd DO64(drr_begin.drr_versioninfo); 1319168404Spjd DO64(drr_begin.drr_creation_time); 1320168404Spjd DO32(drr_begin.drr_type); 1321185029Spjd DO32(drr_begin.drr_flags); 1322168404Spjd DO64(drr_begin.drr_toguid); 1323168404Spjd DO64(drr_begin.drr_fromguid); 1324168404Spjd break; 1325168404Spjd case DRR_OBJECT: 1326168404Spjd DO64(drr_object.drr_object); 1327168404Spjd DO32(drr_object.drr_type); 1328168404Spjd DO32(drr_object.drr_bonustype); 1329168404Spjd DO32(drr_object.drr_blksz); 1330168404Spjd DO32(drr_object.drr_bonuslen); 1331219089Spjd DO64(drr_object.drr_toguid); 1332168404Spjd break; 1333168404Spjd case DRR_FREEOBJECTS: 1334168404Spjd DO64(drr_freeobjects.drr_firstobj); 1335168404Spjd DO64(drr_freeobjects.drr_numobjs); 1336219089Spjd DO64(drr_freeobjects.drr_toguid); 1337168404Spjd break; 1338168404Spjd case DRR_WRITE: 1339168404Spjd DO64(drr_write.drr_object); 1340168404Spjd DO32(drr_write.drr_type); 1341168404Spjd DO64(drr_write.drr_offset); 1342168404Spjd DO64(drr_write.drr_length); 1343219089Spjd DO64(drr_write.drr_toguid); 1344219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 
1345219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1346219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1347219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1348219089Spjd DO64(drr_write.drr_key.ddk_prop); 1349168404Spjd break; 1350219089Spjd case DRR_WRITE_BYREF: 1351219089Spjd DO64(drr_write_byref.drr_object); 1352219089Spjd DO64(drr_write_byref.drr_offset); 1353219089Spjd DO64(drr_write_byref.drr_length); 1354219089Spjd DO64(drr_write_byref.drr_toguid); 1355219089Spjd DO64(drr_write_byref.drr_refguid); 1356219089Spjd DO64(drr_write_byref.drr_refobject); 1357219089Spjd DO64(drr_write_byref.drr_refoffset); 1358219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1359219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1360219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1361219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1362219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 1363219089Spjd break; 1364268075Sdelphij case DRR_WRITE_EMBEDDED: 1365268075Sdelphij DO64(drr_write_embedded.drr_object); 1366268075Sdelphij DO64(drr_write_embedded.drr_offset); 1367268075Sdelphij DO64(drr_write_embedded.drr_length); 1368268075Sdelphij DO64(drr_write_embedded.drr_toguid); 1369268075Sdelphij DO32(drr_write_embedded.drr_lsize); 1370268075Sdelphij DO32(drr_write_embedded.drr_psize); 1371268075Sdelphij break; 1372168404Spjd case DRR_FREE: 1373168404Spjd DO64(drr_free.drr_object); 1374168404Spjd DO64(drr_free.drr_offset); 1375168404Spjd DO64(drr_free.drr_length); 1376219089Spjd DO64(drr_free.drr_toguid); 1377168404Spjd break; 1378219089Spjd case DRR_SPILL: 1379219089Spjd DO64(drr_spill.drr_object); 1380219089Spjd DO64(drr_spill.drr_length); 1381219089Spjd DO64(drr_spill.drr_toguid); 1382219089Spjd break; 1383168404Spjd case DRR_END: 1384168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1385168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1386168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1387168404Spjd 
DO64(drr_end.drr_checksum.zc_word[3]); 1388219089Spjd DO64(drr_end.drr_toguid); 1389168404Spjd break; 1390168404Spjd } 1391168404Spjd#undef DO64 1392168404Spjd#undef DO32 1393168404Spjd} 1394168404Spjd 1395168404Spjdstatic int 1396168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1397168404Spjd{ 1398168404Spjd int err; 1399168404Spjd dmu_tx_t *tx; 1400200727Sdelphij void *data = NULL; 1401168404Spjd 1402168404Spjd if (drro->drr_type == DMU_OT_NONE || 1403236884Smm !DMU_OT_IS_VALID(drro->drr_type) || 1404236884Smm !DMU_OT_IS_VALID(drro->drr_bonustype) || 1405219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1406168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1407168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1408168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1409168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 1410168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1411249195Smm return (SET_ERROR(EINVAL)); 1412168404Spjd } 1413168404Spjd 1414200726Sdelphij err = dmu_object_info(os, drro->drr_object, NULL); 1415168404Spjd 1416200726Sdelphij if (err != 0 && err != ENOENT) 1417249195Smm return (SET_ERROR(EINVAL)); 1418200726Sdelphij 1419201756Sdelphij if (drro->drr_bonuslen) { 1420272601Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); 1421248571Smm if (ra->err != 0) 1422201756Sdelphij return (ra->err); 1423201756Sdelphij } 1424201756Sdelphij 1425168404Spjd if (err == ENOENT) { 1426168404Spjd /* currently free, want to be allocated */ 1427200726Sdelphij tx = dmu_tx_create(os); 1428168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1429168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1430248571Smm if (err != 0) { 1431168404Spjd dmu_tx_abort(tx); 1432168404Spjd return (err); 1433168404Spjd } 1434168404Spjd err = dmu_object_claim(os, drro->drr_object, 1435168404Spjd drro->drr_type, drro->drr_blksz, 1436168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1437200726Sdelphij 
dmu_tx_commit(tx); 1438168404Spjd } else { 1439168404Spjd /* currently allocated, want to be allocated */ 1440168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1441168404Spjd drro->drr_type, drro->drr_blksz, 1442200726Sdelphij drro->drr_bonustype, drro->drr_bonuslen); 1443168404Spjd } 1444248571Smm if (err != 0) { 1445249195Smm return (SET_ERROR(EINVAL)); 1446219089Spjd } 1447200726Sdelphij 1448200726Sdelphij tx = dmu_tx_create(os); 1449200726Sdelphij dmu_tx_hold_bonus(tx, drro->drr_object); 1450200726Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1451248571Smm if (err != 0) { 1452200726Sdelphij dmu_tx_abort(tx); 1453200726Sdelphij return (err); 1454168404Spjd } 1455168404Spjd 1456219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1457219089Spjd tx); 1458168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1459168404Spjd 1460200727Sdelphij if (data != NULL) { 1461168404Spjd dmu_buf_t *db; 1462200727Sdelphij 1463168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1464168404Spjd dmu_buf_will_dirty(db, tx); 1465168404Spjd 1466185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1467185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1468168404Spjd if (ra->byteswap) { 1469236884Smm dmu_object_byteswap_t byteswap = 1470236884Smm DMU_OT_BYTESWAP(drro->drr_bonustype); 1471236884Smm dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1472168404Spjd drro->drr_bonuslen); 1473168404Spjd } 1474168404Spjd dmu_buf_rele(db, FTAG); 1475168404Spjd } 1476168404Spjd dmu_tx_commit(tx); 1477168404Spjd return (0); 1478168404Spjd} 1479168404Spjd
/*
 * Apply a DRR_FREEOBJECTS record: free every object that currently
 * exists in the range [drr_firstobj, drr_firstobj + drr_numobjs).
 *
 * Returns 0 on success, EINVAL if the range wraps around, or the first
 * error reported by dmu_free_long_object().  "ra" is not used.
 */
/* ARGSUSED */
static int
restore_freeobjects(struct restorearg *ra, objset_t *os,
    struct drr_freeobjects *drrfo)
{
	uint64_t first = drrfo->drr_firstobj;
	uint64_t end = first + drrfo->drr_numobjs;
	uint64_t cur;

	/* Reject a record whose object range overflows. */
	if (end < first)
		return (SET_ERROR(EINVAL));

	/* dmu_object_next() advances "cur" to the next candidate object. */
	for (cur = first; cur < end;
	    (void) dmu_object_next(os, &cur, FALSE, 0)) {
		/* Only objects that can be looked up are freed. */
		if (dmu_object_info(os, cur, NULL) == 0) {
			int rc = dmu_free_long_object(os, cur);

			if (rc != 0)
				return (rc);
		}
	}
	return (0);
}
1504168404Spjd 1505168404Spjdstatic int 1506168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1507168404Spjd struct drr_write *drrw) 1508168404Spjd{ 1509168404Spjd dmu_tx_t *tx; 1510168404Spjd void *data; 1511168404Spjd int err; 1512168404Spjd 1513168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1514236884Smm !DMU_OT_IS_VALID(drrw->drr_type)) 1515249195Smm return (SET_ERROR(EINVAL)); 1516168404Spjd 1517168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1518249195Smm return (SET_ERROR(EINVAL)); 1519168404Spjd 1520272601Sdelphij dmu_buf_t *bonus; 1521272601Sdelphij if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) 1522272601Sdelphij return (SET_ERROR(EINVAL)); 1523272601Sdelphij 1524272601Sdelphij arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); 1525272601Sdelphij 1526272601Sdelphij data = restore_read(ra, drrw->drr_length, abuf->b_data); 1527272601Sdelphij if (data == NULL) { 1528272601Sdelphij dmu_return_arcbuf(abuf); 1529272601Sdelphij dmu_buf_rele(bonus, FTAG); 1530272601Sdelphij return (ra->err); 1531272601Sdelphij } 1532272601Sdelphij 1533168404Spjd tx = dmu_tx_create(os); 1534168404Spjd 1535168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1536168404Spjd drrw->drr_offset, drrw->drr_length); 1537168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1538248571Smm if (err != 0) { 1539272601Sdelphij dmu_return_arcbuf(abuf); 1540272601Sdelphij dmu_buf_rele(bonus, FTAG); 1541168404Spjd dmu_tx_abort(tx);
1542168404Spjd return (err); 1543168404Spjd } 1544236884Smm if (ra->byteswap) { 1545236884Smm dmu_object_byteswap_t byteswap = 1546236884Smm DMU_OT_BYTESWAP(drrw->drr_type); 1547236884Smm dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1548236884Smm } 1549272601Sdelphij dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); 1550168404Spjd dmu_tx_commit(tx); 1551272601Sdelphij dmu_buf_rele(bonus, FTAG); 1552168404Spjd return (0); 1553168404Spjd} 1554168404Spjd 1555219089Spjd/* 1556219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1557219089Spjd * streams to refer to a copy of the data that is already on the 1558219089Spjd * system because it came in earlier in the stream. This function 1559219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1560219089Spjd * data from the stream to fulfill this write. 1561219089Spjd */ 1562219089Spjdstatic int 1563219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1564219089Spjd struct drr_write_byref *drrwbr) 1565219089Spjd{ 1566219089Spjd dmu_tx_t *tx; 1567219089Spjd int err; 1568219089Spjd guid_map_entry_t gmesrch; 1569219089Spjd guid_map_entry_t *gmep; 1570268075Sdelphij avl_index_t where; 1571219089Spjd objset_t *ref_os = NULL; 1572219089Spjd dmu_buf_t *dbp; 1573219089Spjd 1574219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1575249195Smm return (SET_ERROR(EINVAL)); 1576219089Spjd 1577219089Spjd /* 1578219089Spjd * If the GUID of the referenced dataset is different from the 1579219089Spjd * GUID of the target dataset, find the referenced dataset. 
1580219089Spjd */ 1581219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1582219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1583219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1584219089Spjd &where)) == NULL) { 1585249195Smm return (SET_ERROR(EINVAL)); 1586219089Spjd } 1587219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1588249195Smm return (SET_ERROR(EINVAL)); 1589219089Spjd } else { 1590219089Spjd ref_os = os; 1591219089Spjd } 1592219089Spjd 1593268075Sdelphij err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1594268075Sdelphij drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); 1595268075Sdelphij if (err != 0) 1596219089Spjd return (err); 1597219089Spjd 1598219089Spjd tx = dmu_tx_create(os); 1599219089Spjd 1600219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1601219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1602219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1603248571Smm if (err != 0) { 1604219089Spjd dmu_tx_abort(tx); 1605219089Spjd return (err); 1606219089Spjd } 1607219089Spjd dmu_write(os, drrwbr->drr_object, 1608219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1609219089Spjd dmu_buf_rele(dbp, FTAG); 1610219089Spjd dmu_tx_commit(tx); 1611219089Spjd return (0); 1612219089Spjd} 1613219089Spjd 1614219089Spjdstatic int 1615268075Sdelphijrestore_write_embedded(struct restorearg *ra, objset_t *os, 1616268075Sdelphij struct drr_write_embedded *drrwnp) 1617268075Sdelphij{ 1618268075Sdelphij dmu_tx_t *tx; 1619268075Sdelphij int err; 1620268075Sdelphij void *data; 1621268075Sdelphij 1622268075Sdelphij if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) 1623268075Sdelphij return (EINVAL); 1624268075Sdelphij 1625268075Sdelphij if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) 1626268075Sdelphij return (EINVAL); 1627268075Sdelphij 1628268075Sdelphij if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) 1629268075Sdelphij return (EINVAL); 1630268075Sdelphij if (drrwnp->drr_compression >= 
ZIO_COMPRESS_FUNCTIONS) 1631268075Sdelphij return (EINVAL); 1632268075Sdelphij 1633272601Sdelphij data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); 1634268075Sdelphij if (data == NULL) 1635268075Sdelphij return (ra->err); 1636268075Sdelphij 1637268075Sdelphij tx = dmu_tx_create(os); 1638268075Sdelphij 1639268075Sdelphij dmu_tx_hold_write(tx, drrwnp->drr_object, 1640268075Sdelphij drrwnp->drr_offset, drrwnp->drr_length); 1641268075Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1642268075Sdelphij if (err != 0) { 1643268075Sdelphij dmu_tx_abort(tx); 1644268075Sdelphij return (err); 1645268075Sdelphij } 1646268075Sdelphij 1647268075Sdelphij dmu_write_embedded(os, drrwnp->drr_object, 1648268075Sdelphij drrwnp->drr_offset, data, drrwnp->drr_etype, 1649268075Sdelphij drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, 1650268075Sdelphij ra->byteswap ^ ZFS_HOST_BYTEORDER, tx); 1651268075Sdelphij 1652268075Sdelphij dmu_tx_commit(tx); 1653268075Sdelphij return (0); 1654268075Sdelphij} 1655268075Sdelphij 1656268075Sdelphijstatic int 1657219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1658219089Spjd{ 1659219089Spjd dmu_tx_t *tx; 1660219089Spjd void *data; 1661219089Spjd dmu_buf_t *db, *db_spill; 1662219089Spjd int err; 1663219089Spjd 1664219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1665219089Spjd drrs->drr_length > SPA_MAXBLOCKSIZE) 1666249195Smm return (SET_ERROR(EINVAL)); 1667219089Spjd 1668272601Sdelphij data = restore_read(ra, drrs->drr_length, NULL); 1669219089Spjd if (data == NULL) 1670219089Spjd return (ra->err); 1671219089Spjd 1672219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1673249195Smm return (SET_ERROR(EINVAL)); 1674219089Spjd 1675219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1676219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1677219089Spjd dmu_buf_rele(db, FTAG); 1678219089Spjd return (err); 1679219089Spjd } 1680219089Spjd 
1681219089Spjd tx = dmu_tx_create(os); 1682219089Spjd 1683219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1684219089Spjd 1685219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1686248571Smm if (err != 0) { 1687219089Spjd dmu_buf_rele(db, FTAG); 1688219089Spjd dmu_buf_rele(db_spill, FTAG); 1689219089Spjd dmu_tx_abort(tx); 1690219089Spjd return (err); 1691219089Spjd } 1692219089Spjd dmu_buf_will_dirty(db_spill, tx); 1693219089Spjd 1694219089Spjd if (db_spill->db_size < drrs->drr_length) 1695219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1696219089Spjd drrs->drr_length, tx)); 1697219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1698219089Spjd 1699219089Spjd dmu_buf_rele(db, FTAG); 1700219089Spjd dmu_buf_rele(db_spill, FTAG); 1701219089Spjd 1702219089Spjd dmu_tx_commit(tx); 1703219089Spjd return (0); 1704219089Spjd} 1705219089Spjd 1706168404Spjd/* ARGSUSED */ 1707168404Spjdstatic int 1708168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1709168404Spjd struct drr_free *drrf) 1710168404Spjd{ 1711168404Spjd int err; 1712168404Spjd 1713168404Spjd if (drrf->drr_length != -1ULL && 1714168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1715249195Smm return (SET_ERROR(EINVAL)); 1716168404Spjd 1717168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1718249195Smm return (SET_ERROR(EINVAL)); 1719168404Spjd 1720185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1721168404Spjd drrf->drr_offset, drrf->drr_length); 1722168404Spjd return (err); 1723168404Spjd} 1724168404Spjd 1725248571Smm/* used to destroy the drc_ds on error */ 1726248571Smmstatic void 1727248571Smmdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1728248571Smm{ 1729248571Smm char name[MAXNAMELEN]; 1730248571Smm dsl_dataset_name(drc->drc_ds, name); 1731248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1732248571Smm (void) dsl_destroy_head(name); 1733248571Smm} 1734248571Smm 1735185029Spjd/* 1736185029Spjd * NB: callers *must* call dmu_recv_end() if this 
succeeds. 1737185029Spjd */ 1738168404Spjdint 1739219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1740219089Spjd int cleanup_fd, uint64_t *action_handlep) 1741168404Spjd{ 1742185029Spjd struct restorearg ra = { 0 }; 1743168404Spjd dmu_replay_record_t *drr; 1744185029Spjd objset_t *os; 1745185029Spjd zio_cksum_t pcksum; 1746219089Spjd int featureflags; 1747168404Spjd 1748248571Smm ra.byteswap = drc->drc_byteswap; 1749248571Smm ra.cksum = drc->drc_cksum; 1750219089Spjd ra.td = curthread; 1751185029Spjd ra.fp = fp; 1752185029Spjd ra.voff = *voffp; 1753185029Spjd ra.bufsize = 1<<20; 1754185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1755168404Spjd 1756185029Spjd /* these were verified in dmu_recv_begin */ 1757248571Smm ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1758219089Spjd DMU_SUBSTREAM); 1759248571Smm ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1760168404Spjd 1761168404Spjd /* 1762168404Spjd * Open the objset we are modifying. 
1763168404Spjd */ 1764248571Smm VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1765168404Spjd 1766248571Smm ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1767168404Spjd 1768219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1769219089Spjd 1770219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1771219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1772219089Spjd minor_t minor; 1773219089Spjd 1774219089Spjd if (cleanup_fd == -1) { 1775249195Smm ra.err = SET_ERROR(EBADF); 1776219089Spjd goto out; 1777219089Spjd } 1778219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1779248571Smm if (ra.err != 0) { 1780219089Spjd cleanup_fd = -1; 1781219089Spjd goto out; 1782219089Spjd } 1783219089Spjd 1784219089Spjd if (*action_handlep == 0) { 1785219089Spjd ra.guid_to_ds_map = 1786219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1787219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1788219089Spjd sizeof (guid_map_entry_t), 1789219089Spjd offsetof(guid_map_entry_t, avlnode)); 1790219089Spjd ra.err = zfs_onexit_add_cb(minor, 1791219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1792219089Spjd action_handlep); 1793248571Smm if (ra.err != 0) 1794219089Spjd goto out; 1795219089Spjd } else { 1796219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1797219089Spjd (void **)&ra.guid_to_ds_map); 1798248571Smm if (ra.err != 0) 1799219089Spjd goto out; 1800219089Spjd } 1801221263Smm 1802221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1803219089Spjd } 1804219089Spjd 1805168404Spjd /* 1806168404Spjd * Read records and process them. 
1807168404Spjd */ 1808185029Spjd pcksum = ra.cksum; 1809168404Spjd while (ra.err == 0 && 1810272601Sdelphij NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { 1811185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1812249195Smm ra.err = SET_ERROR(EINTR); 1813168404Spjd goto out; 1814168404Spjd } 1815168404Spjd 1816168404Spjd if (ra.byteswap) 1817168404Spjd backup_byteswap(drr); 1818168404Spjd 1819168404Spjd switch (drr->drr_type) { 1820168404Spjd case DRR_OBJECT: 1821168404Spjd { 1822168404Spjd /* 1823168404Spjd * We need to make a copy of the record header, 1824168404Spjd * because restore_{object,write} may need to 1825168404Spjd * restore_read(), which will invalidate drr. 1826168404Spjd */ 1827168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1828168404Spjd ra.err = restore_object(&ra, os, &drro); 1829168404Spjd break; 1830168404Spjd } 1831168404Spjd case DRR_FREEOBJECTS: 1832168404Spjd { 1833168404Spjd struct drr_freeobjects drrfo = 1834168404Spjd drr->drr_u.drr_freeobjects; 1835168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1836168404Spjd break; 1837168404Spjd } 1838168404Spjd case DRR_WRITE: 1839168404Spjd { 1840168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1841168404Spjd ra.err = restore_write(&ra, os, &drrw); 1842168404Spjd break; 1843168404Spjd } 1844219089Spjd case DRR_WRITE_BYREF: 1845219089Spjd { 1846219089Spjd struct drr_write_byref drrwbr = 1847219089Spjd drr->drr_u.drr_write_byref; 1848219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1849219089Spjd break; 1850219089Spjd } 1851268075Sdelphij case DRR_WRITE_EMBEDDED: 1852268075Sdelphij { 1853268075Sdelphij struct drr_write_embedded drrwe = 1854268075Sdelphij drr->drr_u.drr_write_embedded; 1855268075Sdelphij ra.err = restore_write_embedded(&ra, os, &drrwe); 1856268075Sdelphij break; 1857268075Sdelphij } 1858168404Spjd case DRR_FREE: 1859168404Spjd { 1860168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1861168404Spjd ra.err = restore_free(&ra, 
os, &drrf); 1862168404Spjd break; 1863168404Spjd } 1864168404Spjd case DRR_END: 1865168404Spjd { 1866168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1867168404Spjd /* 1868168404Spjd * We compare against the *previous* checksum 1869168404Spjd * value, because the stored checksum is of 1870168404Spjd * everything before the DRR_END record. 1871168404Spjd */ 1872185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1873249195Smm ra.err = SET_ERROR(ECKSUM); 1874168404Spjd goto out; 1875168404Spjd } 1876219089Spjd case DRR_SPILL: 1877219089Spjd { 1878219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1879219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1880219089Spjd break; 1881219089Spjd } 1882168404Spjd default: 1883249195Smm ra.err = SET_ERROR(EINVAL); 1884168404Spjd goto out; 1885168404Spjd } 1886185029Spjd pcksum = ra.cksum; 1887168404Spjd } 1888185029Spjd ASSERT(ra.err != 0); 1889168404Spjd 1890168404Spjdout: 1891219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1892219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1893168404Spjd 1894185029Spjd if (ra.err != 0) { 1895168404Spjd /* 1896219089Spjd * destroy what we created, so we don't leave it in the 1897219089Spjd * inconsistent restoring state. 
1898168404Spjd */ 1899248571Smm dmu_recv_cleanup_ds(drc); 1900168404Spjd } 1901168404Spjd 1902168404Spjd kmem_free(ra.buf, ra.bufsize); 1903185029Spjd *voffp = ra.voff; 1904168404Spjd return (ra.err); 1905168404Spjd} 1906185029Spjd 1907185029Spjdstatic int 1908248571Smmdmu_recv_end_check(void *arg, dmu_tx_t *tx) 1909185029Spjd{ 1910248571Smm dmu_recv_cookie_t *drc = arg; 1911248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1912248571Smm int error; 1913185029Spjd 1914248571Smm ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1915248571Smm 1916248571Smm if (!drc->drc_newfs) { 1917248571Smm dsl_dataset_t *origin_head; 1918248571Smm 1919248571Smm error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 1920248571Smm if (error != 0) 1921248571Smm return (error); 1922253820Sdelphij if (drc->drc_force) { 1923253820Sdelphij /* 1924253820Sdelphij * We will destroy any snapshots in tofs (i.e. before 1925253820Sdelphij * origin_head) that are after the origin (which is 1926253820Sdelphij * the snap before drc_ds, because drc_ds can not 1927253820Sdelphij * have any snaps of its own). 
1928253820Sdelphij */ 1929253820Sdelphij uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1930253820Sdelphij while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1931253820Sdelphij dsl_dataset_t *snap; 1932253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 1933253820Sdelphij &snap); 1934253820Sdelphij if (error != 0) 1935253820Sdelphij return (error); 1936253820Sdelphij if (snap->ds_dir != origin_head->ds_dir) 1937253820Sdelphij error = SET_ERROR(EINVAL); 1938253820Sdelphij if (error == 0) { 1939253820Sdelphij error = dsl_destroy_snapshot_check_impl( 1940253820Sdelphij snap, B_FALSE); 1941253820Sdelphij } 1942253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 1943253820Sdelphij dsl_dataset_rele(snap, FTAG); 1944253820Sdelphij if (error != 0) 1945253820Sdelphij return (error); 1946253820Sdelphij } 1947253820Sdelphij } 1948248571Smm error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 1949253816Sdelphij origin_head, drc->drc_force, drc->drc_owner, tx); 1950248571Smm if (error != 0) { 1951248571Smm dsl_dataset_rele(origin_head, FTAG); 1952248571Smm return (error); 1953248571Smm } 1954248571Smm error = dsl_dataset_snapshot_check_impl(origin_head, 1955264835Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1956248571Smm dsl_dataset_rele(origin_head, FTAG); 1957248571Smm if (error != 0) 1958248571Smm return (error); 1959248571Smm 1960248571Smm error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 1961248571Smm } else { 1962248571Smm error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 1963264835Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 1964248571Smm } 1965248571Smm return (error); 1966185029Spjd} 1967185029Spjd 1968185029Spjdstatic void 1969248571Smmdmu_recv_end_sync(void *arg, dmu_tx_t *tx) 1970185029Spjd{ 1971248571Smm dmu_recv_cookie_t *drc = arg; 1972248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1973185029Spjd 1974248571Smm spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 1975248571Smm tx, "snap=%s", 
drc->drc_tosnap); 1976185029Spjd 1977248571Smm if (!drc->drc_newfs) { 1978248571Smm dsl_dataset_t *origin_head; 1979185029Spjd 1980248571Smm VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 1981248571Smm &origin_head)); 1982253820Sdelphij 1983253820Sdelphij if (drc->drc_force) { 1984253820Sdelphij /* 1985253820Sdelphij * Destroy any snapshots of drc_tofs (origin_head) 1986253820Sdelphij * after the origin (the snap before drc_ds). 1987253820Sdelphij */ 1988253820Sdelphij uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; 1989253820Sdelphij while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { 1990253820Sdelphij dsl_dataset_t *snap; 1991253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, 1992253820Sdelphij &snap)); 1993253820Sdelphij ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); 1994253820Sdelphij obj = snap->ds_phys->ds_prev_snap_obj; 1995253820Sdelphij dsl_destroy_snapshot_sync_impl(snap, 1996253820Sdelphij B_FALSE, tx); 1997253820Sdelphij dsl_dataset_rele(snap, FTAG); 1998253820Sdelphij } 1999253820Sdelphij } 2000253820Sdelphij VERIFY3P(drc->drc_ds->ds_prev, ==, 2001253820Sdelphij origin_head->ds_prev); 2002253820Sdelphij 2003248571Smm dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 2004248571Smm origin_head, tx); 2005248571Smm dsl_dataset_snapshot_sync_impl(origin_head, 2006248571Smm drc->drc_tosnap, tx); 2007248571Smm 2008248571Smm /* set snapshot's creation time and guid */ 2009248571Smm dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 2010248571Smm origin_head->ds_prev->ds_phys->ds_creation_time = 2011248571Smm drc->drc_drrb->drr_creation_time; 2012248571Smm origin_head->ds_prev->ds_phys->ds_guid = 2013248571Smm drc->drc_drrb->drr_toguid; 2014248571Smm origin_head->ds_prev->ds_phys->ds_flags &= 2015248571Smm ~DS_FLAG_INCONSISTENT; 2016248571Smm 2017248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2018248571Smm origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2019248571Smm 2020248571Smm dsl_dataset_rele(origin_head, FTAG); 
2021248571Smm dsl_destroy_head_sync_impl(drc->drc_ds, tx); 2022253816Sdelphij 2023253816Sdelphij if (drc->drc_owner != NULL) 2024253816Sdelphij VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); 2025248571Smm } else { 2026248571Smm dsl_dataset_t *ds = drc->drc_ds; 2027248571Smm 2028248571Smm dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 2029248571Smm 2030248571Smm /* set snapshot's creation time and guid */ 2031248571Smm dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 2032248571Smm ds->ds_prev->ds_phys->ds_creation_time = 2033248571Smm drc->drc_drrb->drr_creation_time; 2034248571Smm ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; 2035248571Smm ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2036248571Smm 2037248571Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 2038248571Smm ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 2039248571Smm } 2040248571Smm drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; 2041248571Smm /* 2042248571Smm * Release the hold from dmu_recv_begin. This must be done before 2043248571Smm * we return to open context, so that when we free the dataset's dnode, 2044248571Smm * we can evict its bonus buffer. 
2045248571Smm */ 2046248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 2047248571Smm drc->drc_ds = NULL; 2048185029Spjd} 2049185029Spjd 2050219089Spjdstatic int 2051248571Smmadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 2052221263Smm{ 2053248571Smm dsl_pool_t *dp; 2054221263Smm dsl_dataset_t *snapds; 2055221263Smm guid_map_entry_t *gmep; 2056221263Smm int err; 2057221263Smm 2058221263Smm ASSERT(guid_map != NULL); 2059221263Smm 2060248571Smm err = dsl_pool_hold(name, FTAG, &dp); 2061248571Smm if (err != 0) 2062248571Smm return (err); 2063249356Smm gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); 2064249196Smm err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); 2065221263Smm if (err == 0) { 2066221263Smm gmep->guid = snapds->ds_phys->ds_guid; 2067221263Smm gmep->gme_ds = snapds; 2068221263Smm avl_add(guid_map, gmep); 2069248571Smm dsl_dataset_long_hold(snapds, gmep); 2070249196Smm } else 2071249356Smm kmem_free(gmep, sizeof (*gmep)); 2072221263Smm 2073248571Smm dsl_pool_rele(dp, FTAG); 2074221263Smm return (err); 2075221263Smm} 2076221263Smm 2077248571Smmstatic int dmu_recv_end_modified_blocks = 3; 2078248571Smm 2079221263Smmstatic int 2080219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 2081185029Spjd{ 2082248571Smm int error; 2083248571Smm char name[MAXNAMELEN]; 2084185029Spjd 2085248571Smm#ifdef _KERNEL 2086248571Smm /* 2087248571Smm * We will be destroying the ds; make sure its origin is unmounted if 2088248571Smm * necessary. 
2089248571Smm */ 2090248571Smm dsl_dataset_name(drc->drc_ds, name); 2091248571Smm zfs_destroy_unmount_origin(name); 2092248571Smm#endif 2093185029Spjd 2094248571Smm error = dsl_sync_task(drc->drc_tofs, 2095248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2096268473Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2097185029Spjd 2098248571Smm if (error != 0) 2099248571Smm dmu_recv_cleanup_ds(drc); 2100248571Smm return (error); 2101185029Spjd} 2102219089Spjd 2103219089Spjdstatic int 2104219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 2105219089Spjd{ 2106248571Smm int error; 2107219089Spjd 2108248571Smm error = dsl_sync_task(drc->drc_tofs, 2109248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2110268473Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2111219089Spjd 2112248571Smm if (error != 0) { 2113248571Smm dmu_recv_cleanup_ds(drc); 2114248571Smm } else if (drc->drc_guid_to_ds_map != NULL) { 2115248571Smm (void) add_ds_to_guidmap(drc->drc_tofs, 2116248571Smm drc->drc_guid_to_ds_map, 2117248571Smm drc->drc_newsnapobj); 2118219089Spjd } 2119248571Smm return (error); 2120219089Spjd} 2121219089Spjd 2122219089Spjdint 2123253816Sdelphijdmu_recv_end(dmu_recv_cookie_t *drc, void *owner) 2124219089Spjd{ 2125253816Sdelphij drc->drc_owner = owner; 2126253816Sdelphij 2127248571Smm if (drc->drc_newfs) 2128248571Smm return (dmu_recv_new_end(drc)); 2129248571Smm else 2130219089Spjd return (dmu_recv_existing_end(drc)); 2131219089Spjd} 2132253821Sdelphij 2133253821Sdelphij/* 2134253821Sdelphij * Return TRUE if this objset is currently being received into. 2135253821Sdelphij */ 2136253821Sdelphijboolean_t 2137253821Sdelphijdmu_objset_is_receiving(objset_t *os) 2138253821Sdelphij{ 2139253821Sdelphij return (os->os_dsl_dataset != NULL && 2140253821Sdelphij os->os_dsl_dataset->ds_owner == dmu_recv_tag); 2141253821Sdelphij} 2142