dmu_send.c revision 286575
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24268123Sdelphij * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 25264835Sdelphij * Copyright (c) 2014, Joyent, Inc. All rights reserved. 26235222Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. 27272810Sdelphij * Copyright 2014 HybridCluster. All rights reserved. 28221263Smm */ 29168404Spjd 30168404Spjd#include <sys/dmu.h> 31168404Spjd#include <sys/dmu_impl.h> 32168404Spjd#include <sys/dmu_tx.h> 33168404Spjd#include <sys/dbuf.h> 34168404Spjd#include <sys/dnode.h> 35168404Spjd#include <sys/zfs_context.h> 36168404Spjd#include <sys/dmu_objset.h> 37168404Spjd#include <sys/dmu_traverse.h> 38168404Spjd#include <sys/dsl_dataset.h> 39168404Spjd#include <sys/dsl_dir.h> 40219089Spjd#include <sys/dsl_prop.h> 41168404Spjd#include <sys/dsl_pool.h> 42168404Spjd#include <sys/dsl_synctask.h> 43168404Spjd#include <sys/zfs_ioctl.h> 44168404Spjd#include <sys/zap.h> 45168404Spjd#include <sys/zio_checksum.h> 46219089Spjd#include <sys/zfs_znode.h> 47219089Spjd#include <zfs_fletcher.h> 48219089Spjd#include <sys/avl.h> 49219089Spjd#include <sys/ddt.h> 50219089Spjd#include <sys/zfs_onexit.h> 51248571Smm#include <sys/dmu_send.h> 52248571Smm#include <sys/dsl_destroy.h> 53268075Sdelphij#include <sys/blkptr.h> 54260183Sdelphij#include <sys/dsl_bookmark.h> 55268075Sdelphij#include <sys/zfeature.h> 56168404Spjd 57268075Sdelphij#ifdef __FreeBSD__ 58268075Sdelphij#undef dump_write 59268075Sdelphij#define dump_write dmu_dump_write 60268075Sdelphij#endif 61268075Sdelphij 62228103Smm/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ 63228103Smmint zfs_send_corrupt_data = B_FALSE; 64228103Smm 65185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 66248571Smmstatic const char *recv_clone_name = "%recv"; 67185029Spjd 68168404Spjdstatic int 69235222Smmdump_bytes(dmu_sendarg_t *dsp, void *buf, int len) 70168404Spjd{ 71235222Smm dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; 72168404Spjd struct uio auio; 73168404Spjd struct iovec aiov; 74240415Smm ASSERT0(len % 8); 75168404Spjd 76235222Smm fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); 77168404Spjd aiov.iov_base = buf; 78168404Spjd aiov.iov_len = len; 79168404Spjd auio.uio_iov = &aiov; 80168404Spjd auio.uio_iovcnt = 1; 81168404Spjd auio.uio_resid = len; 82169170Spjd auio.uio_segflg = UIO_SYSSPACE; 83168404Spjd auio.uio_rw = UIO_WRITE; 84168404Spjd auio.uio_offset = (off_t)-1; 85235222Smm auio.uio_td = dsp->dsa_td; 86168404Spjd#ifdef _KERNEL 87235222Smm if (dsp->dsa_fp->f_type == DTYPE_VNODE) 88168404Spjd bwillwrite(); 89235222Smm dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0, 90235222Smm dsp->dsa_td); 91168404Spjd#else 92168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 93235222Smm dsp->dsa_err = EOPNOTSUPP; 94168404Spjd#endif 95235222Smm mutex_enter(&ds->ds_sendstream_lock); 96235222Smm *dsp->dsa_off += len; 97235222Smm mutex_exit(&ds->ds_sendstream_lock); 98235222Smm 99235222Smm return (dsp->dsa_err); 100168404Spjd} 101168404Spjd 102168404Spjdstatic int 103235222Smmdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 104168404Spjd uint64_t length) 105168404Spjd{ 106235222Smm struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); 107219089Spjd 108253821Sdelphij /* 109253821Sdelphij * When we receive a free record, dbuf_free_range() assumes 110253821Sdelphij * that the receiving system doesn't have any dbufs in the range 111253821Sdelphij * being freed. This is always true because there is a one-record 112253821Sdelphij * constraint: we only send one WRITE record for any given 113253821Sdelphij * object+offset. We know that the one-record constraint is 114253821Sdelphij * true because we always send data in increasing order by 115253821Sdelphij * object,offset. 116253821Sdelphij * 117253821Sdelphij * If the increasing-order constraint ever changes, we should find 118253821Sdelphij * another way to assert that the one-record constraint is still 119253821Sdelphij * satisfied. 120253821Sdelphij */ 121253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 122253821Sdelphij (object == dsp->dsa_last_data_object && 123253821Sdelphij offset > dsp->dsa_last_data_offset)); 124253821Sdelphij 125253821Sdelphij /* 126253821Sdelphij * If we are doing a non-incremental send, then there can't 127253821Sdelphij * be any data in the dataset we're receiving into. Therefore 128253821Sdelphij * a free record would simply be a no-op. Save space by not 129253821Sdelphij * sending it to begin with. 130253821Sdelphij */ 131253821Sdelphij if (!dsp->dsa_incremental) 132253821Sdelphij return (0); 133253821Sdelphij 134237458Smm if (length != -1ULL && offset + length < offset) 135237458Smm length = -1ULL; 136237458Smm 137219089Spjd /* 138219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 139219089Spjd * since free block aggregation can only be done for blocks of the 140219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 141219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 142219089Spjd * aggregated with other DRR_FREEOBJECTS records. 143219089Spjd */ 144235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 145235222Smm dsp->dsa_pending_op != PENDING_FREE) { 146235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 147235222Smm sizeof (dmu_replay_record_t)) != 0) 148249195Smm return (SET_ERROR(EINTR)); 149235222Smm dsp->dsa_pending_op = PENDING_NONE; 150219089Spjd } 151219089Spjd 152235222Smm if (dsp->dsa_pending_op == PENDING_FREE) { 153219089Spjd /* 154219089Spjd * There should never be a PENDING_FREE if length is -1 155219089Spjd * (because dump_dnode is the only place where this 156219089Spjd * function is called with a -1, and only after flushing 157219089Spjd * any pending record). 158219089Spjd */ 159219089Spjd ASSERT(length != -1ULL); 160219089Spjd /* 161219089Spjd * Check to see whether this free block can be aggregated 162219089Spjd * with pending one. 163219089Spjd */ 164219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 165219089Spjd drrf->drr_length == offset) { 166219089Spjd drrf->drr_length += length; 167219089Spjd return (0); 168219089Spjd } else { 169219089Spjd /* not a continuation. Push out pending record */ 170235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 171219089Spjd sizeof (dmu_replay_record_t)) != 0) 172249195Smm return (SET_ERROR(EINTR)); 173235222Smm dsp->dsa_pending_op = PENDING_NONE; 174219089Spjd } 175219089Spjd } 176219089Spjd /* create a FREE record and make it pending */ 177235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 178235222Smm dsp->dsa_drr->drr_type = DRR_FREE; 179219089Spjd drrf->drr_object = object; 180219089Spjd drrf->drr_offset = offset; 181219089Spjd drrf->drr_length = length; 182235222Smm drrf->drr_toguid = dsp->dsa_toguid; 183219089Spjd if (length == -1ULL) { 184235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 185235222Smm sizeof (dmu_replay_record_t)) != 0) 186249195Smm return (SET_ERROR(EINTR)); 187219089Spjd } else { 188235222Smm dsp->dsa_pending_op = PENDING_FREE; 189219089Spjd } 190168404Spjd 191168404Spjd return (0); 192168404Spjd} 193168404Spjd 194168404Spjdstatic int 195268075Sdelphijdump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, 196219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 197168404Spjd{ 198235222Smm struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); 199219089Spjd 200253821Sdelphij /* 201253821Sdelphij * We send data in increasing object, offset order. 202253821Sdelphij * See comment in dump_free() for details. 203253821Sdelphij */ 204253821Sdelphij ASSERT(object > dsp->dsa_last_data_object || 205253821Sdelphij (object == dsp->dsa_last_data_object && 206253821Sdelphij offset > dsp->dsa_last_data_offset)); 207253821Sdelphij dsp->dsa_last_data_object = object; 208253821Sdelphij dsp->dsa_last_data_offset = offset + blksz - 1; 209219089Spjd 210219089Spjd /* 211219089Spjd * If there is any kind of pending aggregation (currently either 212219089Spjd * a grouping of free objects or free blocks), push it out to 213219089Spjd * the stream, since aggregation can't be done across operations 214219089Spjd * of different types. 215219089Spjd */ 216235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 217235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 218235222Smm sizeof (dmu_replay_record_t)) != 0) 219249195Smm return (SET_ERROR(EINTR)); 220235222Smm dsp->dsa_pending_op = PENDING_NONE; 221219089Spjd } 222168404Spjd /* write a DATA record */ 223235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 224235222Smm dsp->dsa_drr->drr_type = DRR_WRITE; 225219089Spjd drrw->drr_object = object; 226219089Spjd drrw->drr_type = type; 227219089Spjd drrw->drr_offset = offset; 228219089Spjd drrw->drr_length = blksz; 229235222Smm drrw->drr_toguid = dsp->dsa_toguid; 230274337Sdelphij if (bp == NULL || BP_IS_EMBEDDED(bp)) { 231268075Sdelphij /* 232274337Sdelphij * There's no pre-computed checksum for partial-block 233274337Sdelphij * writes or embedded BP's, so (like 234274337Sdelphij * fletcher4-checkummed blocks) userland will have to 235274337Sdelphij * compute a dedup-capable checksum itself. 236268075Sdelphij */ 237268075Sdelphij drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; 238268075Sdelphij } else { 239268075Sdelphij drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 240268075Sdelphij if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 241268075Sdelphij drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 242268075Sdelphij DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 243268075Sdelphij DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 244268075Sdelphij DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 245268075Sdelphij drrw->drr_key.ddk_cksum = bp->blk_cksum; 246268075Sdelphij } 247168404Spjd 248235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 249249195Smm return (SET_ERROR(EINTR)); 250235222Smm if (dump_bytes(dsp, data, blksz) != 0) 251249195Smm return (SET_ERROR(EINTR)); 252219089Spjd return (0); 253219089Spjd} 254219089Spjd 255219089Spjdstatic int 256268075Sdelphijdump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 257268075Sdelphij int blksz, const blkptr_t *bp) 258268075Sdelphij{ 259268075Sdelphij char buf[BPE_PAYLOAD_SIZE]; 260268075Sdelphij struct drr_write_embedded *drrw = 261268075Sdelphij &(dsp->dsa_drr->drr_u.drr_write_embedded); 262268075Sdelphij 263268075Sdelphij if (dsp->dsa_pending_op != PENDING_NONE) { 264268075Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, 265268075Sdelphij sizeof (dmu_replay_record_t)) != 0) 266268075Sdelphij return (EINTR); 267268075Sdelphij dsp->dsa_pending_op = PENDING_NONE; 268268075Sdelphij } 269268075Sdelphij 270268075Sdelphij ASSERT(BP_IS_EMBEDDED(bp)); 271268075Sdelphij 272268075Sdelphij bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 273268075Sdelphij dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED; 274268075Sdelphij drrw->drr_object = object; 275268075Sdelphij drrw->drr_offset = offset; 276268075Sdelphij drrw->drr_length = blksz; 277268075Sdelphij drrw->drr_toguid = dsp->dsa_toguid; 278268075Sdelphij drrw->drr_compression = BP_GET_COMPRESS(bp); 279268075Sdelphij drrw->drr_etype = BPE_GET_ETYPE(bp); 280268075Sdelphij drrw->drr_lsize = BPE_GET_LSIZE(bp); 281268075Sdelphij drrw->drr_psize = BPE_GET_PSIZE(bp); 282268075Sdelphij 283268075Sdelphij decode_embedded_bp_compressed(bp, buf); 284268075Sdelphij 285268075Sdelphij if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 286268075Sdelphij return (EINTR); 287268075Sdelphij if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0) 288268075Sdelphij return (EINTR); 289268075Sdelphij return (0); 290268075Sdelphij} 291268075Sdelphij 292268075Sdelphijstatic int 293235222Smmdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) 294219089Spjd{ 295235222Smm struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); 296219089Spjd 297235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 298235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 299235222Smm sizeof (dmu_replay_record_t)) != 0) 300249195Smm return (SET_ERROR(EINTR)); 301235222Smm dsp->dsa_pending_op = PENDING_NONE; 302219089Spjd } 303219089Spjd 304219089Spjd /* write a SPILL record */ 305235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 306235222Smm dsp->dsa_drr->drr_type = DRR_SPILL; 307219089Spjd drrs->drr_object = object; 308219089Spjd drrs->drr_length = blksz; 309235222Smm drrs->drr_toguid = dsp->dsa_toguid; 310219089Spjd 311235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) 312249195Smm return (SET_ERROR(EINTR)); 313235222Smm if (dump_bytes(dsp, data, blksz)) 314249195Smm return (SET_ERROR(EINTR)); 315168404Spjd return (0); 316168404Spjd} 317168404Spjd 318168404Spjdstatic int 319235222Smmdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) 320168404Spjd{ 321235222Smm struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); 322219089Spjd 323253821Sdelphij /* See comment in dump_free(). */ 324253821Sdelphij if (!dsp->dsa_incremental) 325253821Sdelphij return (0); 326253821Sdelphij 327219089Spjd /* 328219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 329219089Spjd * push it out, since free block aggregation can only be done for 330219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 331219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 332219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 333219089Spjd */ 334235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 335235222Smm dsp->dsa_pending_op != PENDING_FREEOBJECTS) { 336235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 337235222Smm sizeof (dmu_replay_record_t)) != 0) 338249195Smm return (SET_ERROR(EINTR)); 339235222Smm dsp->dsa_pending_op = PENDING_NONE; 340219089Spjd } 341235222Smm if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { 342219089Spjd /* 343219089Spjd * See whether this free object array can be aggregated 344219089Spjd * with pending one 345219089Spjd */ 346219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 347219089Spjd drrfo->drr_numobjs += numobjs; 348219089Spjd return (0); 349219089Spjd } else { 350219089Spjd /* can't be aggregated. Push out pending record */ 351235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 352219089Spjd sizeof (dmu_replay_record_t)) != 0) 353249195Smm return (SET_ERROR(EINTR)); 354235222Smm dsp->dsa_pending_op = PENDING_NONE; 355219089Spjd } 356219089Spjd } 357219089Spjd 358168404Spjd /* write a FREEOBJECTS record */ 359235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 360235222Smm dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; 361219089Spjd drrfo->drr_firstobj = firstobj; 362219089Spjd drrfo->drr_numobjs = numobjs; 363235222Smm drrfo->drr_toguid = dsp->dsa_toguid; 364168404Spjd 365235222Smm dsp->dsa_pending_op = PENDING_FREEOBJECTS; 366219089Spjd 367168404Spjd return (0); 368168404Spjd} 369168404Spjd 370168404Spjdstatic int 371235222Smmdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) 372168404Spjd{ 373235222Smm struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); 374219089Spjd 375168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 376235222Smm return (dump_freeobjects(dsp, object, 1)); 377168404Spjd 378235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 379235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 380235222Smm sizeof (dmu_replay_record_t)) != 0) 381249195Smm return (SET_ERROR(EINTR)); 382235222Smm dsp->dsa_pending_op = PENDING_NONE; 383219089Spjd } 384219089Spjd 385168404Spjd /* write an OBJECT record */ 386235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 387235222Smm dsp->dsa_drr->drr_type = DRR_OBJECT; 388219089Spjd drro->drr_object = object; 389219089Spjd drro->drr_type = dnp->dn_type; 390219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 391219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 392219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 393219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 394219089Spjd drro->drr_compress = dnp->dn_compress; 395235222Smm drro->drr_toguid = dsp->dsa_toguid; 396168404Spjd 397274337Sdelphij if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) && 398274337Sdelphij drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) 399274337Sdelphij drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; 400274337Sdelphij 401235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 402249195Smm return (SET_ERROR(EINTR)); 403168404Spjd 404235222Smm if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 405249195Smm return (SET_ERROR(EINTR)); 406168404Spjd 407253821Sdelphij /* Free anything past the end of the file. */ 408235222Smm if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * 409253821Sdelphij (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) 410249195Smm return (SET_ERROR(EINTR)); 411248571Smm if (dsp->dsa_err != 0) 412249195Smm return (SET_ERROR(EINTR)); 413168404Spjd return (0); 414168404Spjd} 415168404Spjd 416268075Sdelphijstatic boolean_t 417268075Sdelphijbackup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) 418268075Sdelphij{ 419268075Sdelphij if (!BP_IS_EMBEDDED(bp)) 420268075Sdelphij return (B_FALSE); 421268075Sdelphij 422268075Sdelphij /* 423268075Sdelphij * Compression function must be legacy, or explicitly enabled. 424268075Sdelphij */ 425268075Sdelphij if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS && 426268075Sdelphij !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4))) 427268075Sdelphij return (B_FALSE); 428268075Sdelphij 429268075Sdelphij /* 430268075Sdelphij * Embed type must be explicitly enabled. 431268075Sdelphij */ 432268075Sdelphij switch (BPE_GET_ETYPE(bp)) { 433268075Sdelphij case BP_EMBEDDED_TYPE_DATA: 434268075Sdelphij if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) 435268075Sdelphij return (B_TRUE); 436268075Sdelphij break; 437268075Sdelphij default: 438268075Sdelphij return (B_FALSE); 439268075Sdelphij } 440268075Sdelphij return (B_FALSE); 441268075Sdelphij} 442268075Sdelphij 443168404Spjd#define BP_SPAN(dnp, level) \ 444168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 445168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 446168404Spjd 447219089Spjd/* ARGSUSED */ 448168404Spjdstatic int 449246666Smmbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 450268123Sdelphij const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 451168404Spjd{ 452235222Smm dmu_sendarg_t *dsp = arg; 453168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 454168404Spjd int err = 0; 455168404Spjd 456185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 457249195Smm return (SET_ERROR(EINTR)); 458168404Spjd 459219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 460219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 461209962Smm return (0); 462260183Sdelphij } else if (zb->zb_level == ZB_ZIL_LEVEL) { 463260183Sdelphij /* 464260183Sdelphij * If we are sending a non-snapshot (which is allowed on 465260183Sdelphij * read-only pools), it may have a ZIL, which must be ignored. 466260183Sdelphij */ 467260183Sdelphij return (0); 468260150Sdelphij } else if (BP_IS_HOLE(bp) && 469260150Sdelphij zb->zb_object == DMU_META_DNODE_OBJECT) { 470208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 471208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 472235222Smm err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); 473260150Sdelphij } else if (BP_IS_HOLE(bp)) { 474208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 475235222Smm err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); 476208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 477208047Smm return (0); 478208047Smm } else if (type == DMU_OT_DNODE) { 479208047Smm dnode_phys_t *blk; 480168404Spjd int i; 481168404Spjd int blksz = BP_GET_LSIZE(bp); 482275811Sdelphij arc_flags_t aflags = ARC_FLAG_WAIT; 483208047Smm arc_buf_t *abuf; 484168404Spjd 485246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 486246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 487246666Smm &aflags, zb) != 0) 488249195Smm return (SET_ERROR(EIO)); 489208047Smm 490208047Smm blk = abuf->b_data; 491168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 492208047Smm uint64_t dnobj = (zb->zb_blkid << 493208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 494235222Smm err = dump_dnode(dsp, dnobj, blk+i); 495248571Smm if (err != 0) 496168404Spjd break; 497168404Spjd } 498208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 499219089Spjd } else if (type == DMU_OT_SA) { 500275811Sdelphij arc_flags_t aflags = ARC_FLAG_WAIT; 501208047Smm arc_buf_t *abuf; 502168404Spjd int blksz = BP_GET_LSIZE(bp); 503168404Spjd 504246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 505246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 506246666Smm &aflags, zb) != 0) 507249195Smm return (SET_ERROR(EIO)); 508168404Spjd 509235222Smm err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); 510219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 511268075Sdelphij } else if (backup_do_embed(dsp, bp)) { 512268075Sdelphij /* it's an embedded level-0 block of a regular object */ 513268075Sdelphij int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 514268075Sdelphij err = dump_write_embedded(dsp, zb->zb_object, 515268075Sdelphij zb->zb_blkid * blksz, blksz, bp); 516219089Spjd } else { /* it's a level-0 block of a regular object */ 517275811Sdelphij arc_flags_t aflags = ARC_FLAG_WAIT; 518219089Spjd arc_buf_t *abuf; 519219089Spjd int blksz = BP_GET_LSIZE(bp); 520274337Sdelphij uint64_t offset; 521219089Spjd 522268075Sdelphij ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 523260183Sdelphij ASSERT0(zb->zb_level); 524246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 525246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 526246666Smm &aflags, zb) != 0) { 527228103Smm if (zfs_send_corrupt_data) { 528228103Smm /* Send a block filled with 0x"zfs badd bloc" */ 529228103Smm abuf = arc_buf_alloc(spa, blksz, &abuf, 530228103Smm ARC_BUFC_DATA); 531228103Smm uint64_t *ptr; 532228103Smm for (ptr = abuf->b_data; 533228103Smm (char *)ptr < (char *)abuf->b_data + blksz; 534228103Smm ptr++) 535286554Smav *ptr = 0x2f5baddb10cULL; 536228103Smm } else { 537249195Smm return (SET_ERROR(EIO)); 538228103Smm } 539228103Smm } 540219089Spjd 541274337Sdelphij offset = zb->zb_blkid * blksz; 542274337Sdelphij 543274337Sdelphij if (!(dsp->dsa_featureflags & 544274337Sdelphij DMU_BACKUP_FEATURE_LARGE_BLOCKS) && 545274337Sdelphij blksz > SPA_OLD_MAXBLOCKSIZE) { 546274337Sdelphij char *buf = abuf->b_data; 547274337Sdelphij while (blksz > 0 && err == 0) { 548274337Sdelphij int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE); 549274337Sdelphij err = dump_write(dsp, type, zb->zb_object, 550274337Sdelphij offset, n, NULL, buf); 551274337Sdelphij offset += n; 552274337Sdelphij buf += n; 553274337Sdelphij blksz -= n; 554274337Sdelphij } 555274337Sdelphij } else { 556274337Sdelphij err = dump_write(dsp, type, zb->zb_object, 557274337Sdelphij offset, blksz, bp, abuf->b_data); 558274337Sdelphij } 559208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 560168404Spjd } 561168404Spjd 562168404Spjd ASSERT(err == 0 || err == EINTR); 563168404Spjd return (err); 564168404Spjd} 565168404Spjd 566248571Smm/* 567260183Sdelphij * Releases dp using the specified tag. 568248571Smm */ 569248571Smmstatic int 570248571Smmdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 571268075Sdelphij zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok, 572248571Smm#ifdef illumos 573274337Sdelphij boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off) 574248571Smm#else 575274337Sdelphij boolean_t large_block_ok, int outfd, struct file *fp, offset_t *off) 576248571Smm#endif 577168404Spjd{ 578248571Smm objset_t *os; 579168404Spjd dmu_replay_record_t *drr; 580235222Smm dmu_sendarg_t *dsp; 581168404Spjd int err; 582185029Spjd uint64_t fromtxg = 0; 583268075Sdelphij uint64_t featureflags = 0; 584168404Spjd 585248571Smm err = dmu_objset_from_ds(ds, &os); 586248571Smm if (err != 0) { 587248571Smm dsl_pool_rele(dp, tag); 588248571Smm return (err); 589185029Spjd } 590185029Spjd 591168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 592168404Spjd drr->drr_type = DRR_BEGIN; 593168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 594219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 595219089Spjd DMU_SUBSTREAM); 596219089Spjd 597219089Spjd#ifdef _KERNEL 598248571Smm if (dmu_objset_type(os) == DMU_OST_ZFS) { 599219089Spjd uint64_t version; 600248571Smm if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 601235222Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 602248571Smm dsl_pool_rele(dp, tag); 603249195Smm return (SET_ERROR(EINVAL)); 604235222Smm } 605248571Smm if (version >= ZPL_VERSION_SA) { 606268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; 607219089Spjd } 608219089Spjd } 609219089Spjd#endif 610219089Spjd 611274337Sdelphij if (large_block_ok && ds->ds_large_blocks) 612274337Sdelphij featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS; 613268075Sdelphij if (embedok && 614268075Sdelphij spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { 615268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; 616268075Sdelphij if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 617268075Sdelphij featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4; 618268075Sdelphij } else { 619268075Sdelphij embedok = B_FALSE; 620268075Sdelphij } 621268075Sdelphij 622268075Sdelphij DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo, 623268075Sdelphij featureflags); 624268075Sdelphij 625168404Spjd drr->drr_u.drr_begin.drr_creation_time = 626275782Sdelphij dsl_dataset_phys(ds)->ds_creation_time; 627248571Smm drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 628260183Sdelphij if (is_clone) 629185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 630275782Sdelphij drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(ds)->ds_guid; 631275782Sdelphij if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 632185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 633185029Spjd 634260183Sdelphij if (fromzb != NULL) { 635260183Sdelphij drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid; 636260183Sdelphij fromtxg = fromzb->zbm_creation_txg; 637260183Sdelphij } 638168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 639286575Smav if (!ds->ds_is_snapshot) { 640260183Sdelphij (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", 641260183Sdelphij sizeof (drr->drr_u.drr_begin.drr_toname)); 642248571Smm } 643185029Spjd 644235222Smm dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 645168404Spjd 646235222Smm dsp->dsa_drr = drr; 647235222Smm dsp->dsa_outfd = outfd; 648235222Smm dsp->dsa_proc = curproc; 649235222Smm dsp->dsa_td = curthread; 650235222Smm dsp->dsa_fp = fp; 651248571Smm dsp->dsa_os = os; 652235222Smm dsp->dsa_off = off; 653275782Sdelphij dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid; 654235222Smm ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 655235222Smm dsp->dsa_pending_op = PENDING_NONE; 656260183Sdelphij dsp->dsa_incremental = (fromzb != NULL); 657268075Sdelphij dsp->dsa_featureflags = featureflags; 658235222Smm 659235222Smm mutex_enter(&ds->ds_sendstream_lock); 660235222Smm list_insert_head(&ds->ds_sendstreams, dsp); 661235222Smm mutex_exit(&ds->ds_sendstream_lock); 662235222Smm 663249042Smm dsl_dataset_long_hold(ds, FTAG); 664249042Smm dsl_pool_rele(dp, tag); 665249042Smm 666235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 667235222Smm err = dsp->dsa_err; 668235222Smm goto out; 669168404Spjd } 670168404Spjd 671208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 672235222Smm backup_cb, dsp); 673168404Spjd 674235222Smm if (dsp->dsa_pending_op != PENDING_NONE) 675235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 676249195Smm err = SET_ERROR(EINTR); 677219089Spjd 678248571Smm if (err != 0) { 679248571Smm if (err == EINTR && dsp->dsa_err != 0) 680235222Smm err = dsp->dsa_err; 681235222Smm goto out; 682168404Spjd } 683168404Spjd 684168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 685168404Spjd drr->drr_type = DRR_END; 686235222Smm drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 687235222Smm drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 688168404Spjd 689235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 690235222Smm err = dsp->dsa_err; 691235222Smm goto out; 692168404Spjd } 693168404Spjd 694235222Smmout: 695235222Smm mutex_enter(&ds->ds_sendstream_lock); 696235222Smm list_remove(&ds->ds_sendstreams, dsp); 697235222Smm mutex_exit(&ds->ds_sendstream_lock); 698235222Smm 699168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 700235222Smm kmem_free(dsp, sizeof (dmu_sendarg_t)); 701168404Spjd 702248571Smm dsl_dataset_long_rele(ds, FTAG); 703248571Smm 704235222Smm return (err); 705168404Spjd} 706168404Spjd 707228103Smmint 708248571Smmdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 709274337Sdelphij boolean_t embedok, boolean_t large_block_ok, 710248571Smm#ifdef illumos 711274337Sdelphij int outfd, vnode_t *vp, offset_t *off) 712248571Smm#else 713274337Sdelphij int outfd, struct file *fp, offset_t *off) 714248571Smm#endif 715228103Smm{ 716248571Smm dsl_pool_t *dp; 717248571Smm dsl_dataset_t *ds; 718248571Smm dsl_dataset_t *fromds = NULL; 719248571Smm int err; 720248571Smm 721248571Smm err = dsl_pool_hold(pool, FTAG, &dp); 722248571Smm if (err != 0) 723248571Smm return (err); 724248571Smm 725248571Smm err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 726248571Smm if (err != 0) { 727248571Smm dsl_pool_rele(dp, FTAG); 728248571Smm return (err); 729248571Smm } 730248571Smm 731248571Smm if (fromsnap != 0) { 732260183Sdelphij zfs_bookmark_phys_t zb; 733260183Sdelphij boolean_t is_clone; 734260183Sdelphij 735248571Smm err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 736248571Smm if (err != 0) { 737248571Smm dsl_dataset_rele(ds, FTAG); 738248571Smm dsl_pool_rele(dp, FTAG); 739248571Smm return (err); 740248571Smm } 741260183Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 742260183Sdelphij err = SET_ERROR(EXDEV); 743275782Sdelphij zb.zbm_creation_time = 744275782Sdelphij dsl_dataset_phys(fromds)->ds_creation_time; 745275782Sdelphij zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg; 746275782Sdelphij zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; 747260183Sdelphij is_clone = (fromds->ds_dir != ds->ds_dir); 748260183Sdelphij dsl_dataset_rele(fromds, FTAG); 749274337Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 750274337Sdelphij embedok, large_block_ok, outfd, fp, off); 751260183Sdelphij } else { 752274337Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 753274337Sdelphij embedok, large_block_ok, outfd, fp, off); 754248571Smm } 755260183Sdelphij dsl_dataset_rele(ds, FTAG); 756260183Sdelphij return (err); 757248571Smm} 758248571Smm 759248571Smmint 760274337Sdelphijdmu_send(const char *tosnap, const char *fromsnap, 761274337Sdelphij boolean_t embedok, boolean_t large_block_ok, 762248571Smm#ifdef illumos 763248571Smm int outfd, vnode_t *vp, offset_t *off) 764248571Smm#else 765248571Smm int outfd, struct file *fp, offset_t *off) 766248571Smm#endif 767248571Smm{ 768248571Smm dsl_pool_t *dp; 769248571Smm dsl_dataset_t *ds; 770248571Smm int err; 771260183Sdelphij boolean_t owned = B_FALSE; 772248571Smm 773260183Sdelphij if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) 774249195Smm return (SET_ERROR(EINVAL)); 775248571Smm 776248571Smm err = dsl_pool_hold(tosnap, FTAG, &dp); 777248571Smm if (err != 0) 778248571Smm return (err); 779248571Smm 780260183Sdelphij if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) { 781260183Sdelphij /* 782260183Sdelphij * We are sending a filesystem or volume. Ensure 783260183Sdelphij * that it doesn't change by owning the dataset. 784260183Sdelphij */ 785260183Sdelphij err = dsl_dataset_own(dp, tosnap, FTAG, &ds); 786260183Sdelphij owned = B_TRUE; 787260183Sdelphij } else { 788260183Sdelphij err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 789260183Sdelphij } 790248571Smm if (err != 0) { 791248571Smm dsl_pool_rele(dp, FTAG); 792248571Smm return (err); 793248571Smm } 794248571Smm 795248571Smm if (fromsnap != NULL) { 796260183Sdelphij zfs_bookmark_phys_t zb; 797260183Sdelphij boolean_t is_clone = B_FALSE; 798260183Sdelphij int fsnamelen = strchr(tosnap, '@') - tosnap; 799260183Sdelphij 800260183Sdelphij /* 801260183Sdelphij * If the fromsnap is in a different filesystem, then 802260183Sdelphij * mark the send stream as a clone. 803260183Sdelphij */ 804260183Sdelphij if (strncmp(tosnap, fromsnap, fsnamelen) != 0 || 805260183Sdelphij (fromsnap[fsnamelen] != '@' && 806260183Sdelphij fromsnap[fsnamelen] != '#')) { 807260183Sdelphij is_clone = B_TRUE; 808260183Sdelphij } 809260183Sdelphij 810260183Sdelphij if (strchr(fromsnap, '@')) { 811260183Sdelphij dsl_dataset_t *fromds; 812260183Sdelphij err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 813260183Sdelphij if (err == 0) { 814260183Sdelphij if (!dsl_dataset_is_before(ds, fromds, 0)) 815260183Sdelphij err = SET_ERROR(EXDEV); 816260183Sdelphij zb.zbm_creation_time = 817275782Sdelphij dsl_dataset_phys(fromds)->ds_creation_time; 818260183Sdelphij zb.zbm_creation_txg = 819275782Sdelphij dsl_dataset_phys(fromds)->ds_creation_txg; 820275782Sdelphij zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; 821260183Sdelphij is_clone = (ds->ds_dir != fromds->ds_dir); 822260183Sdelphij dsl_dataset_rele(fromds, FTAG); 823260183Sdelphij } 824260183Sdelphij } else { 825260183Sdelphij err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); 826260183Sdelphij } 827248571Smm if (err != 0) { 828248571Smm dsl_dataset_rele(ds, FTAG); 829248571Smm dsl_pool_rele(dp, FTAG); 830248571Smm return (err); 831248571Smm } 832274337Sdelphij err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, 833274337Sdelphij embedok, large_block_ok, outfd, fp, off); 834260183Sdelphij } else { 835274337Sdelphij err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, 836274337Sdelphij embedok, large_block_ok, outfd, fp, off); 837248571Smm } 838260183Sdelphij if (owned) 839260183Sdelphij dsl_dataset_disown(ds, FTAG); 840260183Sdelphij else 841260183Sdelphij dsl_dataset_rele(ds, FTAG); 842260183Sdelphij return (err); 843248571Smm} 844248571Smm 845248571Smmint 846248571Smmdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 847248571Smm{ 848228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 849228103Smm int err; 850228103Smm uint64_t size; 851228103Smm 852248571Smm ASSERT(dsl_pool_config_held(dp)); 853248571Smm 854228103Smm /* tosnap must be a snapshot */ 855286575Smav if (!ds->ds_is_snapshot) 856249195Smm return (SET_ERROR(EINVAL)); 857228103Smm 858284301Savg /* fromsnap, if provided, must be a snapshot */ 859286575Smav if (fromds != NULL && !fromds->ds_is_snapshot) 860284301Savg return (SET_ERROR(EINVAL)); 861284301Savg 862248571Smm /* 863248571Smm * fromsnap must be an earlier snapshot from the same fs as tosnap, 864248571Smm * or the origin's fs. 865248571Smm */ 866260183Sdelphij if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0)) 867249195Smm return (SET_ERROR(EXDEV)); 868228103Smm 869228103Smm /* Get uncompressed size estimate of changed data. */ 870228103Smm if (fromds == NULL) { 871275782Sdelphij size = dsl_dataset_phys(ds)->ds_uncompressed_bytes; 872228103Smm } else { 873228103Smm uint64_t used, comp; 874228103Smm err = dsl_dataset_space_written(fromds, ds, 875228103Smm &used, &comp, &size); 876248571Smm if (err != 0) 877228103Smm return (err); 878228103Smm } 879228103Smm 880228103Smm /* 881228103Smm * Assume that space (both on-disk and in-stream) is dominated by 882228103Smm * data. We will adjust for indirect blocks and the copies property, 883228103Smm * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 884228103Smm */ 885228103Smm 886228103Smm /* 887228103Smm * Subtract out approximate space used by indirect blocks. 888228103Smm * Assume most space is used by data blocks (non-indirect, non-dnode). 889228103Smm * Assume all blocks are recordsize. Assume ditto blocks and 890228103Smm * internal fragmentation counter out compression. 891228103Smm * 892228103Smm * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 893228103Smm * block, which we observe in practice. 894228103Smm */ 895228103Smm uint64_t recordsize; 896248571Smm err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 897248571Smm if (err != 0) 898228103Smm return (err); 899228103Smm size -= size / recordsize * sizeof (blkptr_t); 900228103Smm 901228103Smm /* Add in the space for the record associated with each block. */ 902228103Smm size += size / recordsize * sizeof (dmu_replay_record_t); 903228103Smm 904228103Smm *sizep = size; 905228103Smm 906228103Smm return (0); 907228103Smm} 908228103Smm 909248571Smmtypedef struct dmu_recv_begin_arg { 910248571Smm const char *drba_origin; 911248571Smm dmu_recv_cookie_t *drba_cookie; 912248571Smm cred_t *drba_cred; 913253820Sdelphij uint64_t drba_snapobj; 914248571Smm} dmu_recv_begin_arg_t; 915168404Spjd 916168404Spjdstatic int 917248571Smmrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 918248571Smm uint64_t fromguid) 919168404Spjd{ 920185029Spjd uint64_t val; 921248571Smm int error; 922248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 923185029Spjd 924248571Smm /* temporary clone name must not exist */ 925248571Smm error = zap_lookup(dp->dp_meta_objset, 926275782Sdelphij dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name, 927248571Smm 8, 1, &val); 928248571Smm if (error != ENOENT) 929248571Smm return (error == 0 ? EBUSY : error); 930248571Smm 931219089Spjd /* new snapshot name must not exist */ 932248571Smm error = zap_lookup(dp->dp_meta_objset, 933275782Sdelphij dsl_dataset_phys(ds)->ds_snapnames_zapobj, 934275782Sdelphij drba->drba_cookie->drc_tosnap, 8, 1, &val); 935248571Smm if (error != ENOENT) 936248571Smm return (error == 0 ? EEXIST : error); 937168404Spjd 938264835Sdelphij /* 939264835Sdelphij * Check snapshot limit before receiving. We'll recheck again at the 940264835Sdelphij * end, but might as well abort before receiving if we're already over 941264835Sdelphij * the limit. 942264835Sdelphij * 943264835Sdelphij * Note that we do not check the file system limit with 944264835Sdelphij * dsl_dir_fscount_check because the temporary %clones don't count 945264835Sdelphij * against that limit. 946264835Sdelphij */ 947264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, 948264835Sdelphij NULL, drba->drba_cred); 949264835Sdelphij if (error != 0) 950264835Sdelphij return (error); 951264835Sdelphij 952248571Smm if (fromguid != 0) { 953253820Sdelphij dsl_dataset_t *snap; 954275782Sdelphij uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 955253820Sdelphij 956253820Sdelphij /* Find snapshot in this dir that matches fromguid. */ 957253820Sdelphij while (obj != 0) { 958253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 959253820Sdelphij &snap); 960253820Sdelphij if (error != 0) 961253820Sdelphij return (SET_ERROR(ENODEV)); 962253820Sdelphij if (snap->ds_dir != ds->ds_dir) { 963253820Sdelphij dsl_dataset_rele(snap, FTAG); 964253820Sdelphij return (SET_ERROR(ENODEV)); 965253820Sdelphij } 966275782Sdelphij if (dsl_dataset_phys(snap)->ds_guid == fromguid) 967253820Sdelphij break; 968275782Sdelphij obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 969253820Sdelphij dsl_dataset_rele(snap, FTAG); 970253820Sdelphij } 971253820Sdelphij if (obj == 0) 972249195Smm return (SET_ERROR(ENODEV)); 973168404Spjd 974253820Sdelphij if (drba->drba_cookie->drc_force) { 975253820Sdelphij drba->drba_snapobj = obj; 976253820Sdelphij } else { 977253820Sdelphij /* 978253820Sdelphij * If we are not forcing, there must be no 979253820Sdelphij * changes since fromsnap. 980253820Sdelphij */ 981253820Sdelphij if (dsl_dataset_modified_since_snap(ds, snap)) { 982219089Spjd dsl_dataset_rele(snap, FTAG); 983253820Sdelphij return (SET_ERROR(ETXTBSY)); 984219089Spjd } 985253820Sdelphij drba->drba_snapobj = ds->ds_prev->ds_object; 986219089Spjd } 987253820Sdelphij 988253820Sdelphij dsl_dataset_rele(snap, FTAG); 989219089Spjd } else { 990283525Savg /* if full, then must be forced */ 991283525Savg if (!drba->drba_cookie->drc_force) 992283525Savg return (SET_ERROR(EEXIST)); 993283525Savg /* start from $ORIGIN@$ORIGIN, if supported */ 994283525Savg drba->drba_snapobj = dp->dp_origin_snap != NULL ? 995283525Savg dp->dp_origin_snap->ds_object : 0; 996219089Spjd } 997219089Spjd 998248571Smm return (0); 999168404Spjd 1000168404Spjd} 1001168404Spjd 1002248571Smmstatic int 1003248571Smmdmu_recv_begin_check(void *arg, dmu_tx_t *tx) 1004248571Smm{ 1005248571Smm dmu_recv_begin_arg_t *drba = arg; 1006248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1007248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 1008248571Smm uint64_t fromguid = drrb->drr_fromguid; 1009248571Smm int flags = drrb->drr_flags; 1010248571Smm int error; 1011268075Sdelphij uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); 1012248571Smm dsl_dataset_t *ds; 1013248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 1014248571Smm 1015248571Smm /* already checked */ 1016248571Smm ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 1017248571Smm 1018248571Smm if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 1019248571Smm DMU_COMPOUNDSTREAM || 1020248571Smm drrb->drr_type >= DMU_OST_NUMTYPES || 1021248571Smm ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 1022249195Smm return (SET_ERROR(EINVAL)); 1023248571Smm 1024248571Smm /* Verify pool version supports SA if SA_SPILL feature set */ 1025268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && 1026268075Sdelphij spa_version(dp->dp_spa) < SPA_VERSION_SA) 1027249195Smm return (SET_ERROR(ENOTSUP)); 1028248571Smm 1029268075Sdelphij /* 1030268075Sdelphij * The receiving code doesn't know how to translate a WRITE_EMBEDDED 1031268075Sdelphij * record to a plan WRITE record, so the pool must have the 1032268075Sdelphij * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED 1033268075Sdelphij * records. Same with WRITE_EMBEDDED records that use LZ4 compression. 1034268075Sdelphij */ 1035268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) && 1036268075Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) 1037268075Sdelphij return (SET_ERROR(ENOTSUP)); 1038268075Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) && 1039268075Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) 1040268075Sdelphij return (SET_ERROR(ENOTSUP)); 1041268075Sdelphij 1042274337Sdelphij /* 1043274337Sdelphij * The receiving code doesn't know how to translate large blocks 1044274337Sdelphij * to smaller ones, so the pool must have the LARGE_BLOCKS 1045274337Sdelphij * feature enabled if the stream has LARGE_BLOCKS. 1046274337Sdelphij */ 1047274337Sdelphij if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) && 1048274337Sdelphij !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) 1049274337Sdelphij return (SET_ERROR(ENOTSUP)); 1050274337Sdelphij 1051248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1052248571Smm if (error == 0) { 1053248571Smm /* target fs already exists; recv into temp clone */ 1054248571Smm 1055248571Smm /* Can't recv a clone into an existing fs */ 1056248571Smm if (flags & DRR_FLAG_CLONE) { 1057248571Smm dsl_dataset_rele(ds, FTAG); 1058249195Smm return (SET_ERROR(EINVAL)); 1059248571Smm } 1060248571Smm 1061248571Smm error = recv_begin_check_existing_impl(drba, ds, fromguid); 1062248571Smm dsl_dataset_rele(ds, FTAG); 1063248571Smm } else if (error == ENOENT) { 1064248571Smm /* target fs does not exist; must be a full backup or clone */ 1065248571Smm char buf[MAXNAMELEN]; 1066248571Smm 1067248571Smm /* 1068248571Smm * If it's a non-clone incremental, we are missing the 1069248571Smm * target fs, so fail the recv. 1070248571Smm */ 1071248571Smm if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 1072249195Smm return (SET_ERROR(ENOENT)); 1073248571Smm 1074248571Smm /* Open the parent of tofs */ 1075248571Smm ASSERT3U(strlen(tofs), <, MAXNAMELEN); 1076248571Smm (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 1077248571Smm error = dsl_dataset_hold(dp, buf, FTAG, &ds); 1078248571Smm if (error != 0) 1079248571Smm return (error); 1080248571Smm 1081264835Sdelphij /* 1082264835Sdelphij * Check filesystem and snapshot limits before receiving. We'll 1083264835Sdelphij * recheck snapshot limits again at the end (we create the 1084264835Sdelphij * filesystems and increment those counts during begin_sync). 1085264835Sdelphij */ 1086264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1087264835Sdelphij ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); 1088264835Sdelphij if (error != 0) { 1089264835Sdelphij dsl_dataset_rele(ds, FTAG); 1090264835Sdelphij return (error); 1091264835Sdelphij } 1092264835Sdelphij 1093264835Sdelphij error = dsl_fs_ss_limit_check(ds->ds_dir, 1, 1094264835Sdelphij ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); 1095264835Sdelphij if (error != 0) { 1096264835Sdelphij dsl_dataset_rele(ds, FTAG); 1097264835Sdelphij return (error); 1098264835Sdelphij } 1099264835Sdelphij 1100248571Smm if (drba->drba_origin != NULL) { 1101248571Smm dsl_dataset_t *origin; 1102248571Smm error = dsl_dataset_hold(dp, drba->drba_origin, 1103248571Smm FTAG, &origin); 1104248571Smm if (error != 0) { 1105248571Smm dsl_dataset_rele(ds, FTAG); 1106248571Smm return (error); 1107248571Smm } 1108286575Smav if (!origin->ds_is_snapshot) { 1109248571Smm dsl_dataset_rele(origin, FTAG); 1110248571Smm dsl_dataset_rele(ds, FTAG); 1111249195Smm return (SET_ERROR(EINVAL)); 1112248571Smm } 1113275782Sdelphij if (dsl_dataset_phys(origin)->ds_guid != fromguid) { 1114248571Smm dsl_dataset_rele(origin, FTAG); 1115248571Smm dsl_dataset_rele(ds, FTAG); 1116249195Smm return (SET_ERROR(ENODEV)); 1117248571Smm } 1118248571Smm dsl_dataset_rele(origin, FTAG); 1119248571Smm } 1120248571Smm dsl_dataset_rele(ds, FTAG); 1121248571Smm error = 0; 1122248571Smm } 1123248571Smm return (error); 1124248571Smm} 1125248571Smm 1126168404Spjdstatic void 1127248571Smmdmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 1128168404Spjd{ 1129248571Smm dmu_recv_begin_arg_t *drba = arg; 1130248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1131248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 1132248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 1133248571Smm dsl_dataset_t *ds, *newds; 1134185029Spjd uint64_t dsobj; 1135248571Smm int error; 1136248571Smm uint64_t crflags; 1137168404Spjd 1138248571Smm crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 1139248571Smm DS_FLAG_CI_DATASET : 0; 1140168404Spjd 1141248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 1142248571Smm if (error == 0) { 1143248571Smm /* create temporary clone */ 1144253820Sdelphij dsl_dataset_t *snap = NULL; 1145253820Sdelphij if (drba->drba_snapobj != 0) { 1146253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, 1147253820Sdelphij drba->drba_snapobj, FTAG, &snap)); 1148253820Sdelphij } 1149248571Smm dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 1150253820Sdelphij snap, crflags, drba->drba_cred, tx); 1151282632Savg if (drba->drba_snapobj != 0) 1152282632Savg dsl_dataset_rele(snap, FTAG); 1153248571Smm dsl_dataset_rele(ds, FTAG); 1154248571Smm } else { 1155248571Smm dsl_dir_t *dd; 1156248571Smm const char *tail; 1157248571Smm dsl_dataset_t *origin = NULL; 1158248571Smm 1159248571Smm VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 1160248571Smm 1161248571Smm if (drba->drba_origin != NULL) { 1162248571Smm VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 1163248571Smm FTAG, &origin)); 1164248571Smm } 1165248571Smm 1166248571Smm /* Create new dataset. */ 1167248571Smm dsobj = dsl_dataset_create_sync(dd, 1168248571Smm strrchr(tofs, '/') + 1, 1169248571Smm origin, crflags, drba->drba_cred, tx); 1170248571Smm if (origin != NULL) 1171248571Smm dsl_dataset_rele(origin, FTAG); 1172248571Smm dsl_dir_rele(dd, FTAG); 1173248571Smm drba->drba_cookie->drc_newfs = B_TRUE; 1174248571Smm } 1175248571Smm VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 1176248571Smm 1177274337Sdelphij if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & 1178274337Sdelphij DMU_BACKUP_FEATURE_LARGE_BLOCKS) && 1179274337Sdelphij !newds->ds_large_blocks) { 1180274337Sdelphij dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); 1181274337Sdelphij newds->ds_large_blocks = B_TRUE; 1182274337Sdelphij } 1183274337Sdelphij 1184248571Smm dmu_buf_will_dirty(newds->ds_dbuf, tx); 1185275782Sdelphij dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; 1186248571Smm 1187219089Spjd /* 1188219089Spjd * If we actually created a non-clone, we need to create the 1189219089Spjd * objset in our new dataset. 1190219089Spjd */ 1191248571Smm if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 1192219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 1193248571Smm newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 1194219089Spjd } 1195168404Spjd 1196248571Smm drba->drba_cookie->drc_ds = newds; 1197185029Spjd 1198248571Smm spa_history_log_internal_ds(newds, "receive", tx, ""); 1199168404Spjd} 1200168404Spjd 1201185029Spjd/* 1202185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 1203185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 1204185029Spjd */ 1205185029Spjdint 1206248571Smmdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 1207248571Smm boolean_t force, char *origin, dmu_recv_cookie_t *drc) 1208168404Spjd{ 1209248571Smm dmu_recv_begin_arg_t drba = { 0 }; 1210248571Smm dmu_replay_record_t *drr; 1211168404Spjd 1212185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 1213185029Spjd drc->drc_drrb = drrb; 1214185029Spjd drc->drc_tosnap = tosnap; 1215248571Smm drc->drc_tofs = tofs; 1216185029Spjd drc->drc_force = force; 1217264835Sdelphij drc->drc_cred = CRED(); 1218168404Spjd 1219248571Smm if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1220248571Smm drc->drc_byteswap = B_TRUE; 1221248571Smm else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 1222249195Smm return (SET_ERROR(EINVAL)); 1223168404Spjd 1224248571Smm drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1225248571Smm drr->drr_type = DRR_BEGIN; 1226248571Smm drr->drr_u.drr_begin = *drc->drc_drrb; 1227248571Smm if (drc->drc_byteswap) { 1228248571Smm fletcher_4_incremental_byteswap(drr, 1229248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1230248571Smm } else { 1231248571Smm fletcher_4_incremental_native(drr, 1232248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 1233248571Smm } 1234248571Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 1235219089Spjd 1236248571Smm if (drc->drc_byteswap) { 1237248571Smm drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1238248571Smm drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1239248571Smm drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1240248571Smm drrb->drr_type = BSWAP_32(drrb->drr_type); 1241248571Smm drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1242248571Smm drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1243248571Smm } 1244168404Spjd 1245248571Smm drba.drba_origin = origin; 1246248571Smm drba.drba_cookie = drc; 1247248571Smm drba.drba_cred = CRED(); 1248219089Spjd 1249248571Smm return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 1250268473Sdelphij &drba, 5, ZFS_SPACE_CHECK_NORMAL)); 1251168404Spjd} 1252168404Spjd 1253185029Spjdstruct restorearg { 1254185029Spjd int err; 1255248571Smm boolean_t byteswap; 1256185029Spjd kthread_t *td; 1257185029Spjd struct file *fp; 1258185029Spjd char *buf; 1259185029Spjd uint64_t voff; 1260185029Spjd int bufsize; /* amount of memory allocated for buf */ 1261185029Spjd zio_cksum_t cksum; 1262219089Spjd avl_tree_t *guid_to_ds_map; 1263185029Spjd}; 1264185029Spjd 1265219089Spjdtypedef struct guid_map_entry { 1266219089Spjd uint64_t guid; 1267219089Spjd dsl_dataset_t *gme_ds; 1268219089Spjd avl_node_t avlnode; 1269219089Spjd} guid_map_entry_t; 1270219089Spjd 1271168404Spjdstatic int 1272219089Spjdguid_compare(const void *arg1, const void *arg2) 1273168404Spjd{ 1274219089Spjd const guid_map_entry_t *gmep1 = arg1; 1275219089Spjd const guid_map_entry_t *gmep2 = arg2; 1276219089Spjd 1277219089Spjd if (gmep1->guid < gmep2->guid) 1278219089Spjd return (-1); 1279219089Spjd else if (gmep1->guid > gmep2->guid) 1280219089Spjd return (1); 1281219089Spjd return (0); 1282219089Spjd} 1283219089Spjd 1284219089Spjdstatic void 1285219089Spjdfree_guid_map_onexit(void *arg) 1286219089Spjd{ 1287219089Spjd avl_tree_t *ca = arg; 1288219089Spjd void *cookie = NULL; 1289219089Spjd guid_map_entry_t *gmep; 1290219089Spjd 1291219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 1292248571Smm dsl_dataset_long_rele(gmep->gme_ds, gmep); 1293249196Smm dsl_dataset_rele(gmep->gme_ds, gmep); 1294219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 1295219089Spjd } 1296219089Spjd avl_destroy(ca); 1297219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 1298219089Spjd} 1299219089Spjd 1300219089Spjdstatic int 1301219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1302219089Spjd{ 1303168404Spjd struct uio auio; 1304168404Spjd struct iovec aiov; 1305168404Spjd int error; 1306168404Spjd 1307168404Spjd aiov.iov_base = buf; 1308168404Spjd aiov.iov_len = len; 1309168404Spjd auio.uio_iov = &aiov; 1310168404Spjd auio.uio_iovcnt = 1; 1311168404Spjd auio.uio_resid = len; 1312169170Spjd auio.uio_segflg = UIO_SYSSPACE; 1313168404Spjd auio.uio_rw = UIO_READ; 1314168404Spjd auio.uio_offset = off; 1315168404Spjd auio.uio_td = ra->td; 1316168404Spjd#ifdef _KERNEL 1317168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1318168404Spjd#else 1319168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1320168404Spjd error = EOPNOTSUPP; 1321168404Spjd#endif 1322168404Spjd *resid = auio.uio_resid; 1323168404Spjd return (error); 1324168404Spjd} 1325168404Spjd 1326168404Spjdstatic void * 1327272601Sdelphijrestore_read(struct restorearg *ra, int len, char *buf) 1328168404Spjd{ 1329185029Spjd int done = 0; 1330168404Spjd 1331272601Sdelphij if (buf == NULL) 1332272601Sdelphij buf = ra->buf; 1333272601Sdelphij 1334168404Spjd /* some things will require 8-byte alignment, so everything must */ 1335240415Smm ASSERT0(len % 8); 1336274337Sdelphij ASSERT3U(len, <=, ra->bufsize); 1337168404Spjd 1338185029Spjd while (done < len) { 1339219089Spjd ssize_t resid; 1340168404Spjd 1341272601Sdelphij ra->err = restore_bytes(ra, buf + done, 1342185029Spjd len - done, ra->voff, &resid); 1343168404Spjd 1344185029Spjd if (resid == len - done) 1345249195Smm ra->err = SET_ERROR(EINVAL); 1346185029Spjd ra->voff += len - done - resid; 1347185029Spjd done = len - resid; 1348248571Smm if (ra->err != 0) 1349168404Spjd return (NULL); 1350168404Spjd } 1351168404Spjd 1352185029Spjd ASSERT3U(done, ==, len); 1353168404Spjd if (ra->byteswap) 1354272601Sdelphij fletcher_4_incremental_byteswap(buf, len, &ra->cksum); 1355168404Spjd else 1356272601Sdelphij fletcher_4_incremental_native(buf, len, &ra->cksum); 1357272601Sdelphij return (buf); 1358168404Spjd} 1359168404Spjd 1360168404Spjdstatic void 1361168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 1362168404Spjd{ 1363168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1364168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1365168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 1366185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1367168404Spjd switch (drr->drr_type) { 1368168404Spjd case DRR_BEGIN: 1369168404Spjd DO64(drr_begin.drr_magic); 1370219089Spjd DO64(drr_begin.drr_versioninfo); 1371168404Spjd DO64(drr_begin.drr_creation_time); 1372168404Spjd DO32(drr_begin.drr_type); 1373185029Spjd DO32(drr_begin.drr_flags); 1374168404Spjd DO64(drr_begin.drr_toguid); 1375168404Spjd DO64(drr_begin.drr_fromguid); 1376168404Spjd break; 1377168404Spjd case DRR_OBJECT: 1378168404Spjd DO64(drr_object.drr_object); 1379168404Spjd DO32(drr_object.drr_type); 1380168404Spjd DO32(drr_object.drr_bonustype); 1381168404Spjd DO32(drr_object.drr_blksz); 1382168404Spjd DO32(drr_object.drr_bonuslen); 1383219089Spjd DO64(drr_object.drr_toguid); 1384168404Spjd break; 1385168404Spjd case DRR_FREEOBJECTS: 1386168404Spjd DO64(drr_freeobjects.drr_firstobj); 1387168404Spjd DO64(drr_freeobjects.drr_numobjs); 1388219089Spjd DO64(drr_freeobjects.drr_toguid); 1389168404Spjd break; 1390168404Spjd case DRR_WRITE: 1391168404Spjd DO64(drr_write.drr_object); 1392168404Spjd DO32(drr_write.drr_type); 1393168404Spjd DO64(drr_write.drr_offset); 1394168404Spjd DO64(drr_write.drr_length); 1395219089Spjd DO64(drr_write.drr_toguid); 1396219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 1397219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1398219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1399219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1400219089Spjd DO64(drr_write.drr_key.ddk_prop); 1401168404Spjd break; 1402219089Spjd case DRR_WRITE_BYREF: 1403219089Spjd DO64(drr_write_byref.drr_object); 1404219089Spjd DO64(drr_write_byref.drr_offset); 1405219089Spjd DO64(drr_write_byref.drr_length); 1406219089Spjd DO64(drr_write_byref.drr_toguid); 1407219089Spjd DO64(drr_write_byref.drr_refguid); 1408219089Spjd DO64(drr_write_byref.drr_refobject); 1409219089Spjd DO64(drr_write_byref.drr_refoffset); 1410219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1411219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1412219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1413219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1414219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 1415219089Spjd break; 1416268075Sdelphij case DRR_WRITE_EMBEDDED: 1417268075Sdelphij DO64(drr_write_embedded.drr_object); 1418268075Sdelphij DO64(drr_write_embedded.drr_offset); 1419268075Sdelphij DO64(drr_write_embedded.drr_length); 1420268075Sdelphij DO64(drr_write_embedded.drr_toguid); 1421268075Sdelphij DO32(drr_write_embedded.drr_lsize); 1422268075Sdelphij DO32(drr_write_embedded.drr_psize); 1423268075Sdelphij break; 1424168404Spjd case DRR_FREE: 1425168404Spjd DO64(drr_free.drr_object); 1426168404Spjd DO64(drr_free.drr_offset); 1427168404Spjd DO64(drr_free.drr_length); 1428219089Spjd DO64(drr_free.drr_toguid); 1429168404Spjd break; 1430219089Spjd case DRR_SPILL: 1431219089Spjd DO64(drr_spill.drr_object); 1432219089Spjd DO64(drr_spill.drr_length); 1433219089Spjd DO64(drr_spill.drr_toguid); 1434219089Spjd break; 1435168404Spjd case DRR_END: 1436168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1437168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1438168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1439168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 1440219089Spjd DO64(drr_end.drr_toguid); 1441168404Spjd break; 1442168404Spjd } 1443168404Spjd#undef DO64 1444168404Spjd#undef DO32 1445168404Spjd} 1446168404Spjd 1447272810Sdelphijstatic inline uint8_t 1448272810Sdelphijdeduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size) 1449272810Sdelphij{ 1450272810Sdelphij if (bonus_type == DMU_OT_SA) { 1451272810Sdelphij return (1); 1452272810Sdelphij } else { 1453272810Sdelphij return (1 + 1454272810Sdelphij ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT)); 1455272810Sdelphij } 1456272810Sdelphij} 1457272810Sdelphij 1458168404Spjdstatic int 1459168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1460168404Spjd{ 1461272810Sdelphij dmu_object_info_t doi; 1462168404Spjd dmu_tx_t *tx; 1463200727Sdelphij void *data = NULL; 1464272810Sdelphij uint64_t object; 1465272810Sdelphij int err; 1466168404Spjd 1467168404Spjd if (drro->drr_type == DMU_OT_NONE || 1468236884Smm !DMU_OT_IS_VALID(drro->drr_type) || 1469236884Smm !DMU_OT_IS_VALID(drro->drr_bonustype) || 1470219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1471168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1472168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1473168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1474274337Sdelphij drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) || 1475168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1476249195Smm return (SET_ERROR(EINVAL)); 1477168404Spjd } 1478168404Spjd 1479272810Sdelphij err = dmu_object_info(os, drro->drr_object, &doi); 1480168404Spjd 1481200726Sdelphij if (err != 0 && err != ENOENT) 1482249195Smm return (SET_ERROR(EINVAL)); 1483272810Sdelphij object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; 1484200726Sdelphij 1485201756Sdelphij if (drro->drr_bonuslen) { 1486272601Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); 1487248571Smm if (ra->err != 0) 1488201756Sdelphij return (ra->err); 1489201756Sdelphij } 1490201756Sdelphij 1491272810Sdelphij /* 1492272810Sdelphij * If we are losing blkptrs or changing the block size this must 1493272810Sdelphij * be a new file instance. We must clear out the previous file 1494272810Sdelphij * contents before we can change this type of metadata in the dnode. 1495272810Sdelphij */ 1496272810Sdelphij if (err == 0) { 1497272810Sdelphij int nblkptr; 1498272810Sdelphij 1499272810Sdelphij nblkptr = deduce_nblkptr(drro->drr_bonustype, 1500272810Sdelphij drro->drr_bonuslen); 1501272810Sdelphij 1502272810Sdelphij if (drro->drr_blksz != doi.doi_data_block_size || 1503272810Sdelphij nblkptr < doi.doi_nblkptr) { 1504272810Sdelphij err = dmu_free_long_range(os, drro->drr_object, 1505272810Sdelphij 0, DMU_OBJECT_END); 1506272810Sdelphij if (err != 0) 1507272810Sdelphij return (SET_ERROR(EINVAL)); 1508272810Sdelphij } 1509272810Sdelphij } 1510272810Sdelphij 1511272810Sdelphij tx = dmu_tx_create(os); 1512272810Sdelphij dmu_tx_hold_bonus(tx, object); 1513272810Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1514272810Sdelphij if (err != 0) { 1515272810Sdelphij dmu_tx_abort(tx); 1516272810Sdelphij return (err); 1517272810Sdelphij } 1518272810Sdelphij 1519272810Sdelphij if (object == DMU_NEW_OBJECT) { 1520168404Spjd /* currently free, want to be allocated */ 1521168404Spjd err = dmu_object_claim(os, drro->drr_object, 1522168404Spjd drro->drr_type, drro->drr_blksz, 1523168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1524272810Sdelphij } else if (drro->drr_type != doi.doi_type || 1525272810Sdelphij drro->drr_blksz != doi.doi_data_block_size || 1526272810Sdelphij drro->drr_bonustype != doi.doi_bonus_type || 1527272810Sdelphij drro->drr_bonuslen != doi.doi_bonus_size) { 1528272810Sdelphij /* currently allocated, but with different properties */ 1529168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1530168404Spjd drro->drr_type, drro->drr_blksz, 1531272810Sdelphij drro->drr_bonustype, drro->drr_bonuslen, tx); 1532168404Spjd } 1533248571Smm if (err != 0) { 1534272810Sdelphij dmu_tx_commit(tx); 1535249195Smm return (SET_ERROR(EINVAL)); 1536219089Spjd } 1537200726Sdelphij 1538219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1539219089Spjd tx); 1540168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1541168404Spjd 1542200727Sdelphij if (data != NULL) { 1543168404Spjd dmu_buf_t *db; 1544200727Sdelphij 1545168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1546168404Spjd dmu_buf_will_dirty(db, tx); 1547168404Spjd 1548185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1549185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1550168404Spjd if (ra->byteswap) { 1551236884Smm dmu_object_byteswap_t byteswap = 1552236884Smm DMU_OT_BYTESWAP(drro->drr_bonustype); 1553236884Smm dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1554168404Spjd drro->drr_bonuslen); 1555168404Spjd } 1556168404Spjd dmu_buf_rele(db, FTAG); 1557168404Spjd } 1558168404Spjd dmu_tx_commit(tx); 1559168404Spjd return (0); 1560168404Spjd} 1561168404Spjd 1562168404Spjd/* ARGSUSED */ 1563168404Spjdstatic int 1564168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 1565168404Spjd struct drr_freeobjects *drrfo) 1566168404Spjd{ 1567168404Spjd uint64_t obj; 1568168404Spjd 1569168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1570249195Smm return (SET_ERROR(EINVAL)); 1571168404Spjd 1572168404Spjd for (obj = drrfo->drr_firstobj; 1573168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1574168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 1575168404Spjd int err; 1576168404Spjd 1577168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 1578168404Spjd continue; 1579168404Spjd 1580254753Sdelphij err = dmu_free_long_object(os, obj); 1581248571Smm if (err != 0) 1582168404Spjd return (err); 1583168404Spjd } 1584168404Spjd return (0); 1585168404Spjd} 1586168404Spjd 1587168404Spjdstatic int 1588168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1589168404Spjd struct drr_write *drrw) 1590168404Spjd{ 1591168404Spjd dmu_tx_t *tx; 1592168404Spjd void *data; 1593168404Spjd int err; 1594168404Spjd 1595168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1596236884Smm !DMU_OT_IS_VALID(drrw->drr_type)) 1597249195Smm return (SET_ERROR(EINVAL)); 1598168404Spjd 1599168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1600249195Smm return (SET_ERROR(EINVAL)); 1601168404Spjd 1602272601Sdelphij dmu_buf_t *bonus; 1603272601Sdelphij if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) 1604272601Sdelphij return (SET_ERROR(EINVAL)); 1605272601Sdelphij 1606272601Sdelphij arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); 1607272601Sdelphij 1608272601Sdelphij data = restore_read(ra, drrw->drr_length, abuf->b_data); 1609272601Sdelphij if (data == NULL) { 1610272601Sdelphij dmu_return_arcbuf(abuf); 1611272601Sdelphij dmu_buf_rele(bonus, FTAG); 1612272601Sdelphij return (ra->err); 1613272601Sdelphij } 1614272601Sdelphij 1615168404Spjd tx = dmu_tx_create(os); 1616168404Spjd 1617168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1618168404Spjd drrw->drr_offset, drrw->drr_length); 1619168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1620248571Smm if (err != 0) { 1621272601Sdelphij dmu_return_arcbuf(abuf); 1622272601Sdelphij dmu_buf_rele(bonus, FTAG); 1623168404Spjd dmu_tx_abort(tx); 1624168404Spjd return (err); 1625168404Spjd } 1626236884Smm if (ra->byteswap) { 1627236884Smm dmu_object_byteswap_t byteswap = 1628236884Smm DMU_OT_BYTESWAP(drrw->drr_type); 1629236884Smm dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1630236884Smm } 1631272601Sdelphij dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); 1632168404Spjd dmu_tx_commit(tx); 1633272601Sdelphij dmu_buf_rele(bonus, FTAG); 1634168404Spjd return (0); 1635168404Spjd} 1636168404Spjd 1637219089Spjd/* 1638219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1639219089Spjd * streams to refer to a copy of the data that is already on the 1640219089Spjd * system because it came in earlier in the stream. This function 1641219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1642219089Spjd * data from the stream to fulfill this write. 1643219089Spjd */ 1644219089Spjdstatic int 1645219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1646219089Spjd struct drr_write_byref *drrwbr) 1647219089Spjd{ 1648219089Spjd dmu_tx_t *tx; 1649219089Spjd int err; 1650219089Spjd guid_map_entry_t gmesrch; 1651219089Spjd guid_map_entry_t *gmep; 1652268075Sdelphij avl_index_t where; 1653219089Spjd objset_t *ref_os = NULL; 1654219089Spjd dmu_buf_t *dbp; 1655219089Spjd 1656219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1657249195Smm return (SET_ERROR(EINVAL)); 1658219089Spjd 1659219089Spjd /* 1660219089Spjd * If the GUID of the referenced dataset is different from the 1661219089Spjd * GUID of the target dataset, find the referenced dataset. 1662219089Spjd */ 1663219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1664219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1665219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1666219089Spjd &where)) == NULL) { 1667249195Smm return (SET_ERROR(EINVAL)); 1668219089Spjd } 1669219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1670249195Smm return (SET_ERROR(EINVAL)); 1671219089Spjd } else { 1672219089Spjd ref_os = os; 1673219089Spjd } 1674219089Spjd 1675268075Sdelphij err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1676268075Sdelphij drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); 1677268075Sdelphij if (err != 0) 1678219089Spjd return (err); 1679219089Spjd 1680219089Spjd tx = dmu_tx_create(os); 1681219089Spjd 1682219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1683219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1684219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1685248571Smm if (err != 0) { 1686219089Spjd dmu_tx_abort(tx); 1687219089Spjd return (err); 1688219089Spjd } 1689219089Spjd dmu_write(os, drrwbr->drr_object, 1690219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1691219089Spjd dmu_buf_rele(dbp, FTAG); 1692219089Spjd dmu_tx_commit(tx); 1693219089Spjd return (0); 1694219089Spjd} 1695219089Spjd 1696219089Spjdstatic int 1697268075Sdelphijrestore_write_embedded(struct restorearg *ra, objset_t *os, 1698268075Sdelphij struct drr_write_embedded *drrwnp) 1699268075Sdelphij{ 1700268075Sdelphij dmu_tx_t *tx; 1701268075Sdelphij int err; 1702268075Sdelphij void *data; 1703268075Sdelphij 1704268075Sdelphij if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset) 1705268075Sdelphij return (EINVAL); 1706268075Sdelphij 1707268075Sdelphij if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE) 1708268075Sdelphij return (EINVAL); 1709268075Sdelphij 1710268075Sdelphij if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES) 1711268075Sdelphij return (EINVAL); 1712268075Sdelphij if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) 1713268075Sdelphij return (EINVAL); 1714268075Sdelphij 1715272601Sdelphij data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); 1716268075Sdelphij if (data == NULL) 1717268075Sdelphij return (ra->err); 1718268075Sdelphij 1719268075Sdelphij tx = dmu_tx_create(os); 1720268075Sdelphij 1721268075Sdelphij dmu_tx_hold_write(tx, drrwnp->drr_object, 1722268075Sdelphij drrwnp->drr_offset, drrwnp->drr_length); 1723268075Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1724268075Sdelphij if (err != 0) { 1725268075Sdelphij dmu_tx_abort(tx); 1726268075Sdelphij return (err); 1727268075Sdelphij } 1728268075Sdelphij 1729268075Sdelphij dmu_write_embedded(os, drrwnp->drr_object, 1730268075Sdelphij drrwnp->drr_offset, data, drrwnp->drr_etype, 1731268075Sdelphij drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize, 1732268075Sdelphij ra->byteswap ^ ZFS_HOST_BYTEORDER, tx); 1733268075Sdelphij 1734268075Sdelphij dmu_tx_commit(tx); 1735268075Sdelphij return (0); 1736268075Sdelphij} 1737268075Sdelphij 1738268075Sdelphijstatic int 1739219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1740219089Spjd{ 1741219089Spjd dmu_tx_t *tx; 1742219089Spjd void *data; 1743219089Spjd dmu_buf_t *db, *db_spill; 1744219089Spjd int err; 1745219089Spjd 1746219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1747274337Sdelphij drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os))) 1748249195Smm return (SET_ERROR(EINVAL)); 1749219089Spjd 1750272601Sdelphij data = restore_read(ra, drrs->drr_length, NULL); 1751219089Spjd if (data == NULL) 1752219089Spjd return (ra->err); 1753219089Spjd 1754219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1755249195Smm return (SET_ERROR(EINVAL)); 1756219089Spjd 1757219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1758219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1759219089Spjd dmu_buf_rele(db, FTAG); 1760219089Spjd return (err); 1761219089Spjd } 1762219089Spjd 1763219089Spjd tx = dmu_tx_create(os); 1764219089Spjd 1765219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1766219089Spjd 1767219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1768248571Smm if (err != 0) { 1769219089Spjd dmu_buf_rele(db, FTAG); 1770219089Spjd dmu_buf_rele(db_spill, FTAG); 1771219089Spjd dmu_tx_abort(tx); 1772219089Spjd return (err); 1773219089Spjd } 1774219089Spjd dmu_buf_will_dirty(db_spill, tx); 1775219089Spjd 1776219089Spjd if (db_spill->db_size < drrs->drr_length) 1777219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1778219089Spjd drrs->drr_length, tx)); 1779219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1780219089Spjd 1781219089Spjd dmu_buf_rele(db, FTAG); 1782219089Spjd dmu_buf_rele(db_spill, FTAG); 1783219089Spjd 1784219089Spjd dmu_tx_commit(tx); 1785219089Spjd return (0); 1786219089Spjd} 1787219089Spjd 1788168404Spjd/* ARGSUSED */ 1789168404Spjdstatic int 1790168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1791168404Spjd struct drr_free *drrf) 1792168404Spjd{ 1793168404Spjd int err; 1794168404Spjd 1795168404Spjd if (drrf->drr_length != -1ULL && 1796168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1797249195Smm return (SET_ERROR(EINVAL)); 1798168404Spjd 1799168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1800249195Smm return (SET_ERROR(EINVAL)); 1801168404Spjd 1802185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1803168404Spjd drrf->drr_offset, drrf->drr_length); 1804168404Spjd return (err); 1805168404Spjd} 1806168404Spjd 1807248571Smm/* used to destroy the drc_ds on error */ 1808248571Smmstatic void 1809248571Smmdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1810248571Smm{ 1811248571Smm char name[MAXNAMELEN]; 1812248571Smm dsl_dataset_name(drc->drc_ds, name); 1813248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1814248571Smm (void) dsl_destroy_head(name); 1815248571Smm} 1816248571Smm 1817185029Spjd/* 1818185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1819185029Spjd */ 1820168404Spjdint 1821219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1822219089Spjd int cleanup_fd, uint64_t *action_handlep) 1823168404Spjd{ 1824185029Spjd struct restorearg ra = { 0 }; 1825168404Spjd dmu_replay_record_t *drr; 1826185029Spjd objset_t *os; 1827185029Spjd zio_cksum_t pcksum; 1828219089Spjd int featureflags; 1829168404Spjd 1830248571Smm ra.byteswap = drc->drc_byteswap; 1831248571Smm ra.cksum = drc->drc_cksum; 1832219089Spjd ra.td = curthread; 1833185029Spjd ra.fp = fp; 1834185029Spjd ra.voff = *voffp; 1835274337Sdelphij ra.bufsize = SPA_MAXBLOCKSIZE; 1836185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1837168404Spjd 1838185029Spjd /* these were verified in dmu_recv_begin */ 1839248571Smm ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1840219089Spjd DMU_SUBSTREAM); 1841248571Smm ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1842168404Spjd 1843168404Spjd /* 1844168404Spjd * Open the objset we are modifying. 1845168404Spjd */ 1846248571Smm VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1847168404Spjd 1848275782Sdelphij ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); 1849168404Spjd 1850219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1851219089Spjd 1852219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1853219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1854219089Spjd minor_t minor; 1855219089Spjd 1856219089Spjd if (cleanup_fd == -1) { 1857249195Smm ra.err = SET_ERROR(EBADF); 1858219089Spjd goto out; 1859219089Spjd } 1860219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1861248571Smm if (ra.err != 0) { 1862219089Spjd cleanup_fd = -1; 1863219089Spjd goto out; 1864219089Spjd } 1865219089Spjd 1866219089Spjd if (*action_handlep == 0) { 1867219089Spjd ra.guid_to_ds_map = 1868219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1869219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1870219089Spjd sizeof (guid_map_entry_t), 1871219089Spjd offsetof(guid_map_entry_t, avlnode)); 1872219089Spjd ra.err = zfs_onexit_add_cb(minor, 1873219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1874219089Spjd action_handlep); 1875248571Smm if (ra.err != 0) 1876219089Spjd goto out; 1877219089Spjd } else { 1878219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1879219089Spjd (void **)&ra.guid_to_ds_map); 1880248571Smm if (ra.err != 0) 1881219089Spjd goto out; 1882219089Spjd } 1883221263Smm 1884221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1885219089Spjd } 1886219089Spjd 1887168404Spjd /* 1888168404Spjd * Read records and process them. 1889168404Spjd */ 1890185029Spjd pcksum = ra.cksum; 1891168404Spjd while (ra.err == 0 && 1892272601Sdelphij NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { 1893185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1894249195Smm ra.err = SET_ERROR(EINTR); 1895168404Spjd goto out; 1896168404Spjd } 1897168404Spjd 1898168404Spjd if (ra.byteswap) 1899168404Spjd backup_byteswap(drr); 1900168404Spjd 1901168404Spjd switch (drr->drr_type) { 1902168404Spjd case DRR_OBJECT: 1903168404Spjd { 1904168404Spjd /* 1905168404Spjd * We need to make a copy of the record header, 1906168404Spjd * because restore_{object,write} may need to 1907168404Spjd * restore_read(), which will invalidate drr. 1908168404Spjd */ 1909168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1910168404Spjd ra.err = restore_object(&ra, os, &drro); 1911168404Spjd break; 1912168404Spjd } 1913168404Spjd case DRR_FREEOBJECTS: 1914168404Spjd { 1915168404Spjd struct drr_freeobjects drrfo = 1916168404Spjd drr->drr_u.drr_freeobjects; 1917168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1918168404Spjd break; 1919168404Spjd } 1920168404Spjd case DRR_WRITE: 1921168404Spjd { 1922168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1923168404Spjd ra.err = restore_write(&ra, os, &drrw); 1924168404Spjd break; 1925168404Spjd } 1926219089Spjd case DRR_WRITE_BYREF: 1927219089Spjd { 1928219089Spjd struct drr_write_byref drrwbr = 1929219089Spjd drr->drr_u.drr_write_byref; 1930219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1931219089Spjd break; 1932219089Spjd } 1933268075Sdelphij case DRR_WRITE_EMBEDDED: 1934268075Sdelphij { 1935268075Sdelphij struct drr_write_embedded drrwe = 1936268075Sdelphij drr->drr_u.drr_write_embedded; 1937268075Sdelphij ra.err = restore_write_embedded(&ra, os, &drrwe); 1938268075Sdelphij break; 1939268075Sdelphij } 1940168404Spjd case DRR_FREE: 1941168404Spjd { 1942168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1943168404Spjd ra.err = restore_free(&ra, os, &drrf); 1944168404Spjd break; 1945168404Spjd } 1946168404Spjd case DRR_END: 1947168404Spjd { 1948168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1949168404Spjd /* 1950168404Spjd * We compare against the *previous* checksum 1951168404Spjd * value, because the stored checksum is of 1952168404Spjd * everything before the DRR_END record. 1953168404Spjd */ 1954185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1955249195Smm ra.err = SET_ERROR(ECKSUM); 1956168404Spjd goto out; 1957168404Spjd } 1958219089Spjd case DRR_SPILL: 1959219089Spjd { 1960219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1961219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1962219089Spjd break; 1963219089Spjd } 1964168404Spjd default: 1965249195Smm ra.err = SET_ERROR(EINVAL); 1966168404Spjd goto out; 1967168404Spjd } 1968185029Spjd pcksum = ra.cksum; 1969168404Spjd } 1970185029Spjd ASSERT(ra.err != 0); 1971168404Spjd 1972168404Spjdout: 1973219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1974219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1975168404Spjd 1976185029Spjd if (ra.err != 0) { 1977168404Spjd /* 1978219089Spjd * destroy what we created, so we don't leave it in the 1979219089Spjd * inconsistent restoring state. 1980168404Spjd */ 1981248571Smm dmu_recv_cleanup_ds(drc); 1982168404Spjd } 1983168404Spjd 1984168404Spjd kmem_free(ra.buf, ra.bufsize); 1985185029Spjd *voffp = ra.voff; 1986168404Spjd return (ra.err); 1987168404Spjd} 1988185029Spjd 1989185029Spjdstatic int 1990248571Smmdmu_recv_end_check(void *arg, dmu_tx_t *tx) 1991185029Spjd{ 1992248571Smm dmu_recv_cookie_t *drc = arg; 1993248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1994248571Smm int error; 1995185029Spjd 1996248571Smm ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1997248571Smm 1998248571Smm if (!drc->drc_newfs) { 1999248571Smm dsl_dataset_t *origin_head; 2000248571Smm 2001248571Smm error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 2002248571Smm if (error != 0) 2003248571Smm return (error); 2004253820Sdelphij if (drc->drc_force) { 2005253820Sdelphij /* 2006253820Sdelphij * We will destroy any snapshots in tofs (i.e. before 2007253820Sdelphij * origin_head) that are after the origin (which is 2008253820Sdelphij * the snap before drc_ds, because drc_ds can not 2009253820Sdelphij * have any snaps of its own). 2010253820Sdelphij */ 2011275782Sdelphij uint64_t obj; 2012275782Sdelphij 2013275782Sdelphij obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj; 2014275782Sdelphij while (obj != 2015275782Sdelphij dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) { 2016253820Sdelphij dsl_dataset_t *snap; 2017253820Sdelphij error = dsl_dataset_hold_obj(dp, obj, FTAG, 2018253820Sdelphij &snap); 2019253820Sdelphij if (error != 0) 2020282473Savg break; 2021253820Sdelphij if (snap->ds_dir != origin_head->ds_dir) 2022253820Sdelphij error = SET_ERROR(EINVAL); 2023253820Sdelphij if (error == 0) { 2024253820Sdelphij error = dsl_destroy_snapshot_check_impl( 2025253820Sdelphij snap, B_FALSE); 2026253820Sdelphij } 2027275782Sdelphij obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 2028253820Sdelphij dsl_dataset_rele(snap, FTAG); 2029253820Sdelphij if (error != 0) 2030282473Savg break; 2031253820Sdelphij } 2032282473Savg if (error != 0) { 2033282473Savg dsl_dataset_rele(origin_head, FTAG); 2034282473Savg return (error); 2035282473Savg } 2036253820Sdelphij } 2037248571Smm error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 2038253816Sdelphij origin_head, drc->drc_force, drc->drc_owner, tx); 2039248571Smm if (error != 0) { 2040248571Smm dsl_dataset_rele(origin_head, FTAG); 2041248571Smm return (error); 2042248571Smm } 2043248571Smm error = dsl_dataset_snapshot_check_impl(origin_head, 2044264835Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 2045248571Smm dsl_dataset_rele(origin_head, FTAG); 2046248571Smm if (error != 0) 2047248571Smm return (error); 2048248571Smm 2049248571Smm error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 2050248571Smm } else { 2051248571Smm error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 2052264835Sdelphij drc->drc_tosnap, tx, B_TRUE, 1, drc->drc_cred); 2053248571Smm } 2054248571Smm return (error); 2055185029Spjd} 2056185029Spjd 2057185029Spjdstatic void 2058248571Smmdmu_recv_end_sync(void *arg, dmu_tx_t *tx) 2059185029Spjd{ 2060248571Smm dmu_recv_cookie_t *drc = arg; 2061248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 2062185029Spjd 2063248571Smm spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 2064248571Smm tx, "snap=%s", drc->drc_tosnap); 2065185029Spjd 2066248571Smm if (!drc->drc_newfs) { 2067248571Smm dsl_dataset_t *origin_head; 2068185029Spjd 2069248571Smm VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 2070248571Smm &origin_head)); 2071253820Sdelphij 2072253820Sdelphij if (drc->drc_force) { 2073253820Sdelphij /* 2074253820Sdelphij * Destroy any snapshots of drc_tofs (origin_head) 2075253820Sdelphij * after the origin (the snap before drc_ds). 2076253820Sdelphij */ 2077275782Sdelphij uint64_t obj; 2078275782Sdelphij 2079275782Sdelphij obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj; 2080275782Sdelphij while (obj != 2081275782Sdelphij dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) { 2082253820Sdelphij dsl_dataset_t *snap; 2083253820Sdelphij VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, 2084253820Sdelphij &snap)); 2085253820Sdelphij ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); 2086275782Sdelphij obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 2087253820Sdelphij dsl_destroy_snapshot_sync_impl(snap, 2088253820Sdelphij B_FALSE, tx); 2089253820Sdelphij dsl_dataset_rele(snap, FTAG); 2090253820Sdelphij } 2091253820Sdelphij } 2092253820Sdelphij VERIFY3P(drc->drc_ds->ds_prev, ==, 2093253820Sdelphij origin_head->ds_prev); 2094253820Sdelphij 2095248571Smm dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 2096248571Smm origin_head, tx); 2097248571Smm dsl_dataset_snapshot_sync_impl(origin_head, 2098248571Smm drc->drc_tosnap, tx); 2099248571Smm 2100248571Smm /* set snapshot's creation time and guid */ 2101248571Smm dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 2102275782Sdelphij dsl_dataset_phys(origin_head->ds_prev)->ds_creation_time = 2103248571Smm drc->drc_drrb->drr_creation_time; 2104275782Sdelphij dsl_dataset_phys(origin_head->ds_prev)->ds_guid = 2105248571Smm drc->drc_drrb->drr_toguid; 2106275782Sdelphij dsl_dataset_phys(origin_head->ds_prev)->ds_flags &= 2107248571Smm ~DS_FLAG_INCONSISTENT; 2108248571Smm 2109248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2110275782Sdelphij dsl_dataset_phys(origin_head)->ds_flags &= 2111275782Sdelphij ~DS_FLAG_INCONSISTENT; 2112248571Smm 2113248571Smm dsl_dataset_rele(origin_head, FTAG); 2114248571Smm dsl_destroy_head_sync_impl(drc->drc_ds, tx); 2115253816Sdelphij 2116253816Sdelphij if (drc->drc_owner != NULL) 2117253816Sdelphij VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner); 2118248571Smm } else { 2119248571Smm dsl_dataset_t *ds = drc->drc_ds; 2120248571Smm 2121248571Smm dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 2122248571Smm 2123248571Smm /* set snapshot's creation time and guid */ 2124248571Smm dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 2125275782Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_creation_time = 2126248571Smm drc->drc_drrb->drr_creation_time; 2127275782Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_guid = 2128275782Sdelphij drc->drc_drrb->drr_toguid; 2129275782Sdelphij dsl_dataset_phys(ds->ds_prev)->ds_flags &= 2130275782Sdelphij ~DS_FLAG_INCONSISTENT; 2131248571Smm 2132248571Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 2133275782Sdelphij dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; 2134248571Smm } 2135275782Sdelphij drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; 2136248571Smm /* 2137248571Smm * Release the hold from dmu_recv_begin. This must be done before 2138248571Smm * we return to open context, so that when we free the dataset's dnode, 2139248571Smm * we can evict its bonus buffer. 2140248571Smm */ 2141248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 2142248571Smm drc->drc_ds = NULL; 2143185029Spjd} 2144185029Spjd 2145219089Spjdstatic int 2146248571Smmadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 2147221263Smm{ 2148248571Smm dsl_pool_t *dp; 2149221263Smm dsl_dataset_t *snapds; 2150221263Smm guid_map_entry_t *gmep; 2151221263Smm int err; 2152221263Smm 2153221263Smm ASSERT(guid_map != NULL); 2154221263Smm 2155248571Smm err = dsl_pool_hold(name, FTAG, &dp); 2156248571Smm if (err != 0) 2157248571Smm return (err); 2158249356Smm gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); 2159249196Smm err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); 2160221263Smm if (err == 0) { 2161275782Sdelphij gmep->guid = dsl_dataset_phys(snapds)->ds_guid; 2162221263Smm gmep->gme_ds = snapds; 2163221263Smm avl_add(guid_map, gmep); 2164248571Smm dsl_dataset_long_hold(snapds, gmep); 2165249196Smm } else 2166249356Smm kmem_free(gmep, sizeof (*gmep)); 2167221263Smm 2168248571Smm dsl_pool_rele(dp, FTAG); 2169221263Smm return (err); 2170221263Smm} 2171221263Smm 2172248571Smmstatic int dmu_recv_end_modified_blocks = 3; 2173248571Smm 2174221263Smmstatic int 2175219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 2176185029Spjd{ 2177248571Smm int error; 2178248571Smm char name[MAXNAMELEN]; 2179185029Spjd 2180248571Smm#ifdef _KERNEL 2181248571Smm /* 2182248571Smm * We will be destroying the ds; make sure its origin is unmounted if 2183248571Smm * necessary. 2184248571Smm */ 2185248571Smm dsl_dataset_name(drc->drc_ds, name); 2186248571Smm zfs_destroy_unmount_origin(name); 2187248571Smm#endif 2188185029Spjd 2189248571Smm error = dsl_sync_task(drc->drc_tofs, 2190248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2191268473Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2192185029Spjd 2193248571Smm if (error != 0) 2194248571Smm dmu_recv_cleanup_ds(drc); 2195248571Smm return (error); 2196185029Spjd} 2197219089Spjd 2198219089Spjdstatic int 2199219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 2200219089Spjd{ 2201248571Smm int error; 2202219089Spjd 2203248571Smm error = dsl_sync_task(drc->drc_tofs, 2204248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 2205268473Sdelphij dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL); 2206219089Spjd 2207248571Smm if (error != 0) { 2208248571Smm dmu_recv_cleanup_ds(drc); 2209248571Smm } else if (drc->drc_guid_to_ds_map != NULL) { 2210248571Smm (void) add_ds_to_guidmap(drc->drc_tofs, 2211248571Smm drc->drc_guid_to_ds_map, 2212248571Smm drc->drc_newsnapobj); 2213219089Spjd } 2214248571Smm return (error); 2215219089Spjd} 2216219089Spjd 2217219089Spjdint 2218253816Sdelphijdmu_recv_end(dmu_recv_cookie_t *drc, void *owner) 2219219089Spjd{ 2220253816Sdelphij drc->drc_owner = owner; 2221253816Sdelphij 2222248571Smm if (drc->drc_newfs) 2223248571Smm return (dmu_recv_new_end(drc)); 2224248571Smm else 2225219089Spjd return (dmu_recv_existing_end(drc)); 2226219089Spjd} 2227253821Sdelphij 2228253821Sdelphij/* 2229253821Sdelphij * Return TRUE if this objset is currently being received into. 2230253821Sdelphij */ 2231253821Sdelphijboolean_t 2232253821Sdelphijdmu_objset_is_receiving(objset_t *os) 2233253821Sdelphij{ 2234253821Sdelphij return (os->os_dsl_dataset != NULL && 2235253821Sdelphij os->os_dsl_dataset->ds_owner == dmu_recv_tag); 2236253821Sdelphij} 2237