dmu_send.c revision 248571
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24236884Smm * Copyright (c) 2012 by Delphix. All rights reserved. 25235222Smm * Copyright (c) 2012, Joyent, Inc. All rights reserved. 26235222Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved. 27221263Smm */ 28168404Spjd 29168404Spjd#include <sys/dmu.h> 30168404Spjd#include <sys/dmu_impl.h> 31168404Spjd#include <sys/dmu_tx.h> 32168404Spjd#include <sys/dbuf.h> 33168404Spjd#include <sys/dnode.h> 34168404Spjd#include <sys/zfs_context.h> 35168404Spjd#include <sys/dmu_objset.h> 36168404Spjd#include <sys/dmu_traverse.h> 37168404Spjd#include <sys/dsl_dataset.h> 38168404Spjd#include <sys/dsl_dir.h> 39219089Spjd#include <sys/dsl_prop.h> 40168404Spjd#include <sys/dsl_pool.h> 41168404Spjd#include <sys/dsl_synctask.h> 42168404Spjd#include <sys/zfs_ioctl.h> 43168404Spjd#include <sys/zap.h> 44168404Spjd#include <sys/zio_checksum.h> 45219089Spjd#include <sys/zfs_znode.h> 46219089Spjd#include <zfs_fletcher.h> 47219089Spjd#include <sys/avl.h> 48219089Spjd#include <sys/ddt.h> 49219089Spjd#include <sys/zfs_onexit.h> 50248571Smm#include <sys/dmu_send.h> 51248571Smm#include <sys/dsl_destroy.h> 52168404Spjd 53228103Smm/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */ 54228103Smmint zfs_send_corrupt_data = B_FALSE; 55228103Smm 56185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 57248571Smmstatic const char *recv_clone_name = "%recv"; 58185029Spjd 59168404Spjdstatic int 60235222Smmdump_bytes(dmu_sendarg_t *dsp, void *buf, int len) 61168404Spjd{ 62235222Smm dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; 63168404Spjd struct uio auio; 64168404Spjd struct iovec aiov; 65240415Smm ASSERT0(len % 8); 66168404Spjd 67235222Smm fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); 68168404Spjd aiov.iov_base = buf; 69168404Spjd aiov.iov_len = len; 70168404Spjd auio.uio_iov = &aiov; 71168404Spjd auio.uio_iovcnt = 1; 72168404Spjd auio.uio_resid = len; 73169170Spjd auio.uio_segflg = UIO_SYSSPACE; 74168404Spjd auio.uio_rw = UIO_WRITE; 75168404Spjd auio.uio_offset = (off_t)-1; 76235222Smm auio.uio_td = dsp->dsa_td; 77168404Spjd#ifdef _KERNEL 78235222Smm if (dsp->dsa_fp->f_type == DTYPE_VNODE) 79168404Spjd bwillwrite(); 80235222Smm dsp->dsa_err = fo_write(dsp->dsa_fp, &auio, dsp->dsa_td->td_ucred, 0, 81235222Smm dsp->dsa_td); 82168404Spjd#else 83168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 84235222Smm dsp->dsa_err = EOPNOTSUPP; 85168404Spjd#endif 86235222Smm mutex_enter(&ds->ds_sendstream_lock); 87235222Smm *dsp->dsa_off += len; 88235222Smm mutex_exit(&ds->ds_sendstream_lock); 89235222Smm 90235222Smm return (dsp->dsa_err); 91168404Spjd} 92168404Spjd 93168404Spjdstatic int 94235222Smmdump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, 95168404Spjd uint64_t length) 96168404Spjd{ 97235222Smm struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); 98219089Spjd 99237458Smm if (length != -1ULL && offset + length < offset) 100237458Smm length = -1ULL; 101237458Smm 102219089Spjd /* 103219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 104219089Spjd * since free block aggregation can only be done for blocks of the 105219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 106219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 107219089Spjd * aggregated with other DRR_FREEOBJECTS records. 108219089Spjd */ 109235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 110235222Smm dsp->dsa_pending_op != PENDING_FREE) { 111235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 112235222Smm sizeof (dmu_replay_record_t)) != 0) 113219089Spjd return (EINTR); 114235222Smm dsp->dsa_pending_op = PENDING_NONE; 115219089Spjd } 116219089Spjd 117235222Smm if (dsp->dsa_pending_op == PENDING_FREE) { 118219089Spjd /* 119219089Spjd * There should never be a PENDING_FREE if length is -1 120219089Spjd * (because dump_dnode is the only place where this 121219089Spjd * function is called with a -1, and only after flushing 122219089Spjd * any pending record). 123219089Spjd */ 124219089Spjd ASSERT(length != -1ULL); 125219089Spjd /* 126219089Spjd * Check to see whether this free block can be aggregated 127219089Spjd * with pending one. 128219089Spjd */ 129219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 130219089Spjd drrf->drr_length == offset) { 131219089Spjd drrf->drr_length += length; 132219089Spjd return (0); 133219089Spjd } else { 134219089Spjd /* not a continuation. Push out pending record */ 135235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 136219089Spjd sizeof (dmu_replay_record_t)) != 0) 137219089Spjd return (EINTR); 138235222Smm dsp->dsa_pending_op = PENDING_NONE; 139219089Spjd } 140219089Spjd } 141219089Spjd /* create a FREE record and make it pending */ 142235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 143235222Smm dsp->dsa_drr->drr_type = DRR_FREE; 144219089Spjd drrf->drr_object = object; 145219089Spjd drrf->drr_offset = offset; 146219089Spjd drrf->drr_length = length; 147235222Smm drrf->drr_toguid = dsp->dsa_toguid; 148219089Spjd if (length == -1ULL) { 149235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 150235222Smm sizeof (dmu_replay_record_t)) != 0) 151219089Spjd return (EINTR); 152219089Spjd } else { 153235222Smm dsp->dsa_pending_op = PENDING_FREE; 154219089Spjd } 155168404Spjd 156168404Spjd return (0); 157168404Spjd} 158168404Spjd 159168404Spjdstatic int 160235222Smmdump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, 161219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 162168404Spjd{ 163235222Smm struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); 164219089Spjd 165219089Spjd 166219089Spjd /* 167219089Spjd * If there is any kind of pending aggregation (currently either 168219089Spjd * a grouping of free objects or free blocks), push it out to 169219089Spjd * the stream, since aggregation can't be done across operations 170219089Spjd * of different types. 171219089Spjd */ 172235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 173235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 174235222Smm sizeof (dmu_replay_record_t)) != 0) 175219089Spjd return (EINTR); 176235222Smm dsp->dsa_pending_op = PENDING_NONE; 177219089Spjd } 178168404Spjd /* write a DATA record */ 179235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 180235222Smm dsp->dsa_drr->drr_type = DRR_WRITE; 181219089Spjd drrw->drr_object = object; 182219089Spjd drrw->drr_type = type; 183219089Spjd drrw->drr_offset = offset; 184219089Spjd drrw->drr_length = blksz; 185235222Smm drrw->drr_toguid = dsp->dsa_toguid; 186219089Spjd drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 187219089Spjd if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 188219089Spjd drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 189219089Spjd DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 190219089Spjd DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 191219089Spjd DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 192219089Spjd drrw->drr_key.ddk_cksum = bp->blk_cksum; 193168404Spjd 194235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 195219089Spjd return (EINTR); 196235222Smm if (dump_bytes(dsp, data, blksz) != 0) 197219089Spjd return (EINTR); 198219089Spjd return (0); 199219089Spjd} 200219089Spjd 201219089Spjdstatic int 202235222Smmdump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) 203219089Spjd{ 204235222Smm struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); 205219089Spjd 206235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 207235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 208235222Smm sizeof (dmu_replay_record_t)) != 0) 209219089Spjd return (EINTR); 210235222Smm dsp->dsa_pending_op = PENDING_NONE; 211219089Spjd } 212219089Spjd 213219089Spjd /* write a SPILL record */ 214235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 215235222Smm dsp->dsa_drr->drr_type = DRR_SPILL; 216219089Spjd drrs->drr_object = object; 217219089Spjd drrs->drr_length = blksz; 218235222Smm drrs->drr_toguid = dsp->dsa_toguid; 219219089Spjd 220235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) 221168404Spjd return (EINTR); 222235222Smm if (dump_bytes(dsp, data, blksz)) 223168404Spjd return (EINTR); 224168404Spjd return (0); 225168404Spjd} 226168404Spjd 227168404Spjdstatic int 228235222Smmdump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) 229168404Spjd{ 230235222Smm struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); 231219089Spjd 232219089Spjd /* 233219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 234219089Spjd * push it out, since free block aggregation can only be done for 235219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 236219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 237219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 238219089Spjd */ 239235222Smm if (dsp->dsa_pending_op != PENDING_NONE && 240235222Smm dsp->dsa_pending_op != PENDING_FREEOBJECTS) { 241235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 242235222Smm sizeof (dmu_replay_record_t)) != 0) 243219089Spjd return (EINTR); 244235222Smm dsp->dsa_pending_op = PENDING_NONE; 245219089Spjd } 246235222Smm if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { 247219089Spjd /* 248219089Spjd * See whether this free object array can be aggregated 249219089Spjd * with pending one 250219089Spjd */ 251219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 252219089Spjd drrfo->drr_numobjs += numobjs; 253219089Spjd return (0); 254219089Spjd } else { 255219089Spjd /* can't be aggregated. Push out pending record */ 256235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 257219089Spjd sizeof (dmu_replay_record_t)) != 0) 258219089Spjd return (EINTR); 259235222Smm dsp->dsa_pending_op = PENDING_NONE; 260219089Spjd } 261219089Spjd } 262219089Spjd 263168404Spjd /* write a FREEOBJECTS record */ 264235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 265235222Smm dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; 266219089Spjd drrfo->drr_firstobj = firstobj; 267219089Spjd drrfo->drr_numobjs = numobjs; 268235222Smm drrfo->drr_toguid = dsp->dsa_toguid; 269168404Spjd 270235222Smm dsp->dsa_pending_op = PENDING_FREEOBJECTS; 271219089Spjd 272168404Spjd return (0); 273168404Spjd} 274168404Spjd 275168404Spjdstatic int 276235222Smmdump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) 277168404Spjd{ 278235222Smm struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); 279219089Spjd 280168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 281235222Smm return (dump_freeobjects(dsp, object, 1)); 282168404Spjd 283235222Smm if (dsp->dsa_pending_op != PENDING_NONE) { 284235222Smm if (dump_bytes(dsp, dsp->dsa_drr, 285235222Smm sizeof (dmu_replay_record_t)) != 0) 286219089Spjd return (EINTR); 287235222Smm dsp->dsa_pending_op = PENDING_NONE; 288219089Spjd } 289219089Spjd 290168404Spjd /* write an OBJECT record */ 291235222Smm bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); 292235222Smm dsp->dsa_drr->drr_type = DRR_OBJECT; 293219089Spjd drro->drr_object = object; 294219089Spjd drro->drr_type = dnp->dn_type; 295219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 296219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 297219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 298219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 299219089Spjd drro->drr_compress = dnp->dn_compress; 300235222Smm drro->drr_toguid = dsp->dsa_toguid; 301168404Spjd 302235222Smm if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) 303168404Spjd return (EINTR); 304168404Spjd 305235222Smm if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 306168404Spjd return (EINTR); 307168404Spjd 308168404Spjd /* free anything past the end of the file */ 309235222Smm if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * 310168404Spjd (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 311168404Spjd return (EINTR); 312248571Smm if (dsp->dsa_err != 0) 313168404Spjd return (EINTR); 314168404Spjd return (0); 315168404Spjd} 316168404Spjd 317168404Spjd#define BP_SPAN(dnp, level) \ 318168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 319168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 320168404Spjd 321219089Spjd/* ARGSUSED */ 322168404Spjdstatic int 323246666Smmbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 324219089Spjd const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 325168404Spjd{ 326235222Smm dmu_sendarg_t *dsp = arg; 327168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 328168404Spjd int err = 0; 329168404Spjd 330185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 331168404Spjd return (EINTR); 332168404Spjd 333219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 334219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 335209962Smm return (0); 336219089Spjd } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { 337208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 338208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 339235222Smm err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); 340168404Spjd } else if (bp == NULL) { 341208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 342235222Smm err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); 343208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 344208047Smm return (0); 345208047Smm } else if (type == DMU_OT_DNODE) { 346208047Smm dnode_phys_t *blk; 347168404Spjd int i; 348168404Spjd int blksz = BP_GET_LSIZE(bp); 349208047Smm uint32_t aflags = ARC_WAIT; 350208047Smm arc_buf_t *abuf; 351168404Spjd 352246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 353246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 354246666Smm &aflags, zb) != 0) 355208047Smm return (EIO); 356208047Smm 357208047Smm blk = abuf->b_data; 358168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 359208047Smm uint64_t dnobj = (zb->zb_blkid << 360208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 361235222Smm err = dump_dnode(dsp, dnobj, blk+i); 362248571Smm if (err != 0) 363168404Spjd break; 364168404Spjd } 365208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 366219089Spjd } else if (type == DMU_OT_SA) { 367208047Smm uint32_t aflags = ARC_WAIT; 368208047Smm arc_buf_t *abuf; 369168404Spjd int blksz = BP_GET_LSIZE(bp); 370168404Spjd 371246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 372246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 373246666Smm &aflags, zb) != 0) 374208047Smm return (EIO); 375168404Spjd 376235222Smm err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); 377219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 378219089Spjd } else { /* it's a level-0 block of a regular object */ 379219089Spjd uint32_t aflags = ARC_WAIT; 380219089Spjd arc_buf_t *abuf; 381219089Spjd int blksz = BP_GET_LSIZE(bp); 382219089Spjd 383246666Smm if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, 384246666Smm ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 385246666Smm &aflags, zb) != 0) { 386228103Smm if (zfs_send_corrupt_data) { 387228103Smm /* Send a block filled with 0x"zfs badd bloc" */ 388228103Smm abuf = arc_buf_alloc(spa, blksz, &abuf, 389228103Smm ARC_BUFC_DATA); 390228103Smm uint64_t *ptr; 391228103Smm for (ptr = abuf->b_data; 392228103Smm (char *)ptr < (char *)abuf->b_data + blksz; 393228103Smm ptr++) 394228103Smm *ptr = 0x2f5baddb10c; 395228103Smm } else { 396228103Smm return (EIO); 397228103Smm } 398228103Smm } 399219089Spjd 400235222Smm err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, 401219089Spjd blksz, bp, abuf->b_data); 402208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 403168404Spjd } 404168404Spjd 405168404Spjd ASSERT(err == 0 || err == EINTR); 406168404Spjd return (err); 407168404Spjd} 408168404Spjd 409248571Smm/* 410248571Smm * Releases dp, ds, and fromds, using the specified tag. 411248571Smm */ 412248571Smmstatic int 413248571Smmdmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, 414248571Smm#ifdef illumos 415248571Smm dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off) 416248571Smm#else 417248571Smm dsl_dataset_t *fromds, int outfd, struct file *fp, offset_t *off) 418248571Smm#endif 419168404Spjd{ 420248571Smm objset_t *os; 421168404Spjd dmu_replay_record_t *drr; 422235222Smm dmu_sendarg_t *dsp; 423168404Spjd int err; 424185029Spjd uint64_t fromtxg = 0; 425168404Spjd 426248571Smm if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) { 427248571Smm dsl_dataset_rele(fromds, tag); 428248571Smm dsl_dataset_rele(ds, tag); 429248571Smm dsl_pool_rele(dp, tag); 430168404Spjd return (EXDEV); 431248571Smm } 432168404Spjd 433248571Smm err = dmu_objset_from_ds(ds, &os); 434248571Smm if (err != 0) { 435248571Smm if (fromds != NULL) 436248571Smm dsl_dataset_rele(fromds, tag); 437248571Smm dsl_dataset_rele(ds, tag); 438248571Smm dsl_pool_rele(dp, tag); 439248571Smm return (err); 440185029Spjd } 441185029Spjd 442168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 443168404Spjd drr->drr_type = DRR_BEGIN; 444168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 445219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 446219089Spjd DMU_SUBSTREAM); 447219089Spjd 448219089Spjd#ifdef _KERNEL 449248571Smm if (dmu_objset_type(os) == DMU_OST_ZFS) { 450219089Spjd uint64_t version; 451248571Smm if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) { 452235222Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 453248571Smm if (fromds != NULL) 454248571Smm dsl_dataset_rele(fromds, tag); 455248571Smm dsl_dataset_rele(ds, tag); 456248571Smm dsl_pool_rele(dp, tag); 457219089Spjd return (EINVAL); 458235222Smm } 459248571Smm if (version >= ZPL_VERSION_SA) { 460219089Spjd DMU_SET_FEATUREFLAGS( 461219089Spjd drr->drr_u.drr_begin.drr_versioninfo, 462219089Spjd DMU_BACKUP_FEATURE_SA_SPILL); 463219089Spjd } 464219089Spjd } 465219089Spjd#endif 466219089Spjd 467168404Spjd drr->drr_u.drr_begin.drr_creation_time = 468168404Spjd ds->ds_phys->ds_creation_time; 469248571Smm drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); 470248571Smm if (fromds != NULL && ds->ds_dir != fromds->ds_dir) 471185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 472168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 473185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 474185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 475185029Spjd 476248571Smm if (fromds != NULL) 477168404Spjd drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 478168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 479168404Spjd 480248571Smm if (fromds != NULL) { 481185029Spjd fromtxg = fromds->ds_phys->ds_creation_txg; 482248571Smm dsl_dataset_rele(fromds, tag); 483248571Smm fromds = NULL; 484248571Smm } 485185029Spjd 486235222Smm dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); 487168404Spjd 488235222Smm dsp->dsa_drr = drr; 489235222Smm dsp->dsa_outfd = outfd; 490235222Smm dsp->dsa_proc = curproc; 491235222Smm dsp->dsa_td = curthread; 492235222Smm dsp->dsa_fp = fp; 493248571Smm dsp->dsa_os = os; 494235222Smm dsp->dsa_off = off; 495235222Smm dsp->dsa_toguid = ds->ds_phys->ds_guid; 496235222Smm ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); 497235222Smm dsp->dsa_pending_op = PENDING_NONE; 498235222Smm 499235222Smm mutex_enter(&ds->ds_sendstream_lock); 500235222Smm list_insert_head(&ds->ds_sendstreams, dsp); 501235222Smm mutex_exit(&ds->ds_sendstream_lock); 502235222Smm 503235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 504235222Smm err = dsp->dsa_err; 505235222Smm goto out; 506168404Spjd } 507168404Spjd 508248571Smm dsl_dataset_long_hold(ds, FTAG); 509248571Smm dsl_pool_rele(dp, tag); 510248571Smm 511208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 512235222Smm backup_cb, dsp); 513168404Spjd 514235222Smm if (dsp->dsa_pending_op != PENDING_NONE) 515235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) 516219089Spjd err = EINTR; 517219089Spjd 518248571Smm if (err != 0) { 519248571Smm if (err == EINTR && dsp->dsa_err != 0) 520235222Smm err = dsp->dsa_err; 521235222Smm goto out; 522168404Spjd } 523168404Spjd 524168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 525168404Spjd drr->drr_type = DRR_END; 526235222Smm drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; 527235222Smm drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; 528168404Spjd 529235222Smm if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { 530235222Smm err = dsp->dsa_err; 531235222Smm goto out; 532168404Spjd } 533168404Spjd 534235222Smmout: 535235222Smm mutex_enter(&ds->ds_sendstream_lock); 536235222Smm list_remove(&ds->ds_sendstreams, dsp); 537235222Smm mutex_exit(&ds->ds_sendstream_lock); 538235222Smm 539168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 540235222Smm kmem_free(dsp, sizeof (dmu_sendarg_t)); 541168404Spjd 542248571Smm dsl_dataset_long_rele(ds, FTAG); 543248571Smm dsl_dataset_rele(ds, tag); 544248571Smm 545235222Smm return (err); 546168404Spjd} 547168404Spjd 548228103Smmint 549248571Smmdmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, 550248571Smm#ifdef illumos 551248571Smm int outfd, vnode_t *vp, offset_t *off) 552248571Smm#else 553248571Smm int outfd, struct file *fp, offset_t *off) 554248571Smm#endif 555228103Smm{ 556248571Smm dsl_pool_t *dp; 557248571Smm dsl_dataset_t *ds; 558248571Smm dsl_dataset_t *fromds = NULL; 559248571Smm int err; 560248571Smm 561248571Smm err = dsl_pool_hold(pool, FTAG, &dp); 562248571Smm if (err != 0) 563248571Smm return (err); 564248571Smm 565248571Smm err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); 566248571Smm if (err != 0) { 567248571Smm dsl_pool_rele(dp, FTAG); 568248571Smm return (err); 569248571Smm } 570248571Smm 571248571Smm if (fromsnap != 0) { 572248571Smm err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); 573248571Smm if (err != 0) { 574248571Smm dsl_dataset_rele(ds, FTAG); 575248571Smm dsl_pool_rele(dp, FTAG); 576248571Smm return (err); 577248571Smm } 578248571Smm } 579248571Smm 580248571Smm return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off)); 581248571Smm} 582248571Smm 583248571Smmint 584248571Smmdmu_send(const char *tosnap, const char *fromsnap, 585248571Smm#ifdef illumos 586248571Smm int outfd, vnode_t *vp, offset_t *off) 587248571Smm#else 588248571Smm int outfd, struct file *fp, offset_t *off) 589248571Smm#endif 590248571Smm{ 591248571Smm dsl_pool_t *dp; 592248571Smm dsl_dataset_t *ds; 593248571Smm dsl_dataset_t *fromds = NULL; 594248571Smm int err; 595248571Smm 596248571Smm if (strchr(tosnap, '@') == NULL) 597248571Smm return (EINVAL); 598248571Smm if (fromsnap != NULL && strchr(fromsnap, '@') == NULL) 599248571Smm return (EINVAL); 600248571Smm 601248571Smm err = dsl_pool_hold(tosnap, FTAG, &dp); 602248571Smm if (err != 0) 603248571Smm return (err); 604248571Smm 605248571Smm err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); 606248571Smm if (err != 0) { 607248571Smm dsl_pool_rele(dp, FTAG); 608248571Smm return (err); 609248571Smm } 610248571Smm 611248571Smm if (fromsnap != NULL) { 612248571Smm err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds); 613248571Smm if (err != 0) { 614248571Smm dsl_dataset_rele(ds, FTAG); 615248571Smm dsl_pool_rele(dp, FTAG); 616248571Smm return (err); 617248571Smm } 618248571Smm } 619248571Smm return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, fp, off)); 620248571Smm} 621248571Smm 622248571Smmint 623248571Smmdmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep) 624248571Smm{ 625228103Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 626228103Smm int err; 627228103Smm uint64_t size; 628228103Smm 629248571Smm ASSERT(dsl_pool_config_held(dp)); 630248571Smm 631228103Smm /* tosnap must be a snapshot */ 632248571Smm if (!dsl_dataset_is_snapshot(ds)) 633228103Smm return (EINVAL); 634228103Smm 635248571Smm /* 636248571Smm * fromsnap must be an earlier snapshot from the same fs as tosnap, 637248571Smm * or the origin's fs. 638248571Smm */ 639248571Smm if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) 640228103Smm return (EXDEV); 641228103Smm 642228103Smm /* Get uncompressed size estimate of changed data. */ 643228103Smm if (fromds == NULL) { 644228103Smm size = ds->ds_phys->ds_uncompressed_bytes; 645228103Smm } else { 646228103Smm uint64_t used, comp; 647228103Smm err = dsl_dataset_space_written(fromds, ds, 648228103Smm &used, &comp, &size); 649248571Smm if (err != 0) 650228103Smm return (err); 651228103Smm } 652228103Smm 653228103Smm /* 654228103Smm * Assume that space (both on-disk and in-stream) is dominated by 655228103Smm * data. We will adjust for indirect blocks and the copies property, 656228103Smm * but ignore per-object space used (eg, dnodes and DRR_OBJECT records). 657228103Smm */ 658228103Smm 659228103Smm /* 660228103Smm * Subtract out approximate space used by indirect blocks. 661228103Smm * Assume most space is used by data blocks (non-indirect, non-dnode). 662228103Smm * Assume all blocks are recordsize. Assume ditto blocks and 663228103Smm * internal fragmentation counter out compression. 664228103Smm * 665228103Smm * Therefore, space used by indirect blocks is sizeof(blkptr_t) per 666228103Smm * block, which we observe in practice. 667228103Smm */ 668228103Smm uint64_t recordsize; 669248571Smm err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize); 670248571Smm if (err != 0) 671228103Smm return (err); 672228103Smm size -= size / recordsize * sizeof (blkptr_t); 673228103Smm 674228103Smm /* Add in the space for the record associated with each block. */ 675228103Smm size += size / recordsize * sizeof (dmu_replay_record_t); 676228103Smm 677228103Smm *sizep = size; 678228103Smm 679228103Smm return (0); 680228103Smm} 681228103Smm 682248571Smmtypedef struct dmu_recv_begin_arg { 683248571Smm const char *drba_origin; 684248571Smm dmu_recv_cookie_t *drba_cookie; 685248571Smm cred_t *drba_cred; 686248571Smm} dmu_recv_begin_arg_t; 687168404Spjd 688168404Spjdstatic int 689248571Smmrecv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, 690248571Smm uint64_t fromguid) 691168404Spjd{ 692185029Spjd uint64_t val; 693248571Smm int error; 694248571Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 695185029Spjd 696185029Spjd /* must not have any changes since most recent snapshot */ 697248571Smm if (!drba->drba_cookie->drc_force && 698248571Smm dsl_dataset_modified_since_lastsnap(ds)) 699185029Spjd return (ETXTBSY); 700185029Spjd 701248571Smm /* temporary clone name must not exist */ 702248571Smm error = zap_lookup(dp->dp_meta_objset, 703248571Smm ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, 704248571Smm 8, 1, &val); 705248571Smm if (error != ENOENT) 706248571Smm return (error == 0 ? EBUSY : error); 707248571Smm 708219089Spjd /* new snapshot name must not exist */ 709248571Smm error = zap_lookup(dp->dp_meta_objset, 710248571Smm ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 711248571Smm 8, 1, &val); 712248571Smm if (error != ENOENT) 713248571Smm return (error == 0 ? EEXIST : error); 714168404Spjd 715248571Smm if (fromguid != 0) { 716219089Spjd /* if incremental, most recent snapshot must match fromguid */ 717219089Spjd if (ds->ds_prev == NULL) 718219089Spjd return (ENODEV); 719168404Spjd 720219089Spjd /* 721219089Spjd * most recent snapshot must match fromguid, or there are no 722219089Spjd * changes since the fromguid one 723219089Spjd */ 724248571Smm if (ds->ds_prev->ds_phys->ds_guid != fromguid) { 725219089Spjd uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; 726219089Spjd uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; 727219089Spjd while (obj != 0) { 728219089Spjd dsl_dataset_t *snap; 729248571Smm error = dsl_dataset_hold_obj(dp, obj, FTAG, 730248571Smm &snap); 731248571Smm if (error != 0) 732219089Spjd return (ENODEV); 733219089Spjd if (snap->ds_phys->ds_creation_txg < birth) { 734219089Spjd dsl_dataset_rele(snap, FTAG); 735219089Spjd return (ENODEV); 736219089Spjd } 737248571Smm if (snap->ds_phys->ds_guid == fromguid) { 738219089Spjd dsl_dataset_rele(snap, FTAG); 739219089Spjd break; /* it's ok */ 740219089Spjd } 741219089Spjd obj = snap->ds_phys->ds_prev_snap_obj; 742219089Spjd dsl_dataset_rele(snap, FTAG); 743219089Spjd } 744219089Spjd if (obj == 0) 745219089Spjd return (ENODEV); 746219089Spjd } 747219089Spjd } else { 748219089Spjd /* if full, most recent snapshot must be $ORIGIN */ 749219089Spjd if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 750219089Spjd return (ENODEV); 751219089Spjd } 752219089Spjd 753248571Smm return (0); 754168404Spjd 755168404Spjd} 756168404Spjd 757248571Smmstatic int 758248571Smmdmu_recv_begin_check(void *arg, dmu_tx_t *tx) 759248571Smm{ 760248571Smm dmu_recv_begin_arg_t *drba = arg; 761248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 762248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 763248571Smm uint64_t fromguid = drrb->drr_fromguid; 764248571Smm int flags = drrb->drr_flags; 765248571Smm int error; 766248571Smm dsl_dataset_t *ds; 767248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 768248571Smm 769248571Smm /* already checked */ 770248571Smm ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 771248571Smm 772248571Smm if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == 773248571Smm DMU_COMPOUNDSTREAM || 774248571Smm drrb->drr_type >= DMU_OST_NUMTYPES || 775248571Smm ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL)) 776248571Smm return (EINVAL); 777248571Smm 778248571Smm /* Verify pool version supports SA if SA_SPILL feature set */ 779248571Smm if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & 780248571Smm DMU_BACKUP_FEATURE_SA_SPILL) && 781248571Smm spa_version(dp->dp_spa) < SPA_VERSION_SA) { 782248571Smm return (ENOTSUP); 783248571Smm } 784248571Smm 785248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 786248571Smm if (error == 0) { 787248571Smm /* target fs already exists; recv into temp clone */ 788248571Smm 789248571Smm /* Can't recv a clone into an existing fs */ 790248571Smm if (flags & DRR_FLAG_CLONE) { 791248571Smm dsl_dataset_rele(ds, FTAG); 792248571Smm return (EINVAL); 793248571Smm } 794248571Smm 795248571Smm error = recv_begin_check_existing_impl(drba, ds, fromguid); 796248571Smm dsl_dataset_rele(ds, FTAG); 797248571Smm } else if (error == ENOENT) { 798248571Smm /* target fs does not exist; must be a full backup or clone */ 799248571Smm char buf[MAXNAMELEN]; 800248571Smm 801248571Smm /* 802248571Smm * If it's a non-clone incremental, we are missing the 803248571Smm * target fs, so fail the recv. 804248571Smm */ 805248571Smm if (fromguid != 0 && !(flags & DRR_FLAG_CLONE)) 806248571Smm return (ENOENT); 807248571Smm 808248571Smm /* Open the parent of tofs */ 809248571Smm ASSERT3U(strlen(tofs), <, MAXNAMELEN); 810248571Smm (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); 811248571Smm error = dsl_dataset_hold(dp, buf, FTAG, &ds); 812248571Smm if (error != 0) 813248571Smm return (error); 814248571Smm 815248571Smm if (drba->drba_origin != NULL) { 816248571Smm dsl_dataset_t *origin; 817248571Smm error = dsl_dataset_hold(dp, drba->drba_origin, 818248571Smm FTAG, &origin); 819248571Smm if (error != 0) { 820248571Smm dsl_dataset_rele(ds, FTAG); 821248571Smm return (error); 822248571Smm } 823248571Smm if (!dsl_dataset_is_snapshot(origin)) { 824248571Smm dsl_dataset_rele(origin, FTAG); 825248571Smm dsl_dataset_rele(ds, FTAG); 826248571Smm return (EINVAL); 827248571Smm } 828248571Smm if (origin->ds_phys->ds_guid != fromguid) { 829248571Smm dsl_dataset_rele(origin, FTAG); 830248571Smm dsl_dataset_rele(ds, FTAG); 831248571Smm return (ENODEV); 832248571Smm } 833248571Smm dsl_dataset_rele(origin, FTAG); 834248571Smm } 835248571Smm dsl_dataset_rele(ds, FTAG); 836248571Smm error = 0; 837248571Smm } 838248571Smm return (error); 839248571Smm} 840248571Smm 841168404Spjdstatic void 842248571Smmdmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 843168404Spjd{ 844248571Smm dmu_recv_begin_arg_t *drba = arg; 845248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 846248571Smm struct drr_begin *drrb = drba->drba_cookie->drc_drrb; 847248571Smm const char *tofs = drba->drba_cookie->drc_tofs; 848248571Smm dsl_dataset_t *ds, *newds; 849185029Spjd uint64_t dsobj; 850248571Smm int error; 851248571Smm uint64_t crflags; 852168404Spjd 853248571Smm crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ? 854248571Smm DS_FLAG_CI_DATASET : 0; 855168404Spjd 856248571Smm error = dsl_dataset_hold(dp, tofs, FTAG, &ds); 857248571Smm if (error == 0) { 858248571Smm /* create temporary clone */ 859248571Smm dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, 860248571Smm ds->ds_prev, crflags, drba->drba_cred, tx); 861248571Smm dsl_dataset_rele(ds, FTAG); 862248571Smm } else { 863248571Smm dsl_dir_t *dd; 864248571Smm const char *tail; 865248571Smm dsl_dataset_t *origin = NULL; 866248571Smm 867248571Smm VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail)); 868248571Smm 869248571Smm if (drba->drba_origin != NULL) { 870248571Smm VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, 871248571Smm FTAG, &origin)); 872248571Smm } 873248571Smm 874248571Smm /* Create new dataset. */ 875248571Smm dsobj = dsl_dataset_create_sync(dd, 876248571Smm strrchr(tofs, '/') + 1, 877248571Smm origin, crflags, drba->drba_cred, tx); 878248571Smm if (origin != NULL) 879248571Smm dsl_dataset_rele(origin, FTAG); 880248571Smm dsl_dir_rele(dd, FTAG); 881248571Smm drba->drba_cookie->drc_newfs = B_TRUE; 882248571Smm } 883248571Smm VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); 884248571Smm 885248571Smm dmu_buf_will_dirty(newds->ds_dbuf, tx); 886248571Smm newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 887248571Smm 888219089Spjd /* 889219089Spjd * If we actually created a non-clone, we need to create the 890219089Spjd * objset in our new dataset. 891219089Spjd */ 892248571Smm if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { 893219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 894248571Smm newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); 895219089Spjd } 896168404Spjd 897248571Smm drba->drba_cookie->drc_ds = newds; 898185029Spjd 899248571Smm spa_history_log_internal_ds(newds, "receive", tx, ""); 900168404Spjd} 901168404Spjd 902185029Spjd/* 903185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 904185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 905185029Spjd */ 906185029Spjdint 907248571Smmdmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 908248571Smm boolean_t force, char *origin, dmu_recv_cookie_t *drc) 909168404Spjd{ 910248571Smm dmu_recv_begin_arg_t drba = { 0 }; 911248571Smm dmu_replay_record_t *drr; 912168404Spjd 913185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 914185029Spjd drc->drc_drrb = drrb; 915185029Spjd drc->drc_tosnap = tosnap; 916248571Smm drc->drc_tofs = tofs; 917185029Spjd drc->drc_force = force; 918168404Spjd 919248571Smm if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 920248571Smm drc->drc_byteswap = B_TRUE; 921248571Smm else if (drrb->drr_magic != DMU_BACKUP_MAGIC) 922248571Smm return (EINVAL); 923168404Spjd 924248571Smm drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 925248571Smm drr->drr_type = DRR_BEGIN; 926248571Smm drr->drr_u.drr_begin = *drc->drc_drrb; 927248571Smm if (drc->drc_byteswap) { 928248571Smm fletcher_4_incremental_byteswap(drr, 929248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 930248571Smm } else { 931248571Smm fletcher_4_incremental_native(drr, 932248571Smm sizeof (dmu_replay_record_t), &drc->drc_cksum); 933248571Smm } 934248571Smm kmem_free(drr, sizeof (dmu_replay_record_t)); 935219089Spjd 936248571Smm if (drc->drc_byteswap) { 937248571Smm drrb->drr_magic = BSWAP_64(drrb->drr_magic); 938248571Smm drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 939248571Smm drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 940248571Smm drrb->drr_type = BSWAP_32(drrb->drr_type); 941248571Smm drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 942248571Smm drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 943248571Smm } 944168404Spjd 945248571Smm drba.drba_origin = origin; 946248571Smm drba.drba_cookie = drc; 947248571Smm drba.drba_cred = CRED(); 948219089Spjd 949248571Smm return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync, 950248571Smm &drba, 5)); 951168404Spjd} 952168404Spjd 953185029Spjdstruct restorearg { 954185029Spjd int err; 955248571Smm boolean_t byteswap; 956185029Spjd kthread_t *td; 957185029Spjd struct file *fp; 958185029Spjd char *buf; 959185029Spjd uint64_t voff; 960185029Spjd int bufsize; /* amount of memory allocated for buf */ 961185029Spjd zio_cksum_t cksum; 962219089Spjd avl_tree_t *guid_to_ds_map; 963185029Spjd}; 964185029Spjd 965219089Spjdtypedef struct guid_map_entry { 966219089Spjd uint64_t guid; 967219089Spjd dsl_dataset_t *gme_ds; 968219089Spjd avl_node_t avlnode; 969219089Spjd} guid_map_entry_t; 970219089Spjd 971168404Spjdstatic int 972219089Spjdguid_compare(const void *arg1, const void *arg2) 973168404Spjd{ 974219089Spjd const guid_map_entry_t *gmep1 = arg1; 975219089Spjd const guid_map_entry_t *gmep2 = arg2; 976219089Spjd 977219089Spjd if (gmep1->guid < gmep2->guid) 978219089Spjd return (-1); 979219089Spjd else if (gmep1->guid > gmep2->guid) 980219089Spjd return (1); 981219089Spjd return (0); 982219089Spjd} 983219089Spjd 984219089Spjdstatic void 985219089Spjdfree_guid_map_onexit(void *arg) 986219089Spjd{ 987219089Spjd avl_tree_t *ca = arg; 988219089Spjd void *cookie = NULL; 989219089Spjd guid_map_entry_t *gmep; 990219089Spjd 991219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 992248571Smm dsl_dataset_long_rele(gmep->gme_ds, gmep); 993219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 994219089Spjd } 995219089Spjd avl_destroy(ca); 996219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 997219089Spjd} 998219089Spjd 999219089Spjdstatic int 1000219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 1001219089Spjd{ 1002168404Spjd struct uio auio; 1003168404Spjd struct iovec aiov; 1004168404Spjd int error; 1005168404Spjd 1006168404Spjd aiov.iov_base = buf; 1007168404Spjd aiov.iov_len = len; 1008168404Spjd auio.uio_iov = &aiov; 1009168404Spjd auio.uio_iovcnt = 1; 1010168404Spjd auio.uio_resid = len; 1011169170Spjd auio.uio_segflg = UIO_SYSSPACE; 1012168404Spjd auio.uio_rw = UIO_READ; 1013168404Spjd auio.uio_offset = off; 1014168404Spjd auio.uio_td = ra->td; 1015168404Spjd#ifdef _KERNEL 1016168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 1017168404Spjd#else 1018168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 1019168404Spjd error = EOPNOTSUPP; 1020168404Spjd#endif 1021168404Spjd *resid = auio.uio_resid; 1022168404Spjd return (error); 1023168404Spjd} 1024168404Spjd 1025168404Spjdstatic void * 1026168404Spjdrestore_read(struct restorearg *ra, int len) 1027168404Spjd{ 1028168404Spjd void *rv; 1029185029Spjd int done = 0; 1030168404Spjd 1031168404Spjd /* some things will require 8-byte alignment, so everything must */ 1032240415Smm ASSERT0(len % 8); 1033168404Spjd 1034185029Spjd while (done < len) { 1035219089Spjd ssize_t resid; 1036168404Spjd 1037185029Spjd ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 1038185029Spjd len - done, ra->voff, &resid); 1039168404Spjd 1040185029Spjd if (resid == len - done) 1041168404Spjd ra->err = EINVAL; 1042185029Spjd ra->voff += len - done - resid; 1043185029Spjd done = len - resid; 1044248571Smm if (ra->err != 0) 1045168404Spjd return (NULL); 1046168404Spjd } 1047168404Spjd 1048185029Spjd ASSERT3U(done, ==, len); 1049185029Spjd rv = ra->buf; 1050168404Spjd if (ra->byteswap) 1051185029Spjd fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 1052168404Spjd else 1053185029Spjd fletcher_4_incremental_native(rv, len, &ra->cksum); 1054168404Spjd return (rv); 1055168404Spjd} 1056168404Spjd 1057168404Spjdstatic void 1058168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 1059168404Spjd{ 1060168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 1061168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 1062168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 1063185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 1064168404Spjd switch (drr->drr_type) { 1065168404Spjd case DRR_BEGIN: 1066168404Spjd DO64(drr_begin.drr_magic); 1067219089Spjd DO64(drr_begin.drr_versioninfo); 1068168404Spjd DO64(drr_begin.drr_creation_time); 1069168404Spjd DO32(drr_begin.drr_type); 1070185029Spjd DO32(drr_begin.drr_flags); 1071168404Spjd DO64(drr_begin.drr_toguid); 1072168404Spjd DO64(drr_begin.drr_fromguid); 1073168404Spjd break; 1074168404Spjd case DRR_OBJECT: 1075168404Spjd DO64(drr_object.drr_object); 1076168404Spjd /* DO64(drr_object.drr_allocation_txg); */ 1077168404Spjd DO32(drr_object.drr_type); 1078168404Spjd DO32(drr_object.drr_bonustype); 1079168404Spjd DO32(drr_object.drr_blksz); 1080168404Spjd DO32(drr_object.drr_bonuslen); 1081219089Spjd DO64(drr_object.drr_toguid); 1082168404Spjd break; 1083168404Spjd case DRR_FREEOBJECTS: 1084168404Spjd DO64(drr_freeobjects.drr_firstobj); 1085168404Spjd DO64(drr_freeobjects.drr_numobjs); 1086219089Spjd DO64(drr_freeobjects.drr_toguid); 1087168404Spjd break; 1088168404Spjd case DRR_WRITE: 1089168404Spjd DO64(drr_write.drr_object); 1090168404Spjd DO32(drr_write.drr_type); 1091168404Spjd DO64(drr_write.drr_offset); 1092168404Spjd DO64(drr_write.drr_length); 1093219089Spjd DO64(drr_write.drr_toguid); 1094219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 1095219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 1096219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 1097219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 1098219089Spjd DO64(drr_write.drr_key.ddk_prop); 1099168404Spjd break; 1100219089Spjd case DRR_WRITE_BYREF: 1101219089Spjd DO64(drr_write_byref.drr_object); 1102219089Spjd DO64(drr_write_byref.drr_offset); 1103219089Spjd DO64(drr_write_byref.drr_length); 1104219089Spjd DO64(drr_write_byref.drr_toguid); 1105219089Spjd DO64(drr_write_byref.drr_refguid); 1106219089Spjd DO64(drr_write_byref.drr_refobject); 1107219089Spjd DO64(drr_write_byref.drr_refoffset); 1108219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 1109219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 1110219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 1111219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 1112219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 1113219089Spjd break; 1114168404Spjd case DRR_FREE: 1115168404Spjd DO64(drr_free.drr_object); 1116168404Spjd DO64(drr_free.drr_offset); 1117168404Spjd DO64(drr_free.drr_length); 1118219089Spjd DO64(drr_free.drr_toguid); 1119168404Spjd break; 1120219089Spjd case DRR_SPILL: 1121219089Spjd DO64(drr_spill.drr_object); 1122219089Spjd DO64(drr_spill.drr_length); 1123219089Spjd DO64(drr_spill.drr_toguid); 1124219089Spjd break; 1125168404Spjd case DRR_END: 1126168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1127168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1128168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1129168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 1130219089Spjd DO64(drr_end.drr_toguid); 1131168404Spjd break; 1132168404Spjd } 1133168404Spjd#undef DO64 1134168404Spjd#undef DO32 1135168404Spjd} 1136168404Spjd 1137168404Spjdstatic int 1138168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1139168404Spjd{ 1140168404Spjd int err; 1141168404Spjd dmu_tx_t *tx; 1142200727Sdelphij void *data = NULL; 1143168404Spjd 1144168404Spjd if (drro->drr_type == DMU_OT_NONE || 1145236884Smm !DMU_OT_IS_VALID(drro->drr_type) || 1146236884Smm !DMU_OT_IS_VALID(drro->drr_bonustype) || 1147219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1148168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1149168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1150168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1151168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 1152168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1153168404Spjd return (EINVAL); 1154168404Spjd } 1155168404Spjd 1156200726Sdelphij err = dmu_object_info(os, drro->drr_object, NULL); 1157168404Spjd 1158200726Sdelphij if (err != 0 && err != ENOENT) 1159200726Sdelphij return (EINVAL); 1160200726Sdelphij 1161201756Sdelphij if (drro->drr_bonuslen) { 1162201756Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 1163248571Smm if (ra->err != 0) 1164201756Sdelphij return (ra->err); 1165201756Sdelphij } 1166201756Sdelphij 1167168404Spjd if (err == ENOENT) { 1168168404Spjd /* currently free, want to be allocated */ 1169200726Sdelphij tx = dmu_tx_create(os); 1170168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1171168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1172248571Smm if (err != 0) { 1173168404Spjd dmu_tx_abort(tx); 1174168404Spjd return (err); 1175168404Spjd } 1176168404Spjd err = dmu_object_claim(os, drro->drr_object, 1177168404Spjd drro->drr_type, drro->drr_blksz, 1178168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1179200726Sdelphij dmu_tx_commit(tx); 1180168404Spjd } else { 1181168404Spjd /* currently allocated, want to be allocated */ 1182168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1183168404Spjd drro->drr_type, drro->drr_blksz, 1184200726Sdelphij drro->drr_bonustype, drro->drr_bonuslen); 1185168404Spjd } 1186248571Smm if (err != 0) { 1187200726Sdelphij return (EINVAL); 1188219089Spjd } 1189200726Sdelphij 1190200726Sdelphij tx = dmu_tx_create(os); 1191200726Sdelphij dmu_tx_hold_bonus(tx, drro->drr_object); 1192200726Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1193248571Smm if (err != 0) { 1194200726Sdelphij dmu_tx_abort(tx); 1195200726Sdelphij return (err); 1196168404Spjd } 1197168404Spjd 1198219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1199219089Spjd tx); 1200168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1201168404Spjd 1202200727Sdelphij if (data != NULL) { 1203168404Spjd dmu_buf_t *db; 1204200727Sdelphij 1205168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1206168404Spjd dmu_buf_will_dirty(db, tx); 1207168404Spjd 1208185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1209185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1210168404Spjd if (ra->byteswap) { 1211236884Smm dmu_object_byteswap_t byteswap = 1212236884Smm DMU_OT_BYTESWAP(drro->drr_bonustype); 1213236884Smm dmu_ot_byteswap[byteswap].ob_func(db->db_data, 1214168404Spjd drro->drr_bonuslen); 1215168404Spjd } 1216168404Spjd dmu_buf_rele(db, FTAG); 1217168404Spjd } 1218168404Spjd dmu_tx_commit(tx); 1219168404Spjd return (0); 1220168404Spjd} 1221168404Spjd 1222168404Spjd/* ARGSUSED */ 1223168404Spjdstatic int 1224168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 1225168404Spjd struct drr_freeobjects *drrfo) 1226168404Spjd{ 1227168404Spjd uint64_t obj; 1228168404Spjd 1229168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1230168404Spjd return (EINVAL); 1231168404Spjd 1232168404Spjd for (obj = drrfo->drr_firstobj; 1233168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1234168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 1235168404Spjd int err; 1236168404Spjd 1237168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 1238168404Spjd continue; 1239168404Spjd 1240185029Spjd err = dmu_free_object(os, obj); 1241248571Smm if (err != 0) 1242168404Spjd return (err); 1243168404Spjd } 1244168404Spjd return (0); 1245168404Spjd} 1246168404Spjd 1247168404Spjdstatic int 1248168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1249168404Spjd struct drr_write *drrw) 1250168404Spjd{ 1251168404Spjd dmu_tx_t *tx; 1252168404Spjd void *data; 1253168404Spjd int err; 1254168404Spjd 1255168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1256236884Smm !DMU_OT_IS_VALID(drrw->drr_type)) 1257168404Spjd return (EINVAL); 1258168404Spjd 1259168404Spjd data = restore_read(ra, drrw->drr_length); 1260168404Spjd if (data == NULL) 1261168404Spjd return (ra->err); 1262168404Spjd 1263168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1264168404Spjd return (EINVAL); 1265168404Spjd 1266168404Spjd tx = dmu_tx_create(os); 1267168404Spjd 1268168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1269168404Spjd drrw->drr_offset, drrw->drr_length); 1270168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1271248571Smm if (err != 0) { 1272168404Spjd dmu_tx_abort(tx); 1273168404Spjd return (err); 1274168404Spjd } 1275236884Smm if (ra->byteswap) { 1276236884Smm dmu_object_byteswap_t byteswap = 1277236884Smm DMU_OT_BYTESWAP(drrw->drr_type); 1278236884Smm dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); 1279236884Smm } 1280168404Spjd dmu_write(os, drrw->drr_object, 1281168404Spjd drrw->drr_offset, drrw->drr_length, data, tx); 1282168404Spjd dmu_tx_commit(tx); 1283168404Spjd return (0); 1284168404Spjd} 1285168404Spjd 1286219089Spjd/* 1287219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1288219089Spjd * streams to refer to a copy of the data that is already on the 1289219089Spjd * system because it came in earlier in the stream. This function 1290219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1291219089Spjd * data from the stream to fulfill this write. 1292219089Spjd */ 1293219089Spjdstatic int 1294219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1295219089Spjd struct drr_write_byref *drrwbr) 1296219089Spjd{ 1297219089Spjd dmu_tx_t *tx; 1298219089Spjd int err; 1299219089Spjd guid_map_entry_t gmesrch; 1300219089Spjd guid_map_entry_t *gmep; 1301219089Spjd avl_index_t where; 1302219089Spjd objset_t *ref_os = NULL; 1303219089Spjd dmu_buf_t *dbp; 1304219089Spjd 1305219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1306219089Spjd return (EINVAL); 1307219089Spjd 1308219089Spjd /* 1309219089Spjd * If the GUID of the referenced dataset is different from the 1310219089Spjd * GUID of the target dataset, find the referenced dataset. 1311219089Spjd */ 1312219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1313219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1314219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1315219089Spjd &where)) == NULL) { 1316219089Spjd return (EINVAL); 1317219089Spjd } 1318219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1319219089Spjd return (EINVAL); 1320219089Spjd } else { 1321219089Spjd ref_os = os; 1322219089Spjd } 1323219089Spjd 1324219089Spjd if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1325219089Spjd drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) 1326219089Spjd return (err); 1327219089Spjd 1328219089Spjd tx = dmu_tx_create(os); 1329219089Spjd 1330219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1331219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1332219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1333248571Smm if (err != 0) { 1334219089Spjd dmu_tx_abort(tx); 1335219089Spjd return (err); 1336219089Spjd } 1337219089Spjd dmu_write(os, drrwbr->drr_object, 1338219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1339219089Spjd dmu_buf_rele(dbp, FTAG); 1340219089Spjd dmu_tx_commit(tx); 1341219089Spjd return (0); 1342219089Spjd} 1343219089Spjd 1344219089Spjdstatic int 1345219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1346219089Spjd{ 1347219089Spjd dmu_tx_t *tx; 1348219089Spjd void *data; 1349219089Spjd dmu_buf_t *db, *db_spill; 1350219089Spjd int err; 1351219089Spjd 1352219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1353219089Spjd drrs->drr_length > SPA_MAXBLOCKSIZE) 1354219089Spjd return (EINVAL); 1355219089Spjd 1356219089Spjd data = restore_read(ra, drrs->drr_length); 1357219089Spjd if (data == NULL) 1358219089Spjd return (ra->err); 1359219089Spjd 1360219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1361219089Spjd return (EINVAL); 1362219089Spjd 1363219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1364219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1365219089Spjd dmu_buf_rele(db, FTAG); 1366219089Spjd return (err); 1367219089Spjd } 1368219089Spjd 1369219089Spjd tx = dmu_tx_create(os); 1370219089Spjd 1371219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1372219089Spjd 1373219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1374248571Smm if (err != 0) { 1375219089Spjd dmu_buf_rele(db, FTAG); 1376219089Spjd dmu_buf_rele(db_spill, FTAG); 1377219089Spjd dmu_tx_abort(tx); 1378219089Spjd return (err); 1379219089Spjd } 1380219089Spjd dmu_buf_will_dirty(db_spill, tx); 1381219089Spjd 1382219089Spjd if (db_spill->db_size < drrs->drr_length) 1383219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1384219089Spjd drrs->drr_length, tx)); 1385219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1386219089Spjd 1387219089Spjd dmu_buf_rele(db, FTAG); 1388219089Spjd dmu_buf_rele(db_spill, FTAG); 1389219089Spjd 1390219089Spjd dmu_tx_commit(tx); 1391219089Spjd return (0); 1392219089Spjd} 1393219089Spjd 1394168404Spjd/* ARGSUSED */ 1395168404Spjdstatic int 1396168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1397168404Spjd struct drr_free *drrf) 1398168404Spjd{ 1399168404Spjd int err; 1400168404Spjd 1401168404Spjd if (drrf->drr_length != -1ULL && 1402168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1403168404Spjd return (EINVAL); 1404168404Spjd 1405168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1406168404Spjd return (EINVAL); 1407168404Spjd 1408185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1409168404Spjd drrf->drr_offset, drrf->drr_length); 1410168404Spjd return (err); 1411168404Spjd} 1412168404Spjd 1413248571Smm/* used to destroy the drc_ds on error */ 1414248571Smmstatic void 1415248571Smmdmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) 1416248571Smm{ 1417248571Smm char name[MAXNAMELEN]; 1418248571Smm dsl_dataset_name(drc->drc_ds, name); 1419248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1420248571Smm (void) dsl_destroy_head(name); 1421248571Smm} 1422248571Smm 1423185029Spjd/* 1424185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1425185029Spjd */ 1426168404Spjdint 1427219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1428219089Spjd int cleanup_fd, uint64_t *action_handlep) 1429168404Spjd{ 1430185029Spjd struct restorearg ra = { 0 }; 1431168404Spjd dmu_replay_record_t *drr; 1432185029Spjd objset_t *os; 1433185029Spjd zio_cksum_t pcksum; 1434219089Spjd int featureflags; 1435168404Spjd 1436248571Smm ra.byteswap = drc->drc_byteswap; 1437248571Smm ra.cksum = drc->drc_cksum; 1438219089Spjd ra.td = curthread; 1439185029Spjd ra.fp = fp; 1440185029Spjd ra.voff = *voffp; 1441185029Spjd ra.bufsize = 1<<20; 1442185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1443168404Spjd 1444185029Spjd /* these were verified in dmu_recv_begin */ 1445248571Smm ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==, 1446219089Spjd DMU_SUBSTREAM); 1447248571Smm ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES); 1448168404Spjd 1449168404Spjd /* 1450168404Spjd * Open the objset we are modifying. 1451168404Spjd */ 1452248571Smm VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); 1453168404Spjd 1454248571Smm ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1455168404Spjd 1456219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1457219089Spjd 1458219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1459219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1460219089Spjd minor_t minor; 1461219089Spjd 1462219089Spjd if (cleanup_fd == -1) { 1463219089Spjd ra.err = EBADF; 1464219089Spjd goto out; 1465219089Spjd } 1466219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1467248571Smm if (ra.err != 0) { 1468219089Spjd cleanup_fd = -1; 1469219089Spjd goto out; 1470219089Spjd } 1471219089Spjd 1472219089Spjd if (*action_handlep == 0) { 1473219089Spjd ra.guid_to_ds_map = 1474219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1475219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1476219089Spjd sizeof (guid_map_entry_t), 1477219089Spjd offsetof(guid_map_entry_t, avlnode)); 1478219089Spjd ra.err = zfs_onexit_add_cb(minor, 1479219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1480219089Spjd action_handlep); 1481248571Smm if (ra.err != 0) 1482219089Spjd goto out; 1483219089Spjd } else { 1484219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1485219089Spjd (void **)&ra.guid_to_ds_map); 1486248571Smm if (ra.err != 0) 1487219089Spjd goto out; 1488219089Spjd } 1489221263Smm 1490221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1491219089Spjd } 1492219089Spjd 1493168404Spjd /* 1494168404Spjd * Read records and process them. 1495168404Spjd */ 1496185029Spjd pcksum = ra.cksum; 1497168404Spjd while (ra.err == 0 && 1498168404Spjd NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1499185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1500168404Spjd ra.err = EINTR; 1501168404Spjd goto out; 1502168404Spjd } 1503168404Spjd 1504168404Spjd if (ra.byteswap) 1505168404Spjd backup_byteswap(drr); 1506168404Spjd 1507168404Spjd switch (drr->drr_type) { 1508168404Spjd case DRR_OBJECT: 1509168404Spjd { 1510168404Spjd /* 1511168404Spjd * We need to make a copy of the record header, 1512168404Spjd * because restore_{object,write} may need to 1513168404Spjd * restore_read(), which will invalidate drr. 1514168404Spjd */ 1515168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1516168404Spjd ra.err = restore_object(&ra, os, &drro); 1517168404Spjd break; 1518168404Spjd } 1519168404Spjd case DRR_FREEOBJECTS: 1520168404Spjd { 1521168404Spjd struct drr_freeobjects drrfo = 1522168404Spjd drr->drr_u.drr_freeobjects; 1523168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1524168404Spjd break; 1525168404Spjd } 1526168404Spjd case DRR_WRITE: 1527168404Spjd { 1528168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1529168404Spjd ra.err = restore_write(&ra, os, &drrw); 1530168404Spjd break; 1531168404Spjd } 1532219089Spjd case DRR_WRITE_BYREF: 1533219089Spjd { 1534219089Spjd struct drr_write_byref drrwbr = 1535219089Spjd drr->drr_u.drr_write_byref; 1536219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1537219089Spjd break; 1538219089Spjd } 1539168404Spjd case DRR_FREE: 1540168404Spjd { 1541168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1542168404Spjd ra.err = restore_free(&ra, os, &drrf); 1543168404Spjd break; 1544168404Spjd } 1545168404Spjd case DRR_END: 1546168404Spjd { 1547168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1548168404Spjd /* 1549168404Spjd * We compare against the *previous* checksum 1550168404Spjd * value, because the stored checksum is of 1551168404Spjd * everything before the DRR_END record. 1552168404Spjd */ 1553185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1554168404Spjd ra.err = ECKSUM; 1555168404Spjd goto out; 1556168404Spjd } 1557219089Spjd case DRR_SPILL: 1558219089Spjd { 1559219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1560219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1561219089Spjd break; 1562219089Spjd } 1563168404Spjd default: 1564168404Spjd ra.err = EINVAL; 1565168404Spjd goto out; 1566168404Spjd } 1567185029Spjd pcksum = ra.cksum; 1568168404Spjd } 1569185029Spjd ASSERT(ra.err != 0); 1570168404Spjd 1571168404Spjdout: 1572219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1573219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1574168404Spjd 1575185029Spjd if (ra.err != 0) { 1576168404Spjd /* 1577219089Spjd * destroy what we created, so we don't leave it in the 1578219089Spjd * inconsistent restoring state. 1579168404Spjd */ 1580248571Smm dmu_recv_cleanup_ds(drc); 1581168404Spjd } 1582168404Spjd 1583168404Spjd kmem_free(ra.buf, ra.bufsize); 1584185029Spjd *voffp = ra.voff; 1585168404Spjd return (ra.err); 1586168404Spjd} 1587185029Spjd 1588185029Spjdstatic int 1589248571Smmdmu_recv_end_check(void *arg, dmu_tx_t *tx) 1590185029Spjd{ 1591248571Smm dmu_recv_cookie_t *drc = arg; 1592248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1593248571Smm int error; 1594185029Spjd 1595248571Smm ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); 1596248571Smm 1597248571Smm if (!drc->drc_newfs) { 1598248571Smm dsl_dataset_t *origin_head; 1599248571Smm 1600248571Smm error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); 1601248571Smm if (error != 0) 1602248571Smm return (error); 1603248571Smm error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, 1604248571Smm origin_head, drc->drc_force); 1605248571Smm if (error != 0) { 1606248571Smm dsl_dataset_rele(origin_head, FTAG); 1607248571Smm return (error); 1608248571Smm } 1609248571Smm error = dsl_dataset_snapshot_check_impl(origin_head, 1610248571Smm drc->drc_tosnap, tx); 1611248571Smm dsl_dataset_rele(origin_head, FTAG); 1612248571Smm if (error != 0) 1613248571Smm return (error); 1614248571Smm 1615248571Smm error = dsl_destroy_head_check_impl(drc->drc_ds, 1); 1616248571Smm } else { 1617248571Smm error = dsl_dataset_snapshot_check_impl(drc->drc_ds, 1618248571Smm drc->drc_tosnap, tx); 1619248571Smm } 1620248571Smm return (error); 1621185029Spjd} 1622185029Spjd 1623185029Spjdstatic void 1624248571Smmdmu_recv_end_sync(void *arg, dmu_tx_t *tx) 1625185029Spjd{ 1626248571Smm dmu_recv_cookie_t *drc = arg; 1627248571Smm dsl_pool_t *dp = dmu_tx_pool(tx); 1628185029Spjd 1629248571Smm spa_history_log_internal_ds(drc->drc_ds, "finish receiving", 1630248571Smm tx, "snap=%s", drc->drc_tosnap); 1631185029Spjd 1632248571Smm if (!drc->drc_newfs) { 1633248571Smm dsl_dataset_t *origin_head; 1634185029Spjd 1635248571Smm VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, 1636248571Smm &origin_head)); 1637248571Smm dsl_dataset_clone_swap_sync_impl(drc->drc_ds, 1638248571Smm origin_head, tx); 1639248571Smm dsl_dataset_snapshot_sync_impl(origin_head, 1640248571Smm drc->drc_tosnap, tx); 1641248571Smm 1642248571Smm /* set snapshot's creation time and guid */ 1643248571Smm dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); 1644248571Smm origin_head->ds_prev->ds_phys->ds_creation_time = 1645248571Smm drc->drc_drrb->drr_creation_time; 1646248571Smm origin_head->ds_prev->ds_phys->ds_guid = 1647248571Smm drc->drc_drrb->drr_toguid; 1648248571Smm origin_head->ds_prev->ds_phys->ds_flags &= 1649248571Smm ~DS_FLAG_INCONSISTENT; 1650248571Smm 1651248571Smm dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 1652248571Smm origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1653248571Smm 1654248571Smm dsl_dataset_rele(origin_head, FTAG); 1655248571Smm dsl_destroy_head_sync_impl(drc->drc_ds, tx); 1656248571Smm } else { 1657248571Smm dsl_dataset_t *ds = drc->drc_ds; 1658248571Smm 1659248571Smm dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx); 1660248571Smm 1661248571Smm /* set snapshot's creation time and guid */ 1662248571Smm dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1663248571Smm ds->ds_prev->ds_phys->ds_creation_time = 1664248571Smm drc->drc_drrb->drr_creation_time; 1665248571Smm ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; 1666248571Smm ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1667248571Smm 1668248571Smm dmu_buf_will_dirty(ds->ds_dbuf, tx); 1669248571Smm ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1670248571Smm } 1671248571Smm drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; 1672248571Smm /* 1673248571Smm * Release the hold from dmu_recv_begin. This must be done before 1674248571Smm * we return to open context, so that when we free the dataset's dnode, 1675248571Smm * we can evict its bonus buffer. 1676248571Smm */ 1677248571Smm dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); 1678248571Smm drc->drc_ds = NULL; 1679185029Spjd} 1680185029Spjd 1681219089Spjdstatic int 1682248571Smmadd_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) 1683221263Smm{ 1684248571Smm dsl_pool_t *dp; 1685221263Smm dsl_dataset_t *snapds; 1686221263Smm guid_map_entry_t *gmep; 1687221263Smm int err; 1688221263Smm 1689221263Smm ASSERT(guid_map != NULL); 1690221263Smm 1691248571Smm err = dsl_pool_hold(name, FTAG, &dp); 1692248571Smm if (err != 0) 1693248571Smm return (err); 1694248571Smm err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snapds); 1695221263Smm if (err == 0) { 1696221263Smm gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); 1697221263Smm gmep->guid = snapds->ds_phys->ds_guid; 1698221263Smm gmep->gme_ds = snapds; 1699221263Smm avl_add(guid_map, gmep); 1700248571Smm dsl_dataset_long_hold(snapds, gmep); 1701248571Smm dsl_dataset_rele(snapds, FTAG); 1702221263Smm } 1703221263Smm 1704248571Smm dsl_pool_rele(dp, FTAG); 1705221263Smm return (err); 1706221263Smm} 1707221263Smm 1708248571Smmstatic int dmu_recv_end_modified_blocks = 3; 1709248571Smm 1710221263Smmstatic int 1711219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 1712185029Spjd{ 1713248571Smm int error; 1714248571Smm char name[MAXNAMELEN]; 1715185029Spjd 1716248571Smm#ifdef _KERNEL 1717248571Smm /* 1718248571Smm * We will be destroying the ds; make sure its origin is unmounted if 1719248571Smm * necessary. 1720248571Smm */ 1721248571Smm dsl_dataset_name(drc->drc_ds, name); 1722248571Smm zfs_destroy_unmount_origin(name); 1723248571Smm#endif 1724185029Spjd 1725248571Smm error = dsl_sync_task(drc->drc_tofs, 1726248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 1727248571Smm dmu_recv_end_modified_blocks); 1728185029Spjd 1729248571Smm if (error != 0) 1730248571Smm dmu_recv_cleanup_ds(drc); 1731248571Smm return (error); 1732185029Spjd} 1733219089Spjd 1734219089Spjdstatic int 1735219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 1736219089Spjd{ 1737248571Smm int error; 1738219089Spjd 1739248571Smm error = dsl_sync_task(drc->drc_tofs, 1740248571Smm dmu_recv_end_check, dmu_recv_end_sync, drc, 1741248571Smm dmu_recv_end_modified_blocks); 1742219089Spjd 1743248571Smm if (error != 0) { 1744248571Smm dmu_recv_cleanup_ds(drc); 1745248571Smm } else if (drc->drc_guid_to_ds_map != NULL) { 1746248571Smm (void) add_ds_to_guidmap(drc->drc_tofs, 1747248571Smm drc->drc_guid_to_ds_map, 1748248571Smm drc->drc_newsnapobj); 1749219089Spjd } 1750248571Smm return (error); 1751219089Spjd} 1752219089Spjd 1753219089Spjdint 1754219089Spjddmu_recv_end(dmu_recv_cookie_t *drc) 1755219089Spjd{ 1756248571Smm if (drc->drc_newfs) 1757248571Smm return (dmu_recv_new_end(drc)); 1758248571Smm else 1759219089Spjd return (dmu_recv_existing_end(drc)); 1760219089Spjd} 1761