dmu_send.c revision 185029
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 27168404Spjd 28168404Spjd#include <sys/dmu.h> 29168404Spjd#include <sys/dmu_impl.h> 30168404Spjd#include <sys/dmu_tx.h> 31168404Spjd#include <sys/dbuf.h> 32168404Spjd#include <sys/dnode.h> 33168404Spjd#include <sys/zfs_context.h> 34168404Spjd#include <sys/dmu_objset.h> 35168404Spjd#include <sys/dmu_traverse.h> 36168404Spjd#include <sys/dsl_dataset.h> 37168404Spjd#include <sys/dsl_dir.h> 38168404Spjd#include <sys/dsl_pool.h> 39168404Spjd#include <sys/dsl_synctask.h> 40168404Spjd#include <sys/zfs_ioctl.h> 41168404Spjd#include <sys/zap.h> 42168404Spjd#include <sys/zio_checksum.h> 43168404Spjd 44185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 45185029Spjd 46168404Spjdstruct backuparg { 47168404Spjd dmu_replay_record_t *drr; 48168404Spjd kthread_t *td; 49168404Spjd struct file *fp; 50185029Spjd offset_t *off; 51168404Spjd objset_t *os; 52168404Spjd zio_cksum_t zc; 53168404Spjd int err; 54168404Spjd}; 55168404Spjd 56168404Spjdstatic int 57168404Spjddump_bytes(struct backuparg *ba, void *buf, int len) 58168404Spjd{ 59168404Spjd struct uio auio; 60168404Spjd struct iovec aiov; 61168404Spjd 62168404Spjd ASSERT3U(len % 8, ==, 0); 63168404Spjd 64168404Spjd fletcher_4_incremental_native(buf, len, &ba->zc); 65168404Spjd 66168404Spjd aiov.iov_base = buf; 67168404Spjd aiov.iov_len = len; 68168404Spjd auio.uio_iov = &aiov; 69168404Spjd auio.uio_iovcnt = 1; 70168404Spjd auio.uio_resid = len; 71169170Spjd auio.uio_segflg = UIO_SYSSPACE; 72168404Spjd auio.uio_rw = UIO_WRITE; 73168404Spjd auio.uio_offset = (off_t)-1; 74168404Spjd auio.uio_td = ba->td; 75168404Spjd#ifdef _KERNEL 76168404Spjd if (ba->fp->f_type == DTYPE_VNODE) 77168404Spjd bwillwrite(); 78168404Spjd ba->err = fo_write(ba->fp, &auio, ba->td->td_ucred, 0, ba->td); 79168404Spjd#else 80168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 81168404Spjd ba->err = EOPNOTSUPP; 82168404Spjd#endif 83185029Spjd *ba->off += len; 84168404Spjd 85168404Spjd return (ba->err); 86168404Spjd} 87168404Spjd 88168404Spjdstatic int 89168404Spjddump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 90168404Spjd uint64_t length) 91168404Spjd{ 92168404Spjd /* write a FREE record */ 93168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 94168404Spjd ba->drr->drr_type = DRR_FREE; 95168404Spjd ba->drr->drr_u.drr_free.drr_object = object; 96168404Spjd ba->drr->drr_u.drr_free.drr_offset = offset; 97168404Spjd ba->drr->drr_u.drr_free.drr_length = length; 98168404Spjd 99168404Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 100168404Spjd return (EINTR); 101168404Spjd return (0); 102168404Spjd} 103168404Spjd 104168404Spjdstatic int 105168404Spjddump_data(struct backuparg *ba, dmu_object_type_t type, 106168404Spjd uint64_t object, uint64_t offset, int blksz, void *data) 107168404Spjd{ 108168404Spjd /* write a DATA record */ 109168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 110168404Spjd ba->drr->drr_type = DRR_WRITE; 111168404Spjd ba->drr->drr_u.drr_write.drr_object = object; 112168404Spjd ba->drr->drr_u.drr_write.drr_type = type; 113168404Spjd ba->drr->drr_u.drr_write.drr_offset = offset; 114168404Spjd ba->drr->drr_u.drr_write.drr_length = blksz; 115168404Spjd 116168404Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 117168404Spjd return (EINTR); 118168404Spjd if (dump_bytes(ba, data, blksz)) 119168404Spjd return (EINTR); 120168404Spjd return (0); 121168404Spjd} 122168404Spjd 123168404Spjdstatic int 124168404Spjddump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 125168404Spjd{ 126168404Spjd /* write a FREEOBJECTS record */ 127168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 128168404Spjd ba->drr->drr_type = DRR_FREEOBJECTS; 129168404Spjd ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj; 130168404Spjd ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs; 131168404Spjd 132168404Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 133168404Spjd return (EINTR); 134168404Spjd return (0); 135168404Spjd} 136168404Spjd 137168404Spjdstatic int 138168404Spjddump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 139168404Spjd{ 140168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 141168404Spjd return (dump_freeobjects(ba, object, 1)); 142168404Spjd 143168404Spjd /* write an OBJECT record */ 144168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 145168404Spjd ba->drr->drr_type = DRR_OBJECT; 146168404Spjd ba->drr->drr_u.drr_object.drr_object = object; 147168404Spjd ba->drr->drr_u.drr_object.drr_type = dnp->dn_type; 148168404Spjd ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype; 149168404Spjd ba->drr->drr_u.drr_object.drr_blksz = 150168404Spjd dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 151168404Spjd ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; 152168404Spjd ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; 153168404Spjd ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; 154168404Spjd 155168404Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 156168404Spjd return (EINTR); 157168404Spjd 158168404Spjd if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8))) 159168404Spjd return (EINTR); 160168404Spjd 161168404Spjd /* free anything past the end of the file */ 162168404Spjd if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 163168404Spjd (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 164168404Spjd return (EINTR); 165168404Spjd if (ba->err) 166168404Spjd return (EINTR); 167168404Spjd return (0); 168168404Spjd} 169168404Spjd 170168404Spjd#define BP_SPAN(dnp, level) \ 171168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 172168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 173168404Spjd 174168404Spjdstatic int 175168404Spjdbackup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 176168404Spjd{ 177168404Spjd struct backuparg *ba = arg; 178168404Spjd uint64_t object = bc->bc_bookmark.zb_object; 179168404Spjd int level = bc->bc_bookmark.zb_level; 180168404Spjd uint64_t blkid = bc->bc_bookmark.zb_blkid; 181168404Spjd blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; 182168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 183168404Spjd void *data = bc->bc_data; 184168404Spjd int err = 0; 185168404Spjd 186185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 187168404Spjd return (EINTR); 188168404Spjd 189168404Spjd ASSERT(data || bp == NULL); 190168404Spjd 191168404Spjd if (bp == NULL && object == 0) { 192168404Spjd uint64_t span = BP_SPAN(bc->bc_dnode, level); 193168404Spjd uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; 194168404Spjd err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 195168404Spjd } else if (bp == NULL) { 196168404Spjd uint64_t span = BP_SPAN(bc->bc_dnode, level); 197168404Spjd err = dump_free(ba, object, blkid * span, span); 198168404Spjd } else if (data && level == 0 && type == DMU_OT_DNODE) { 199168404Spjd dnode_phys_t *blk = data; 200168404Spjd int i; 201168404Spjd int blksz = BP_GET_LSIZE(bp); 202168404Spjd 203168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 204168404Spjd uint64_t dnobj = 205168404Spjd (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 206168404Spjd err = dump_dnode(ba, dnobj, blk+i); 207168404Spjd if (err) 208168404Spjd break; 209168404Spjd } 210168404Spjd } else if (level == 0 && 211168404Spjd type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { 212168404Spjd int blksz = BP_GET_LSIZE(bp); 213168404Spjd if (data == NULL) { 214168404Spjd uint32_t aflags = ARC_WAIT; 215168404Spjd arc_buf_t *abuf; 216168404Spjd zbookmark_t zb; 217168404Spjd 218168404Spjd zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object; 219168404Spjd zb.zb_object = object; 220168404Spjd zb.zb_level = level; 221168404Spjd zb.zb_blkid = blkid; 222185029Spjd (void) arc_read_nolock(NULL, spa, bp, 223185029Spjd arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 224185029Spjd ZIO_FLAG_MUSTSUCCEED, &aflags, &zb); 225168404Spjd 226168404Spjd if (abuf) { 227168404Spjd err = dump_data(ba, type, object, blkid * blksz, 228168404Spjd blksz, abuf->b_data); 229168404Spjd (void) arc_buf_remove_ref(abuf, &abuf); 230168404Spjd } 231168404Spjd } else { 232168404Spjd err = dump_data(ba, type, object, blkid * blksz, 233168404Spjd blksz, data); 234168404Spjd } 235168404Spjd } 236168404Spjd 237168404Spjd ASSERT(err == 0 || err == EINTR); 238168404Spjd return (err); 239168404Spjd} 240168404Spjd 241168404Spjdint 242185029Spjddmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, 243185029Spjd struct file *fp, offset_t *off) 244168404Spjd{ 245168404Spjd dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; 246168404Spjd dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL; 247168404Spjd dmu_replay_record_t *drr; 248168404Spjd struct backuparg ba; 249168404Spjd int err; 250185029Spjd uint64_t fromtxg = 0; 251168404Spjd 252168404Spjd /* tosnap must be a snapshot */ 253168404Spjd if (ds->ds_phys->ds_next_snap_obj == 0) 254168404Spjd return (EINVAL); 255168404Spjd 256168404Spjd /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 257168404Spjd if (fromds && (ds->ds_dir != fromds->ds_dir || 258185029Spjd fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) 259168404Spjd return (EXDEV); 260168404Spjd 261185029Spjd if (fromorigin) { 262185029Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 263185029Spjd 264185029Spjd if (fromsnap) 265185029Spjd return (EINVAL); 266185029Spjd 267185029Spjd if (dsl_dir_is_clone(ds->ds_dir)) { 268185029Spjd rw_enter(&dp->dp_config_rwlock, RW_READER); 269185029Spjd err = dsl_dataset_hold_obj(dp, 270185029Spjd ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); 271185029Spjd rw_exit(&dp->dp_config_rwlock); 272185029Spjd if (err) 273185029Spjd return (err); 274185029Spjd } else { 275185029Spjd fromorigin = B_FALSE; 276185029Spjd } 277185029Spjd } 278185029Spjd 279185029Spjd 280168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 281168404Spjd drr->drr_type = DRR_BEGIN; 282168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 283185029Spjd drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION; 284168404Spjd drr->drr_u.drr_begin.drr_creation_time = 285168404Spjd ds->ds_phys->ds_creation_time; 286168404Spjd drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; 287185029Spjd if (fromorigin) 288185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 289168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 290185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 291185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 292185029Spjd 293168404Spjd if (fromds) 294168404Spjd drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 295168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 296168404Spjd 297185029Spjd if (fromds) 298185029Spjd fromtxg = fromds->ds_phys->ds_creation_txg; 299185029Spjd if (fromorigin) 300185029Spjd dsl_dataset_rele(fromds, FTAG); 301185029Spjd 302168404Spjd ba.drr = drr; 303168404Spjd ba.td = curthread; 304168404Spjd ba.fp = fp; 305168404Spjd ba.os = tosnap; 306185029Spjd ba.off = off; 307168404Spjd ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 308168404Spjd 309168404Spjd if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 310168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 311168404Spjd return (ba.err); 312168404Spjd } 313168404Spjd 314185029Spjd err = traverse_dsl_dataset(ds, fromtxg, 315168404Spjd ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, 316168404Spjd backup_cb, &ba); 317168404Spjd 318168404Spjd if (err) { 319168404Spjd if (err == EINTR && ba.err) 320168404Spjd err = ba.err; 321168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 322168404Spjd return (err); 323168404Spjd } 324168404Spjd 325168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 326168404Spjd drr->drr_type = DRR_END; 327168404Spjd drr->drr_u.drr_end.drr_checksum = ba.zc; 328168404Spjd 329168404Spjd if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 330168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 331168404Spjd return (ba.err); 332168404Spjd } 333168404Spjd 334168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 335168404Spjd 336168404Spjd return (0); 337168404Spjd} 338168404Spjd 339185029Spjdstruct recvbeginsyncarg { 340185029Spjd const char *tofs; 341185029Spjd const char *tosnap; 342185029Spjd dsl_dataset_t *origin; 343185029Spjd uint64_t fromguid; 344185029Spjd dmu_objset_type_t type; 345185029Spjd void *tag; 346185029Spjd boolean_t force; 347185029Spjd uint64_t dsflags; 348185029Spjd char clonelastname[MAXNAMELEN]; 349185029Spjd dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ 350168404Spjd}; 351168404Spjd 352185029Spjdstatic dsl_dataset_t * 353185029Spjdrecv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type, 354185029Spjd cred_t *cr, dmu_tx_t *tx) 355185029Spjd{ 356185029Spjd dsl_dataset_t *ds; 357185029Spjd 358185029Spjd /* This should always work, since we just created it */ 359185029Spjd /* XXX - create should return an owned ds */ 360185029Spjd VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, 361185029Spjd DS_MODE_INCONSISTENT, dmu_recv_tag, &ds)); 362185029Spjd 363185029Spjd if (type != DMU_OST_NONE) { 364185029Spjd (void) dmu_objset_create_impl(dp->dp_spa, 365185029Spjd ds, &ds->ds_phys->ds_bp, type, tx); 366185029Spjd } 367185029Spjd 368185029Spjd spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC, 369185029Spjd dp->dp_spa, tx, cr, "dataset = %lld", dsobj); 370185029Spjd 371185029Spjd return (ds); 372185029Spjd} 373185029Spjd 374168404Spjd/* ARGSUSED */ 375168404Spjdstatic int 376185029Spjdrecv_full_check(void *arg1, void *arg2, dmu_tx_t *tx) 377168404Spjd{ 378185029Spjd dsl_dir_t *dd = arg1; 379185029Spjd struct recvbeginsyncarg *rbsa = arg2; 380185029Spjd objset_t *mos = dd->dd_pool->dp_meta_objset; 381185029Spjd uint64_t val; 382185029Spjd int err; 383185029Spjd 384185029Spjd err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 385185029Spjd strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); 386185029Spjd 387185029Spjd if (err != ENOENT) 388185029Spjd return (err ? err : EEXIST); 389185029Spjd 390185029Spjd if (rbsa->origin) { 391185029Spjd /* make sure it's a snap in the same pool */ 392185029Spjd if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) 393185029Spjd return (EXDEV); 394185029Spjd if (rbsa->origin->ds_phys->ds_num_children == 0) 395185029Spjd return (EINVAL); 396185029Spjd if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) 397185029Spjd return (ENODEV); 398185029Spjd } 399185029Spjd 400185029Spjd return (0); 401185029Spjd} 402185029Spjd 403185029Spjdstatic void 404185029Spjdrecv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 405185029Spjd{ 406185029Spjd dsl_dir_t *dd = arg1; 407185029Spjd struct recvbeginsyncarg *rbsa = arg2; 408185029Spjd uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 409185029Spjd uint64_t dsobj; 410185029Spjd 411185029Spjd dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, 412185029Spjd rbsa->origin, flags, cr, tx); 413185029Spjd 414185029Spjd rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, 415185029Spjd rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx); 416185029Spjd} 417185029Spjd 418185029Spjdstatic int 419185029Spjdrecv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) 420185029Spjd{ 421168404Spjd dsl_dataset_t *ds = arg1; 422185029Spjd struct recvbeginsyncarg *rbsa = arg2; 423168404Spjd int err; 424185029Spjd 425185029Spjd /* must be a head ds */ 426185029Spjd if (ds->ds_phys->ds_next_snap_obj != 0) 427185029Spjd return (EINVAL); 428185029Spjd 429185029Spjd /* must not be a clone ds */ 430185029Spjd if (dsl_dir_is_clone(ds->ds_dir)) 431185029Spjd return (EINVAL); 432185029Spjd 433185029Spjd err = dsl_dataset_destroy_check(ds, rbsa->tag, tx); 434185029Spjd if (err) 435185029Spjd return (err); 436185029Spjd 437185029Spjd if (rbsa->origin) { 438185029Spjd /* make sure it's a snap in the same pool */ 439185029Spjd if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool) 440185029Spjd return (EXDEV); 441185029Spjd if (rbsa->origin->ds_phys->ds_num_children == 0) 442185029Spjd return (EINVAL); 443185029Spjd if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) 444185029Spjd return (ENODEV); 445185029Spjd } 446185029Spjd 447185029Spjd return (0); 448185029Spjd} 449185029Spjd 450185029Spjdstatic void 451185029Spjdrecv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 452185029Spjd{ 453185029Spjd dsl_dataset_t *ds = arg1; 454185029Spjd struct recvbeginsyncarg *rbsa = arg2; 455185029Spjd dsl_dir_t *dd = ds->ds_dir; 456185029Spjd uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 457185029Spjd uint64_t dsobj; 458185029Spjd 459185029Spjd /* 460185029Spjd * NB: caller must provide an extra hold on the dsl_dir_t, so it 461185029Spjd * won't go away when dsl_dataset_destroy_sync() closes the 462185029Spjd * dataset. 463185029Spjd */ 464185029Spjd dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx); 465185029Spjd 466185029Spjd dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx); 467185029Spjd 468185029Spjd rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj, 469185029Spjd rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx); 470185029Spjd} 471185029Spjd 472185029Spjd/* ARGSUSED */ 473185029Spjdstatic int 474185029Spjdrecv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) 475185029Spjd{ 476185029Spjd dsl_dataset_t *ds = arg1; 477185029Spjd struct recvbeginsyncarg *rbsa = arg2; 478185029Spjd int err; 479168404Spjd uint64_t val; 480168404Spjd 481185029Spjd /* must not have any changes since most recent snapshot */ 482185029Spjd if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) 483185029Spjd return (ETXTBSY); 484185029Spjd 485168404Spjd /* must already be a snapshot of this fs */ 486168404Spjd if (ds->ds_phys->ds_prev_snap_obj == 0) 487168404Spjd return (ENODEV); 488168404Spjd 489168404Spjd /* most recent snapshot must match fromguid */ 490185029Spjd if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) 491168404Spjd return (ENODEV); 492168404Spjd 493185029Spjd /* temporary clone name must not exist */ 494185029Spjd err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 495185029Spjd ds->ds_dir->dd_phys->dd_child_dir_zapobj, 496185029Spjd rbsa->clonelastname, 8, 1, &val); 497185029Spjd if (err == 0) 498168404Spjd return (EEXIST); 499185029Spjd if (err != ENOENT) 500185029Spjd return (err); 501168404Spjd 502185029Spjd /* new snapshot name must not exist */ 503168404Spjd err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 504185029Spjd ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); 505168404Spjd if (err == 0) 506168404Spjd return (EEXIST); 507168404Spjd if (err != ENOENT) 508168404Spjd return (err); 509168404Spjd return (0); 510168404Spjd} 511168404Spjd 512168404Spjd/* ARGSUSED */ 513168404Spjdstatic void 514185029Spjdrecv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 515168404Spjd{ 516185029Spjd dsl_dataset_t *ohds = arg1; 517185029Spjd struct recvbeginsyncarg *rbsa = arg2; 518185029Spjd dsl_pool_t *dp = ohds->ds_dir->dd_pool; 519185029Spjd dsl_dataset_t *ods, *cds; 520185029Spjd uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 521185029Spjd uint64_t dsobj; 522168404Spjd 523185029Spjd /* create the temporary clone */ 524185029Spjd VERIFY(0 == dsl_dataset_hold_obj(dp, ohds->ds_phys->ds_prev_snap_obj, 525185029Spjd FTAG, &ods)); 526185029Spjd dsobj = dsl_dataset_create_sync(ohds->ds_dir, 527185029Spjd rbsa->clonelastname, ods, flags, cr, tx); 528185029Spjd dsl_dataset_rele(ods, FTAG); 529168404Spjd 530185029Spjd /* open the temporary clone */ 531185029Spjd VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, 532185029Spjd DS_MODE_INCONSISTENT, dmu_recv_tag, &cds)); 533168404Spjd 534185029Spjd /* copy the refquota from the target fs to the clone */ 535185029Spjd if (ohds->ds_quota > 0) 536185029Spjd dsl_dataset_set_quota_sync(cds, &ohds->ds_quota, cr, tx); 537168404Spjd 538185029Spjd rbsa->ds = cds; 539185029Spjd 540185029Spjd spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, 541185029Spjd dp->dp_spa, tx, cr, "dataset = %lld", dsobj); 542168404Spjd} 543168404Spjd 544185029Spjd/* ARGSUSED */ 545168404Spjdstatic void 546185029Spjdrecv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 547168404Spjd{ 548185029Spjd dsl_dataset_t *ds = arg1; 549168404Spjd 550168404Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 551168404Spjd ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 552168404Spjd 553185029Spjd spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC, 554185029Spjd ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld", 555185029Spjd ds->ds_object); 556168404Spjd} 557168404Spjd 558185029Spjd/* 559185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 560185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 561185029Spjd */ 562185029Spjdint 563185029Spjddmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb, 564185029Spjd boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *drc) 565168404Spjd{ 566185029Spjd int err = 0; 567185029Spjd boolean_t byteswap; 568185029Spjd struct recvbeginsyncarg rbsa; 569185029Spjd uint64_t version; 570185029Spjd int flags; 571185029Spjd dsl_dataset_t *ds; 572168404Spjd 573185029Spjd if (drrb->drr_magic == DMU_BACKUP_MAGIC) 574185029Spjd byteswap = FALSE; 575185029Spjd else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 576185029Spjd byteswap = TRUE; 577185029Spjd else 578185029Spjd return (EINVAL); 579168404Spjd 580185029Spjd rbsa.tofs = tofs; 581185029Spjd rbsa.tosnap = tosnap; 582185029Spjd rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL; 583185029Spjd rbsa.fromguid = drrb->drr_fromguid; 584185029Spjd rbsa.type = drrb->drr_type; 585185029Spjd rbsa.tag = FTAG; 586185029Spjd rbsa.dsflags = 0; 587185029Spjd version = drrb->drr_version; 588185029Spjd flags = drrb->drr_flags; 589185029Spjd 590185029Spjd if (byteswap) { 591185029Spjd rbsa.type = BSWAP_32(rbsa.type); 592185029Spjd rbsa.fromguid = BSWAP_64(rbsa.fromguid); 593185029Spjd version = BSWAP_64(version); 594185029Spjd flags = BSWAP_32(flags); 595185029Spjd } 596185029Spjd 597185029Spjd if (version != DMU_BACKUP_STREAM_VERSION || 598185029Spjd rbsa.type >= DMU_OST_NUMTYPES || 599185029Spjd ((flags & DRR_FLAG_CLONE) && origin == NULL)) 600168404Spjd return (EINVAL); 601168404Spjd 602185029Spjd if (flags & DRR_FLAG_CI_DATA) 603185029Spjd rbsa.dsflags = DS_FLAG_CI_DATASET; 604168404Spjd 605185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 606185029Spjd drc->drc_drrb = drrb; 607185029Spjd drc->drc_tosnap = tosnap; 608185029Spjd drc->drc_force = force; 609168404Spjd 610185029Spjd /* 611185029Spjd * Process the begin in syncing context. 612185029Spjd */ 613185029Spjd if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE) && !online) { 614185029Spjd /* offline incremental receive */ 615185029Spjd err = dsl_dataset_own(tofs, 0, dmu_recv_tag, &ds); 616185029Spjd if (err) 617185029Spjd return (err); 618168404Spjd 619185029Spjd /* 620185029Spjd * Only do the rollback if the most recent snapshot 621185029Spjd * matches the incremental source 622185029Spjd */ 623185029Spjd if (force) { 624185029Spjd if (ds->ds_prev == NULL || 625185029Spjd ds->ds_prev->ds_phys->ds_guid != 626185029Spjd rbsa.fromguid) { 627185029Spjd dsl_dataset_disown(ds, dmu_recv_tag); 628185029Spjd return (ENODEV); 629185029Spjd } 630185029Spjd (void) dsl_dataset_rollback(ds, DMU_OST_NONE); 631185029Spjd } 632185029Spjd rbsa.force = B_FALSE; 633185029Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 634185029Spjd recv_incremental_check, 635185029Spjd recv_offline_incremental_sync, ds, &rbsa, 1); 636185029Spjd if (err) { 637185029Spjd dsl_dataset_disown(ds, dmu_recv_tag); 638185029Spjd return (err); 639185029Spjd } 640185029Spjd drc->drc_logical_ds = drc->drc_real_ds = ds; 641185029Spjd } else if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) { 642185029Spjd /* online incremental receive */ 643168404Spjd 644185029Spjd /* tmp clone name is: tofs/%tosnap" */ 645185029Spjd (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), 646185029Spjd "%%%s", tosnap); 647168404Spjd 648185029Spjd /* open the dataset we are logically receiving into */ 649185029Spjd err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); 650185029Spjd if (err) 651185029Spjd return (err); 652168404Spjd 653185029Spjd rbsa.force = force; 654185029Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 655185029Spjd recv_incremental_check, 656185029Spjd recv_online_incremental_sync, ds, &rbsa, 5); 657185029Spjd if (err) { 658185029Spjd dsl_dataset_rele(ds, dmu_recv_tag); 659185029Spjd return (err); 660185029Spjd } 661185029Spjd drc->drc_logical_ds = ds; 662185029Spjd drc->drc_real_ds = rbsa.ds; 663185029Spjd } else { 664185029Spjd /* create new fs -- full backup or clone */ 665185029Spjd dsl_dir_t *dd = NULL; 666185029Spjd const char *tail; 667168404Spjd 668185029Spjd err = dsl_dir_open(tofs, FTAG, &dd, &tail); 669185029Spjd if (err) 670185029Spjd return (err); 671185029Spjd if (tail == NULL) { 672185029Spjd if (!force) { 673185029Spjd dsl_dir_close(dd, FTAG); 674185029Spjd return (EEXIST); 675185029Spjd } 676185029Spjd 677185029Spjd rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 678185029Spjd err = dsl_dataset_own_obj(dd->dd_pool, 679185029Spjd dd->dd_phys->dd_head_dataset_obj, 680185029Spjd DS_MODE_INCONSISTENT, FTAG, &ds); 681185029Spjd rw_exit(&dd->dd_pool->dp_config_rwlock); 682185029Spjd if (err) { 683185029Spjd dsl_dir_close(dd, FTAG); 684185029Spjd return (err); 685185029Spjd } 686185029Spjd 687185029Spjd dsl_dataset_make_exclusive(ds, FTAG); 688185029Spjd err = dsl_sync_task_do(dd->dd_pool, 689185029Spjd recv_full_existing_check, 690185029Spjd recv_full_existing_sync, ds, &rbsa, 5); 691185029Spjd dsl_dataset_disown(ds, FTAG); 692185029Spjd } else { 693185029Spjd err = dsl_sync_task_do(dd->dd_pool, recv_full_check, 694185029Spjd recv_full_sync, dd, &rbsa, 5); 695185029Spjd } 696185029Spjd dsl_dir_close(dd, FTAG); 697185029Spjd if (err) 698185029Spjd return (err); 699185029Spjd drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; 700185029Spjd drc->drc_newfs = B_TRUE; 701185029Spjd } 702185029Spjd 703185029Spjd return (0); 704168404Spjd} 705168404Spjd 706185029Spjdstruct restorearg { 707185029Spjd int err; 708185029Spjd int byteswap; 709185029Spjd kthread_t *td; 710185029Spjd struct file *fp; 711185029Spjd char *buf; 712185029Spjd uint64_t voff; 713185029Spjd int bufsize; /* amount of memory allocated for buf */ 714185029Spjd zio_cksum_t cksum; 715185029Spjd}; 716185029Spjd 717168404Spjdstatic int 718168404Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, int *resid) 719168404Spjd{ 720168404Spjd struct uio auio; 721168404Spjd struct iovec aiov; 722168404Spjd int error; 723168404Spjd 724168404Spjd aiov.iov_base = buf; 725168404Spjd aiov.iov_len = len; 726168404Spjd auio.uio_iov = &aiov; 727168404Spjd auio.uio_iovcnt = 1; 728168404Spjd auio.uio_resid = len; 729169170Spjd auio.uio_segflg = UIO_SYSSPACE; 730168404Spjd auio.uio_rw = UIO_READ; 731168404Spjd auio.uio_offset = off; 732168404Spjd auio.uio_td = ra->td; 733168404Spjd#ifdef _KERNEL 734168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 735168404Spjd#else 736168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 737168404Spjd error = EOPNOTSUPP; 738168404Spjd#endif 739168404Spjd *resid = auio.uio_resid; 740168404Spjd return (error); 741168404Spjd} 742168404Spjd 743168404Spjdstatic void * 744168404Spjdrestore_read(struct restorearg *ra, int len) 745168404Spjd{ 746168404Spjd void *rv; 747185029Spjd int done = 0; 748168404Spjd 749168404Spjd /* some things will require 8-byte alignment, so everything must */ 750168404Spjd ASSERT3U(len % 8, ==, 0); 751168404Spjd 752185029Spjd while (done < len) { 753168404Spjd int resid; 754168404Spjd 755185029Spjd ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 756185029Spjd len - done, ra->voff, &resid); 757168404Spjd 758185029Spjd if (resid == len - done) 759168404Spjd ra->err = EINVAL; 760185029Spjd ra->voff += len - done - resid; 761185029Spjd done = len - resid; 762168404Spjd if (ra->err) 763168404Spjd return (NULL); 764168404Spjd } 765168404Spjd 766185029Spjd ASSERT3U(done, ==, len); 767185029Spjd rv = ra->buf; 768168404Spjd if (ra->byteswap) 769185029Spjd fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 770168404Spjd else 771185029Spjd fletcher_4_incremental_native(rv, len, &ra->cksum); 772168404Spjd return (rv); 773168404Spjd} 774168404Spjd 775168404Spjdstatic void 776168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 777168404Spjd{ 778168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 779168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 780168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 781185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 782168404Spjd switch (drr->drr_type) { 783168404Spjd case DRR_BEGIN: 784168404Spjd DO64(drr_begin.drr_magic); 785168404Spjd DO64(drr_begin.drr_version); 786168404Spjd DO64(drr_begin.drr_creation_time); 787168404Spjd DO32(drr_begin.drr_type); 788185029Spjd DO32(drr_begin.drr_flags); 789168404Spjd DO64(drr_begin.drr_toguid); 790168404Spjd DO64(drr_begin.drr_fromguid); 791168404Spjd break; 792168404Spjd case DRR_OBJECT: 793168404Spjd DO64(drr_object.drr_object); 794168404Spjd /* DO64(drr_object.drr_allocation_txg); */ 795168404Spjd DO32(drr_object.drr_type); 796168404Spjd DO32(drr_object.drr_bonustype); 797168404Spjd DO32(drr_object.drr_blksz); 798168404Spjd DO32(drr_object.drr_bonuslen); 799168404Spjd break; 800168404Spjd case DRR_FREEOBJECTS: 801168404Spjd DO64(drr_freeobjects.drr_firstobj); 802168404Spjd DO64(drr_freeobjects.drr_numobjs); 803168404Spjd break; 804168404Spjd case DRR_WRITE: 805168404Spjd DO64(drr_write.drr_object); 806168404Spjd DO32(drr_write.drr_type); 807168404Spjd DO64(drr_write.drr_offset); 808168404Spjd DO64(drr_write.drr_length); 809168404Spjd break; 810168404Spjd case DRR_FREE: 811168404Spjd DO64(drr_free.drr_object); 812168404Spjd DO64(drr_free.drr_offset); 813168404Spjd DO64(drr_free.drr_length); 814168404Spjd break; 815168404Spjd case DRR_END: 816168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 817168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 818168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 819168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 820168404Spjd break; 821168404Spjd } 822168404Spjd#undef DO64 823168404Spjd#undef DO32 824168404Spjd} 825168404Spjd 826168404Spjdstatic int 827168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 828168404Spjd{ 829168404Spjd int err; 830168404Spjd dmu_tx_t *tx; 831168404Spjd 832168404Spjd err = dmu_object_info(os, drro->drr_object, NULL); 833168404Spjd 834168404Spjd if (err != 0 && err != ENOENT) 835168404Spjd return (EINVAL); 836168404Spjd 837168404Spjd if (drro->drr_type == DMU_OT_NONE || 838168404Spjd drro->drr_type >= DMU_OT_NUMTYPES || 839168404Spjd drro->drr_bonustype >= DMU_OT_NUMTYPES || 840168404Spjd drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || 841168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 842168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 843168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 844168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 845168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 846168404Spjd return (EINVAL); 847168404Spjd } 848168404Spjd 849168404Spjd tx = dmu_tx_create(os); 850168404Spjd 851168404Spjd if (err == ENOENT) { 852168404Spjd /* currently free, want to be allocated */ 853168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 854168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); 855168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 856168404Spjd if (err) { 857168404Spjd dmu_tx_abort(tx); 858168404Spjd return (err); 859168404Spjd } 860168404Spjd err = dmu_object_claim(os, drro->drr_object, 861168404Spjd drro->drr_type, drro->drr_blksz, 862168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 863168404Spjd } else { 864168404Spjd /* currently allocated, want to be allocated */ 865168404Spjd dmu_tx_hold_bonus(tx, drro->drr_object); 866168404Spjd /* 867168404Spjd * We may change blocksize, so need to 868168404Spjd * hold_write 869168404Spjd */ 870168404Spjd dmu_tx_hold_write(tx, drro->drr_object, 0, 1); 871168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 872168404Spjd if (err) { 873168404Spjd dmu_tx_abort(tx); 874168404Spjd return (err); 875168404Spjd } 876168404Spjd 877168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 878168404Spjd drro->drr_type, drro->drr_blksz, 879168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 880168404Spjd } 881168404Spjd if (err) { 882168404Spjd dmu_tx_commit(tx); 883168404Spjd return (EINVAL); 884168404Spjd } 885168404Spjd 886168404Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); 887168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 888168404Spjd 889168404Spjd if (drro->drr_bonuslen) { 890168404Spjd dmu_buf_t *db; 891168404Spjd void *data; 892168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 893168404Spjd dmu_buf_will_dirty(db, tx); 894168404Spjd 895185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 896185029Spjd data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 897168404Spjd if (data == NULL) { 898168404Spjd dmu_tx_commit(tx); 899168404Spjd return (ra->err); 900168404Spjd } 901185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 902168404Spjd if (ra->byteswap) { 903168404Spjd dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 904168404Spjd drro->drr_bonuslen); 905168404Spjd } 906168404Spjd dmu_buf_rele(db, FTAG); 907168404Spjd } 908168404Spjd dmu_tx_commit(tx); 909168404Spjd return (0); 910168404Spjd} 911168404Spjd 912168404Spjd/* ARGSUSED */ 913168404Spjdstatic int 914168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 915168404Spjd struct drr_freeobjects *drrfo) 916168404Spjd{ 917168404Spjd uint64_t obj; 918168404Spjd 919168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 920168404Spjd return (EINVAL); 921168404Spjd 922168404Spjd for (obj = drrfo->drr_firstobj; 923168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 924168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 925168404Spjd int err; 926168404Spjd 927168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 928168404Spjd continue; 929168404Spjd 930185029Spjd err = dmu_free_object(os, obj); 931185029Spjd if (err) 932168404Spjd return (err); 933168404Spjd } 934168404Spjd return (0); 935168404Spjd} 936168404Spjd 937168404Spjdstatic int 938168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 939168404Spjd struct drr_write *drrw) 940168404Spjd{ 941168404Spjd dmu_tx_t *tx; 942168404Spjd void *data; 943168404Spjd int err; 944168404Spjd 945168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 946168404Spjd drrw->drr_type >= DMU_OT_NUMTYPES) 947168404Spjd return (EINVAL); 948168404Spjd 949168404Spjd data = restore_read(ra, drrw->drr_length); 950168404Spjd if (data == NULL) 951168404Spjd return (ra->err); 952168404Spjd 953168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 954168404Spjd return (EINVAL); 955168404Spjd 956168404Spjd tx = dmu_tx_create(os); 957168404Spjd 958168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 959168404Spjd drrw->drr_offset, drrw->drr_length); 960168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 961168404Spjd if (err) { 962168404Spjd dmu_tx_abort(tx); 963168404Spjd return (err); 964168404Spjd } 965168404Spjd if (ra->byteswap) 966168404Spjd dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 967168404Spjd dmu_write(os, drrw->drr_object, 968168404Spjd drrw->drr_offset, drrw->drr_length, data, tx); 969168404Spjd dmu_tx_commit(tx); 970168404Spjd return (0); 971168404Spjd} 972168404Spjd 973168404Spjd/* ARGSUSED */ 974168404Spjdstatic int 975168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 976168404Spjd struct drr_free *drrf) 977168404Spjd{ 978168404Spjd int err; 979168404Spjd 980168404Spjd if (drrf->drr_length != -1ULL && 981168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 982168404Spjd return (EINVAL); 983168404Spjd 984168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 985168404Spjd return (EINVAL); 986168404Spjd 987185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 988168404Spjd drrf->drr_offset, drrf->drr_length); 989168404Spjd return (err); 990168404Spjd} 991168404Spjd 992185029Spjdvoid 993185029Spjddmu_recv_abort_cleanup(dmu_recv_cookie_t *drc) 994185029Spjd{ 995185029Spjd if (drc->drc_newfs || drc->drc_real_ds != drc->drc_logical_ds) { 996185029Spjd /* 997185029Spjd * online incremental or new fs: destroy the fs (which 998185029Spjd * may be a clone) that we created 999185029Spjd */ 1000185029Spjd (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag); 1001185029Spjd if (drc->drc_real_ds != drc->drc_logical_ds) 1002185029Spjd dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); 1003185029Spjd } else { 1004185029Spjd /* 1005185029Spjd * offline incremental: rollback to most recent snapshot. 1006185029Spjd */ 1007185029Spjd (void) dsl_dataset_rollback(drc->drc_real_ds, DMU_OST_NONE); 1008185029Spjd dsl_dataset_disown(drc->drc_real_ds, dmu_recv_tag); 1009185029Spjd } 1010185029Spjd} 1011185029Spjd 1012185029Spjd/* 1013185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1014185029Spjd */ 1015168404Spjdint 1016185029Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp) 1017168404Spjd{ 1018168404Spjd kthread_t *td = curthread; 1019185029Spjd struct restorearg ra = { 0 }; 1020168404Spjd dmu_replay_record_t *drr; 1021185029Spjd objset_t *os; 1022185029Spjd zio_cksum_t pcksum; 1023168404Spjd 1024185029Spjd if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1025168404Spjd ra.byteswap = TRUE; 1026168404Spjd 1027185029Spjd { 1028185029Spjd /* compute checksum of drr_begin record */ 1029185029Spjd dmu_replay_record_t *drr; 1030185029Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1031185029Spjd 1032185029Spjd drr->drr_type = DRR_BEGIN; 1033185029Spjd drr->drr_u.drr_begin = *drc->drc_drrb; 1034185029Spjd if (ra.byteswap) { 1035185029Spjd fletcher_4_incremental_byteswap(drr, 1036185029Spjd sizeof (dmu_replay_record_t), &ra.cksum); 1037185029Spjd } else { 1038185029Spjd fletcher_4_incremental_native(drr, 1039185029Spjd sizeof (dmu_replay_record_t), &ra.cksum); 1040185029Spjd } 1041185029Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 1042168404Spjd } 1043168404Spjd 1044168404Spjd if (ra.byteswap) { 1045185029Spjd struct drr_begin *drrb = drc->drc_drrb; 1046168404Spjd drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1047168404Spjd drrb->drr_version = BSWAP_64(drrb->drr_version); 1048168404Spjd drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1049168404Spjd drrb->drr_type = BSWAP_32(drrb->drr_type); 1050168404Spjd drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1051168404Spjd drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1052168404Spjd } 1053168404Spjd 1054185029Spjd ra.td = td; 1055185029Spjd ra.fp = fp; 1056185029Spjd ra.voff = *voffp; 1057185029Spjd ra.bufsize = 1<<20; 1058185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1059168404Spjd 1060185029Spjd /* these were verified in dmu_recv_begin */ 1061185029Spjd ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION); 1062185029Spjd ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); 1063168404Spjd 1064168404Spjd /* 1065168404Spjd * Open the objset we are modifying. 1066168404Spjd */ 1067185029Spjd VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0); 1068168404Spjd 1069185029Spjd ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1070168404Spjd 1071168404Spjd /* 1072168404Spjd * Read records and process them. 1073168404Spjd */ 1074185029Spjd pcksum = ra.cksum; 1075168404Spjd while (ra.err == 0 && 1076168404Spjd NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1077185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1078168404Spjd ra.err = EINTR; 1079168404Spjd goto out; 1080168404Spjd } 1081168404Spjd 1082168404Spjd if (ra.byteswap) 1083168404Spjd backup_byteswap(drr); 1084168404Spjd 1085168404Spjd switch (drr->drr_type) { 1086168404Spjd case DRR_OBJECT: 1087168404Spjd { 1088168404Spjd /* 1089168404Spjd * We need to make a copy of the record header, 1090168404Spjd * because restore_{object,write} may need to 1091168404Spjd * restore_read(), which will invalidate drr. 1092168404Spjd */ 1093168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1094168404Spjd ra.err = restore_object(&ra, os, &drro); 1095168404Spjd break; 1096168404Spjd } 1097168404Spjd case DRR_FREEOBJECTS: 1098168404Spjd { 1099168404Spjd struct drr_freeobjects drrfo = 1100168404Spjd drr->drr_u.drr_freeobjects; 1101168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1102168404Spjd break; 1103168404Spjd } 1104168404Spjd case DRR_WRITE: 1105168404Spjd { 1106168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1107168404Spjd ra.err = restore_write(&ra, os, &drrw); 1108168404Spjd break; 1109168404Spjd } 1110168404Spjd case DRR_FREE: 1111168404Spjd { 1112168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1113168404Spjd ra.err = restore_free(&ra, os, &drrf); 1114168404Spjd break; 1115168404Spjd } 1116168404Spjd case DRR_END: 1117168404Spjd { 1118168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1119168404Spjd /* 1120168404Spjd * We compare against the *previous* checksum 1121168404Spjd * value, because the stored checksum is of 1122168404Spjd * everything before the DRR_END record. 1123168404Spjd */ 1124185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1125168404Spjd ra.err = ECKSUM; 1126168404Spjd goto out; 1127168404Spjd } 1128168404Spjd default: 1129168404Spjd ra.err = EINVAL; 1130168404Spjd goto out; 1131168404Spjd } 1132185029Spjd pcksum = ra.cksum; 1133168404Spjd } 1134185029Spjd ASSERT(ra.err != 0); 1135168404Spjd 1136168404Spjdout: 1137185029Spjd dmu_objset_close(os); 1138168404Spjd 1139185029Spjd if (ra.err != 0) { 1140168404Spjd /* 1141168404Spjd * rollback or destroy what we created, so we don't 1142168404Spjd * leave it in the restoring state. 1143168404Spjd */ 1144185029Spjd txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); 1145185029Spjd dmu_recv_abort_cleanup(drc); 1146168404Spjd } 1147168404Spjd 1148168404Spjd kmem_free(ra.buf, ra.bufsize); 1149185029Spjd *voffp = ra.voff; 1150168404Spjd return (ra.err); 1151168404Spjd} 1152185029Spjd 1153185029Spjdstruct recvendsyncarg { 1154185029Spjd char *tosnap; 1155185029Spjd uint64_t creation_time; 1156185029Spjd uint64_t toguid; 1157185029Spjd}; 1158185029Spjd 1159185029Spjdstatic int 1160185029Spjdrecv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) 1161185029Spjd{ 1162185029Spjd dsl_dataset_t *ds = arg1; 1163185029Spjd struct recvendsyncarg *resa = arg2; 1164185029Spjd 1165185029Spjd return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); 1166185029Spjd} 1167185029Spjd 1168185029Spjdstatic void 1169185029Spjdrecv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1170185029Spjd{ 1171185029Spjd dsl_dataset_t *ds = arg1; 1172185029Spjd struct recvendsyncarg *resa = arg2; 1173185029Spjd 1174185029Spjd dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx); 1175185029Spjd 1176185029Spjd /* set snapshot's creation time and guid */ 1177185029Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1178185029Spjd ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; 1179185029Spjd ds->ds_prev->ds_phys->ds_guid = resa->toguid; 1180185029Spjd ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1181185029Spjd 1182185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1183185029Spjd ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1184185029Spjd} 1185185029Spjd 1186185029Spjdint 1187185029Spjddmu_recv_end(dmu_recv_cookie_t *drc) 1188185029Spjd{ 1189185029Spjd struct recvendsyncarg resa; 1190185029Spjd dsl_dataset_t *ds = drc->drc_logical_ds; 1191185029Spjd int err; 1192185029Spjd 1193185029Spjd /* 1194185029Spjd * XXX hack; seems the ds is still dirty and 1195185029Spjd * dsl_pool_zil_clean() expects it to have a ds_user_ptr 1196185029Spjd * (and zil), but clone_swap() can close it. 1197185029Spjd */ 1198185029Spjd txg_wait_synced(ds->ds_dir->dd_pool, 0); 1199185029Spjd 1200185029Spjd if (ds != drc->drc_real_ds) { 1201185029Spjd /* we are doing an online recv */ 1202185029Spjd if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { 1203185029Spjd err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, 1204185029Spjd drc->drc_force); 1205185029Spjd if (err) 1206185029Spjd dsl_dataset_disown(ds, dmu_recv_tag); 1207185029Spjd } else { 1208185029Spjd err = EBUSY; 1209185029Spjd dsl_dataset_rele(ds, dmu_recv_tag); 1210185029Spjd } 1211185029Spjd /* dsl_dataset_destroy() will disown the ds */ 1212185029Spjd (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag); 1213185029Spjd if (err) 1214185029Spjd return (err); 1215185029Spjd } 1216185029Spjd 1217185029Spjd resa.creation_time = drc->drc_drrb->drr_creation_time; 1218185029Spjd resa.toguid = drc->drc_drrb->drr_toguid; 1219185029Spjd resa.tosnap = drc->drc_tosnap; 1220185029Spjd 1221185029Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1222185029Spjd recv_end_check, recv_end_sync, ds, &resa, 3); 1223185029Spjd if (err) { 1224185029Spjd if (drc->drc_newfs) { 1225185029Spjd ASSERT(ds == drc->drc_real_ds); 1226185029Spjd (void) dsl_dataset_destroy(ds, dmu_recv_tag); 1227185029Spjd return (err); 1228185029Spjd } else { 1229185029Spjd (void) dsl_dataset_rollback(ds, DMU_OST_NONE); 1230185029Spjd } 1231185029Spjd } 1232185029Spjd 1233185029Spjd /* release the hold from dmu_recv_begin */ 1234185029Spjd dsl_dataset_disown(ds, dmu_recv_tag); 1235185029Spjd return (err); 1236185029Spjd} 1237