dmu_send.c revision 221263
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23168404Spjd */ 24221263Smm/* 25221263Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26221263Smm */ 27168404Spjd 28168404Spjd#include <sys/dmu.h> 29168404Spjd#include <sys/dmu_impl.h> 30168404Spjd#include <sys/dmu_tx.h> 31168404Spjd#include <sys/dbuf.h> 32168404Spjd#include <sys/dnode.h> 33168404Spjd#include <sys/zfs_context.h> 34168404Spjd#include <sys/dmu_objset.h> 35168404Spjd#include <sys/dmu_traverse.h> 36168404Spjd#include <sys/dsl_dataset.h> 37168404Spjd#include <sys/dsl_dir.h> 38219089Spjd#include <sys/dsl_prop.h> 39168404Spjd#include <sys/dsl_pool.h> 40168404Spjd#include <sys/dsl_synctask.h> 41168404Spjd#include <sys/zfs_ioctl.h> 42168404Spjd#include <sys/zap.h> 43168404Spjd#include <sys/zio_checksum.h> 44219089Spjd#include <sys/zfs_znode.h> 45219089Spjd#include <zfs_fletcher.h> 46219089Spjd#include <sys/avl.h> 47219089Spjd#include <sys/ddt.h> 48219089Spjd#include <sys/zfs_onexit.h> 49168404Spjd 50185029Spjdstatic char *dmu_recv_tag = "dmu_recv_tag"; 51185029Spjd 52219089Spjd/* 53219089Spjd * The list of data whose inclusion in a send stream can be pending from 54219089Spjd * one call to backup_cb to another. Multiple calls to dump_free() and 55219089Spjd * dump_freeobjects() can be aggregated into a single DRR_FREE or 56219089Spjd * DRR_FREEOBJECTS replay record. 57219089Spjd */ 58219089Spjdtypedef enum { 59219089Spjd PENDING_NONE, 60219089Spjd PENDING_FREE, 61219089Spjd PENDING_FREEOBJECTS 62219089Spjd} pendop_t; 63219089Spjd 64168404Spjdstruct backuparg { 65168404Spjd dmu_replay_record_t *drr; 66168404Spjd kthread_t *td; 67168404Spjd struct file *fp; 68185029Spjd offset_t *off; 69168404Spjd objset_t *os; 70168404Spjd zio_cksum_t zc; 71219089Spjd uint64_t toguid; 72168404Spjd int err; 73219089Spjd pendop_t pending_op; 74168404Spjd}; 75168404Spjd 76168404Spjdstatic int 77168404Spjddump_bytes(struct backuparg *ba, void *buf, int len) 78168404Spjd{ 79168404Spjd struct uio auio; 80168404Spjd struct iovec aiov; 81168404Spjd ASSERT3U(len % 8, ==, 0); 82168404Spjd 83168404Spjd fletcher_4_incremental_native(buf, len, &ba->zc); 84168404Spjd aiov.iov_base = buf; 85168404Spjd aiov.iov_len = len; 86168404Spjd auio.uio_iov = &aiov; 87168404Spjd auio.uio_iovcnt = 1; 88168404Spjd auio.uio_resid = len; 89169170Spjd auio.uio_segflg = UIO_SYSSPACE; 90168404Spjd auio.uio_rw = UIO_WRITE; 91168404Spjd auio.uio_offset = (off_t)-1; 92168404Spjd auio.uio_td = ba->td; 93168404Spjd#ifdef _KERNEL 94168404Spjd if (ba->fp->f_type == DTYPE_VNODE) 95168404Spjd bwillwrite(); 96168404Spjd ba->err = fo_write(ba->fp, &auio, ba->td->td_ucred, 0, ba->td); 97168404Spjd#else 98168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 99168404Spjd ba->err = EOPNOTSUPP; 100168404Spjd#endif 101185029Spjd *ba->off += len; 102168404Spjd return (ba->err); 103168404Spjd} 104168404Spjd 105168404Spjdstatic int 106168404Spjddump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 107168404Spjd uint64_t length) 108168404Spjd{ 109219089Spjd struct drr_free *drrf = &(ba->drr->drr_u.drr_free); 110219089Spjd 111219089Spjd /* 112219089Spjd * If there is a pending op, but it's not PENDING_FREE, push it out, 113219089Spjd * since free block aggregation can only be done for blocks of the 114219089Spjd * same type (i.e., DRR_FREE records can only be aggregated with 115219089Spjd * other DRR_FREE records. DRR_FREEOBJECTS records can only be 116219089Spjd * aggregated with other DRR_FREEOBJECTS records. 117219089Spjd */ 118219089Spjd if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { 119219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 120219089Spjd return (EINTR); 121219089Spjd ba->pending_op = PENDING_NONE; 122219089Spjd } 123219089Spjd 124219089Spjd if (ba->pending_op == PENDING_FREE) { 125219089Spjd /* 126219089Spjd * There should never be a PENDING_FREE if length is -1 127219089Spjd * (because dump_dnode is the only place where this 128219089Spjd * function is called with a -1, and only after flushing 129219089Spjd * any pending record). 130219089Spjd */ 131219089Spjd ASSERT(length != -1ULL); 132219089Spjd /* 133219089Spjd * Check to see whether this free block can be aggregated 134219089Spjd * with pending one. 135219089Spjd */ 136219089Spjd if (drrf->drr_object == object && drrf->drr_offset + 137219089Spjd drrf->drr_length == offset) { 138219089Spjd drrf->drr_length += length; 139219089Spjd return (0); 140219089Spjd } else { 141219089Spjd /* not a continuation. Push out pending record */ 142219089Spjd if (dump_bytes(ba, ba->drr, 143219089Spjd sizeof (dmu_replay_record_t)) != 0) 144219089Spjd return (EINTR); 145219089Spjd ba->pending_op = PENDING_NONE; 146219089Spjd } 147219089Spjd } 148219089Spjd /* create a FREE record and make it pending */ 149168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 150168404Spjd ba->drr->drr_type = DRR_FREE; 151219089Spjd drrf->drr_object = object; 152219089Spjd drrf->drr_offset = offset; 153219089Spjd drrf->drr_length = length; 154219089Spjd drrf->drr_toguid = ba->toguid; 155219089Spjd if (length == -1ULL) { 156219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 157219089Spjd return (EINTR); 158219089Spjd } else { 159219089Spjd ba->pending_op = PENDING_FREE; 160219089Spjd } 161168404Spjd 162168404Spjd return (0); 163168404Spjd} 164168404Spjd 165168404Spjdstatic int 166168404Spjddump_data(struct backuparg *ba, dmu_object_type_t type, 167219089Spjd uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) 168168404Spjd{ 169219089Spjd struct drr_write *drrw = &(ba->drr->drr_u.drr_write); 170219089Spjd 171219089Spjd 172219089Spjd /* 173219089Spjd * If there is any kind of pending aggregation (currently either 174219089Spjd * a grouping of free objects or free blocks), push it out to 175219089Spjd * the stream, since aggregation can't be done across operations 176219089Spjd * of different types. 177219089Spjd */ 178219089Spjd if (ba->pending_op != PENDING_NONE) { 179219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 180219089Spjd return (EINTR); 181219089Spjd ba->pending_op = PENDING_NONE; 182219089Spjd } 183168404Spjd /* write a DATA record */ 184168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 185168404Spjd ba->drr->drr_type = DRR_WRITE; 186219089Spjd drrw->drr_object = object; 187219089Spjd drrw->drr_type = type; 188219089Spjd drrw->drr_offset = offset; 189219089Spjd drrw->drr_length = blksz; 190219089Spjd drrw->drr_toguid = ba->toguid; 191219089Spjd drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); 192219089Spjd if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) 193219089Spjd drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; 194219089Spjd DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); 195219089Spjd DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); 196219089Spjd DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); 197219089Spjd drrw->drr_key.ddk_cksum = bp->blk_cksum; 198168404Spjd 199219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 200219089Spjd return (EINTR); 201219089Spjd if (dump_bytes(ba, data, blksz) != 0) 202219089Spjd return (EINTR); 203219089Spjd return (0); 204219089Spjd} 205219089Spjd 206219089Spjdstatic int 207219089Spjddump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) 208219089Spjd{ 209219089Spjd struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); 210219089Spjd 211219089Spjd if (ba->pending_op != PENDING_NONE) { 212219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 213219089Spjd return (EINTR); 214219089Spjd ba->pending_op = PENDING_NONE; 215219089Spjd } 216219089Spjd 217219089Spjd /* write a SPILL record */ 218219089Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 219219089Spjd ba->drr->drr_type = DRR_SPILL; 220219089Spjd drrs->drr_object = object; 221219089Spjd drrs->drr_length = blksz; 222219089Spjd drrs->drr_toguid = ba->toguid; 223219089Spjd 224168404Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 225168404Spjd return (EINTR); 226168404Spjd if (dump_bytes(ba, data, blksz)) 227168404Spjd return (EINTR); 228168404Spjd return (0); 229168404Spjd} 230168404Spjd 231168404Spjdstatic int 232168404Spjddump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 233168404Spjd{ 234219089Spjd struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); 235219089Spjd 236219089Spjd /* 237219089Spjd * If there is a pending op, but it's not PENDING_FREEOBJECTS, 238219089Spjd * push it out, since free block aggregation can only be done for 239219089Spjd * blocks of the same type (i.e., DRR_FREE records can only be 240219089Spjd * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records 241219089Spjd * can only be aggregated with other DRR_FREEOBJECTS records. 242219089Spjd */ 243219089Spjd if (ba->pending_op != PENDING_NONE && 244219089Spjd ba->pending_op != PENDING_FREEOBJECTS) { 245219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 246219089Spjd return (EINTR); 247219089Spjd ba->pending_op = PENDING_NONE; 248219089Spjd } 249219089Spjd if (ba->pending_op == PENDING_FREEOBJECTS) { 250219089Spjd /* 251219089Spjd * See whether this free object array can be aggregated 252219089Spjd * with pending one 253219089Spjd */ 254219089Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { 255219089Spjd drrfo->drr_numobjs += numobjs; 256219089Spjd return (0); 257219089Spjd } else { 258219089Spjd /* can't be aggregated. Push out pending record */ 259219089Spjd if (dump_bytes(ba, ba->drr, 260219089Spjd sizeof (dmu_replay_record_t)) != 0) 261219089Spjd return (EINTR); 262219089Spjd ba->pending_op = PENDING_NONE; 263219089Spjd } 264219089Spjd } 265219089Spjd 266168404Spjd /* write a FREEOBJECTS record */ 267168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 268168404Spjd ba->drr->drr_type = DRR_FREEOBJECTS; 269219089Spjd drrfo->drr_firstobj = firstobj; 270219089Spjd drrfo->drr_numobjs = numobjs; 271219089Spjd drrfo->drr_toguid = ba->toguid; 272168404Spjd 273219089Spjd ba->pending_op = PENDING_FREEOBJECTS; 274219089Spjd 275168404Spjd return (0); 276168404Spjd} 277168404Spjd 278168404Spjdstatic int 279168404Spjddump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 280168404Spjd{ 281219089Spjd struct drr_object *drro = &(ba->drr->drr_u.drr_object); 282219089Spjd 283168404Spjd if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 284168404Spjd return (dump_freeobjects(ba, object, 1)); 285168404Spjd 286219089Spjd if (ba->pending_op != PENDING_NONE) { 287219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 288219089Spjd return (EINTR); 289219089Spjd ba->pending_op = PENDING_NONE; 290219089Spjd } 291219089Spjd 292168404Spjd /* write an OBJECT record */ 293168404Spjd bzero(ba->drr, sizeof (dmu_replay_record_t)); 294168404Spjd ba->drr->drr_type = DRR_OBJECT; 295219089Spjd drro->drr_object = object; 296219089Spjd drro->drr_type = dnp->dn_type; 297219089Spjd drro->drr_bonustype = dnp->dn_bonustype; 298219089Spjd drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 299219089Spjd drro->drr_bonuslen = dnp->dn_bonuslen; 300219089Spjd drro->drr_checksumtype = dnp->dn_checksum; 301219089Spjd drro->drr_compress = dnp->dn_compress; 302219089Spjd drro->drr_toguid = ba->toguid; 303168404Spjd 304219089Spjd if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) 305168404Spjd return (EINTR); 306168404Spjd 307219089Spjd if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) 308168404Spjd return (EINTR); 309168404Spjd 310168404Spjd /* free anything past the end of the file */ 311168404Spjd if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 312168404Spjd (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 313168404Spjd return (EINTR); 314168404Spjd if (ba->err) 315168404Spjd return (EINTR); 316168404Spjd return (0); 317168404Spjd} 318168404Spjd 319168404Spjd#define BP_SPAN(dnp, level) \ 320168404Spjd (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 321168404Spjd (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 322168404Spjd 323219089Spjd/* ARGSUSED */ 324168404Spjdstatic int 325219089Spjdbackup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 326219089Spjd const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 327168404Spjd{ 328168404Spjd struct backuparg *ba = arg; 329168404Spjd dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 330168404Spjd int err = 0; 331168404Spjd 332185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) 333168404Spjd return (EINTR); 334168404Spjd 335219089Spjd if (zb->zb_object != DMU_META_DNODE_OBJECT && 336219089Spjd DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { 337209962Smm return (0); 338219089Spjd } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { 339208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 340208047Smm uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; 341168404Spjd err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 342168404Spjd } else if (bp == NULL) { 343208047Smm uint64_t span = BP_SPAN(dnp, zb->zb_level); 344208047Smm err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); 345208047Smm } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { 346208047Smm return (0); 347208047Smm } else if (type == DMU_OT_DNODE) { 348208047Smm dnode_phys_t *blk; 349168404Spjd int i; 350168404Spjd int blksz = BP_GET_LSIZE(bp); 351208047Smm uint32_t aflags = ARC_WAIT; 352208047Smm arc_buf_t *abuf; 353168404Spjd 354219089Spjd if (dsl_read(NULL, spa, bp, pbuf, 355208047Smm arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 356208047Smm ZIO_FLAG_CANFAIL, &aflags, zb) != 0) 357208047Smm return (EIO); 358208047Smm 359208047Smm blk = abuf->b_data; 360168404Spjd for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 361208047Smm uint64_t dnobj = (zb->zb_blkid << 362208047Smm (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 363168404Spjd err = dump_dnode(ba, dnobj, blk+i); 364168404Spjd if (err) 365168404Spjd break; 366168404Spjd } 367208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 368219089Spjd } else if (type == DMU_OT_SA) { 369208047Smm uint32_t aflags = ARC_WAIT; 370208047Smm arc_buf_t *abuf; 371168404Spjd int blksz = BP_GET_LSIZE(bp); 372168404Spjd 373208047Smm if (arc_read_nolock(NULL, spa, bp, 374208047Smm arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 375208047Smm ZIO_FLAG_CANFAIL, &aflags, zb) != 0) 376208047Smm return (EIO); 377168404Spjd 378219089Spjd err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); 379219089Spjd (void) arc_buf_remove_ref(abuf, &abuf); 380219089Spjd } else { /* it's a level-0 block of a regular object */ 381219089Spjd uint32_t aflags = ARC_WAIT; 382219089Spjd arc_buf_t *abuf; 383219089Spjd int blksz = BP_GET_LSIZE(bp); 384219089Spjd 385219089Spjd if (dsl_read(NULL, spa, bp, pbuf, 386219089Spjd arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, 387219089Spjd ZIO_FLAG_CANFAIL, &aflags, zb) != 0) 388219089Spjd return (EIO); 389219089Spjd 390208047Smm err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, 391219089Spjd blksz, bp, abuf->b_data); 392208047Smm (void) arc_buf_remove_ref(abuf, &abuf); 393168404Spjd } 394168404Spjd 395168404Spjd ASSERT(err == 0 || err == EINTR); 396168404Spjd return (err); 397168404Spjd} 398168404Spjd 399168404Spjdint 400185029Spjddmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, 401185029Spjd struct file *fp, offset_t *off) 402168404Spjd{ 403219089Spjd dsl_dataset_t *ds = tosnap->os_dsl_dataset; 404219089Spjd dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; 405168404Spjd dmu_replay_record_t *drr; 406168404Spjd struct backuparg ba; 407168404Spjd int err; 408185029Spjd uint64_t fromtxg = 0; 409168404Spjd 410168404Spjd /* tosnap must be a snapshot */ 411168404Spjd if (ds->ds_phys->ds_next_snap_obj == 0) 412168404Spjd return (EINVAL); 413168404Spjd 414168404Spjd /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 415168404Spjd if (fromds && (ds->ds_dir != fromds->ds_dir || 416185029Spjd fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) 417168404Spjd return (EXDEV); 418168404Spjd 419185029Spjd if (fromorigin) { 420185029Spjd dsl_pool_t *dp = ds->ds_dir->dd_pool; 421185029Spjd 422185029Spjd if (fromsnap) 423185029Spjd return (EINVAL); 424185029Spjd 425185029Spjd if (dsl_dir_is_clone(ds->ds_dir)) { 426185029Spjd rw_enter(&dp->dp_config_rwlock, RW_READER); 427185029Spjd err = dsl_dataset_hold_obj(dp, 428185029Spjd ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); 429185029Spjd rw_exit(&dp->dp_config_rwlock); 430185029Spjd if (err) 431185029Spjd return (err); 432185029Spjd } else { 433185029Spjd fromorigin = B_FALSE; 434185029Spjd } 435185029Spjd } 436185029Spjd 437185029Spjd 438168404Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 439168404Spjd drr->drr_type = DRR_BEGIN; 440168404Spjd drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 441219089Spjd DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, 442219089Spjd DMU_SUBSTREAM); 443219089Spjd 444219089Spjd#ifdef _KERNEL 445219089Spjd if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { 446219089Spjd uint64_t version; 447219089Spjd if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) 448219089Spjd return (EINVAL); 449219089Spjd if (version == ZPL_VERSION_SA) { 450219089Spjd DMU_SET_FEATUREFLAGS( 451219089Spjd drr->drr_u.drr_begin.drr_versioninfo, 452219089Spjd DMU_BACKUP_FEATURE_SA_SPILL); 453219089Spjd } 454219089Spjd } 455219089Spjd#endif 456219089Spjd 457168404Spjd drr->drr_u.drr_begin.drr_creation_time = 458168404Spjd ds->ds_phys->ds_creation_time; 459219089Spjd drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; 460185029Spjd if (fromorigin) 461185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; 462168404Spjd drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 463185029Spjd if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 464185029Spjd drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; 465185029Spjd 466168404Spjd if (fromds) 467168404Spjd drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 468168404Spjd dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 469168404Spjd 470185029Spjd if (fromds) 471185029Spjd fromtxg = fromds->ds_phys->ds_creation_txg; 472185029Spjd if (fromorigin) 473185029Spjd dsl_dataset_rele(fromds, FTAG); 474185029Spjd 475168404Spjd ba.drr = drr; 476168404Spjd ba.td = curthread; 477168404Spjd ba.fp = fp; 478168404Spjd ba.os = tosnap; 479185029Spjd ba.off = off; 480219089Spjd ba.toguid = ds->ds_phys->ds_guid; 481168404Spjd ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 482219089Spjd ba.pending_op = PENDING_NONE; 483168404Spjd 484219089Spjd if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { 485168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 486168404Spjd return (ba.err); 487168404Spjd } 488168404Spjd 489208047Smm err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, 490168404Spjd backup_cb, &ba); 491168404Spjd 492219089Spjd if (ba.pending_op != PENDING_NONE) 493219089Spjd if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) 494219089Spjd err = EINTR; 495219089Spjd 496168404Spjd if (err) { 497168404Spjd if (err == EINTR && ba.err) 498168404Spjd err = ba.err; 499168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 500168404Spjd return (err); 501168404Spjd } 502168404Spjd 503168404Spjd bzero(drr, sizeof (dmu_replay_record_t)); 504168404Spjd drr->drr_type = DRR_END; 505168404Spjd drr->drr_u.drr_end.drr_checksum = ba.zc; 506219089Spjd drr->drr_u.drr_end.drr_toguid = ba.toguid; 507168404Spjd 508219089Spjd if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { 509168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 510168404Spjd return (ba.err); 511168404Spjd } 512168404Spjd 513168404Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 514168404Spjd 515168404Spjd return (0); 516168404Spjd} 517168404Spjd 518185029Spjdstruct recvbeginsyncarg { 519185029Spjd const char *tofs; 520185029Spjd const char *tosnap; 521185029Spjd dsl_dataset_t *origin; 522185029Spjd uint64_t fromguid; 523185029Spjd dmu_objset_type_t type; 524185029Spjd void *tag; 525185029Spjd boolean_t force; 526185029Spjd uint64_t dsflags; 527185029Spjd char clonelastname[MAXNAMELEN]; 528185029Spjd dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ 529219089Spjd cred_t *cr; 530168404Spjd}; 531168404Spjd 532168404Spjd/* ARGSUSED */ 533168404Spjdstatic int 534219089Spjdrecv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) 535168404Spjd{ 536185029Spjd dsl_dir_t *dd = arg1; 537185029Spjd struct recvbeginsyncarg *rbsa = arg2; 538185029Spjd objset_t *mos = dd->dd_pool->dp_meta_objset; 539185029Spjd uint64_t val; 540185029Spjd int err; 541185029Spjd 542185029Spjd err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 543185029Spjd strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); 544185029Spjd 545185029Spjd if (err != ENOENT) 546185029Spjd return (err ? err : EEXIST); 547185029Spjd 548185029Spjd if (rbsa->origin) { 549185029Spjd /* make sure it's a snap in the same pool */ 550185029Spjd if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) 551185029Spjd return (EXDEV); 552219089Spjd if (!dsl_dataset_is_snapshot(rbsa->origin)) 553185029Spjd return (EINVAL); 554185029Spjd if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) 555185029Spjd return (ENODEV); 556185029Spjd } 557185029Spjd 558185029Spjd return (0); 559185029Spjd} 560185029Spjd 561185029Spjdstatic void 562219089Spjdrecv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) 563185029Spjd{ 564185029Spjd dsl_dir_t *dd = arg1; 565185029Spjd struct recvbeginsyncarg *rbsa = arg2; 566185029Spjd uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 567185029Spjd uint64_t dsobj; 568185029Spjd 569219089Spjd /* Create and open new dataset. */ 570185029Spjd dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, 571219089Spjd rbsa->origin, flags, rbsa->cr, tx); 572219089Spjd VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, 573219089Spjd B_TRUE, dmu_recv_tag, &rbsa->ds)); 574185029Spjd 575219089Spjd if (rbsa->origin == NULL) { 576219089Spjd (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, 577219089Spjd rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); 578185029Spjd } 579185029Spjd 580219089Spjd spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, 581219089Spjd dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); 582185029Spjd} 583185029Spjd 584185029Spjd/* ARGSUSED */ 585185029Spjdstatic int 586219089Spjdrecv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) 587185029Spjd{ 588185029Spjd dsl_dataset_t *ds = arg1; 589185029Spjd struct recvbeginsyncarg *rbsa = arg2; 590185029Spjd int err; 591168404Spjd uint64_t val; 592168404Spjd 593185029Spjd /* must not have any changes since most recent snapshot */ 594185029Spjd if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) 595185029Spjd return (ETXTBSY); 596185029Spjd 597219089Spjd /* new snapshot name must not exist */ 598219089Spjd err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 599219089Spjd ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); 600219089Spjd if (err == 0) 601219089Spjd return (EEXIST); 602219089Spjd if (err != ENOENT) 603219089Spjd return (err); 604168404Spjd 605219089Spjd if (rbsa->fromguid) { 606219089Spjd /* if incremental, most recent snapshot must match fromguid */ 607219089Spjd if (ds->ds_prev == NULL) 608219089Spjd return (ENODEV); 609168404Spjd 610219089Spjd /* 611219089Spjd * most recent snapshot must match fromguid, or there are no 612219089Spjd * changes since the fromguid one 613219089Spjd */ 614219089Spjd if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { 615219089Spjd uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; 616219089Spjd uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; 617219089Spjd while (obj != 0) { 618219089Spjd dsl_dataset_t *snap; 619219089Spjd err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 620219089Spjd obj, FTAG, &snap); 621219089Spjd if (err) 622219089Spjd return (ENODEV); 623219089Spjd if (snap->ds_phys->ds_creation_txg < birth) { 624219089Spjd dsl_dataset_rele(snap, FTAG); 625219089Spjd return (ENODEV); 626219089Spjd } 627219089Spjd if (snap->ds_phys->ds_guid == rbsa->fromguid) { 628219089Spjd dsl_dataset_rele(snap, FTAG); 629219089Spjd break; /* it's ok */ 630219089Spjd } 631219089Spjd obj = snap->ds_phys->ds_prev_snap_obj; 632219089Spjd dsl_dataset_rele(snap, FTAG); 633219089Spjd } 634219089Spjd if (obj == 0) 635219089Spjd return (ENODEV); 636219089Spjd } 637219089Spjd } else { 638219089Spjd /* if full, most recent snapshot must be $ORIGIN */ 639219089Spjd if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) 640219089Spjd return (ENODEV); 641219089Spjd } 642219089Spjd 643185029Spjd /* temporary clone name must not exist */ 644185029Spjd err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 645185029Spjd ds->ds_dir->dd_phys->dd_child_dir_zapobj, 646185029Spjd rbsa->clonelastname, 8, 1, &val); 647185029Spjd if (err == 0) 648168404Spjd return (EEXIST); 649185029Spjd if (err != ENOENT) 650185029Spjd return (err); 651168404Spjd 652168404Spjd return (0); 653168404Spjd} 654168404Spjd 655168404Spjd/* ARGSUSED */ 656168404Spjdstatic void 657219089Spjdrecv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) 658168404Spjd{ 659185029Spjd dsl_dataset_t *ohds = arg1; 660185029Spjd struct recvbeginsyncarg *rbsa = arg2; 661185029Spjd dsl_pool_t *dp = ohds->ds_dir->dd_pool; 662219089Spjd dsl_dataset_t *cds; 663185029Spjd uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; 664185029Spjd uint64_t dsobj; 665168404Spjd 666219089Spjd /* create and open the temporary clone */ 667219089Spjd dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, 668219089Spjd ohds->ds_prev, flags, rbsa->cr, tx); 669219089Spjd VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); 670168404Spjd 671219089Spjd /* 672219089Spjd * If we actually created a non-clone, we need to create the 673219089Spjd * objset in our new dataset. 674219089Spjd */ 675219089Spjd if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { 676219089Spjd (void) dmu_objset_create_impl(dp->dp_spa, 677219089Spjd cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); 678219089Spjd } 679168404Spjd 680185029Spjd rbsa->ds = cds; 681185029Spjd 682219089Spjd spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, 683219089Spjd dp->dp_spa, tx, "dataset = %lld", dsobj); 684168404Spjd} 685168404Spjd 686219089Spjdstatic boolean_t 687219089Spjddmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) 688168404Spjd{ 689219089Spjd int featureflags; 690168404Spjd 691219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); 692168404Spjd 693219089Spjd /* Verify pool version supports SA if SA_SPILL feature set */ 694219089Spjd return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && 695219089Spjd (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); 696168404Spjd} 697168404Spjd 698185029Spjd/* 699185029Spjd * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() 700185029Spjd * succeeds; otherwise we will leak the holds on the datasets. 701185029Spjd */ 702185029Spjdint 703219089Spjddmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, 704219089Spjd boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) 705168404Spjd{ 706185029Spjd int err = 0; 707185029Spjd boolean_t byteswap; 708219089Spjd struct recvbeginsyncarg rbsa = { 0 }; 709219089Spjd uint64_t versioninfo; 710185029Spjd int flags; 711185029Spjd dsl_dataset_t *ds; 712168404Spjd 713185029Spjd if (drrb->drr_magic == DMU_BACKUP_MAGIC) 714185029Spjd byteswap = FALSE; 715185029Spjd else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 716185029Spjd byteswap = TRUE; 717185029Spjd else 718185029Spjd return (EINVAL); 719168404Spjd 720185029Spjd rbsa.tofs = tofs; 721185029Spjd rbsa.tosnap = tosnap; 722219089Spjd rbsa.origin = origin ? origin->os_dsl_dataset : NULL; 723185029Spjd rbsa.fromguid = drrb->drr_fromguid; 724185029Spjd rbsa.type = drrb->drr_type; 725185029Spjd rbsa.tag = FTAG; 726185029Spjd rbsa.dsflags = 0; 727219089Spjd rbsa.cr = CRED(); 728219089Spjd versioninfo = drrb->drr_versioninfo; 729185029Spjd flags = drrb->drr_flags; 730185029Spjd 731185029Spjd if (byteswap) { 732185029Spjd rbsa.type = BSWAP_32(rbsa.type); 733185029Spjd rbsa.fromguid = BSWAP_64(rbsa.fromguid); 734219089Spjd versioninfo = BSWAP_64(versioninfo); 735185029Spjd flags = BSWAP_32(flags); 736185029Spjd } 737185029Spjd 738219089Spjd if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || 739185029Spjd rbsa.type >= DMU_OST_NUMTYPES || 740185029Spjd ((flags & DRR_FLAG_CLONE) && origin == NULL)) 741168404Spjd return (EINVAL); 742168404Spjd 743185029Spjd if (flags & DRR_FLAG_CI_DATA) 744185029Spjd rbsa.dsflags = DS_FLAG_CI_DATASET; 745168404Spjd 746185029Spjd bzero(drc, sizeof (dmu_recv_cookie_t)); 747185029Spjd drc->drc_drrb = drrb; 748185029Spjd drc->drc_tosnap = tosnap; 749219089Spjd drc->drc_top_ds = top_ds; 750185029Spjd drc->drc_force = force; 751168404Spjd 752185029Spjd /* 753185029Spjd * Process the begin in syncing context. 754185029Spjd */ 755168404Spjd 756219089Spjd /* open the dataset we are logically receiving into */ 757219089Spjd err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); 758219089Spjd if (err == 0) { 759219089Spjd if (dmu_recv_verify_features(ds, drrb)) { 760219089Spjd dsl_dataset_rele(ds, dmu_recv_tag); 761219089Spjd return (ENOTSUP); 762185029Spjd } 763219089Spjd /* target fs already exists; recv into temp clone */ 764219089Spjd 765219089Spjd /* Can't recv a clone into an existing fs */ 766219089Spjd if (flags & DRR_FLAG_CLONE) { 767219089Spjd dsl_dataset_rele(ds, dmu_recv_tag); 768219089Spjd return (EINVAL); 769185029Spjd } 770168404Spjd 771219089Spjd /* must not have an incremental recv already in progress */ 772219089Spjd if (!mutex_tryenter(&ds->ds_recvlock)) { 773219089Spjd dsl_dataset_rele(ds, dmu_recv_tag); 774219089Spjd return (EBUSY); 775219089Spjd } 776219089Spjd 777185029Spjd /* tmp clone name is: tofs/%tosnap" */ 778185029Spjd (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), 779185029Spjd "%%%s", tosnap); 780185029Spjd rbsa.force = force; 781185029Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 782219089Spjd recv_existing_check, recv_existing_sync, ds, &rbsa, 5); 783185029Spjd if (err) { 784219089Spjd mutex_exit(&ds->ds_recvlock); 785185029Spjd dsl_dataset_rele(ds, dmu_recv_tag); 786185029Spjd return (err); 787185029Spjd } 788185029Spjd drc->drc_logical_ds = ds; 789185029Spjd drc->drc_real_ds = rbsa.ds; 790219089Spjd } else if (err == ENOENT) { 791219089Spjd /* target fs does not exist; must be a full backup or clone */ 792219089Spjd char *cp; 793168404Spjd 794219089Spjd /* 795219089Spjd * If it's a non-clone incremental, we are missing the 796219089Spjd * target fs, so fail the recv. 797219089Spjd */ 798219089Spjd if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) 799219089Spjd return (ENOENT); 800219089Spjd 801219089Spjd /* Open the parent of tofs */ 802219089Spjd cp = strrchr(tofs, '/'); 803219089Spjd *cp = '\0'; 804219089Spjd err = dsl_dataset_hold(tofs, FTAG, &ds); 805219089Spjd *cp = '/'; 806185029Spjd if (err) 807185029Spjd return (err); 808185029Spjd 809219089Spjd if (dmu_recv_verify_features(ds, drrb)) { 810219089Spjd dsl_dataset_rele(ds, FTAG); 811219089Spjd return (ENOTSUP); 812219089Spjd } 813185029Spjd 814219089Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 815219089Spjd recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); 816219089Spjd dsl_dataset_rele(ds, FTAG); 817185029Spjd if (err) 818185029Spjd return (err); 819185029Spjd drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; 820185029Spjd drc->drc_newfs = B_TRUE; 821185029Spjd } 822185029Spjd 823219089Spjd return (err); 824168404Spjd} 825168404Spjd 826185029Spjdstruct restorearg { 827185029Spjd int err; 828185029Spjd int byteswap; 829185029Spjd kthread_t *td; 830185029Spjd struct file *fp; 831185029Spjd char *buf; 832185029Spjd uint64_t voff; 833185029Spjd int bufsize; /* amount of memory allocated for buf */ 834185029Spjd zio_cksum_t cksum; 835219089Spjd avl_tree_t *guid_to_ds_map; 836185029Spjd}; 837185029Spjd 838219089Spjdtypedef struct guid_map_entry { 839219089Spjd uint64_t guid; 840219089Spjd dsl_dataset_t *gme_ds; 841219089Spjd avl_node_t avlnode; 842219089Spjd} guid_map_entry_t; 843219089Spjd 844168404Spjdstatic int 845219089Spjdguid_compare(const void *arg1, const void *arg2) 846168404Spjd{ 847219089Spjd const guid_map_entry_t *gmep1 = arg1; 848219089Spjd const guid_map_entry_t *gmep2 = arg2; 849219089Spjd 850219089Spjd if (gmep1->guid < gmep2->guid) 851219089Spjd return (-1); 852219089Spjd else if (gmep1->guid > gmep2->guid) 853219089Spjd return (1); 854219089Spjd return (0); 855219089Spjd} 856219089Spjd 857219089Spjdstatic void 858219089Spjdfree_guid_map_onexit(void *arg) 859219089Spjd{ 860219089Spjd avl_tree_t *ca = arg; 861219089Spjd void *cookie = NULL; 862219089Spjd guid_map_entry_t *gmep; 863219089Spjd 864219089Spjd while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { 865219089Spjd dsl_dataset_rele(gmep->gme_ds, ca); 866219089Spjd kmem_free(gmep, sizeof (guid_map_entry_t)); 867219089Spjd } 868219089Spjd avl_destroy(ca); 869219089Spjd kmem_free(ca, sizeof (avl_tree_t)); 870219089Spjd} 871219089Spjd 872219089Spjdstatic int 873219089Spjdrestore_bytes(struct restorearg *ra, void *buf, int len, off_t off, ssize_t *resid) 874219089Spjd{ 875168404Spjd struct uio auio; 876168404Spjd struct iovec aiov; 877168404Spjd int error; 878168404Spjd 879168404Spjd aiov.iov_base = buf; 880168404Spjd aiov.iov_len = len; 881168404Spjd auio.uio_iov = &aiov; 882168404Spjd auio.uio_iovcnt = 1; 883168404Spjd auio.uio_resid = len; 884169170Spjd auio.uio_segflg = UIO_SYSSPACE; 885168404Spjd auio.uio_rw = UIO_READ; 886168404Spjd auio.uio_offset = off; 887168404Spjd auio.uio_td = ra->td; 888168404Spjd#ifdef _KERNEL 889168404Spjd error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 890168404Spjd#else 891168404Spjd fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 892168404Spjd error = EOPNOTSUPP; 893168404Spjd#endif 894168404Spjd *resid = auio.uio_resid; 895168404Spjd return (error); 896168404Spjd} 897168404Spjd 898168404Spjdstatic void * 899168404Spjdrestore_read(struct restorearg *ra, int len) 900168404Spjd{ 901168404Spjd void *rv; 902185029Spjd int done = 0; 903168404Spjd 904168404Spjd /* some things will require 8-byte alignment, so everything must */ 905168404Spjd ASSERT3U(len % 8, ==, 0); 906168404Spjd 907185029Spjd while (done < len) { 908219089Spjd ssize_t resid; 909168404Spjd 910185029Spjd ra->err = restore_bytes(ra, (caddr_t)ra->buf + done, 911185029Spjd len - done, ra->voff, &resid); 912168404Spjd 913185029Spjd if (resid == len - done) 914168404Spjd ra->err = EINVAL; 915185029Spjd ra->voff += len - done - resid; 916185029Spjd done = len - resid; 917168404Spjd if (ra->err) 918168404Spjd return (NULL); 919168404Spjd } 920168404Spjd 921185029Spjd ASSERT3U(done, ==, len); 922185029Spjd rv = ra->buf; 923168404Spjd if (ra->byteswap) 924185029Spjd fletcher_4_incremental_byteswap(rv, len, &ra->cksum); 925168404Spjd else 926185029Spjd fletcher_4_incremental_native(rv, len, &ra->cksum); 927168404Spjd return (rv); 928168404Spjd} 929168404Spjd 930168404Spjdstatic void 931168404Spjdbackup_byteswap(dmu_replay_record_t *drr) 932168404Spjd{ 933168404Spjd#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 934168404Spjd#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 935168404Spjd drr->drr_type = BSWAP_32(drr->drr_type); 936185029Spjd drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); 937168404Spjd switch (drr->drr_type) { 938168404Spjd case DRR_BEGIN: 939168404Spjd DO64(drr_begin.drr_magic); 940219089Spjd DO64(drr_begin.drr_versioninfo); 941168404Spjd DO64(drr_begin.drr_creation_time); 942168404Spjd DO32(drr_begin.drr_type); 943185029Spjd DO32(drr_begin.drr_flags); 944168404Spjd DO64(drr_begin.drr_toguid); 945168404Spjd DO64(drr_begin.drr_fromguid); 946168404Spjd break; 947168404Spjd case DRR_OBJECT: 948168404Spjd DO64(drr_object.drr_object); 949168404Spjd /* DO64(drr_object.drr_allocation_txg); */ 950168404Spjd DO32(drr_object.drr_type); 951168404Spjd DO32(drr_object.drr_bonustype); 952168404Spjd DO32(drr_object.drr_blksz); 953168404Spjd DO32(drr_object.drr_bonuslen); 954219089Spjd DO64(drr_object.drr_toguid); 955168404Spjd break; 956168404Spjd case DRR_FREEOBJECTS: 957168404Spjd DO64(drr_freeobjects.drr_firstobj); 958168404Spjd DO64(drr_freeobjects.drr_numobjs); 959219089Spjd DO64(drr_freeobjects.drr_toguid); 960168404Spjd break; 961168404Spjd case DRR_WRITE: 962168404Spjd DO64(drr_write.drr_object); 963168404Spjd DO32(drr_write.drr_type); 964168404Spjd DO64(drr_write.drr_offset); 965168404Spjd DO64(drr_write.drr_length); 966219089Spjd DO64(drr_write.drr_toguid); 967219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); 968219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); 969219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); 970219089Spjd DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); 971219089Spjd DO64(drr_write.drr_key.ddk_prop); 972168404Spjd break; 973219089Spjd case DRR_WRITE_BYREF: 974219089Spjd DO64(drr_write_byref.drr_object); 975219089Spjd DO64(drr_write_byref.drr_offset); 976219089Spjd DO64(drr_write_byref.drr_length); 977219089Spjd DO64(drr_write_byref.drr_toguid); 978219089Spjd DO64(drr_write_byref.drr_refguid); 979219089Spjd DO64(drr_write_byref.drr_refobject); 980219089Spjd DO64(drr_write_byref.drr_refoffset); 981219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); 982219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); 983219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); 984219089Spjd DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); 985219089Spjd DO64(drr_write_byref.drr_key.ddk_prop); 986219089Spjd break; 987168404Spjd case DRR_FREE: 988168404Spjd DO64(drr_free.drr_object); 989168404Spjd DO64(drr_free.drr_offset); 990168404Spjd DO64(drr_free.drr_length); 991219089Spjd DO64(drr_free.drr_toguid); 992168404Spjd break; 993219089Spjd case DRR_SPILL: 994219089Spjd DO64(drr_spill.drr_object); 995219089Spjd DO64(drr_spill.drr_length); 996219089Spjd DO64(drr_spill.drr_toguid); 997219089Spjd break; 998168404Spjd case DRR_END: 999168404Spjd DO64(drr_end.drr_checksum.zc_word[0]); 1000168404Spjd DO64(drr_end.drr_checksum.zc_word[1]); 1001168404Spjd DO64(drr_end.drr_checksum.zc_word[2]); 1002168404Spjd DO64(drr_end.drr_checksum.zc_word[3]); 1003219089Spjd DO64(drr_end.drr_toguid); 1004168404Spjd break; 1005168404Spjd } 1006168404Spjd#undef DO64 1007168404Spjd#undef DO32 1008168404Spjd} 1009168404Spjd 1010168404Spjdstatic int 1011168404Spjdrestore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 1012168404Spjd{ 1013168404Spjd int err; 1014168404Spjd dmu_tx_t *tx; 1015200727Sdelphij void *data = NULL; 1016168404Spjd 1017168404Spjd if (drro->drr_type == DMU_OT_NONE || 1018168404Spjd drro->drr_type >= DMU_OT_NUMTYPES || 1019168404Spjd drro->drr_bonustype >= DMU_OT_NUMTYPES || 1020219089Spjd drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || 1021168404Spjd drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1022168404Spjd P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1023168404Spjd drro->drr_blksz < SPA_MINBLOCKSIZE || 1024168404Spjd drro->drr_blksz > SPA_MAXBLOCKSIZE || 1025168404Spjd drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1026168404Spjd return (EINVAL); 1027168404Spjd } 1028168404Spjd 1029200726Sdelphij err = dmu_object_info(os, drro->drr_object, NULL); 1030168404Spjd 1031200726Sdelphij if (err != 0 && err != ENOENT) 1032200726Sdelphij return (EINVAL); 1033200726Sdelphij 1034201756Sdelphij if (drro->drr_bonuslen) { 1035201756Sdelphij data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); 1036201756Sdelphij if (ra->err) 1037201756Sdelphij return (ra->err); 1038201756Sdelphij } 1039201756Sdelphij 1040168404Spjd if (err == ENOENT) { 1041168404Spjd /* currently free, want to be allocated */ 1042200726Sdelphij tx = dmu_tx_create(os); 1043168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1044168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1045168404Spjd if (err) { 1046168404Spjd dmu_tx_abort(tx); 1047168404Spjd return (err); 1048168404Spjd } 1049168404Spjd err = dmu_object_claim(os, drro->drr_object, 1050168404Spjd drro->drr_type, drro->drr_blksz, 1051168404Spjd drro->drr_bonustype, drro->drr_bonuslen, tx); 1052200726Sdelphij dmu_tx_commit(tx); 1053168404Spjd } else { 1054168404Spjd /* currently allocated, want to be allocated */ 1055168404Spjd err = dmu_object_reclaim(os, drro->drr_object, 1056168404Spjd drro->drr_type, drro->drr_blksz, 1057200726Sdelphij drro->drr_bonustype, drro->drr_bonuslen); 1058168404Spjd } 1059219089Spjd if (err) { 1060200726Sdelphij return (EINVAL); 1061219089Spjd } 1062200726Sdelphij 1063200726Sdelphij tx = dmu_tx_create(os); 1064200726Sdelphij dmu_tx_hold_bonus(tx, drro->drr_object); 1065200726Sdelphij err = dmu_tx_assign(tx, TXG_WAIT); 1066168404Spjd if (err) { 1067200726Sdelphij dmu_tx_abort(tx); 1068200726Sdelphij return (err); 1069168404Spjd } 1070168404Spjd 1071219089Spjd dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, 1072219089Spjd tx); 1073168404Spjd dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1074168404Spjd 1075200727Sdelphij if (data != NULL) { 1076168404Spjd dmu_buf_t *db; 1077200727Sdelphij 1078168404Spjd VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1079168404Spjd dmu_buf_will_dirty(db, tx); 1080168404Spjd 1081185029Spjd ASSERT3U(db->db_size, >=, drro->drr_bonuslen); 1082185029Spjd bcopy(data, db->db_data, drro->drr_bonuslen); 1083168404Spjd if (ra->byteswap) { 1084168404Spjd dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 1085168404Spjd drro->drr_bonuslen); 1086168404Spjd } 1087168404Spjd dmu_buf_rele(db, FTAG); 1088168404Spjd } 1089168404Spjd dmu_tx_commit(tx); 1090168404Spjd return (0); 1091168404Spjd} 1092168404Spjd 1093168404Spjd/* ARGSUSED */ 1094168404Spjdstatic int 1095168404Spjdrestore_freeobjects(struct restorearg *ra, objset_t *os, 1096168404Spjd struct drr_freeobjects *drrfo) 1097168404Spjd{ 1098168404Spjd uint64_t obj; 1099168404Spjd 1100168404Spjd if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1101168404Spjd return (EINVAL); 1102168404Spjd 1103168404Spjd for (obj = drrfo->drr_firstobj; 1104168404Spjd obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 1105168404Spjd (void) dmu_object_next(os, &obj, FALSE, 0)) { 1106168404Spjd int err; 1107168404Spjd 1108168404Spjd if (dmu_object_info(os, obj, NULL) != 0) 1109168404Spjd continue; 1110168404Spjd 1111185029Spjd err = dmu_free_object(os, obj); 1112185029Spjd if (err) 1113168404Spjd return (err); 1114168404Spjd } 1115168404Spjd return (0); 1116168404Spjd} 1117168404Spjd 1118168404Spjdstatic int 1119168404Spjdrestore_write(struct restorearg *ra, objset_t *os, 1120168404Spjd struct drr_write *drrw) 1121168404Spjd{ 1122168404Spjd dmu_tx_t *tx; 1123168404Spjd void *data; 1124168404Spjd int err; 1125168404Spjd 1126168404Spjd if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1127168404Spjd drrw->drr_type >= DMU_OT_NUMTYPES) 1128168404Spjd return (EINVAL); 1129168404Spjd 1130168404Spjd data = restore_read(ra, drrw->drr_length); 1131168404Spjd if (data == NULL) 1132168404Spjd return (ra->err); 1133168404Spjd 1134168404Spjd if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1135168404Spjd return (EINVAL); 1136168404Spjd 1137168404Spjd tx = dmu_tx_create(os); 1138168404Spjd 1139168404Spjd dmu_tx_hold_write(tx, drrw->drr_object, 1140168404Spjd drrw->drr_offset, drrw->drr_length); 1141168404Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1142168404Spjd if (err) { 1143168404Spjd dmu_tx_abort(tx); 1144168404Spjd return (err); 1145168404Spjd } 1146168404Spjd if (ra->byteswap) 1147168404Spjd dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 1148168404Spjd dmu_write(os, drrw->drr_object, 1149168404Spjd drrw->drr_offset, drrw->drr_length, data, tx); 1150168404Spjd dmu_tx_commit(tx); 1151168404Spjd return (0); 1152168404Spjd} 1153168404Spjd 1154219089Spjd/* 1155219089Spjd * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed 1156219089Spjd * streams to refer to a copy of the data that is already on the 1157219089Spjd * system because it came in earlier in the stream. This function 1158219089Spjd * finds the earlier copy of the data, and uses that copy instead of 1159219089Spjd * data from the stream to fulfill this write. 1160219089Spjd */ 1161219089Spjdstatic int 1162219089Spjdrestore_write_byref(struct restorearg *ra, objset_t *os, 1163219089Spjd struct drr_write_byref *drrwbr) 1164219089Spjd{ 1165219089Spjd dmu_tx_t *tx; 1166219089Spjd int err; 1167219089Spjd guid_map_entry_t gmesrch; 1168219089Spjd guid_map_entry_t *gmep; 1169219089Spjd avl_index_t where; 1170219089Spjd objset_t *ref_os = NULL; 1171219089Spjd dmu_buf_t *dbp; 1172219089Spjd 1173219089Spjd if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) 1174219089Spjd return (EINVAL); 1175219089Spjd 1176219089Spjd /* 1177219089Spjd * If the GUID of the referenced dataset is different from the 1178219089Spjd * GUID of the target dataset, find the referenced dataset. 1179219089Spjd */ 1180219089Spjd if (drrwbr->drr_toguid != drrwbr->drr_refguid) { 1181219089Spjd gmesrch.guid = drrwbr->drr_refguid; 1182219089Spjd if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, 1183219089Spjd &where)) == NULL) { 1184219089Spjd return (EINVAL); 1185219089Spjd } 1186219089Spjd if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) 1187219089Spjd return (EINVAL); 1188219089Spjd } else { 1189219089Spjd ref_os = os; 1190219089Spjd } 1191219089Spjd 1192219089Spjd if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, 1193219089Spjd drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH)) 1194219089Spjd return (err); 1195219089Spjd 1196219089Spjd tx = dmu_tx_create(os); 1197219089Spjd 1198219089Spjd dmu_tx_hold_write(tx, drrwbr->drr_object, 1199219089Spjd drrwbr->drr_offset, drrwbr->drr_length); 1200219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1201219089Spjd if (err) { 1202219089Spjd dmu_tx_abort(tx); 1203219089Spjd return (err); 1204219089Spjd } 1205219089Spjd dmu_write(os, drrwbr->drr_object, 1206219089Spjd drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); 1207219089Spjd dmu_buf_rele(dbp, FTAG); 1208219089Spjd dmu_tx_commit(tx); 1209219089Spjd return (0); 1210219089Spjd} 1211219089Spjd 1212219089Spjdstatic int 1213219089Spjdrestore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) 1214219089Spjd{ 1215219089Spjd dmu_tx_t *tx; 1216219089Spjd void *data; 1217219089Spjd dmu_buf_t *db, *db_spill; 1218219089Spjd int err; 1219219089Spjd 1220219089Spjd if (drrs->drr_length < SPA_MINBLOCKSIZE || 1221219089Spjd drrs->drr_length > SPA_MAXBLOCKSIZE) 1222219089Spjd return (EINVAL); 1223219089Spjd 1224219089Spjd data = restore_read(ra, drrs->drr_length); 1225219089Spjd if (data == NULL) 1226219089Spjd return (ra->err); 1227219089Spjd 1228219089Spjd if (dmu_object_info(os, drrs->drr_object, NULL) != 0) 1229219089Spjd return (EINVAL); 1230219089Spjd 1231219089Spjd VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); 1232219089Spjd if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { 1233219089Spjd dmu_buf_rele(db, FTAG); 1234219089Spjd return (err); 1235219089Spjd } 1236219089Spjd 1237219089Spjd tx = dmu_tx_create(os); 1238219089Spjd 1239219089Spjd dmu_tx_hold_spill(tx, db->db_object); 1240219089Spjd 1241219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 1242219089Spjd if (err) { 1243219089Spjd dmu_buf_rele(db, FTAG); 1244219089Spjd dmu_buf_rele(db_spill, FTAG); 1245219089Spjd dmu_tx_abort(tx); 1246219089Spjd return (err); 1247219089Spjd } 1248219089Spjd dmu_buf_will_dirty(db_spill, tx); 1249219089Spjd 1250219089Spjd if (db_spill->db_size < drrs->drr_length) 1251219089Spjd VERIFY(0 == dbuf_spill_set_blksz(db_spill, 1252219089Spjd drrs->drr_length, tx)); 1253219089Spjd bcopy(data, db_spill->db_data, drrs->drr_length); 1254219089Spjd 1255219089Spjd dmu_buf_rele(db, FTAG); 1256219089Spjd dmu_buf_rele(db_spill, FTAG); 1257219089Spjd 1258219089Spjd dmu_tx_commit(tx); 1259219089Spjd return (0); 1260219089Spjd} 1261219089Spjd 1262168404Spjd/* ARGSUSED */ 1263168404Spjdstatic int 1264168404Spjdrestore_free(struct restorearg *ra, objset_t *os, 1265168404Spjd struct drr_free *drrf) 1266168404Spjd{ 1267168404Spjd int err; 1268168404Spjd 1269168404Spjd if (drrf->drr_length != -1ULL && 1270168404Spjd drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1271168404Spjd return (EINVAL); 1272168404Spjd 1273168404Spjd if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1274168404Spjd return (EINVAL); 1275168404Spjd 1276185029Spjd err = dmu_free_long_range(os, drrf->drr_object, 1277168404Spjd drrf->drr_offset, drrf->drr_length); 1278168404Spjd return (err); 1279168404Spjd} 1280168404Spjd 1281185029Spjd/* 1282185029Spjd * NB: callers *must* call dmu_recv_end() if this succeeds. 1283185029Spjd */ 1284168404Spjdint 1285219089Spjddmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp, 1286219089Spjd int cleanup_fd, uint64_t *action_handlep) 1287168404Spjd{ 1288185029Spjd struct restorearg ra = { 0 }; 1289168404Spjd dmu_replay_record_t *drr; 1290185029Spjd objset_t *os; 1291185029Spjd zio_cksum_t pcksum; 1292219089Spjd int featureflags; 1293168404Spjd 1294185029Spjd if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) 1295168404Spjd ra.byteswap = TRUE; 1296168404Spjd 1297185029Spjd { 1298185029Spjd /* compute checksum of drr_begin record */ 1299185029Spjd dmu_replay_record_t *drr; 1300185029Spjd drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 1301185029Spjd 1302185029Spjd drr->drr_type = DRR_BEGIN; 1303185029Spjd drr->drr_u.drr_begin = *drc->drc_drrb; 1304185029Spjd if (ra.byteswap) { 1305185029Spjd fletcher_4_incremental_byteswap(drr, 1306185029Spjd sizeof (dmu_replay_record_t), &ra.cksum); 1307185029Spjd } else { 1308185029Spjd fletcher_4_incremental_native(drr, 1309185029Spjd sizeof (dmu_replay_record_t), &ra.cksum); 1310185029Spjd } 1311185029Spjd kmem_free(drr, sizeof (dmu_replay_record_t)); 1312168404Spjd } 1313168404Spjd 1314168404Spjd if (ra.byteswap) { 1315185029Spjd struct drr_begin *drrb = drc->drc_drrb; 1316168404Spjd drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1317219089Spjd drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); 1318168404Spjd drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1319168404Spjd drrb->drr_type = BSWAP_32(drrb->drr_type); 1320168404Spjd drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1321168404Spjd drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1322168404Spjd } 1323168404Spjd 1324219089Spjd ra.td = curthread; 1325185029Spjd ra.fp = fp; 1326185029Spjd ra.voff = *voffp; 1327185029Spjd ra.bufsize = 1<<20; 1328185029Spjd ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1329168404Spjd 1330185029Spjd /* these were verified in dmu_recv_begin */ 1331219089Spjd ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == 1332219089Spjd DMU_SUBSTREAM); 1333185029Spjd ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); 1334168404Spjd 1335168404Spjd /* 1336168404Spjd * Open the objset we are modifying. 1337168404Spjd */ 1338219089Spjd VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); 1339168404Spjd 1340185029Spjd ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); 1341168404Spjd 1342219089Spjd featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); 1343219089Spjd 1344219089Spjd /* if this stream is dedup'ed, set up the avl tree for guid mapping */ 1345219089Spjd if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { 1346219089Spjd minor_t minor; 1347219089Spjd 1348219089Spjd if (cleanup_fd == -1) { 1349219089Spjd ra.err = EBADF; 1350219089Spjd goto out; 1351219089Spjd } 1352219089Spjd ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); 1353219089Spjd if (ra.err) { 1354219089Spjd cleanup_fd = -1; 1355219089Spjd goto out; 1356219089Spjd } 1357219089Spjd 1358219089Spjd if (*action_handlep == 0) { 1359219089Spjd ra.guid_to_ds_map = 1360219089Spjd kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); 1361219089Spjd avl_create(ra.guid_to_ds_map, guid_compare, 1362219089Spjd sizeof (guid_map_entry_t), 1363219089Spjd offsetof(guid_map_entry_t, avlnode)); 1364219089Spjd ra.err = zfs_onexit_add_cb(minor, 1365219089Spjd free_guid_map_onexit, ra.guid_to_ds_map, 1366219089Spjd action_handlep); 1367219089Spjd if (ra.err) 1368219089Spjd goto out; 1369219089Spjd } else { 1370219089Spjd ra.err = zfs_onexit_cb_data(minor, *action_handlep, 1371219089Spjd (void **)&ra.guid_to_ds_map); 1372219089Spjd if (ra.err) 1373219089Spjd goto out; 1374219089Spjd } 1375221263Smm 1376221263Smm drc->drc_guid_to_ds_map = ra.guid_to_ds_map; 1377219089Spjd } 1378219089Spjd 1379168404Spjd /* 1380168404Spjd * Read records and process them. 1381168404Spjd */ 1382185029Spjd pcksum = ra.cksum; 1383168404Spjd while (ra.err == 0 && 1384168404Spjd NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1385185029Spjd if (issig(JUSTLOOKING) && issig(FORREAL)) { 1386168404Spjd ra.err = EINTR; 1387168404Spjd goto out; 1388168404Spjd } 1389168404Spjd 1390168404Spjd if (ra.byteswap) 1391168404Spjd backup_byteswap(drr); 1392168404Spjd 1393168404Spjd switch (drr->drr_type) { 1394168404Spjd case DRR_OBJECT: 1395168404Spjd { 1396168404Spjd /* 1397168404Spjd * We need to make a copy of the record header, 1398168404Spjd * because restore_{object,write} may need to 1399168404Spjd * restore_read(), which will invalidate drr. 1400168404Spjd */ 1401168404Spjd struct drr_object drro = drr->drr_u.drr_object; 1402168404Spjd ra.err = restore_object(&ra, os, &drro); 1403168404Spjd break; 1404168404Spjd } 1405168404Spjd case DRR_FREEOBJECTS: 1406168404Spjd { 1407168404Spjd struct drr_freeobjects drrfo = 1408168404Spjd drr->drr_u.drr_freeobjects; 1409168404Spjd ra.err = restore_freeobjects(&ra, os, &drrfo); 1410168404Spjd break; 1411168404Spjd } 1412168404Spjd case DRR_WRITE: 1413168404Spjd { 1414168404Spjd struct drr_write drrw = drr->drr_u.drr_write; 1415168404Spjd ra.err = restore_write(&ra, os, &drrw); 1416168404Spjd break; 1417168404Spjd } 1418219089Spjd case DRR_WRITE_BYREF: 1419219089Spjd { 1420219089Spjd struct drr_write_byref drrwbr = 1421219089Spjd drr->drr_u.drr_write_byref; 1422219089Spjd ra.err = restore_write_byref(&ra, os, &drrwbr); 1423219089Spjd break; 1424219089Spjd } 1425168404Spjd case DRR_FREE: 1426168404Spjd { 1427168404Spjd struct drr_free drrf = drr->drr_u.drr_free; 1428168404Spjd ra.err = restore_free(&ra, os, &drrf); 1429168404Spjd break; 1430168404Spjd } 1431168404Spjd case DRR_END: 1432168404Spjd { 1433168404Spjd struct drr_end drre = drr->drr_u.drr_end; 1434168404Spjd /* 1435168404Spjd * We compare against the *previous* checksum 1436168404Spjd * value, because the stored checksum is of 1437168404Spjd * everything before the DRR_END record. 1438168404Spjd */ 1439185029Spjd if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) 1440168404Spjd ra.err = ECKSUM; 1441168404Spjd goto out; 1442168404Spjd } 1443219089Spjd case DRR_SPILL: 1444219089Spjd { 1445219089Spjd struct drr_spill drrs = drr->drr_u.drr_spill; 1446219089Spjd ra.err = restore_spill(&ra, os, &drrs); 1447219089Spjd break; 1448219089Spjd } 1449168404Spjd default: 1450168404Spjd ra.err = EINVAL; 1451168404Spjd goto out; 1452168404Spjd } 1453185029Spjd pcksum = ra.cksum; 1454168404Spjd } 1455185029Spjd ASSERT(ra.err != 0); 1456168404Spjd 1457168404Spjdout: 1458219089Spjd if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) 1459219089Spjd zfs_onexit_fd_rele(cleanup_fd); 1460168404Spjd 1461185029Spjd if (ra.err != 0) { 1462168404Spjd /* 1463219089Spjd * destroy what we created, so we don't leave it in the 1464219089Spjd * inconsistent restoring state. 1465168404Spjd */ 1466185029Spjd txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); 1467219089Spjd 1468219089Spjd (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, 1469219089Spjd B_FALSE); 1470219089Spjd if (drc->drc_real_ds != drc->drc_logical_ds) { 1471219089Spjd mutex_exit(&drc->drc_logical_ds->ds_recvlock); 1472219089Spjd dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); 1473219089Spjd } 1474168404Spjd } 1475168404Spjd 1476168404Spjd kmem_free(ra.buf, ra.bufsize); 1477185029Spjd *voffp = ra.voff; 1478168404Spjd return (ra.err); 1479168404Spjd} 1480185029Spjd 1481185029Spjdstruct recvendsyncarg { 1482185029Spjd char *tosnap; 1483185029Spjd uint64_t creation_time; 1484185029Spjd uint64_t toguid; 1485185029Spjd}; 1486185029Spjd 1487185029Spjdstatic int 1488185029Spjdrecv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) 1489185029Spjd{ 1490185029Spjd dsl_dataset_t *ds = arg1; 1491185029Spjd struct recvendsyncarg *resa = arg2; 1492185029Spjd 1493185029Spjd return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); 1494185029Spjd} 1495185029Spjd 1496185029Spjdstatic void 1497219089Spjdrecv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1498185029Spjd{ 1499185029Spjd dsl_dataset_t *ds = arg1; 1500185029Spjd struct recvendsyncarg *resa = arg2; 1501185029Spjd 1502219089Spjd dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); 1503185029Spjd 1504185029Spjd /* set snapshot's creation time and guid */ 1505185029Spjd dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1506185029Spjd ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; 1507185029Spjd ds->ds_prev->ds_phys->ds_guid = resa->toguid; 1508185029Spjd ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1509185029Spjd 1510185029Spjd dmu_buf_will_dirty(ds->ds_dbuf, tx); 1511185029Spjd ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 1512185029Spjd} 1513185029Spjd 1514219089Spjdstatic int 1515221263Smmadd_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) 1516221263Smm{ 1517221263Smm dsl_pool_t *dp = ds->ds_dir->dd_pool; 1518221263Smm uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; 1519221263Smm dsl_dataset_t *snapds; 1520221263Smm guid_map_entry_t *gmep; 1521221263Smm int err; 1522221263Smm 1523221263Smm ASSERT(guid_map != NULL); 1524221263Smm 1525221263Smm rw_enter(&dp->dp_config_rwlock, RW_READER); 1526221263Smm err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); 1527221263Smm if (err == 0) { 1528221263Smm gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); 1529221263Smm gmep->guid = snapds->ds_phys->ds_guid; 1530221263Smm gmep->gme_ds = snapds; 1531221263Smm avl_add(guid_map, gmep); 1532221263Smm } 1533221263Smm 1534221263Smm rw_exit(&dp->dp_config_rwlock); 1535221263Smm return (err); 1536221263Smm} 1537221263Smm 1538221263Smmstatic int 1539219089Spjddmu_recv_existing_end(dmu_recv_cookie_t *drc) 1540185029Spjd{ 1541185029Spjd struct recvendsyncarg resa; 1542185029Spjd dsl_dataset_t *ds = drc->drc_logical_ds; 1543185029Spjd int err; 1544185029Spjd 1545185029Spjd /* 1546219089Spjd * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() 1547219089Spjd * expects it to have a ds_user_ptr (and zil), but clone_swap() 1548219089Spjd * can close it. 1549185029Spjd */ 1550185029Spjd txg_wait_synced(ds->ds_dir->dd_pool, 0); 1551185029Spjd 1552219089Spjd if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { 1553219089Spjd err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, 1554219089Spjd drc->drc_force); 1555185029Spjd if (err) 1556219089Spjd goto out; 1557219089Spjd } else { 1558219089Spjd mutex_exit(&ds->ds_recvlock); 1559219089Spjd dsl_dataset_rele(ds, dmu_recv_tag); 1560219089Spjd (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, 1561219089Spjd B_FALSE); 1562219089Spjd return (EBUSY); 1563185029Spjd } 1564185029Spjd 1565185029Spjd resa.creation_time = drc->drc_drrb->drr_creation_time; 1566185029Spjd resa.toguid = drc->drc_drrb->drr_toguid; 1567185029Spjd resa.tosnap = drc->drc_tosnap; 1568185029Spjd 1569185029Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1570185029Spjd recv_end_check, recv_end_sync, ds, &resa, 3); 1571185029Spjd if (err) { 1572219089Spjd /* swap back */ 1573219089Spjd (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); 1574185029Spjd } 1575185029Spjd 1576219089Spjdout: 1577219089Spjd mutex_exit(&ds->ds_recvlock); 1578221263Smm if (err == 0 && drc->drc_guid_to_ds_map != NULL) 1579221263Smm (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); 1580185029Spjd dsl_dataset_disown(ds, dmu_recv_tag); 1581219089Spjd (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); 1582185029Spjd return (err); 1583185029Spjd} 1584219089Spjd 1585219089Spjdstatic int 1586219089Spjddmu_recv_new_end(dmu_recv_cookie_t *drc) 1587219089Spjd{ 1588219089Spjd struct recvendsyncarg resa; 1589219089Spjd dsl_dataset_t *ds = drc->drc_logical_ds; 1590219089Spjd int err; 1591219089Spjd 1592219089Spjd /* 1593219089Spjd * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() 1594219089Spjd * expects it to have a ds_user_ptr (and zil), but clone_swap() 1595219089Spjd * can close it. 1596219089Spjd */ 1597219089Spjd txg_wait_synced(ds->ds_dir->dd_pool, 0); 1598219089Spjd 1599219089Spjd resa.creation_time = drc->drc_drrb->drr_creation_time; 1600219089Spjd resa.toguid = drc->drc_drrb->drr_toguid; 1601219089Spjd resa.tosnap = drc->drc_tosnap; 1602219089Spjd 1603219089Spjd err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1604219089Spjd recv_end_check, recv_end_sync, ds, &resa, 3); 1605219089Spjd if (err) { 1606219089Spjd /* clean up the fs we just recv'd into */ 1607219089Spjd (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); 1608219089Spjd } else { 1609221263Smm if (drc->drc_guid_to_ds_map != NULL) 1610221263Smm (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); 1611219089Spjd /* release the hold from dmu_recv_begin */ 1612219089Spjd dsl_dataset_disown(ds, dmu_recv_tag); 1613219089Spjd } 1614219089Spjd return (err); 1615219089Spjd} 1616219089Spjd 1617219089Spjdint 1618219089Spjddmu_recv_end(dmu_recv_cookie_t *drc) 1619219089Spjd{ 1620219089Spjd if (drc->drc_logical_ds != drc->drc_real_ds) 1621219089Spjd return (dmu_recv_existing_end(drc)); 1622219089Spjd else 1623219089Spjd return (dmu_recv_new_end(drc)); 1624219089Spjd} 1625