/* dmu_send.c revision 177698 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/dmu.h> 29#include <sys/dmu_impl.h> 30#include <sys/dmu_tx.h> 31#include <sys/dbuf.h> 32#include <sys/dnode.h> 33#include <sys/zfs_context.h> 34#include <sys/dmu_objset.h> 35#include <sys/dmu_traverse.h> 36#include <sys/dsl_dataset.h> 37#include <sys/dsl_dir.h> 38#include <sys/dsl_pool.h> 39#include <sys/dsl_synctask.h> 40#include <sys/zfs_ioctl.h> 41#include <sys/zap.h> 42#include <sys/zio_checksum.h> 43 44struct backuparg { 45 dmu_replay_record_t *drr; 46 kthread_t *td; 47 struct file *fp; 48 objset_t *os; 49 zio_cksum_t zc; 50 int err; 51}; 52 53static int 54dump_bytes(struct backuparg *ba, void *buf, int len) 55{ 56 struct uio auio; 57 struct iovec aiov; 58 59 ASSERT3U(len % 8, ==, 0); 60 61 fletcher_4_incremental_native(buf, len, &ba->zc); 62 63 aiov.iov_base = buf; 64 aiov.iov_len = len; 65 auio.uio_iov = &aiov; 66 auio.uio_iovcnt = 1; 67 auio.uio_resid = len; 68 auio.uio_segflg = UIO_SYSSPACE; 69 auio.uio_rw = UIO_WRITE; 70 auio.uio_offset = (off_t)-1; 71 
auio.uio_td = ba->td; 72#ifdef _KERNEL 73 if (ba->fp->f_type == DTYPE_VNODE) 74 bwillwrite(); 75 ba->err = fo_write(ba->fp, &auio, ba->td->td_ucred, 0, ba->td); 76#else 77 fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 78 ba->err = EOPNOTSUPP; 79#endif 80 81 return (ba->err); 82} 83 84static int 85dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 86 uint64_t length) 87{ 88 /* write a FREE record */ 89 bzero(ba->drr, sizeof (dmu_replay_record_t)); 90 ba->drr->drr_type = DRR_FREE; 91 ba->drr->drr_u.drr_free.drr_object = object; 92 ba->drr->drr_u.drr_free.drr_offset = offset; 93 ba->drr->drr_u.drr_free.drr_length = length; 94 95 if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 96 return (EINTR); 97 return (0); 98} 99 100static int 101dump_data(struct backuparg *ba, dmu_object_type_t type, 102 uint64_t object, uint64_t offset, int blksz, void *data) 103{ 104 /* write a DATA record */ 105 bzero(ba->drr, sizeof (dmu_replay_record_t)); 106 ba->drr->drr_type = DRR_WRITE; 107 ba->drr->drr_u.drr_write.drr_object = object; 108 ba->drr->drr_u.drr_write.drr_type = type; 109 ba->drr->drr_u.drr_write.drr_offset = offset; 110 ba->drr->drr_u.drr_write.drr_length = blksz; 111 112 if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 113 return (EINTR); 114 if (dump_bytes(ba, data, blksz)) 115 return (EINTR); 116 return (0); 117} 118 119static int 120dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 121{ 122 /* write a FREEOBJECTS record */ 123 bzero(ba->drr, sizeof (dmu_replay_record_t)); 124 ba->drr->drr_type = DRR_FREEOBJECTS; 125 ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj; 126 ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs; 127 128 if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 129 return (EINTR); 130 return (0); 131} 132 133static int 134dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 135{ 136 if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 137 return 
(dump_freeobjects(ba, object, 1)); 138 139 /* write an OBJECT record */ 140 bzero(ba->drr, sizeof (dmu_replay_record_t)); 141 ba->drr->drr_type = DRR_OBJECT; 142 ba->drr->drr_u.drr_object.drr_object = object; 143 ba->drr->drr_u.drr_object.drr_type = dnp->dn_type; 144 ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype; 145 ba->drr->drr_u.drr_object.drr_blksz = 146 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 147 ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; 148 ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; 149 ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; 150 151 if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 152 return (EINTR); 153 154 if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8))) 155 return (EINTR); 156 157 /* free anything past the end of the file */ 158 if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 159 (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 160 return (EINTR); 161 if (ba->err) 162 return (EINTR); 163 return (0); 164} 165 166#define BP_SPAN(dnp, level) \ 167 (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 168 (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 169 170static int 171backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 172{ 173 struct backuparg *ba = arg; 174 uint64_t object = bc->bc_bookmark.zb_object; 175 int level = bc->bc_bookmark.zb_level; 176 uint64_t blkid = bc->bc_bookmark.zb_blkid; 177 blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; 178 dmu_object_type_t type = bp ? 
BP_GET_TYPE(bp) : DMU_OT_NONE; 179 void *data = bc->bc_data; 180 int err = 0; 181 182 if (SIGPENDING(curthread)) 183 return (EINTR); 184 185 ASSERT(data || bp == NULL); 186 187 if (bp == NULL && object == 0) { 188 uint64_t span = BP_SPAN(bc->bc_dnode, level); 189 uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; 190 err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 191 } else if (bp == NULL) { 192 uint64_t span = BP_SPAN(bc->bc_dnode, level); 193 err = dump_free(ba, object, blkid * span, span); 194 } else if (data && level == 0 && type == DMU_OT_DNODE) { 195 dnode_phys_t *blk = data; 196 int i; 197 int blksz = BP_GET_LSIZE(bp); 198 199 for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 200 uint64_t dnobj = 201 (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 202 err = dump_dnode(ba, dnobj, blk+i); 203 if (err) 204 break; 205 } 206 } else if (level == 0 && 207 type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { 208 int blksz = BP_GET_LSIZE(bp); 209 if (data == NULL) { 210 uint32_t aflags = ARC_WAIT; 211 arc_buf_t *abuf; 212 zbookmark_t zb; 213 214 zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object; 215 zb.zb_object = object; 216 zb.zb_level = level; 217 zb.zb_blkid = blkid; 218 (void) arc_read(NULL, spa, bp, 219 dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf, 220 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED, 221 &aflags, &zb); 222 223 if (abuf) { 224 err = dump_data(ba, type, object, blkid * blksz, 225 blksz, abuf->b_data); 226 (void) arc_buf_remove_ref(abuf, &abuf); 227 } 228 } else { 229 err = dump_data(ba, type, object, blkid * blksz, 230 blksz, data); 231 } 232 } 233 234 ASSERT(err == 0 || err == EINTR); 235 return (err); 236} 237 238int 239dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct file *fp) 240{ 241 dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; 242 dsl_dataset_t *fromds = fromsnap ? 
fromsnap->os->os_dsl_dataset : NULL; 243 dmu_replay_record_t *drr; 244 struct backuparg ba; 245 int err; 246 247 /* tosnap must be a snapshot */ 248 if (ds->ds_phys->ds_next_snap_obj == 0) 249 return (EINVAL); 250 251 /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 252 if (fromds && (ds->ds_dir != fromds->ds_dir || 253 fromds->ds_phys->ds_creation_txg >= 254 ds->ds_phys->ds_creation_txg)) 255 return (EXDEV); 256 257 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 258 drr->drr_type = DRR_BEGIN; 259 drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 260 drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION; 261 drr->drr_u.drr_begin.drr_creation_time = 262 ds->ds_phys->ds_creation_time; 263 drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; 264 drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 265 if (fromds) 266 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 267 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 268 269 ba.drr = drr; 270 ba.td = curthread; 271 ba.fp = fp; 272 ba.os = tosnap; 273 ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 274 275 if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 276 kmem_free(drr, sizeof (dmu_replay_record_t)); 277 return (ba.err); 278 } 279 280 err = traverse_dsl_dataset(ds, 281 fromds ? 
fromds->ds_phys->ds_creation_txg : 0, 282 ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, 283 backup_cb, &ba); 284 285 if (err) { 286 if (err == EINTR && ba.err) 287 err = ba.err; 288 kmem_free(drr, sizeof (dmu_replay_record_t)); 289 return (err); 290 } 291 292 bzero(drr, sizeof (dmu_replay_record_t)); 293 drr->drr_type = DRR_END; 294 drr->drr_u.drr_end.drr_checksum = ba.zc; 295 296 if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 297 kmem_free(drr, sizeof (dmu_replay_record_t)); 298 return (ba.err); 299 } 300 301 kmem_free(drr, sizeof (dmu_replay_record_t)); 302 303 return (0); 304} 305 306struct restorearg { 307 int err; 308 int byteswap; 309 kthread_t *td; 310 struct file *fp; 311 char *buf; 312 uint64_t voff; 313 int buflen; /* number of valid bytes in buf */ 314 int bufoff; /* next offset to read */ 315 int bufsize; /* amount of memory allocated for buf */ 316 zio_cksum_t zc; 317}; 318 319/* ARGSUSED */ 320static int 321replay_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) 322{ 323 dsl_dataset_t *ds = arg1; 324 struct drr_begin *drrb = arg2; 325 const char *snapname; 326 int err; 327 uint64_t val; 328 329 /* must already be a snapshot of this fs */ 330 if (ds->ds_phys->ds_prev_snap_obj == 0) 331 return (ENODEV); 332 333 /* most recent snapshot must match fromguid */ 334 if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) 335 return (ENODEV); 336 /* must not have any changes since most recent snapshot */ 337 if (ds->ds_phys->ds_bp.blk_birth > 338 ds->ds_prev->ds_phys->ds_creation_txg) 339 return (ETXTBSY); 340 341 /* new snapshot name must not exist */ 342 snapname = strrchr(drrb->drr_toname, '@'); 343 if (snapname == NULL) 344 return (EEXIST); 345 346 snapname++; 347 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 348 ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); 349 if (err == 0) 350 return (EEXIST); 351 if (err != ENOENT) 352 return (err); 353 354 return (0); 355} 356 357/* ARGSUSED */ 358static void 
359replay_incremental_sync(void *arg1, void *arg2, dmu_tx_t *tx) 360{ 361 dsl_dataset_t *ds = arg1; 362 dmu_buf_will_dirty(ds->ds_dbuf, tx); 363 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 364} 365 366/* ARGSUSED */ 367static int 368replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx) 369{ 370 dsl_dir_t *dd = arg1; 371 struct drr_begin *drrb = arg2; 372 objset_t *mos = dd->dd_pool->dp_meta_objset; 373 char *cp; 374 uint64_t val; 375 int err; 376 377 cp = strchr(drrb->drr_toname, '@'); 378 *cp = '\0'; 379 err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 380 strrchr(drrb->drr_toname, '/') + 1, 381 sizeof (uint64_t), 1, &val); 382 *cp = '@'; 383 384 if (err != ENOENT) 385 return (err ? err : EEXIST); 386 387 return (0); 388} 389 390static void 391replay_full_sync(void *arg1, void *arg2, dmu_tx_t *tx) 392{ 393 dsl_dir_t *dd = arg1; 394 struct drr_begin *drrb = arg2; 395 char *cp; 396 dsl_dataset_t *ds; 397 uint64_t dsobj; 398 399 cp = strchr(drrb->drr_toname, '@'); 400 *cp = '\0'; 401 dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1, 402 NULL, tx); 403 *cp = '@'; 404 405 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 406 DS_MODE_EXCLUSIVE, FTAG, &ds)); 407 408 (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), 409 ds, &ds->ds_phys->ds_bp, drrb->drr_type, tx); 410 411 dmu_buf_will_dirty(ds->ds_dbuf, tx); 412 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 413 414 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 415} 416 417static int 418replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx) 419{ 420 objset_t *os = arg1; 421 struct drr_begin *drrb = arg2; 422 char *snapname; 423 424 /* XXX verify that drr_toname is in dd */ 425 426 snapname = strchr(drrb->drr_toname, '@'); 427 if (snapname == NULL) 428 return (EINVAL); 429 snapname++; 430 431 return (dsl_dataset_snapshot_check(os, snapname, tx)); 432} 433 434static void 435replay_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) 436{ 437 objset_t *os = arg1; 438 struct 
drr_begin *drrb = arg2; 439 char *snapname; 440 dsl_dataset_t *ds, *hds; 441 442 snapname = strchr(drrb->drr_toname, '@') + 1; 443 444 dsl_dataset_snapshot_sync(os, snapname, tx); 445 446 /* set snapshot's creation time and guid */ 447 hds = os->os->os_dsl_dataset; 448 VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool, 449 hds->ds_phys->ds_prev_snap_obj, NULL, 450 DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 451 FTAG, &ds)); 452 453 dmu_buf_will_dirty(ds->ds_dbuf, tx); 454 ds->ds_phys->ds_creation_time = drrb->drr_creation_time; 455 ds->ds_phys->ds_guid = drrb->drr_toguid; 456 ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 457 458 dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); 459 460 dmu_buf_will_dirty(hds->ds_dbuf, tx); 461 hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 462} 463 464static int 465restore_bytes(struct restorearg *ra, void *buf, int len, off_t off, int *resid) 466{ 467 struct uio auio; 468 struct iovec aiov; 469 int error; 470 471 aiov.iov_base = buf; 472 aiov.iov_len = len; 473 auio.uio_iov = &aiov; 474 auio.uio_iovcnt = 1; 475 auio.uio_resid = len; 476 auio.uio_segflg = UIO_SYSSPACE; 477 auio.uio_rw = UIO_READ; 478 auio.uio_offset = off; 479 auio.uio_td = ra->td; 480#ifdef _KERNEL 481 error = fo_read(ra->fp, &auio, ra->td->td_ucred, FOF_OFFSET, ra->td); 482#else 483 fprintf(stderr, "%s: returning EOPNOTSUPP\n", __func__); 484 error = EOPNOTSUPP; 485#endif 486 *resid = auio.uio_resid; 487 return (error); 488} 489 490static void * 491restore_read(struct restorearg *ra, int len) 492{ 493 void *rv; 494 495 /* some things will require 8-byte alignment, so everything must */ 496 ASSERT3U(len % 8, ==, 0); 497 498 while (ra->buflen - ra->bufoff < len) { 499 int resid; 500 int leftover = ra->buflen - ra->bufoff; 501 502 (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover); 503 504 ra->err = restore_bytes(ra, (caddr_t)ra->buf + leftover, 505 ra->bufsize - leftover, ra->voff, &resid); 506 507 ra->voff += ra->bufsize - leftover - resid; 
508 ra->buflen = ra->bufsize - resid; 509 ra->bufoff = 0; 510 if (resid == ra->bufsize - leftover) 511 ra->err = EINVAL; 512 if (ra->err) 513 return (NULL); 514 /* Could compute checksum here? */ 515 } 516 517 ASSERT3U(ra->bufoff % 8, ==, 0); 518 ASSERT3U(ra->buflen - ra->bufoff, >=, len); 519 rv = ra->buf + ra->bufoff; 520 ra->bufoff += len; 521 if (ra->byteswap) 522 fletcher_4_incremental_byteswap(rv, len, &ra->zc); 523 else 524 fletcher_4_incremental_native(rv, len, &ra->zc); 525 return (rv); 526} 527 528static void 529backup_byteswap(dmu_replay_record_t *drr) 530{ 531#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 532#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 533 drr->drr_type = BSWAP_32(drr->drr_type); 534 switch (drr->drr_type) { 535 case DRR_BEGIN: 536 DO64(drr_begin.drr_magic); 537 DO64(drr_begin.drr_version); 538 DO64(drr_begin.drr_creation_time); 539 DO32(drr_begin.drr_type); 540 DO64(drr_begin.drr_toguid); 541 DO64(drr_begin.drr_fromguid); 542 break; 543 case DRR_OBJECT: 544 DO64(drr_object.drr_object); 545 /* DO64(drr_object.drr_allocation_txg); */ 546 DO32(drr_object.drr_type); 547 DO32(drr_object.drr_bonustype); 548 DO32(drr_object.drr_blksz); 549 DO32(drr_object.drr_bonuslen); 550 break; 551 case DRR_FREEOBJECTS: 552 DO64(drr_freeobjects.drr_firstobj); 553 DO64(drr_freeobjects.drr_numobjs); 554 break; 555 case DRR_WRITE: 556 DO64(drr_write.drr_object); 557 DO32(drr_write.drr_type); 558 DO64(drr_write.drr_offset); 559 DO64(drr_write.drr_length); 560 break; 561 case DRR_FREE: 562 DO64(drr_free.drr_object); 563 DO64(drr_free.drr_offset); 564 DO64(drr_free.drr_length); 565 break; 566 case DRR_END: 567 DO64(drr_end.drr_checksum.zc_word[0]); 568 DO64(drr_end.drr_checksum.zc_word[1]); 569 DO64(drr_end.drr_checksum.zc_word[2]); 570 DO64(drr_end.drr_checksum.zc_word[3]); 571 break; 572 } 573#undef DO64 574#undef DO32 575} 576 577static int 578restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 579{ 580 int err; 
581 dmu_tx_t *tx; 582 583 err = dmu_object_info(os, drro->drr_object, NULL); 584 585 if (err != 0 && err != ENOENT) 586 return (EINVAL); 587 588 if (drro->drr_type == DMU_OT_NONE || 589 drro->drr_type >= DMU_OT_NUMTYPES || 590 drro->drr_bonustype >= DMU_OT_NUMTYPES || 591 drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || 592 drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 593 P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 594 drro->drr_blksz < SPA_MINBLOCKSIZE || 595 drro->drr_blksz > SPA_MAXBLOCKSIZE || 596 drro->drr_bonuslen > DN_MAX_BONUSLEN) { 597 return (EINVAL); 598 } 599 600 tx = dmu_tx_create(os); 601 602 if (err == ENOENT) { 603 /* currently free, want to be allocated */ 604 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 605 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); 606 err = dmu_tx_assign(tx, TXG_WAIT); 607 if (err) { 608 dmu_tx_abort(tx); 609 return (err); 610 } 611 err = dmu_object_claim(os, drro->drr_object, 612 drro->drr_type, drro->drr_blksz, 613 drro->drr_bonustype, drro->drr_bonuslen, tx); 614 } else { 615 /* currently allocated, want to be allocated */ 616 dmu_tx_hold_bonus(tx, drro->drr_object); 617 /* 618 * We may change blocksize, so need to 619 * hold_write 620 */ 621 dmu_tx_hold_write(tx, drro->drr_object, 0, 1); 622 err = dmu_tx_assign(tx, TXG_WAIT); 623 if (err) { 624 dmu_tx_abort(tx); 625 return (err); 626 } 627 628 err = dmu_object_reclaim(os, drro->drr_object, 629 drro->drr_type, drro->drr_blksz, 630 drro->drr_bonustype, drro->drr_bonuslen, tx); 631 } 632 if (err) { 633 dmu_tx_commit(tx); 634 return (EINVAL); 635 } 636 637 dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); 638 dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 639 640 if (drro->drr_bonuslen) { 641 dmu_buf_t *db; 642 void *data; 643 VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 644 dmu_buf_will_dirty(db, tx); 645 646 ASSERT3U(db->db_size, ==, drro->drr_bonuslen); 647 data = restore_read(ra, P2ROUNDUP(db->db_size, 8)); 648 if 
(data == NULL) { 649 dmu_tx_commit(tx); 650 return (ra->err); 651 } 652 bcopy(data, db->db_data, db->db_size); 653 if (ra->byteswap) { 654 dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 655 drro->drr_bonuslen); 656 } 657 dmu_buf_rele(db, FTAG); 658 } 659 dmu_tx_commit(tx); 660 return (0); 661} 662 663/* ARGSUSED */ 664static int 665restore_freeobjects(struct restorearg *ra, objset_t *os, 666 struct drr_freeobjects *drrfo) 667{ 668 uint64_t obj; 669 670 if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 671 return (EINVAL); 672 673 for (obj = drrfo->drr_firstobj; 674 obj < drrfo->drr_firstobj + drrfo->drr_numobjs; 675 (void) dmu_object_next(os, &obj, FALSE, 0)) { 676 dmu_tx_t *tx; 677 int err; 678 679 if (dmu_object_info(os, obj, NULL) != 0) 680 continue; 681 682 tx = dmu_tx_create(os); 683 dmu_tx_hold_bonus(tx, obj); 684 err = dmu_tx_assign(tx, TXG_WAIT); 685 if (err) { 686 dmu_tx_abort(tx); 687 return (err); 688 } 689 err = dmu_object_free(os, obj, tx); 690 dmu_tx_commit(tx); 691 if (err && err != ENOENT) 692 return (EINVAL); 693 } 694 return (0); 695} 696 697static int 698restore_write(struct restorearg *ra, objset_t *os, 699 struct drr_write *drrw) 700{ 701 dmu_tx_t *tx; 702 void *data; 703 int err; 704 705 if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 706 drrw->drr_type >= DMU_OT_NUMTYPES) 707 return (EINVAL); 708 709 data = restore_read(ra, drrw->drr_length); 710 if (data == NULL) 711 return (ra->err); 712 713 if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 714 return (EINVAL); 715 716 tx = dmu_tx_create(os); 717 718 dmu_tx_hold_write(tx, drrw->drr_object, 719 drrw->drr_offset, drrw->drr_length); 720 err = dmu_tx_assign(tx, TXG_WAIT); 721 if (err) { 722 dmu_tx_abort(tx); 723 return (err); 724 } 725 if (ra->byteswap) 726 dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 727 dmu_write(os, drrw->drr_object, 728 drrw->drr_offset, drrw->drr_length, data, tx); 729 dmu_tx_commit(tx); 730 return (0); 731} 732 
733/* ARGSUSED */ 734static int 735restore_free(struct restorearg *ra, objset_t *os, 736 struct drr_free *drrf) 737{ 738 dmu_tx_t *tx; 739 int err; 740 741 if (drrf->drr_length != -1ULL && 742 drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 743 return (EINVAL); 744 745 if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 746 return (EINVAL); 747 748 tx = dmu_tx_create(os); 749 750 dmu_tx_hold_free(tx, drrf->drr_object, 751 drrf->drr_offset, drrf->drr_length); 752 err = dmu_tx_assign(tx, TXG_WAIT); 753 if (err) { 754 dmu_tx_abort(tx); 755 return (err); 756 } 757 err = dmu_free_range(os, drrf->drr_object, 758 drrf->drr_offset, drrf->drr_length, tx); 759 dmu_tx_commit(tx); 760 return (err); 761} 762 763int 764dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, 765 boolean_t force, struct file *fp, uint64_t voffset) 766{ 767 kthread_t *td = curthread; 768 struct restorearg ra; 769 dmu_replay_record_t *drr; 770 char *cp; 771 objset_t *os = NULL; 772 zio_cksum_t pzc; 773 774 bzero(&ra, sizeof (ra)); 775 ra.td = td; 776 ra.fp = fp; 777 ra.voff = voffset; 778 ra.bufsize = 1<<20; 779 ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 780 781 if (drrb->drr_magic == DMU_BACKUP_MAGIC) { 782 ra.byteswap = FALSE; 783 } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { 784 ra.byteswap = TRUE; 785 } else { 786 ra.err = EINVAL; 787 goto out; 788 } 789 790 /* 791 * NB: this assumes that struct drr_begin will be the largest in 792 * dmu_replay_record_t's drr_u, and thus we don't need to pad it 793 * with zeros to make it the same length as we wrote out. 
794 */ 795 ((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN; 796 ((dmu_replay_record_t *)ra.buf)->drr_pad = 0; 797 ((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb; 798 if (ra.byteswap) { 799 fletcher_4_incremental_byteswap(ra.buf, 800 sizeof (dmu_replay_record_t), &ra.zc); 801 } else { 802 fletcher_4_incremental_native(ra.buf, 803 sizeof (dmu_replay_record_t), &ra.zc); 804 } 805 (void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */ 806 807 if (ra.byteswap) { 808 drrb->drr_magic = BSWAP_64(drrb->drr_magic); 809 drrb->drr_version = BSWAP_64(drrb->drr_version); 810 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 811 drrb->drr_type = BSWAP_32(drrb->drr_type); 812 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 813 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 814 } 815 816 ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 817 818 if (drrb->drr_version != DMU_BACKUP_VERSION || 819 drrb->drr_type >= DMU_OST_NUMTYPES || 820 strchr(drrb->drr_toname, '@') == NULL) { 821 ra.err = EINVAL; 822 goto out; 823 } 824 825 /* 826 * Process the begin in syncing context. 
827 */ 828 if (drrb->drr_fromguid) { 829 /* incremental backup */ 830 dsl_dataset_t *ds = NULL; 831 832 cp = strchr(tosnap, '@'); 833 *cp = '\0'; 834 ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds); 835 *cp = '@'; 836 if (ra.err) 837 goto out; 838 839 /* 840 * Only do the rollback if the most recent snapshot 841 * matches the incremental source 842 */ 843 if (force) { 844 if (ds->ds_prev == NULL || 845 ds->ds_prev->ds_phys->ds_guid != 846 drrb->drr_fromguid) { 847 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 848 kmem_free(ra.buf, ra.bufsize); 849 return (ENODEV); 850 } 851 (void) dsl_dataset_rollback(ds); 852 } 853 ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool, 854 replay_incremental_check, replay_incremental_sync, 855 ds, drrb, 1); 856 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 857 } else { 858 /* full backup */ 859 dsl_dir_t *dd = NULL; 860 const char *tail; 861 862 /* can't restore full backup into topmost fs, for now */ 863 if (strrchr(drrb->drr_toname, '/') == NULL) { 864 ra.err = EINVAL; 865 goto out; 866 } 867 868 cp = strchr(tosnap, '@'); 869 *cp = '\0'; 870 ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail); 871 *cp = '@'; 872 if (ra.err) 873 goto out; 874 if (tail == NULL) { 875 ra.err = EEXIST; 876 goto out; 877 } 878 879 ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check, 880 replay_full_sync, dd, drrb, 5); 881 dsl_dir_close(dd, FTAG); 882 } 883 if (ra.err) 884 goto out; 885 886 /* 887 * Open the objset we are modifying. 888 */ 889 890 cp = strchr(tosnap, '@'); 891 *cp = '\0'; 892 ra.err = dmu_objset_open(tosnap, DMU_OST_ANY, 893 DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); 894 *cp = '@'; 895 ASSERT3U(ra.err, ==, 0); 896 897 /* 898 * Read records and process them. 
899 */ 900 pzc = ra.zc; 901 while (ra.err == 0 && 902 NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 903 if (SIGPENDING(td)) { 904 ra.err = EINTR; 905 goto out; 906 } 907 908 if (ra.byteswap) 909 backup_byteswap(drr); 910 911 switch (drr->drr_type) { 912 case DRR_OBJECT: 913 { 914 /* 915 * We need to make a copy of the record header, 916 * because restore_{object,write} may need to 917 * restore_read(), which will invalidate drr. 918 */ 919 struct drr_object drro = drr->drr_u.drr_object; 920 ra.err = restore_object(&ra, os, &drro); 921 break; 922 } 923 case DRR_FREEOBJECTS: 924 { 925 struct drr_freeobjects drrfo = 926 drr->drr_u.drr_freeobjects; 927 ra.err = restore_freeobjects(&ra, os, &drrfo); 928 break; 929 } 930 case DRR_WRITE: 931 { 932 struct drr_write drrw = drr->drr_u.drr_write; 933 ra.err = restore_write(&ra, os, &drrw); 934 break; 935 } 936 case DRR_FREE: 937 { 938 struct drr_free drrf = drr->drr_u.drr_free; 939 ra.err = restore_free(&ra, os, &drrf); 940 break; 941 } 942 case DRR_END: 943 { 944 struct drr_end drre = drr->drr_u.drr_end; 945 /* 946 * We compare against the *previous* checksum 947 * value, because the stored checksum is of 948 * everything before the DRR_END record. 949 */ 950 if (drre.drr_checksum.zc_word[0] != 0 && 951 !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pzc)) { 952 ra.err = ECKSUM; 953 goto out; 954 } 955 956 ra.err = dsl_sync_task_do(dmu_objset_ds(os)-> 957 ds_dir->dd_pool, replay_end_check, replay_end_sync, 958 os, drrb, 3); 959 goto out; 960 } 961 default: 962 ra.err = EINVAL; 963 goto out; 964 } 965 pzc = ra.zc; 966 } 967 968out: 969 if (os) 970 dmu_objset_close(os); 971 972 /* 973 * Make sure we don't rollback/destroy unless we actually 974 * processed the begin properly. 'os' will only be set if this 975 * is the case. 976 */ 977 if (ra.err && os && tosnap && strchr(tosnap, '@')) { 978 /* 979 * rollback or destroy what we created, so we don't 980 * leave it in the restoring state. 
981 */ 982 dsl_dataset_t *ds; 983 int err; 984 985 cp = strchr(tosnap, '@'); 986 *cp = '\0'; 987 err = dsl_dataset_open(tosnap, 988 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, 989 FTAG, &ds); 990 if (err == 0) { 991 txg_wait_synced(ds->ds_dir->dd_pool, 0); 992 if (drrb->drr_fromguid) { 993 /* incremental: rollback to most recent snap */ 994 (void) dsl_dataset_rollback(ds); 995 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 996 } else { 997 /* full: destroy whole fs */ 998 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 999 (void) dsl_dataset_destroy(tosnap); 1000 } 1001 } 1002 *cp = '@'; 1003 } 1004 1005 kmem_free(ra.buf, ra.bufsize); 1006 if (sizep) 1007 *sizep = ra.voff; 1008 return (ra.err); 1009} 1010