/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 RackTop Systems.
 */

/*
 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
 * It has the following characteristics:
 *
 *  - Thread Safe. libzfs_core is accessible concurrently from multiple
 *    threads. This is accomplished primarily by avoiding global data
 *    (e.g. caching). Since it's thread-safe, there is no reason for a
 *    process to have multiple libzfs "instances". Therefore, we store
 *    our few pieces of data (e.g. the file descriptor) in global
 *    variables. The fd is reference-counted so that the libzfs_core
 *    library can be "initialized" multiple times (e.g. by different
 *    consumers within the same process).
 *
 *  - Committed Interface. The libzfs_core interface will be committed,
 *    therefore consumers can compile against it and be confident that
 *    their code will continue to work on future releases of this code.
 *    Currently, the interface is Evolving (not Committed), but we intend
 *    to commit to it once it is more complete and we determine that it
 *    meets the needs of all consumers.
 *
 *  - Programmatic Error Handling. libzfs_core communicates errors with
 *    defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer. libzfs_core is a thin layer, marshaling arguments
 *    to/from the kernel ioctls. There is generally a 1:1 correspondence
 *    between libzfs_core functions and ioctls to /dev/zfs.
 *
 *  - Clear Atomicity. Because libzfs_core functions are generally 1:1
 *    with kernel ioctls, and kernel ioctls are generally atomic, each
 *    libzfs_core function is atomic. For example, creating multiple
 *    snapshots with a single call to lzc_snapshot() is atomic -- it
 *    can't fail with only some of the requested snapshots created, even
 *    in the event of power loss or system crash.
 *
 *  - Continued libzfs Support. Some higher-level operations (e.g.
 *    support for "zfs send -R") are too complicated to fit the scope of
 *    libzfs_core. This functionality will continue to live in libzfs.
 *    Where appropriate, libzfs will use the underlying atomic operations
 *    of libzfs_core. For example, libzfs may implement "zfs send -R |
 *    zfs receive" by using individual "send one snapshot", rename,
 *    destroy, and "receive one snapshot" operations in libzfs_core.
 *    /sbin/zfs and /sbin/zpool will link with both libzfs and
 *    libzfs_core. Other consumers should aim to use only libzfs_core,
 *    since that will be the supported, stable interface going forwards.
73 */ 74 75#define _IN_LIBZFS_CORE_ 76 77#include <libzfs_core.h> 78#include <ctype.h> 79#include <unistd.h> 80#include <stdlib.h> 81#include <string.h> 82#include <errno.h> 83#include <fcntl.h> 84#include <pthread.h> 85#include <sys/nvpair.h> 86#include <sys/param.h> 87#include <sys/types.h> 88#include <sys/stat.h> 89#include <sys/zfs_ioctl.h> 90#include "libzfs_core_compat.h" 91#include "libzfs_compat.h" 92 93#ifdef __FreeBSD__ 94extern int zfs_ioctl_version; 95#endif 96 97static int g_fd = -1; 98static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 99static int g_refcount; 100 101int 102libzfs_core_init(void) 103{ 104 (void) pthread_mutex_lock(&g_lock); 105 if (g_refcount == 0) { 106 g_fd = open("/dev/zfs", O_RDWR); 107 if (g_fd < 0) { 108 (void) pthread_mutex_unlock(&g_lock); 109 return (errno); 110 } 111 } 112 g_refcount++; 113 (void) pthread_mutex_unlock(&g_lock); 114 115 return (0); 116} 117 118void 119libzfs_core_fini(void) 120{ 121 (void) pthread_mutex_lock(&g_lock); 122 ASSERT3S(g_refcount, >, 0); 123 124 if (g_refcount > 0) 125 g_refcount--; 126 127 if (g_refcount == 0 && g_fd != -1) { 128 (void) close(g_fd); 129 g_fd = -1; 130 } 131 (void) pthread_mutex_unlock(&g_lock); 132} 133 134static int 135lzc_ioctl(zfs_ioc_t ioc, const char *name, 136 nvlist_t *source, nvlist_t **resultp) 137{ 138 zfs_cmd_t zc = { 0 }; 139 int error = 0; 140 char *packed; 141#ifdef __FreeBSD__ 142 nvlist_t *oldsource; 143#endif 144 size_t size; 145 146 ASSERT3S(g_refcount, >, 0); 147 VERIFY3S(g_fd, !=, -1); 148 149 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 150 151#ifdef __FreeBSD__ 152 if (zfs_ioctl_version == ZFS_IOCVER_UNDEF) 153 zfs_ioctl_version = get_zfs_ioctl_version(); 154 155 if (zfs_ioctl_version < ZFS_IOCVER_LZC) { 156 oldsource = source; 157 error = lzc_compat_pre(&zc, &ioc, &source); 158 if (error) 159 return (error); 160 } 161#endif 162 163 packed = fnvlist_pack(source, &size); 164 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 165 
zc.zc_nvlist_src_size = size; 166 167 if (resultp != NULL) { 168 *resultp = NULL; 169 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { 170 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, 171 ZCP_ARG_MEMLIMIT); 172 } else { 173 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 174 } 175 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 176 malloc(zc.zc_nvlist_dst_size); 177#ifdef illumos 178 if (zc.zc_nvlist_dst == NULL) { 179#else 180 if (zc.zc_nvlist_dst == 0) { 181#endif 182 error = ENOMEM; 183 goto out; 184 } 185 } 186 187 while (ioctl(g_fd, ioc, &zc) != 0) { 188 /* 189 * If ioctl exited with ENOMEM, we retry the ioctl after 190 * increasing the size of the destination nvlist. 191 * 192 * Channel programs that exit with ENOMEM ran over the 193 * lua memory sandbox; they should not be retried. 194 */ 195 if (errno == ENOMEM && resultp != NULL && 196 ioc != ZFS_IOC_CHANNEL_PROGRAM) { 197 free((void *)(uintptr_t)zc.zc_nvlist_dst); 198 zc.zc_nvlist_dst_size *= 2; 199 zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 200 malloc(zc.zc_nvlist_dst_size); 201#ifdef illumos 202 if (zc.zc_nvlist_dst == NULL) { 203#else 204 if (zc.zc_nvlist_dst == 0) { 205#endif 206 error = ENOMEM; 207 goto out; 208 } 209 } else { 210 error = errno; 211 break; 212 } 213 } 214 215#ifdef __FreeBSD__ 216 if (zfs_ioctl_version < ZFS_IOCVER_LZC) 217 lzc_compat_post(&zc, ioc); 218#endif 219 if (zc.zc_nvlist_dst_filled) { 220 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 221 zc.zc_nvlist_dst_size); 222 } 223#ifdef __FreeBSD__ 224 if (zfs_ioctl_version < ZFS_IOCVER_LZC) 225 lzc_compat_outnvl(&zc, ioc, resultp); 226#endif 227out: 228#ifdef __FreeBSD__ 229 if (zfs_ioctl_version < ZFS_IOCVER_LZC) { 230 if (source != oldsource) 231 nvlist_free(source); 232 source = oldsource; 233 } 234#endif 235 fnvlist_pack_free(packed, size); 236 free((void *)(uintptr_t)zc.zc_nvlist_dst); 237 return (error); 238} 239 240int 241lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) 242{ 243 int error; 
244 nvlist_t *args = fnvlist_alloc(); 245 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); 246 if (props != NULL) 247 fnvlist_add_nvlist(args, "props", props); 248 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 249 nvlist_free(args); 250 return (error); 251} 252 253int 254lzc_clone(const char *fsname, const char *origin, 255 nvlist_t *props) 256{ 257 int error; 258 nvlist_t *args = fnvlist_alloc(); 259 fnvlist_add_string(args, "origin", origin); 260 if (props != NULL) 261 fnvlist_add_nvlist(args, "props", props); 262 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 263 nvlist_free(args); 264 return (error); 265} 266 267int 268lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) 269{ 270 /* 271 * The promote ioctl is still legacy, so we need to construct our 272 * own zfs_cmd_t rather than using lzc_ioctl(). 273 */ 274 zfs_cmd_t zc = { 0 }; 275 276 ASSERT3S(g_refcount, >, 0); 277 VERIFY3S(g_fd, !=, -1); 278 279 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); 280 if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { 281 int error = errno; 282 if (error == EEXIST && snapnamebuf != NULL) 283 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); 284 return (error); 285 } 286 return (0); 287} 288 289int 290lzc_remap(const char *fsname) 291{ 292 int error; 293 nvlist_t *args = fnvlist_alloc(); 294 error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL); 295 nvlist_free(args); 296 return (error); 297} 298 299int 300lzc_rename(const char *source, const char *target) 301{ 302 zfs_cmd_t zc = { 0 }; 303 int error; 304 305 ASSERT3S(g_refcount, >, 0); 306 VERIFY3S(g_fd, !=, -1); 307 308 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); 309 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); 310 error = ioctl(g_fd, ZFS_IOC_RENAME, &zc); 311 if (error != 0) 312 error = errno; 313 return (error); 314} 315 316int 317lzc_destroy(const char *fsname) 318{ 319 int error; 320 321 nvlist_t *args = fnvlist_alloc(); 322 error = 
lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); 323 nvlist_free(args); 324 return (error); 325} 326 327/* 328 * Creates snapshots. 329 * 330 * The keys in the snaps nvlist are the snapshots to be created. 331 * They must all be in the same pool. 332 * 333 * The props nvlist is properties to set. Currently only user properties 334 * are supported. { user:prop_name -> string value } 335 * 336 * The returned results nvlist will have an entry for each snapshot that failed. 337 * The value will be the (int32) error code. 338 * 339 * The return value will be 0 if all snapshots were created, otherwise it will 340 * be the errno of a (unspecified) snapshot that failed. 341 */ 342int 343lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 344{ 345 nvpair_t *elem; 346 nvlist_t *args; 347 int error; 348 char pool[ZFS_MAX_DATASET_NAME_LEN]; 349 350 *errlist = NULL; 351 352 /* determine the pool name */ 353 elem = nvlist_next_nvpair(snaps, NULL); 354 if (elem == NULL) 355 return (0); 356 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 357 pool[strcspn(pool, "/@")] = '\0'; 358 359 args = fnvlist_alloc(); 360 fnvlist_add_nvlist(args, "snaps", snaps); 361 if (props != NULL) 362 fnvlist_add_nvlist(args, "props", props); 363 364 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 365 nvlist_free(args); 366 367 return (error); 368} 369 370/* 371 * Destroys snapshots. 372 * 373 * The keys in the snaps nvlist are the snapshots to be destroyed. 374 * They must all be in the same pool. 375 * 376 * Snapshots that do not exist will be silently ignored. 377 * 378 * If 'defer' is not set, and a snapshot has user holds or clones, the 379 * destroy operation will fail and none of the snapshots will be 380 * destroyed. 381 * 382 * If 'defer' is set, and a snapshot has user holds or clones, it will be 383 * marked for deferred destruction, and will be destroyed when the last hold 384 * or clone is removed/destroyed. 
385 * 386 * The return value will be 0 if all snapshots were destroyed (or marked for 387 * later destruction if 'defer' is set) or didn't exist to begin with. 388 * 389 * Otherwise the return value will be the errno of a (unspecified) snapshot 390 * that failed, no snapshots will be destroyed, and the errlist will have an 391 * entry for each snapshot that failed. The value in the errlist will be 392 * the (int32) error code. 393 */ 394int 395lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 396{ 397 nvpair_t *elem; 398 nvlist_t *args; 399 int error; 400 char pool[ZFS_MAX_DATASET_NAME_LEN]; 401 402 /* determine the pool name */ 403 elem = nvlist_next_nvpair(snaps, NULL); 404 if (elem == NULL) 405 return (0); 406 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 407 pool[strcspn(pool, "/@")] = '\0'; 408 409 args = fnvlist_alloc(); 410 fnvlist_add_nvlist(args, "snaps", snaps); 411 if (defer) 412 fnvlist_add_boolean(args, "defer"); 413 414 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 415 nvlist_free(args); 416 417 return (error); 418} 419 420int 421lzc_snaprange_space(const char *firstsnap, const char *lastsnap, 422 uint64_t *usedp) 423{ 424 nvlist_t *args; 425 nvlist_t *result; 426 int err; 427 char fs[ZFS_MAX_DATASET_NAME_LEN]; 428 char *atp; 429 430 /* determine the fs name */ 431 (void) strlcpy(fs, firstsnap, sizeof (fs)); 432 atp = strchr(fs, '@'); 433 if (atp == NULL) 434 return (EINVAL); 435 *atp = '\0'; 436 437 args = fnvlist_alloc(); 438 fnvlist_add_string(args, "firstsnap", firstsnap); 439 440 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 441 nvlist_free(args); 442 if (err == 0) 443 *usedp = fnvlist_lookup_uint64(result, "used"); 444 fnvlist_free(result); 445 446 return (err); 447} 448 449boolean_t 450lzc_exists(const char *dataset) 451{ 452 /* 453 * The objset_stats ioctl is still legacy, so we need to construct our 454 * own zfs_cmd_t rather than using lzc_ioctl(). 
455 */ 456 zfs_cmd_t zc = { 0 }; 457 458 ASSERT3S(g_refcount, >, 0); 459 VERIFY3S(g_fd, !=, -1); 460 461 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 462 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 463} 464 465/* 466 * Create "user holds" on snapshots. If there is a hold on a snapshot, 467 * the snapshot can not be destroyed. (However, it can be marked for deletion 468 * by lzc_destroy_snaps(defer=B_TRUE).) 469 * 470 * The keys in the nvlist are snapshot names. 471 * The snapshots must all be in the same pool. 472 * The value is the name of the hold (string type). 473 * 474 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). 475 * In this case, when the cleanup_fd is closed (including on process 476 * termination), the holds will be released. If the system is shut down 477 * uncleanly, the holds will be released when the pool is next opened 478 * or imported. 479 * 480 * Holds for snapshots which don't exist will be skipped and have an entry 481 * added to errlist, but will not cause an overall failure. 482 * 483 * The return value will be 0 if all holds, for snapshots that existed, 484 * were succesfully created. 485 * 486 * Otherwise the return value will be the errno of a (unspecified) hold that 487 * failed and no holds will be created. 488 * 489 * In all cases the errlist will have an entry for each hold that failed 490 * (name = snapshot), with its value being the error code (int32). 
491 */ 492int 493lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) 494{ 495 char pool[ZFS_MAX_DATASET_NAME_LEN]; 496 nvlist_t *args; 497 nvpair_t *elem; 498 int error; 499 500 /* determine the pool name */ 501 elem = nvlist_next_nvpair(holds, NULL); 502 if (elem == NULL) 503 return (0); 504 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 505 pool[strcspn(pool, "/@")] = '\0'; 506 507 args = fnvlist_alloc(); 508 fnvlist_add_nvlist(args, "holds", holds); 509 if (cleanup_fd != -1) 510 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); 511 512 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); 513 nvlist_free(args); 514 return (error); 515} 516 517/* 518 * Release "user holds" on snapshots. If the snapshot has been marked for 519 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have 520 * any clones, and all the user holds are removed, then the snapshot will be 521 * destroyed. 522 * 523 * The keys in the nvlist are snapshot names. 524 * The snapshots must all be in the same pool. 525 * The value is a nvlist whose keys are the holds to remove. 526 * 527 * Holds which failed to release because they didn't exist will have an entry 528 * added to errlist, but will not cause an overall failure. 529 * 530 * The return value will be 0 if the nvl holds was empty or all holds that 531 * existed, were successfully removed. 532 * 533 * Otherwise the return value will be the errno of a (unspecified) hold that 534 * failed to release and no holds will be released. 535 * 536 * In all cases the errlist will have an entry for each hold that failed to 537 * to release. 
538 */ 539int 540lzc_release(nvlist_t *holds, nvlist_t **errlist) 541{ 542 char pool[ZFS_MAX_DATASET_NAME_LEN]; 543 nvpair_t *elem; 544 545 /* determine the pool name */ 546 elem = nvlist_next_nvpair(holds, NULL); 547 if (elem == NULL) 548 return (0); 549 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 550 pool[strcspn(pool, "/@")] = '\0'; 551 552 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); 553} 554 555/* 556 * Retrieve list of user holds on the specified snapshot. 557 * 558 * On success, *holdsp will be set to a nvlist which the caller must free. 559 * The keys are the names of the holds, and the value is the creation time 560 * of the hold (uint64) in seconds since the epoch. 561 */ 562int 563lzc_get_holds(const char *snapname, nvlist_t **holdsp) 564{ 565 int error; 566 nvlist_t *innvl = fnvlist_alloc(); 567 error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp); 568 fnvlist_free(innvl); 569 return (error); 570} 571 572/* 573 * Generate a zfs send stream for the specified snapshot and write it to 574 * the specified file descriptor. 575 * 576 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") 577 * 578 * If "from" is NULL, a full (non-incremental) stream will be sent. 579 * If "from" is non-NULL, it must be the full name of a snapshot or 580 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or 581 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or 582 * bookmark must represent an earlier point in the history of "snapname"). 583 * It can be an earlier snapshot in the same filesystem or zvol as "snapname", 584 * or it can be the origin of "snapname"'s filesystem, or an earlier 585 * snapshot in the origin, etc. 586 * 587 * "fd" is the file descriptor to write the send stream to. 588 * 589 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted 590 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT 591 * records with drr_blksz > 128K. 
592 * 593 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted 594 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, 595 * which the receiving system must support (as indicated by support 596 * for the "embedded_data" feature). 597 */ 598int 599lzc_send(const char *snapname, const char *from, int fd, 600 enum lzc_send_flags flags) 601{ 602 return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); 603} 604 605int 606lzc_send_resume(const char *snapname, const char *from, int fd, 607 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) 608{ 609 nvlist_t *args; 610 int err; 611 612 args = fnvlist_alloc(); 613 fnvlist_add_int32(args, "fd", fd); 614 if (from != NULL) 615 fnvlist_add_string(args, "fromsnap", from); 616 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 617 fnvlist_add_boolean(args, "largeblockok"); 618 if (flags & LZC_SEND_FLAG_EMBED_DATA) 619 fnvlist_add_boolean(args, "embedok"); 620 if (flags & LZC_SEND_FLAG_COMPRESS) 621 fnvlist_add_boolean(args, "compressok"); 622 if (resumeobj != 0 || resumeoff != 0) { 623 fnvlist_add_uint64(args, "resume_object", resumeobj); 624 fnvlist_add_uint64(args, "resume_offset", resumeoff); 625 } 626 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 627 nvlist_free(args); 628 return (err); 629} 630 631/* 632 * "from" can be NULL, a snapshot, or a bookmark. 633 * 634 * If from is NULL, a full (non-incremental) stream will be estimated. This 635 * is calculated very efficiently. 636 * 637 * If from is a snapshot, lzc_send_space uses the deadlists attached to 638 * each snapshot to efficiently estimate the stream size. 639 * 640 * If from is a bookmark, the indirect blocks in the destination snapshot 641 * are traversed, looking for blocks with a birth time since the creation TXG of 642 * the snapshot this bookmark was created from. This will result in 643 * significantly more I/O and be less efficient than a send space estimation on 644 * an equivalent snapshot. 
645 */ 646int 647lzc_send_space(const char *snapname, const char *from, 648 enum lzc_send_flags flags, uint64_t *spacep) 649{ 650 nvlist_t *args; 651 nvlist_t *result; 652 int err; 653 654 args = fnvlist_alloc(); 655 if (from != NULL) 656 fnvlist_add_string(args, "from", from); 657 if (flags & LZC_SEND_FLAG_LARGE_BLOCK) 658 fnvlist_add_boolean(args, "largeblockok"); 659 if (flags & LZC_SEND_FLAG_EMBED_DATA) 660 fnvlist_add_boolean(args, "embedok"); 661 if (flags & LZC_SEND_FLAG_COMPRESS) 662 fnvlist_add_boolean(args, "compressok"); 663 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 664 nvlist_free(args); 665 if (err == 0) 666 *spacep = fnvlist_lookup_uint64(result, "space"); 667 nvlist_free(result); 668 return (err); 669} 670 671static int 672recv_read(int fd, void *buf, int ilen) 673{ 674 char *cp = buf; 675 int rv; 676 int len = ilen; 677 678 do { 679 rv = read(fd, cp, len); 680 cp += rv; 681 len -= rv; 682 } while (rv > 0); 683 684 if (rv < 0 || len != 0) 685 return (EIO); 686 687 return (0); 688} 689 690static int 691recv_impl(const char *snapname, nvlist_t *props, const char *origin, 692 boolean_t force, boolean_t resumable, int fd, 693 const dmu_replay_record_t *begin_record) 694{ 695 /* 696 * The receive ioctl is still legacy, so we need to construct our own 697 * zfs_cmd_t rather than using zfsc_ioctl(). 698 */ 699 zfs_cmd_t zc = { 0 }; 700 char *atp; 701 char *packed = NULL; 702 size_t size; 703 int error; 704 705 ASSERT3S(g_refcount, >, 0); 706 VERIFY3S(g_fd, !=, -1); 707 708 /* zc_name is name of containing filesystem */ 709 (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); 710 atp = strchr(zc.zc_name, '@'); 711 if (atp == NULL) 712 return (EINVAL); 713 *atp = '\0'; 714 715 /* if the fs does not exist, try its parent. 
*/ 716 if (!lzc_exists(zc.zc_name)) { 717 char *slashp = strrchr(zc.zc_name, '/'); 718 if (slashp == NULL) 719 return (ENOENT); 720 *slashp = '\0'; 721 722 } 723 724 /* zc_value is full name of the snapshot to create */ 725 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 726 727 if (props != NULL) { 728 /* zc_nvlist_src is props to set */ 729 packed = fnvlist_pack(props, &size); 730 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 731 zc.zc_nvlist_src_size = size; 732 } 733 734 /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ 735 if (origin != NULL) 736 (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); 737 738 /* zc_begin_record is non-byteswapped BEGIN record */ 739 if (begin_record == NULL) { 740 error = recv_read(fd, &zc.zc_begin_record, 741 sizeof (zc.zc_begin_record)); 742 if (error != 0) 743 goto out; 744 } else { 745 zc.zc_begin_record = *begin_record; 746 } 747 748 /* zc_cookie is fd to read from */ 749 zc.zc_cookie = fd; 750 751 /* zc guid is force flag */ 752 zc.zc_guid = force; 753 754 zc.zc_resumable = resumable; 755 756 /* zc_cleanup_fd is unused */ 757 zc.zc_cleanup_fd = -1; 758 759 error = ioctl(g_fd, ZFS_IOC_RECV, &zc); 760 if (error != 0) 761 error = errno; 762 763out: 764 if (packed != NULL) 765 fnvlist_pack_free(packed, size); 766 free((void*)(uintptr_t)zc.zc_nvlist_dst); 767 return (error); 768} 769 770/* 771 * The simplest receive case: receive from the specified fd, creating the 772 * specified snapshot. Apply the specified properties as "received" properties 773 * (which can be overridden by locally-set properties). If the stream is a 774 * clone, its origin snapshot must be specified by 'origin'. The 'force' 775 * flag will cause the target filesystem to be rolled back or destroyed if 776 * necessary to receive. 777 * 778 * Return 0 on success or an errno on failure. 779 * 780 * Note: this interface does not work on dedup'd streams 781 * (those with DMU_BACKUP_FEATURE_DEDUP). 
782 */ 783int 784lzc_receive(const char *snapname, nvlist_t *props, const char *origin, 785 boolean_t force, int fd) 786{ 787 return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL)); 788} 789 790/* 791 * Like lzc_receive, but if the receive fails due to premature stream 792 * termination, the intermediate state will be preserved on disk. In this 793 * case, ECKSUM will be returned. The receive may subsequently be resumed 794 * with a resuming send stream generated by lzc_send_resume(). 795 */ 796int 797lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, 798 boolean_t force, int fd) 799{ 800 return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL)); 801} 802 803/* 804 * Like lzc_receive, but allows the caller to read the begin record and then to 805 * pass it in. That could be useful if the caller wants to derive, for example, 806 * the snapname or the origin parameters based on the information contained in 807 * the begin record. 808 * The begin record must be in its original form as read from the stream, 809 * in other words, it should not be byteswapped. 810 * 811 * The 'resumable' parameter allows to obtain the same behavior as with 812 * lzc_receive_resumable. 813 */ 814int 815lzc_receive_with_header(const char *snapname, nvlist_t *props, 816 const char *origin, boolean_t force, boolean_t resumable, int fd, 817 const dmu_replay_record_t *begin_record) 818{ 819 if (begin_record == NULL) 820 return (EINVAL); 821 return (recv_impl(snapname, props, origin, force, resumable, fd, 822 begin_record)); 823} 824 825/* 826 * Roll back this filesystem or volume to its most recent snapshot. 827 * If snapnamebuf is not NULL, it will be filled in with the name 828 * of the most recent snapshot. 829 * Note that the latest snapshot may change if a new one is concurrently 830 * created or the current one is destroyed. lzc_rollback_to can be used 831 * to roll back to a specific latest snapshot. 
832 * 833 * Return 0 on success or an errno on failure. 834 */ 835int 836lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) 837{ 838 nvlist_t *args; 839 nvlist_t *result; 840 int err; 841 842 args = fnvlist_alloc(); 843 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 844 nvlist_free(args); 845 if (err == 0 && snapnamebuf != NULL) { 846 const char *snapname = fnvlist_lookup_string(result, "target"); 847 (void) strlcpy(snapnamebuf, snapname, snapnamelen); 848 } 849 nvlist_free(result); 850 851 return (err); 852} 853 854/* 855 * Roll back this filesystem or volume to the specified snapshot, 856 * if possible. 857 * 858 * Return 0 on success or an errno on failure. 859 */ 860int 861lzc_rollback_to(const char *fsname, const char *snapname) 862{ 863 nvlist_t *args; 864 nvlist_t *result; 865 int err; 866 867 args = fnvlist_alloc(); 868 fnvlist_add_string(args, "target", snapname); 869 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); 870 nvlist_free(args); 871 nvlist_free(result); 872 return (err); 873} 874 875/* 876 * Creates bookmarks. 877 * 878 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to 879 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and 880 * snapshots must be in the same pool. 881 * 882 * The returned results nvlist will have an entry for each bookmark that failed. 883 * The value will be the (int32) error code. 884 * 885 * The return value will be 0 if all bookmarks were created, otherwise it will 886 * be the errno of a (undetermined) bookmarks that failed. 
887 */ 888int 889lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) 890{ 891 nvpair_t *elem; 892 int error; 893 char pool[ZFS_MAX_DATASET_NAME_LEN]; 894 895 /* determine the pool name */ 896 elem = nvlist_next_nvpair(bookmarks, NULL); 897 if (elem == NULL) 898 return (0); 899 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 900 pool[strcspn(pool, "/#")] = '\0'; 901 902 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); 903 904 return (error); 905} 906 907/* 908 * Retrieve bookmarks. 909 * 910 * Retrieve the list of bookmarks for the given file system. The props 911 * parameter is an nvlist of property names (with no values) that will be 912 * returned for each bookmark. 913 * 914 * The following are valid properties on bookmarks, all of which are numbers 915 * (represented as uint64 in the nvlist) 916 * 917 * "guid" - globally unique identifier of the snapshot it refers to 918 * "createtxg" - txg when the snapshot it refers to was created 919 * "creation" - timestamp when the snapshot it refers to was created 920 * 921 * The format of the returned nvlist as follows: 922 * <short name of bookmark> -> { 923 * <name of property> -> { 924 * "value" -> uint64 925 * } 926 * } 927 */ 928int 929lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) 930{ 931 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); 932} 933 934/* 935 * Destroys bookmarks. 936 * 937 * The keys in the bmarks nvlist are the bookmarks to be destroyed. 938 * They must all be in the same pool. Bookmarks are specified as 939 * <fs>#<bmark>. 940 * 941 * Bookmarks that do not exist will be silently ignored. 942 * 943 * The return value will be 0 if all bookmarks that existed were destroyed. 944 * 945 * Otherwise the return value will be the errno of a (undetermined) bookmark 946 * that failed, no bookmarks will be destroyed, and the errlist will have an 947 * entry for each bookmarks that failed. 
The value in the errlist will be 948 * the (int32) error code. 949 */ 950int 951lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) 952{ 953 nvpair_t *elem; 954 int error; 955 char pool[ZFS_MAX_DATASET_NAME_LEN]; 956 957 /* determine the pool name */ 958 elem = nvlist_next_nvpair(bmarks, NULL); 959 if (elem == NULL) 960 return (0); 961 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 962 pool[strcspn(pool, "/#")] = '\0'; 963 964 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); 965 966 return (error); 967} 968 969static int 970lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, 971 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 972{ 973 int error; 974 nvlist_t *args; 975 976 args = fnvlist_alloc(); 977 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); 978 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); 979 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); 980 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit); 981 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); 982 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); 983 fnvlist_free(args); 984 985 return (error); 986} 987 988/* 989 * Executes a channel program. 990 * 991 * If this function returns 0 the channel program was successfully loaded and 992 * ran without failing. Note that individual commands the channel program ran 993 * may have failed and the channel program is responsible for reporting such 994 * errors through outnvl if they are important. 995 * 996 * This method may also return: 997 * 998 * EINVAL The program contains syntax errors, or an invalid memory or time 999 * limit was given. No part of the channel program was executed. 1000 * If caused by syntax errors, 'outnvl' contains information about the 1001 * errors. 
1002 * 1003 * EDOM The program was executed, but encountered a runtime error, such as 1004 * calling a function with incorrect arguments, invoking the error() 1005 * function directly, failing an assert() command, etc. Some portion 1006 * of the channel program may have executed and committed changes. 1007 * Information about the failure can be found in 'outnvl'. 1008 * 1009 * ENOMEM The program fully executed, but the output buffer was not large 1010 * enough to store the returned value. No output is returned through 1011 * 'outnvl'. 1012 * 1013 * ENOSPC The program was terminated because it exceeded its memory usage 1014 * limit. Some portion of the channel program may have executed and 1015 * committed changes to disk. No output is returned through 'outnvl'. 1016 * 1017 * ETIMEDOUT The program was terminated because it exceeded its Lua instruction 1018 * limit. Some portion of the channel program may have executed and 1019 * committed changes to disk. No output is returned through 'outnvl'. 1020 */ 1021int 1022lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, 1023 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1024{ 1025 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, 1026 memlimit, argnvl, outnvl)); 1027} 1028 1029/* 1030 * Creates a checkpoint for the specified pool. 1031 * 1032 * If this function returns 0 the pool was successfully checkpointed. 1033 * 1034 * This method may also return: 1035 * 1036 * ZFS_ERR_CHECKPOINT_EXISTS 1037 * The pool already has a checkpoint. A pools can only have one 1038 * checkpoint at most, at any given time. 1039 * 1040 * ZFS_ERR_DISCARDING_CHECKPOINT 1041 * ZFS is in the middle of discarding a checkpoint for this pool. 1042 * The pool can be checkpointed again once the discard is done. 1043 * 1044 * ZFS_DEVRM_IN_PROGRESS 1045 * A vdev is currently being removed. The pool cannot be 1046 * checkpointed until the device removal is done. 
1047 * 1048 * ZFS_VDEV_TOO_BIG 1049 * One or more top-level vdevs exceed the maximum vdev size 1050 * supported for this feature. 1051 */ 1052int 1053lzc_pool_checkpoint(const char *pool) 1054{ 1055 int error; 1056 1057 nvlist_t *result = NULL; 1058 nvlist_t *args = fnvlist_alloc(); 1059 1060 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); 1061 1062 fnvlist_free(args); 1063 fnvlist_free(result); 1064 1065 return (error); 1066} 1067 1068/* 1069 * Discard the checkpoint from the specified pool. 1070 * 1071 * If this function returns 0 the checkpoint was successfully discarded. 1072 * 1073 * This method may also return: 1074 * 1075 * ZFS_ERR_NO_CHECKPOINT 1076 * The pool does not have a checkpoint. 1077 * 1078 * ZFS_ERR_DISCARDING_CHECKPOINT 1079 * ZFS is already in the middle of discarding the checkpoint. 1080 */ 1081int 1082lzc_pool_checkpoint_discard(const char *pool) 1083{ 1084 int error; 1085 1086 nvlist_t *result = NULL; 1087 nvlist_t *args = fnvlist_alloc(); 1088 1089 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); 1090 1091 fnvlist_free(args); 1092 fnvlist_free(result); 1093 1094 return (error); 1095} 1096 1097/* 1098 * Executes a read-only channel program. 1099 * 1100 * A read-only channel program works programmatically the same way as a 1101 * normal channel program executed with lzc_channel_program(). The only 1102 * difference is it runs exclusively in open-context and therefore can 1103 * return faster. The downside to that, is that the program cannot change 1104 * on-disk state by calling functions from the zfs.sync submodule. 1105 * 1106 * The return values of this function (and their meaning) are exactly the 1107 * same as the ones described in lzc_channel_program(). 
1108 */ 1109int 1110lzc_channel_program_nosync(const char *pool, const char *program, 1111 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) 1112{ 1113 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, 1114 memlimit, argnvl, outnvl)); 1115} 1116 1117/* 1118 * Changes initializing state. 1119 * 1120 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. 1121 * The key is ignored. 1122 * 1123 * If there are errors related to vdev arguments, per-vdev errors are returned 1124 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where 1125 * guid is stringified with PRIu64, and errno is one of the following as 1126 * an int64_t: 1127 * - ENODEV if the device was not found 1128 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) 1129 * - EROFS if the device is not writeable 1130 * - EBUSY start requested but the device is already being initialized 1131 * - ESRCH cancel/suspend requested but device is not being initialized 1132 * 1133 * If the errlist is empty, then return value will be: 1134 * - EINVAL if one or more arguments was invalid 1135 * - Other spa_open failures 1136 * - 0 if the operation succeeded 1137 */ 1138int 1139lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, 1140 nvlist_t *vdevs, nvlist_t **errlist) 1141{ 1142 int error; 1143 nvlist_t *args = fnvlist_alloc(); 1144 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); 1145 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); 1146 1147 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); 1148 1149 fnvlist_free(args); 1150 1151 return (error); 1152} 1153