libzfs_core.c revision 248435
1238592Smm/* 2238592Smm * CDDL HEADER START 3238592Smm * 4238592Smm * The contents of this file are subject to the terms of the 5238592Smm * Common Development and Distribution License (the "License"). 6238592Smm * You may not use this file except in compliance with the License. 7238592Smm * 8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9238592Smm * or http://www.opensolaris.org/os/licensing. 10238592Smm * See the License for the specific language governing permissions 11238592Smm * and limitations under the License. 12238592Smm * 13238592Smm * When distributing Covered Code, include this CDDL HEADER in each 14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15238592Smm * If applicable, add the following below this CDDL HEADER, with the 16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying 17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner] 18238592Smm * 19238592Smm * CDDL HEADER END 20238592Smm */ 21238592Smm 22238592Smm/* 23238592Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24238592Smm */ 25238592Smm 26238592Smm/* 27238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 28238592Smm * It has the following characteristics: 29238592Smm * 30238592Smm * - Thread Safe. libzfs_core is accessible concurrently from multiple 31238592Smm * threads. This is accomplished primarily by avoiding global data 32238592Smm * (e.g. caching). Since it's thread-safe, there is no reason for a 33238592Smm * process to have multiple libzfs "instances". Therefore, we store 34238592Smm * our few pieces of data (e.g. the file descriptor) in global 35238592Smm * variables. The fd is reference-counted so that the libzfs_core 36238592Smm * library can be "initialized" multiple times (e.g. by different 37238592Smm * consumers within the same process). 38238592Smm * 39238592Smm * - Committed Interface. 
 *    The libzfs_core interface will be committed,
 *    therefore consumers can compile against it and be confident that
 *    their code will continue to work on future releases of this code.
 *    Currently, the interface is Evolving (not Committed), but we intend
 *    to commit to it once it is more complete and we determine that it
 *    meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *    defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *    to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *    between libzfs_core functions and ioctls to /dev/zfs.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *    with kernel ioctls, and kernel ioctls are generally atomic, each
 *    libzfs_core function is atomic.  For example, creating multiple
 *    snapshots with a single call to lzc_snapshot() is atomic -- it
 *    can't fail with only some of the requested snapshots created, even
 *    in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *    support for "zfs send -R") are too complicated to fit the scope of
 *    libzfs_core.  This functionality will continue to live in libzfs.
 *    Where appropriate, libzfs will use the underlying atomic operations
 *    of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *    zfs receive" by using individual "send one snapshot", rename,
 *    destroy, and "receive one snapshot" operations in libzfs_core.
 *    /sbin/zfs and /sbin/zpool will link with both libzfs and
 *    libzfs_core.
Other consumers should aim to use only libzfs_core, 69238592Smm * since that will be the supported, stable interface going forwards. 70238592Smm */ 71238592Smm 72247831Smm#define _IN_LIBZFS_CORE_ 73247831Smm 74238592Smm#include <libzfs_core.h> 75238592Smm#include <ctype.h> 76238592Smm#include <unistd.h> 77238592Smm#include <stdlib.h> 78238592Smm#include <string.h> 79238592Smm#include <errno.h> 80238592Smm#include <fcntl.h> 81238592Smm#include <pthread.h> 82238592Smm#include <sys/nvpair.h> 83238592Smm#include <sys/param.h> 84238592Smm#include <sys/types.h> 85238592Smm#include <sys/stat.h> 86238592Smm#include <sys/zfs_ioctl.h> 87247882Smm#include <libzfs_compat.h> 88238592Smm 89248435Smm#ifdef __FreeBSD__ 90248435Smmextern int zfs_ioctl_version; 91248435Smm#endif 92248435Smm 93238592Smmstatic int g_fd; 94238592Smmstatic pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 95238592Smmstatic int g_refcount; 96238592Smm 97238592Smmint 98238592Smmlibzfs_core_init(void) 99238592Smm{ 100238592Smm (void) pthread_mutex_lock(&g_lock); 101238592Smm if (g_refcount == 0) { 102238592Smm g_fd = open("/dev/zfs", O_RDWR); 103238592Smm if (g_fd < 0) { 104238592Smm (void) pthread_mutex_unlock(&g_lock); 105238592Smm return (errno); 106238592Smm } 107238592Smm } 108238592Smm g_refcount++; 109238592Smm (void) pthread_mutex_unlock(&g_lock); 110238592Smm return (0); 111238592Smm} 112238592Smm 113238592Smmvoid 114238592Smmlibzfs_core_fini(void) 115238592Smm{ 116238592Smm (void) pthread_mutex_lock(&g_lock); 117238592Smm ASSERT3S(g_refcount, >, 0); 118238592Smm g_refcount--; 119238592Smm if (g_refcount == 0) 120238592Smm (void) close(g_fd); 121238592Smm (void) pthread_mutex_unlock(&g_lock); 122238592Smm} 123238592Smm 124238592Smmstatic int 125238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name, 126238592Smm nvlist_t *source, nvlist_t **resultp) 127238592Smm{ 128238592Smm zfs_cmd_t zc = { 0 }; 129238592Smm int error = 0; 130238592Smm char *packed; 131248435Smm#ifdef __FreeBSD__ 132248435Smm 
nvlist_t *oldsource; 133248435Smm#endif 134238592Smm size_t size; 135238592Smm 136238592Smm ASSERT3S(g_refcount, >, 0); 137238592Smm 138238592Smm (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 139238592Smm 140248435Smm#ifdef __FreeBSD__ 141248435Smm if (zfs_ioctl_version < ZFS_IOCVER_LZC) { 142248435Smm oldsource = source; 143248435Smm error = lzc_compat_pre(&zc, &ioc, &source); 144248435Smm if (error) 145248435Smm return (error); 146248435Smm } 147248435Smm#endif 148248435Smm 149238592Smm packed = fnvlist_pack(source, &size); 150238592Smm zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 151238592Smm zc.zc_nvlist_src_size = size; 152238592Smm 153238592Smm if (resultp != NULL) { 154238592Smm zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 155238592Smm zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 156238592Smm malloc(zc.zc_nvlist_dst_size); 157247831Smm#ifdef illumos 158238592Smm if (zc.zc_nvlist_dst == NULL) { 159247831Smm#else 160247831Smm if (zc.zc_nvlist_dst == 0) { 161247831Smm#endif 162238592Smm error = ENOMEM; 163238592Smm goto out; 164238592Smm } 165238592Smm } 166238592Smm 167238592Smm while (ioctl(g_fd, ioc, &zc) != 0) { 168238592Smm if (errno == ENOMEM && resultp != NULL) { 169238592Smm free((void *)(uintptr_t)zc.zc_nvlist_dst); 170238592Smm zc.zc_nvlist_dst_size *= 2; 171238592Smm zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 172238592Smm malloc(zc.zc_nvlist_dst_size); 173247831Smm#ifdef illumos 174238592Smm if (zc.zc_nvlist_dst == NULL) { 175247831Smm#else 176247831Smm if (zc.zc_nvlist_dst == 0) { 177247831Smm#endif 178238592Smm error = ENOMEM; 179238592Smm goto out; 180238592Smm } 181238592Smm } else { 182238592Smm error = errno; 183238592Smm break; 184238592Smm } 185238592Smm } 186248435Smm 187248435Smm#ifdef __FreeBSD__ 188248435Smm if (zfs_ioctl_version < ZFS_IOCVER_LZC) 189248435Smm lzc_compat_post(&zc, ioc); 190248435Smm#endif 191238592Smm if (zc.zc_nvlist_dst_filled) { 192238592Smm *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 
193238592Smm zc.zc_nvlist_dst_size); 194238592Smm } else if (resultp != NULL) { 195238592Smm *resultp = NULL; 196238592Smm } 197248435Smm#ifdef __FreeBSD__ 198248435Smm if (zfs_ioctl_version < ZFS_IOCVER_LZC) 199248435Smm lzc_compat_outnvl(&zc, ioc, resultp); 200248435Smm#endif 201238592Smmout: 202248435Smm#ifdef __FreeBSD__ 203248435Smm if (zfs_ioctl_version < ZFS_IOCVER_LZC) { 204248435Smm if (source != oldsource) 205248435Smm nvlist_free(source); 206248435Smm source = oldsource; 207248435Smm } 208248435Smm#endif 209238592Smm fnvlist_pack_free(packed, size); 210238592Smm free((void *)(uintptr_t)zc.zc_nvlist_dst); 211238592Smm return (error); 212238592Smm} 213238592Smm 214238592Smmint 215238592Smmlzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) 216238592Smm{ 217238592Smm int error; 218238592Smm nvlist_t *args = fnvlist_alloc(); 219238592Smm fnvlist_add_int32(args, "type", type); 220238592Smm if (props != NULL) 221238592Smm fnvlist_add_nvlist(args, "props", props); 222238592Smm error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 223238592Smm nvlist_free(args); 224238592Smm return (error); 225238592Smm} 226238592Smm 227238592Smmint 228238592Smmlzc_clone(const char *fsname, const char *origin, 229238592Smm nvlist_t *props) 230238592Smm{ 231238592Smm int error; 232238592Smm nvlist_t *args = fnvlist_alloc(); 233238592Smm fnvlist_add_string(args, "origin", origin); 234238592Smm if (props != NULL) 235238592Smm fnvlist_add_nvlist(args, "props", props); 236238592Smm error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 237238592Smm nvlist_free(args); 238238592Smm return (error); 239238592Smm} 240238592Smm 241238592Smm/* 242238592Smm * Creates snapshots. 243238592Smm * 244238592Smm * The keys in the snaps nvlist are the snapshots to be created. 245238592Smm * They must all be in the same pool. 246238592Smm * 247238592Smm * The props nvlist is properties to set. Currently only user properties 248238592Smm * are supported. 
{ user:prop_name -> string value } 249238592Smm * 250238592Smm * The returned results nvlist will have an entry for each snapshot that failed. 251238592Smm * The value will be the (int32) error code. 252238592Smm * 253238592Smm * The return value will be 0 if all snapshots were created, otherwise it will 254238592Smm * be the errno of a (undetermined) snapshot that failed. 255238592Smm */ 256238592Smmint 257238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 258238592Smm{ 259238592Smm nvpair_t *elem; 260238592Smm nvlist_t *args; 261238592Smm int error; 262238592Smm char pool[MAXNAMELEN]; 263238592Smm 264238592Smm *errlist = NULL; 265238592Smm 266238592Smm /* determine the pool name */ 267238592Smm elem = nvlist_next_nvpair(snaps, NULL); 268238592Smm if (elem == NULL) 269238592Smm return (0); 270238592Smm (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 271238592Smm pool[strcspn(pool, "/@")] = '\0'; 272238592Smm 273238592Smm args = fnvlist_alloc(); 274238592Smm fnvlist_add_nvlist(args, "snaps", snaps); 275238592Smm if (props != NULL) 276238592Smm fnvlist_add_nvlist(args, "props", props); 277238592Smm 278238592Smm error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 279238592Smm nvlist_free(args); 280238592Smm 281238592Smm return (error); 282238592Smm} 283238592Smm 284238592Smm/* 285238592Smm * Destroys snapshots. 286238592Smm * 287238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed. 288238592Smm * They must all be in the same pool. 289238592Smm * 290238592Smm * Snapshots that do not exist will be silently ignored. 291238592Smm * 292238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the 293238592Smm * destroy operation will fail and none of the snapshots will be 294238592Smm * destroyed. 
295238592Smm * 296238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be 297238592Smm * marked for deferred destruction, and will be destroyed when the last hold 298238592Smm * or clone is removed/destroyed. 299238592Smm * 300238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for 301238592Smm * later destruction if 'defer' is set) or didn't exist to begin with. 302238592Smm * 303238592Smm * Otherwise the return value will be the errno of a (undetermined) snapshot 304238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an 305238592Smm * entry for each snapshot that failed. The value in the errlist will be 306238592Smm * the (int32) error code. 307238592Smm */ 308238592Smmint 309238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 310238592Smm{ 311238592Smm nvpair_t *elem; 312238592Smm nvlist_t *args; 313238592Smm int error; 314238592Smm char pool[MAXNAMELEN]; 315238592Smm 316238592Smm /* determine the pool name */ 317238592Smm elem = nvlist_next_nvpair(snaps, NULL); 318238592Smm if (elem == NULL) 319238592Smm return (0); 320238592Smm (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 321238592Smm pool[strcspn(pool, "/@")] = '\0'; 322238592Smm 323238592Smm args = fnvlist_alloc(); 324238592Smm fnvlist_add_nvlist(args, "snaps", snaps); 325238592Smm if (defer) 326238592Smm fnvlist_add_boolean(args, "defer"); 327238592Smm 328238592Smm error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 329238592Smm nvlist_free(args); 330238592Smm 331238592Smm return (error); 332238592Smm 333238592Smm} 334238592Smm 335238592Smmint 336238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap, 337238592Smm uint64_t *usedp) 338238592Smm{ 339238592Smm nvlist_t *args; 340238592Smm nvlist_t *result; 341238592Smm int err; 342238592Smm char fs[MAXNAMELEN]; 343238592Smm char *atp; 344238592Smm 345238592Smm /* determine the fs name */ 346238592Smm 
(void) strlcpy(fs, firstsnap, sizeof (fs)); 347238592Smm atp = strchr(fs, '@'); 348238592Smm if (atp == NULL) 349238592Smm return (EINVAL); 350238592Smm *atp = '\0'; 351238592Smm 352238592Smm args = fnvlist_alloc(); 353238592Smm fnvlist_add_string(args, "firstsnap", firstsnap); 354238592Smm 355238592Smm err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 356238592Smm nvlist_free(args); 357238592Smm if (err == 0) 358238592Smm *usedp = fnvlist_lookup_uint64(result, "used"); 359238592Smm fnvlist_free(result); 360238592Smm 361238592Smm return (err); 362238592Smm} 363238592Smm 364238592Smmboolean_t 365238592Smmlzc_exists(const char *dataset) 366238592Smm{ 367238592Smm /* 368238592Smm * The objset_stats ioctl is still legacy, so we need to construct our 369238592Smm * own zfs_cmd_t rather than using zfsc_ioctl(). 370238592Smm */ 371238592Smm zfs_cmd_t zc = { 0 }; 372238592Smm 373238592Smm (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 374238592Smm return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 375238592Smm} 376238592Smm 377238592Smm/* 378238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be sent. 379238592Smm */ 380238592Smmint 381238592Smmlzc_send(const char *snapname, const char *fromsnap, int fd) 382238592Smm{ 383238592Smm nvlist_t *args; 384238592Smm int err; 385238592Smm 386238592Smm args = fnvlist_alloc(); 387238592Smm fnvlist_add_int32(args, "fd", fd); 388238592Smm if (fromsnap != NULL) 389238592Smm fnvlist_add_string(args, "fromsnap", fromsnap); 390238592Smm err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 391238592Smm nvlist_free(args); 392238592Smm return (err); 393238592Smm} 394238592Smm 395238592Smm/* 396238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be estimated. 
397238592Smm */ 398238592Smmint 399238592Smmlzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) 400238592Smm{ 401238592Smm nvlist_t *args; 402238592Smm nvlist_t *result; 403238592Smm int err; 404238592Smm 405238592Smm args = fnvlist_alloc(); 406238592Smm if (fromsnap != NULL) 407238592Smm fnvlist_add_string(args, "fromsnap", fromsnap); 408238592Smm err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 409238592Smm nvlist_free(args); 410238592Smm if (err == 0) 411238592Smm *spacep = fnvlist_lookup_uint64(result, "space"); 412238592Smm nvlist_free(result); 413238592Smm return (err); 414238592Smm} 415238592Smm 416238592Smmstatic int 417238592Smmrecv_read(int fd, void *buf, int ilen) 418238592Smm{ 419238592Smm char *cp = buf; 420238592Smm int rv; 421238592Smm int len = ilen; 422238592Smm 423238592Smm do { 424238592Smm rv = read(fd, cp, len); 425238592Smm cp += rv; 426238592Smm len -= rv; 427238592Smm } while (rv > 0); 428238592Smm 429238592Smm if (rv < 0 || len != 0) 430238592Smm return (EIO); 431238592Smm 432238592Smm return (0); 433238592Smm} 434238592Smm 435238592Smm/* 436238592Smm * The simplest receive case: receive from the specified fd, creating the 437238592Smm * specified snapshot. Apply the specified properties a "received" properties 438238592Smm * (which can be overridden by locally-set properties). If the stream is a 439238592Smm * clone, its origin snapshot must be specified by 'origin'. The 'force' 440238592Smm * flag will cause the target filesystem to be rolled back or destroyed if 441238592Smm * necessary to receive. 442238592Smm * 443238592Smm * Return 0 on success or an errno on failure. 444238592Smm * 445238592Smm * Note: this interface does not work on dedup'd streams 446238592Smm * (those with DMU_BACKUP_FEATURE_DEDUP). 
447238592Smm */ 448238592Smmint 449238592Smmlzc_receive(const char *snapname, nvlist_t *props, const char *origin, 450238592Smm boolean_t force, int fd) 451238592Smm{ 452238592Smm /* 453238592Smm * The receive ioctl is still legacy, so we need to construct our own 454238592Smm * zfs_cmd_t rather than using zfsc_ioctl(). 455238592Smm */ 456238592Smm zfs_cmd_t zc = { 0 }; 457238592Smm char *atp; 458238592Smm char *packed = NULL; 459238592Smm size_t size; 460238592Smm dmu_replay_record_t drr; 461238592Smm int error; 462238592Smm 463238592Smm ASSERT3S(g_refcount, >, 0); 464238592Smm 465238592Smm /* zc_name is name of containing filesystem */ 466238592Smm (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); 467238592Smm atp = strchr(zc.zc_name, '@'); 468238592Smm if (atp == NULL) 469238592Smm return (EINVAL); 470238592Smm *atp = '\0'; 471238592Smm 472238592Smm /* if the fs does not exist, try its parent. */ 473238592Smm if (!lzc_exists(zc.zc_name)) { 474238592Smm char *slashp = strrchr(zc.zc_name, '/'); 475238592Smm if (slashp == NULL) 476238592Smm return (ENOENT); 477238592Smm *slashp = '\0'; 478238592Smm 479238592Smm } 480238592Smm 481238592Smm /* zc_value is full name of the snapshot to create */ 482238592Smm (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 483238592Smm 484238592Smm if (props != NULL) { 485238592Smm /* zc_nvlist_src is props to set */ 486238592Smm packed = fnvlist_pack(props, &size); 487238592Smm zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 488238592Smm zc.zc_nvlist_src_size = size; 489238592Smm } 490238592Smm 491238592Smm /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ 492238592Smm if (origin != NULL) 493238592Smm (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); 494238592Smm 495238592Smm /* zc_begin_record is non-byteswapped BEGIN record */ 496238592Smm error = recv_read(fd, &drr, sizeof (drr)); 497238592Smm if (error != 0) 498238592Smm goto out; 499238592Smm zc.zc_begin_record = drr.drr_u.drr_begin; 
500238592Smm 501238592Smm /* zc_cookie is fd to read from */ 502238592Smm zc.zc_cookie = fd; 503238592Smm 504238592Smm /* zc guid is force flag */ 505238592Smm zc.zc_guid = force; 506238592Smm 507238592Smm /* zc_cleanup_fd is unused */ 508238592Smm zc.zc_cleanup_fd = -1; 509238592Smm 510238592Smm error = ioctl(g_fd, ZFS_IOC_RECV, &zc); 511238592Smm if (error != 0) 512238592Smm error = errno; 513238592Smm 514238592Smmout: 515238592Smm if (packed != NULL) 516238592Smm fnvlist_pack_free(packed, size); 517238592Smm free((void*)(uintptr_t)zc.zc_nvlist_dst); 518238592Smm return (error); 519238592Smm} 520