libzfs_core.c revision 247882
1238592Smm/* 2238592Smm * CDDL HEADER START 3238592Smm * 4238592Smm * The contents of this file are subject to the terms of the 5238592Smm * Common Development and Distribution License (the "License"). 6238592Smm * You may not use this file except in compliance with the License. 7238592Smm * 8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9238592Smm * or http://www.opensolaris.org/os/licensing. 10238592Smm * See the License for the specific language governing permissions 11238592Smm * and limitations under the License. 12238592Smm * 13238592Smm * When distributing Covered Code, include this CDDL HEADER in each 14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15238592Smm * If applicable, add the following below this CDDL HEADER, with the 16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying 17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner] 18238592Smm * 19238592Smm * CDDL HEADER END 20238592Smm */ 21238592Smm 22238592Smm/* 23238592Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24238592Smm */ 25238592Smm 26238592Smm/* 27238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. 28238592Smm * It has the following characteristics: 29238592Smm * 30238592Smm * - Thread Safe. libzfs_core is accessible concurrently from multiple 31238592Smm * threads. This is accomplished primarily by avoiding global data 32238592Smm * (e.g. caching). Since it's thread-safe, there is no reason for a 33238592Smm * process to have multiple libzfs "instances". Therefore, we store 34238592Smm * our few pieces of data (e.g. the file descriptor) in global 35238592Smm * variables. The fd is reference-counted so that the libzfs_core 36238592Smm * library can be "initialized" multiple times (e.g. by different 37238592Smm * consumers within the same process). 38238592Smm * 39238592Smm * - Committed Interface. The libzfs_core interface will be committed, 40238592Smm * therefore consumers can compile against it and be confident that 41238592Smm * their code will continue to work on future releases of this code. 42238592Smm * Currently, the interface is Evolving (not Committed), but we intend 43238592Smm * to commit to it once it is more complete and we determine that it 44238592Smm * meets the needs of all consumers. 45238592Smm * 46238592Smm * - Programatic Error Handling. libzfs_core communicates errors with 47238592Smm * defined error numbers, and doesn't print anything to stdout/stderr. 48238592Smm * 49238592Smm * - Thin Layer. libzfs_core is a thin layer, marshaling arguments 50238592Smm * to/from the kernel ioctls. There is generally a 1:1 correspondence 51238592Smm * between libzfs_core functions and ioctls to /dev/zfs. 52238592Smm * 53238592Smm * - Clear Atomicity. Because libzfs_core functions are generally 1:1 54238592Smm * with kernel ioctls, and kernel ioctls are general atomic, each 55238592Smm * libzfs_core function is atomic. For example, creating multiple 56238592Smm * snapshots with a single call to lzc_snapshot() is atomic -- it 57238592Smm * can't fail with only some of the requested snapshots created, even 58238592Smm * in the event of power loss or system crash. 59238592Smm * 60238592Smm * - Continued libzfs Support. Some higher-level operations (e.g. 61238592Smm * support for "zfs send -R") are too complicated to fit the scope of 62238592Smm * libzfs_core. This functionality will continue to live in libzfs. 63238592Smm * Where appropriate, libzfs will use the underlying atomic operations 64238592Smm * of libzfs_core. For example, libzfs may implement "zfs send -R | 65238592Smm * zfs receive" by using individual "send one snapshot", rename, 66238592Smm * destroy, and "receive one snapshot" operations in libzfs_core. 67238592Smm * /sbin/zfs and /zbin/zpool will link with both libzfs and 68238592Smm * libzfs_core. Other consumers should aim to use only libzfs_core, 69238592Smm * since that will be the supported, stable interface going forwards. 70238592Smm */ 71238592Smm 72247831Smm#define _IN_LIBZFS_CORE_ 73247831Smm 74238592Smm#include <libzfs_core.h> 75238592Smm#include <ctype.h> 76238592Smm#include <unistd.h> 77238592Smm#include <stdlib.h> 78238592Smm#include <string.h> 79238592Smm#include <errno.h> 80238592Smm#include <fcntl.h> 81238592Smm#include <pthread.h> 82238592Smm#include <sys/nvpair.h> 83238592Smm#include <sys/param.h> 84238592Smm#include <sys/types.h> 85238592Smm#include <sys/stat.h> 86238592Smm#include <sys/zfs_ioctl.h> 87247882Smm#include <libzfs_compat.h> 88238592Smm 89238592Smmstatic int g_fd; 90238592Smmstatic pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; 91238592Smmstatic int g_refcount; 92238592Smm 93238592Smmint 94238592Smmlibzfs_core_init(void) 95238592Smm{ 96238592Smm (void) pthread_mutex_lock(&g_lock); 97238592Smm if (g_refcount == 0) { 98238592Smm g_fd = open("/dev/zfs", O_RDWR); 99238592Smm if (g_fd < 0) { 100238592Smm (void) pthread_mutex_unlock(&g_lock); 101238592Smm return (errno); 102238592Smm } 103238592Smm } 104238592Smm g_refcount++; 105238592Smm (void) pthread_mutex_unlock(&g_lock); 106238592Smm return (0); 107238592Smm} 108238592Smm 109238592Smmvoid 110238592Smmlibzfs_core_fini(void) 111238592Smm{ 112238592Smm (void) pthread_mutex_lock(&g_lock); 113238592Smm ASSERT3S(g_refcount, >, 0); 114238592Smm g_refcount--; 115238592Smm if (g_refcount == 0) 116238592Smm (void) close(g_fd); 117238592Smm (void) pthread_mutex_unlock(&g_lock); 118238592Smm} 119238592Smm 120238592Smmstatic int 121238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name, 122238592Smm nvlist_t *source, nvlist_t **resultp) 123238592Smm{ 124238592Smm zfs_cmd_t zc = { 0 }; 125238592Smm int error = 0; 126238592Smm char *packed; 127238592Smm size_t size; 128238592Smm 129238592Smm ASSERT3S(g_refcount, >, 0); 130238592Smm 131238592Smm (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 132238592Smm 133238592Smm packed = fnvlist_pack(source, &size); 134238592Smm zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 135238592Smm zc.zc_nvlist_src_size = size; 136238592Smm 137238592Smm if (resultp != NULL) { 138238592Smm zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); 139238592Smm zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 140238592Smm malloc(zc.zc_nvlist_dst_size); 141247831Smm#ifdef illumos 142238592Smm if (zc.zc_nvlist_dst == NULL) { 143247831Smm#else 144247831Smm if (zc.zc_nvlist_dst == 0) { 145247831Smm#endif 146238592Smm error = ENOMEM; 147238592Smm goto out; 148238592Smm } 149238592Smm } 150238592Smm 151238592Smm while (ioctl(g_fd, ioc, &zc) != 0) { 152238592Smm if (errno == ENOMEM && resultp != NULL) { 153238592Smm free((void *)(uintptr_t)zc.zc_nvlist_dst); 154238592Smm zc.zc_nvlist_dst_size *= 2; 155238592Smm zc.zc_nvlist_dst = (uint64_t)(uintptr_t) 156238592Smm malloc(zc.zc_nvlist_dst_size); 157247831Smm#ifdef illumos 158238592Smm if (zc.zc_nvlist_dst == NULL) { 159247831Smm#else 160247831Smm if (zc.zc_nvlist_dst == 0) { 161247831Smm#endif 162238592Smm error = ENOMEM; 163238592Smm goto out; 164238592Smm } 165238592Smm } else { 166238592Smm error = errno; 167238592Smm break; 168238592Smm } 169238592Smm } 170238592Smm if (zc.zc_nvlist_dst_filled) { 171238592Smm *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, 172238592Smm zc.zc_nvlist_dst_size); 173238592Smm } else if (resultp != NULL) { 174238592Smm *resultp = NULL; 175238592Smm } 176238592Smm 177238592Smmout: 178238592Smm fnvlist_pack_free(packed, size); 179238592Smm free((void *)(uintptr_t)zc.zc_nvlist_dst); 180238592Smm return (error); 181238592Smm} 182238592Smm 183238592Smmint 184238592Smmlzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) 185238592Smm{ 186238592Smm int error; 187238592Smm nvlist_t *args = fnvlist_alloc(); 188238592Smm fnvlist_add_int32(args, "type", type); 189238592Smm if (props != NULL) 190238592Smm fnvlist_add_nvlist(args, "props", props); 191238592Smm error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); 192238592Smm nvlist_free(args); 193238592Smm return (error); 194238592Smm} 195238592Smm 196238592Smmint 197238592Smmlzc_clone(const char *fsname, const char *origin, 198238592Smm nvlist_t *props) 199238592Smm{ 200238592Smm int error; 201238592Smm nvlist_t *args = fnvlist_alloc(); 202238592Smm fnvlist_add_string(args, "origin", origin); 203238592Smm if (props != NULL) 204238592Smm fnvlist_add_nvlist(args, "props", props); 205238592Smm error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); 206238592Smm nvlist_free(args); 207238592Smm return (error); 208238592Smm} 209238592Smm 210238592Smm/* 211238592Smm * Creates snapshots. 212238592Smm * 213238592Smm * The keys in the snaps nvlist are the snapshots to be created. 214238592Smm * They must all be in the same pool. 215238592Smm * 216238592Smm * The props nvlist is properties to set. Currently only user properties 217238592Smm * are supported. { user:prop_name -> string value } 218238592Smm * 219238592Smm * The returned results nvlist will have an entry for each snapshot that failed. 220238592Smm * The value will be the (int32) error code. 221238592Smm * 222238592Smm * The return value will be 0 if all snapshots were created, otherwise it will 223238592Smm * be the errno of a (undetermined) snapshot that failed. 224238592Smm */ 225238592Smmint 226238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) 227238592Smm{ 228238592Smm nvpair_t *elem; 229238592Smm nvlist_t *args; 230238592Smm int error; 231238592Smm char pool[MAXNAMELEN]; 232238592Smm 233238592Smm *errlist = NULL; 234238592Smm 235238592Smm /* determine the pool name */ 236238592Smm elem = nvlist_next_nvpair(snaps, NULL); 237238592Smm if (elem == NULL) 238238592Smm return (0); 239238592Smm (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 240238592Smm pool[strcspn(pool, "/@")] = '\0'; 241238592Smm 242238592Smm args = fnvlist_alloc(); 243238592Smm fnvlist_add_nvlist(args, "snaps", snaps); 244238592Smm if (props != NULL) 245238592Smm fnvlist_add_nvlist(args, "props", props); 246238592Smm 247238592Smm error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); 248238592Smm nvlist_free(args); 249238592Smm 250238592Smm return (error); 251238592Smm} 252238592Smm 253238592Smm/* 254238592Smm * Destroys snapshots. 255238592Smm * 256238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed. 257238592Smm * They must all be in the same pool. 258238592Smm * 259238592Smm * Snapshots that do not exist will be silently ignored. 260238592Smm * 261238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the 262238592Smm * destroy operation will fail and none of the snapshots will be 263238592Smm * destroyed. 264238592Smm * 265238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be 266238592Smm * marked for deferred destruction, and will be destroyed when the last hold 267238592Smm * or clone is removed/destroyed. 268238592Smm * 269238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for 270238592Smm * later destruction if 'defer' is set) or didn't exist to begin with. 271238592Smm * 272238592Smm * Otherwise the return value will be the errno of a (undetermined) snapshot 273238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an 274238592Smm * entry for each snapshot that failed. The value in the errlist will be 275238592Smm * the (int32) error code. 276238592Smm */ 277238592Smmint 278238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) 279238592Smm{ 280238592Smm nvpair_t *elem; 281238592Smm nvlist_t *args; 282238592Smm int error; 283238592Smm char pool[MAXNAMELEN]; 284238592Smm 285238592Smm /* determine the pool name */ 286238592Smm elem = nvlist_next_nvpair(snaps, NULL); 287238592Smm if (elem == NULL) 288238592Smm return (0); 289238592Smm (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); 290238592Smm pool[strcspn(pool, "/@")] = '\0'; 291238592Smm 292238592Smm args = fnvlist_alloc(); 293238592Smm fnvlist_add_nvlist(args, "snaps", snaps); 294238592Smm if (defer) 295238592Smm fnvlist_add_boolean(args, "defer"); 296238592Smm 297238592Smm error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); 298238592Smm nvlist_free(args); 299238592Smm 300238592Smm return (error); 301238592Smm 302238592Smm} 303238592Smm 304238592Smmint 305238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap, 306238592Smm uint64_t *usedp) 307238592Smm{ 308238592Smm nvlist_t *args; 309238592Smm nvlist_t *result; 310238592Smm int err; 311238592Smm char fs[MAXNAMELEN]; 312238592Smm char *atp; 313238592Smm 314238592Smm /* determine the fs name */ 315238592Smm (void) strlcpy(fs, firstsnap, sizeof (fs)); 316238592Smm atp = strchr(fs, '@'); 317238592Smm if (atp == NULL) 318238592Smm return (EINVAL); 319238592Smm *atp = '\0'; 320238592Smm 321238592Smm args = fnvlist_alloc(); 322238592Smm fnvlist_add_string(args, "firstsnap", firstsnap); 323238592Smm 324238592Smm err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); 325238592Smm nvlist_free(args); 326238592Smm if (err == 0) 327238592Smm *usedp = fnvlist_lookup_uint64(result, "used"); 328238592Smm fnvlist_free(result); 329238592Smm 330238592Smm return (err); 331238592Smm} 332238592Smm 333238592Smmboolean_t 334238592Smmlzc_exists(const char *dataset) 335238592Smm{ 336238592Smm /* 337238592Smm * The objset_stats ioctl is still legacy, so we need to construct our 338238592Smm * own zfs_cmd_t rather than using zfsc_ioctl(). 339238592Smm */ 340238592Smm zfs_cmd_t zc = { 0 }; 341238592Smm 342238592Smm (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 343238592Smm return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); 344238592Smm} 345238592Smm 346238592Smm/* 347238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be sent. 348238592Smm */ 349238592Smmint 350238592Smmlzc_send(const char *snapname, const char *fromsnap, int fd) 351238592Smm{ 352238592Smm nvlist_t *args; 353238592Smm int err; 354238592Smm 355238592Smm args = fnvlist_alloc(); 356238592Smm fnvlist_add_int32(args, "fd", fd); 357238592Smm if (fromsnap != NULL) 358238592Smm fnvlist_add_string(args, "fromsnap", fromsnap); 359238592Smm err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); 360238592Smm nvlist_free(args); 361238592Smm return (err); 362238592Smm} 363238592Smm 364238592Smm/* 365238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be estimated. 366238592Smm */ 367238592Smmint 368238592Smmlzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) 369238592Smm{ 370238592Smm nvlist_t *args; 371238592Smm nvlist_t *result; 372238592Smm int err; 373238592Smm 374238592Smm args = fnvlist_alloc(); 375238592Smm if (fromsnap != NULL) 376238592Smm fnvlist_add_string(args, "fromsnap", fromsnap); 377238592Smm err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); 378238592Smm nvlist_free(args); 379238592Smm if (err == 0) 380238592Smm *spacep = fnvlist_lookup_uint64(result, "space"); 381238592Smm nvlist_free(result); 382238592Smm return (err); 383238592Smm} 384238592Smm 385238592Smmstatic int 386238592Smmrecv_read(int fd, void *buf, int ilen) 387238592Smm{ 388238592Smm char *cp = buf; 389238592Smm int rv; 390238592Smm int len = ilen; 391238592Smm 392238592Smm do { 393238592Smm rv = read(fd, cp, len); 394238592Smm cp += rv; 395238592Smm len -= rv; 396238592Smm } while (rv > 0); 397238592Smm 398238592Smm if (rv < 0 || len != 0) 399238592Smm return (EIO); 400238592Smm 401238592Smm return (0); 402238592Smm} 403238592Smm 404238592Smm/* 405238592Smm * The simplest receive case: receive from the specified fd, creating the 406238592Smm * specified snapshot. Apply the specified properties a "received" properties 407238592Smm * (which can be overridden by locally-set properties). If the stream is a 408238592Smm * clone, its origin snapshot must be specified by 'origin'. The 'force' 409238592Smm * flag will cause the target filesystem to be rolled back or destroyed if 410238592Smm * necessary to receive. 411238592Smm * 412238592Smm * Return 0 on success or an errno on failure. 413238592Smm * 414238592Smm * Note: this interface does not work on dedup'd streams 415238592Smm * (those with DMU_BACKUP_FEATURE_DEDUP). 416238592Smm */ 417238592Smmint 418238592Smmlzc_receive(const char *snapname, nvlist_t *props, const char *origin, 419238592Smm boolean_t force, int fd) 420238592Smm{ 421238592Smm /* 422238592Smm * The receive ioctl is still legacy, so we need to construct our own 423238592Smm * zfs_cmd_t rather than using zfsc_ioctl(). 424238592Smm */ 425238592Smm zfs_cmd_t zc = { 0 }; 426238592Smm char *atp; 427238592Smm char *packed = NULL; 428238592Smm size_t size; 429238592Smm dmu_replay_record_t drr; 430238592Smm int error; 431238592Smm 432238592Smm ASSERT3S(g_refcount, >, 0); 433238592Smm 434238592Smm /* zc_name is name of containing filesystem */ 435238592Smm (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); 436238592Smm atp = strchr(zc.zc_name, '@'); 437238592Smm if (atp == NULL) 438238592Smm return (EINVAL); 439238592Smm *atp = '\0'; 440238592Smm 441238592Smm /* if the fs does not exist, try its parent. */ 442238592Smm if (!lzc_exists(zc.zc_name)) { 443238592Smm char *slashp = strrchr(zc.zc_name, '/'); 444238592Smm if (slashp == NULL) 445238592Smm return (ENOENT); 446238592Smm *slashp = '\0'; 447238592Smm 448238592Smm } 449238592Smm 450238592Smm /* zc_value is full name of the snapshot to create */ 451238592Smm (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); 452238592Smm 453238592Smm if (props != NULL) { 454238592Smm /* zc_nvlist_src is props to set */ 455238592Smm packed = fnvlist_pack(props, &size); 456238592Smm zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; 457238592Smm zc.zc_nvlist_src_size = size; 458238592Smm } 459238592Smm 460238592Smm /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ 461238592Smm if (origin != NULL) 462238592Smm (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); 463238592Smm 464238592Smm /* zc_begin_record is non-byteswapped BEGIN record */ 465238592Smm error = recv_read(fd, &drr, sizeof (drr)); 466238592Smm if (error != 0) 467238592Smm goto out; 468238592Smm zc.zc_begin_record = drr.drr_u.drr_begin; 469238592Smm 470238592Smm /* zc_cookie is fd to read from */ 471238592Smm zc.zc_cookie = fd; 472238592Smm 473238592Smm /* zc guid is force flag */ 474238592Smm zc.zc_guid = force; 475238592Smm 476238592Smm /* zc_cleanup_fd is unused */ 477238592Smm zc.zc_cleanup_fd = -1; 478238592Smm 479238592Smm error = ioctl(g_fd, ZFS_IOC_RECV, &zc); 480238592Smm if (error != 0) 481238592Smm error = errno; 482238592Smm 483238592Smmout: 484238592Smm if (packed != NULL) 485238592Smm fnvlist_pack_free(packed, size); 486238592Smm free((void*)(uintptr_t)zc.zc_nvlist_dst); 487238592Smm return (error); 488238592Smm} 489