libzfs_core.c revision 247831
1238592Smm/*
2238592Smm * CDDL HEADER START
3238592Smm *
4238592Smm * The contents of this file are subject to the terms of the
5238592Smm * Common Development and Distribution License (the "License").
6238592Smm * You may not use this file except in compliance with the License.
7238592Smm *
8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9238592Smm * or http://www.opensolaris.org/os/licensing.
10238592Smm * See the License for the specific language governing permissions
11238592Smm * and limitations under the License.
12238592Smm *
13238592Smm * When distributing Covered Code, include this CDDL HEADER in each
14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15238592Smm * If applicable, add the following below this CDDL HEADER, with the
16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying
17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner]
18238592Smm *
19238592Smm * CDDL HEADER END
20238592Smm */
21238592Smm
22238592Smm/*
23238592Smm * Copyright (c) 2012 by Delphix. All rights reserved.
24238592Smm */
25238592Smm
26238592Smm/*
27238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
28238592Smm * It has the following characteristics:
29238592Smm *
30238592Smm *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
31238592Smm *  threads.  This is accomplished primarily by avoiding global data
32238592Smm *  (e.g. caching).  Since it's thread-safe, there is no reason for a
33238592Smm *  process to have multiple libzfs "instances".  Therefore, we store
34238592Smm *  our few pieces of data (e.g. the file descriptor) in global
35238592Smm *  variables.  The fd is reference-counted so that the libzfs_core
36238592Smm *  library can be "initialized" multiple times (e.g. by different
37238592Smm *  consumers within the same process).
38238592Smm *
39238592Smm *  - Committed Interface.  The libzfs_core interface will be committed,
40238592Smm *  therefore consumers can compile against it and be confident that
41238592Smm *  their code will continue to work on future releases of this code.
42238592Smm *  Currently, the interface is Evolving (not Committed), but we intend
43238592Smm *  to commit to it once it is more complete and we determine that it
44238592Smm *  meets the needs of all consumers.
45238592Smm *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *  defined error numbers, and doesn't print anything to stdout/stderr.
48238592Smm *
49238592Smm *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
50238592Smm *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
51238592Smm *  between libzfs_core functions and ioctls to /dev/zfs.
52238592Smm *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
 *  libzfs_core function is atomic.  For example, creating multiple
 *  snapshots with a single call to lzc_snapshot() is atomic -- it
 *  can't fail with only some of the requested snapshots created, even
 *  in the event of power loss or system crash.
59238592Smm *
60238592Smm *  - Continued libzfs Support.  Some higher-level operations (e.g.
61238592Smm *  support for "zfs send -R") are too complicated to fit the scope of
62238592Smm *  libzfs_core.  This functionality will continue to live in libzfs.
63238592Smm *  Where appropriate, libzfs will use the underlying atomic operations
64238592Smm *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
65238592Smm *  zfs receive" by using individual "send one snapshot", rename,
66238592Smm *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
 *  libzfs_core.  Other consumers should aim to use only libzfs_core,
 *  since that will be the supported, stable interface going forwards.
 */
70238592Smm */
71238592Smm
72247831Smm#define _IN_LIBZFS_CORE_
73247831Smm
74238592Smm#include <libzfs_core.h>
75238592Smm#include <ctype.h>
76238592Smm#include <unistd.h>
77238592Smm#include <stdlib.h>
78238592Smm#include <string.h>
79238592Smm#include <errno.h>
80238592Smm#include <fcntl.h>
81238592Smm#include <pthread.h>
82238592Smm#include <sys/nvpair.h>
83238592Smm#include <sys/param.h>
84238592Smm#include <sys/types.h>
85238592Smm#include <sys/stat.h>
86238592Smm#include <sys/zfs_ioctl.h>
87247831Smm#include <libzfs_impl.h>
88238592Smm
/* Held by libzfs_core_init()/libzfs_core_fini() around g_refcount/g_fd. */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
/* Incremented by each successful init, decremented by each fini. */
static int g_refcount;
/* Descriptor for /dev/zfs; opened when g_refcount goes 0 -> 1. */
static int g_fd;
92238592Smm
93238592Smmint
94238592Smmlibzfs_core_init(void)
95238592Smm{
96238592Smm	(void) pthread_mutex_lock(&g_lock);
97238592Smm	if (g_refcount == 0) {
98238592Smm		g_fd = open("/dev/zfs", O_RDWR);
99238592Smm		if (g_fd < 0) {
100238592Smm			(void) pthread_mutex_unlock(&g_lock);
101238592Smm			return (errno);
102238592Smm		}
103238592Smm	}
104238592Smm	g_refcount++;
105238592Smm	(void) pthread_mutex_unlock(&g_lock);
106238592Smm	return (0);
107238592Smm}
108238592Smm
109238592Smmvoid
110238592Smmlibzfs_core_fini(void)
111238592Smm{
112238592Smm	(void) pthread_mutex_lock(&g_lock);
113238592Smm	ASSERT3S(g_refcount, >, 0);
114238592Smm	g_refcount--;
115238592Smm	if (g_refcount == 0)
116238592Smm		(void) close(g_fd);
117238592Smm	(void) pthread_mutex_unlock(&g_lock);
118238592Smm}
119238592Smm
120238592Smmstatic int
121238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name,
122238592Smm    nvlist_t *source, nvlist_t **resultp)
123238592Smm{
124238592Smm	zfs_cmd_t zc = { 0 };
125238592Smm	int error = 0;
126238592Smm	char *packed;
127238592Smm	size_t size;
128238592Smm
129238592Smm	ASSERT3S(g_refcount, >, 0);
130238592Smm
131238592Smm	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
132238592Smm
133238592Smm	packed = fnvlist_pack(source, &size);
134238592Smm	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
135238592Smm	zc.zc_nvlist_src_size = size;
136238592Smm
137238592Smm	if (resultp != NULL) {
138238592Smm		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
139238592Smm		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
140238592Smm		    malloc(zc.zc_nvlist_dst_size);
141247831Smm#ifdef illumos
142238592Smm		if (zc.zc_nvlist_dst == NULL) {
143247831Smm#else
144247831Smm		if (zc.zc_nvlist_dst == 0) {
145247831Smm#endif
146238592Smm			error = ENOMEM;
147238592Smm			goto out;
148238592Smm		}
149238592Smm	}
150238592Smm
151238592Smm	while (ioctl(g_fd, ioc, &zc) != 0) {
152238592Smm		if (errno == ENOMEM && resultp != NULL) {
153238592Smm			free((void *)(uintptr_t)zc.zc_nvlist_dst);
154238592Smm			zc.zc_nvlist_dst_size *= 2;
155238592Smm			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
156238592Smm			    malloc(zc.zc_nvlist_dst_size);
157247831Smm#ifdef illumos
158238592Smm			if (zc.zc_nvlist_dst == NULL) {
159247831Smm#else
160247831Smm			if (zc.zc_nvlist_dst == 0) {
161247831Smm#endif
162238592Smm				error = ENOMEM;
163238592Smm				goto out;
164238592Smm			}
165238592Smm		} else {
166238592Smm			error = errno;
167238592Smm			break;
168238592Smm		}
169238592Smm	}
170238592Smm	if (zc.zc_nvlist_dst_filled) {
171238592Smm		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
172238592Smm		    zc.zc_nvlist_dst_size);
173238592Smm	} else if (resultp != NULL) {
174238592Smm		*resultp = NULL;
175238592Smm	}
176238592Smm
177238592Smmout:
178238592Smm	fnvlist_pack_free(packed, size);
179238592Smm	free((void *)(uintptr_t)zc.zc_nvlist_dst);
180238592Smm	return (error);
181238592Smm}
182238592Smm
183238592Smmint
184238592Smmlzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
185238592Smm{
186238592Smm	int error;
187238592Smm	nvlist_t *args = fnvlist_alloc();
188238592Smm	fnvlist_add_int32(args, "type", type);
189238592Smm	if (props != NULL)
190238592Smm		fnvlist_add_nvlist(args, "props", props);
191238592Smm	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
192238592Smm	nvlist_free(args);
193238592Smm	return (error);
194238592Smm}
195238592Smm
196238592Smmint
197238592Smmlzc_clone(const char *fsname, const char *origin,
198238592Smm    nvlist_t *props)
199238592Smm{
200238592Smm	int error;
201238592Smm	nvlist_t *args = fnvlist_alloc();
202238592Smm	fnvlist_add_string(args, "origin", origin);
203238592Smm	if (props != NULL)
204238592Smm		fnvlist_add_nvlist(args, "props", props);
205238592Smm	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
206238592Smm	nvlist_free(args);
207238592Smm	return (error);
208238592Smm}
209238592Smm
210238592Smm/*
211238592Smm * Creates snapshots.
212238592Smm *
213238592Smm * The keys in the snaps nvlist are the snapshots to be created.
214238592Smm * They must all be in the same pool.
215238592Smm *
216238592Smm * The props nvlist is properties to set.  Currently only user properties
217238592Smm * are supported.  { user:prop_name -> string value }
218238592Smm *
219238592Smm * The returned results nvlist will have an entry for each snapshot that failed.
220238592Smm * The value will be the (int32) error code.
221238592Smm *
222238592Smm * The return value will be 0 if all snapshots were created, otherwise it will
223238592Smm * be the errno of a (undetermined) snapshot that failed.
224238592Smm */
225238592Smmint
226238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
227238592Smm{
228238592Smm	nvpair_t *elem;
229238592Smm	nvlist_t *args;
230238592Smm	int error;
231238592Smm	char pool[MAXNAMELEN];
232238592Smm
233238592Smm	*errlist = NULL;
234238592Smm
235238592Smm	/* determine the pool name */
236238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
237238592Smm	if (elem == NULL)
238238592Smm		return (0);
239238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
240238592Smm	pool[strcspn(pool, "/@")] = '\0';
241238592Smm
242238592Smm	args = fnvlist_alloc();
243238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
244238592Smm	if (props != NULL)
245238592Smm		fnvlist_add_nvlist(args, "props", props);
246238592Smm
247238592Smm	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
248238592Smm	nvlist_free(args);
249238592Smm
250238592Smm	return (error);
251238592Smm}
252238592Smm
253238592Smm/*
254238592Smm * Destroys snapshots.
255238592Smm *
256238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed.
257238592Smm * They must all be in the same pool.
258238592Smm *
259238592Smm * Snapshots that do not exist will be silently ignored.
260238592Smm *
261238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the
262238592Smm * destroy operation will fail and none of the snapshots will be
263238592Smm * destroyed.
264238592Smm *
265238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be
266238592Smm * marked for deferred destruction, and will be destroyed when the last hold
267238592Smm * or clone is removed/destroyed.
268238592Smm *
269238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for
270238592Smm * later destruction if 'defer' is set) or didn't exist to begin with.
271238592Smm *
272238592Smm * Otherwise the return value will be the errno of a (undetermined) snapshot
273238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an
274238592Smm * entry for each snapshot that failed.  The value in the errlist will be
275238592Smm * the (int32) error code.
276238592Smm */
277238592Smmint
278238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
279238592Smm{
280238592Smm	nvpair_t *elem;
281238592Smm	nvlist_t *args;
282238592Smm	int error;
283238592Smm	char pool[MAXNAMELEN];
284238592Smm
285238592Smm	/* determine the pool name */
286238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
287238592Smm	if (elem == NULL)
288238592Smm		return (0);
289238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
290238592Smm	pool[strcspn(pool, "/@")] = '\0';
291238592Smm
292238592Smm	args = fnvlist_alloc();
293238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
294238592Smm	if (defer)
295238592Smm		fnvlist_add_boolean(args, "defer");
296238592Smm
297238592Smm	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
298238592Smm	nvlist_free(args);
299238592Smm
300238592Smm	return (error);
301238592Smm
302238592Smm}
303238592Smm
304238592Smmint
305238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap,
306238592Smm    uint64_t *usedp)
307238592Smm{
308238592Smm	nvlist_t *args;
309238592Smm	nvlist_t *result;
310238592Smm	int err;
311238592Smm	char fs[MAXNAMELEN];
312238592Smm	char *atp;
313238592Smm
314238592Smm	/* determine the fs name */
315238592Smm	(void) strlcpy(fs, firstsnap, sizeof (fs));
316238592Smm	atp = strchr(fs, '@');
317238592Smm	if (atp == NULL)
318238592Smm		return (EINVAL);
319238592Smm	*atp = '\0';
320238592Smm
321238592Smm	args = fnvlist_alloc();
322238592Smm	fnvlist_add_string(args, "firstsnap", firstsnap);
323238592Smm
324238592Smm	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
325238592Smm	nvlist_free(args);
326238592Smm	if (err == 0)
327238592Smm		*usedp = fnvlist_lookup_uint64(result, "used");
328238592Smm	fnvlist_free(result);
329238592Smm
330238592Smm	return (err);
331238592Smm}
332238592Smm
333238592Smmboolean_t
334238592Smmlzc_exists(const char *dataset)
335238592Smm{
336238592Smm	/*
337238592Smm	 * The objset_stats ioctl is still legacy, so we need to construct our
338238592Smm	 * own zfs_cmd_t rather than using zfsc_ioctl().
339238592Smm	 */
340238592Smm	zfs_cmd_t zc = { 0 };
341238592Smm
342238592Smm	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
343238592Smm	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
344238592Smm}
345238592Smm
346238592Smm/*
347238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be sent.
348238592Smm */
349238592Smmint
350238592Smmlzc_send(const char *snapname, const char *fromsnap, int fd)
351238592Smm{
352238592Smm	nvlist_t *args;
353238592Smm	int err;
354238592Smm
355238592Smm	args = fnvlist_alloc();
356238592Smm	fnvlist_add_int32(args, "fd", fd);
357238592Smm	if (fromsnap != NULL)
358238592Smm		fnvlist_add_string(args, "fromsnap", fromsnap);
359238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
360238592Smm	nvlist_free(args);
361238592Smm	return (err);
362238592Smm}
363238592Smm
364238592Smm/*
365238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
366238592Smm */
367238592Smmint
368238592Smmlzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
369238592Smm{
370238592Smm	nvlist_t *args;
371238592Smm	nvlist_t *result;
372238592Smm	int err;
373238592Smm
374238592Smm	args = fnvlist_alloc();
375238592Smm	if (fromsnap != NULL)
376238592Smm		fnvlist_add_string(args, "fromsnap", fromsnap);
377238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
378238592Smm	nvlist_free(args);
379238592Smm	if (err == 0)
380238592Smm		*spacep = fnvlist_lookup_uint64(result, "space");
381238592Smm	nvlist_free(result);
382238592Smm	return (err);
383238592Smm}
384238592Smm
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Returns 0 once the buffer has been filled completely.  Returns EIO if
 * read(2) fails or the stream ends before 'ilen' bytes have arrived.
 * Reads interrupted by a signal (EINTR) are retried.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	/*
	 * Stop as soon as the buffer is full; no trailing zero-length
	 * read is issued.
	 */
	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;		/* stream ended early */

		/* Only advance by a count known to be positive. */
		cp += rv;
		len -= (int)rv;
	}

	return (len != 0 ? EIO : 0);
}
403238592Smm
404238592Smm/*
405238592Smm * The simplest receive case: receive from the specified fd, creating the
406238592Smm * specified snapshot.  Apply the specified properties a "received" properties
407238592Smm * (which can be overridden by locally-set properties).  If the stream is a
408238592Smm * clone, its origin snapshot must be specified by 'origin'.  The 'force'
409238592Smm * flag will cause the target filesystem to be rolled back or destroyed if
410238592Smm * necessary to receive.
411238592Smm *
412238592Smm * Return 0 on success or an errno on failure.
413238592Smm *
414238592Smm * Note: this interface does not work on dedup'd streams
415238592Smm * (those with DMU_BACKUP_FEATURE_DEDUP).
416238592Smm */
417238592Smmint
418238592Smmlzc_receive(const char *snapname, nvlist_t *props, const char *origin,
419238592Smm    boolean_t force, int fd)
420238592Smm{
421238592Smm	/*
422238592Smm	 * The receive ioctl is still legacy, so we need to construct our own
423238592Smm	 * zfs_cmd_t rather than using zfsc_ioctl().
424238592Smm	 */
425238592Smm	zfs_cmd_t zc = { 0 };
426238592Smm	char *atp;
427238592Smm	char *packed = NULL;
428238592Smm	size_t size;
429238592Smm	dmu_replay_record_t drr;
430238592Smm	int error;
431238592Smm
432238592Smm	ASSERT3S(g_refcount, >, 0);
433238592Smm
434238592Smm	/* zc_name is name of containing filesystem */
435238592Smm	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
436238592Smm	atp = strchr(zc.zc_name, '@');
437238592Smm	if (atp == NULL)
438238592Smm		return (EINVAL);
439238592Smm	*atp = '\0';
440238592Smm
441238592Smm	/* if the fs does not exist, try its parent. */
442238592Smm	if (!lzc_exists(zc.zc_name)) {
443238592Smm		char *slashp = strrchr(zc.zc_name, '/');
444238592Smm		if (slashp == NULL)
445238592Smm			return (ENOENT);
446238592Smm		*slashp = '\0';
447238592Smm
448238592Smm	}
449238592Smm
450238592Smm	/* zc_value is full name of the snapshot to create */
451238592Smm	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
452238592Smm
453238592Smm	if (props != NULL) {
454238592Smm		/* zc_nvlist_src is props to set */
455238592Smm		packed = fnvlist_pack(props, &size);
456238592Smm		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
457238592Smm		zc.zc_nvlist_src_size = size;
458238592Smm	}
459238592Smm
460238592Smm	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
461238592Smm	if (origin != NULL)
462238592Smm		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
463238592Smm
464238592Smm	/* zc_begin_record is non-byteswapped BEGIN record */
465238592Smm	error = recv_read(fd, &drr, sizeof (drr));
466238592Smm	if (error != 0)
467238592Smm		goto out;
468238592Smm	zc.zc_begin_record = drr.drr_u.drr_begin;
469238592Smm
470238592Smm	/* zc_cookie is fd to read from */
471238592Smm	zc.zc_cookie = fd;
472238592Smm
473238592Smm	/* zc guid is force flag */
474238592Smm	zc.zc_guid = force;
475238592Smm
476238592Smm	/* zc_cleanup_fd is unused */
477238592Smm	zc.zc_cleanup_fd = -1;
478238592Smm
479238592Smm	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
480238592Smm	if (error != 0)
481238592Smm		error = errno;
482238592Smm
483238592Smmout:
484238592Smm	if (packed != NULL)
485238592Smm		fnvlist_pack_free(packed, size);
486238592Smm	free((void*)(uintptr_t)zc.zc_nvlist_dst);
487238592Smm	return (error);
488238592Smm}
489