libzfs_core.c revision 248435
1238592Smm/*
2238592Smm * CDDL HEADER START
3238592Smm *
4238592Smm * The contents of this file are subject to the terms of the
5238592Smm * Common Development and Distribution License (the "License").
6238592Smm * You may not use this file except in compliance with the License.
7238592Smm *
8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9238592Smm * or http://www.opensolaris.org/os/licensing.
10238592Smm * See the License for the specific language governing permissions
11238592Smm * and limitations under the License.
12238592Smm *
13238592Smm * When distributing Covered Code, include this CDDL HEADER in each
14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15238592Smm * If applicable, add the following below this CDDL HEADER, with the
16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying
17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner]
18238592Smm *
19238592Smm * CDDL HEADER END
20238592Smm */
21238592Smm
22238592Smm/*
23238592Smm * Copyright (c) 2012 by Delphix. All rights reserved.
24238592Smm */
25238592Smm
26238592Smm/*
27238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
28238592Smm * It has the following characteristics:
29238592Smm *
30238592Smm *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
31238592Smm *  threads.  This is accomplished primarily by avoiding global data
32238592Smm *  (e.g. caching).  Since it's thread-safe, there is no reason for a
33238592Smm *  process to have multiple libzfs "instances".  Therefore, we store
34238592Smm *  our few pieces of data (e.g. the file descriptor) in global
35238592Smm *  variables.  The fd is reference-counted so that the libzfs_core
36238592Smm *  library can be "initialized" multiple times (e.g. by different
37238592Smm *  consumers within the same process).
38238592Smm *
39238592Smm *  - Committed Interface.  The libzfs_core interface will be committed,
40238592Smm *  therefore consumers can compile against it and be confident that
41238592Smm *  their code will continue to work on future releases of this code.
42238592Smm *  Currently, the interface is Evolving (not Committed), but we intend
43238592Smm *  to commit to it once it is more complete and we determine that it
44238592Smm *  meets the needs of all consumers.
45238592Smm *
46238592Smm *  - Programmatic Error Handling.  libzfs_core communicates errors with
47238592Smm *  defined error numbers, and doesn't print anything to stdout/stderr.
48238592Smm *
49238592Smm *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
50238592Smm *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
51238592Smm *  between libzfs_core functions and ioctls to /dev/zfs.
52238592Smm *
53238592Smm *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
54238592Smm *  with kernel ioctls, and kernel ioctls are generally atomic, each
55238592Smm *  libzfs_core function is atomic.  For example, creating multiple
56238592Smm *  snapshots with a single call to lzc_snapshot() is atomic -- it
57238592Smm *  can't fail with only some of the requested snapshots created, even
58238592Smm *  in the event of power loss or system crash.
59238592Smm *
60238592Smm *  - Continued libzfs Support.  Some higher-level operations (e.g.
61238592Smm *  support for "zfs send -R") are too complicated to fit the scope of
62238592Smm *  libzfs_core.  This functionality will continue to live in libzfs.
63238592Smm *  Where appropriate, libzfs will use the underlying atomic operations
64238592Smm *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
65238592Smm *  zfs receive" by using individual "send one snapshot", rename,
66238592Smm *  destroy, and "receive one snapshot" operations in libzfs_core.
67238592Smm *  /sbin/zfs and /sbin/zpool will link with both libzfs and
68238592Smm *  libzfs_core.  Other consumers should aim to use only libzfs_core,
69238592Smm *  since that will be the supported, stable interface going forwards.
70238592Smm */
71238592Smm
72247831Smm#define _IN_LIBZFS_CORE_
73247831Smm
74238592Smm#include <libzfs_core.h>
75238592Smm#include <ctype.h>
76238592Smm#include <unistd.h>
77238592Smm#include <stdlib.h>
78238592Smm#include <string.h>
79238592Smm#include <errno.h>
80238592Smm#include <fcntl.h>
81238592Smm#include <pthread.h>
82238592Smm#include <sys/nvpair.h>
83238592Smm#include <sys/param.h>
84238592Smm#include <sys/types.h>
85238592Smm#include <sys/stat.h>
86238592Smm#include <sys/zfs_ioctl.h>
87247882Smm#include <libzfs_compat.h>
88238592Smm
89248435Smm#ifdef __FreeBSD__
90248435Smmextern int zfs_ioctl_version;
91248435Smm#endif
92248435Smm
/*
 * Held by libzfs_core_init() and libzfs_core_fini() while they update
 * g_refcount and open or close g_fd.
 */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;

/* Descriptor for /dev/zfs; opened by the first init, closed by the last fini. */
static int g_fd;

/* Number of libzfs_core_init() calls not yet matched by libzfs_core_fini(). */
static int g_refcount;
96238592Smm
97238592Smmint
98238592Smmlibzfs_core_init(void)
99238592Smm{
100238592Smm	(void) pthread_mutex_lock(&g_lock);
101238592Smm	if (g_refcount == 0) {
102238592Smm		g_fd = open("/dev/zfs", O_RDWR);
103238592Smm		if (g_fd < 0) {
104238592Smm			(void) pthread_mutex_unlock(&g_lock);
105238592Smm			return (errno);
106238592Smm		}
107238592Smm	}
108238592Smm	g_refcount++;
109238592Smm	(void) pthread_mutex_unlock(&g_lock);
110238592Smm	return (0);
111238592Smm}
112238592Smm
113238592Smmvoid
114238592Smmlibzfs_core_fini(void)
115238592Smm{
116238592Smm	(void) pthread_mutex_lock(&g_lock);
117238592Smm	ASSERT3S(g_refcount, >, 0);
118238592Smm	g_refcount--;
119238592Smm	if (g_refcount == 0)
120238592Smm		(void) close(g_fd);
121238592Smm	(void) pthread_mutex_unlock(&g_lock);
122238592Smm}
123238592Smm
124238592Smmstatic int
125238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name,
126238592Smm    nvlist_t *source, nvlist_t **resultp)
127238592Smm{
128238592Smm	zfs_cmd_t zc = { 0 };
129238592Smm	int error = 0;
130238592Smm	char *packed;
131248435Smm#ifdef __FreeBSD__
132248435Smm	nvlist_t *oldsource;
133248435Smm#endif
134238592Smm	size_t size;
135238592Smm
136238592Smm	ASSERT3S(g_refcount, >, 0);
137238592Smm
138238592Smm	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
139238592Smm
140248435Smm#ifdef __FreeBSD__
141248435Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
142248435Smm		oldsource = source;
143248435Smm		error = lzc_compat_pre(&zc, &ioc, &source);
144248435Smm		if (error)
145248435Smm			return (error);
146248435Smm	}
147248435Smm#endif
148248435Smm
149238592Smm	packed = fnvlist_pack(source, &size);
150238592Smm	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
151238592Smm	zc.zc_nvlist_src_size = size;
152238592Smm
153238592Smm	if (resultp != NULL) {
154238592Smm		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
155238592Smm		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
156238592Smm		    malloc(zc.zc_nvlist_dst_size);
157247831Smm#ifdef illumos
158238592Smm		if (zc.zc_nvlist_dst == NULL) {
159247831Smm#else
160247831Smm		if (zc.zc_nvlist_dst == 0) {
161247831Smm#endif
162238592Smm			error = ENOMEM;
163238592Smm			goto out;
164238592Smm		}
165238592Smm	}
166238592Smm
167238592Smm	while (ioctl(g_fd, ioc, &zc) != 0) {
168238592Smm		if (errno == ENOMEM && resultp != NULL) {
169238592Smm			free((void *)(uintptr_t)zc.zc_nvlist_dst);
170238592Smm			zc.zc_nvlist_dst_size *= 2;
171238592Smm			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
172238592Smm			    malloc(zc.zc_nvlist_dst_size);
173247831Smm#ifdef illumos
174238592Smm			if (zc.zc_nvlist_dst == NULL) {
175247831Smm#else
176247831Smm			if (zc.zc_nvlist_dst == 0) {
177247831Smm#endif
178238592Smm				error = ENOMEM;
179238592Smm				goto out;
180238592Smm			}
181238592Smm		} else {
182238592Smm			error = errno;
183238592Smm			break;
184238592Smm		}
185238592Smm	}
186248435Smm
187248435Smm#ifdef __FreeBSD__
188248435Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
189248435Smm		lzc_compat_post(&zc, ioc);
190248435Smm#endif
191238592Smm	if (zc.zc_nvlist_dst_filled) {
192238592Smm		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
193238592Smm		    zc.zc_nvlist_dst_size);
194238592Smm	} else if (resultp != NULL) {
195238592Smm		*resultp = NULL;
196238592Smm	}
197248435Smm#ifdef __FreeBSD__
198248435Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
199248435Smm		lzc_compat_outnvl(&zc, ioc, resultp);
200248435Smm#endif
201238592Smmout:
202248435Smm#ifdef __FreeBSD__
203248435Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
204248435Smm		if (source != oldsource)
205248435Smm			nvlist_free(source);
206248435Smm		source = oldsource;
207248435Smm	}
208248435Smm#endif
209238592Smm	fnvlist_pack_free(packed, size);
210238592Smm	free((void *)(uintptr_t)zc.zc_nvlist_dst);
211238592Smm	return (error);
212238592Smm}
213238592Smm
214238592Smmint
215238592Smmlzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
216238592Smm{
217238592Smm	int error;
218238592Smm	nvlist_t *args = fnvlist_alloc();
219238592Smm	fnvlist_add_int32(args, "type", type);
220238592Smm	if (props != NULL)
221238592Smm		fnvlist_add_nvlist(args, "props", props);
222238592Smm	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
223238592Smm	nvlist_free(args);
224238592Smm	return (error);
225238592Smm}
226238592Smm
227238592Smmint
228238592Smmlzc_clone(const char *fsname, const char *origin,
229238592Smm    nvlist_t *props)
230238592Smm{
231238592Smm	int error;
232238592Smm	nvlist_t *args = fnvlist_alloc();
233238592Smm	fnvlist_add_string(args, "origin", origin);
234238592Smm	if (props != NULL)
235238592Smm		fnvlist_add_nvlist(args, "props", props);
236238592Smm	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
237238592Smm	nvlist_free(args);
238238592Smm	return (error);
239238592Smm}
240238592Smm
241238592Smm/*
242238592Smm * Creates snapshots.
243238592Smm *
244238592Smm * The keys in the snaps nvlist are the snapshots to be created.
245238592Smm * They must all be in the same pool.
246238592Smm *
247238592Smm * The props nvlist is properties to set.  Currently only user properties
248238592Smm * are supported.  { user:prop_name -> string value }
249238592Smm *
250238592Smm * The returned results nvlist will have an entry for each snapshot that failed.
251238592Smm * The value will be the (int32) error code.
252238592Smm *
253238592Smm * The return value will be 0 if all snapshots were created, otherwise it will
254238592Smm * be the errno of a (undetermined) snapshot that failed.
255238592Smm */
256238592Smmint
257238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
258238592Smm{
259238592Smm	nvpair_t *elem;
260238592Smm	nvlist_t *args;
261238592Smm	int error;
262238592Smm	char pool[MAXNAMELEN];
263238592Smm
264238592Smm	*errlist = NULL;
265238592Smm
266238592Smm	/* determine the pool name */
267238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
268238592Smm	if (elem == NULL)
269238592Smm		return (0);
270238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
271238592Smm	pool[strcspn(pool, "/@")] = '\0';
272238592Smm
273238592Smm	args = fnvlist_alloc();
274238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
275238592Smm	if (props != NULL)
276238592Smm		fnvlist_add_nvlist(args, "props", props);
277238592Smm
278238592Smm	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
279238592Smm	nvlist_free(args);
280238592Smm
281238592Smm	return (error);
282238592Smm}
283238592Smm
284238592Smm/*
285238592Smm * Destroys snapshots.
286238592Smm *
287238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed.
288238592Smm * They must all be in the same pool.
289238592Smm *
290238592Smm * Snapshots that do not exist will be silently ignored.
291238592Smm *
292238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the
293238592Smm * destroy operation will fail and none of the snapshots will be
294238592Smm * destroyed.
295238592Smm *
296238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be
297238592Smm * marked for deferred destruction, and will be destroyed when the last hold
298238592Smm * or clone is removed/destroyed.
299238592Smm *
300238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for
301238592Smm * later destruction if 'defer' is set) or didn't exist to begin with.
302238592Smm *
303238592Smm * Otherwise the return value will be the errno of a (undetermined) snapshot
304238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an
305238592Smm * entry for each snapshot that failed.  The value in the errlist will be
306238592Smm * the (int32) error code.
307238592Smm */
308238592Smmint
309238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
310238592Smm{
311238592Smm	nvpair_t *elem;
312238592Smm	nvlist_t *args;
313238592Smm	int error;
314238592Smm	char pool[MAXNAMELEN];
315238592Smm
316238592Smm	/* determine the pool name */
317238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
318238592Smm	if (elem == NULL)
319238592Smm		return (0);
320238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
321238592Smm	pool[strcspn(pool, "/@")] = '\0';
322238592Smm
323238592Smm	args = fnvlist_alloc();
324238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
325238592Smm	if (defer)
326238592Smm		fnvlist_add_boolean(args, "defer");
327238592Smm
328238592Smm	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
329238592Smm	nvlist_free(args);
330238592Smm
331238592Smm	return (error);
332238592Smm
333238592Smm}
334238592Smm
335238592Smmint
336238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap,
337238592Smm    uint64_t *usedp)
338238592Smm{
339238592Smm	nvlist_t *args;
340238592Smm	nvlist_t *result;
341238592Smm	int err;
342238592Smm	char fs[MAXNAMELEN];
343238592Smm	char *atp;
344238592Smm
345238592Smm	/* determine the fs name */
346238592Smm	(void) strlcpy(fs, firstsnap, sizeof (fs));
347238592Smm	atp = strchr(fs, '@');
348238592Smm	if (atp == NULL)
349238592Smm		return (EINVAL);
350238592Smm	*atp = '\0';
351238592Smm
352238592Smm	args = fnvlist_alloc();
353238592Smm	fnvlist_add_string(args, "firstsnap", firstsnap);
354238592Smm
355238592Smm	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
356238592Smm	nvlist_free(args);
357238592Smm	if (err == 0)
358238592Smm		*usedp = fnvlist_lookup_uint64(result, "used");
359238592Smm	fnvlist_free(result);
360238592Smm
361238592Smm	return (err);
362238592Smm}
363238592Smm
364238592Smmboolean_t
365238592Smmlzc_exists(const char *dataset)
366238592Smm{
367238592Smm	/*
368238592Smm	 * The objset_stats ioctl is still legacy, so we need to construct our
369238592Smm	 * own zfs_cmd_t rather than using zfsc_ioctl().
370238592Smm	 */
371238592Smm	zfs_cmd_t zc = { 0 };
372238592Smm
373238592Smm	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
374238592Smm	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
375238592Smm}
376238592Smm
377238592Smm/*
378238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be sent.
379238592Smm */
380238592Smmint
381238592Smmlzc_send(const char *snapname, const char *fromsnap, int fd)
382238592Smm{
383238592Smm	nvlist_t *args;
384238592Smm	int err;
385238592Smm
386238592Smm	args = fnvlist_alloc();
387238592Smm	fnvlist_add_int32(args, "fd", fd);
388238592Smm	if (fromsnap != NULL)
389238592Smm		fnvlist_add_string(args, "fromsnap", fromsnap);
390238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
391238592Smm	nvlist_free(args);
392238592Smm	return (err);
393238592Smm}
394238592Smm
395238592Smm/*
396238592Smm * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
397238592Smm */
398238592Smmint
399238592Smmlzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
400238592Smm{
401238592Smm	nvlist_t *args;
402238592Smm	nvlist_t *result;
403238592Smm	int err;
404238592Smm
405238592Smm	args = fnvlist_alloc();
406238592Smm	if (fromsnap != NULL)
407238592Smm		fnvlist_add_string(args, "fromsnap", fromsnap);
408238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
409238592Smm	nvlist_free(args);
410238592Smm	if (err == 0)
411238592Smm		*spacep = fnvlist_lookup_uint64(result, "space");
412238592Smm	nvlist_free(result);
413238592Smm	return (err);
414238592Smm}
415238592Smm
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Returns 0 once the whole buffer has been filled, or EIO if 'ilen' is
 * negative, if read(2) fails, or if end-of-file is reached before 'ilen'
 * bytes have arrived.  A read interrupted by a signal (EINTR) is retried
 * rather than reported as a failure.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	/* A negative count would reach read() as a huge size_t. */
	if (len < 0)
		return (EIO);

	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			return (EIO);	/* premature end-of-file */

		cp += rv;
		len -= (int)rv;
	}

	return (0);
}
434238592Smm
435238592Smm/*
436238592Smm * The simplest receive case: receive from the specified fd, creating the
437238592Smm * specified snapshot.  Apply the specified properties a "received" properties
438238592Smm * (which can be overridden by locally-set properties).  If the stream is a
439238592Smm * clone, its origin snapshot must be specified by 'origin'.  The 'force'
440238592Smm * flag will cause the target filesystem to be rolled back or destroyed if
441238592Smm * necessary to receive.
442238592Smm *
443238592Smm * Return 0 on success or an errno on failure.
444238592Smm *
445238592Smm * Note: this interface does not work on dedup'd streams
446238592Smm * (those with DMU_BACKUP_FEATURE_DEDUP).
447238592Smm */
448238592Smmint
449238592Smmlzc_receive(const char *snapname, nvlist_t *props, const char *origin,
450238592Smm    boolean_t force, int fd)
451238592Smm{
452238592Smm	/*
453238592Smm	 * The receive ioctl is still legacy, so we need to construct our own
454238592Smm	 * zfs_cmd_t rather than using zfsc_ioctl().
455238592Smm	 */
456238592Smm	zfs_cmd_t zc = { 0 };
457238592Smm	char *atp;
458238592Smm	char *packed = NULL;
459238592Smm	size_t size;
460238592Smm	dmu_replay_record_t drr;
461238592Smm	int error;
462238592Smm
463238592Smm	ASSERT3S(g_refcount, >, 0);
464238592Smm
465238592Smm	/* zc_name is name of containing filesystem */
466238592Smm	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
467238592Smm	atp = strchr(zc.zc_name, '@');
468238592Smm	if (atp == NULL)
469238592Smm		return (EINVAL);
470238592Smm	*atp = '\0';
471238592Smm
472238592Smm	/* if the fs does not exist, try its parent. */
473238592Smm	if (!lzc_exists(zc.zc_name)) {
474238592Smm		char *slashp = strrchr(zc.zc_name, '/');
475238592Smm		if (slashp == NULL)
476238592Smm			return (ENOENT);
477238592Smm		*slashp = '\0';
478238592Smm
479238592Smm	}
480238592Smm
481238592Smm	/* zc_value is full name of the snapshot to create */
482238592Smm	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
483238592Smm
484238592Smm	if (props != NULL) {
485238592Smm		/* zc_nvlist_src is props to set */
486238592Smm		packed = fnvlist_pack(props, &size);
487238592Smm		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
488238592Smm		zc.zc_nvlist_src_size = size;
489238592Smm	}
490238592Smm
491238592Smm	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
492238592Smm	if (origin != NULL)
493238592Smm		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
494238592Smm
495238592Smm	/* zc_begin_record is non-byteswapped BEGIN record */
496238592Smm	error = recv_read(fd, &drr, sizeof (drr));
497238592Smm	if (error != 0)
498238592Smm		goto out;
499238592Smm	zc.zc_begin_record = drr.drr_u.drr_begin;
500238592Smm
501238592Smm	/* zc_cookie is fd to read from */
502238592Smm	zc.zc_cookie = fd;
503238592Smm
504238592Smm	/* zc guid is force flag */
505238592Smm	zc.zc_guid = force;
506238592Smm
507238592Smm	/* zc_cleanup_fd is unused */
508238592Smm	zc.zc_cleanup_fd = -1;
509238592Smm
510238592Smm	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
511238592Smm	if (error != 0)
512238592Smm		error = errno;
513238592Smm
514238592Smmout:
515238592Smm	if (packed != NULL)
516238592Smm		fnvlist_pack_free(packed, size);
517238592Smm	free((void*)(uintptr_t)zc.zc_nvlist_dst);
518238592Smm	return (error);
519238592Smm}
520