libzfs_core.c revision 286683
1238592Smm/*
2238592Smm * CDDL HEADER START
3238592Smm *
4238592Smm * The contents of this file are subject to the terms of the
5238592Smm * Common Development and Distribution License (the "License").
6238592Smm * You may not use this file except in compliance with the License.
7238592Smm *
8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9238592Smm * or http://www.opensolaris.org/os/licensing.
10238592Smm * See the License for the specific language governing permissions
11238592Smm * and limitations under the License.
12238592Smm *
13238592Smm * When distributing Covered Code, include this CDDL HEADER in each
14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15238592Smm * If applicable, add the following below this CDDL HEADER, with the
16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying
17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner]
18238592Smm *
19238592Smm * CDDL HEADER END
20238592Smm */
21238592Smm
22238592Smm/*
23286683Smav * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved.
25238592Smm */
26238592Smm
27238592Smm/*
28238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
29238592Smm * It has the following characteristics:
30238592Smm *
31238592Smm *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
32238592Smm *  threads.  This is accomplished primarily by avoiding global data
33238592Smm *  (e.g. caching).  Since it's thread-safe, there is no reason for a
34238592Smm *  process to have multiple libzfs "instances".  Therefore, we store
35238592Smm *  our few pieces of data (e.g. the file descriptor) in global
36238592Smm *  variables.  The fd is reference-counted so that the libzfs_core
37238592Smm *  library can be "initialized" multiple times (e.g. by different
38238592Smm *  consumers within the same process).
39238592Smm *
40238592Smm *  - Committed Interface.  The libzfs_core interface will be committed,
41238592Smm *  therefore consumers can compile against it and be confident that
42238592Smm *  their code will continue to work on future releases of this code.
43238592Smm *  Currently, the interface is Evolving (not Committed), but we intend
44238592Smm *  to commit to it once it is more complete and we determine that it
45238592Smm *  meets the needs of all consumers.
46238592Smm *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
48238592Smm *  defined error numbers, and doesn't print anything to stdout/stderr.
49238592Smm *
50238592Smm *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
51238592Smm *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
52238592Smm *  between libzfs_core functions and ioctls to /dev/zfs.
53238592Smm *
54238592Smm *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
56238592Smm *  libzfs_core function is atomic.  For example, creating multiple
57238592Smm *  snapshots with a single call to lzc_snapshot() is atomic -- it
58238592Smm *  can't fail with only some of the requested snapshots created, even
59238592Smm *  in the event of power loss or system crash.
60238592Smm *
61238592Smm *  - Continued libzfs Support.  Some higher-level operations (e.g.
62238592Smm *  support for "zfs send -R") are too complicated to fit the scope of
63238592Smm *  libzfs_core.  This functionality will continue to live in libzfs.
64238592Smm *  Where appropriate, libzfs will use the underlying atomic operations
65238592Smm *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
66238592Smm *  zfs receive" by using individual "send one snapshot", rename,
67238592Smm *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
69238592Smm *  libzfs_core.  Other consumers should aim to use only libzfs_core,
70238592Smm *  since that will be the supported, stable interface going forwards.
71238592Smm */
72238592Smm
73247831Smm#define _IN_LIBZFS_CORE_
74247831Smm
75238592Smm#include <libzfs_core.h>
76238592Smm#include <ctype.h>
77238592Smm#include <unistd.h>
78238592Smm#include <stdlib.h>
79238592Smm#include <string.h>
80238592Smm#include <errno.h>
81238592Smm#include <fcntl.h>
82238592Smm#include <pthread.h>
83238592Smm#include <sys/nvpair.h>
84238592Smm#include <sys/param.h>
85238592Smm#include <sys/types.h>
86238592Smm#include <sys/stat.h>
87238592Smm#include <sys/zfs_ioctl.h>
88248445Smm#include "libzfs_core_compat.h"
89248445Smm#include "libzfs_compat.h"
90238592Smm
91248435Smm#ifdef __FreeBSD__
92248461Smmextern int zfs_ioctl_version;
93248435Smm#endif
94248435Smm
/*
 * Process-wide library state.
 *
 * g_fd is the descriptor obtained by opening /dev/zfs in libzfs_core_init().
 * It starts out as -1 so that an ioctl issued before the library has been
 * initialized fails with EBADF instead of being sent to descriptor 0.
 *
 * g_refcount counts successful libzfs_core_init() calls that have not yet
 * been matched by libzfs_core_fini(); g_lock is held while g_fd and
 * g_refcount are updated by those two functions.
 */
static int g_fd = -1;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;
98238592Smm
99238592Smmint
100238592Smmlibzfs_core_init(void)
101238592Smm{
102238592Smm	(void) pthread_mutex_lock(&g_lock);
103238592Smm	if (g_refcount == 0) {
104238592Smm		g_fd = open("/dev/zfs", O_RDWR);
105238592Smm		if (g_fd < 0) {
106238592Smm			(void) pthread_mutex_unlock(&g_lock);
107238592Smm			return (errno);
108238592Smm		}
109238592Smm	}
110238592Smm	g_refcount++;
111238592Smm	(void) pthread_mutex_unlock(&g_lock);
112248445Smm
113238592Smm	return (0);
114238592Smm}
115238592Smm
116238592Smmvoid
117238592Smmlibzfs_core_fini(void)
118238592Smm{
119238592Smm	(void) pthread_mutex_lock(&g_lock);
120238592Smm	ASSERT3S(g_refcount, >, 0);
121238592Smm	g_refcount--;
122238592Smm	if (g_refcount == 0)
123238592Smm		(void) close(g_fd);
124238592Smm	(void) pthread_mutex_unlock(&g_lock);
125238592Smm}
126238592Smm
127238592Smmstatic int
128238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name,
129238592Smm    nvlist_t *source, nvlist_t **resultp)
130238592Smm{
131238592Smm	zfs_cmd_t zc = { 0 };
132238592Smm	int error = 0;
133238592Smm	char *packed;
134248435Smm#ifdef __FreeBSD__
135248435Smm	nvlist_t *oldsource;
136248435Smm#endif
137238592Smm	size_t size;
138238592Smm
139238592Smm	ASSERT3S(g_refcount, >, 0);
140238592Smm
141238592Smm	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
142238592Smm
143248435Smm#ifdef __FreeBSD__
144248461Smm	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
145248461Smm		zfs_ioctl_version = get_zfs_ioctl_version();
146248445Smm
147248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
148248435Smm		oldsource = source;
149248435Smm		error = lzc_compat_pre(&zc, &ioc, &source);
150248435Smm		if (error)
151248435Smm			return (error);
152248435Smm	}
153248435Smm#endif
154248435Smm
155238592Smm	packed = fnvlist_pack(source, &size);
156238592Smm	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
157238592Smm	zc.zc_nvlist_src_size = size;
158238592Smm
159238592Smm	if (resultp != NULL) {
160248498Smm		*resultp = NULL;
161238592Smm		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
162238592Smm		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
163238592Smm		    malloc(zc.zc_nvlist_dst_size);
164247831Smm#ifdef illumos
165238592Smm		if (zc.zc_nvlist_dst == NULL) {
166247831Smm#else
167247831Smm		if (zc.zc_nvlist_dst == 0) {
168247831Smm#endif
169238592Smm			error = ENOMEM;
170238592Smm			goto out;
171238592Smm		}
172238592Smm	}
173238592Smm
174238592Smm	while (ioctl(g_fd, ioc, &zc) != 0) {
175238592Smm		if (errno == ENOMEM && resultp != NULL) {
176238592Smm			free((void *)(uintptr_t)zc.zc_nvlist_dst);
177238592Smm			zc.zc_nvlist_dst_size *= 2;
178238592Smm			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
179238592Smm			    malloc(zc.zc_nvlist_dst_size);
180247831Smm#ifdef illumos
181238592Smm			if (zc.zc_nvlist_dst == NULL) {
182247831Smm#else
183247831Smm			if (zc.zc_nvlist_dst == 0) {
184247831Smm#endif
185238592Smm				error = ENOMEM;
186238592Smm				goto out;
187238592Smm			}
188238592Smm		} else {
189238592Smm			error = errno;
190238592Smm			break;
191238592Smm		}
192238592Smm	}
193248435Smm
194248435Smm#ifdef __FreeBSD__
195248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
196248435Smm		lzc_compat_post(&zc, ioc);
197248435Smm#endif
198238592Smm	if (zc.zc_nvlist_dst_filled) {
199238592Smm		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
200238592Smm		    zc.zc_nvlist_dst_size);
201238592Smm	}
202248435Smm#ifdef __FreeBSD__
203248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
204248435Smm		lzc_compat_outnvl(&zc, ioc, resultp);
205248435Smm#endif
206238592Smmout:
207248435Smm#ifdef __FreeBSD__
208248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
209248435Smm		if (source != oldsource)
210248435Smm			nvlist_free(source);
211248435Smm		source = oldsource;
212248435Smm	}
213248435Smm#endif
214238592Smm	fnvlist_pack_free(packed, size);
215238592Smm	free((void *)(uintptr_t)zc.zc_nvlist_dst);
216238592Smm	return (error);
217238592Smm}
218238592Smm
219238592Smmint
220238592Smmlzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
221238592Smm{
222238592Smm	int error;
223238592Smm	nvlist_t *args = fnvlist_alloc();
224238592Smm	fnvlist_add_int32(args, "type", type);
225238592Smm	if (props != NULL)
226238592Smm		fnvlist_add_nvlist(args, "props", props);
227238592Smm	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
228238592Smm	nvlist_free(args);
229238592Smm	return (error);
230238592Smm}
231238592Smm
232238592Smmint
233238592Smmlzc_clone(const char *fsname, const char *origin,
234238592Smm    nvlist_t *props)
235238592Smm{
236238592Smm	int error;
237238592Smm	nvlist_t *args = fnvlist_alloc();
238238592Smm	fnvlist_add_string(args, "origin", origin);
239238592Smm	if (props != NULL)
240238592Smm		fnvlist_add_nvlist(args, "props", props);
241238592Smm	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
242238592Smm	nvlist_free(args);
243238592Smm	return (error);
244238592Smm}
245238592Smm
246238592Smm/*
247238592Smm * Creates snapshots.
248238592Smm *
249238592Smm * The keys in the snaps nvlist are the snapshots to be created.
250238592Smm * They must all be in the same pool.
251238592Smm *
252238592Smm * The props nvlist is properties to set.  Currently only user properties
253238592Smm * are supported.  { user:prop_name -> string value }
254238592Smm *
255238592Smm * The returned results nvlist will have an entry for each snapshot that failed.
256238592Smm * The value will be the (int32) error code.
257238592Smm *
258238592Smm * The return value will be 0 if all snapshots were created, otherwise it will
259248498Smm * be the errno of a (unspecified) snapshot that failed.
260238592Smm */
261238592Smmint
262238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
263238592Smm{
264238592Smm	nvpair_t *elem;
265238592Smm	nvlist_t *args;
266238592Smm	int error;
267238592Smm	char pool[MAXNAMELEN];
268238592Smm
269238592Smm	*errlist = NULL;
270238592Smm
271238592Smm	/* determine the pool name */
272238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
273238592Smm	if (elem == NULL)
274238592Smm		return (0);
275238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
276238592Smm	pool[strcspn(pool, "/@")] = '\0';
277238592Smm
278238592Smm	args = fnvlist_alloc();
279238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
280238592Smm	if (props != NULL)
281238592Smm		fnvlist_add_nvlist(args, "props", props);
282238592Smm
283238592Smm	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
284238592Smm	nvlist_free(args);
285238592Smm
286238592Smm	return (error);
287238592Smm}
288238592Smm
289238592Smm/*
290238592Smm * Destroys snapshots.
291238592Smm *
292238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed.
293238592Smm * They must all be in the same pool.
294238592Smm *
295238592Smm * Snapshots that do not exist will be silently ignored.
296238592Smm *
297238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the
298238592Smm * destroy operation will fail and none of the snapshots will be
299238592Smm * destroyed.
300238592Smm *
301238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be
302238592Smm * marked for deferred destruction, and will be destroyed when the last hold
303238592Smm * or clone is removed/destroyed.
304238592Smm *
305238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for
306252219Sdelphij * later destruction if 'defer' is set) or didn't exist to begin with.
307238592Smm *
308248498Smm * Otherwise the return value will be the errno of a (unspecified) snapshot
309238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an
310238592Smm * entry for each snapshot that failed.  The value in the errlist will be
311238592Smm * the (int32) error code.
312238592Smm */
313238592Smmint
314238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
315238592Smm{
316238592Smm	nvpair_t *elem;
317238592Smm	nvlist_t *args;
318238592Smm	int error;
319238592Smm	char pool[MAXNAMELEN];
320238592Smm
321238592Smm	/* determine the pool name */
322238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
323238592Smm	if (elem == NULL)
324238592Smm		return (0);
325238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
326238592Smm	pool[strcspn(pool, "/@")] = '\0';
327238592Smm
328238592Smm	args = fnvlist_alloc();
329238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
330238592Smm	if (defer)
331238592Smm		fnvlist_add_boolean(args, "defer");
332238592Smm
333238592Smm	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
334238592Smm	nvlist_free(args);
335238592Smm
336238592Smm	return (error);
337238592Smm}
338238592Smm
339238592Smmint
340238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap,
341238592Smm    uint64_t *usedp)
342238592Smm{
343238592Smm	nvlist_t *args;
344238592Smm	nvlist_t *result;
345238592Smm	int err;
346238592Smm	char fs[MAXNAMELEN];
347238592Smm	char *atp;
348238592Smm
349238592Smm	/* determine the fs name */
350238592Smm	(void) strlcpy(fs, firstsnap, sizeof (fs));
351238592Smm	atp = strchr(fs, '@');
352238592Smm	if (atp == NULL)
353238592Smm		return (EINVAL);
354238592Smm	*atp = '\0';
355238592Smm
356238592Smm	args = fnvlist_alloc();
357238592Smm	fnvlist_add_string(args, "firstsnap", firstsnap);
358238592Smm
359238592Smm	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
360238592Smm	nvlist_free(args);
361238592Smm	if (err == 0)
362238592Smm		*usedp = fnvlist_lookup_uint64(result, "used");
363238592Smm	fnvlist_free(result);
364238592Smm
365238592Smm	return (err);
366238592Smm}
367238592Smm
368238592Smmboolean_t
369238592Smmlzc_exists(const char *dataset)
370238592Smm{
371238592Smm	/*
372238592Smm	 * The objset_stats ioctl is still legacy, so we need to construct our
373238592Smm	 * own zfs_cmd_t rather than using zfsc_ioctl().
374238592Smm	 */
375238592Smm	zfs_cmd_t zc = { 0 };
376238592Smm
377238592Smm	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
378238592Smm	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
379238592Smm}
380238592Smm
381238592Smm/*
382248498Smm * Create "user holds" on snapshots.  If there is a hold on a snapshot,
383248498Smm * the snapshot can not be destroyed.  (However, it can be marked for deletion
384248498Smm * by lzc_destroy_snaps(defer=B_TRUE).)
385248498Smm *
386248498Smm * The keys in the nvlist are snapshot names.
387248498Smm * The snapshots must all be in the same pool.
388248498Smm * The value is the name of the hold (string type).
389248498Smm *
390248498Smm * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
391248498Smm * In this case, when the cleanup_fd is closed (including on process
392248498Smm * termination), the holds will be released.  If the system is shut down
393248498Smm * uncleanly, the holds will be released when the pool is next opened
394248498Smm * or imported.
395248498Smm *
396251646Sdelphij * Holds for snapshots which don't exist will be skipped and have an entry
397252219Sdelphij * added to errlist, but will not cause an overall failure.
398251646Sdelphij *
399252219Sdelphij * The return value will be 0 if all holds, for snapshots that existed,
400252219Sdelphij * were succesfully created.
401251646Sdelphij *
402251646Sdelphij * Otherwise the return value will be the errno of a (unspecified) hold that
403251646Sdelphij * failed and no holds will be created.
404251646Sdelphij *
405251646Sdelphij * In all cases the errlist will have an entry for each hold that failed
406251646Sdelphij * (name = snapshot), with its value being the error code (int32).
407248498Smm */
408248498Smmint
409248498Smmlzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
410248498Smm{
411248498Smm	char pool[MAXNAMELEN];
412248498Smm	nvlist_t *args;
413248498Smm	nvpair_t *elem;
414248498Smm	int error;
415248498Smm
416248498Smm	/* determine the pool name */
417248498Smm	elem = nvlist_next_nvpair(holds, NULL);
418248498Smm	if (elem == NULL)
419248498Smm		return (0);
420248498Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
421248498Smm	pool[strcspn(pool, "/@")] = '\0';
422248498Smm
423248498Smm	args = fnvlist_alloc();
424248498Smm	fnvlist_add_nvlist(args, "holds", holds);
425248498Smm	if (cleanup_fd != -1)
426248498Smm		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
427248498Smm
428248498Smm	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
429248498Smm	nvlist_free(args);
430248498Smm	return (error);
431248498Smm}
432248498Smm
433248498Smm/*
434248498Smm * Release "user holds" on snapshots.  If the snapshot has been marked for
435248498Smm * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
436248498Smm * any clones, and all the user holds are removed, then the snapshot will be
437248498Smm * destroyed.
438248498Smm *
439248498Smm * The keys in the nvlist are snapshot names.
440248498Smm * The snapshots must all be in the same pool.
441248498Smm * The value is a nvlist whose keys are the holds to remove.
442248498Smm *
443251646Sdelphij * Holds which failed to release because they didn't exist will have an entry
444252219Sdelphij * added to errlist, but will not cause an overall failure.
445251646Sdelphij *
446251646Sdelphij * The return value will be 0 if the nvl holds was empty or all holds that
447252219Sdelphij * existed, were successfully removed.
448251646Sdelphij *
449251646Sdelphij * Otherwise the return value will be the errno of a (unspecified) hold that
450251646Sdelphij * failed to release and no holds will be released.
451251646Sdelphij *
452251646Sdelphij * In all cases the errlist will have an entry for each hold that failed to
453251646Sdelphij * to release.
454248498Smm */
455248498Smmint
456248498Smmlzc_release(nvlist_t *holds, nvlist_t **errlist)
457248498Smm{
458248498Smm	char pool[MAXNAMELEN];
459248498Smm	nvpair_t *elem;
460248498Smm
461248498Smm	/* determine the pool name */
462248498Smm	elem = nvlist_next_nvpair(holds, NULL);
463248498Smm	if (elem == NULL)
464248498Smm		return (0);
465248498Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
466248498Smm	pool[strcspn(pool, "/@")] = '\0';
467248498Smm
468248498Smm	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
469248498Smm}
470248498Smm
471248498Smm/*
472248498Smm * Retrieve list of user holds on the specified snapshot.
473248498Smm *
474248498Smm * On success, *holdsp will be set to a nvlist which the caller must free.
475248498Smm * The keys are the names of the holds, and the value is the creation time
476248498Smm * of the hold (uint64) in seconds since the epoch.
477248498Smm */
478248498Smmint
479248498Smmlzc_get_holds(const char *snapname, nvlist_t **holdsp)
480248498Smm{
481248498Smm	int error;
482248498Smm	nvlist_t *innvl = fnvlist_alloc();
483248498Smm	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
484248498Smm	fnvlist_free(innvl);
485248498Smm	return (error);
486248498Smm}
487248498Smm
488248498Smm/*
489268075Sdelphij * Generate a zfs send stream for the specified snapshot and write it to
490268075Sdelphij * the specified file descriptor.
491260183Sdelphij *
492260183Sdelphij * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
493260183Sdelphij *
494260183Sdelphij * If "from" is NULL, a full (non-incremental) stream will be sent.
495260183Sdelphij * If "from" is non-NULL, it must be the full name of a snapshot or
496260183Sdelphij * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
497260183Sdelphij * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
498260183Sdelphij * bookmark must represent an earlier point in the history of "snapname").
499260183Sdelphij * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
500260183Sdelphij * or it can be the origin of "snapname"'s filesystem, or an earlier
501260183Sdelphij * snapshot in the origin, etc.
502260183Sdelphij *
503260183Sdelphij * "fd" is the file descriptor to write the send stream to.
504268075Sdelphij *
505274337Sdelphij * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
506274337Sdelphij * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
507274337Sdelphij * records with drr_blksz > 128K.
508274337Sdelphij *
509268075Sdelphij * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
510268075Sdelphij * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
511268075Sdelphij * which the receiving system must support (as indicated by support
512268075Sdelphij * for the "embedded_data" feature).
513238592Smm */
514238592Smmint
515268075Sdelphijlzc_send(const char *snapname, const char *from, int fd,
516268075Sdelphij    enum lzc_send_flags flags)
517238592Smm{
518238592Smm	nvlist_t *args;
519238592Smm	int err;
520238592Smm
521238592Smm	args = fnvlist_alloc();
522238592Smm	fnvlist_add_int32(args, "fd", fd);
523260183Sdelphij	if (from != NULL)
524260183Sdelphij		fnvlist_add_string(args, "fromsnap", from);
525274337Sdelphij	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
526274337Sdelphij		fnvlist_add_boolean(args, "largeblockok");
527268075Sdelphij	if (flags & LZC_SEND_FLAG_EMBED_DATA)
528268075Sdelphij		fnvlist_add_boolean(args, "embedok");
529238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
530238592Smm	nvlist_free(args);
531238592Smm	return (err);
532238592Smm}
533238592Smm
534238592Smm/*
535286683Smav * "from" can be NULL, a snapshot, or a bookmark.
536286683Smav *
537286683Smav * If from is NULL, a full (non-incremental) stream will be estimated.  This
538286683Smav * is calculated very efficiently.
539286683Smav *
540286683Smav * If from is a snapshot, lzc_send_space uses the deadlists attached to
541286683Smav * each snapshot to efficiently estimate the stream size.
542286683Smav *
543286683Smav * If from is a bookmark, the indirect blocks in the destination snapshot
544286683Smav * are traversed, looking for blocks with a birth time since the creation TXG of
545286683Smav * the snapshot this bookmark was created from.  This will result in
546286683Smav * significantly more I/O and be less efficient than a send space estimation on
547286683Smav * an equivalent snapshot.
548238592Smm */
549238592Smmint
550286683Smavlzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
551238592Smm{
552238592Smm	nvlist_t *args;
553238592Smm	nvlist_t *result;
554238592Smm	int err;
555238592Smm
556238592Smm	args = fnvlist_alloc();
557286683Smav	if (from != NULL)
558286683Smav		fnvlist_add_string(args, "from", from);
559238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
560238592Smm	nvlist_free(args);
561238592Smm	if (err == 0)
562238592Smm		*spacep = fnvlist_lookup_uint64(result, "space");
563238592Smm	nvlist_free(result);
564238592Smm	return (err);
565238592Smm}
566238592Smm
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short reads.
 *
 * Returns 0 once all bytes have been read.  Returns EIO if 'ilen' is
 * negative, if read() fails (other than being interrupted by a signal, in
 * which case it is retried), or if end-of-file is reached first.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	/* A negative length would be passed to read() as a huge size_t. */
	if (len < 0)
		return (EIO);

	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0) {
			/* premature end-of-file */
			return (EIO);
		}
		cp += rv;
		len -= (int)rv;
	}

	return (0);
}
585238592Smm
586238592Smm/*
587238592Smm * The simplest receive case: receive from the specified fd, creating the
588238592Smm * specified snapshot.  Apply the specified properties a "received" properties
589238592Smm * (which can be overridden by locally-set properties).  If the stream is a
590238592Smm * clone, its origin snapshot must be specified by 'origin'.  The 'force'
591238592Smm * flag will cause the target filesystem to be rolled back or destroyed if
592238592Smm * necessary to receive.
593238592Smm *
594238592Smm * Return 0 on success or an errno on failure.
595238592Smm *
596238592Smm * Note: this interface does not work on dedup'd streams
597238592Smm * (those with DMU_BACKUP_FEATURE_DEDUP).
598238592Smm */
599238592Smmint
600238592Smmlzc_receive(const char *snapname, nvlist_t *props, const char *origin,
601238592Smm    boolean_t force, int fd)
602238592Smm{
603238592Smm	/*
604238592Smm	 * The receive ioctl is still legacy, so we need to construct our own
605238592Smm	 * zfs_cmd_t rather than using zfsc_ioctl().
606238592Smm	 */
607238592Smm	zfs_cmd_t zc = { 0 };
608238592Smm	char *atp;
609238592Smm	char *packed = NULL;
610238592Smm	size_t size;
611238592Smm	dmu_replay_record_t drr;
612238592Smm	int error;
613238592Smm
614238592Smm	ASSERT3S(g_refcount, >, 0);
615238592Smm
616238592Smm	/* zc_name is name of containing filesystem */
617238592Smm	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
618238592Smm	atp = strchr(zc.zc_name, '@');
619238592Smm	if (atp == NULL)
620238592Smm		return (EINVAL);
621238592Smm	*atp = '\0';
622238592Smm
623238592Smm	/* if the fs does not exist, try its parent. */
624238592Smm	if (!lzc_exists(zc.zc_name)) {
625238592Smm		char *slashp = strrchr(zc.zc_name, '/');
626238592Smm		if (slashp == NULL)
627238592Smm			return (ENOENT);
628238592Smm		*slashp = '\0';
629238592Smm
630238592Smm	}
631238592Smm
632238592Smm	/* zc_value is full name of the snapshot to create */
633238592Smm	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
634238592Smm
635238592Smm	if (props != NULL) {
636238592Smm		/* zc_nvlist_src is props to set */
637238592Smm		packed = fnvlist_pack(props, &size);
638238592Smm		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
639238592Smm		zc.zc_nvlist_src_size = size;
640238592Smm	}
641238592Smm
642238592Smm	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
643238592Smm	if (origin != NULL)
644238592Smm		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
645238592Smm
646238592Smm	/* zc_begin_record is non-byteswapped BEGIN record */
647238592Smm	error = recv_read(fd, &drr, sizeof (drr));
648238592Smm	if (error != 0)
649238592Smm		goto out;
650238592Smm	zc.zc_begin_record = drr.drr_u.drr_begin;
651238592Smm
652238592Smm	/* zc_cookie is fd to read from */
653238592Smm	zc.zc_cookie = fd;
654238592Smm
655238592Smm	/* zc guid is force flag */
656238592Smm	zc.zc_guid = force;
657238592Smm
658238592Smm	/* zc_cleanup_fd is unused */
659238592Smm	zc.zc_cleanup_fd = -1;
660238592Smm
661238592Smm	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
662238592Smm	if (error != 0)
663238592Smm		error = errno;
664238592Smm
665238592Smmout:
666238592Smm	if (packed != NULL)
667238592Smm		fnvlist_pack_free(packed, size);
668238592Smm	free((void*)(uintptr_t)zc.zc_nvlist_dst);
669238592Smm	return (error);
670238592Smm}
671254587Sdelphij
672254587Sdelphij/*
673254587Sdelphij * Roll back this filesystem or volume to its most recent snapshot.
674254587Sdelphij * If snapnamebuf is not NULL, it will be filled in with the name
675254587Sdelphij * of the most recent snapshot.
676254587Sdelphij *
677254587Sdelphij * Return 0 on success or an errno on failure.
678254587Sdelphij */
679254587Sdelphijint
680254587Sdelphijlzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
681254587Sdelphij{
682254587Sdelphij	nvlist_t *args;
683254587Sdelphij	nvlist_t *result;
684254587Sdelphij	int err;
685254587Sdelphij
686254587Sdelphij	args = fnvlist_alloc();
687254587Sdelphij	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
688254587Sdelphij	nvlist_free(args);
689254587Sdelphij	if (err == 0 && snapnamebuf != NULL) {
690254587Sdelphij		const char *snapname = fnvlist_lookup_string(result, "target");
691254587Sdelphij		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
692254587Sdelphij	}
693254587Sdelphij	return (err);
694254587Sdelphij}
695260183Sdelphij
696260183Sdelphij/*
697260183Sdelphij * Creates bookmarks.
698260183Sdelphij *
699260183Sdelphij * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
700260183Sdelphij * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
701260183Sdelphij * snapshots must be in the same pool.
702260183Sdelphij *
703260183Sdelphij * The returned results nvlist will have an entry for each bookmark that failed.
704260183Sdelphij * The value will be the (int32) error code.
705260183Sdelphij *
706260183Sdelphij * The return value will be 0 if all bookmarks were created, otherwise it will
707260183Sdelphij * be the errno of a (undetermined) bookmarks that failed.
708260183Sdelphij */
709260183Sdelphijint
710260183Sdelphijlzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
711260183Sdelphij{
712260183Sdelphij	nvpair_t *elem;
713260183Sdelphij	int error;
714260183Sdelphij	char pool[MAXNAMELEN];
715260183Sdelphij
716260183Sdelphij	/* determine the pool name */
717260183Sdelphij	elem = nvlist_next_nvpair(bookmarks, NULL);
718260183Sdelphij	if (elem == NULL)
719260183Sdelphij		return (0);
720260183Sdelphij	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
721260183Sdelphij	pool[strcspn(pool, "/#")] = '\0';
722260183Sdelphij
723260183Sdelphij	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
724260183Sdelphij
725260183Sdelphij	return (error);
726260183Sdelphij}
727260183Sdelphij
728260183Sdelphij/*
729260183Sdelphij * Retrieve bookmarks.
730260183Sdelphij *
731260183Sdelphij * Retrieve the list of bookmarks for the given file system. The props
732260183Sdelphij * parameter is an nvlist of property names (with no values) that will be
733260183Sdelphij * returned for each bookmark.
734260183Sdelphij *
735260183Sdelphij * The following are valid properties on bookmarks, all of which are numbers
736260183Sdelphij * (represented as uint64 in the nvlist)
737260183Sdelphij *
738260183Sdelphij * "guid" - globally unique identifier of the snapshot it refers to
739260183Sdelphij * "createtxg" - txg when the snapshot it refers to was created
740260183Sdelphij * "creation" - timestamp when the snapshot it refers to was created
741260183Sdelphij *
742260183Sdelphij * The format of the returned nvlist as follows:
743260183Sdelphij * <short name of bookmark> -> {
744260183Sdelphij *     <name of property> -> {
745260183Sdelphij *         "value" -> uint64
746260183Sdelphij *     }
747260183Sdelphij *  }
748260183Sdelphij */
749260183Sdelphijint
750260183Sdelphijlzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
751260183Sdelphij{
752260183Sdelphij	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
753260183Sdelphij}
754260183Sdelphij
755260183Sdelphij/*
756260183Sdelphij * Destroys bookmarks.
757260183Sdelphij *
758260183Sdelphij * The keys in the bmarks nvlist are the bookmarks to be destroyed.
759260183Sdelphij * They must all be in the same pool.  Bookmarks are specified as
760260183Sdelphij * <fs>#<bmark>.
761260183Sdelphij *
762260183Sdelphij * Bookmarks that do not exist will be silently ignored.
763260183Sdelphij *
764260183Sdelphij * The return value will be 0 if all bookmarks that existed were destroyed.
765260183Sdelphij *
766260183Sdelphij * Otherwise the return value will be the errno of a (undetermined) bookmark
767260183Sdelphij * that failed, no bookmarks will be destroyed, and the errlist will have an
768260183Sdelphij * entry for each bookmarks that failed.  The value in the errlist will be
769260183Sdelphij * the (int32) error code.
770260183Sdelphij */
771260183Sdelphijint
772260183Sdelphijlzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
773260183Sdelphij{
774260183Sdelphij	nvpair_t *elem;
775260183Sdelphij	int error;
776260183Sdelphij	char pool[MAXNAMELEN];
777260183Sdelphij
778260183Sdelphij	/* determine the pool name */
779260183Sdelphij	elem = nvlist_next_nvpair(bmarks, NULL);
780260183Sdelphij	if (elem == NULL)
781260183Sdelphij		return (0);
782260183Sdelphij	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
783260183Sdelphij	pool[strcspn(pool, "/#")] = '\0';
784260183Sdelphij
785260183Sdelphij	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
786260183Sdelphij
787260183Sdelphij	return (error);
788260183Sdelphij}
789