1238592Smm/*
2238592Smm * CDDL HEADER START
3238592Smm *
4238592Smm * The contents of this file are subject to the terms of the
5238592Smm * Common Development and Distribution License (the "License").
6238592Smm * You may not use this file except in compliance with the License.
7238592Smm *
8238592Smm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9238592Smm * or http://www.opensolaris.org/os/licensing.
10238592Smm * See the License for the specific language governing permissions
11238592Smm * and limitations under the License.
12238592Smm *
13238592Smm * When distributing Covered Code, include this CDDL HEADER in each
14238592Smm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15238592Smm * If applicable, add the following below this CDDL HEADER, with the
16238592Smm * fields enclosed by brackets "[]" replaced with your own identifying
17238592Smm * information: Portions Copyright [yyyy] [name of copyright owner]
18238592Smm *
19238592Smm * CDDL HEADER END
20238592Smm */
21238592Smm
22238592Smm/*
23325538Savg * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved.
25296519Smav * Copyright (c) 2014 Integros [integros.com]
26321577Smav * Copyright 2017 RackTop Systems.
27238592Smm */
28238592Smm
29238592Smm/*
30238592Smm * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
31238592Smm * It has the following characteristics:
32238592Smm *
33238592Smm *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
34238592Smm *  threads.  This is accomplished primarily by avoiding global data
35238592Smm *  (e.g. caching).  Since it's thread-safe, there is no reason for a
36238592Smm *  process to have multiple libzfs "instances".  Therefore, we store
37238592Smm *  our few pieces of data (e.g. the file descriptor) in global
38238592Smm *  variables.  The fd is reference-counted so that the libzfs_core
39238592Smm *  library can be "initialized" multiple times (e.g. by different
40238592Smm *  consumers within the same process).
41238592Smm *
42238592Smm *  - Committed Interface.  The libzfs_core interface will be committed,
43238592Smm *  therefore consumers can compile against it and be confident that
44238592Smm *  their code will continue to work on future releases of this code.
45238592Smm *  Currently, the interface is Evolving (not Committed), but we intend
46238592Smm *  to commit to it once it is more complete and we determine that it
47238592Smm *  meets the needs of all consumers.
48238592Smm *
49238592Smm *  - Programmatic Error Handling.  libzfs_core communicates errors with
50238592Smm *  defined error numbers, and doesn't print anything to stdout/stderr.
51238592Smm *
52238592Smm *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
53238592Smm *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
54238592Smm *  between libzfs_core functions and ioctls to /dev/zfs.
55238592Smm *
56238592Smm *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
57238592Smm *  with kernel ioctls, and kernel ioctls are generally atomic, each
58238592Smm *  libzfs_core function is atomic.  For example, creating multiple
59238592Smm *  snapshots with a single call to lzc_snapshot() is atomic -- it
60238592Smm *  can't fail with only some of the requested snapshots created, even
61238592Smm *  in the event of power loss or system crash.
62238592Smm *
63238592Smm *  - Continued libzfs Support.  Some higher-level operations (e.g.
64238592Smm *  support for "zfs send -R") are too complicated to fit the scope of
65238592Smm *  libzfs_core.  This functionality will continue to live in libzfs.
66238592Smm *  Where appropriate, libzfs will use the underlying atomic operations
67238592Smm *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
68238592Smm *  zfs receive" by using individual "send one snapshot", rename,
69238592Smm *  destroy, and "receive one snapshot" operations in libzfs_core.
70238592Smm *  /sbin/zfs and /sbin/zpool will link with both libzfs and
71238592Smm *  libzfs_core.  Other consumers should aim to use only libzfs_core,
72238592Smm *  since that will be the supported, stable interface going forwards.
73238592Smm */
74238592Smm
75247831Smm#define _IN_LIBZFS_CORE_
76247831Smm
77238592Smm#include <libzfs_core.h>
78238592Smm#include <ctype.h>
79238592Smm#include <unistd.h>
80238592Smm#include <stdlib.h>
81238592Smm#include <string.h>
82238592Smm#include <errno.h>
83238592Smm#include <fcntl.h>
84238592Smm#include <pthread.h>
85238592Smm#include <sys/nvpair.h>
86238592Smm#include <sys/param.h>
87238592Smm#include <sys/types.h>
88238592Smm#include <sys/stat.h>
89238592Smm#include <sys/zfs_ioctl.h>
90248445Smm#include "libzfs_core_compat.h"
91248445Smm#include "libzfs_compat.h"
92238592Smm
#ifdef __FreeBSD__
/*
 * Version of the kernel ioctl interface.  Defined outside this file and
 * resolved on first use by lzc_ioctl().
 */
extern int zfs_ioctl_version;
#endif

/*
 * Library-wide state.  g_lock guards g_refcount and g_fd while the library
 * is being initialized or torn down; g_fd is the descriptor for /dev/zfs
 * and is -1 whenever the device is not open.
 */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount = 0;
static int g_fd = -1;
100238592Smm
101238592Smmint
102238592Smmlibzfs_core_init(void)
103238592Smm{
104238592Smm	(void) pthread_mutex_lock(&g_lock);
105238592Smm	if (g_refcount == 0) {
106238592Smm		g_fd = open("/dev/zfs", O_RDWR);
107238592Smm		if (g_fd < 0) {
108238592Smm			(void) pthread_mutex_unlock(&g_lock);
109238592Smm			return (errno);
110238592Smm		}
111238592Smm	}
112238592Smm	g_refcount++;
113238592Smm	(void) pthread_mutex_unlock(&g_lock);
114248445Smm
115238592Smm	return (0);
116238592Smm}
117238592Smm
118238592Smmvoid
119238592Smmlibzfs_core_fini(void)
120238592Smm{
121238592Smm	(void) pthread_mutex_lock(&g_lock);
122238592Smm	ASSERT3S(g_refcount, >, 0);
123331393Smav
124331393Smav	if (g_refcount > 0)
125331393Smav		g_refcount--;
126331393Smav
127331393Smav	if (g_refcount == 0 && g_fd != -1) {
128238592Smm		(void) close(g_fd);
129331393Smav		g_fd = -1;
130331393Smav	}
131238592Smm	(void) pthread_mutex_unlock(&g_lock);
132238592Smm}
133238592Smm
134238592Smmstatic int
135238592Smmlzc_ioctl(zfs_ioc_t ioc, const char *name,
136238592Smm    nvlist_t *source, nvlist_t **resultp)
137238592Smm{
138238592Smm	zfs_cmd_t zc = { 0 };
139238592Smm	int error = 0;
140238592Smm	char *packed;
141248435Smm#ifdef __FreeBSD__
142248435Smm	nvlist_t *oldsource;
143248435Smm#endif
144238592Smm	size_t size;
145238592Smm
146238592Smm	ASSERT3S(g_refcount, >, 0);
147331393Smav	VERIFY3S(g_fd, !=, -1);
148238592Smm
149238592Smm	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
150238592Smm
151248435Smm#ifdef __FreeBSD__
152248461Smm	if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
153248461Smm		zfs_ioctl_version = get_zfs_ioctl_version();
154248445Smm
155248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
156248435Smm		oldsource = source;
157248435Smm		error = lzc_compat_pre(&zc, &ioc, &source);
158248435Smm		if (error)
159248435Smm			return (error);
160248435Smm	}
161248435Smm#endif
162248435Smm
163238592Smm	packed = fnvlist_pack(source, &size);
164238592Smm	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
165238592Smm	zc.zc_nvlist_src_size = size;
166238592Smm
167238592Smm	if (resultp != NULL) {
168248498Smm		*resultp = NULL;
169325538Savg		if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
170325538Savg			zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
171325538Savg			    ZCP_ARG_MEMLIMIT);
172325538Savg		} else {
173325538Savg			zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
174325538Savg		}
175238592Smm		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
176238592Smm		    malloc(zc.zc_nvlist_dst_size);
177247831Smm#ifdef illumos
178238592Smm		if (zc.zc_nvlist_dst == NULL) {
179247831Smm#else
180247831Smm		if (zc.zc_nvlist_dst == 0) {
181247831Smm#endif
182238592Smm			error = ENOMEM;
183238592Smm			goto out;
184238592Smm		}
185238592Smm	}
186238592Smm
187238592Smm	while (ioctl(g_fd, ioc, &zc) != 0) {
188325534Savg		/*
189325534Savg		 * If ioctl exited with ENOMEM, we retry the ioctl after
190325534Savg		 * increasing the size of the destination nvlist.
191325534Savg		 *
192325538Savg		 * Channel programs that exit with ENOMEM ran over the
193325534Savg		 * lua memory sandbox; they should not be retried.
194325534Savg		 */
195325534Savg		if (errno == ENOMEM && resultp != NULL &&
196325534Savg		    ioc != ZFS_IOC_CHANNEL_PROGRAM) {
197238592Smm			free((void *)(uintptr_t)zc.zc_nvlist_dst);
198238592Smm			zc.zc_nvlist_dst_size *= 2;
199238592Smm			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
200238592Smm			    malloc(zc.zc_nvlist_dst_size);
201247831Smm#ifdef illumos
202238592Smm			if (zc.zc_nvlist_dst == NULL) {
203247831Smm#else
204247831Smm			if (zc.zc_nvlist_dst == 0) {
205247831Smm#endif
206238592Smm				error = ENOMEM;
207238592Smm				goto out;
208238592Smm			}
209238592Smm		} else {
210238592Smm			error = errno;
211238592Smm			break;
212238592Smm		}
213238592Smm	}
214248435Smm
215248435Smm#ifdef __FreeBSD__
216248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
217248435Smm		lzc_compat_post(&zc, ioc);
218248435Smm#endif
219238592Smm	if (zc.zc_nvlist_dst_filled) {
220238592Smm		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
221238592Smm		    zc.zc_nvlist_dst_size);
222238592Smm	}
223248435Smm#ifdef __FreeBSD__
224248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC)
225248435Smm		lzc_compat_outnvl(&zc, ioc, resultp);
226248435Smm#endif
227238592Smmout:
228248435Smm#ifdef __FreeBSD__
229248461Smm	if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
230248435Smm		if (source != oldsource)
231248435Smm			nvlist_free(source);
232248435Smm		source = oldsource;
233248435Smm	}
234248435Smm#endif
235238592Smm	fnvlist_pack_free(packed, size);
236238592Smm	free((void *)(uintptr_t)zc.zc_nvlist_dst);
237238592Smm	return (error);
238238592Smm}
239238592Smm
240238592Smmint
241298472Savglzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
242238592Smm{
243238592Smm	int error;
244238592Smm	nvlist_t *args = fnvlist_alloc();
245298472Savg	fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
246238592Smm	if (props != NULL)
247238592Smm		fnvlist_add_nvlist(args, "props", props);
248238592Smm	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
249238592Smm	nvlist_free(args);
250238592Smm	return (error);
251238592Smm}
252238592Smm
253238592Smmint
254238592Smmlzc_clone(const char *fsname, const char *origin,
255238592Smm    nvlist_t *props)
256238592Smm{
257238592Smm	int error;
258238592Smm	nvlist_t *args = fnvlist_alloc();
259238592Smm	fnvlist_add_string(args, "origin", origin);
260238592Smm	if (props != NULL)
261238592Smm		fnvlist_add_nvlist(args, "props", props);
262238592Smm	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
263238592Smm	nvlist_free(args);
264238592Smm	return (error);
265238592Smm}
266238592Smm
267321577Smavint
268321577Smavlzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
269321577Smav{
270321577Smav	/*
271321577Smav	 * The promote ioctl is still legacy, so we need to construct our
272321577Smav	 * own zfs_cmd_t rather than using lzc_ioctl().
273321577Smav	 */
274321577Smav	zfs_cmd_t zc = { 0 };
275321577Smav
276321577Smav	ASSERT3S(g_refcount, >, 0);
277321577Smav	VERIFY3S(g_fd, !=, -1);
278321577Smav
279321577Smav	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
280321577Smav	if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
281321577Smav		int error = errno;
282321577Smav		if (error == EEXIST && snapnamebuf != NULL)
283321577Smav			(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
284321577Smav		return (error);
285321577Smav	}
286321577Smav	return (0);
287321577Smav}
288321577Smav
289332525Smavint
290332525Smavlzc_remap(const char *fsname)
291332525Smav{
292332525Smav	int error;
293332525Smav	nvlist_t *args = fnvlist_alloc();
294332525Smav	error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
295332525Smav	nvlist_free(args);
296332525Smav	return (error);
297332525Smav}
298332525Smav
299342943Savgint
300342943Savglzc_rename(const char *source, const char *target)
301342943Savg{
302342943Savg	zfs_cmd_t zc = { 0 };
303342943Savg	int error;
304342943Savg
305342943Savg	ASSERT3S(g_refcount, >, 0);
306342943Savg	VERIFY3S(g_fd, !=, -1);
307342943Savg
308342943Savg	(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
309342943Savg	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
310342943Savg	error = ioctl(g_fd, ZFS_IOC_RENAME, &zc);
311342943Savg	if (error != 0)
312342943Savg		error = errno;
313342943Savg	return (error);
314342943Savg}
315342943Savg
316342943Savgint
317342943Savglzc_destroy(const char *fsname)
318342943Savg{
319342943Savg	int error;
320342943Savg
321342943Savg	nvlist_t *args = fnvlist_alloc();
322342943Savg	error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
323342943Savg	nvlist_free(args);
324342943Savg	return (error);
325342943Savg}
326342943Savg
327238592Smm/*
328238592Smm * Creates snapshots.
329238592Smm *
330238592Smm * The keys in the snaps nvlist are the snapshots to be created.
331238592Smm * They must all be in the same pool.
332238592Smm *
333238592Smm * The props nvlist is properties to set.  Currently only user properties
334238592Smm * are supported.  { user:prop_name -> string value }
335238592Smm *
336238592Smm * The returned results nvlist will have an entry for each snapshot that failed.
337238592Smm * The value will be the (int32) error code.
338238592Smm *
339238592Smm * The return value will be 0 if all snapshots were created, otherwise it will
340248498Smm * be the errno of a (unspecified) snapshot that failed.
341238592Smm */
342238592Smmint
343238592Smmlzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
344238592Smm{
345238592Smm	nvpair_t *elem;
346238592Smm	nvlist_t *args;
347238592Smm	int error;
348307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
349238592Smm
350238592Smm	*errlist = NULL;
351238592Smm
352238592Smm	/* determine the pool name */
353238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
354238592Smm	if (elem == NULL)
355238592Smm		return (0);
356238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
357238592Smm	pool[strcspn(pool, "/@")] = '\0';
358238592Smm
359238592Smm	args = fnvlist_alloc();
360238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
361238592Smm	if (props != NULL)
362238592Smm		fnvlist_add_nvlist(args, "props", props);
363238592Smm
364238592Smm	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
365238592Smm	nvlist_free(args);
366238592Smm
367238592Smm	return (error);
368238592Smm}
369238592Smm
370238592Smm/*
371238592Smm * Destroys snapshots.
372238592Smm *
373238592Smm * The keys in the snaps nvlist are the snapshots to be destroyed.
374238592Smm * They must all be in the same pool.
375238592Smm *
376238592Smm * Snapshots that do not exist will be silently ignored.
377238592Smm *
378238592Smm * If 'defer' is not set, and a snapshot has user holds or clones, the
379238592Smm * destroy operation will fail and none of the snapshots will be
380238592Smm * destroyed.
381238592Smm *
382238592Smm * If 'defer' is set, and a snapshot has user holds or clones, it will be
383238592Smm * marked for deferred destruction, and will be destroyed when the last hold
384238592Smm * or clone is removed/destroyed.
385238592Smm *
386238592Smm * The return value will be 0 if all snapshots were destroyed (or marked for
387252219Sdelphij * later destruction if 'defer' is set) or didn't exist to begin with.
388238592Smm *
389248498Smm * Otherwise the return value will be the errno of a (unspecified) snapshot
390238592Smm * that failed, no snapshots will be destroyed, and the errlist will have an
391238592Smm * entry for each snapshot that failed.  The value in the errlist will be
392238592Smm * the (int32) error code.
393238592Smm */
394238592Smmint
395238592Smmlzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
396238592Smm{
397238592Smm	nvpair_t *elem;
398238592Smm	nvlist_t *args;
399238592Smm	int error;
400307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
401238592Smm
402238592Smm	/* determine the pool name */
403238592Smm	elem = nvlist_next_nvpair(snaps, NULL);
404238592Smm	if (elem == NULL)
405238592Smm		return (0);
406238592Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
407238592Smm	pool[strcspn(pool, "/@")] = '\0';
408238592Smm
409238592Smm	args = fnvlist_alloc();
410238592Smm	fnvlist_add_nvlist(args, "snaps", snaps);
411238592Smm	if (defer)
412238592Smm		fnvlist_add_boolean(args, "defer");
413238592Smm
414238592Smm	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
415238592Smm	nvlist_free(args);
416238592Smm
417238592Smm	return (error);
418238592Smm}
419238592Smm
420238592Smmint
421238592Smmlzc_snaprange_space(const char *firstsnap, const char *lastsnap,
422238592Smm    uint64_t *usedp)
423238592Smm{
424238592Smm	nvlist_t *args;
425238592Smm	nvlist_t *result;
426238592Smm	int err;
427307108Smav	char fs[ZFS_MAX_DATASET_NAME_LEN];
428238592Smm	char *atp;
429238592Smm
430238592Smm	/* determine the fs name */
431238592Smm	(void) strlcpy(fs, firstsnap, sizeof (fs));
432238592Smm	atp = strchr(fs, '@');
433238592Smm	if (atp == NULL)
434238592Smm		return (EINVAL);
435238592Smm	*atp = '\0';
436238592Smm
437238592Smm	args = fnvlist_alloc();
438238592Smm	fnvlist_add_string(args, "firstsnap", firstsnap);
439238592Smm
440238592Smm	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
441238592Smm	nvlist_free(args);
442238592Smm	if (err == 0)
443238592Smm		*usedp = fnvlist_lookup_uint64(result, "used");
444238592Smm	fnvlist_free(result);
445238592Smm
446238592Smm	return (err);
447238592Smm}
448238592Smm
449238592Smmboolean_t
450238592Smmlzc_exists(const char *dataset)
451238592Smm{
452238592Smm	/*
453238592Smm	 * The objset_stats ioctl is still legacy, so we need to construct our
454321577Smav	 * own zfs_cmd_t rather than using lzc_ioctl().
455238592Smm	 */
456238592Smm	zfs_cmd_t zc = { 0 };
457238592Smm
458331393Smav	ASSERT3S(g_refcount, >, 0);
459331393Smav	VERIFY3S(g_fd, !=, -1);
460331393Smav
461238592Smm	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
462238592Smm	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
463238592Smm}
464238592Smm
465238592Smm/*
466248498Smm * Create "user holds" on snapshots.  If there is a hold on a snapshot,
467248498Smm * the snapshot can not be destroyed.  (However, it can be marked for deletion
468248498Smm * by lzc_destroy_snaps(defer=B_TRUE).)
469248498Smm *
470248498Smm * The keys in the nvlist are snapshot names.
471248498Smm * The snapshots must all be in the same pool.
472248498Smm * The value is the name of the hold (string type).
473248498Smm *
474248498Smm * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
475248498Smm * In this case, when the cleanup_fd is closed (including on process
476248498Smm * termination), the holds will be released.  If the system is shut down
477248498Smm * uncleanly, the holds will be released when the pool is next opened
478248498Smm * or imported.
479248498Smm *
480251646Sdelphij * Holds for snapshots which don't exist will be skipped and have an entry
481252219Sdelphij * added to errlist, but will not cause an overall failure.
482251646Sdelphij *
483252219Sdelphij * The return value will be 0 if all holds, for snapshots that existed,
484252219Sdelphij * were succesfully created.
485251646Sdelphij *
486251646Sdelphij * Otherwise the return value will be the errno of a (unspecified) hold that
487251646Sdelphij * failed and no holds will be created.
488251646Sdelphij *
489251646Sdelphij * In all cases the errlist will have an entry for each hold that failed
490251646Sdelphij * (name = snapshot), with its value being the error code (int32).
491248498Smm */
492248498Smmint
493248498Smmlzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
494248498Smm{
495307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
496248498Smm	nvlist_t *args;
497248498Smm	nvpair_t *elem;
498248498Smm	int error;
499248498Smm
500248498Smm	/* determine the pool name */
501248498Smm	elem = nvlist_next_nvpair(holds, NULL);
502248498Smm	if (elem == NULL)
503248498Smm		return (0);
504248498Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
505248498Smm	pool[strcspn(pool, "/@")] = '\0';
506248498Smm
507248498Smm	args = fnvlist_alloc();
508248498Smm	fnvlist_add_nvlist(args, "holds", holds);
509248498Smm	if (cleanup_fd != -1)
510248498Smm		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
511248498Smm
512248498Smm	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
513248498Smm	nvlist_free(args);
514248498Smm	return (error);
515248498Smm}
516248498Smm
517248498Smm/*
518248498Smm * Release "user holds" on snapshots.  If the snapshot has been marked for
519248498Smm * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
520248498Smm * any clones, and all the user holds are removed, then the snapshot will be
521248498Smm * destroyed.
522248498Smm *
523248498Smm * The keys in the nvlist are snapshot names.
524248498Smm * The snapshots must all be in the same pool.
525248498Smm * The value is a nvlist whose keys are the holds to remove.
526248498Smm *
527251646Sdelphij * Holds which failed to release because they didn't exist will have an entry
528252219Sdelphij * added to errlist, but will not cause an overall failure.
529251646Sdelphij *
530251646Sdelphij * The return value will be 0 if the nvl holds was empty or all holds that
531252219Sdelphij * existed, were successfully removed.
532251646Sdelphij *
533251646Sdelphij * Otherwise the return value will be the errno of a (unspecified) hold that
534251646Sdelphij * failed to release and no holds will be released.
535251646Sdelphij *
536251646Sdelphij * In all cases the errlist will have an entry for each hold that failed to
537251646Sdelphij * to release.
538248498Smm */
539248498Smmint
540248498Smmlzc_release(nvlist_t *holds, nvlist_t **errlist)
541248498Smm{
542307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
543248498Smm	nvpair_t *elem;
544248498Smm
545248498Smm	/* determine the pool name */
546248498Smm	elem = nvlist_next_nvpair(holds, NULL);
547248498Smm	if (elem == NULL)
548248498Smm		return (0);
549248498Smm	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
550248498Smm	pool[strcspn(pool, "/@")] = '\0';
551248498Smm
552248498Smm	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
553248498Smm}
554248498Smm
555248498Smm/*
556248498Smm * Retrieve list of user holds on the specified snapshot.
557248498Smm *
558248498Smm * On success, *holdsp will be set to a nvlist which the caller must free.
559248498Smm * The keys are the names of the holds, and the value is the creation time
560248498Smm * of the hold (uint64) in seconds since the epoch.
561248498Smm */
562248498Smmint
563248498Smmlzc_get_holds(const char *snapname, nvlist_t **holdsp)
564248498Smm{
565248498Smm	int error;
566248498Smm	nvlist_t *innvl = fnvlist_alloc();
567248498Smm	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
568248498Smm	fnvlist_free(innvl);
569248498Smm	return (error);
570248498Smm}
571248498Smm
572248498Smm/*
573268075Sdelphij * Generate a zfs send stream for the specified snapshot and write it to
574268075Sdelphij * the specified file descriptor.
575260183Sdelphij *
576260183Sdelphij * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
577260183Sdelphij *
578260183Sdelphij * If "from" is NULL, a full (non-incremental) stream will be sent.
579260183Sdelphij * If "from" is non-NULL, it must be the full name of a snapshot or
580260183Sdelphij * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
581260183Sdelphij * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
582260183Sdelphij * bookmark must represent an earlier point in the history of "snapname").
583260183Sdelphij * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
584260183Sdelphij * or it can be the origin of "snapname"'s filesystem, or an earlier
585260183Sdelphij * snapshot in the origin, etc.
586260183Sdelphij *
587260183Sdelphij * "fd" is the file descriptor to write the send stream to.
588268075Sdelphij *
589274337Sdelphij * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
590274337Sdelphij * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
591274337Sdelphij * records with drr_blksz > 128K.
592274337Sdelphij *
593268075Sdelphij * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
594268075Sdelphij * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
595268075Sdelphij * which the receiving system must support (as indicated by support
596268075Sdelphij * for the "embedded_data" feature).
597238592Smm */
598238592Smmint
599268075Sdelphijlzc_send(const char *snapname, const char *from, int fd,
600268075Sdelphij    enum lzc_send_flags flags)
601238592Smm{
602289362Smav	return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
603289362Smav}
604289362Smav
605289362Smavint
606289362Smavlzc_send_resume(const char *snapname, const char *from, int fd,
607289362Smav    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
608289362Smav{
609238592Smm	nvlist_t *args;
610238592Smm	int err;
611238592Smm
612238592Smm	args = fnvlist_alloc();
613238592Smm	fnvlist_add_int32(args, "fd", fd);
614260183Sdelphij	if (from != NULL)
615260183Sdelphij		fnvlist_add_string(args, "fromsnap", from);
616274337Sdelphij	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
617274337Sdelphij		fnvlist_add_boolean(args, "largeblockok");
618268075Sdelphij	if (flags & LZC_SEND_FLAG_EMBED_DATA)
619268075Sdelphij		fnvlist_add_boolean(args, "embedok");
620321535Smav	if (flags & LZC_SEND_FLAG_COMPRESS)
621321535Smav		fnvlist_add_boolean(args, "compressok");
622289362Smav	if (resumeobj != 0 || resumeoff != 0) {
623289362Smav		fnvlist_add_uint64(args, "resume_object", resumeobj);
624289362Smav		fnvlist_add_uint64(args, "resume_offset", resumeoff);
625289362Smav	}
626238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
627238592Smm	nvlist_free(args);
628238592Smm	return (err);
629238592Smm}
630238592Smm
631238592Smm/*
632286683Smav * "from" can be NULL, a snapshot, or a bookmark.
633286683Smav *
634286683Smav * If from is NULL, a full (non-incremental) stream will be estimated.  This
635286683Smav * is calculated very efficiently.
636286683Smav *
637286683Smav * If from is a snapshot, lzc_send_space uses the deadlists attached to
638286683Smav * each snapshot to efficiently estimate the stream size.
639286683Smav *
640286683Smav * If from is a bookmark, the indirect blocks in the destination snapshot
641286683Smav * are traversed, looking for blocks with a birth time since the creation TXG of
642286683Smav * the snapshot this bookmark was created from.  This will result in
643286683Smav * significantly more I/O and be less efficient than a send space estimation on
644286683Smav * an equivalent snapshot.
645238592Smm */
646238592Smmint
647321535Smavlzc_send_space(const char *snapname, const char *from,
648321535Smav    enum lzc_send_flags flags, uint64_t *spacep)
649238592Smm{
650238592Smm	nvlist_t *args;
651238592Smm	nvlist_t *result;
652238592Smm	int err;
653238592Smm
654238592Smm	args = fnvlist_alloc();
655286683Smav	if (from != NULL)
656286683Smav		fnvlist_add_string(args, "from", from);
657321535Smav	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
658321535Smav		fnvlist_add_boolean(args, "largeblockok");
659321535Smav	if (flags & LZC_SEND_FLAG_EMBED_DATA)
660321535Smav		fnvlist_add_boolean(args, "embedok");
661321535Smav	if (flags & LZC_SEND_FLAG_COMPRESS)
662321535Smav		fnvlist_add_boolean(args, "compressok");
663238592Smm	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
664238592Smm	nvlist_free(args);
665238592Smm	if (err == 0)
666238592Smm		*spacep = fnvlist_lookup_uint64(result, "space");
667238592Smm	nvlist_free(result);
668238592Smm	return (err);
669238592Smm}
670238592Smm
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short reads.
 *
 * Returns 0 once the buffer has been filled, or EIO if read(2) fails or
 * end-of-file is reached first.  A read interrupted by a signal (EINTR)
 * is retried rather than reported as a failure.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;		/* premature end-of-file */

		/* Advance only by what was actually read. */
		cp += rv;
		len -= rv;
	}

	/* A non-zero remainder means EOF (or a negative ilen). */
	return (len == 0 ? 0 : EIO);
}
689238592Smm
690289362Smavstatic int
691308763Savgrecv_impl(const char *snapname, nvlist_t *props, const char *origin,
692308763Savg    boolean_t force, boolean_t resumable, int fd,
693308763Savg    const dmu_replay_record_t *begin_record)
694238592Smm{
695238592Smm	/*
696238592Smm	 * The receive ioctl is still legacy, so we need to construct our own
697238592Smm	 * zfs_cmd_t rather than using zfsc_ioctl().
698238592Smm	 */
699238592Smm	zfs_cmd_t zc = { 0 };
700238592Smm	char *atp;
701238592Smm	char *packed = NULL;
702238592Smm	size_t size;
703238592Smm	int error;
704238592Smm
705238592Smm	ASSERT3S(g_refcount, >, 0);
706331393Smav	VERIFY3S(g_fd, !=, -1);
707238592Smm
708238592Smm	/* zc_name is name of containing filesystem */
709238592Smm	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
710238592Smm	atp = strchr(zc.zc_name, '@');
711238592Smm	if (atp == NULL)
712238592Smm		return (EINVAL);
713238592Smm	*atp = '\0';
714238592Smm
715238592Smm	/* if the fs does not exist, try its parent. */
716238592Smm	if (!lzc_exists(zc.zc_name)) {
717238592Smm		char *slashp = strrchr(zc.zc_name, '/');
718238592Smm		if (slashp == NULL)
719238592Smm			return (ENOENT);
720238592Smm		*slashp = '\0';
721238592Smm
722238592Smm	}
723238592Smm
724238592Smm	/* zc_value is full name of the snapshot to create */
725238592Smm	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
726238592Smm
727238592Smm	if (props != NULL) {
728238592Smm		/* zc_nvlist_src is props to set */
729238592Smm		packed = fnvlist_pack(props, &size);
730238592Smm		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
731238592Smm		zc.zc_nvlist_src_size = size;
732238592Smm	}
733238592Smm
734238592Smm	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
735238592Smm	if (origin != NULL)
736238592Smm		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
737238592Smm
738238592Smm	/* zc_begin_record is non-byteswapped BEGIN record */
739308763Savg	if (begin_record == NULL) {
740308763Savg		error = recv_read(fd, &zc.zc_begin_record,
741308763Savg		    sizeof (zc.zc_begin_record));
742308763Savg		if (error != 0)
743308763Savg			goto out;
744308763Savg	} else {
745308763Savg		zc.zc_begin_record = *begin_record;
746308763Savg	}
747238592Smm
748238592Smm	/* zc_cookie is fd to read from */
749238592Smm	zc.zc_cookie = fd;
750238592Smm
751238592Smm	/* zc guid is force flag */
752238592Smm	zc.zc_guid = force;
753238592Smm
754289362Smav	zc.zc_resumable = resumable;
755289362Smav
756238592Smm	/* zc_cleanup_fd is unused */
757238592Smm	zc.zc_cleanup_fd = -1;
758238592Smm
759238592Smm	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
760238592Smm	if (error != 0)
761238592Smm		error = errno;
762238592Smm
763238592Smmout:
764238592Smm	if (packed != NULL)
765238592Smm		fnvlist_pack_free(packed, size);
766238592Smm	free((void*)(uintptr_t)zc.zc_nvlist_dst);
767238592Smm	return (error);
768238592Smm}
769254587Sdelphij
770254587Sdelphij/*
771289362Smav * The simplest receive case: receive from the specified fd, creating the
772289362Smav * specified snapshot.  Apply the specified properties as "received" properties
773289362Smav * (which can be overridden by locally-set properties).  If the stream is a
774289362Smav * clone, its origin snapshot must be specified by 'origin'.  The 'force'
775289362Smav * flag will cause the target filesystem to be rolled back or destroyed if
776289362Smav * necessary to receive.
777289362Smav *
778289362Smav * Return 0 on success or an errno on failure.
779289362Smav *
780289362Smav * Note: this interface does not work on dedup'd streams
781289362Smav * (those with DMU_BACKUP_FEATURE_DEDUP).
782289362Smav */
783289362Smavint
784289362Smavlzc_receive(const char *snapname, nvlist_t *props, const char *origin,
785289362Smav    boolean_t force, int fd)
786289362Smav{
787308763Savg	return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL));
788289362Smav}
789289362Smav
790289362Smav/*
791289362Smav * Like lzc_receive, but if the receive fails due to premature stream
792289362Smav * termination, the intermediate state will be preserved on disk.  In this
793289362Smav * case, ECKSUM will be returned.  The receive may subsequently be resumed
794289362Smav * with a resuming send stream generated by lzc_send_resume().
795289362Smav */
796289362Smavint
797289362Smavlzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
798289362Smav    boolean_t force, int fd)
799289362Smav{
800308763Savg	return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL));
801289362Smav}
802289362Smav
803289362Smav/*
804308763Savg * Like lzc_receive, but allows the caller to read the begin record and then to
805308763Savg * pass it in.  That could be useful if the caller wants to derive, for example,
806308763Savg * the snapname or the origin parameters based on the information contained in
807308763Savg * the begin record.
808308763Savg * The begin record must be in its original form as read from the stream,
809308763Savg * in other words, it should not be byteswapped.
810308763Savg *
811308763Savg * The 'resumable' parameter allows to obtain the same behavior as with
812308763Savg * lzc_receive_resumable.
813308763Savg */
814308763Savgint
815308763Savglzc_receive_with_header(const char *snapname, nvlist_t *props,
816308763Savg    const char *origin, boolean_t force, boolean_t resumable, int fd,
817308763Savg    const dmu_replay_record_t *begin_record)
818308763Savg{
819308763Savg	if (begin_record == NULL)
820308763Savg		return (EINVAL);
821308763Savg	return (recv_impl(snapname, props, origin, force, resumable, fd,
822308763Savg	    begin_record));
823308763Savg}
824308763Savg
825308763Savg/*
826254587Sdelphij * Roll back this filesystem or volume to its most recent snapshot.
827254587Sdelphij * If snapnamebuf is not NULL, it will be filled in with the name
828254587Sdelphij * of the most recent snapshot.
829323757Savg * Note that the latest snapshot may change if a new one is concurrently
830323757Savg * created or the current one is destroyed.  lzc_rollback_to can be used
831323757Savg * to roll back to a specific latest snapshot.
832254587Sdelphij *
833254587Sdelphij * Return 0 on success or an errno on failure.
834254587Sdelphij */
835254587Sdelphijint
836254587Sdelphijlzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
837254587Sdelphij{
838254587Sdelphij	nvlist_t *args;
839254587Sdelphij	nvlist_t *result;
840254587Sdelphij	int err;
841254587Sdelphij
842254587Sdelphij	args = fnvlist_alloc();
843254587Sdelphij	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
844254587Sdelphij	nvlist_free(args);
845254587Sdelphij	if (err == 0 && snapnamebuf != NULL) {
846254587Sdelphij		const char *snapname = fnvlist_lookup_string(result, "target");
847254587Sdelphij		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
848254587Sdelphij	}
849321544Smav	nvlist_free(result);
850321544Smav
851254587Sdelphij	return (err);
852254587Sdelphij}
853260183Sdelphij
854260183Sdelphij/*
855323757Savg * Roll back this filesystem or volume to the specified snapshot,
856323757Savg * if possible.
857323757Savg *
858323757Savg * Return 0 on success or an errno on failure.
859323757Savg */
860323757Savgint
861323757Savglzc_rollback_to(const char *fsname, const char *snapname)
862323757Savg{
863323757Savg	nvlist_t *args;
864323757Savg	nvlist_t *result;
865323757Savg	int err;
866323757Savg
867323757Savg	args = fnvlist_alloc();
868323757Savg	fnvlist_add_string(args, "target", snapname);
869323757Savg	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
870323757Savg	nvlist_free(args);
871323757Savg	nvlist_free(result);
872323757Savg	return (err);
873323757Savg}
874323757Savg
875323757Savg/*
876260183Sdelphij * Creates bookmarks.
877260183Sdelphij *
878260183Sdelphij * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
879260183Sdelphij * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
880260183Sdelphij * snapshots must be in the same pool.
881260183Sdelphij *
882260183Sdelphij * The returned results nvlist will have an entry for each bookmark that failed.
883260183Sdelphij * The value will be the (int32) error code.
884260183Sdelphij *
885260183Sdelphij * The return value will be 0 if all bookmarks were created, otherwise it will
886260183Sdelphij * be the errno of a (undetermined) bookmarks that failed.
887260183Sdelphij */
888260183Sdelphijint
889260183Sdelphijlzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
890260183Sdelphij{
891260183Sdelphij	nvpair_t *elem;
892260183Sdelphij	int error;
893307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
894260183Sdelphij
895260183Sdelphij	/* determine the pool name */
896260183Sdelphij	elem = nvlist_next_nvpair(bookmarks, NULL);
897260183Sdelphij	if (elem == NULL)
898260183Sdelphij		return (0);
899260183Sdelphij	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
900260183Sdelphij	pool[strcspn(pool, "/#")] = '\0';
901260183Sdelphij
902260183Sdelphij	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
903260183Sdelphij
904260183Sdelphij	return (error);
905260183Sdelphij}
906260183Sdelphij
907260183Sdelphij/*
908260183Sdelphij * Retrieve bookmarks.
909260183Sdelphij *
910260183Sdelphij * Retrieve the list of bookmarks for the given file system. The props
911260183Sdelphij * parameter is an nvlist of property names (with no values) that will be
912260183Sdelphij * returned for each bookmark.
913260183Sdelphij *
914260183Sdelphij * The following are valid properties on bookmarks, all of which are numbers
915260183Sdelphij * (represented as uint64 in the nvlist)
916260183Sdelphij *
917260183Sdelphij * "guid" - globally unique identifier of the snapshot it refers to
918260183Sdelphij * "createtxg" - txg when the snapshot it refers to was created
919260183Sdelphij * "creation" - timestamp when the snapshot it refers to was created
920260183Sdelphij *
921260183Sdelphij * The format of the returned nvlist as follows:
922260183Sdelphij * <short name of bookmark> -> {
923260183Sdelphij *     <name of property> -> {
924260183Sdelphij *         "value" -> uint64
925260183Sdelphij *     }
926260183Sdelphij *  }
927260183Sdelphij */
928260183Sdelphijint
929260183Sdelphijlzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
930260183Sdelphij{
931260183Sdelphij	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
932260183Sdelphij}
933260183Sdelphij
934260183Sdelphij/*
935260183Sdelphij * Destroys bookmarks.
936260183Sdelphij *
937260183Sdelphij * The keys in the bmarks nvlist are the bookmarks to be destroyed.
938260183Sdelphij * They must all be in the same pool.  Bookmarks are specified as
939260183Sdelphij * <fs>#<bmark>.
940260183Sdelphij *
941260183Sdelphij * Bookmarks that do not exist will be silently ignored.
942260183Sdelphij *
943260183Sdelphij * The return value will be 0 if all bookmarks that existed were destroyed.
944260183Sdelphij *
945260183Sdelphij * Otherwise the return value will be the errno of a (undetermined) bookmark
946260183Sdelphij * that failed, no bookmarks will be destroyed, and the errlist will have an
947260183Sdelphij * entry for each bookmarks that failed.  The value in the errlist will be
948260183Sdelphij * the (int32) error code.
949260183Sdelphij */
950260183Sdelphijint
951260183Sdelphijlzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
952260183Sdelphij{
953260183Sdelphij	nvpair_t *elem;
954260183Sdelphij	int error;
955307108Smav	char pool[ZFS_MAX_DATASET_NAME_LEN];
956260183Sdelphij
957260183Sdelphij	/* determine the pool name */
958260183Sdelphij	elem = nvlist_next_nvpair(bmarks, NULL);
959260183Sdelphij	if (elem == NULL)
960260183Sdelphij		return (0);
961260183Sdelphij	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
962260183Sdelphij	pool[strcspn(pool, "/#")] = '\0';
963260183Sdelphij
964260183Sdelphij	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
965260183Sdelphij
966260183Sdelphij	return (error);
967260183Sdelphij}
968325534Savg
969329484Smavstatic int
970329484Smavlzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
971329484Smav    uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
972329484Smav{
973329484Smav	int error;
974329484Smav	nvlist_t *args;
975329484Smav
976329484Smav	args = fnvlist_alloc();
977329484Smav	fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
978329484Smav	fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
979329484Smav	fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
980329484Smav	fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
981329484Smav	fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
982329484Smav	error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
983329484Smav	fnvlist_free(args);
984329484Smav
985329484Smav	return (error);
986329484Smav}
987329484Smav
988325534Savg/*
989325534Savg * Executes a channel program.
990325534Savg *
991325534Savg * If this function returns 0 the channel program was successfully loaded and
992325534Savg * ran without failing. Note that individual commands the channel program ran
993325534Savg * may have failed and the channel program is responsible for reporting such
994325534Savg * errors through outnvl if they are important.
995325534Savg *
996325534Savg * This method may also return:
997325534Savg *
998325534Savg * EINVAL   The program contains syntax errors, or an invalid memory or time
999325534Savg *          limit was given. No part of the channel program was executed.
1000325534Savg *          If caused by syntax errors, 'outnvl' contains information about the
1001325534Savg *          errors.
1002325534Savg *
1003325534Savg * EDOM     The program was executed, but encountered a runtime error, such as
1004325534Savg *          calling a function with incorrect arguments, invoking the error()
1005325534Savg *          function directly, failing an assert() command, etc. Some portion
1006325534Savg *          of the channel program may have executed and committed changes.
1007325534Savg *          Information about the failure can be found in 'outnvl'.
1008325534Savg *
1009325534Savg * ENOMEM   The program fully executed, but the output buffer was not large
1010325534Savg *          enough to store the returned value. No output is returned through
1011325534Savg *          'outnvl'.
1012325534Savg *
1013325534Savg * ENOSPC   The program was terminated because it exceeded its memory usage
1014325534Savg *          limit. Some portion of the channel program may have executed and
1015325534Savg *          committed changes to disk. No output is returned through 'outnvl'.
1016325534Savg *
1017325534Savg * ETIMEDOUT The program was terminated because it exceeded its Lua instruction
1018325534Savg *           limit. Some portion of the channel program may have executed and
1019325534Savg *           committed changes to disk. No output is returned through 'outnvl'.
1020325534Savg */
1021325534Savgint
1022325534Savglzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1023325534Savg    uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1024325534Savg{
1025329484Smav	return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1026329484Smav	    memlimit, argnvl, outnvl));
1027329484Smav}
1028325534Savg
1029329484Smav/*
1030332547Smav * Creates a checkpoint for the specified pool.
1031332547Smav *
1032332547Smav * If this function returns 0 the pool was successfully checkpointed.
1033332547Smav *
1034332547Smav * This method may also return:
1035332547Smav *
1036332547Smav * ZFS_ERR_CHECKPOINT_EXISTS
1037332547Smav *	The pool already has a checkpoint. A pools can only have one
1038332547Smav *	checkpoint at most, at any given time.
1039332547Smav *
1040332547Smav * ZFS_ERR_DISCARDING_CHECKPOINT
1041332547Smav * 	ZFS is in the middle of discarding a checkpoint for this pool.
1042332547Smav * 	The pool can be checkpointed again once the discard is done.
1043332547Smav *
1044332547Smav * ZFS_DEVRM_IN_PROGRESS
1045332547Smav * 	A vdev is currently being removed. The pool cannot be
1046332547Smav * 	checkpointed until the device removal is done.
1047332547Smav *
1048332547Smav * ZFS_VDEV_TOO_BIG
1049332547Smav * 	One or more top-level vdevs exceed the maximum vdev size
1050332547Smav * 	supported for this feature.
1051332547Smav */
1052332547Smavint
1053332547Smavlzc_pool_checkpoint(const char *pool)
1054332547Smav{
1055332547Smav	int error;
1056332547Smav
1057332547Smav	nvlist_t *result = NULL;
1058332547Smav	nvlist_t *args = fnvlist_alloc();
1059332547Smav
1060332547Smav	error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1061332547Smav
1062332547Smav	fnvlist_free(args);
1063332547Smav	fnvlist_free(result);
1064332547Smav
1065332547Smav	return (error);
1066332547Smav}
1067332547Smav
1068332547Smav/*
1069332547Smav * Discard the checkpoint from the specified pool.
1070332547Smav *
1071332547Smav * If this function returns 0 the checkpoint was successfully discarded.
1072332547Smav *
1073332547Smav * This method may also return:
1074332547Smav *
1075332547Smav * ZFS_ERR_NO_CHECKPOINT
1076332547Smav * 	The pool does not have a checkpoint.
1077332547Smav *
1078332547Smav * ZFS_ERR_DISCARDING_CHECKPOINT
1079332547Smav * 	ZFS is already in the middle of discarding the checkpoint.
1080332547Smav */
1081332547Smavint
1082332547Smavlzc_pool_checkpoint_discard(const char *pool)
1083332547Smav{
1084332547Smav	int error;
1085332547Smav
1086332547Smav	nvlist_t *result = NULL;
1087332547Smav	nvlist_t *args = fnvlist_alloc();
1088332547Smav
1089332547Smav	error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1090332547Smav
1091332547Smav	fnvlist_free(args);
1092332547Smav	fnvlist_free(result);
1093332547Smav
1094332547Smav	return (error);
1095332547Smav}
1096332547Smav
1097332547Smav/*
1098329484Smav * Executes a read-only channel program.
1099329484Smav *
1100329484Smav * A read-only channel program works programmatically the same way as a
1101329484Smav * normal channel program executed with lzc_channel_program(). The only
1102329484Smav * difference is it runs exclusively in open-context and therefore can
1103329484Smav * return faster. The downside to that, is that the program cannot change
1104329484Smav * on-disk state by calling functions from the zfs.sync submodule.
1105329484Smav *
1106329484Smav * The return values of this function (and their meaning) are exactly the
1107329484Smav * same as the ones described in lzc_channel_program().
1108329484Smav */
1109329484Smavint
1110329484Smavlzc_channel_program_nosync(const char *pool, const char *program,
1111329484Smav    uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1112329484Smav{
1113329484Smav	return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1114329484Smav	    memlimit, argnvl, outnvl));
1115325534Savg}
1116339111Smav
1117339111Smav/*
1118339111Smav * Changes initializing state.
1119339111Smav *
1120339111Smav * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1121339111Smav * The key is ignored.
1122339111Smav *
1123339111Smav * If there are errors related to vdev arguments, per-vdev errors are returned
1124339111Smav * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1125339111Smav * guid is stringified with PRIu64, and errno is one of the following as
1126339111Smav * an int64_t:
1127339111Smav *	- ENODEV if the device was not found
1128339111Smav *	- EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
1129339111Smav *	- EROFS if the device is not writeable
1130339111Smav *	- EBUSY start requested but the device is already being initialized
1131339111Smav *	- ESRCH cancel/suspend requested but device is not being initialized
1132339111Smav *
1133339111Smav * If the errlist is empty, then return value will be:
1134339111Smav *	- EINVAL if one or more arguments was invalid
1135339111Smav *	- Other spa_open failures
1136339111Smav *	- 0 if the operation succeeded
1137339111Smav */
1138339111Smavint
1139339111Smavlzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1140339111Smav    nvlist_t *vdevs, nvlist_t **errlist)
1141339111Smav{
1142339111Smav	int error;
1143339111Smav	nvlist_t *args = fnvlist_alloc();
1144339111Smav	fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1145339111Smav	fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1146339111Smav
1147339111Smav	error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1148339111Smav
1149339111Smav	fnvlist_free(args);
1150339111Smav
1151339111Smav	return (error);
1152339111Smav}
1153