/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27 * Copyright 2017 RackTop Systems.
28 * Copyright (c) 2018 Datto Inc.
29 * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
30 */
31
32/*
33 * Routines to manage ZFS mounts.  We separate all the nasty routines that have
34 * to deal with the OS.  The following functions are the main entry points --
35 * they are used by mount and unmount and when changing a filesystem's
36 * mountpoint.
37 *
 *	zfs_is_mounted()
 *	zfs_mount()
 *	zfs_mount_at()
 *	zfs_unmount()
 *	zfs_unmountall()
43 *
44 * This file also contains the functions used to manage sharing filesystems:
45 *
 *	zfs_is_shared()
 *	zfs_share()
 *	zfs_unshare()
 *	zfs_unshareall()
 *	zfs_commit_shares()
51 *
 * The following functions are available for pool consumers, and will
 * mount/unmount and share/unshare all datasets within the pool:
 *
 *	zpool_enable_datasets()
 *	zpool_disable_datasets()
57 */
58
59#include <dirent.h>
60#include <dlfcn.h>
61#include <errno.h>
62#include <fcntl.h>
63#include <libgen.h>
64#include <libintl.h>
65#include <stdio.h>
66#include <stdlib.h>
67#include <string.h>
68#include <unistd.h>
69#include <zone.h>
70#include <sys/mntent.h>
71#include <sys/mount.h>
72#include <sys/stat.h>
73#include <sys/vfs.h>
74#include <sys/dsl_crypt.h>
75
76#include <libzfs.h>
77#include <libzutil.h>
78
79#include "libzfs_impl.h"
80#include <thread_pool.h>
81
82#include <libshare.h>
83#include <sys/systeminfo.h>
84#define	MAXISALEN	257	/* based on sysinfo(2) man page */
85
86static void zfs_mount_task(void *);
87
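/*
 * Table mapping each share protocol to the dataset property that controls it
 * and to the libzfs error codes used when sharing or unsharing fails.
 */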
88static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {
89	[SA_PROTOCOL_NFS] =
90	    {ZFS_PROP_SHARENFS, EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
91	[SA_PROTOCOL_SMB] =
92	    {ZFS_PROP_SHARESMB, EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
93};
94
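/*
 * List of all supported share protocols, terminated by SA_NO_PROTOCOL.
 * Used whenever a caller passes a NULL protocol list.
 */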
95static const enum sa_protocol share_all_proto[SA_PROTOCOL_COUNT + 1] = {
96	SA_PROTOCOL_NFS,
97	SA_PROTOCOL_SMB,
98	SA_NO_PROTOCOL
99};
100
101
102
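/*
 * Fast check for an empty directory using stat(2).  This relies on ZFS
 * reporting the number of directory entries in st_size, so a size of two
 * (just "." and "..") means the directory is empty.
 */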
103static boolean_t
104dir_is_empty_stat(const char *dirname)
105{
106	struct stat st;
107
	/*
	 * We only want to return B_FALSE if the given path is a non-empty
	 * directory; all other errors are handled elsewhere.
	 */
112	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
113		return (B_TRUE);
114	}
115
116	/*
117	 * An empty directory will still have two entries in it, one
118	 * entry for each of "." and "..".
119	 */
120	if (st.st_size > 2) {
121		return (B_FALSE);
122	}
123
124	return (B_TRUE);
125}
126
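/*
 * Slow-path check for an empty directory: open it and scan for any entry
 * other than "." and "..".  Errors opening the directory are treated as
 * "empty" so the subsequent mount attempt can report a better error.
 */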
127static boolean_t
128dir_is_empty_readdir(const char *dirname)
129{
130	DIR *dirp;
131	struct dirent64 *dp;
132	int dirfd;
133
134	if ((dirfd = openat(AT_FDCWD, dirname,
135	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
136		return (B_TRUE);
137	}
138
139	if ((dirp = fdopendir(dirfd)) == NULL) {
140		(void) close(dirfd);
141		return (B_TRUE);
142	}
143
144	while ((dp = readdir64(dirp)) != NULL) {
145
146		if (strcmp(dp->d_name, ".") == 0 ||
147		    strcmp(dp->d_name, "..") == 0)
148			continue;
149
150		(void) closedir(dirp);
151		return (B_FALSE);
152	}
153
154	(void) closedir(dirp);
155	return (B_TRUE);
156}
157
158/*
159 * Returns true if the specified directory is empty.  If we can't open the
160 * directory at all, return true so that the mount can fail with a more
161 * informative error message.
162 */
163static boolean_t
164dir_is_empty(const char *dirname)
165{
166	struct statfs64 st;
167
168	/*
	 * If the statfs() call fails or the filesystem is not a ZFS
170	 * filesystem, fall back to the slow path which uses readdir.
171	 */
172	if ((statfs64(dirname, &st) != 0) ||
173	    (st.f_type != ZFS_SUPER_MAGIC)) {
174		return (dir_is_empty_readdir(dirname));
175	}
176
177	/*
178	 * At this point, we know the provided path is on a ZFS
179	 * filesystem, so we can use stat instead of readdir to
180	 * determine if the directory is empty or not. We try to avoid
181	 * using readdir because that requires opening "dirname"; this
182	 * open file descriptor can potentially end up in a child
183	 * process if there's a concurrent fork, thus preventing the
184	 * zfs_mount() from otherwise succeeding (the open file
185	 * descriptor inherited by the child process will cause the
186	 * parent's mount to fail with EBUSY). The performance
187	 * implications of replacing the open, read, and close with a
188	 * single stat is nice; but is not the main motivation for the
189	 * added complexity.
190	 */
191	return (dir_is_empty_stat(dirname));
192}
193
194/*
 * Checks to see if the mount is active.  If the filesystem is mounted, we fill
 * in 'where' with the current mountpoint and return B_TRUE.  Otherwise, we
 * return B_FALSE.
198 */
199boolean_t
200is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
201{
202	struct mnttab entry;
203
204	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
205		return (B_FALSE);
206
207	if (where != NULL)
208		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
209
210	return (B_TRUE);
211}
212
213boolean_t
214zfs_is_mounted(zfs_handle_t *zhp, char **where)
215{
216	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
217}
218
219/*
 * Checks higher-order concerns about whether the given dataset is mountable,
 * returning B_FALSE if it is not.  zfs_is_mountable_internal() specifically
 * assumes that the caller has already verified the sanity of mounting the
 * dataset at its mountpoint to the extent the caller wants.
224 */
225static boolean_t
226zfs_is_mountable_internal(zfs_handle_t *zhp)
227{
228	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
229	    getzoneid() == GLOBAL_ZONEID)
230		return (B_FALSE);
231
232	return (B_TRUE);
233}
234
235/*
236 * Returns true if the given dataset is mountable, false otherwise.  Returns the
237 * mountpoint in 'buf'.
238 */
239static boolean_t
240zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
241    zprop_source_t *source, int flags)
242{
243	char sourceloc[MAXNAMELEN];
244	zprop_source_t sourcetype;
245
246	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,
247	    B_FALSE))
248		return (B_FALSE);
249
250	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
251	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
252
253	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
254	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
255		return (B_FALSE);
256
257	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
258		return (B_FALSE);
259
260	if (!zfs_is_mountable_internal(zhp))
261		return (B_FALSE);
262
263	if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
264		return (B_FALSE);
265
266	if (source)
267		*source = sourcetype;
268
269	return (B_TRUE);
270}
271
272/*
273 * The filesystem is mounted by invoking the system mount utility rather
274 * than by the system call mount(2).  This ensures that the /etc/mtab
275 * file is correctly locked for the update.  Performing our own locking
276 * and /etc/mtab update requires making an unsafe assumption about how
277 * the mount utility performs its locking.  Unfortunately, this also means
278 * in the case of a mount failure we do not have the exact errno.  We must
 * make do with the return value from the mount process.
 *
 * In the long term, a shared library called libmount is under development
282 * which provides a common API to address the locking and errno issues.
283 * Once the standard mount utility has been updated to use this library
284 * we can add an autoconf check to conditionally use it.
285 *
286 * http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html
287 */
288
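/*
 * Append either the 'on' or 'off' variant of a mount option to 'options',
 * based on the current value of the given dataset property, unless one of
 * the two variants is already present.
 */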
289static int
290zfs_add_option(zfs_handle_t *zhp, char *options, int len,
291    zfs_prop_t prop, const char *on, const char *off)
292{
293	const char *source;
294	uint64_t value;
295
296	/* Skip adding duplicate default options */
297	if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))
298		return (0);
299
300	/*
	 * We don't use zfs_prop_get_int() here, so that our mount options
	 * are not influenced by the current /proc/self/mounts contents.
303	 */
304	value = getprop_uint64(zhp, prop, &source);
305
306	(void) strlcat(options, ",", len);
307	(void) strlcat(options, value ? on : off, len);
308
309	return (0);
310}
311
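/*
 * Append the default mount options (atime, relatime, devices, exec,
 * readonly, setuid, nbmand) derived from the dataset's properties.
 */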
312static int
313zfs_add_options(zfs_handle_t *zhp, char *options, int len)
314{
315	int error = 0;
316
317	error = zfs_add_option(zhp, options, len,
318	    ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);
319	/*
320	 * don't add relatime/strictatime when atime=off, otherwise strictatime
321	 * will force atime=on
322	 */
323	if (strstr(options, MNTOPT_NOATIME) == NULL) {
324		error = zfs_add_option(zhp, options, len,
325		    ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);
326	}
327	error = error ? error : zfs_add_option(zhp, options, len,
328	    ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);
329	error = error ? error : zfs_add_option(zhp, options, len,
330	    ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);
331	error = error ? error : zfs_add_option(zhp, options, len,
332	    ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);
333	error = error ? error : zfs_add_option(zhp, options, len,
334	    ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);
335	error = error ? error : zfs_add_option(zhp, options, len,
336	    ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);
337
338	return (error);
339}
340
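/*
 * Mount the given filesystem at its configured mountpoint, if it is mountable.
 */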
341int
342zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
343{
344	char mountpoint[ZFS_MAXPROPLEN];
345
346	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,
347	    flags))
348		return (0);
349
350	return (zfs_mount_at(zhp, options, flags, mountpoint));
351}
352
353/*
354 * Mount the given filesystem.
355 */
356int
357zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,
358    const char *mountpoint)
359{
360	struct stat buf;
361	char mntopts[MNT_LINE_MAX];
362	char overlay[ZFS_MAXPROPLEN];
363	char prop_encroot[MAXNAMELEN];
364	boolean_t is_encroot;
365	zfs_handle_t *encroot_hp = zhp;
366	libzfs_handle_t *hdl = zhp->zfs_hdl;
367	uint64_t keystatus;
368	int remount = 0, rc;
369
370	if (options == NULL) {
371		(void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));
372	} else {
373		(void) strlcpy(mntopts, options, sizeof (mntopts));
374	}
375
376	if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)
377		remount = 1;
378
379	/* Potentially duplicates some checks if invoked by zfs_mount(). */
380	if (!zfs_is_mountable_internal(zhp))
381		return (0);
382
383	/*
	 * If the pool is imported read-only, then all mounts must be read-only.
385	 */
386	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
387		(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));
388
389	/*
390	 * Append default mount options which apply to the mount point.
391	 * This is done because under Linux (unlike Solaris) multiple mount
392	 * points may reference a single super block.  This means that just
393	 * given a super block there is no back reference to update the per
394	 * mount point options.
395	 */
396	rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));
397	if (rc) {
398		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
399		    "default options unavailable"));
400		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
401		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
402		    mountpoint));
403	}
404
405	/*
	 * If the filesystem is encrypted, the key must be loaded in order to
	 * mount it. If the key isn't loaded, the MS_CRYPT flag decides whether
	 * or not we attempt to load the key. Note: we must call
409	 * zfs_refresh_properties() here since some callers of this function
410	 * (most notably zpool_enable_datasets()) may implicitly load our key
411	 * by loading the parent's key first.
412	 */
413	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
414		zfs_refresh_properties(zhp);
415		keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
416
417		/*
418		 * If the key is unavailable and MS_CRYPT is set give the
419		 * user a chance to enter the key. Otherwise just fail
420		 * immediately.
421		 */
422		if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
423			if (flags & MS_CRYPT) {
424				rc = zfs_crypto_get_encryption_root(zhp,
425				    &is_encroot, prop_encroot);
426				if (rc) {
427					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
428					    "Failed to get encryption root for "
429					    "'%s'."), zfs_get_name(zhp));
430					return (rc);
431				}
432
433				if (!is_encroot) {
434					encroot_hp = zfs_open(hdl, prop_encroot,
435					    ZFS_TYPE_DATASET);
436					if (encroot_hp == NULL)
437						return (hdl->libzfs_error);
438				}
439
440				rc = zfs_crypto_load_key(encroot_hp,
441				    B_FALSE, NULL);
442
443				if (!is_encroot)
444					zfs_close(encroot_hp);
445				if (rc)
446					return (rc);
447			} else {
448				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
449				    "encryption key not loaded"));
450				return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
451				    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
452				    mountpoint));
453			}
454		}
455
456	}
457
458	/*
	 * Append the zfsutil option so the mount helper allows the mount.
460	 */
461	strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));
462
463	/* Create the directory if it doesn't already exist */
464	if (lstat(mountpoint, &buf) != 0) {
465		if (mkdirp(mountpoint, 0755) != 0) {
466			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
467			    "failed to create mountpoint: %s"),
468			    zfs_strerror(errno));
469			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
470			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
471			    mountpoint));
472		}
473	}
474
475	/*
476	 * Overlay mounts are enabled by default but may be disabled
477	 * via the 'overlay' property. The -O flag remains for compatibility.
478	 */
479	if (!(flags & MS_OVERLAY)) {
480		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
481		    sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
482			if (strcmp(overlay, "on") == 0) {
483				flags |= MS_OVERLAY;
484			}
485		}
486	}
487
488	/*
	 * Determine if the mountpoint is empty.  If it is not, refuse to
	 * perform the mount.  We don't perform this check if 'remount' is
	 * specified or if the overlay option (-O) is given.
492	 */
493	if ((flags & MS_OVERLAY) == 0 && !remount &&
494	    !dir_is_empty(mountpoint)) {
495		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
496		    "directory is not empty"));
497		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
498		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
499	}
500
501	/* perform the mount */
502	rc = do_mount(zhp, mountpoint, mntopts, flags);
503	if (rc) {
504		/*
505		 * Generic errors are nasty, but there are just way too many
506		 * from mount(), and they're well-understood.  We pick a few
507		 * common ones to improve upon.
508		 */
509		if (rc == EBUSY) {
510			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
511			    "mountpoint or dataset is busy"));
512		} else if (rc == EPERM) {
513			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
514			    "Insufficient privileges"));
515		} else if (rc == ENOTSUP) {
516			int spa_version;
517
518			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
519			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
520			    "Can't mount a version %llu "
521			    "file system on a version %d pool. Pool must be"
522			    " upgraded to mount this file system."),
523			    (u_longlong_t)zfs_prop_get_int(zhp,
524			    ZFS_PROP_VERSION), spa_version);
525		} else {
526			zfs_error_aux(hdl, "%s", zfs_strerror(rc));
527		}
528		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
529		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
530		    zhp->zfs_name));
531	}
532
533	/* remove the mounted entry before re-adding on remount */
534	if (remount)
535		libzfs_mnttab_remove(hdl, zhp->zfs_name);
536
537	/* add the mounted entry into our cache */
538	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);
539	return (0);
540}
541
542/*
543 * Unmount a single filesystem.
544 */
545static int
546unmount_one(zfs_handle_t *zhp, const char *mountpoint, int flags)
547{
548	int error;
549
550	error = do_unmount(zhp, mountpoint, flags);
551	if (error != 0) {
552		int libzfs_err;
553
554		switch (error) {
555		case EBUSY:
556			libzfs_err = EZFS_BUSY;
557			break;
558		case EIO:
559			libzfs_err = EZFS_IO;
560			break;
561		case ENOENT:
562			libzfs_err = EZFS_NOENT;
563			break;
564		case ENOMEM:
565			libzfs_err = EZFS_NOMEM;
566			break;
567		case EPERM:
568			libzfs_err = EZFS_PERM;
569			break;
570		default:
571			libzfs_err = EZFS_UMOUNTFAILED;
572		}
573		if (zhp) {
574			return (zfs_error_fmt(zhp->zfs_hdl, libzfs_err,
575			    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
576			    mountpoint));
577		} else {
578			return (-1);
579		}
580	}
581
582	return (0);
583}
584
585/*
586 * Unmount the given filesystem.
587 */
588int
589zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
590{
591	libzfs_handle_t *hdl = zhp->zfs_hdl;
592	struct mnttab entry;
593	char *mntpt = NULL;
594	boolean_t encroot, unmounted = B_FALSE;
595
596	/* check to see if we need to unmount the filesystem */
597	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
598	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
599		/*
		 * mountpoint may have come from a call to
		 * getmnt/getmntany if it isn't NULL. If it is NULL,
		 * we know it comes from libzfs_mnttab_find(), whose
		 * entry may be freed later, so we strdup it to be safe.
604		 */
605		if (mountpoint == NULL)
606			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
607		else
608			mntpt = zfs_strdup(hdl, mountpoint);
609
610		/*
611		 * Unshare and unmount the filesystem
612		 */
613		if (zfs_unshare(zhp, mntpt, share_all_proto) != 0) {
614			free(mntpt);
615			return (-1);
616		}
617		zfs_commit_shares(NULL);
618
619		if (unmount_one(zhp, mntpt, flags) != 0) {
620			free(mntpt);
621			(void) zfs_share(zhp, NULL);
622			zfs_commit_shares(NULL);
623			return (-1);
624		}
625
626		libzfs_mnttab_remove(hdl, zhp->zfs_name);
627		free(mntpt);
628		unmounted = B_TRUE;
629	}
630
631	/*
632	 * If the MS_CRYPT flag is provided we must ensure we attempt to
633	 * unload the dataset's key regardless of whether we did any work
634	 * to unmount it. We only do this for encryption roots.
635	 */
636	if ((flags & MS_CRYPT) != 0 &&
637	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
638		zfs_refresh_properties(zhp);
639
640		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&
641		    unmounted) {
642			(void) zfs_mount(zhp, NULL, 0);
643			return (-1);
644		}
645
646		if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
647		    ZFS_KEYSTATUS_AVAILABLE &&
648		    zfs_crypto_unload_key(zhp) != 0) {
649			(void) zfs_mount(zhp, NULL, 0);
650			return (-1);
651		}
652	}
653
654	zpool_disable_volume_os(zhp->zfs_name);
655
656	return (0);
657}
658
659/*
660 * Unmount this filesystem and any children inheriting the mountpoint property.
661 * To do this, just act like we're changing the mountpoint property, but don't
662 * remount the filesystems afterwards.
663 */
664int
665zfs_unmountall(zfs_handle_t *zhp, int flags)
666{
667	prop_changelist_t *clp;
668	int ret;
669
670	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
671	    CL_GATHER_ITER_MOUNTED, flags);
672	if (clp == NULL)
673		return (-1);
674
675	ret = changelist_prefix(clp);
676	changelist_free(clp);
677
678	return (ret);
679}
680
681/*
682 * Unshare a filesystem by mountpoint.
683 */
684static int
685unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
686    enum sa_protocol proto)
687{
688	int err = sa_disable_share(mountpoint, proto);
689	if (err != SA_OK)
690		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
691		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
692		    name, sa_errorstr(err)));
693
694	return (0);
695}
696
697/*
698 * Share the given filesystem according to the options in the specified
 * protocol-specific properties (sharenfs, sharesmb).  We rely
700 * on "libshare" to do the dirty work for us.
701 */
702int
703zfs_share(zfs_handle_t *zhp, const enum sa_protocol *proto)
704{
705	char mountpoint[ZFS_MAXPROPLEN];
706	char shareopts[ZFS_MAXPROPLEN];
707	char sourcestr[ZFS_MAXPROPLEN];
708	const enum sa_protocol *curr_proto;
709	zprop_source_t sourcetype;
710	int err = 0;
711
712	if (proto == NULL)
713		proto = share_all_proto;
714
715	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
716		return (0);
717
718	for (curr_proto = proto; *curr_proto != SA_NO_PROTOCOL; curr_proto++) {
719		/*
		 * Skip this protocol if it has no share options.
721		 */
722		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
723		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
724		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
725		    strcmp(shareopts, "off") == 0)
726			continue;
727
728		/*
729		 * If the 'zoned' property is set, then zfs_is_mountable()
730		 * will have already bailed out if we are in the global zone.
731		 * But local zones cannot be NFS servers, so we ignore it for
732		 * local zones as well.
733		 */
734		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
735			continue;
736
737		err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,
738		    *curr_proto);
739		if (err != SA_OK) {
740			return (zfs_error_fmt(zhp->zfs_hdl,
741			    proto_table[*curr_proto].p_share_err,
			    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
743			    zfs_get_name(zhp), sa_errorstr(err)));
744		}
745
746	}
747	return (0);
748}
749
750/*
751 * Check to see if the filesystem is currently shared.
752 */
753boolean_t
754zfs_is_shared(zfs_handle_t *zhp, char **where,
755    const enum sa_protocol *proto)
756{
757	char *mountpoint;
758	if (proto == NULL)
759		proto = share_all_proto;
760
761	if (ZFS_IS_VOLUME(zhp))
762		return (B_FALSE);
763
764	if (!zfs_is_mounted(zhp, &mountpoint))
765		return (B_FALSE);
766
767	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
768		if (sa_is_shared(mountpoint, *p)) {
769			if (where != NULL)
770				*where = mountpoint;
771			else
772				free(mountpoint);
773			return (B_TRUE);
774		}
775
776	free(mountpoint);
777	return (B_FALSE);
778}
779
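/*
 * Commit any pending share changes for the given protocols (all protocols
 * if NULL).
 */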
780void
781zfs_commit_shares(const enum sa_protocol *proto)
782{
783	if (proto == NULL)
784		proto = share_all_proto;
785
786	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
787		sa_commit_shares(*p);
788}
789
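/*
 * Truncate the stored share state for the given protocols (all protocols
 * if NULL).
 */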
790void
791zfs_truncate_shares(const enum sa_protocol *proto)
792{
793	if (proto == NULL)
794		proto = share_all_proto;
795
796	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
797		sa_truncate_shares(*p);
798}
799
800/*
801 * Unshare the given filesystem.
802 */
803int
804zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,
805    const enum sa_protocol *proto)
806{
807	libzfs_handle_t *hdl = zhp->zfs_hdl;
808	struct mnttab entry;
809
810	if (proto == NULL)
811		proto = share_all_proto;
812
813	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
814	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
815
		/* check to see if we need to unshare the filesystem */
817		const char *mntpt = mountpoint ?: entry.mnt_mountp;
818
819		for (const enum sa_protocol *curr_proto = proto;
820		    *curr_proto != SA_NO_PROTOCOL; curr_proto++)
821			if (sa_is_shared(mntpt, *curr_proto) &&
822			    unshare_one(hdl, zhp->zfs_name,
823			    mntpt, *curr_proto) != 0)
824					return (-1);
825	}
826
827	return (0);
828}
829
830/*
831 * Same as zfs_unmountall(), but for NFS and SMB unshares.
832 */
833int
834zfs_unshareall(zfs_handle_t *zhp, const enum sa_protocol *proto)
835{
836	prop_changelist_t *clp;
837	int ret;
838
839	if (proto == NULL)
840		proto = share_all_proto;
841
842	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
843	if (clp == NULL)
844		return (-1);
845
846	ret = changelist_unshare(clp, proto);
847	changelist_free(clp);
848
849	return (ret);
850}
851
852/*
853 * Remove the mountpoint associated with the current dataset, if necessary.
854 * We only remove the underlying directory if:
855 *
856 *	- The mountpoint is not 'none' or 'legacy'
 *	- The mountpoint directory is empty
858 *	- The mountpoint is the default or inherited
859 *	- The 'zoned' property is set, or we're in a local zone
860 *
861 * Any other directories we leave alone.
862 */
863void
864remove_mountpoint(zfs_handle_t *zhp)
865{
866	char mountpoint[ZFS_MAXPROPLEN];
867	zprop_source_t source;
868
869	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
870	    &source, 0))
871		return;
872
873	if (source == ZPROP_SRC_DEFAULT ||
874	    source == ZPROP_SRC_INHERITED) {
875		/*
876		 * Try to remove the directory, silently ignoring any errors.
877		 * The filesystem may have since been removed or moved around,
878		 * and this error isn't really useful to the administrator in
879		 * any way.
880		 */
881		(void) rmdir(mountpoint);
882	}
883}
884
885/*
886 * Add the given zfs handle to the cb_handles array, dynamically reallocating
887 * the array if it is out of space.
888 */
889void
890libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
891{
892	if (cbp->cb_alloc == cbp->cb_used) {
893		size_t newsz;
894		zfs_handle_t **newhandles;
895
896		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
897		newhandles = zfs_realloc(zhp->zfs_hdl,
898		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
899		    newsz * sizeof (zfs_handle_t *));
900		cbp->cb_handles = newhandles;
901		cbp->cb_alloc = newsz;
902	}
903	cbp->cb_handles[cbp->cb_used++] = zhp;
904}
905
906/*
907 * Recursive helper function used during file system enumeration
908 */
909static int
910zfs_iter_cb(zfs_handle_t *zhp, void *data)
911{
912	get_all_cb_t *cbp = data;
913
914	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
915		zfs_close(zhp);
916		return (0);
917	}
918
919	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
920		zfs_close(zhp);
921		return (0);
922	}
923
924	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
925	    ZFS_KEYSTATUS_UNAVAILABLE) {
926		zfs_close(zhp);
927		return (0);
928	}
929
930	/*
931	 * If this filesystem is inconsistent and has a receive resume
	 * token, we cannot mount it.
933	 */
934	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
935	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
936	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
937		zfs_close(zhp);
938		return (0);
939	}
940
941	libzfs_add_handle(cbp, zhp);
942	if (zfs_iter_filesystems_v2(zhp, 0, zfs_iter_cb, cbp) != 0) {
943		zfs_close(zhp);
944		return (-1);
945	}
946	return (0);
947}
948
949/*
950 * Sort comparator that compares two mountpoint paths. We sort these paths so
951 * that subdirectories immediately follow their parents. This means that we
952 * effectively treat the '/' character as the lowest value non-nul char.
953 * Since filesystems from non-global zones can have the same mountpoint
954 * as other filesystems, the comparator sorts global zone filesystems to
955 * the top of the list. This means that the global zone will traverse the
956 * filesystem list in the correct order and can stop when it sees the
957 * first zoned filesystem. In a non-global zone, only the delegated
958 * filesystems are seen.
959 *
960 * An example sorted list using this comparator would look like:
961 *
962 * /foo
963 * /foo/bar
964 * /foo/bar/baz
965 * /foo/baz
966 * /foo.bar
967 * /foo (NGZ1)
968 * /foo (NGZ2)
969 *
970 * The mounting code depends on this ordering to deterministically iterate
971 * over filesystems in order to spawn parallel mount tasks.
972 */
973static int
974mountpoint_cmp(const void *arga, const void *argb)
975{
976	zfs_handle_t *const *zap = arga;
977	zfs_handle_t *za = *zap;
978	zfs_handle_t *const *zbp = argb;
979	zfs_handle_t *zb = *zbp;
980	char mounta[MAXPATHLEN];
981	char mountb[MAXPATHLEN];
982	const char *a = mounta;
983	const char *b = mountb;
984	boolean_t gota, gotb;
985	uint64_t zoneda, zonedb;
986
987	zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
988	zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
989	if (zoneda && !zonedb)
990		return (1);
991	if (!zoneda && zonedb)
992		return (-1);
993
994	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
995	if (gota) {
996		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
997		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
998	}
999	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1000	if (gotb) {
1001		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1002		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1003	}
1004
1005	if (gota && gotb) {
1006		while (*a != '\0' && (*a == *b)) {
1007			a++;
1008			b++;
1009		}
1010		if (*a == *b)
1011			return (0);
1012		if (*a == '\0')
1013			return (-1);
1014		if (*b == '\0')
1015			return (1);
1016		if (*a == '/')
1017			return (-1);
1018		if (*b == '/')
1019			return (1);
1020		return (*a < *b ? -1 : *a > *b);
1021	}
1022
1023	if (gota)
1024		return (-1);
1025	if (gotb)
1026		return (1);
1027
1028	/*
1029	 * If neither filesystem has a mountpoint, revert to sorting by
1030	 * dataset name.
1031	 */
1032	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1033}
1034
1035/*
1036 * Return true if path2 is a child of path1 or path2 equals path1 or
1037 * path1 is "/" (path2 is always a child of "/").
1038 */
1039static boolean_t
1040libzfs_path_contains(const char *path1, const char *path2)
1041{
1042	return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||
1043	    (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'));
1044}
1045
1046/*
1047 * Given a mountpoint specified by idx in the handles array, find the first
 * non-descendant of that mountpoint and return its index. Descendant paths
1049 * start with the parent's path. This function relies on the ordering
1050 * enforced by mountpoint_cmp().
1051 */
1052static int
1053non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1054{
1055	char parent[ZFS_MAXPROPLEN];
1056	char child[ZFS_MAXPROPLEN];
1057	int i;
1058
1059	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1060	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1061
1062	for (i = idx + 1; i < num_handles; i++) {
1063		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1064		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1065		if (!libzfs_path_contains(parent, child))
1066			break;
1067	}
1068	return (i);
1069}
1070
1071typedef struct mnt_param {
1072	libzfs_handle_t	*mnt_hdl;
1073	tpool_t		*mnt_tp;
1074	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
1075	size_t		mnt_num_handles;
1076	int		mnt_idx;	/* Index of selected entry to mount */
1077	zfs_iter_f	mnt_func;
1078	void		*mnt_data;
1079} mnt_param_t;
1080
1081/*
1082 * Allocate and populate the parameter struct for mount function, and
1083 * schedule mounting of the entry selected by idx.
1084 */
1085static void
1086zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1087    size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
1088{
1089	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1090
1091	mnt_param->mnt_hdl = hdl;
1092	mnt_param->mnt_tp = tp;
1093	mnt_param->mnt_zhps = handles;
1094	mnt_param->mnt_num_handles = num_handles;
1095	mnt_param->mnt_idx = idx;
1096	mnt_param->mnt_func = func;
1097	mnt_param->mnt_data = data;
1098
1099	if (tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param)) {
1100		/* Could not dispatch to thread pool; execute directly */
1101		zfs_mount_task((void*)mnt_param);
1102	}
1103}
1104
1105/*
1106 * This is the structure used to keep state of mounting or sharing operations
1107 * during a call to zpool_enable_datasets().
1108 */
1109typedef struct mount_state {
1110	/*
1111	 * ms_mntstatus is set to -1 if any mount fails. While multiple threads
1112	 * could update this variable concurrently, no synchronization is
1113	 * needed as it's only ever set to -1.
1114	 */
1115	int		ms_mntstatus;
1116	int		ms_mntflags;
1117	const char	*ms_mntopts;
1118} mount_state_t;
1119
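/*
 * Mount a single filesystem, skipping encrypted datasets whose keys are not
 * loaded, and record any failure in the shared mount_state_t.
 */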
1120static int
1121zfs_mount_one(zfs_handle_t *zhp, void *arg)
1122{
1123	mount_state_t *ms = arg;
1124	int ret = 0;
1125
1126	/*
1127	 * don't attempt to mount encrypted datasets with
1128	 * unloaded keys
1129	 */
1130	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1131	    ZFS_KEYSTATUS_UNAVAILABLE)
1132		return (0);
1133
1134	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1135		ret = ms->ms_mntstatus = -1;
1136	return (ret);
1137}
1138
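/*
 * Share a single filesystem over any protocols enabled by its share
 * properties, recording any failure in the shared mount_state_t.
 */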
1139static int
1140zfs_share_one(zfs_handle_t *zhp, void *arg)
1141{
1142	mount_state_t *ms = arg;
1143	int ret = 0;
1144
1145	if (zfs_share(zhp, NULL) != 0)
1146		ret = ms->ms_mntstatus = -1;
1147	return (ret);
1148}
1149
1150/*
1151 * Thread pool function to mount one file system. On completion, it finds and
1152 * schedules its children to be mounted. This depends on the sorting done in
1153 * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1154 * each descending from the previous) will have no parallelism since we always
1155 * have to wait for the parent to finish mounting before we can schedule
1156 * its children.
1157 */
1158static void
1159zfs_mount_task(void *arg)
1160{
1161	mnt_param_t *mp = arg;
1162	int idx = mp->mnt_idx;
1163	zfs_handle_t **handles = mp->mnt_zhps;
1164	size_t num_handles = mp->mnt_num_handles;
1165	char mountpoint[ZFS_MAXPROPLEN];
1166
1167	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1168	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1169
1170	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1171		goto out;
1172
1173	/*
1174	 * We dispatch tasks to mount filesystems with mountpoints underneath
1175	 * this one. We do this by dispatching the next filesystem with a
1176	 * descendant mountpoint of the one we just mounted, then skip all of
1177	 * its descendants, dispatch the next descendant mountpoint, and so on.
1178	 * The non_descendant_idx() function skips over filesystems that are
1179	 * descendants of the filesystem we just dispatched.
1180	 */
1181	for (int i = idx + 1; i < num_handles;
1182	    i = non_descendant_idx(handles, num_handles, i)) {
1183		char child[ZFS_MAXPROPLEN];
1184		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1185		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1186
1187		if (!libzfs_path_contains(mountpoint, child))
			break; /* not a descendant, stop dispatching */
1189		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1190		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
1191	}
1192
1193out:
1194	free(mp);
1195}
1196
1197/*
1198 * Issue the func callback for each ZFS handle contained in the handles
 * array. This function is used to mount all datasets, and so it guarantees
 * that filesystems for parent mountpoints are called before their
1201 * children. As such, before issuing any callbacks, we first sort the array
1202 * of handles by mountpoint.
1203 *
1204 * Callbacks are issued in one of two ways:
1205 *
1206 * 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT
1207 *    environment variable is set, then we issue callbacks sequentially.
1208 *
1209 * 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT
1210 *    environment variable is not set, then we use a tpool to dispatch threads
1211 *    to mount filesystems in parallel. This function dispatches tasks to mount
1212 *    the filesystems at the top-level mountpoints, and these tasks in turn
1213 *    are responsible for recursively mounting filesystems in their children
1214 *    mountpoints.  The value of the nthr argument will be the number of worker
1215 *    threads for the thread pool.
1216 */
1217void
1218zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1219    size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)
1220{
1221	zoneid_t zoneid = getzoneid();
1222
1223	/*
1224	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1225	 * variable that can be used as a convenience to do a/b comparison
1226	 * of serial vs. parallel mounting.
1227	 */
1228	boolean_t serial_mount = nthr <= 1 ||
1229	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1230
1231	/*
1232	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1233	 * of how these are sorted.
1234	 */
1235	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1236
1237	if (serial_mount) {
1238		for (int i = 0; i < num_handles; i++) {
1239			func(handles[i], data);
1240		}
1241		return;
1242	}
1243
1244	/*
1245	 * Issue the callback function for each dataset using a parallel
1246	 * algorithm that uses a thread pool to manage threads.
1247	 */
1248	tpool_t *tp = tpool_create(1, nthr, 0, NULL);
1249
1250	/*
1251	 * There may be multiple "top level" mountpoints outside of the pool's
1252	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1253	 * these.
1254	 */
1255	for (int i = 0; i < num_handles;
1256	    i = non_descendant_idx(handles, num_handles, i)) {
1257		/*
1258		 * Since the mountpoints have been sorted so that the zoned
1259		 * filesystems are at the end, a zoned filesystem seen from
1260		 * the global zone means that we're done.
1261		 */
1262		if (zoneid == GLOBAL_ZONEID &&
1263		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1264			break;
1265		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1266		    tp);
1267	}
1268
1269	tpool_wait(tp);	/* wait for all scheduled mounts to complete */
1270	tpool_destroy(tp);
1271}
1272
1273/*
1274 * Mount and share all datasets within the given pool.  This assumes that no
 * datasets within the pool are currently mounted.  nthr is the number of
1276 * worker threads to use while mounting datasets.
1277 */
1278int
1279zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,
1280    uint_t nthr)
1281{
1282	get_all_cb_t cb = { 0 };
1283	mount_state_t ms = { 0 };
1284	zfs_handle_t *zfsp;
1285	int ret = 0;
1286
1287	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1288	    ZFS_TYPE_DATASET)) == NULL)
1289		goto out;
1290
1291	/*
1292	 * Gather all non-snapshot datasets within the pool. Start by adding
1293	 * the root filesystem for this pool to the list, and then iterate
1294	 * over all child filesystems.
1295	 */
1296	libzfs_add_handle(&cb, zfsp);
1297	if (zfs_iter_filesystems_v2(zfsp, 0, zfs_iter_cb, &cb) != 0)
1298		goto out;
1299
1300	/*
1301	 * Mount all filesystems
1302	 */
1303	ms.ms_mntopts = mntopts;
1304	ms.ms_mntflags = flags;
1305	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1306	    zfs_mount_one, &ms, nthr);
1307	if (ms.ms_mntstatus != 0)
1308		ret = EZFS_MOUNTFAILED;
1309
1310	/*
1311	 * Share all filesystems that need to be shared. This needs to be
1312	 * a separate pass because libshare is not mt-safe, and so we need
1313	 * to share serially.
1314	 */
1315	ms.ms_mntstatus = 0;
1316	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1317	    zfs_share_one, &ms, 1);
1318	if (ms.ms_mntstatus != 0)
1319		ret = EZFS_SHAREFAILED;
1320	else
1321		zfs_commit_shares(NULL);
1322
1323out:
1324	for (int i = 0; i < cb.cb_used; i++)
1325		zfs_close(cb.cb_handles[i]);
1326	free(cb.cb_handles);
1327
1328	return (ret);
1329}
1330
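/*
 * Pairs a mounted filesystem's mountpoint with its dataset handle; used by
 * zpool_disable_datasets() when tearing down all the mounts in a pool.
 */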
1331struct sets_s {
1332	char *mountpoint;
1333	zfs_handle_t *dataset;
1334};
1335
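/*
 * Sort comparator that orders mountpoints in reverse lexical order so that
 * child filesystems are unshared and unmounted before their parents.
 */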
1336static int
1337mountpoint_compare(const void *a, const void *b)
1338{
1339	const struct sets_s *mounta = (struct sets_s *)a;
1340	const struct sets_s *mountb = (struct sets_s *)b;
1341
1342	return (strcmp(mountb->mountpoint, mounta->mountpoint));
1343}
1344
1345/*
1346 * Unshare and unmount all datasets within the given pool.  We don't want to
1347 * rely on traversing the DSL to discover the filesystems within the pool,
1348 * because this may be expensive (if not all of them are mounted), and can fail
1349 * arbitrarily (on I/O error, for example).  Instead, we walk /proc/self/mounts
1350 * and gather all the filesystems that are currently mounted.
1351 */
1352int
1353zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1354{
1355	int used, alloc;
1356	FILE *mnttab;
1357	struct mnttab entry;
1358	size_t namelen;
1359	struct sets_s *sets = NULL;
1360	libzfs_handle_t *hdl = zhp->zpool_hdl;
1361	int i;
1362	int ret = -1;
1363	int flags = (force ? MS_FORCE : 0);
1364
1365	namelen = strlen(zhp->zpool_name);
1366
1367	if ((mnttab = fopen(MNTTAB, "re")) == NULL)
1368		return (ENOENT);
1369
1370	used = alloc = 0;
1371	while (getmntent(mnttab, &entry) == 0) {
1372		/*
1373		 * Ignore non-ZFS entries.
1374		 */
1375		if (entry.mnt_fstype == NULL ||
1376		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1377			continue;
1378
1379		/*
1380		 * Ignore filesystems not within this pool.
1381		 */
1382		if (entry.mnt_mountp == NULL ||
1383		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1384		    (entry.mnt_special[namelen] != '/' &&
1385		    entry.mnt_special[namelen] != '\0'))
1386			continue;
1387
1388		/*
1389		 * At this point we've found a filesystem within our pool.  Add
1390		 * it to our growing list.
1391		 */
1392		if (used == alloc) {
1393			if (alloc == 0) {
1394				sets = zfs_alloc(hdl,
1395				    8 * sizeof (struct sets_s));
1396				alloc = 8;
1397			} else {
1398				sets = zfs_realloc(hdl, sets,
1399				    alloc * sizeof (struct sets_s),
1400				    alloc * 2 * sizeof (struct sets_s));
1401
1402				alloc *= 2;
1403			}
1404		}
1405
1406		sets[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp);
1407
1408		/*
1409		 * This is allowed to fail, in case there is some I/O error.  It
1410		 * is only used to determine if we need to remove the underlying
1411		 * mountpoint, so failure is not fatal.
1412		 */
1413		sets[used].dataset = make_dataset_handle(hdl,
1414		    entry.mnt_special);
1415
1416		used++;
1417	}
1418
1419	/*
1420	 * At this point, we have the entire list of filesystems, so sort it by
1421	 * mountpoint.
1422	 */
1423	if (used != 0)
1424		qsort(sets, used, sizeof (struct sets_s), mountpoint_compare);
1425
1426	/*
1427	 * Walk through and first unshare everything.
1428	 */
1429	for (i = 0; i < used; i++) {
1430		for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {
1431			if (sa_is_shared(sets[i].mountpoint, p) &&
1432			    unshare_one(hdl, sets[i].mountpoint,
1433			    sets[i].mountpoint, p) != 0)
1434				goto out;
1435		}
1436	}
1437	zfs_commit_shares(NULL);
1438
1439	/*
1440	 * Now unmount everything, removing the underlying directories as
1441	 * appropriate.
1442	 */
1443	for (i = 0; i < used; i++) {
1444		if (unmount_one(sets[i].dataset, sets[i].mountpoint,
1445		    flags) != 0)
1446			goto out;
1447	}
1448
1449	for (i = 0; i < used; i++) {
1450		if (sets[i].dataset)
1451			remove_mountpoint(sets[i].dataset);
1452	}
1453
1454	zpool_disable_datasets_os(zhp, force);
1455
1456	ret = 0;
1457out:
1458	(void) fclose(mnttab);
1459	for (i = 0; i < used; i++) {
1460		if (sets[i].dataset)
1461			zfs_close(sets[i].dataset);
1462		free(sets[i].mountpoint);
1463	}
1464	free(sets);
1465
1466	return (ret);
1467}
1468