1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
25 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
26 * Copyright 2016 Nexenta Systems, Inc.
27 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
28 * Copyright (c) 2017 Datto Inc.
29 * Copyright (c) 2017, Intel Corporation.
30 */
31
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <ctype.h>
35#include <errno.h>
36#include <devid.h>
37#include <fcntl.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <strings.h>
42#include <unistd.h>
43#include <libgen.h>
44#include <sys/zfs_ioctl.h>
45#include <dlfcn.h>
46
47#include "zfs_namecheck.h"
48#include "zfs_prop.h"
49#include "libzfs_impl.h"
50#include "zfs_comutil.h"
51#include "zfeature_common.h"
52
53static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *);
54static boolean_t zpool_vdev_is_interior(const char *name);
55
56#define	BACKUP_SLICE	"s2"
57
58typedef struct prop_flags {
59	int create:1;	/* Validate property on creation */
60	int import:1;	/* Validate property on import */
61} prop_flags_t;
62
63/*
64 * ====================================================================
65 *   zpool property functions
66 * ====================================================================
67 */
68
69static int
70zpool_get_all_props(zpool_handle_t *zhp)
71{
72	zfs_cmd_t zc = { 0 };
73	libzfs_handle_t *hdl = zhp->zpool_hdl;
74
75	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
76
77	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
78		return (-1);
79
80	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
81		if (errno == ENOMEM) {
82			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
83				zcmd_free_nvlists(&zc);
84				return (-1);
85			}
86		} else {
87			zcmd_free_nvlists(&zc);
88			return (-1);
89		}
90	}
91
92	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
93		zcmd_free_nvlists(&zc);
94		return (-1);
95	}
96
97	zcmd_free_nvlists(&zc);
98
99	return (0);
100}
101
102static int
103zpool_props_refresh(zpool_handle_t *zhp)
104{
105	nvlist_t *old_props;
106
107	old_props = zhp->zpool_props;
108
109	if (zpool_get_all_props(zhp) != 0)
110		return (-1);
111
112	nvlist_free(old_props);
113	return (0);
114}
115
116static char *
117zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
118    zprop_source_t *src)
119{
120	nvlist_t *nv, *nvl;
121	uint64_t ival;
122	char *value;
123	zprop_source_t source;
124
125	nvl = zhp->zpool_props;
126	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
127		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
128		source = ival;
129		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
130	} else {
131		source = ZPROP_SRC_DEFAULT;
132		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
133			value = "-";
134	}
135
136	if (src)
137		*src = source;
138
139	return (value);
140}
141
142uint64_t
143zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
144{
145	nvlist_t *nv, *nvl;
146	uint64_t value;
147	zprop_source_t source;
148
149	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
150		/*
151		 * zpool_get_all_props() has most likely failed because
152		 * the pool is faulted, but if all we need is the top level
153		 * vdev's guid then get it from the zhp config nvlist.
154		 */
155		if ((prop == ZPOOL_PROP_GUID) &&
156		    (nvlist_lookup_nvlist(zhp->zpool_config,
157		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
158		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
159		    == 0)) {
160			return (value);
161		}
162		return (zpool_prop_default_numeric(prop));
163	}
164
165	nvl = zhp->zpool_props;
166	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
167		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
168		source = value;
169		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
170	} else {
171		source = ZPROP_SRC_DEFAULT;
172		value = zpool_prop_default_numeric(prop);
173	}
174
175	if (src)
176		*src = source;
177
178	return (value);
179}
180
181/*
182 * Map VDEV STATE to printed strings.
183 */
184const char *
185zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
186{
187	switch (state) {
188	case VDEV_STATE_CLOSED:
189	case VDEV_STATE_OFFLINE:
190		return (gettext("OFFLINE"));
191	case VDEV_STATE_REMOVED:
192		return (gettext("REMOVED"));
193	case VDEV_STATE_CANT_OPEN:
194		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
195			return (gettext("FAULTED"));
196		else if (aux == VDEV_AUX_SPLIT_POOL)
197			return (gettext("SPLIT"));
198		else
199			return (gettext("UNAVAIL"));
200	case VDEV_STATE_FAULTED:
201		return (gettext("FAULTED"));
202	case VDEV_STATE_DEGRADED:
203		return (gettext("DEGRADED"));
204	case VDEV_STATE_HEALTHY:
205		return (gettext("ONLINE"));
206
207	default:
208		break;
209	}
210
211	return (gettext("UNKNOWN"));
212}
213
214/*
215 * Map POOL STATE to printed strings.
216 */
217const char *
218zpool_pool_state_to_name(pool_state_t state)
219{
220	switch (state) {
221	case POOL_STATE_ACTIVE:
222		return (gettext("ACTIVE"));
223	case POOL_STATE_EXPORTED:
224		return (gettext("EXPORTED"));
225	case POOL_STATE_DESTROYED:
226		return (gettext("DESTROYED"));
227	case POOL_STATE_SPARE:
228		return (gettext("SPARE"));
229	case POOL_STATE_L2CACHE:
230		return (gettext("L2CACHE"));
231	case POOL_STATE_UNINITIALIZED:
232		return (gettext("UNINITIALIZED"));
233	case POOL_STATE_UNAVAIL:
234		return (gettext("UNAVAIL"));
235	case POOL_STATE_POTENTIALLY_ACTIVE:
236		return (gettext("POTENTIALLY_ACTIVE"));
237	}
238
239	return (gettext("UNKNOWN"));
240}
241
242/*
243 * Get a zpool property value for 'prop' and return the value in
244 * a pre-allocated buffer.
245 */
246int
247zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
248    zprop_source_t *srctype, boolean_t literal)
249{
250	uint64_t intval;
251	const char *strval;
252	zprop_source_t src = ZPROP_SRC_NONE;
253	nvlist_t *nvroot;
254	vdev_stat_t *vs;
255	uint_t vsc;
256
257	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
258		switch (prop) {
259		case ZPOOL_PROP_NAME:
260			(void) strlcpy(buf, zpool_get_name(zhp), len);
261			break;
262
263		case ZPOOL_PROP_HEALTH:
264			(void) strlcpy(buf,
265			    zpool_pool_state_to_name(POOL_STATE_UNAVAIL), len);
266			break;
267
268		case ZPOOL_PROP_GUID:
269			intval = zpool_get_prop_int(zhp, prop, &src);
270			(void) snprintf(buf, len, "%llu", intval);
271			break;
272
273		case ZPOOL_PROP_ALTROOT:
274		case ZPOOL_PROP_CACHEFILE:
275		case ZPOOL_PROP_COMMENT:
276			if (zhp->zpool_props != NULL ||
277			    zpool_get_all_props(zhp) == 0) {
278				(void) strlcpy(buf,
279				    zpool_get_prop_string(zhp, prop, &src),
280				    len);
281				break;
282			}
283			/* FALLTHROUGH */
284		default:
285			(void) strlcpy(buf, "-", len);
286			break;
287		}
288
289		if (srctype != NULL)
290			*srctype = src;
291		return (0);
292	}
293
294	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
295	    prop != ZPOOL_PROP_NAME)
296		return (-1);
297
298	switch (zpool_prop_get_type(prop)) {
299	case PROP_TYPE_STRING:
300		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
301		    len);
302		break;
303
304	case PROP_TYPE_NUMBER:
305		intval = zpool_get_prop_int(zhp, prop, &src);
306
307		switch (prop) {
308		case ZPOOL_PROP_SIZE:
309		case ZPOOL_PROP_ALLOCATED:
310		case ZPOOL_PROP_FREE:
311		case ZPOOL_PROP_FREEING:
312		case ZPOOL_PROP_LEAKED:
313			if (literal) {
314				(void) snprintf(buf, len, "%llu",
315				    (u_longlong_t)intval);
316			} else {
317				(void) zfs_nicenum(intval, buf, len);
318			}
319			break;
320		case ZPOOL_PROP_BOOTSIZE:
321		case ZPOOL_PROP_EXPANDSZ:
322		case ZPOOL_PROP_CHECKPOINT:
323			if (intval == 0) {
324				(void) strlcpy(buf, "-", len);
325			} else if (literal) {
326				(void) snprintf(buf, len, "%llu",
327				    (u_longlong_t)intval);
328			} else {
329				(void) zfs_nicenum(intval, buf, len);
330			}
331			break;
332		case ZPOOL_PROP_CAPACITY:
333			if (literal) {
334				(void) snprintf(buf, len, "%llu",
335				    (u_longlong_t)intval);
336			} else {
337				(void) snprintf(buf, len, "%llu%%",
338				    (u_longlong_t)intval);
339			}
340			break;
341		case ZPOOL_PROP_FRAGMENTATION:
342			if (intval == UINT64_MAX) {
343				(void) strlcpy(buf, "-", len);
344			} else {
345				(void) snprintf(buf, len, "%llu%%",
346				    (u_longlong_t)intval);
347			}
348			break;
349		case ZPOOL_PROP_DEDUPRATIO:
350			(void) snprintf(buf, len, "%llu.%02llux",
351			    (u_longlong_t)(intval / 100),
352			    (u_longlong_t)(intval % 100));
353			break;
354		case ZPOOL_PROP_HEALTH:
355			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
356			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
357			verify(nvlist_lookup_uint64_array(nvroot,
358			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
359			    == 0);
360
361			(void) strlcpy(buf, zpool_state_to_name(intval,
362			    vs->vs_aux), len);
363			break;
364		case ZPOOL_PROP_VERSION:
365			if (intval >= SPA_VERSION_FEATURES) {
366				(void) snprintf(buf, len, "-");
367				break;
368			}
369			/* FALLTHROUGH */
370		default:
371			(void) snprintf(buf, len, "%llu", intval);
372		}
373		break;
374
375	case PROP_TYPE_INDEX:
376		intval = zpool_get_prop_int(zhp, prop, &src);
377		if (zpool_prop_index_to_string(prop, intval, &strval)
378		    != 0)
379			return (-1);
380		(void) strlcpy(buf, strval, len);
381		break;
382
383	default:
384		abort();
385	}
386
387	if (srctype)
388		*srctype = src;
389
390	return (0);
391}
392
393/*
394 * Check if the bootfs name has the same pool name as it is set to.
395 * Assuming bootfs is a valid dataset name.
396 */
397static boolean_t
398bootfs_name_valid(const char *pool, char *bootfs)
399{
400	int len = strlen(pool);
401
402	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
403		return (B_FALSE);
404
405	if (strncmp(pool, bootfs, len) == 0 &&
406	    (bootfs[len] == '/' || bootfs[len] == '\0'))
407		return (B_TRUE);
408
409	return (B_FALSE);
410}
411
412boolean_t
413zpool_is_bootable(zpool_handle_t *zhp)
414{
415	char bootfs[ZFS_MAX_DATASET_NAME_LEN];
416
417	return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
418	    sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
419	    sizeof (bootfs)) != 0);
420}
421
422
423/*
424 * Given an nvlist of zpool properties to be set, validate that they are
425 * correct, and parse any numeric properties (index, boolean, etc) if they are
426 * specified as strings.
427 */
428static nvlist_t *
429zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
430    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
431{
432	nvpair_t *elem;
433	nvlist_t *retprops;
434	zpool_prop_t prop;
435	char *strval;
436	uint64_t intval;
437	char *slash, *check;
438	struct stat64 statbuf;
439	zpool_handle_t *zhp;
440
441	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
442		(void) no_memory(hdl);
443		return (NULL);
444	}
445
446	elem = NULL;
447	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
448		const char *propname = nvpair_name(elem);
449
450		prop = zpool_name_to_prop(propname);
451		if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
452			int err;
453			char *fname = strchr(propname, '@') + 1;
454
455			err = zfeature_lookup_name(fname, NULL);
456			if (err != 0) {
457				ASSERT3U(err, ==, ENOENT);
458				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
459				    "invalid feature '%s'"), fname);
460				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
461				goto error;
462			}
463
464			if (nvpair_type(elem) != DATA_TYPE_STRING) {
465				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
466				    "'%s' must be a string"), propname);
467				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
468				goto error;
469			}
470
471			(void) nvpair_value_string(elem, &strval);
472			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
473				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
474				    "property '%s' can only be set to "
475				    "'enabled'"), propname);
476				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
477				goto error;
478			}
479
480			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
481				(void) no_memory(hdl);
482				goto error;
483			}
484			continue;
485		}
486
487		/*
488		 * Make sure this property is valid and applies to this type.
489		 */
490		if (prop == ZPOOL_PROP_INVAL) {
491			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
492			    "invalid property '%s'"), propname);
493			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
494			goto error;
495		}
496
497		if (zpool_prop_readonly(prop)) {
498			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
499			    "is readonly"), propname);
500			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
501			goto error;
502		}
503
504		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
505		    &strval, &intval, errbuf) != 0)
506			goto error;
507
508		/*
509		 * Perform additional checking for specific properties.
510		 */
511		switch (prop) {
512		case ZPOOL_PROP_VERSION:
513			if (intval < version ||
514			    !SPA_VERSION_IS_SUPPORTED(intval)) {
515				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
516				    "property '%s' number %d is invalid."),
517				    propname, intval);
518				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
519				goto error;
520			}
521			break;
522
523		case ZPOOL_PROP_BOOTSIZE:
524			if (!flags.create) {
525				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
526				    "property '%s' can only be set during pool "
527				    "creation"), propname);
528				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
529				goto error;
530			}
531			break;
532
533		case ZPOOL_PROP_BOOTFS:
534			if (flags.create || flags.import) {
535				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
536				    "property '%s' cannot be set at creation "
537				    "or import time"), propname);
538				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
539				goto error;
540			}
541
542			if (version < SPA_VERSION_BOOTFS) {
543				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
544				    "pool must be upgraded to support "
545				    "'%s' property"), propname);
546				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
547				goto error;
548			}
549
550			/*
551			 * bootfs property value has to be a dataset name and
552			 * the dataset has to be in the same pool as it sets to.
553			 */
554			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
555			    strval)) {
556				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
557				    "is an invalid name"), strval);
558				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
559				goto error;
560			}
561
562			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
563				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
564				    "could not open pool '%s'"), poolname);
565				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
566				goto error;
567			}
568			zpool_close(zhp);
569			break;
570
571		case ZPOOL_PROP_ALTROOT:
572			if (!flags.create && !flags.import) {
573				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
574				    "property '%s' can only be set during pool "
575				    "creation or import"), propname);
576				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
577				goto error;
578			}
579
580			if (strval[0] != '/') {
581				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
582				    "bad alternate root '%s'"), strval);
583				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
584				goto error;
585			}
586			break;
587
588		case ZPOOL_PROP_CACHEFILE:
589			if (strval[0] == '\0')
590				break;
591
592			if (strcmp(strval, "none") == 0)
593				break;
594
595			if (strval[0] != '/') {
596				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
597				    "property '%s' must be empty, an "
598				    "absolute path, or 'none'"), propname);
599				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
600				goto error;
601			}
602
603			slash = strrchr(strval, '/');
604
605			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
606			    strcmp(slash, "/..") == 0) {
607				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
608				    "'%s' is not a valid file"), strval);
609				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
610				goto error;
611			}
612
613			*slash = '\0';
614
615			if (strval[0] != '\0' &&
616			    (stat64(strval, &statbuf) != 0 ||
617			    !S_ISDIR(statbuf.st_mode))) {
618				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
619				    "'%s' is not a valid directory"),
620				    strval);
621				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
622				goto error;
623			}
624
625			*slash = '/';
626			break;
627
628		case ZPOOL_PROP_COMMENT:
629			for (check = strval; *check != '\0'; check++) {
630				if (!isprint(*check)) {
631					zfs_error_aux(hdl,
632					    dgettext(TEXT_DOMAIN,
633					    "comment may only have printable "
634					    "characters"));
635					(void) zfs_error(hdl, EZFS_BADPROP,
636					    errbuf);
637					goto error;
638				}
639			}
640			if (strlen(strval) > ZPROP_MAX_COMMENT) {
641				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
642				    "comment must not exceed %d characters"),
643				    ZPROP_MAX_COMMENT);
644				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
645				goto error;
646			}
647			break;
648
649		case ZPOOL_PROP_READONLY:
650			if (!flags.import) {
651				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
652				    "property '%s' can only be set at "
653				    "import time"), propname);
654				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
655				goto error;
656			}
657			break;
658
659		case ZPOOL_PROP_TNAME:
660			if (!flags.create) {
661				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
662				    "property '%s' can only be set at "
663				    "creation time"), propname);
664				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
665				goto error;
666			}
667			break;
668
669		case ZPOOL_PROP_MULTIHOST:
670			if (get_system_hostid() == 0) {
671				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
672				    "requires a non-zero system hostid"));
673				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
674				goto error;
675			}
676			break;
677
678		default:
679			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
680			    "property '%s'(%d) not defined"), propname, prop);
681			break;
682		}
683	}
684
685	return (retprops);
686error:
687	nvlist_free(retprops);
688	return (NULL);
689}
690
691/*
692 * Set zpool property : propname=propval.
693 */
694int
695zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
696{
697	zfs_cmd_t zc = { 0 };
698	int ret = -1;
699	char errbuf[1024];
700	nvlist_t *nvl = NULL;
701	nvlist_t *realprops;
702	uint64_t version;
703	prop_flags_t flags = { 0 };
704
705	(void) snprintf(errbuf, sizeof (errbuf),
706	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
707	    zhp->zpool_name);
708
709	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
710		return (no_memory(zhp->zpool_hdl));
711
712	if (nvlist_add_string(nvl, propname, propval) != 0) {
713		nvlist_free(nvl);
714		return (no_memory(zhp->zpool_hdl));
715	}
716
717	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
718	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
719	    zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
720		nvlist_free(nvl);
721		return (-1);
722	}
723
724	nvlist_free(nvl);
725	nvl = realprops;
726
727	/*
728	 * Execute the corresponding ioctl() to set this property.
729	 */
730	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
731
732	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
733		nvlist_free(nvl);
734		return (-1);
735	}
736
737	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
738
739	zcmd_free_nvlists(&zc);
740	nvlist_free(nvl);
741
742	if (ret)
743		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
744	else
745		(void) zpool_props_refresh(zhp);
746
747	return (ret);
748}
749
750int
751zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
752{
753	libzfs_handle_t *hdl = zhp->zpool_hdl;
754	zprop_list_t *entry;
755	char buf[ZFS_MAXPROPLEN];
756	nvlist_t *features = NULL;
757	zprop_list_t **last;
758	boolean_t firstexpand = (NULL == *plp);
759
760	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
761		return (-1);
762
763	last = plp;
764	while (*last != NULL)
765		last = &(*last)->pl_next;
766
767	if ((*plp)->pl_all)
768		features = zpool_get_features(zhp);
769
770	if ((*plp)->pl_all && firstexpand) {
771		for (int i = 0; i < SPA_FEATURES; i++) {
772			zprop_list_t *entry = zfs_alloc(hdl,
773			    sizeof (zprop_list_t));
774			entry->pl_prop = ZPROP_INVAL;
775			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
776			    spa_feature_table[i].fi_uname);
777			entry->pl_width = strlen(entry->pl_user_prop);
778			entry->pl_all = B_TRUE;
779
780			*last = entry;
781			last = &entry->pl_next;
782		}
783	}
784
785	/* add any unsupported features */
786	for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
787	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
788		char *propname;
789		boolean_t found;
790		zprop_list_t *entry;
791
792		if (zfeature_is_supported(nvpair_name(nvp)))
793			continue;
794
795		propname = zfs_asprintf(hdl, "unsupported@%s",
796		    nvpair_name(nvp));
797
798		/*
799		 * Before adding the property to the list make sure that no
800		 * other pool already added the same property.
801		 */
802		found = B_FALSE;
803		entry = *plp;
804		while (entry != NULL) {
805			if (entry->pl_user_prop != NULL &&
806			    strcmp(propname, entry->pl_user_prop) == 0) {
807				found = B_TRUE;
808				break;
809			}
810			entry = entry->pl_next;
811		}
812		if (found) {
813			free(propname);
814			continue;
815		}
816
817		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
818		entry->pl_prop = ZPROP_INVAL;
819		entry->pl_user_prop = propname;
820		entry->pl_width = strlen(entry->pl_user_prop);
821		entry->pl_all = B_TRUE;
822
823		*last = entry;
824		last = &entry->pl_next;
825	}
826
827	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
828
829		if (entry->pl_fixed)
830			continue;
831
832		if (entry->pl_prop != ZPROP_INVAL &&
833		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
834		    NULL, B_FALSE) == 0) {
835			if (strlen(buf) > entry->pl_width)
836				entry->pl_width = strlen(buf);
837		}
838	}
839
840	return (0);
841}
842
843/*
844 * Get the state for the given feature on the given ZFS pool.
845 */
846int
847zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
848    size_t len)
849{
850	uint64_t refcount;
851	boolean_t found = B_FALSE;
852	nvlist_t *features = zpool_get_features(zhp);
853	boolean_t supported;
854	const char *feature = strchr(propname, '@') + 1;
855
856	supported = zpool_prop_feature(propname);
857	ASSERT(supported || zpool_prop_unsupported(propname));
858
859	/*
860	 * Convert from feature name to feature guid. This conversion is
861	 * unecessary for unsupported@... properties because they already
862	 * use guids.
863	 */
864	if (supported) {
865		int ret;
866		spa_feature_t fid;
867
868		ret = zfeature_lookup_name(feature, &fid);
869		if (ret != 0) {
870			(void) strlcpy(buf, "-", len);
871			return (ENOTSUP);
872		}
873		feature = spa_feature_table[fid].fi_guid;
874	}
875
876	if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
877		found = B_TRUE;
878
879	if (supported) {
880		if (!found) {
881			(void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
882		} else  {
883			if (refcount == 0)
884				(void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
885			else
886				(void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
887		}
888	} else {
889		if (found) {
890			if (refcount == 0) {
891				(void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
892			} else {
893				(void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
894			}
895		} else {
896			(void) strlcpy(buf, "-", len);
897			return (ENOTSUP);
898		}
899	}
900
901	return (0);
902}
903
904/*
905 * Don't start the slice at the default block of 34; many storage
906 * devices will use a stripe width of 128k, so start there instead.
907 */
908#define	NEW_START_BLOCK	256
909
910/*
911 * Validate the given pool name, optionally putting an extended error message in
912 * 'buf'.
913 */
914boolean_t
915zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
916{
917	namecheck_err_t why;
918	char what;
919	int ret;
920
921	ret = pool_namecheck(pool, &why, &what);
922
923	/*
924	 * The rules for reserved pool names were extended at a later point.
925	 * But we need to support users with existing pools that may now be
926	 * invalid.  So we only check for this expanded set of names during a
927	 * create (or import), and only in userland.
928	 */
929	if (ret == 0 && !isopen &&
930	    (strncmp(pool, "mirror", 6) == 0 ||
931	    strncmp(pool, "raidz", 5) == 0 ||
932	    strncmp(pool, "spare", 5) == 0 ||
933	    strcmp(pool, "log") == 0)) {
934		if (hdl != NULL)
935			zfs_error_aux(hdl,
936			    dgettext(TEXT_DOMAIN, "name is reserved"));
937		return (B_FALSE);
938	}
939
940
941	if (ret != 0) {
942		if (hdl != NULL) {
943			switch (why) {
944			case NAME_ERR_TOOLONG:
945				zfs_error_aux(hdl,
946				    dgettext(TEXT_DOMAIN, "name is too long"));
947				break;
948
949			case NAME_ERR_INVALCHAR:
950				zfs_error_aux(hdl,
951				    dgettext(TEXT_DOMAIN, "invalid character "
952				    "'%c' in pool name"), what);
953				break;
954
955			case NAME_ERR_NOLETTER:
956				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
957				    "name must begin with a letter"));
958				break;
959
960			case NAME_ERR_RESERVED:
961				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
962				    "name is reserved"));
963				break;
964
965			case NAME_ERR_DISKLIKE:
966				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
967				    "pool name is reserved"));
968				break;
969
970			case NAME_ERR_LEADING_SLASH:
971				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
972				    "leading slash in name"));
973				break;
974
975			case NAME_ERR_EMPTY_COMPONENT:
976				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
977				    "empty component in name"));
978				break;
979
980			case NAME_ERR_TRAILING_SLASH:
981				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
982				    "trailing slash in name"));
983				break;
984
985			case NAME_ERR_MULTIPLE_DELIMITERS:
986				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
987				    "multiple '@' and/or '#' delimiters in "
988				    "name"));
989				break;
990
991			default:
992				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
993				    "(%d) not defined"), why);
994				break;
995			}
996		}
997		return (B_FALSE);
998	}
999
1000	return (B_TRUE);
1001}
1002
1003/*
1004 * Open a handle to the given pool, even if the pool is currently in the FAULTED
1005 * state.
1006 */
1007zpool_handle_t *
1008zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1009{
1010	zpool_handle_t *zhp;
1011	boolean_t missing;
1012
1013	/*
1014	 * Make sure the pool name is valid.
1015	 */
1016	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1017		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1018		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1019		    pool);
1020		return (NULL);
1021	}
1022
1023	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1024		return (NULL);
1025
1026	zhp->zpool_hdl = hdl;
1027	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1028
1029	if (zpool_refresh_stats(zhp, &missing) != 0) {
1030		zpool_close(zhp);
1031		return (NULL);
1032	}
1033
1034	if (missing) {
1035		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1036		(void) zfs_error_fmt(hdl, EZFS_NOENT,
1037		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1038		zpool_close(zhp);
1039		return (NULL);
1040	}
1041
1042	return (zhp);
1043}
1044
1045/*
1046 * Like the above, but silent on error.  Used when iterating over pools (because
1047 * the configuration cache may be out of date).
1048 */
1049int
1050zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1051{
1052	zpool_handle_t *zhp;
1053	boolean_t missing;
1054
1055	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1056		return (-1);
1057
1058	zhp->zpool_hdl = hdl;
1059	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1060
1061	if (zpool_refresh_stats(zhp, &missing) != 0) {
1062		zpool_close(zhp);
1063		return (-1);
1064	}
1065
1066	if (missing) {
1067		zpool_close(zhp);
1068		*ret = NULL;
1069		return (0);
1070	}
1071
1072	*ret = zhp;
1073	return (0);
1074}
1075
1076/*
1077 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1078 * state.
1079 */
1080zpool_handle_t *
1081zpool_open(libzfs_handle_t *hdl, const char *pool)
1082{
1083	zpool_handle_t *zhp;
1084
1085	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1086		return (NULL);
1087
1088	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1089		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1090		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1091		zpool_close(zhp);
1092		return (NULL);
1093	}
1094
1095	return (zhp);
1096}
1097
1098/*
1099 * Close the handle.  Simply frees the memory associated with the handle.
1100 */
1101void
1102zpool_close(zpool_handle_t *zhp)
1103{
1104	nvlist_free(zhp->zpool_config);
1105	nvlist_free(zhp->zpool_old_config);
1106	nvlist_free(zhp->zpool_props);
1107	free(zhp);
1108}
1109
1110/*
1111 * Return the name of the pool.
1112 */
1113const char *
1114zpool_get_name(zpool_handle_t *zhp)
1115{
1116	return (zhp->zpool_name);
1117}
1118
1119
1120/*
1121 * Return the state of the pool (ACTIVE or UNAVAILABLE)
1122 */
1123int
1124zpool_get_state(zpool_handle_t *zhp)
1125{
1126	return (zhp->zpool_state);
1127}
1128
1129/*
1130 * Check if vdev list contains a special vdev
1131 */
1132static boolean_t
1133zpool_has_special_vdev(nvlist_t *nvroot)
1134{
1135	nvlist_t **child;
1136	uint_t children;
1137
1138	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child,
1139	    &children) == 0) {
1140		for (uint_t c = 0; c < children; c++) {
1141			char *bias;
1142
1143			if (nvlist_lookup_string(child[c],
1144			    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0 &&
1145			    strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
1146				return (B_TRUE);
1147			}
1148		}
1149	}
1150	return (B_FALSE);
1151}
1152
1153/*
1154 * Create the named pool, using the provided vdev list.  It is assumed
1155 * that the consumer has already validated the contents of the nvlist, so we
1156 * don't have to worry about error semantics.
1157 */
1158int
1159zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
1160    nvlist_t *props, nvlist_t *fsprops)
1161{
1162	zfs_cmd_t zc = { 0 };
1163	nvlist_t *zc_fsprops = NULL;
1164	nvlist_t *zc_props = NULL;
1165	char msg[1024];
1166	int ret = -1;
1167
1168	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1169	    "cannot create '%s'"), pool);
1170
1171	if (!zpool_name_valid(hdl, B_FALSE, pool))
1172		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1173
1174	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1175		return (-1);
1176
1177	if (props) {
1178		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1179
1180		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
1181		    SPA_VERSION_1, flags, msg)) == NULL) {
1182			goto create_failed;
1183		}
1184	}
1185
1186	if (fsprops) {
1187		uint64_t zoned;
1188		char *zonestr;
1189
1190		zoned = ((nvlist_lookup_string(fsprops,
1191		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1192		    strcmp(zonestr, "on") == 0);
1193
1194		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
1195		    fsprops, zoned, NULL, NULL, msg)) == NULL) {
1196			goto create_failed;
1197		}
1198
1199		if (nvlist_exists(zc_fsprops,
1200		    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) &&
1201		    !zpool_has_special_vdev(nvroot)) {
1202			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1203			    "%s property requires a special vdev"),
1204			    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
1205			(void) zfs_error(hdl, EZFS_BADPROP, msg);
1206			goto create_failed;
1207		}
1208
1209		if (!zc_props &&
1210		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1211			goto create_failed;
1212		}
1213		if (nvlist_add_nvlist(zc_props,
1214		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1215			goto create_failed;
1216		}
1217	}
1218
1219	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1220		goto create_failed;
1221
1222	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1223
1224	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
1225
1226		zcmd_free_nvlists(&zc);
1227		nvlist_free(zc_props);
1228		nvlist_free(zc_fsprops);
1229
1230		switch (errno) {
1231		case EBUSY:
1232			/*
1233			 * This can happen if the user has specified the same
1234			 * device multiple times.  We can't reliably detect this
1235			 * until we try to add it and see we already have a
1236			 * label.
1237			 */
1238			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1239			    "one or more vdevs refer to the same device"));
1240			return (zfs_error(hdl, EZFS_BADDEV, msg));
1241
1242		case ERANGE:
1243			/*
1244			 * This happens if the record size is smaller or larger
1245			 * than the allowed size range, or not a power of 2.
1246			 *
1247			 * NOTE: although zfs_valid_proplist is called earlier,
1248			 * this case may have slipped through since the
1249			 * pool does not exist yet and it is therefore
1250			 * impossible to read properties e.g. max blocksize
1251			 * from the pool.
1252			 */
1253			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1254			    "record size invalid"));
1255			return (zfs_error(hdl, EZFS_BADPROP, msg));
1256
1257		case EOVERFLOW:
1258			/*
1259			 * This occurs when one of the devices is below
1260			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1261			 * device was the problem device since there's no
1262			 * reliable way to determine device size from userland.
1263			 */
1264			{
1265				char buf[64];
1266
1267				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1268
1269				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1270				    "one or more devices is less than the "
1271				    "minimum size (%s)"), buf);
1272			}
1273			return (zfs_error(hdl, EZFS_BADDEV, msg));
1274
1275		case ENOSPC:
1276			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1277			    "one or more devices is out of space"));
1278			return (zfs_error(hdl, EZFS_BADDEV, msg));
1279
1280		case ENOTBLK:
1281			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1282			    "cache device must be a disk or disk slice"));
1283			return (zfs_error(hdl, EZFS_BADDEV, msg));
1284
1285		default:
1286			return (zpool_standard_error(hdl, errno, msg));
1287		}
1288	}
1289
1290create_failed:
1291	zcmd_free_nvlists(&zc);
1292	nvlist_free(zc_props);
1293	nvlist_free(zc_fsprops);
1294	return (ret);
1295}
1296
1297/*
1298 * Destroy the given pool.  It is up to the caller to ensure that there are no
1299 * datasets left in the pool.
1300 */
1301int
1302zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1303{
1304	zfs_cmd_t zc = { 0 };
1305	zfs_handle_t *zfp = NULL;
1306	libzfs_handle_t *hdl = zhp->zpool_hdl;
1307	char msg[1024];
1308
1309	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1310	    (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1311		return (-1);
1312
1313	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1314	zc.zc_history = (uint64_t)(uintptr_t)log_str;
1315
1316	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1317		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1318		    "cannot destroy '%s'"), zhp->zpool_name);
1319
1320		if (errno == EROFS) {
1321			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1322			    "one or more devices is read only"));
1323			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1324		} else {
1325			(void) zpool_standard_error(hdl, errno, msg);
1326		}
1327
1328		if (zfp)
1329			zfs_close(zfp);
1330		return (-1);
1331	}
1332
1333	if (zfp) {
1334		remove_mountpoint(zfp);
1335		zfs_close(zfp);
1336	}
1337
1338	return (0);
1339}
1340
1341/*
1342 * Create a checkpoint in the given pool.
1343 */
1344int
1345zpool_checkpoint(zpool_handle_t *zhp)
1346{
1347	libzfs_handle_t *hdl = zhp->zpool_hdl;
1348	char msg[1024];
1349	int error;
1350
1351	error = lzc_pool_checkpoint(zhp->zpool_name);
1352	if (error != 0) {
1353		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1354		    "cannot checkpoint '%s'"), zhp->zpool_name);
1355		(void) zpool_standard_error(hdl, error, msg);
1356		return (-1);
1357	}
1358
1359	return (0);
1360}
1361
1362/*
1363 * Discard the checkpoint from the given pool.
1364 */
1365int
1366zpool_discard_checkpoint(zpool_handle_t *zhp)
1367{
1368	libzfs_handle_t *hdl = zhp->zpool_hdl;
1369	char msg[1024];
1370	int error;
1371
1372	error = lzc_pool_checkpoint_discard(zhp->zpool_name);
1373	if (error != 0) {
1374		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1375		    "cannot discard checkpoint in '%s'"), zhp->zpool_name);
1376		(void) zpool_standard_error(hdl, error, msg);
1377		return (-1);
1378	}
1379
1380	return (0);
1381}
1382
1383/*
1384 * Add the given vdevs to the pool.  The caller must have already performed the
1385 * necessary verification to ensure that the vdev specification is well-formed.
1386 */
1387int
1388zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1389{
1390	zfs_cmd_t zc = { 0 };
1391	int ret;
1392	libzfs_handle_t *hdl = zhp->zpool_hdl;
1393	char msg[1024];
1394	nvlist_t **spares, **l2cache;
1395	uint_t nspares, nl2cache;
1396
1397	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1398	    "cannot add to '%s'"), zhp->zpool_name);
1399
1400	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1401	    SPA_VERSION_SPARES &&
1402	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1403	    &spares, &nspares) == 0) {
1404		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1405		    "upgraded to add hot spares"));
1406		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1407	}
1408
1409	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1410	    SPA_VERSION_L2CACHE &&
1411	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1412	    &l2cache, &nl2cache) == 0) {
1413		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1414		    "upgraded to add cache devices"));
1415		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1416	}
1417
1418	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1419		return (-1);
1420	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1421
1422	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1423		switch (errno) {
1424		case EBUSY:
1425			/*
1426			 * This can happen if the user has specified the same
1427			 * device multiple times.  We can't reliably detect this
1428			 * until we try to add it and see we already have a
1429			 * label.
1430			 */
1431			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1432			    "one or more vdevs refer to the same device"));
1433			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1434			break;
1435
1436		case EINVAL:
1437			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1438			    "invalid config; a pool with removing/removed "
1439			    "vdevs does not support adding raidz vdevs"));
1440			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1441			break;
1442
1443		case EOVERFLOW:
1444			/*
1445			 * This occurrs when one of the devices is below
1446			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1447			 * device was the problem device since there's no
1448			 * reliable way to determine device size from userland.
1449			 */
1450			{
1451				char buf[64];
1452
1453				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1454
1455				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1456				    "device is less than the minimum "
1457				    "size (%s)"), buf);
1458			}
1459			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1460			break;
1461
1462		case ENOTSUP:
1463			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1464			    "pool must be upgraded to add these vdevs"));
1465			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1466			break;
1467
1468		case EDOM:
1469			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1470			    "root pool can not have multiple vdevs"
1471			    " or separate logs"));
1472			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1473			break;
1474
1475		case ENOTBLK:
1476			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1477			    "cache device must be a disk or disk slice"));
1478			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1479			break;
1480
1481		default:
1482			(void) zpool_standard_error(hdl, errno, msg);
1483		}
1484
1485		ret = -1;
1486	} else {
1487		ret = 0;
1488	}
1489
1490	zcmd_free_nvlists(&zc);
1491
1492	return (ret);
1493}
1494
1495/*
1496 * Exports the pool from the system.  The caller must ensure that there are no
1497 * mounted datasets in the pool.
1498 */
1499static int
1500zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1501    const char *log_str)
1502{
1503	zfs_cmd_t zc = { 0 };
1504	char msg[1024];
1505
1506	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1507	    "cannot export '%s'"), zhp->zpool_name);
1508
1509	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1510	zc.zc_cookie = force;
1511	zc.zc_guid = hardforce;
1512	zc.zc_history = (uint64_t)(uintptr_t)log_str;
1513
1514	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1515		switch (errno) {
1516		case EXDEV:
1517			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1518			    "use '-f' to override the following errors:\n"
1519			    "'%s' has an active shared spare which could be"
1520			    " used by other pools once '%s' is exported."),
1521			    zhp->zpool_name, zhp->zpool_name);
1522			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1523			    msg));
1524		default:
1525			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1526			    msg));
1527		}
1528	}
1529
1530	return (0);
1531}
1532
1533int
1534zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
1535{
1536	return (zpool_export_common(zhp, force, B_FALSE, log_str));
1537}
1538
1539int
1540zpool_export_force(zpool_handle_t *zhp, const char *log_str)
1541{
1542	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
1543}
1544
1545static void
1546zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1547    nvlist_t *config)
1548{
1549	nvlist_t *nv = NULL;
1550	uint64_t rewindto;
1551	int64_t loss = -1;
1552	struct tm t;
1553	char timestr[128];
1554
1555	if (!hdl->libzfs_printerr || config == NULL)
1556		return;
1557
1558	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1559	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1560		return;
1561	}
1562
1563	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1564		return;
1565	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1566
1567	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1568	    strftime(timestr, 128, 0, &t) != 0) {
1569		if (dryrun) {
1570			(void) printf(dgettext(TEXT_DOMAIN,
1571			    "Would be able to return %s "
1572			    "to its state as of %s.\n"),
1573			    name, timestr);
1574		} else {
1575			(void) printf(dgettext(TEXT_DOMAIN,
1576			    "Pool %s returned to its state as of %s.\n"),
1577			    name, timestr);
1578		}
1579		if (loss > 120) {
1580			(void) printf(dgettext(TEXT_DOMAIN,
1581			    "%s approximately %lld "),
1582			    dryrun ? "Would discard" : "Discarded",
1583			    (loss + 30) / 60);
1584			(void) printf(dgettext(TEXT_DOMAIN,
1585			    "minutes of transactions.\n"));
1586		} else if (loss > 0) {
1587			(void) printf(dgettext(TEXT_DOMAIN,
1588			    "%s approximately %lld "),
1589			    dryrun ? "Would discard" : "Discarded", loss);
1590			(void) printf(dgettext(TEXT_DOMAIN,
1591			    "seconds of transactions.\n"));
1592		}
1593	}
1594}
1595
1596void
1597zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1598    nvlist_t *config)
1599{
1600	nvlist_t *nv = NULL;
1601	int64_t loss = -1;
1602	uint64_t edata = UINT64_MAX;
1603	uint64_t rewindto;
1604	struct tm t;
1605	char timestr[128];
1606
1607	if (!hdl->libzfs_printerr)
1608		return;
1609
1610	if (reason >= 0)
1611		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
1612	else
1613		(void) printf(dgettext(TEXT_DOMAIN, "\t"));
1614
1615	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1616	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1617	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1618	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1619		goto no_info;
1620
1621	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1622	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1623	    &edata);
1624
1625	(void) printf(dgettext(TEXT_DOMAIN,
1626	    "Recovery is possible, but will result in some data loss.\n"));
1627
1628	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1629	    strftime(timestr, 128, 0, &t) != 0) {
1630		(void) printf(dgettext(TEXT_DOMAIN,
1631		    "\tReturning the pool to its state as of %s\n"
1632		    "\tshould correct the problem.  "),
1633		    timestr);
1634	} else {
1635		(void) printf(dgettext(TEXT_DOMAIN,
1636		    "\tReverting the pool to an earlier state "
1637		    "should correct the problem.\n\t"));
1638	}
1639
1640	if (loss > 120) {
1641		(void) printf(dgettext(TEXT_DOMAIN,
1642		    "Approximately %lld minutes of data\n"
1643		    "\tmust be discarded, irreversibly.  "), (loss + 30) / 60);
1644	} else if (loss > 0) {
1645		(void) printf(dgettext(TEXT_DOMAIN,
1646		    "Approximately %lld seconds of data\n"
1647		    "\tmust be discarded, irreversibly.  "), loss);
1648	}
1649	if (edata != 0 && edata != UINT64_MAX) {
1650		if (edata == 1) {
1651			(void) printf(dgettext(TEXT_DOMAIN,
1652			    "After rewind, at least\n"
1653			    "\tone persistent user-data error will remain.  "));
1654		} else {
1655			(void) printf(dgettext(TEXT_DOMAIN,
1656			    "After rewind, several\n"
1657			    "\tpersistent user-data errors will remain.  "));
1658		}
1659	}
1660	(void) printf(dgettext(TEXT_DOMAIN,
1661	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1662	    reason >= 0 ? "clear" : "import", name);
1663
1664	(void) printf(dgettext(TEXT_DOMAIN,
1665	    "A scrub of the pool\n"
1666	    "\tis strongly recommended after recovery.\n"));
1667	return;
1668
1669no_info:
1670	(void) printf(dgettext(TEXT_DOMAIN,
1671	    "Destroy and re-create the pool from\n\ta backup source.\n"));
1672}
1673
1674/*
1675 * zpool_import() is a contracted interface. Should be kept the same
1676 * if possible.
1677 *
1678 * Applications should use zpool_import_props() to import a pool with
1679 * new properties value to be set.
1680 */
1681int
1682zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1683    char *altroot)
1684{
1685	nvlist_t *props = NULL;
1686	int ret;
1687
1688	if (altroot != NULL) {
1689		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1690			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1691			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1692			    newname));
1693		}
1694
1695		if (nvlist_add_string(props,
1696		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1697		    nvlist_add_string(props,
1698		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1699			nvlist_free(props);
1700			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1701			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1702			    newname));
1703		}
1704	}
1705
1706	ret = zpool_import_props(hdl, config, newname, props,
1707	    ZFS_IMPORT_NORMAL);
1708	nvlist_free(props);
1709	return (ret);
1710}
1711
1712static void
1713print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1714    int indent)
1715{
1716	nvlist_t **child;
1717	uint_t c, children;
1718	char *vname;
1719	uint64_t is_log = 0;
1720
1721	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1722	    &is_log);
1723
1724	if (name != NULL)
1725		(void) printf("\t%*s%s%s\n", indent, "", name,
1726		    is_log ? " [log]" : "");
1727
1728	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1729	    &child, &children) != 0)
1730		return;
1731
1732	for (c = 0; c < children; c++) {
1733		vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1734		print_vdev_tree(hdl, vname, child[c], indent + 2);
1735		free(vname);
1736	}
1737}
1738
1739void
1740zpool_print_unsup_feat(nvlist_t *config)
1741{
1742	nvlist_t *nvinfo, *unsup_feat;
1743
1744	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1745	    0);
1746	verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1747	    &unsup_feat) == 0);
1748
1749	for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1750	    nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1751		char *desc;
1752
1753		verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1754		verify(nvpair_value_string(nvp, &desc) == 0);
1755
1756		if (strlen(desc) > 0)
1757			(void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1758		else
1759			(void) printf("\t%s\n", nvpair_name(nvp));
1760	}
1761}
1762
1763/*
1764 * Import the given pool using the known configuration and a list of
1765 * properties to be set. The configuration should have come from
1766 * zpool_find_import(). The 'newname' parameters control whether the pool
1767 * is imported with a different name.
1768 */
1769int
1770zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1771    nvlist_t *props, int flags)
1772{
1773	zfs_cmd_t zc = { 0 };
1774	zpool_load_policy_t policy;
1775	nvlist_t *nv = NULL;
1776	nvlist_t *nvinfo = NULL;
1777	nvlist_t *missing = NULL;
1778	char *thename;
1779	char *origname;
1780	int ret;
1781	int error = 0;
1782	char errbuf[1024];
1783
1784	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1785	    &origname) == 0);
1786
1787	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1788	    "cannot import pool '%s'"), origname);
1789
1790	if (newname != NULL) {
1791		if (!zpool_name_valid(hdl, B_FALSE, newname))
1792			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1793			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1794			    newname));
1795		thename = (char *)newname;
1796	} else {
1797		thename = origname;
1798	}
1799
1800	if (props != NULL) {
1801		uint64_t version;
1802		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1803
1804		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1805		    &version) == 0);
1806
1807		if ((props = zpool_valid_proplist(hdl, origname,
1808		    props, version, flags, errbuf)) == NULL)
1809			return (-1);
1810		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1811			nvlist_free(props);
1812			return (-1);
1813		}
1814		nvlist_free(props);
1815	}
1816
1817	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1818
1819	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1820	    &zc.zc_guid) == 0);
1821
1822	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1823		zcmd_free_nvlists(&zc);
1824		return (-1);
1825	}
1826	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1827		zcmd_free_nvlists(&zc);
1828		return (-1);
1829	}
1830
1831	zc.zc_cookie = flags;
1832	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1833	    errno == ENOMEM) {
1834		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1835			zcmd_free_nvlists(&zc);
1836			return (-1);
1837		}
1838	}
1839	if (ret != 0)
1840		error = errno;
1841
1842	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1843
1844	zcmd_free_nvlists(&zc);
1845
1846	zpool_get_load_policy(config, &policy);
1847
1848	if (error) {
1849		char desc[1024];
1850		char aux[256];
1851
1852		/*
1853		 * Dry-run failed, but we print out what success
1854		 * looks like if we found a best txg
1855		 */
1856		if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1857			zpool_rewind_exclaim(hdl, newname ? origname : thename,
1858			    B_TRUE, nv);
1859			nvlist_free(nv);
1860			return (-1);
1861		}
1862
1863		if (newname == NULL)
1864			(void) snprintf(desc, sizeof (desc),
1865			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1866			    thename);
1867		else
1868			(void) snprintf(desc, sizeof (desc),
1869			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1870			    origname, thename);
1871
1872		switch (error) {
1873		case ENOTSUP:
1874			if (nv != NULL && nvlist_lookup_nvlist(nv,
1875			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1876			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1877				(void) printf(dgettext(TEXT_DOMAIN, "This "
1878				    "pool uses the following feature(s) not "
1879				    "supported by this system:\n"));
1880				zpool_print_unsup_feat(nv);
1881				if (nvlist_exists(nvinfo,
1882				    ZPOOL_CONFIG_CAN_RDONLY)) {
1883					(void) printf(dgettext(TEXT_DOMAIN,
1884					    "All unsupported features are only "
1885					    "required for writing to the pool."
1886					    "\nThe pool can be imported using "
1887					    "'-o readonly=on'.\n"));
1888				}
1889			}
1890			/*
1891			 * Unsupported version.
1892			 */
1893			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1894			break;
1895
1896		case EREMOTEIO:
1897			if (nv != NULL && nvlist_lookup_nvlist(nv,
1898			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1899				char *hostname = "<unknown>";
1900				uint64_t hostid = 0;
1901				mmp_state_t mmp_state;
1902
1903				mmp_state = fnvlist_lookup_uint64(nvinfo,
1904				    ZPOOL_CONFIG_MMP_STATE);
1905
1906				if (nvlist_exists(nvinfo,
1907				    ZPOOL_CONFIG_MMP_HOSTNAME))
1908					hostname = fnvlist_lookup_string(nvinfo,
1909					    ZPOOL_CONFIG_MMP_HOSTNAME);
1910
1911				if (nvlist_exists(nvinfo,
1912				    ZPOOL_CONFIG_MMP_HOSTID))
1913					hostid = fnvlist_lookup_uint64(nvinfo,
1914					    ZPOOL_CONFIG_MMP_HOSTID);
1915
1916				if (mmp_state == MMP_STATE_ACTIVE) {
1917					(void) snprintf(aux, sizeof (aux),
1918					    dgettext(TEXT_DOMAIN, "pool is imp"
1919					    "orted on host '%s' (hostid=%lx).\n"
1920					    "Export the pool on the other "
1921					    "system, then run 'zpool import'."),
1922					    hostname, (unsigned long) hostid);
1923				} else if (mmp_state == MMP_STATE_NO_HOSTID) {
1924					(void) snprintf(aux, sizeof (aux),
1925					    dgettext(TEXT_DOMAIN, "pool has "
1926					    "the multihost property on and "
1927					    "the\nsystem's hostid is not "
1928					    "set.\n"));
1929				}
1930
1931				(void) zfs_error_aux(hdl, aux);
1932			}
1933			(void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1934			break;
1935
1936		case EINVAL:
1937			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1938			break;
1939
1940		case EROFS:
1941			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1942			    "one or more devices is read only"));
1943			(void) zfs_error(hdl, EZFS_BADDEV, desc);
1944			break;
1945
1946		case ENXIO:
1947			if (nv && nvlist_lookup_nvlist(nv,
1948			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1949			    nvlist_lookup_nvlist(nvinfo,
1950			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1951				(void) printf(dgettext(TEXT_DOMAIN,
1952				    "The devices below are missing or "
1953				    "corrupted, use '-m' to import the pool "
1954				    "anyway:\n"));
1955				print_vdev_tree(hdl, NULL, missing, 2);
1956				(void) printf("\n");
1957			}
1958			(void) zpool_standard_error(hdl, error, desc);
1959			break;
1960
1961		case EEXIST:
1962			(void) zpool_standard_error(hdl, error, desc);
1963			break;
1964		case ENAMETOOLONG:
1965			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1966			    "new name of at least one dataset is longer than "
1967			    "the maximum allowable length"));
1968			(void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
1969			break;
1970		default:
1971			(void) zpool_standard_error(hdl, error, desc);
1972			zpool_explain_recover(hdl,
1973			    newname ? origname : thename, -error, nv);
1974			break;
1975		}
1976
1977		nvlist_free(nv);
1978		ret = -1;
1979	} else {
1980		zpool_handle_t *zhp;
1981
1982		/*
1983		 * This should never fail, but play it safe anyway.
1984		 */
1985		if (zpool_open_silent(hdl, thename, &zhp) != 0)
1986			ret = -1;
1987		else if (zhp != NULL)
1988			zpool_close(zhp);
1989		if (policy.zlp_rewind &
1990		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1991			zpool_rewind_exclaim(hdl, newname ? origname : thename,
1992			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
1993		}
1994		nvlist_free(nv);
1995		return (0);
1996	}
1997
1998	return (ret);
1999}
2000
2001/*
2002 * Scan the pool.
2003 */
2004int
2005zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2006{
2007	zfs_cmd_t zc = { 0 };
2008	char msg[1024];
2009	int err;
2010	libzfs_handle_t *hdl = zhp->zpool_hdl;
2011
2012	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2013	zc.zc_cookie = func;
2014	zc.zc_flags = cmd;
2015
2016	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
2017		return (0);
2018
2019	err = errno;
2020
2021	/* ECANCELED on a scrub means we resumed a paused scrub */
2022	if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
2023	    cmd == POOL_SCRUB_NORMAL)
2024		return (0);
2025
2026	if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
2027		return (0);
2028
2029	if (func == POOL_SCAN_SCRUB) {
2030		if (cmd == POOL_SCRUB_PAUSE) {
2031			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2032			    "cannot pause scrubbing %s"), zc.zc_name);
2033		} else {
2034			assert(cmd == POOL_SCRUB_NORMAL);
2035			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2036			    "cannot scrub %s"), zc.zc_name);
2037		}
2038	} else if (func == POOL_SCAN_NONE) {
2039		(void) snprintf(msg, sizeof (msg),
2040		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
2041		    zc.zc_name);
2042	} else {
2043		assert(!"unexpected result");
2044	}
2045
2046	if (err == EBUSY) {
2047		nvlist_t *nvroot;
2048		pool_scan_stat_t *ps = NULL;
2049		uint_t psc;
2050
2051		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2052		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2053		(void) nvlist_lookup_uint64_array(nvroot,
2054		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2055		if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2056			if (cmd == POOL_SCRUB_PAUSE)
2057				return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2058			else
2059				return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2060		} else {
2061			return (zfs_error(hdl, EZFS_RESILVERING, msg));
2062		}
2063	} else if (err == ENOENT) {
2064		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
2065	} else {
2066		return (zpool_standard_error(hdl, err, msg));
2067	}
2068}
2069
2070static int
2071xlate_init_err(int err)
2072{
2073	switch (err) {
2074	case ENODEV:
2075		return (EZFS_NODEVICE);
2076	case EINVAL:
2077	case EROFS:
2078		return (EZFS_BADDEV);
2079	case EBUSY:
2080		return (EZFS_INITIALIZING);
2081	case ESRCH:
2082		return (EZFS_NO_INITIALIZE);
2083	}
2084	return (err);
2085}
2086
2087/*
2088 * Begin, suspend, or cancel the initialization (initializing of all free
2089 * blocks) for the given vdevs in the given pool.
2090 */
2091int
2092zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
2093    nvlist_t *vds)
2094{
2095	char msg[1024];
2096	libzfs_handle_t *hdl = zhp->zpool_hdl;
2097
2098	nvlist_t *errlist;
2099
2100	/* translate vdev names to guids */
2101	nvlist_t *vdev_guids = fnvlist_alloc();
2102	nvlist_t *guids_to_paths = fnvlist_alloc();
2103	boolean_t spare, cache;
2104	nvlist_t *tgt;
2105	nvpair_t *elem;
2106
2107	for (elem = nvlist_next_nvpair(vds, NULL); elem != NULL;
2108	    elem = nvlist_next_nvpair(vds, elem)) {
2109		char *vd_path = nvpair_name(elem);
2110		tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache, NULL);
2111
2112		if ((tgt == NULL) || cache || spare) {
2113			(void) snprintf(msg, sizeof (msg),
2114			    dgettext(TEXT_DOMAIN, "cannot initialize '%s'"),
2115			    vd_path);
2116			int err = (tgt == NULL) ? EZFS_NODEVICE :
2117			    (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE);
2118			fnvlist_free(vdev_guids);
2119			fnvlist_free(guids_to_paths);
2120			return (zfs_error(hdl, err, msg));
2121		}
2122
2123		uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
2124		fnvlist_add_uint64(vdev_guids, vd_path, guid);
2125
2126		(void) snprintf(msg, sizeof (msg), "%llu", guid);
2127		fnvlist_add_string(guids_to_paths, msg, vd_path);
2128	}
2129
2130	int err = lzc_initialize(zhp->zpool_name, cmd_type, vdev_guids,
2131	    &errlist);
2132	fnvlist_free(vdev_guids);
2133
2134	if (err == 0) {
2135		fnvlist_free(guids_to_paths);
2136		return (0);
2137	}
2138
2139	nvlist_t *vd_errlist = NULL;
2140	if (errlist != NULL) {
2141		vd_errlist = fnvlist_lookup_nvlist(errlist,
2142		    ZPOOL_INITIALIZE_VDEVS);
2143	}
2144
2145	(void) snprintf(msg, sizeof (msg),
2146	    dgettext(TEXT_DOMAIN, "operation failed"));
2147
2148	for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
2149	    elem = nvlist_next_nvpair(vd_errlist, elem)) {
2150		int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
2151		char *path = fnvlist_lookup_string(guids_to_paths,
2152		    nvpair_name(elem));
2153		(void) zfs_error_fmt(hdl, vd_error, "cannot initialize '%s'",
2154		    path);
2155	}
2156
2157	fnvlist_free(guids_to_paths);
2158	if (vd_errlist != NULL)
2159		return (-1);
2160
2161	return (zpool_standard_error(hdl, err, msg));
2162}
2163
2164#ifdef illumos
2165/*
2166 * This provides a very minimal check whether a given string is likely a
2167 * c#t#d# style string.  Users of this are expected to do their own
2168 * verification of the s# part.
2169 */
2170#define	CTD_CHECK(str)  (str && str[0] == 'c' && isdigit(str[1]))
2171
2172/*
2173 * More elaborate version for ones which may start with "/dev/dsk/"
2174 * and the like.
2175 */
2176static int
2177ctd_check_path(char *str)
2178{
2179	/*
2180	 * If it starts with a slash, check the last component.
2181	 */
2182	if (str && str[0] == '/') {
2183		char *tmp = strrchr(str, '/');
2184
2185		/*
2186		 * If it ends in "/old", check the second-to-last
2187		 * component of the string instead.
2188		 */
2189		if (tmp != str && strcmp(tmp, "/old") == 0) {
2190			for (tmp--; *tmp != '/'; tmp--)
2191				;
2192		}
2193		str = tmp + 1;
2194	}
2195	return (CTD_CHECK(str));
2196}
2197#endif
2198
2199/*
2200 * Find a vdev that matches the search criteria specified. We use the
2201 * the nvpair name to determine how we should look for the device.
2202 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2203 * spare; but FALSE if its an INUSE spare.
2204 */
2205static nvlist_t *
2206vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
2207    boolean_t *l2cache, boolean_t *log)
2208{
2209	uint_t c, children;
2210	nvlist_t **child;
2211	nvlist_t *ret;
2212	uint64_t is_log;
2213	char *srchkey;
2214	nvpair_t *pair = nvlist_next_nvpair(search, NULL);
2215
2216	/* Nothing to look for */
2217	if (search == NULL || pair == NULL)
2218		return (NULL);
2219
2220	/* Obtain the key we will use to search */
2221	srchkey = nvpair_name(pair);
2222
2223	switch (nvpair_type(pair)) {
2224	case DATA_TYPE_UINT64:
2225		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
2226			uint64_t srchval, theguid;
2227
2228			verify(nvpair_value_uint64(pair, &srchval) == 0);
2229			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2230			    &theguid) == 0);
2231			if (theguid == srchval)
2232				return (nv);
2233		}
2234		break;
2235
2236	case DATA_TYPE_STRING: {
2237		char *srchval, *val;
2238
2239		verify(nvpair_value_string(pair, &srchval) == 0);
2240		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
2241			break;
2242
2243		/*
2244		 * Search for the requested value. Special cases:
2245		 *
2246		 * - ZPOOL_CONFIG_PATH for whole disk entries. To support
2247		 *   UEFI boot, these end in "s0" or "s0/old" or "s1" or
2248		 *   "s1/old".   The "s0" or "s1" part is hidden from the user,
2249		 *   but included in the string, so this matches around it.
2250		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
2251		 *
2252		 * Otherwise, all other searches are simple string compares.
2253		 */
2254#ifdef illumos
2255		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
2256		    ctd_check_path(val)) {
2257			uint64_t wholedisk = 0;
2258
2259			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2260			    &wholedisk);
2261			if (wholedisk) {
2262				int slen = strlen(srchval);
2263				int vlen = strlen(val);
2264
2265				if (slen != vlen - 2)
2266					break;
2267
2268				/*
2269				 * make_leaf_vdev() should only set
2270				 * wholedisk for ZPOOL_CONFIG_PATHs which
2271				 * will include "/dev/dsk/", giving plenty of
2272				 * room for the indices used next.
2273				 */
2274				ASSERT(vlen >= 6);
2275
2276				/*
2277				 * strings identical except trailing "s0"
2278				 */
2279				if ((strcmp(&val[vlen - 2], "s0") == 0 ||
2280				    strcmp(&val[vlen - 2], "s1") == 0) &&
2281				    strncmp(srchval, val, slen) == 0)
2282					return (nv);
2283
2284				/*
2285				 * strings identical except trailing "s0/old"
2286				 */
2287				if ((strcmp(&val[vlen - 6], "s0/old") == 0 ||
2288				    strcmp(&val[vlen - 6], "s1/old") == 0) &&
2289				    strcmp(&srchval[slen - 4], "/old") == 0 &&
2290				    strncmp(srchval, val, slen - 4) == 0)
2291					return (nv);
2292
2293				break;
2294			}
2295		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2296#else
2297		if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2298#endif
2299			char *type, *idx, *end, *p;
2300			uint64_t id, vdev_id;
2301
2302			/*
2303			 * Determine our vdev type, keeping in mind
2304			 * that the srchval is composed of a type and
2305			 * vdev id pair (i.e. mirror-4).
2306			 */
2307			if ((type = strdup(srchval)) == NULL)
2308				return (NULL);
2309
2310			if ((p = strrchr(type, '-')) == NULL) {
2311				free(type);
2312				break;
2313			}
2314			idx = p + 1;
2315			*p = '\0';
2316
2317			/*
2318			 * If the types don't match then keep looking.
2319			 */
2320			if (strncmp(val, type, strlen(val)) != 0) {
2321				free(type);
2322				break;
2323			}
2324
2325			verify(zpool_vdev_is_interior(type));
2326			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2327			    &id) == 0);
2328
2329			errno = 0;
2330			vdev_id = strtoull(idx, &end, 10);
2331
2332			free(type);
2333			if (errno != 0)
2334				return (NULL);
2335
2336			/*
2337			 * Now verify that we have the correct vdev id.
2338			 */
2339			if (vdev_id == id)
2340				return (nv);
2341		}
2342
2343		/*
2344		 * Common case
2345		 */
2346		if (strcmp(srchval, val) == 0)
2347			return (nv);
2348		break;
2349	}
2350
2351	default:
2352		break;
2353	}
2354
2355	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2356	    &child, &children) != 0)
2357		return (NULL);
2358
2359	for (c = 0; c < children; c++) {
2360		if ((ret = vdev_to_nvlist_iter(child[c], search,
2361		    avail_spare, l2cache, NULL)) != NULL) {
2362			/*
2363			 * The 'is_log' value is only set for the toplevel
2364			 * vdev, not the leaf vdevs.  So we always lookup the
2365			 * log device from the root of the vdev tree (where
2366			 * 'log' is non-NULL).
2367			 */
2368			if (log != NULL &&
2369			    nvlist_lookup_uint64(child[c],
2370			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2371			    is_log) {
2372				*log = B_TRUE;
2373			}
2374			return (ret);
2375		}
2376	}
2377
2378	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2379	    &child, &children) == 0) {
2380		for (c = 0; c < children; c++) {
2381			if ((ret = vdev_to_nvlist_iter(child[c], search,
2382			    avail_spare, l2cache, NULL)) != NULL) {
2383				*avail_spare = B_TRUE;
2384				return (ret);
2385			}
2386		}
2387	}
2388
2389	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2390	    &child, &children) == 0) {
2391		for (c = 0; c < children; c++) {
2392			if ((ret = vdev_to_nvlist_iter(child[c], search,
2393			    avail_spare, l2cache, NULL)) != NULL) {
2394				*l2cache = B_TRUE;
2395				return (ret);
2396			}
2397		}
2398	}
2399
2400	return (NULL);
2401}
2402
2403/*
2404 * Given a physical path (minus the "/devices" prefix), find the
2405 * associated vdev.
2406 */
2407nvlist_t *
2408zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2409    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2410{
2411	nvlist_t *search, *nvroot, *ret;
2412
2413	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2414	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2415
2416	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2417	    &nvroot) == 0);
2418
2419	*avail_spare = B_FALSE;
2420	*l2cache = B_FALSE;
2421	if (log != NULL)
2422		*log = B_FALSE;
2423	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2424	nvlist_free(search);
2425
2426	return (ret);
2427}
2428
2429/*
2430 * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2431 */
2432static boolean_t
2433zpool_vdev_is_interior(const char *name)
2434{
2435	if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2436	    strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2437	    strncmp(name,
2438	    VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2439	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2440		return (B_TRUE);
2441	return (B_FALSE);
2442}
2443
2444nvlist_t *
2445zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2446    boolean_t *l2cache, boolean_t *log)
2447{
2448	char buf[MAXPATHLEN];
2449	char *end;
2450	nvlist_t *nvroot, *search, *ret;
2451	uint64_t guid;
2452
2453	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2454
2455	guid = strtoull(path, &end, 10);
2456	if (guid != 0 && *end == '\0') {
2457		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2458	} else if (zpool_vdev_is_interior(path)) {
2459		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2460	} else if (path[0] != '/') {
2461		(void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
2462		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
2463	} else {
2464		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2465	}
2466
2467	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2468	    &nvroot) == 0);
2469
2470	*avail_spare = B_FALSE;
2471	*l2cache = B_FALSE;
2472	if (log != NULL)
2473		*log = B_FALSE;
2474	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2475	nvlist_free(search);
2476
2477	return (ret);
2478}
2479
2480static int
2481vdev_is_online(nvlist_t *nv)
2482{
2483	uint64_t ival;
2484
2485	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2486	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2487	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2488		return (0);
2489
2490	return (1);
2491}
2492
2493/*
2494 * Helper function for zpool_get_physpaths().
2495 */
2496static int
2497vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2498    size_t *bytes_written)
2499{
2500	size_t bytes_left, pos, rsz;
2501	char *tmppath;
2502	const char *format;
2503
2504	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2505	    &tmppath) != 0)
2506		return (EZFS_NODEVICE);
2507
2508	pos = *bytes_written;
2509	bytes_left = physpath_size - pos;
2510	format = (pos == 0) ? "%s" : " %s";
2511
2512	rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2513	*bytes_written += rsz;
2514
2515	if (rsz >= bytes_left) {
2516		/* if physpath was not copied properly, clear it */
2517		if (bytes_left != 0) {
2518			physpath[pos] = 0;
2519		}
2520		return (EZFS_NOSPC);
2521	}
2522	return (0);
2523}
2524
2525static int
2526vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2527    size_t *rsz, boolean_t is_spare)
2528{
2529	char *type;
2530	int ret;
2531
2532	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2533		return (EZFS_INVALCONFIG);
2534
2535	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2536		/*
2537		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2538		 * For a spare vdev, we only want to boot from the active
2539		 * spare device.
2540		 */
2541		if (is_spare) {
2542			uint64_t spare = 0;
2543			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2544			    &spare);
2545			if (!spare)
2546				return (EZFS_INVALCONFIG);
2547		}
2548
2549		if (vdev_is_online(nv)) {
2550			if ((ret = vdev_get_one_physpath(nv, physpath,
2551			    phypath_size, rsz)) != 0)
2552				return (ret);
2553		}
2554	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2555	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2556	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2557	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2558		nvlist_t **child;
2559		uint_t count;
2560		int i, ret;
2561
2562		if (nvlist_lookup_nvlist_array(nv,
2563		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2564			return (EZFS_INVALCONFIG);
2565
2566		for (i = 0; i < count; i++) {
2567			ret = vdev_get_physpaths(child[i], physpath,
2568			    phypath_size, rsz, is_spare);
2569			if (ret == EZFS_NOSPC)
2570				return (ret);
2571		}
2572	}
2573
2574	return (EZFS_POOL_INVALARG);
2575}
2576
2577/*
2578 * Get phys_path for a root pool config.
2579 * Return 0 on success; non-zero on failure.
2580 */
2581static int
2582zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2583{
2584	size_t rsz;
2585	nvlist_t *vdev_root;
2586	nvlist_t **child;
2587	uint_t count;
2588	char *type;
2589
2590	rsz = 0;
2591
2592	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2593	    &vdev_root) != 0)
2594		return (EZFS_INVALCONFIG);
2595
2596	if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2597	    nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2598	    &child, &count) != 0)
2599		return (EZFS_INVALCONFIG);
2600
2601	/*
2602	 * root pool can only have a single top-level vdev.
2603	 */
2604	if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2605		return (EZFS_POOL_INVALARG);
2606
2607	(void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2608	    B_FALSE);
2609
2610	/* No online devices */
2611	if (rsz == 0)
2612		return (EZFS_NODEVICE);
2613
2614	return (0);
2615}
2616
2617/*
2618 * Get phys_path for a root pool
2619 * Return 0 on success; non-zero on failure.
2620 */
2621int
2622zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2623{
2624	return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2625	    phypath_size));
2626}
2627
2628/*
2629 * If the device has being dynamically expanded then we need to relabel
2630 * the disk to use the new unallocated space.
2631 */
2632static int
2633zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
2634{
2635#ifdef illumos
2636	char path[MAXPATHLEN];
2637	char errbuf[1024];
2638	int fd, error;
2639	int (*_efi_use_whole_disk)(int);
2640
2641	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
2642	    "efi_use_whole_disk")) == NULL)
2643		return (-1);
2644
2645	(void) snprintf(path, sizeof (path), "%s/%s", ZFS_RDISK_ROOT, name);
2646
2647	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2648		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2649		    "relabel '%s': unable to open device"), name);
2650		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2651	}
2652
2653	/*
2654	 * It's possible that we might encounter an error if the device
2655	 * does not have any unallocated space left. If so, we simply
2656	 * ignore that error and continue on.
2657	 */
2658	error = _efi_use_whole_disk(fd);
2659	(void) close(fd);
2660	if (error && error != VT_ENOSPC) {
2661		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2662		    "relabel '%s': unable to read disk capacity"), name);
2663		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2664	}
2665#endif	/* illumos */
2666	return (0);
2667}
2668
2669/*
2670 * Bring the specified vdev online.   The 'flags' parameter is a set of the
2671 * ZFS_ONLINE_* flags.
2672 */
2673int
2674zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2675    vdev_state_t *newstate)
2676{
2677	zfs_cmd_t zc = { 0 };
2678	char msg[1024];
2679	char *pathname;
2680	nvlist_t *tgt;
2681	boolean_t avail_spare, l2cache, islog;
2682	libzfs_handle_t *hdl = zhp->zpool_hdl;
2683
2684	if (flags & ZFS_ONLINE_EXPAND) {
2685		(void) snprintf(msg, sizeof (msg),
2686		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2687	} else {
2688		(void) snprintf(msg, sizeof (msg),
2689		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2690	}
2691
2692	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2693	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2694	    &islog)) == NULL)
2695		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2696
2697	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2698
2699	if (avail_spare)
2700		return (zfs_error(hdl, EZFS_ISSPARE, msg));
2701
2702	if ((flags & ZFS_ONLINE_EXPAND ||
2703	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
2704	    nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
2705		uint64_t wholedisk = 0;
2706
2707		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2708		    &wholedisk);
2709
2710		/*
2711		 * XXX - L2ARC 1.0 devices can't support expansion.
2712		 */
2713		if (l2cache) {
2714			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2715			    "cannot expand cache devices"));
2716			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2717		}
2718
2719		if (wholedisk) {
2720			pathname += strlen(ZFS_DISK_ROOT) + 1;
2721			(void) zpool_relabel_disk(hdl, pathname);
2722		}
2723	}
2724
2725	zc.zc_cookie = VDEV_STATE_ONLINE;
2726	zc.zc_obj = flags;
2727
2728	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
2729		if (errno == EINVAL) {
2730			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2731			    "from this pool into a new one.  Use '%s' "
2732			    "instead"), "zpool detach");
2733			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2734		}
2735		return (zpool_standard_error(hdl, errno, msg));
2736	}
2737
2738	*newstate = zc.zc_cookie;
2739	return (0);
2740}
2741
2742/*
2743 * Take the specified vdev offline
2744 */
2745int
2746zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2747{
2748	zfs_cmd_t zc = { 0 };
2749	char msg[1024];
2750	nvlist_t *tgt;
2751	boolean_t avail_spare, l2cache;
2752	libzfs_handle_t *hdl = zhp->zpool_hdl;
2753
2754	(void) snprintf(msg, sizeof (msg),
2755	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2756
2757	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2758	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2759	    NULL)) == NULL)
2760		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2761
2762	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2763
2764	if (avail_spare)
2765		return (zfs_error(hdl, EZFS_ISSPARE, msg));
2766
2767	zc.zc_cookie = VDEV_STATE_OFFLINE;
2768	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2769
2770	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2771		return (0);
2772
2773	switch (errno) {
2774	case EBUSY:
2775
2776		/*
2777		 * There are no other replicas of this device.
2778		 */
2779		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2780
2781	case EEXIST:
2782		/*
2783		 * The log device has unplayed logs
2784		 */
2785		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2786
2787	default:
2788		return (zpool_standard_error(hdl, errno, msg));
2789	}
2790}
2791
2792/*
2793 * Mark the given vdev faulted.
2794 */
2795int
2796zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2797{
2798	zfs_cmd_t zc = { 0 };
2799	char msg[1024];
2800	libzfs_handle_t *hdl = zhp->zpool_hdl;
2801
2802	(void) snprintf(msg, sizeof (msg),
2803	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
2804
2805	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2806	zc.zc_guid = guid;
2807	zc.zc_cookie = VDEV_STATE_FAULTED;
2808	zc.zc_obj = aux;
2809
2810	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2811		return (0);
2812
2813	switch (errno) {
2814	case EBUSY:
2815
2816		/*
2817		 * There are no other replicas of this device.
2818		 */
2819		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2820
2821	default:
2822		return (zpool_standard_error(hdl, errno, msg));
2823	}
2824
2825}
2826
2827/*
2828 * Mark the given vdev degraded.
2829 */
2830int
2831zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2832{
2833	zfs_cmd_t zc = { 0 };
2834	char msg[1024];
2835	libzfs_handle_t *hdl = zhp->zpool_hdl;
2836
2837	(void) snprintf(msg, sizeof (msg),
2838	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
2839
2840	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2841	zc.zc_guid = guid;
2842	zc.zc_cookie = VDEV_STATE_DEGRADED;
2843	zc.zc_obj = aux;
2844
2845	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2846		return (0);
2847
2848	return (zpool_standard_error(hdl, errno, msg));
2849}
2850
2851/*
2852 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2853 * a hot spare.
2854 */
2855static boolean_t
2856is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2857{
2858	nvlist_t **child;
2859	uint_t c, children;
2860	char *type;
2861
2862	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2863	    &children) == 0) {
2864		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2865		    &type) == 0);
2866
2867		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2868		    children == 2 && child[which] == tgt)
2869			return (B_TRUE);
2870
2871		for (c = 0; c < children; c++)
2872			if (is_replacing_spare(child[c], tgt, which))
2873				return (B_TRUE);
2874	}
2875
2876	return (B_FALSE);
2877}
2878
2879/*
2880 * Attach new_disk (fully described by nvroot) to old_disk.
2881 * If 'replacing' is specified, the new disk will replace the old one.
2882 */
2883int
2884zpool_vdev_attach(zpool_handle_t *zhp,
2885    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2886{
2887	zfs_cmd_t zc = { 0 };
2888	char msg[1024];
2889	int ret;
2890	nvlist_t *tgt;
2891	boolean_t avail_spare, l2cache, islog;
2892	uint64_t val;
2893	char *newname;
2894	nvlist_t **child;
2895	uint_t children;
2896	nvlist_t *config_root;
2897	libzfs_handle_t *hdl = zhp->zpool_hdl;
2898	boolean_t rootpool = zpool_is_bootable(zhp);
2899
2900	if (replacing)
2901		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2902		    "cannot replace %s with %s"), old_disk, new_disk);
2903	else
2904		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2905		    "cannot attach %s to %s"), new_disk, old_disk);
2906
2907	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2908	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2909	    &islog)) == NULL)
2910		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2911
2912	if (avail_spare)
2913		return (zfs_error(hdl, EZFS_ISSPARE, msg));
2914
2915	if (l2cache)
2916		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2917
2918	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2919	zc.zc_cookie = replacing;
2920
2921	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2922	    &child, &children) != 0 || children != 1) {
2923		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2924		    "new device must be a single disk"));
2925		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2926	}
2927
2928	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2929	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2930
2931	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
2932		return (-1);
2933
2934	/*
2935	 * If the target is a hot spare that has been swapped in, we can only
2936	 * replace it with another hot spare.
2937	 */
2938	if (replacing &&
2939	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
2940	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2941	    NULL) == NULL || !avail_spare) &&
2942	    is_replacing_spare(config_root, tgt, 1)) {
2943		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2944		    "can only be replaced by another hot spare"));
2945		free(newname);
2946		return (zfs_error(hdl, EZFS_BADTARGET, msg));
2947	}
2948
2949	free(newname);
2950
2951	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2952		return (-1);
2953
2954	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
2955
2956	zcmd_free_nvlists(&zc);
2957
2958	if (ret == 0) {
2959		if (rootpool) {
2960			/*
2961			 * XXX need a better way to prevent user from
2962			 * booting up a half-baked vdev.
2963			 */
2964			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2965			    "sure to wait until resilver is done "
2966			    "before rebooting.\n"));
2967			(void) fprintf(stderr, "\n");
2968			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "If "
2969			    "you boot from pool '%s', you may need to update\n"
2970			    "boot code on newly attached disk '%s'.\n\n"
2971			    "Assuming you use GPT partitioning and 'da0' is "
2972			    "your new boot disk\n"
2973			    "you may use the following command:\n\n"
2974			    "\tgpart bootcode -b /boot/pmbr -p "
2975			    "/boot/gptzfsboot -i 1 da0\n\n"),
2976			    zhp->zpool_name, new_disk);
2977		}
2978		return (0);
2979	}
2980
2981	switch (errno) {
2982	case ENOTSUP:
2983		/*
2984		 * Can't attach to or replace this type of vdev.
2985		 */
2986		if (replacing) {
2987			uint64_t version = zpool_get_prop_int(zhp,
2988			    ZPOOL_PROP_VERSION, NULL);
2989
2990			if (islog)
2991				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2992				    "cannot replace a log with a spare"));
2993			else if (version >= SPA_VERSION_MULTI_REPLACE)
2994				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2995				    "already in replacing/spare config; wait "
2996				    "for completion or use 'zpool detach'"));
2997			else
2998				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2999				    "cannot replace a replacing device"));
3000		} else {
3001			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3002			    "can only attach to mirrors and top-level "
3003			    "disks"));
3004		}
3005		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
3006		break;
3007
3008	case EINVAL:
3009		/*
3010		 * The new device must be a single disk.
3011		 */
3012		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3013		    "new device must be a single disk"));
3014		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3015		break;
3016
3017	case EBUSY:
3018		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
3019		    "or device removal is in progress"),
3020		    new_disk);
3021		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3022		break;
3023
3024	case EOVERFLOW:
3025		/*
3026		 * The new device is too small.
3027		 */
3028		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3029		    "device is too small"));
3030		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3031		break;
3032
3033	case EDOM:
3034		/*
3035		 * The new device has a different alignment requirement.
3036		 */
3037		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3038		    "devices have different sector alignment"));
3039		(void) zfs_error(hdl, EZFS_BADDEV, msg);
3040		break;
3041
3042	case ENAMETOOLONG:
3043		/*
3044		 * The resulting top-level vdev spec won't fit in the label.
3045		 */
3046		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
3047		break;
3048
3049	default:
3050		(void) zpool_standard_error(hdl, errno, msg);
3051	}
3052
3053	return (-1);
3054}
3055
3056/*
3057 * Detach the specified device.
3058 */
3059int
3060zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
3061{
3062	zfs_cmd_t zc = { 0 };
3063	char msg[1024];
3064	nvlist_t *tgt;
3065	boolean_t avail_spare, l2cache;
3066	libzfs_handle_t *hdl = zhp->zpool_hdl;
3067
3068	(void) snprintf(msg, sizeof (msg),
3069	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
3070
3071	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3072	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3073	    NULL)) == NULL)
3074		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3075
3076	if (avail_spare)
3077		return (zfs_error(hdl, EZFS_ISSPARE, msg));
3078
3079	if (l2cache)
3080		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3081
3082	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3083
3084	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
3085		return (0);
3086
3087	switch (errno) {
3088
3089	case ENOTSUP:
3090		/*
3091		 * Can't detach from this type of vdev.
3092		 */
3093		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
3094		    "applicable to mirror and replacing vdevs"));
3095		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
3096		break;
3097
3098	case EBUSY:
3099		/*
3100		 * There are no other replicas of this device.
3101		 */
3102		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
3103		break;
3104
3105	default:
3106		(void) zpool_standard_error(hdl, errno, msg);
3107	}
3108
3109	return (-1);
3110}
3111
3112/*
3113 * Find a mirror vdev in the source nvlist.
3114 *
3115 * The mchild array contains a list of disks in one of the top-level mirrors
3116 * of the source pool.  The schild array contains a list of disks that the
3117 * user specified on the command line.  We loop over the mchild array to
3118 * see if any entry in the schild array matches.
3119 *
3120 * If a disk in the mchild array is found in the schild array, we return
3121 * the index of that entry.  Otherwise we return -1.
3122 */
3123static int
3124find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
3125    nvlist_t **schild, uint_t schildren)
3126{
3127	uint_t mc;
3128
3129	for (mc = 0; mc < mchildren; mc++) {
3130		uint_t sc;
3131		char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3132		    mchild[mc], 0);
3133
3134		for (sc = 0; sc < schildren; sc++) {
3135			char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3136			    schild[sc], 0);
3137			boolean_t result = (strcmp(mpath, spath) == 0);
3138
3139			free(spath);
3140			if (result) {
3141				free(mpath);
3142				return (mc);
3143			}
3144		}
3145
3146		free(mpath);
3147	}
3148
3149	return (-1);
3150}
3151
3152/*
3153 * Split a mirror pool.  If newroot points to null, then a new nvlist
3154 * is generated and it is the responsibility of the caller to free it.
3155 */
3156int
3157zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3158    nvlist_t *props, splitflags_t flags)
3159{
3160	zfs_cmd_t zc = { 0 };
3161	char msg[1024];
3162	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3163	nvlist_t **varray = NULL, *zc_props = NULL;
3164	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3165	libzfs_handle_t *hdl = zhp->zpool_hdl;
3166	uint64_t vers;
3167	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3168	int retval = 0;
3169
3170	(void) snprintf(msg, sizeof (msg),
3171	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3172
3173	if (!zpool_name_valid(hdl, B_FALSE, newname))
3174		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3175
3176	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3177		(void) fprintf(stderr, gettext("Internal error: unable to "
3178		    "retrieve pool configuration\n"));
3179		return (-1);
3180	}
3181
3182	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3183	    == 0);
3184	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3185
3186	if (props) {
3187		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3188		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3189		    props, vers, flags, msg)) == NULL)
3190			return (-1);
3191	}
3192
3193	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3194	    &children) != 0) {
3195		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3196		    "Source pool is missing vdev tree"));
3197		nvlist_free(zc_props);
3198		return (-1);
3199	}
3200
3201	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3202	vcount = 0;
3203
3204	if (*newroot == NULL ||
3205	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3206	    &newchild, &newchildren) != 0)
3207		newchildren = 0;
3208
3209	for (c = 0; c < children; c++) {
3210		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3211		char *type;
3212		nvlist_t **mchild, *vdev;
3213		uint_t mchildren;
3214		int entry;
3215
3216		/*
3217		 * Unlike cache & spares, slogs are stored in the
3218		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
3219		 */
3220		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3221		    &is_log);
3222		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3223		    &is_hole);
3224		if (is_log || is_hole) {
3225			/*
3226			 * Create a hole vdev and put it in the config.
3227			 */
3228			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3229				goto out;
3230			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3231			    VDEV_TYPE_HOLE) != 0)
3232				goto out;
3233			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3234			    1) != 0)
3235				goto out;
3236			if (lastlog == 0)
3237				lastlog = vcount;
3238			varray[vcount++] = vdev;
3239			continue;
3240		}
3241		lastlog = 0;
3242		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3243		    == 0);
3244		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3245			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3246			    "Source pool must be composed only of mirrors\n"));
3247			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3248			goto out;
3249		}
3250
3251		verify(nvlist_lookup_nvlist_array(child[c],
3252		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3253
3254		/* find or add an entry for this top-level vdev */
3255		if (newchildren > 0 &&
3256		    (entry = find_vdev_entry(zhp, mchild, mchildren,
3257		    newchild, newchildren)) >= 0) {
3258			/* We found a disk that the user specified. */
3259			vdev = mchild[entry];
3260			++found;
3261		} else {
3262			/* User didn't specify a disk for this vdev. */
3263			vdev = mchild[mchildren - 1];
3264		}
3265
3266		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3267			goto out;
3268	}
3269
3270	/* did we find every disk the user specified? */
3271	if (found != newchildren) {
3272		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3273		    "include at most one disk from each mirror"));
3274		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3275		goto out;
3276	}
3277
3278	/* Prepare the nvlist for populating. */
3279	if (*newroot == NULL) {
3280		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3281			goto out;
3282		freelist = B_TRUE;
3283		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3284		    VDEV_TYPE_ROOT) != 0)
3285			goto out;
3286	} else {
3287		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3288	}
3289
3290	/* Add all the children we found */
3291	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3292	    lastlog == 0 ? vcount : lastlog) != 0)
3293		goto out;
3294
3295	/*
3296	 * If we're just doing a dry run, exit now with success.
3297	 */
3298	if (flags.dryrun) {
3299		memory_err = B_FALSE;
3300		freelist = B_FALSE;
3301		goto out;
3302	}
3303
3304	/* now build up the config list & call the ioctl */
3305	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3306		goto out;
3307
3308	if (nvlist_add_nvlist(newconfig,
3309	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3310	    nvlist_add_string(newconfig,
3311	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3312	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3313		goto out;
3314
3315	/*
3316	 * The new pool is automatically part of the namespace unless we
3317	 * explicitly export it.
3318	 */
3319	if (!flags.import)
3320		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3321	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3322	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3323	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3324		goto out;
3325	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3326		goto out;
3327
3328	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3329		retval = zpool_standard_error(hdl, errno, msg);
3330		goto out;
3331	}
3332
3333	freelist = B_FALSE;
3334	memory_err = B_FALSE;
3335
3336out:
3337	if (varray != NULL) {
3338		int v;
3339
3340		for (v = 0; v < vcount; v++)
3341			nvlist_free(varray[v]);
3342		free(varray);
3343	}
3344	zcmd_free_nvlists(&zc);
3345	nvlist_free(zc_props);
3346	nvlist_free(newconfig);
3347	if (freelist) {
3348		nvlist_free(*newroot);
3349		*newroot = NULL;
3350	}
3351
3352	if (retval != 0)
3353		return (retval);
3354
3355	if (memory_err)
3356		return (no_memory(hdl));
3357
3358	return (0);
3359}
3360
3361/*
3362 * Remove the given device.
3363 */
3364int
3365zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3366{
3367	zfs_cmd_t zc = { 0 };
3368	char msg[1024];
3369	nvlist_t *tgt;
3370	boolean_t avail_spare, l2cache, islog;
3371	libzfs_handle_t *hdl = zhp->zpool_hdl;
3372	uint64_t version;
3373
3374	(void) snprintf(msg, sizeof (msg),
3375	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3376
3377	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3378	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3379	    &islog)) == NULL)
3380		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3381
3382	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3383	if (islog && version < SPA_VERSION_HOLES) {
3384		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3385		    "pool must be upgraded to support log removal"));
3386		return (zfs_error(hdl, EZFS_BADVERSION, msg));
3387	}
3388
3389	zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3390
3391	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3392		return (0);
3393
3394	switch (errno) {
3395
3396	case EINVAL:
3397		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3398		    "invalid config; all top-level vdevs must "
3399		    "have the same sector size and not be raidz."));
3400		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3401		break;
3402
3403	case EBUSY:
3404		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3405		    "Pool busy; removal may already be in progress"));
3406		(void) zfs_error(hdl, EZFS_BUSY, msg);
3407		break;
3408
3409	default:
3410		(void) zpool_standard_error(hdl, errno, msg);
3411	}
3412	return (-1);
3413}
3414
3415int
3416zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3417{
3418	zfs_cmd_t zc = { 0 };
3419	char msg[1024];
3420	libzfs_handle_t *hdl = zhp->zpool_hdl;
3421
3422	(void) snprintf(msg, sizeof (msg),
3423	    dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3424
3425	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3426	zc.zc_cookie = 1;
3427
3428	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3429		return (0);
3430
3431	return (zpool_standard_error(hdl, errno, msg));
3432}
3433
3434int
3435zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3436    uint64_t *sizep)
3437{
3438	char msg[1024];
3439	nvlist_t *tgt;
3440	boolean_t avail_spare, l2cache, islog;
3441	libzfs_handle_t *hdl = zhp->zpool_hdl;
3442
3443	(void) snprintf(msg, sizeof (msg),
3444	    dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3445	    path);
3446
3447	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3448	    &islog)) == NULL)
3449		return (zfs_error(hdl, EZFS_NODEVICE, msg));
3450
3451	if (avail_spare || l2cache || islog) {
3452		*sizep = 0;
3453		return (0);
3454	}
3455
3456	if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3457		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3458		    "indirect size not available"));
3459		return (zfs_error(hdl, EINVAL, msg));
3460	}
3461	return (0);
3462}
3463
3464/*
3465 * Clear the errors for the pool, or the particular device if specified.
3466 */
3467int
3468zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3469{
3470	zfs_cmd_t zc = { 0 };
3471	char msg[1024];
3472	nvlist_t *tgt;
3473	zpool_load_policy_t policy;
3474	boolean_t avail_spare, l2cache;
3475	libzfs_handle_t *hdl = zhp->zpool_hdl;
3476	nvlist_t *nvi = NULL;
3477	int error;
3478
3479	if (path)
3480		(void) snprintf(msg, sizeof (msg),
3481		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3482		    path);
3483	else
3484		(void) snprintf(msg, sizeof (msg),
3485		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3486		    zhp->zpool_name);
3487
3488	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3489	if (path) {
3490		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3491		    &l2cache, NULL)) == NULL)
3492			return (zfs_error(hdl, EZFS_NODEVICE, msg));
3493
3494		/*
3495		 * Don't allow error clearing for hot spares.  Do allow
3496		 * error clearing for l2cache devices.
3497		 */
3498		if (avail_spare)
3499			return (zfs_error(hdl, EZFS_ISSPARE, msg));
3500
3501		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3502		    &zc.zc_guid) == 0);
3503	}
3504
3505	zpool_get_load_policy(rewindnvl, &policy);
3506	zc.zc_cookie = policy.zlp_rewind;
3507
3508	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3509		return (-1);
3510
3511	if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3512		return (-1);
3513
3514	while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3515	    errno == ENOMEM) {
3516		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3517			zcmd_free_nvlists(&zc);
3518			return (-1);
3519		}
3520	}
3521
3522	if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3523	    errno != EPERM && errno != EACCES)) {
3524		if (policy.zlp_rewind &
3525		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3526			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3527			zpool_rewind_exclaim(hdl, zc.zc_name,
3528			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3529			    nvi);
3530			nvlist_free(nvi);
3531		}
3532		zcmd_free_nvlists(&zc);
3533		return (0);
3534	}
3535
3536	zcmd_free_nvlists(&zc);
3537	return (zpool_standard_error(hdl, errno, msg));
3538}
3539
3540/*
3541 * Similar to zpool_clear(), but takes a GUID (used by fmd).
3542 */
3543int
3544zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3545{
3546	zfs_cmd_t zc = { 0 };
3547	char msg[1024];
3548	libzfs_handle_t *hdl = zhp->zpool_hdl;
3549
3550	(void) snprintf(msg, sizeof (msg),
3551	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3552	    guid);
3553
3554	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3555	zc.zc_guid = guid;
3556	zc.zc_cookie = ZPOOL_NO_REWIND;
3557
3558	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3559		return (0);
3560
3561	return (zpool_standard_error(hdl, errno, msg));
3562}
3563
3564/*
3565 * Change the GUID for a pool.
3566 */
3567int
3568zpool_reguid(zpool_handle_t *zhp)
3569{
3570	char msg[1024];
3571	libzfs_handle_t *hdl = zhp->zpool_hdl;
3572	zfs_cmd_t zc = { 0 };
3573
3574	(void) snprintf(msg, sizeof (msg),
3575	    dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3576
3577	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3578	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3579		return (0);
3580
3581	return (zpool_standard_error(hdl, errno, msg));
3582}
3583
3584/*
3585 * Reopen the pool.
3586 */
3587int
3588zpool_reopen(zpool_handle_t *zhp)
3589{
3590	zfs_cmd_t zc = { 0 };
3591	char msg[1024];
3592	libzfs_handle_t *hdl = zhp->zpool_hdl;
3593
3594	(void) snprintf(msg, sizeof (msg),
3595	    dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3596	    zhp->zpool_name);
3597
3598	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3599	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3600		return (0);
3601	return (zpool_standard_error(hdl, errno, msg));
3602}
3603
3604/* call into libzfs_core to execute the sync IOCTL per pool */
3605int
3606zpool_sync_one(zpool_handle_t *zhp, void *data)
3607{
3608	int ret;
3609	libzfs_handle_t *hdl = zpool_get_handle(zhp);
3610	const char *pool_name = zpool_get_name(zhp);
3611	boolean_t *force = data;
3612	nvlist_t *innvl = fnvlist_alloc();
3613
3614	fnvlist_add_boolean_value(innvl, "force", *force);
3615	if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3616		nvlist_free(innvl);
3617		return (zpool_standard_error_fmt(hdl, ret,
3618		    dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3619	}
3620	nvlist_free(innvl);
3621
3622	return (0);
3623}
3624
3625/*
3626 * Convert from a devid string to a path.
3627 */
3628static char *
3629devid_to_path(char *devid_str)
3630{
3631	ddi_devid_t devid;
3632	char *minor;
3633	char *path;
3634	devid_nmlist_t *list = NULL;
3635	int ret;
3636
3637	if (devid_str_decode(devid_str, &devid, &minor) != 0)
3638		return (NULL);
3639
3640	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3641
3642	devid_str_free(minor);
3643	devid_free(devid);
3644
3645	if (ret != 0)
3646		return (NULL);
3647
3648	/*
3649	 * In a case the strdup() fails, we will just return NULL below.
3650	 */
3651	path = strdup(list[0].devname);
3652
3653	devid_free_nmlist(list);
3654
3655	return (path);
3656}
3657
3658/*
3659 * Convert from a path to a devid string.
3660 */
3661static char *
3662path_to_devid(const char *path)
3663{
3664#ifdef have_devid
3665	int fd;
3666	ddi_devid_t devid;
3667	char *minor, *ret;
3668
3669	if ((fd = open(path, O_RDONLY)) < 0)
3670		return (NULL);
3671
3672	minor = NULL;
3673	ret = NULL;
3674	if (devid_get(fd, &devid) == 0) {
3675		if (devid_get_minor_name(fd, &minor) == 0)
3676			ret = devid_str_encode(devid, minor);
3677		if (minor != NULL)
3678			devid_str_free(minor);
3679		devid_free(devid);
3680	}
3681	(void) close(fd);
3682
3683	return (ret);
3684#else
3685	return (NULL);
3686#endif
3687}
3688
3689/*
3690 * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3691 * ignore any failure here, since a common case is for an unprivileged user to
3692 * type 'zpool status', and we'll display the correct information anyway.
3693 */
3694static void
3695set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3696{
3697	zfs_cmd_t zc = { 0 };
3698
3699	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3700	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3701	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3702	    &zc.zc_guid) == 0);
3703
3704	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3705}
3706
3707/*
3708 * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3709 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3710 * We also check if this is a whole disk, in which case we strip off the
3711 * trailing 's0' slice name.
3712 *
3713 * This routine is also responsible for identifying when disks have been
3714 * reconfigured in a new location.  The kernel will have opened the device by
3715 * devid, but the path will still refer to the old location.  To catch this, we
3716 * first do a path -> devid translation (which is fast for the common case).  If
3717 * the devid matches, we're done.  If not, we do a reverse devid -> path
3718 * translation and issue the appropriate ioctl() to update the path of the vdev.
3719 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3720 * of these checks.
3721 */
3722char *
3723zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3724    int name_flags)
3725{
3726	char *path, *devid, *env;
3727	uint64_t value;
3728	char buf[64];
3729	vdev_stat_t *vs;
3730	uint_t vsc;
3731	int have_stats;
3732	int have_path;
3733
3734	env = getenv("ZPOOL_VDEV_NAME_PATH");
3735	if (env && (strtoul(env, NULL, 0) > 0 ||
3736	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3737		name_flags |= VDEV_NAME_PATH;
3738
3739	env = getenv("ZPOOL_VDEV_NAME_GUID");
3740	if (env && (strtoul(env, NULL, 0) > 0 ||
3741	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3742		name_flags |= VDEV_NAME_GUID;
3743
3744	env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3745	if (env && (strtoul(env, NULL, 0) > 0 ||
3746	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3747		name_flags |= VDEV_NAME_FOLLOW_LINKS;
3748
3749	have_stats = nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3750	    (uint64_t **)&vs, &vsc) == 0;
3751	have_path = nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0;
3752
3753	/*
3754	 * If the device is not currently present, assume it will not
3755	 * come back at the same device path.  Display the device by GUID.
3756	 */
3757	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3758	    (name_flags & VDEV_NAME_GUID) != 0 ||
3759	    have_path && have_stats && vs->vs_state <= VDEV_STATE_CANT_OPEN) {
3760		nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3761		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3762		path = buf;
3763	} else if (have_path) {
3764
3765		/*
3766		 * If the device is dead (faulted, offline, etc) then don't
3767		 * bother opening it.  Otherwise we may be forcing the user to
3768		 * open a misbehaving device, which can have undesirable
3769		 * effects.
3770		 */
3771		if ((have_stats == 0 ||
3772		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
3773		    zhp != NULL &&
3774		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3775			/*
3776			 * Determine if the current path is correct.
3777			 */
3778			char *newdevid = path_to_devid(path);
3779
3780			if (newdevid == NULL ||
3781			    strcmp(devid, newdevid) != 0) {
3782				char *newpath;
3783
3784				if ((newpath = devid_to_path(devid)) != NULL) {
3785					/*
3786					 * Update the path appropriately.
3787					 */
3788					set_path(zhp, nv, newpath);
3789					if (nvlist_add_string(nv,
3790					    ZPOOL_CONFIG_PATH, newpath) == 0)
3791						verify(nvlist_lookup_string(nv,
3792						    ZPOOL_CONFIG_PATH,
3793						    &path) == 0);
3794					free(newpath);
3795				}
3796			}
3797
3798			if (newdevid)
3799				devid_str_free(newdevid);
3800		}
3801
3802#ifdef illumos
3803		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3804			char *rp = realpath(path, NULL);
3805			if (rp) {
3806				strlcpy(buf, rp, sizeof (buf));
3807				path = buf;
3808				free(rp);
3809			}
3810		}
3811
3812		if (strncmp(path, ZFS_DISK_ROOTD, strlen(ZFS_DISK_ROOTD)) == 0)
3813			path += strlen(ZFS_DISK_ROOTD);
3814
3815		/*
3816		 * Remove the partition from the path it this is a whole disk.
3817		 */
3818		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3819		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
3820			int pathlen = strlen(path);
3821			char *tmp = zfs_strdup(hdl, path);
3822
3823			/*
3824			 * If it starts with c#, and ends with "s0" or "s1",
3825			 * chop the slice off, or if it ends with "s0/old" or
3826			 * "s1/old", remove the slice from the middle.
3827			 */
3828			if (CTD_CHECK(tmp)) {
3829				if (strcmp(&tmp[pathlen - 2], "s0") == 0 ||
3830				    strcmp(&tmp[pathlen - 2], "s1") == 0) {
3831					tmp[pathlen - 2] = '\0';
3832				} else if (pathlen > 6 &&
3833				    (strcmp(&tmp[pathlen - 6], "s0/old") == 0 ||
3834				    strcmp(&tmp[pathlen - 6], "s1/old") == 0)) {
3835					(void) strcpy(&tmp[pathlen - 6],
3836					    "/old");
3837				}
3838			}
3839			return (tmp);
3840		}
3841#else	/* !illumos */
3842		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
3843			path += sizeof(_PATH_DEV) - 1;
3844#endif	/* illumos */
3845	} else {
3846		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3847
3848		/*
3849		 * If it's a raidz device, we need to stick in the parity level.
3850		 */
3851		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3852			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3853			    &value) == 0);
3854			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
3855			    (u_longlong_t)value);
3856			path = buf;
3857		}
3858
3859		/*
3860		 * We identify each top-level vdev by using a <type-id>
3861		 * naming convention.
3862		 */
3863		if (name_flags & VDEV_NAME_TYPE_ID) {
3864			uint64_t id;
3865
3866			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3867			    &id) == 0);
3868			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
3869			    (u_longlong_t)id);
3870			path = buf;
3871		}
3872	}
3873
3874	return (zfs_strdup(hdl, path));
3875}
3876
3877static int
3878zbookmark_mem_compare(const void *a, const void *b)
3879{
3880	return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3881}
3882
3883/*
3884 * Retrieve the persistent error log, uniquify the members, and return to the
3885 * caller.
3886 */
3887int
3888zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3889{
3890	zfs_cmd_t zc = { 0 };
3891	uint64_t count;
3892	zbookmark_phys_t *zb = NULL;
3893	int i;
3894
3895	/*
3896	 * Retrieve the raw error list from the kernel.  If the number of errors
3897	 * has increased, allocate more space and continue until we get the
3898	 * entire list.
3899	 */
3900	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3901	    &count) == 0);
3902	if (count == 0)
3903		return (0);
3904	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3905	    count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL)
3906		return (-1);
3907	zc.zc_nvlist_dst_size = count;
3908	(void) strcpy(zc.zc_name, zhp->zpool_name);
3909	for (;;) {
3910		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3911		    &zc) != 0) {
3912			free((void *)(uintptr_t)zc.zc_nvlist_dst);
3913			if (errno == ENOMEM) {
3914				void *dst;
3915
3916				count = zc.zc_nvlist_dst_size;
3917				dst = zfs_alloc(zhp->zpool_hdl, count *
3918				    sizeof (zbookmark_phys_t));
3919				if (dst == NULL)
3920					return (-1);
3921				zc.zc_nvlist_dst = (uintptr_t)dst;
3922			} else {
3923				return (-1);
3924			}
3925		} else {
3926			break;
3927		}
3928	}
3929
3930	/*
3931	 * Sort the resulting bookmarks.  This is a little confusing due to the
3932	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3933	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3934	 * _not_ copied as part of the process.  So we point the start of our
3935	 * array appropriate and decrement the total number of elements.
3936	 */
3937	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
3938	    zc.zc_nvlist_dst_size;
3939	count -= zc.zc_nvlist_dst_size;
3940
3941	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
3942
3943	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3944
3945	/*
3946	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3947	 */
3948	for (i = 0; i < count; i++) {
3949		nvlist_t *nv;
3950
3951		/* ignoring zb_blkid and zb_level for now */
3952		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3953		    zb[i-1].zb_object == zb[i].zb_object)
3954			continue;
3955
3956		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3957			goto nomem;
3958		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3959		    zb[i].zb_objset) != 0) {
3960			nvlist_free(nv);
3961			goto nomem;
3962		}
3963		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3964		    zb[i].zb_object) != 0) {
3965			nvlist_free(nv);
3966			goto nomem;
3967		}
3968		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3969			nvlist_free(nv);
3970			goto nomem;
3971		}
3972		nvlist_free(nv);
3973	}
3974
3975	free((void *)(uintptr_t)zc.zc_nvlist_dst);
3976	return (0);
3977
3978nomem:
3979	free((void *)(uintptr_t)zc.zc_nvlist_dst);
3980	return (no_memory(zhp->zpool_hdl));
3981}
3982
3983/*
3984 * Upgrade a ZFS pool to the latest on-disk version.
3985 */
3986int
3987zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3988{
3989	zfs_cmd_t zc = { 0 };
3990	libzfs_handle_t *hdl = zhp->zpool_hdl;
3991
3992	(void) strcpy(zc.zc_name, zhp->zpool_name);
3993	zc.zc_cookie = new_version;
3994
3995	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3996		return (zpool_standard_error_fmt(hdl, errno,
3997		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3998		    zhp->zpool_name));
3999	return (0);
4000}
4001
4002void
4003zfs_save_arguments(int argc, char **argv, char *string, int len)
4004{
4005	(void) strlcpy(string, basename(argv[0]), len);
4006	for (int i = 1; i < argc; i++) {
4007		(void) strlcat(string, " ", len);
4008		(void) strlcat(string, argv[i], len);
4009	}
4010}
4011
4012int
4013zpool_log_history(libzfs_handle_t *hdl, const char *message)
4014{
4015	zfs_cmd_t zc = { 0 };
4016	nvlist_t *args;
4017	int err;
4018
4019	args = fnvlist_alloc();
4020	fnvlist_add_string(args, "message", message);
4021	err = zcmd_write_src_nvlist(hdl, &zc, args);
4022	if (err == 0)
4023		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
4024	nvlist_free(args);
4025	zcmd_free_nvlists(&zc);
4026	return (err);
4027}
4028
4029/*
4030 * Perform ioctl to get some command history of a pool.
4031 *
4032 * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
4033 * logical offset of the history buffer to start reading from.
4034 *
4035 * Upon return, 'off' is the next logical offset to read from and
4036 * 'len' is the actual amount of bytes read into 'buf'.
4037 */
4038static int
4039get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
4040{
4041	zfs_cmd_t zc = { 0 };
4042	libzfs_handle_t *hdl = zhp->zpool_hdl;
4043
4044	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4045
4046	zc.zc_history = (uint64_t)(uintptr_t)buf;
4047	zc.zc_history_len = *len;
4048	zc.zc_history_offset = *off;
4049
4050	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
4051		switch (errno) {
4052		case EPERM:
4053			return (zfs_error_fmt(hdl, EZFS_PERM,
4054			    dgettext(TEXT_DOMAIN,
4055			    "cannot show history for pool '%s'"),
4056			    zhp->zpool_name));
4057		case ENOENT:
4058			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
4059			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
4060			    "'%s'"), zhp->zpool_name));
4061		case ENOTSUP:
4062			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
4063			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
4064			    "'%s', pool must be upgraded"), zhp->zpool_name));
4065		default:
4066			return (zpool_standard_error_fmt(hdl, errno,
4067			    dgettext(TEXT_DOMAIN,
4068			    "cannot get history for '%s'"), zhp->zpool_name));
4069		}
4070	}
4071
4072	*len = zc.zc_history_len;
4073	*off = zc.zc_history_offset;
4074
4075	return (0);
4076}
4077
4078/*
4079 * Process the buffer of nvlists, unpacking and storing each nvlist record
4080 * into 'records'.  'leftover' is set to the number of bytes that weren't
4081 * processed as there wasn't a complete record.
4082 */
4083int
4084zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
4085    nvlist_t ***records, uint_t *numrecords)
4086{
4087	uint64_t reclen;
4088	nvlist_t *nv;
4089	int i;
4090
4091	while (bytes_read > sizeof (reclen)) {
4092
4093		/* get length of packed record (stored as little endian) */
4094		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
4095			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
4096
4097		if (bytes_read < sizeof (reclen) + reclen)
4098			break;
4099
4100		/* unpack record */
4101		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
4102			return (ENOMEM);
4103		bytes_read -= sizeof (reclen) + reclen;
4104		buf += sizeof (reclen) + reclen;
4105
4106		/* add record to nvlist array */
4107		(*numrecords)++;
4108		if (ISP2(*numrecords + 1)) {
4109			*records = realloc(*records,
4110			    *numrecords * 2 * sizeof (nvlist_t *));
4111		}
4112		(*records)[*numrecords - 1] = nv;
4113	}
4114
4115	*leftover = bytes_read;
4116	return (0);
4117}
4118
4119/* from spa_history.c: spa_history_create_obj() */
4120#define	HIS_BUF_LEN_DEF	(128 << 10)
4121#define	HIS_BUF_LEN_MAX	(1 << 30)
4122
4123/*
4124 * Retrieve the command history of a pool.
4125 */
4126int
4127zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off,
4128    boolean_t *eof)
4129{
4130	char *buf;
4131	uint64_t buflen = HIS_BUF_LEN_DEF;
4132	nvlist_t **records = NULL;
4133	uint_t numrecords = 0;
4134	int err, i;
4135	uint64_t start = *off;
4136
4137	buf = malloc(buflen);
4138	if (buf == NULL)
4139		return (ENOMEM);
4140	/* process about 1MB at a time */
4141	while (*off - start < 1024 * 1024) {
4142		uint64_t bytes_read = buflen;
4143		uint64_t leftover;
4144
4145		if ((err = get_history(zhp, buf, off, &bytes_read)) != 0)
4146			break;
4147
4148		/* if nothing else was read in, we're at EOF, just return */
4149		if (bytes_read == 0) {
4150			*eof = B_TRUE;
4151			break;
4152		}
4153
4154		if ((err = zpool_history_unpack(buf, bytes_read,
4155		    &leftover, &records, &numrecords)) != 0)
4156			break;
4157		*off -= leftover;
4158		if (leftover == bytes_read) {
4159			/*
4160			 * no progress made, because buffer is not big enough
4161			 * to hold this record; resize and retry.
4162			 */
4163			buflen *= 2;
4164			free(buf);
4165			buf = NULL;
4166			if ((buflen >= HIS_BUF_LEN_MAX) ||
4167			    ((buf = malloc(buflen)) == NULL)) {
4168				err = ENOMEM;
4169				break;
4170			}
4171		}
4172	}
4173
4174	free(buf);
4175
4176	if (!err) {
4177		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4178		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4179		    records, numrecords) == 0);
4180	}
4181	for (i = 0; i < numrecords; i++)
4182		nvlist_free(records[i]);
4183	free(records);
4184
4185	return (err);
4186}
4187
4188void
4189zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4190    char *pathname, size_t len)
4191{
4192	zfs_cmd_t zc = { 0 };
4193	boolean_t mounted = B_FALSE;
4194	char *mntpnt = NULL;
4195	char dsname[ZFS_MAX_DATASET_NAME_LEN];
4196
4197	if (dsobj == 0) {
4198		/* special case for the MOS */
4199		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
4200		return;
4201	}
4202
4203	/* get the dataset's name */
4204	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4205	zc.zc_obj = dsobj;
4206	if (ioctl(zhp->zpool_hdl->libzfs_fd,
4207	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4208		/* just write out a path of two object numbers */
4209		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
4210		    dsobj, obj);
4211		return;
4212	}
4213	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4214
4215	/* find out if the dataset is mounted */
4216	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4217
4218	/* get the corrupted object's path */
4219	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4220	zc.zc_obj = obj;
4221	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4222	    &zc) == 0) {
4223		if (mounted) {
4224			(void) snprintf(pathname, len, "%s%s", mntpnt,
4225			    zc.zc_value);
4226		} else {
4227			(void) snprintf(pathname, len, "%s:%s",
4228			    dsname, zc.zc_value);
4229		}
4230	} else {
4231		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
4232	}
4233	free(mntpnt);
4234}
4235
4236#ifdef illumos
4237/*
4238 * Read the EFI label from the config, if a label does not exist then
4239 * pass back the error to the caller. If the caller has passed a non-NULL
4240 * diskaddr argument then we set it to the starting address of the EFI
4241 * partition. If the caller has passed a non-NULL boolean argument, then
4242 * we set it to indicate if the disk does have efi system partition.
4243 */
4244static int
4245read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system)
4246{
4247	char *path;
4248	int fd;
4249	char diskname[MAXPATHLEN];
4250	boolean_t boot = B_FALSE;
4251	int err = -1;
4252	int slice;
4253
4254	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4255		return (err);
4256
4257	(void) snprintf(diskname, sizeof (diskname), "%s%s", ZFS_RDISK_ROOT,
4258	    strrchr(path, '/'));
4259	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
4260		struct dk_gpt *vtoc;
4261
4262		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4263			for (slice = 0; slice < vtoc->efi_nparts; slice++) {
4264				if (vtoc->efi_parts[slice].p_tag == V_SYSTEM)
4265					boot = B_TRUE;
4266				if (vtoc->efi_parts[slice].p_tag == V_USR)
4267					break;
4268			}
4269			if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR)
4270				*sb = vtoc->efi_parts[slice].p_start;
4271			if (system != NULL)
4272				*system = boot;
4273			efi_free(vtoc);
4274		}
4275		(void) close(fd);
4276	}
4277	return (err);
4278}
4279
4280/*
4281 * determine where a partition starts on a disk in the current
4282 * configuration
4283 */
4284static diskaddr_t
4285find_start_block(nvlist_t *config)
4286{
4287	nvlist_t **child;
4288	uint_t c, children;
4289	diskaddr_t sb = MAXOFFSET_T;
4290	uint64_t wholedisk;
4291
4292	if (nvlist_lookup_nvlist_array(config,
4293	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4294		if (nvlist_lookup_uint64(config,
4295		    ZPOOL_CONFIG_WHOLE_DISK,
4296		    &wholedisk) != 0 || !wholedisk) {
4297			return (MAXOFFSET_T);
4298		}
4299		if (read_efi_label(config, &sb, NULL) < 0)
4300			sb = MAXOFFSET_T;
4301		return (sb);
4302	}
4303
4304	for (c = 0; c < children; c++) {
4305		sb = find_start_block(child[c]);
4306		if (sb != MAXOFFSET_T) {
4307			return (sb);
4308		}
4309	}
4310	return (MAXOFFSET_T);
4311}
4312#endif /* illumos */
4313
4314/*
4315 * Label an individual disk.  The name provided is the short name,
4316 * stripped of any leading /dev path.
4317 */
4318int
4319zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name,
4320    zpool_boot_label_t boot_type, uint64_t boot_size, int *slice)
4321{
4322#ifdef illumos
4323	char path[MAXPATHLEN];
4324	struct dk_gpt *vtoc;
4325	int fd;
4326	size_t resv = EFI_MIN_RESV_SIZE;
4327	uint64_t slice_size;
4328	diskaddr_t start_block;
4329	char errbuf[1024];
4330
4331	/* prepare an error message just in case */
4332	(void) snprintf(errbuf, sizeof (errbuf),
4333	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4334
4335	if (zhp) {
4336		nvlist_t *nvroot;
4337
4338		verify(nvlist_lookup_nvlist(zhp->zpool_config,
4339		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4340
4341		if (zhp->zpool_start_block == 0)
4342			start_block = find_start_block(nvroot);
4343		else
4344			start_block = zhp->zpool_start_block;
4345		zhp->zpool_start_block = start_block;
4346	} else {
4347		/* new pool */
4348		start_block = NEW_START_BLOCK;
4349	}
4350
4351	(void) snprintf(path, sizeof (path), "%s/%s%s", ZFS_RDISK_ROOT, name,
4352	    BACKUP_SLICE);
4353
4354	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
4355		/*
4356		 * This shouldn't happen.  We've long since verified that this
4357		 * is a valid device.
4358		 */
4359		zfs_error_aux(hdl,
4360		    dgettext(TEXT_DOMAIN, "unable to open device"));
4361		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4362	}
4363
4364	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4365		/*
4366		 * The only way this can fail is if we run out of memory, or we
4367		 * were unable to read the disk's capacity
4368		 */
4369		if (errno == ENOMEM)
4370			(void) no_memory(hdl);
4371
4372		(void) close(fd);
4373		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4374		    "unable to read disk capacity"), name);
4375
4376		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4377	}
4378
4379	/*
4380	 * Why we use V_USR: V_BACKUP confuses users, and is considered
4381	 * disposable by some EFI utilities (since EFI doesn't have a backup
4382	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
4383	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4384	 * etc. were all pretty specific.  V_USR is as close to reality as we
4385	 * can get, in the absence of V_OTHER.
4386	 */
4387	/* first fix the partition start block */
4388	if (start_block == MAXOFFSET_T)
4389		start_block = NEW_START_BLOCK;
4390
4391	/*
4392	 * EFI System partition is using slice 0.
4393	 * ZFS is on slice 1 and slice 8 is reserved.
4394	 * We assume the GPT partition table without system
4395	 * partition has zfs p_start == NEW_START_BLOCK.
4396	 * If start_block != NEW_START_BLOCK, it means we have
4397	 * system partition. Correct solution would be to query/cache vtoc
4398	 * from existing vdev member.
4399	 */
4400	if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
4401		if (boot_size % vtoc->efi_lbasize != 0) {
4402			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4403			    "boot partition size must be a multiple of %d"),
4404			    vtoc->efi_lbasize);
4405			(void) close(fd);
4406			efi_free(vtoc);
4407			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4408		}
4409		/*
4410		 * System partition size checks.
4411		 * Note the 1MB is quite arbitrary value, since we
4412		 * are creating dedicated pool, it should be enough
4413		 * to hold fat + efi bootloader. May need to be
4414		 * adjusted if the bootloader size will grow.
4415		 */
4416		if (boot_size < 1024 * 1024) {
4417			char buf[64];
4418			zfs_nicenum(boot_size, buf, sizeof (buf));
4419			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4420			    "Specified size %s for EFI System partition is too "
4421			    "small, the minimum size is 1MB."), buf);
4422			(void) close(fd);
4423			efi_free(vtoc);
4424			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4425		}
4426		/* 33MB is tested with mkfs -F pcfs */
4427		if (hdl->libzfs_printerr &&
4428		    ((vtoc->efi_lbasize == 512 &&
4429		    boot_size < 33 * 1024 * 1024) ||
4430		    (vtoc->efi_lbasize == 4096 &&
4431		    boot_size < 256 * 1024 * 1024)))  {
4432			char buf[64];
4433			zfs_nicenum(boot_size, buf, sizeof (buf));
4434			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4435			    "Warning: EFI System partition size %s is "
4436			    "not allowing to create FAT32 file\nsystem, which "
4437			    "may result in unbootable system.\n"), buf);
4438		}
4439		/* Adjust zfs partition start by size of system partition. */
4440		start_block += boot_size / vtoc->efi_lbasize;
4441	}
4442
4443	if (start_block == NEW_START_BLOCK) {
4444		/*
4445		 * Use default layout.
4446		 * ZFS is on slice 0 and slice 8 is reserved.
4447		 */
4448		slice_size = vtoc->efi_last_u_lba + 1;
4449		slice_size -= EFI_MIN_RESV_SIZE;
4450		slice_size -= start_block;
4451		if (slice != NULL)
4452			*slice = 0;
4453
4454		vtoc->efi_parts[0].p_start = start_block;
4455		vtoc->efi_parts[0].p_size = slice_size;
4456
4457		vtoc->efi_parts[0].p_tag = V_USR;
4458		(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
4459
4460		vtoc->efi_parts[8].p_start = slice_size + start_block;
4461		vtoc->efi_parts[8].p_size = resv;
4462		vtoc->efi_parts[8].p_tag = V_RESERVED;
4463	} else {
4464		slice_size = start_block - NEW_START_BLOCK;
4465		vtoc->efi_parts[0].p_start = NEW_START_BLOCK;
4466		vtoc->efi_parts[0].p_size = slice_size;
4467		vtoc->efi_parts[0].p_tag = V_SYSTEM;
4468		(void) strcpy(vtoc->efi_parts[0].p_name, "loader");
4469		if (slice != NULL)
4470			*slice = 1;
4471		/* prepare slice 1 */
4472		slice_size = vtoc->efi_last_u_lba + 1 - slice_size;
4473		slice_size -= resv;
4474		slice_size -= NEW_START_BLOCK;
4475		vtoc->efi_parts[1].p_start = start_block;
4476		vtoc->efi_parts[1].p_size = slice_size;
4477		vtoc->efi_parts[1].p_tag = V_USR;
4478		(void) strcpy(vtoc->efi_parts[1].p_name, "zfs");
4479
4480		vtoc->efi_parts[8].p_start = slice_size + start_block;
4481		vtoc->efi_parts[8].p_size = resv;
4482		vtoc->efi_parts[8].p_tag = V_RESERVED;
4483	}
4484
4485	if (efi_write(fd, vtoc) != 0) {
4486		/*
4487		 * Some block drivers (like pcata) may not support EFI
4488		 * GPT labels.  Print out a helpful error message dir-
4489		 * ecting the user to manually label the disk and give
4490		 * a specific slice.
4491		 */
4492		(void) close(fd);
4493		efi_free(vtoc);
4494
4495		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4496		    "try using fdisk(1M) and then provide a specific slice"));
4497		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4498	}
4499
4500	(void) close(fd);
4501	efi_free(vtoc);
4502#endif /* illumos */
4503	return (0);
4504}
4505
4506static boolean_t
4507supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
4508{
4509	char *type;
4510	nvlist_t **child;
4511	uint_t children, c;
4512
4513	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
4514	if (strcmp(type, VDEV_TYPE_FILE) == 0 ||
4515	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
4516	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
4517		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4518		    "vdev type '%s' is not supported"), type);
4519		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
4520		return (B_FALSE);
4521	}
4522	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
4523	    &child, &children) == 0) {
4524		for (c = 0; c < children; c++) {
4525			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
4526				return (B_FALSE);
4527		}
4528	}
4529	return (B_TRUE);
4530}
4531
4532/*
4533 * Check if this zvol is allowable for use as a dump device; zero if
4534 * it is, > 0 if it isn't, < 0 if it isn't a zvol.
4535 *
4536 * Allowable storage configurations include mirrors, all raidz variants, and
4537 * pools with log, cache, and spare devices.  Pools which are backed by files or
4538 * have missing/hole vdevs are not suitable.
4539 */
4540int
4541zvol_check_dump_config(char *arg)
4542{
4543	zpool_handle_t *zhp = NULL;
4544	nvlist_t *config, *nvroot;
4545	char *p, *volname;
4546	nvlist_t **top;
4547	uint_t toplevels;
4548	libzfs_handle_t *hdl;
4549	char errbuf[1024];
4550	char poolname[ZFS_MAX_DATASET_NAME_LEN];
4551	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
4552	int ret = 1;
4553
4554	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
4555		return (-1);
4556	}
4557
4558	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4559	    "dump is not supported on device '%s'"), arg);
4560
4561	if ((hdl = libzfs_init()) == NULL)
4562		return (1);
4563	libzfs_print_on_error(hdl, B_TRUE);
4564
4565	volname = arg + pathlen;
4566
4567	/* check the configuration of the pool */
4568	if ((p = strchr(volname, '/')) == NULL) {
4569		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4570		    "malformed dataset name"));
4571		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4572		return (1);
4573	} else if (p - volname >= ZFS_MAX_DATASET_NAME_LEN) {
4574		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4575		    "dataset name is too long"));
4576		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
4577		return (1);
4578	} else {
4579		(void) strncpy(poolname, volname, p - volname);
4580		poolname[p - volname] = '\0';
4581	}
4582
4583	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
4584		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4585		    "could not open pool '%s'"), poolname);
4586		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
4587		goto out;
4588	}
4589	config = zpool_get_config(zhp, NULL);
4590	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
4591	    &nvroot) != 0) {
4592		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4593		    "could not obtain vdev configuration for  '%s'"), poolname);
4594		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
4595		goto out;
4596	}
4597
4598	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
4599	    &top, &toplevels) == 0);
4600
4601	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
4602		goto out;
4603	}
4604	ret = 0;
4605
4606out:
4607	if (zhp)
4608		zpool_close(zhp);
4609	libzfs_fini(hdl);
4610	return (ret);
4611}
4612
4613int
4614zpool_nextboot(libzfs_handle_t *hdl, uint64_t pool_guid, uint64_t dev_guid,
4615    const char *command)
4616{
4617	zfs_cmd_t zc = { 0 };
4618	nvlist_t *args;
4619	char *packed;
4620	size_t size;
4621	int error;
4622
4623	args = fnvlist_alloc();
4624	fnvlist_add_uint64(args, ZPOOL_CONFIG_POOL_GUID, pool_guid);
4625	fnvlist_add_uint64(args, ZPOOL_CONFIG_GUID, dev_guid);
4626	fnvlist_add_string(args, "command", command);
4627	error = zcmd_write_src_nvlist(hdl, &zc, args);
4628	if (error == 0)
4629		error = ioctl(hdl->libzfs_fd, ZFS_IOC_NEXTBOOT, &zc);
4630	zcmd_free_nvlists(&zc);
4631	nvlist_free(args);
4632	return (error);
4633}
4634