/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <sys/efi_partition.h>
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>

#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"
#include "zfs_comutil.h"

static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

#define	DISK_ROOT	"/dev/dsk"
#define	RDISK_ROOT	"/dev/rdsk"
#define	BACKUP_SLICE	"s2"

typedef struct prop_flags {
	int create:1;	/* Validate property on creation */
	int import:1;	/* Validate property on import */
} prop_flags_t;

/*
 * ====================================================================
 *   zpool property functions
 * ====================================================================
 */

static int
zpool_get_all_props(zpool_handle_t *zhp)
{
	zfs_cmd_t zc = { 0 };
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
		return (-1);

	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
		if (errno == ENOMEM) {
			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
				zcmd_free_nvlists(&zc);
				return (-1);
			}
		} else {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}

	zcmd_free_nvlists(&zc);

	return (0);
}
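
/*
 * Illustrative sketch (not a contract of the kernel format): judging
 * from the lookups in zpool_get_prop_string() and zpool_get_prop_int()
 * below, the nvlist read above holds one sub-nvlist per property:
 *
 *	zpool_props = {
 *		"cachefile" = { ZPROP_SOURCE = <uint64>, ZPROP_VALUE = <str> }
 *		"version"   = { ZPROP_SOURCE = <uint64>, ZPROP_VALUE = <u64> }
 *		...
 *	}
 */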

static int
zpool_props_refresh(zpool_handle_t *zhp)
{
	nvlist_t *old_props;

	old_props = zhp->zpool_props;

	if (zpool_get_all_props(zhp) != 0)
		return (-1);

	nvlist_free(old_props);
	return (0);
}

static char *
zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
    zprop_source_t *src)
{
	nvlist_t *nv, *nvl;
	uint64_t ival;
	char *value;
	zprop_source_t source;

	nvl = zhp->zpool_props;
	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
		source = ival;
		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
	} else {
		source = ZPROP_SRC_DEFAULT;
		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
			value = "-";
	}

	if (src)
		*src = source;

	return (value);
}

uint64_t
zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
{
	nvlist_t *nv, *nvl;
	uint64_t value;
	zprop_source_t source;

	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
		/*
		 * zpool_get_all_props() has most likely failed because
		 * the pool is faulted, but if all we need is the top level
		 * vdev's guid then get it from the zhp config nvlist.
		 */
		if ((prop == ZPOOL_PROP_GUID) &&
		    (nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
		    == 0)) {
			return (value);
		}
		return (zpool_prop_default_numeric(prop));
	}

	nvl = zhp->zpool_props;
	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
		source = value;
		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
	} else {
		source = ZPROP_SRC_DEFAULT;
		value = zpool_prop_default_numeric(prop);
	}

	if (src)
		*src = source;

	return (value);
}

/*
 * Map VDEV STATE to printed strings.
 */
char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
	switch (state) {
	case VDEV_STATE_CLOSED:
	case VDEV_STATE_OFFLINE:
		return (gettext("OFFLINE"));
	case VDEV_STATE_REMOVED:
		return (gettext("REMOVED"));
	case VDEV_STATE_CANT_OPEN:
		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
			return (gettext("FAULTED"));
		else if (aux == VDEV_AUX_SPLIT_POOL)
			return (gettext("SPLIT"));
		else
			return (gettext("UNAVAIL"));
	case VDEV_STATE_FAULTED:
		return (gettext("FAULTED"));
	case VDEV_STATE_DEGRADED:
		return (gettext("DEGRADED"));
	case VDEV_STATE_HEALTHY:
		return (gettext("ONLINE"));
	}

	return (gettext("UNKNOWN"));
}

/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
    zprop_source_t *srctype)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;

	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, "FAULTED", len);
			break;

		case ZPOOL_PROP_GUID:
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu",
			    (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				if (srctype != NULL)
					*srctype = src;
				return (0);
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
			(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_CAPACITY:
			(void) snprintf(buf, len, "%llu%%",
			    (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			(void) snprintf(buf, len, "%llu.%02llux",
			    (u_longlong_t)(intval / 100),
			    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
			verify(nvlist_lookup_uint64_array(nvroot,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);

			(void) strlcpy(buf, zpool_state_to_name(intval,
			    vs->vs_aux), len);
			break;
		default:
			(void) snprintf(buf, len, "%llu",
			    (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
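
/*
 * Example (an illustrative sketch; assumes an open zpool_handle_t *zhp):
 *
 *	char health[ZFS_MAXPROPLEN];
 *
 *	if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
 *	    sizeof (health), NULL) == 0)
 *		(void) printf("state: %s\n", health);
 */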

/*
 * Check that the bootfs name has the same pool name as the pool it is
 * being set on.  Assumes bootfs is a valid dataset name.
 */
static boolean_t
bootfs_name_valid(const char *pool, char *bootfs)
{
	int len = strlen(pool);

	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
		return (B_FALSE);

	if (strncmp(pool, bootfs, len) == 0 &&
	    (bootfs[len] == '/' || bootfs[len] == '\0'))
		return (B_TRUE);

	return (B_FALSE);
}
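
/*
 * For example, with pool "tank", bootfs values "tank" and "tank/root"
 * pass this check, while "tankfoo" and "rpool/root" do not.
 */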

/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
	nvlist_t **child;
	uint_t c, children;

	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (read_efi_label(config, NULL) >= 0);

	for (c = 0; c < children; c++) {
		if (pool_uses_efi(child[c]))
			return (B_TRUE);
	}
	return (B_FALSE);
}

static boolean_t
pool_is_bootable(zpool_handle_t *zhp)
{
	char bootfs[ZPOOL_MAXNAMELEN];

	return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
	    sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
	    sizeof (bootfs)) != 0);
}

/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash;
	struct stat64 statbuf;
	zpool_handle_t *zhp;
	nvlist_t *nvroot;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		case ZPOOL_PROP_VERSION:
			if (intval < version || intval > SPA_VERSION) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %llu is invalid."),
				    propname, (u_longlong_t)intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * The bootfs property value must be a dataset name,
			 * and the dataset must reside in the pool it is set on.
			 */
			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
			    strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

			/*
			 * bootfs property cannot be set on a disk which has
			 * been EFI labeled.
			 */
			if (pool_uses_efi(nvroot)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' not supported on "
				    "EFI labeled devices"), propname);
				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
				zpool_close(zhp);
				goto error;
			}
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '/';
			break;

		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}

/*
 * Set zpool property: propname=propval.
 */
int
zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
	zfs_cmd_t zc = { 0 };
	int ret = -1;
	char errbuf[1024];
	nvlist_t *nvl = NULL;
	nvlist_t *realprops;
	uint64_t version;
	prop_flags_t flags = { 0 };

	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
	    zhp->zpool_name);

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (no_memory(zhp->zpool_hdl));

	if (nvlist_add_string(nvl, propname, propval) != 0) {
		nvlist_free(nvl);
		return (no_memory(zhp->zpool_hdl));
	}

	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
	    zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
		nvlist_free(nvl);
		return (-1);
	}

	nvlist_free(nvl);
	nvl = realprops;

	/*
	 * Execute the corresponding ioctl() to set this property.
	 */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
		nvlist_free(nvl);
		return (-1);
	}

	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);

	zcmd_free_nvlists(&zc);
	nvlist_free(nvl);

	if (ret)
		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
	else
		(void) zpool_props_refresh(zhp);

	return (ret);
}
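
/*
 * Example (an illustrative sketch; assumes an open zpool_handle_t *zhp,
 * with "autoreplace" standing in for any settable pool property):
 *
 *	if (zpool_set_prop(zhp, "autoreplace", "on") != 0)
 *		return (-1);	(the error has already been reported)
 */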

int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	zprop_list_t *entry;
	char buf[ZFS_MAXPROPLEN];

	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
		return (-1);

	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
		if (entry->pl_fixed)
			continue;

		if (entry->pl_prop != ZPROP_INVAL &&
		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
		    NULL) == 0) {
			if (strlen(buf) > entry->pl_width)
				entry->pl_width = strlen(buf);
		}
	}

	return (0);
}

/*
 * Don't start the slice at the default block of 34; many storage
 * devices will use a stripe width of 128k, so start there instead.
 */
#define	NEW_START_BLOCK	256
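
/*
 * With 512-byte sectors, block 256 sits 256 * 512 = 131072 bytes (128K)
 * into the device, so the slice begins on a 128k stripe boundary.
 */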

/*
 * Validate the given pool name, optionally reporting an extended error
 * message via 'hdl' if the name is invalid.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
	namecheck_err_t why;
	char what;
	int ret;

	ret = pool_namecheck(pool, &why, &what);

	/*
	 * The rules for reserved pool names were extended at a later point.
	 * But we need to support users with existing pools that may now be
	 * invalid.  So we only check for this expanded set of names during a
	 * create (or import), and only in userland.
	 */
	if (ret == 0 && !isopen &&
	    (strncmp(pool, "mirror", 6) == 0 ||
	    strncmp(pool, "raidz", 5) == 0 ||
	    strncmp(pool, "spare", 5) == 0 ||
	    strcmp(pool, "log") == 0)) {
		if (hdl != NULL)
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "name is reserved"));
		return (B_FALSE);
	}

	if (ret != 0) {
		if (hdl != NULL) {
			switch (why) {
			case NAME_ERR_TOOLONG:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "name is too long"));
				break;

			case NAME_ERR_INVALCHAR:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "invalid character "
				    "'%c' in pool name"), what);
				break;

			case NAME_ERR_NOLETTER:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name must begin with a letter"));
				break;

			case NAME_ERR_RESERVED:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name is reserved"));
				break;

			case NAME_ERR_DISKLIKE:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool name is reserved"));
				break;

			case NAME_ERR_LEADING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "leading slash in name"));
				break;

			case NAME_ERR_EMPTY_COMPONENT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "empty component in name"));
				break;

			case NAME_ERR_TRAILING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "trailing slash in name"));
				break;

			case NAME_ERR_MULTIPLE_AT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "multiple '@' delimiters in name"));
				break;
			}
		}
		return (B_FALSE);
	}

	return (B_TRUE);
}
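
/*
 * Examples (illustrative): "tank" is a valid name; "mirror2", "raidz1",
 * "spare0" and "log" are rejected as reserved at create/import time; a
 * name such as "2pool" fails because names must begin with a letter.
 */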

/*
 * Open a handle to the given pool, even if the pool is currently in the FAULTED
 * state.
 */
zpool_handle_t *
zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
{
	zpool_handle_t *zhp;
	boolean_t missing;

	/*
	 * Make sure the pool name is valid.
	 */
	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
		    pool);
		return (NULL);
	}

	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
		return (NULL);

	zhp->zpool_hdl = hdl;
	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

	if (zpool_refresh_stats(zhp, &missing) != 0) {
		zpool_close(zhp);
		return (NULL);
	}

	if (missing) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
		(void) zfs_error_fmt(hdl, EZFS_NOENT,
		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
		zpool_close(zhp);
		return (NULL);
	}

	return (zhp);
}

/*
 * Like the above, but silent on error.  Used when iterating over pools (because
 * the configuration cache may be out of date).
 */
int
zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
{
	zpool_handle_t *zhp;
	boolean_t missing;

	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
		return (-1);

	zhp->zpool_hdl = hdl;
	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

	if (zpool_refresh_stats(zhp, &missing) != 0) {
		zpool_close(zhp);
		return (-1);
	}

	if (missing) {
		zpool_close(zhp);
		*ret = NULL;
		return (0);
	}

	*ret = zhp;
	return (0);
}

/*
 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
 * state.
 */
zpool_handle_t *
zpool_open(libzfs_handle_t *hdl, const char *pool)
{
	zpool_handle_t *zhp;

	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
		return (NULL);

	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
		zpool_close(zhp);
		return (NULL);
	}

	return (zhp);
}

/*
 * Close the handle.  Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	if (zhp->zpool_config)
		nvlist_free(zhp->zpool_config);
	if (zhp->zpool_old_config)
		nvlist_free(zhp->zpool_old_config);
	if (zhp->zpool_props)
		nvlist_free(zhp->zpool_props);
	free(zhp);
}

/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}

/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE).
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	return (zhp->zpool_state);
}

/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = { 0 };
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	char msg[1024];
	char *altroot;
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl,
		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/*
	 * If this is an alternate root pool, then we automatically set the
	 * mountpoint of the root dataset to be '/'.
	 */
	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
	    &altroot) == 0) {
		zfs_handle_t *zhp;

		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
		    "/") == 0);

		zfs_close(zhp);
	}

create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	return (ret);
}

/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 */
int
zpool_destroy(zpool_handle_t *zhp)
{
	zfs_cmd_t zc = { 0 };
	zfs_handle_t *zfp = NULL;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];

	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
	    (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot destroy '%s'"), zhp->zpool_name);

		if (errno == EROFS) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
		} else {
			(void) zpool_standard_error(hdl, errno, msg);
		}

		if (zfp)
			zfs_close(zfp);
		return (-1);
	}

	if (zfp) {
		remove_mountpoint(zfp);
		zfs_close(zfp);
	}

	return (0);
}

/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = { 0 };
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
		uint64_t s;

		for (s = 0; s < nspares; s++) {
			char *path;

			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
			    &path) == 0 && pool_uses_efi(spares[s])) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device '%s' contains an EFI label and "
				    "cannot be used on root pools."),
				    zpool_vdev_name(hdl, NULL, spares[s],
				    B_FALSE));
				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
			}
		}
	}

	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case EDOM:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "root pool cannot have multiple vdevs"
			    " or separate logs"));
			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	zcmd_free_nvlists(&zc);

	return (ret);
}

/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 */
int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot export '%s'"), zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = force;
	zc.zc_guid = hardforce;

	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
		switch (errno) {
		case EXDEV:
			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
			    "use '-f' to override the following errors:\n"
			    "'%s' has an active shared spare which could be"
			    " used by other pools once '%s' is exported."),
			    zhp->zpool_name, zhp->zpool_name);
			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
			    msg));
		default:
			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
			    msg));
		}
	}

	return (0);
}

int
zpool_export(zpool_handle_t *zhp, boolean_t force)
{
	return (zpool_export_common(zhp, force, B_FALSE));
}

int
zpool_export_force(zpool_handle_t *zhp)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE));
}

static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	uint64_t rewindto;
	int64_t loss = -1;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr || config == NULL)
		return;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
		return;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		return;
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		if (dryrun) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Would be able to return %s "
			    "to its state as of %s.\n"),
			    name, timestr);
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Pool %s returned to its state as of %s.\n"),
			    name, timestr);
		}
		if (loss > 120) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    (loss + 30) / 60);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "minutes of transactions.\n"));
		} else if (loss > 0) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded", loss);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "seconds of transactions.\n"));
		}
	}
}

void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	int64_t loss = -1;
	uint64_t edata = UINT64_MAX;
	uint64_t rewindto;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr)
		return;

	if (reason >= 0)
		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
	else
		(void) printf(dgettext(TEXT_DOMAIN, "\t"));

	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		goto no_info;

	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
	    &edata);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery is possible, but will result in some data loss.\n"));

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReturning the pool to its state as of %s\n"
		    "\tshould correct the problem.  "),
		    timestr);
	} else {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReverting the pool to an earlier state "
		    "should correct the problem.\n\t"));
	}

	if (loss > 120) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld minutes of data\n"
		    "\tmust be discarded, irreversibly.  "), (loss + 30) / 60);
	} else if (loss > 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld seconds of data\n"
		    "\tmust be discarded, irreversibly.  "), loss);
	}
	if (edata != 0 && edata != UINT64_MAX) {
		if (edata == 1) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, at least\n"
			    "\tone persistent user-data error will remain.  "));
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, several\n"
			    "\tpersistent user-data errors will remain.  "));
		}
	}
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
	    reason >= 0 ? "clear" : "import", name);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "A scrub of the pool\n"
	    "\tis strongly recommended after recovery.\n"));
	return;

no_info:
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Destroy and re-create the pool from\n\ta backup source.\n"));
}

/*
 * zpool_import() is a contracted interface.  It should be kept the same
 * if possible.
 *
 * Applications should use zpool_import_props() to import a pool with
 * new property values to be set.
 */
int
zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    char *altroot)
{
	nvlist_t *props = NULL;
	int ret;

	if (altroot != NULL) {
		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
			return (zfs_error_fmt(hdl, EZFS_NOMEM,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		}

		if (nvlist_add_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
		    nvlist_add_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
			nvlist_free(props);
			return (zfs_error_fmt(hdl, EZFS_NOMEM,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		}
	}

	ret = zpool_import_props(hdl, config, newname, props,
	    ZFS_IMPORT_NORMAL);
	if (props)
		nvlist_free(props);
	return (ret);
}

static void
print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
    int indent)
{
	nvlist_t **child;
	uint_t c, children;
	char *vname;
	uint64_t is_log = 0;

	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
	    &is_log);

	if (name != NULL)
		(void) printf("\t%*s%s%s\n", indent, "", name,
		    is_log ? " [log]" : "");

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return;

	for (c = 0; c < children; c++) {
		vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
		print_vdev_tree(hdl, vname, child[c], indent + 2);
		free(vname);
	}
}

/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameter controls whether the
 * pool is imported with a different name.
 */
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    nvlist_t *props, int flags)
{
	zfs_cmd_t zc = { 0 };
	zpool_rewind_policy_t policy;
	nvlist_t *nv = NULL;
	nvlist_t *nvinfo = NULL;
	nvlist_t *missing = NULL;
	char *thename;
	char *origname;
	int ret;
	int error = 0;
	char errbuf[1024];

	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &origname) == 0);

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot import pool '%s'"), origname);

	if (newname != NULL) {
		if (!zpool_name_valid(hdl, B_FALSE, newname))
			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		thename = (char *)newname;
	} else {
		thename = origname;
	}

	if (props) {
		uint64_t version;
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
		    &version) == 0);

		if ((props = zpool_valid_proplist(hdl, origname,
		    props, version, flags, errbuf)) == NULL) {
			return (-1);
		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
			nvlist_free(props);
			return (-1);
		}
	}

	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &zc.zc_guid) == 0);

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
		nvlist_free(props);
		return (-1);
	}
	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
		nvlist_free(props);
		return (-1);
	}

	zc.zc_cookie = flags;
	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}
	if (ret != 0)
		error = errno;

	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
	zpool_get_rewind_policy(config, &policy);

	if (error) {
		char desc[1024];

		/*
		 * Dry-run failed, but we print out what success
		 * looks like if we found a best txg
		 */
		if (policy.zrp_request & ZPOOL_TRY_REWIND) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    B_TRUE, nv);
			nvlist_free(nv);
			return (-1);
		}

		if (newname == NULL)
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    thename);
		else
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
			    origname, thename);

		switch (error) {
		case ENOTSUP:
			/*
			 * Unsupported version.
			 */
			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
			break;

		case EINVAL:
			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
			break;

		case EROFS:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;

		case ENXIO:
			if (nv && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_lookup_nvlist(nvinfo,
			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
				(void) printf(dgettext(TEXT_DOMAIN,
				    "The devices below are missing, use "
				    "'-m' to import the pool anyway:\n"));
				print_vdev_tree(hdl, NULL, missing, 2);
				(void) printf("\n");
			}
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EEXIST:
			(void) zpool_standard_error(hdl, error, desc);
			break;

		default:
			(void) zpool_standard_error(hdl, error, desc);
			zpool_explain_recover(hdl,
			    newname ? origname : thename, -error, nv);
			break;
		}

		nvlist_free(nv);
		ret = -1;
	} else {
		zpool_handle_t *zhp;

		/*
		 * This should never fail, but play it safe anyway.
		 */
		if (zpool_open_silent(hdl, thename, &zhp) != 0)
			ret = -1;
		else if (zhp != NULL)
			zpool_close(zhp);
		if (policy.zrp_request &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
		}
		nvlist_free(nv);
		return (0);
	}

	zcmd_free_nvlists(&zc);
	nvlist_free(props);

	return (ret);
}

/*
 * Scan the pool.
 */
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = func;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
	    (errno == ENOENT && func != POOL_SCAN_NONE))
		return (0);

	if (func == POOL_SCAN_SCRUB) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
	} else if (func == POOL_SCAN_NONE) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
		    zc.zc_name);
	} else {
		assert(!"unexpected result");
	}

	if (errno == EBUSY) {
		nvlist_t *nvroot;
		pool_scan_stat_t *ps = NULL;
		uint_t psc;

		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
		(void) nvlist_lookup_uint64_array(nvroot,
		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
		if (ps && ps->pss_func == POOL_SCAN_SCRUB)
			return (zfs_error(hdl, EZFS_SCRUBBING, msg));
		else
			return (zfs_error(hdl, EZFS_RESILVERING, msg));
	} else if (errno == ENOENT) {
		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
	} else {
		return (zpool_standard_error(hdl, errno, msg));
	}
}

/*
 * This provides a very minimal check whether a given string is likely a
 * c#t#d# style string.  Users of this are expected to do their own
 * verification of the s# part.
 */
#define	CTD_CHECK(str)  (str && str[0] == 'c' && isdigit(str[1]))
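
/*
 * For example, CTD_CHECK("c0t0d0s2") and CTD_CHECK("c1d0") are true,
 * while CTD_CHECK("/dev/dsk/c0t0d0s2") is not; ctd_check_path() below
 * handles full paths.
 */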

/*
 * More elaborate version for ones which may start with "/dev/dsk/"
 * and the like.
 */
static int
ctd_check_path(char *str)
{
	/*
	 * If it starts with a slash, check the last component.
	 */
	if (str && str[0] == '/') {
		char *tmp = strrchr(str, '/');

		/*
		 * If it ends in "/old", check the second-to-last
		 * component of the string instead.
		 */
		if (tmp != str && strcmp(tmp, "/old") == 0) {
			for (tmp--; *tmp != '/'; tmp--)
				;
		}
		str = tmp + 1;
	}
	return (CTD_CHECK(str));
}

/*
 * Find a vdev that matches the search criteria specified.  We use the
 * nvpair name to determine how we should look for the device.
 * 'avail_spare' is set to B_TRUE if the provided guid refers to an
 * AVAIL spare, but B_FALSE if it is an INUSE spare.
 */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "s0" or "s0/old".  The "s0" part is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
		    ctd_check_path(val)) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (wholedisk) {
				int slen = strlen(srchval);
				int vlen = strlen(val);

				if (slen != vlen - 2)
					break;

				/*
				 * make_leaf_vdev() should only set
				 * wholedisk for ZPOOL_CONFIG_PATHs which
				 * will include "/dev/dsk/", giving plenty of
				 * room for the indices used next.
				 */
				ASSERT(vlen >= 6);

				/*
				 * strings identical except trailing "s0"
				 */
				if (strcmp(&val[vlen - 2], "s0") == 0 &&
				    strncmp(srchval, val, slen) == 0)
					return (nv);

				/*
				 * strings identical except trailing "s0/old"
				 */
				if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
				    strcmp(&srchval[slen - 4], "/old") == 0 &&
				    strncmp(srchval, val, slen - 4) == 0)
					return (nv);

				break;
			}
		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(strncmp(type, VDEV_TYPE_RAIDZ,
			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
			    strncmp(type, VDEV_TYPE_MIRROR,
			    strlen(VDEV_TYPE_MIRROR)) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}

/*
 * Given a physical path (minus the "/devices" prefix), find the
 * associated vdev.
 */
nvlist_t *
zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
	nvlist_t *search, *nvroot, *ret;

	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	*avail_spare = B_FALSE;
	*l2cache = B_FALSE;
	if (log != NULL)
		*log = B_FALSE;
	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
	nvlist_free(search);

	return (ret);
}

/*
 * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
 */
boolean_t
zpool_vdev_is_interior(const char *name)
{
	if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
	    strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
		return (B_TRUE);
	return (B_FALSE);
}

nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	char buf[MAXPATHLEN];
	char *end;
	nvlist_t *nvroot, *search, *ret;
	uint64_t guid;

	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	guid = strtoull(path, &end, 10);
	if (guid != 0 && *end == '\0') {
		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
	} else if (zpool_vdev_is_interior(path)) {
		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
	} else if (path[0] != '/') {
		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
	} else {
		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
	}

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	*avail_spare = B_FALSE;
	*l2cache = B_FALSE;
	if (log != NULL)
		*log = B_FALSE;
	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
	nvlist_free(search);

	return (ret);
}
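
/*
 * Examples of 'path' forms accepted above (illustrative):
 *
 *	"12345678901234567890"	looked up as a vdev guid
 *	"mirror-0"		looked up as a top-level vdev name
 *	"c0t0d0"		expanded to the path "/dev/dsk/c0t0d0"
 *	"/dev/dsk/c0t0d0s0"	looked up as a full device path
 */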

static int
vdev_online(nvlist_t *nv)
{
	uint64_t ival;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
		return (0);

	return (1);
}

/*
 * Helper function for zpool_get_physpaths().
 */
static int
vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
    size_t *bytes_written)
{
	size_t bytes_left, pos, rsz;
	char *tmppath;
	const char *format;

	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
	    &tmppath) != 0)
		return (EZFS_NODEVICE);

	pos = *bytes_written;
	bytes_left = physpath_size - pos;
	format = (pos == 0) ? "%s" : " %s";

	rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
	*bytes_written += rsz;

	if (rsz >= bytes_left) {
		/* if physpath was not copied properly, clear it */
		if (bytes_left != 0) {
			physpath[pos] = 0;
		}
		return (EZFS_NOSPC);
	}
	return (0);
}

static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		if (vdev_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}

/*
 * Get phys_path for a root pool config.
 * Return 0 on success; non-zero on failure.
 */
static int
zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
{
	size_t rsz;
	nvlist_t *vdev_root;
	nvlist_t **child;
	uint_t count;
	char *type;

	rsz = 0;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &vdev_root) != 0)
		return (EZFS_INVALCONFIG);

	if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
	    &child, &count) != 0)
		return (EZFS_INVALCONFIG);

	/*
2061	 * root pool can not have EFI labeled disks and can only have
2062	 * a single top-level vdev.
2063	 */
2064	if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2065	    pool_uses_efi(vdev_root))
2066		return (EZFS_POOL_INVALARG);
2067
2068	(void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2069	    B_FALSE);
2070
2071	/* No online devices */
2072	if (rsz == 0)
2073		return (EZFS_NODEVICE);
2074
2075	return (0);
2076}
2077
2078/*
2079 * Get phys_path for a root pool
2080 * Return 0 on success; non-zero on failure.
2081 */
2082int
2083zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2084{
2085	return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2086	    phypath_size));
2087}
2088
/*
 * If the device has been dynamically expanded then we need to relabel
 * the disk to use the new unallocated space.
 */
static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
{
	char path[MAXPATHLEN];
	char errbuf[1024];
	int fd, error;
	int (*_efi_use_whole_disk)(int);

	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
	    "efi_use_whole_disk")) == NULL)
		return (-1);

	/* prepare an error message in case one of the steps below fails */
	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot relabel '%s'"), name);

	(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);

	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to open device"), name);
		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
	}

	/*
	 * It's possible that we might encounter an error if the device
	 * does not have any unallocated space left. If so, we simply
	 * ignore that error and continue on.
	 */
	error = _efi_use_whole_disk(fd);
	(void) close(fd);
	if (error && error != VT_ENOSPC) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to read disk capacity"), name);
		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
	}
	return (0);
}

/*
 * Bring the specified vdev online.  The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.
 */
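/*
 * A hedged usage sketch (hypothetical caller; "c0t0d0" is an example
 * device name):
 *
 *	vdev_state_t newstate;
 *
 *	if (zpool_vdev_online(zhp, "c0t0d0", ZFS_ONLINE_EXPAND,
 *	    &newstate) == 0 && newstate == VDEV_STATE_HEALTHY)
 *		... the device is back online and expanded ...
 */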
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
		char *pathname = NULL;
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);
		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
		    &pathname) == 0);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			pathname += strlen(DISK_ROOT) + 1;
			(void) zpool_relabel_disk(hdl, pathname);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		if (errno == EINVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one.  Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	*newstate = zc.zc_cookie;
	return (0);
}

/*
 * Take the specified vdev offline
 */
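/*
 * A hedged usage sketch (hypothetical caller; "c0t1d0" is an example
 * device name).  Passing B_TRUE for 'istmp' keeps the device offline only
 * until the next reboot or import:
 *
 *	if (zpool_vdev_offline(zhp, "c0t1d0", B_TRUE) == 0)
 *		... device is temporarily offline ...
 */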
int
zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    NULL)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	zc.zc_cookie = VDEV_STATE_OFFLINE;
	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
		return (0);

	switch (errno) {
	case EBUSY:
		/*
		 * There are no other replicas of this device.
		 */
		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));

	case EEXIST:
		/*
		 * The log device has unplayed logs
		 */
		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));

	default:
		return (zpool_standard_error(hdl, errno, msg));
	}
}

/*
 * Mark the given vdev faulted.
 */
int
zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_guid = guid;
	zc.zc_cookie = VDEV_STATE_FAULTED;
	zc.zc_obj = aux;

	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
		return (0);

	switch (errno) {
	case EBUSY:
		/*
		 * There are no other replicas of this device.
		 */
		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));

	default:
		return (zpool_standard_error(hdl, errno, msg));
	}
}

/*
 * Mark the given vdev degraded.
 */
int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_guid = guid;
	zc.zc_cookie = VDEV_STATE_DEGRADED;
	zc.zc_obj = aux;

	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
		return (0);

	return (zpool_standard_error(hdl, errno, msg));
}

/*
 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
 * a hot spare.
 */
static boolean_t
is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
{
	nvlist_t **child;
	uint_t c, children;
	char *type;

	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) == 0) {
		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
		    &type) == 0);

		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
		    children == 2 && child[which] == tgt)
			return (B_TRUE);

		for (c = 0; c < children; c++)
			if (is_replacing_spare(child[c], tgt, which))
				return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 */
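/*
 * Note: the 'nvroot' tree is typically built by the caller (for example,
 * zpool(1M) constructs it before calling us) and, per the check below,
 * must contain exactly one child disk.
 */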
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	uint64_t val;
	char *newname;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	boolean_t rootpool = pool_is_bootable(zhp);

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

	/*
	 * If this is a root pool, make sure that we're not attaching an
	 * EFI labeled device.
	 */
	if (rootpool && pool_uses_efi(nvroot)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "EFI labeled devices are not supported on root pools."));
		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	zc.zc_cookie = replacing;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
		return (-1);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
	    NULL) == NULL || !avail_spare) &&
	    is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		free(newname);
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	free(newname);

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0) {
		if (rootpool) {
			/*
			 * XXX need a better way to prevent user from
			 * booting up a half-baked vdev.
			 */
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
			    "sure to wait until resilver is done "
			    "before rebooting.\n"));
		}
		return (0);
	}

	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing) {
			uint64_t version = zpool_get_prop_int(zhp,
			    ZPOOL_PROP_VERSION, NULL);

			if (islog)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a log with a spare"));
			else if (version >= SPA_VERSION_MULTI_REPLACE)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "already in replacing/spare config; wait "
				    "for completion or use 'zpool detach'"));
			else
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a replacing device"));
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		}
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different alignment requirement.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "devices have different sector alignment"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}

/*
 * Detach the specified device.
 */
int
zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache;
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    NULL)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
		return (0);

	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't detach from this type of vdev.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
		    "applicable to mirror and replacing vdevs"));
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EBUSY:
		/*
		 * There are no other replicas of this device.
		 */
		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}

/*
 * Find a mirror vdev in the source nvlist.
 *
 * The mchild array contains a list of disks in one of the top-level mirrors
 * of the source pool.  The schild array contains a list of disks that the
 * user specified on the command line.  We loop over the mchild array to
 * see if any entry in the schild array matches.
 *
 * If a disk in the mchild array is found in the schild array, we return
 * the index of that entry.  Otherwise we return -1.
 */
static int
find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
    nvlist_t **schild, uint_t schildren)
{
	uint_t mc;

	for (mc = 0; mc < mchildren; mc++) {
		uint_t sc;
		char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
		    mchild[mc], B_FALSE);

		for (sc = 0; sc < schildren; sc++) {
			char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
			    schild[sc], B_FALSE);
			boolean_t result = (strcmp(mpath, spath) == 0);

			free(spath);
			if (result) {
				free(mpath);
				return (mc);
			}
		}

		free(mpath);
	}

	return (-1);
}

/*
 * Split a mirror pool.  If '*newroot' is NULL, a new nvlist is generated
 * and it is the responsibility of the caller to free it.
 */
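/*
 * A hedged dry-run sketch (hypothetical caller; "newpool" is an example
 * name).  With dryrun set, the generated tree is returned but no split is
 * performed:
 *
 *	splitflags_t flags = { 0 };
 *	nvlist_t *newroot = NULL;
 *
 *	flags.dryrun = 1;
 *	if (zpool_vdev_split(zhp, "newpool", &newroot, NULL, flags) == 0)
 *		nvlist_free(newroot);	(caller owns the generated tree)
 */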
int
zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
    nvlist_t *props, splitflags_t flags)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
	nvlist_t **varray = NULL, *zc_props = NULL;
	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	uint64_t vers;
	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
	int retval = 0;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);

	if (!zpool_name_valid(hdl, B_FALSE, newname))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
		(void) fprintf(stderr, gettext("Internal error: unable to "
		    "retrieve pool configuration\n"));
		return (-1);
	}

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
	    == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);

	if (props) {
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
		    props, vers, flags, msg)) == NULL)
			return (-1);
	}

	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "Source pool is missing vdev tree"));
		if (zc_props)
			nvlist_free(zc_props);
		return (-1);
	}

	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
	vcount = 0;

	if (*newroot == NULL ||
	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
	    &newchild, &newchildren) != 0)
		newchildren = 0;

	for (c = 0; c < children; c++) {
		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
		char *type;
		nvlist_t **mchild, *vdev;
		uint_t mchildren;
		int entry;

		/*
		 * Unlike cache & spares, slogs are stored in the
		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
		 */
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &is_log);
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &is_hole);
		if (is_log || is_hole) {
			/*
			 * Create a hole vdev and put it in the config.
			 */
			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
				goto out;
			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
			    VDEV_TYPE_HOLE) != 0)
				goto out;
			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
			    1) != 0)
				goto out;
			if (lastlog == 0)
				lastlog = vcount;
			varray[vcount++] = vdev;
			continue;
		}
		lastlog = 0;
		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
		    == 0);
		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Source pool must be composed only of mirrors\n"));
			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
			goto out;
		}

		verify(nvlist_lookup_nvlist_array(child[c],
		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);

		/* find or add an entry for this top-level vdev */
		if (newchildren > 0 &&
		    (entry = find_vdev_entry(zhp, mchild, mchildren,
		    newchild, newchildren)) >= 0) {
			/* We found a disk that the user specified. */
			vdev = mchild[entry];
			++found;
		} else {
			/* User didn't specify a disk for this vdev. */
			vdev = mchild[mchildren - 1];
		}

		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
			goto out;
	}

	/* did we find every disk the user specified? */
	if (found != newchildren) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
		    "include at most one disk from each mirror"));
		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
		goto out;
	}

	/* Prepare the nvlist for populating. */
	if (*newroot == NULL) {
		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
			goto out;
		freelist = B_TRUE;
		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0)
			goto out;
	} else {
		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
	}

	/* Add all the children we found */
	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
	    lastlog == 0 ? vcount : lastlog) != 0)
		goto out;

	/*
	 * If we're just doing a dry run, exit now with success.
	 */
	if (flags.dryrun) {
		memory_err = B_FALSE;
		freelist = B_FALSE;
		goto out;
	}

	/* now build up the config list & call the ioctl */
	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
		goto out;

	if (nvlist_add_nvlist(newconfig,
	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
	    nvlist_add_string(newconfig,
	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
		goto out;

	/*
	 * The new pool is automatically part of the namespace unless we
	 * explicitly export it.
	 */
	if (!flags.import)
		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
		goto out;
	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto out;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
		retval = zpool_standard_error(hdl, errno, msg);
		goto out;
	}

	freelist = B_FALSE;
	memory_err = B_FALSE;

out:
	if (varray != NULL) {
		int v;

		for (v = 0; v < vcount; v++)
			nvlist_free(varray[v]);
		free(varray);
	}
	zcmd_free_nvlists(&zc);
	if (zc_props)
		nvlist_free(zc_props);
	if (newconfig)
		nvlist_free(newconfig);
	if (freelist) {
		nvlist_free(*newroot);
		*newroot = NULL;
	}

	if (retval != 0)
		return (retval);

	if (memory_err)
		return (no_memory(hdl));

	return (0);
}

/*
 * Remove the given device.  Currently, this is supported only for hot spares
 * and level 2 cache devices.
 */
int
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	uint64_t version;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));
	/*
	 * XXX - this should just go away.
	 */
	if (!avail_spare && !l2cache && !islog) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "only inactive hot spares, cache, top-level, "
		    "or log devices can be removed"));
		return (zfs_error(hdl, EZFS_NODEVICE, msg));
	}

	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
	if (islog && version < SPA_VERSION_HOLES) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "pool must be upgraded to support log removal"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
		return (0);

	return (zpool_standard_error(hdl, errno, msg));
}

/*
 * Clear the errors for the pool, or the particular device if specified.
 */
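/*
 * Note: 'rewindnvl' carries the caller-built rewind policy nvlist (see
 * zpool_get_rewind_policy() below); a 'path' of NULL clears errors for
 * the entire pool rather than a single device.
 */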
int
zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	nvlist_t *tgt;
	zpool_rewind_policy_t policy;
	boolean_t avail_spare, l2cache;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	nvlist_t *nvi = NULL;
	int error;

	if (path)
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
		    path);
	else
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
		    zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if (path) {
		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
		    &l2cache, NULL)) == NULL)
			return (zfs_error(hdl, EZFS_NODEVICE, msg));

		/*
		 * Don't allow error clearing for hot spares.  Do allow
		 * error clearing for l2cache devices.
		 */
		if (avail_spare)
			return (zfs_error(hdl, EZFS_ISSPARE, msg));

		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
		    &zc.zc_guid) == 0);
	}

	zpool_get_rewind_policy(rewindnvl, &policy);
	zc.zc_cookie = policy.zrp_request;

	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
		return (-1);

	if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
		return (-1);

	while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}

	if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
	    errno != EPERM && errno != EACCES)) {
		if (policy.zrp_request &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
			zpool_rewind_exclaim(hdl, zc.zc_name,
			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
			    nvi);
			nvlist_free(nvi);
		}
		zcmd_free_nvlists(&zc);
		return (0);
	}

	zcmd_free_nvlists(&zc);
	return (zpool_standard_error(hdl, errno, msg));
}

/*
 * Similar to zpool_clear(), but takes a GUID (used by fmd).
 */
int
zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
	    guid);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_guid = guid;
	zc.zc_cookie = ZPOOL_NO_REWIND;

	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
		return (0);

	return (zpool_standard_error(hdl, errno, msg));
}

/*
 * Convert from a devid string to a path.
 */
static char *
devid_to_path(char *devid_str)
{
	ddi_devid_t devid;
	char *minor;
	char *path;
	devid_nmlist_t *list = NULL;
	int ret;

	if (devid_str_decode(devid_str, &devid, &minor) != 0)
		return (NULL);

	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);

	devid_str_free(minor);
	devid_free(devid);

	if (ret != 0)
		return (NULL);

	/* dup the name before freeing the list so it isn't leaked on error */
	path = strdup(list[0].devname);

	devid_free_nmlist(list);

	return (path);
}

/*
 * Convert from a path to a devid string.
 */
static char *
path_to_devid(const char *path)
{
	int fd;
	ddi_devid_t devid;
	char *minor, *ret;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (NULL);

	minor = NULL;
	ret = NULL;
	if (devid_get(fd, &devid) == 0) {
		if (devid_get_minor_name(fd, &minor) == 0)
			ret = devid_str_encode(devid, minor);
		if (minor != NULL)
			devid_str_free(minor);
		devid_free(devid);
	}
	(void) close(fd);

	return (ret);
}

/*
 * Issue the necessary ioctl() to update the stored path value for the vdev.  We
 * ignore any failure here, since a common case is for an unprivileged user to
 * type 'zpool status', and we'll display the correct information anyway.
 */
static void
set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
{
	zfs_cmd_t zc = { 0 };

	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
	    &zc.zc_guid) == 0);

	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
}

/*
 * Given a vdev, return the name to display in iostat.  If the vdev has a path,
 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
 * We also check if this is a whole disk, in which case we strip off the
 * trailing 's0' slice name.
 *
 * This routine is also responsible for identifying when disks have been
 * reconfigured in a new location.  The kernel will have opened the device by
 * devid, but the path will still refer to the old location.  To catch this, we
 * first do a path -> devid translation (which is fast for the common case).  If
 * the devid matches, we're done.  If not, we do a reverse devid -> path
 * translation and issue the appropriate ioctl() to update the path of the vdev.
 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
 * of these checks.
 */
char *
zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
    boolean_t verbose)
{
	char *path, *devid;
	uint64_t value;
	char buf[64];
	vdev_stat_t *vs;
	uint_t vsc;

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
	    &value) == 0) {
		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
		    &value) == 0);
		(void) snprintf(buf, sizeof (buf), "%llu",
		    (u_longlong_t)value);
		path = buf;
	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
		/*
		 * If the device is dead (faulted, offline, etc) then don't
		 * bother opening it.  Otherwise we may be forcing the user to
		 * open a misbehaving device, which can have undesirable
		 * effects.
		 */
		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
		    (uint64_t **)&vs, &vsc) != 0 ||
		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
		    zhp != NULL &&
		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
			/*
			 * Determine if the current path is correct.
			 */
			char *newdevid = path_to_devid(path);

			if (newdevid == NULL ||
			    strcmp(devid, newdevid) != 0) {
				char *newpath;

				if ((newpath = devid_to_path(devid)) != NULL) {
					/*
					 * Update the path appropriately.
					 */
					set_path(zhp, nv, newpath);
					if (nvlist_add_string(nv,
					    ZPOOL_CONFIG_PATH, newpath) == 0)
						verify(nvlist_lookup_string(nv,
						    ZPOOL_CONFIG_PATH,
						    &path) == 0);
					free(newpath);
				}
			}

			if (newdevid)
				devid_str_free(newdevid);
		}

		if (strncmp(path, "/dev/dsk/", 9) == 0)
			path += 9;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &value) == 0 && value) {
			int pathlen = strlen(path);
			char *tmp = zfs_strdup(hdl, path);

			/*
			 * If it starts with c#, and ends with "s0", chop
			 * the "s0" off, or if it ends with "s0/old", remove
			 * the "s0" from the middle.
			 */
			if (CTD_CHECK(tmp)) {
				if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
					tmp[pathlen - 2] = '\0';
				} else if (pathlen > 6 &&
				    strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
					(void) strcpy(&tmp[pathlen - 6],
					    "/old");
				}
			}
			return (tmp);
		}
	} else {
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);

		/*
		 * If it's a raidz device, we need to stick in the parity level.
		 */
		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
			    &value) == 0);
			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
			    (u_longlong_t)value);
			path = buf;
		}

		/*
		 * We identify each top-level vdev by using a <type-id>
		 * naming convention.
		 */
		if (verbose) {
			uint64_t id;

			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);
			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
			    (u_longlong_t)id);
			path = buf;
		}
	}

	return (zfs_strdup(hdl, path));
}

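/*
 * qsort(3C) comparator: order zbookmark_t entries by their raw contents.
 */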
static int
zbookmark_compare(const void *a, const void *b)
{
	return (memcmp(a, b, sizeof (zbookmark_t)));
}

/*
 * Retrieve the persistent error log, uniquify the members, and return to the
 * caller.
 */
int
zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
	zfs_cmd_t zc = { 0 };
	uint64_t count;
	zbookmark_t *zb = NULL;
	int i;

	/*
	 * Retrieve the raw error list from the kernel.  If the number of errors
	 * has increased, allocate more space and continue until we get the
	 * entire list.
	 */
	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
	    &count) == 0);
	if (count == 0)
		return (0);
	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
		return (-1);
	zc.zc_nvlist_dst_size = count;
	(void) strcpy(zc.zc_name, zhp->zpool_name);
	for (;;) {
		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
		    &zc) != 0) {
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			if (errno == ENOMEM) {
				count = zc.zc_nvlist_dst_size;
				if ((zc.zc_nvlist_dst = (uintptr_t)
				    zfs_alloc(zhp->zpool_hdl, count *
				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
					return (-1);
			} else {
				return (-1);
			}
		} else {
			break;
		}
	}

	/*
	 * Sort the resulting bookmarks.  This is a little confusing due to the
	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
	 * _not_ copied as part of the process.  So we point the start of our
	 * array appropriately and decrement the total number of elements.
	 */
	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
	    zc.zc_nvlist_dst_size;
	count -= zc.zc_nvlist_dst_size;

	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);

	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

	/*
	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
	 */
	for (i = 0; i < count; i++) {
		nvlist_t *nv;

		/* ignoring zb_blkid and zb_level for now */
		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
		    zb[i-1].zb_object == zb[i].zb_object)
			continue;

		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
			goto nomem;
		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
		    zb[i].zb_objset) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
		    zb[i].zb_object) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		nvlist_free(nv);
	}

	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (0);

nomem:
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (no_memory(zhp->zpool_hdl));
}

/*
 * Upgrade a ZFS pool to the latest on-disk version.
 */
int
zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
{
	zfs_cmd_t zc = { 0 };
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strcpy(zc.zc_name, zhp->zpool_name);
	zc.zc_cookie = new_version;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
		return (zpool_standard_error_fmt(hdl, errno,
		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
		    zhp->zpool_name));
	return (0);
}

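/*
 * Flatten a subcommand and its arguments into a single history string,
 * e.g. "create -f tank mirror c0t0d0 c0t1d0", truncated to fit within
 * HIS_MAX_RECORD_LEN.
 */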
void
zpool_set_history_str(const char *subcommand, int argc, char **argv,
    char *history_str)
{
	int i;

	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
	for (i = 1; i < argc; i++) {
		if (strlen(history_str) + 1 + strlen(argv[i]) >
		    HIS_MAX_RECORD_LEN)
			break;
		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
	}
}

/*
 * Stage command history for logging.
 */
int
zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
{
	if (history_str == NULL)
		return (EINVAL);

	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
		return (EINVAL);

	if (hdl->libzfs_log_str != NULL)
		free(hdl->libzfs_log_str);

	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
		return (no_memory(hdl));

	return (0);
}

/*
 * Perform ioctl to get some command history of a pool.
 *
 * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
 * logical offset of the history buffer to start reading from.
 *
 * Upon return, 'off' is the next logical offset to read from and
 * 'len' is the actual amount of bytes read into 'buf'.
 */
static int
get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
{
	zfs_cmd_t zc = { 0 };
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	zc.zc_history = (uint64_t)(uintptr_t)buf;
	zc.zc_history_len = *len;
	zc.zc_history_offset = *off;

	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
		switch (errno) {
		case EPERM:
			return (zfs_error_fmt(hdl, EZFS_PERM,
			    dgettext(TEXT_DOMAIN,
			    "cannot show history for pool '%s'"),
			    zhp->zpool_name));
		case ENOENT:
			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
			    "'%s'"), zhp->zpool_name));
		case ENOTSUP:
			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
			    "'%s', pool must be upgraded"), zhp->zpool_name));
		default:
			return (zpool_standard_error_fmt(hdl, errno,
			    dgettext(TEXT_DOMAIN,
			    "cannot get history for '%s'"), zhp->zpool_name));
		}
	}

	*len = zc.zc_history_len;
	*off = zc.zc_history_offset;

	return (0);
}

/*
 * Process the buffer of nvlists, unpacking and storing each nvlist record
 * into 'records'.  'leftover' is set to the number of bytes that weren't
 * processed as there wasn't a complete record.
 */
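/*
 * Each record in the buffer is framed as an 8-byte little-endian length
 * followed by that many bytes of packed nvlist:
 *
 *	[len0 (LE uint64)][nvlist0][len1 (LE uint64)][nvlist1]...
 */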
int
zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
    nvlist_t ***records, uint_t *numrecords)
{
	uint64_t reclen;
	nvlist_t *nv;
	int i;

	while (bytes_read > sizeof (reclen)) {
		/* get length of packed record (stored as little endian) */
		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);

		if (bytes_read < sizeof (reclen) + reclen)
			break;

		/* unpack record */
		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
			return (ENOMEM);
		bytes_read -= sizeof (reclen) + reclen;
		buf += sizeof (reclen) + reclen;

		/* add record to nvlist array */
		(*numrecords)++;
		if (ISP2(*numrecords + 1)) {
			*records = realloc(*records,
			    *numrecords * 2 * sizeof (nvlist_t *));
		}
		(*records)[*numrecords - 1] = nv;
	}

	*leftover = bytes_read;
	return (0);
}

#define	HIS_BUF_LEN	(128*1024)

/*
 * Retrieve the command history of a pool.
 */
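/*
 * A hedged caller sketch (hypothetical):
 *
 *	nvlist_t *nvhis;
 *
 *	if (zpool_get_history(zhp, &nvhis) == 0) {
 *		... walk the ZPOOL_HIST_RECORD nvlist array ...
 *		nvlist_free(nvhis);
 *	}
 */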
int
zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
{
	char buf[HIS_BUF_LEN];
	uint64_t off = 0;
	nvlist_t **records = NULL;
	uint_t numrecords = 0;
	int err, i;

	do {
		uint64_t bytes_read = sizeof (buf);
		uint64_t leftover;

		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
			break;

		/* if nothing else was read in, we're at EOF, just return */
		if (!bytes_read)
			break;

		if ((err = zpool_history_unpack(buf, bytes_read,
		    &leftover, &records, &numrecords)) != 0)
			break;
		off -= leftover;

		/* CONSTCOND */
	} while (1);

	if (!err) {
		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
		    records, numrecords) == 0);
	}
	for (i = 0; i < numrecords; i++)
		nvlist_free(records[i]);
	free(records);

	return (err);
}

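/*
 * Translate a <dataset obj, object> pair from the persistent error log
 * into a human-readable pathname, falling back to "<dsname>:<0xobj>"
 * notation when the object's path cannot be resolved.
 */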
void
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
    char *pathname, size_t len)
{
	zfs_cmd_t zc = { 0 };
	boolean_t mounted = B_FALSE;
	char *mntpnt = NULL;
	char dsname[MAXNAMELEN];

	if (dsobj == 0) {
		/* special case for the MOS */
		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
		return;
	}

	/* get the dataset's name */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_obj = dsobj;
	if (ioctl(zhp->zpool_hdl->libzfs_fd,
	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
		/* just write out a path of two object numbers */
		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
		    dsobj, obj);
		return;
	}
	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));

	/* find out if the dataset is mounted */
	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);

	/* get the corrupted object's path */
	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
	zc.zc_obj = obj;
	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
	    &zc) == 0) {
		if (mounted) {
			(void) snprintf(pathname, len, "%s%s", mntpnt,
			    zc.zc_value);
		} else {
			(void) snprintf(pathname, len, "%s:%s",
			    dsname, zc.zc_value);
		}
	} else {
		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
	}
	free(mntpnt);
}

/*
 * Read the EFI label from the config.  If a label does not exist, pass the
 * error back to the caller.  If the caller has passed a non-NULL diskaddr
 * argument, set it to the starting address of the EFI partition.
 */
static int
read_efi_label(nvlist_t *config, diskaddr_t *sb)
{
	char *path;
	int fd;
	char diskname[MAXPATHLEN];
	int err = -1;

	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
		return (err);

	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
	    strrchr(path, '/'));
	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
		struct dk_gpt *vtoc;

		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
			if (sb != NULL)
				*sb = vtoc->efi_parts[0].p_start;
			efi_free(vtoc);
		}
		(void) close(fd);
	}
	return (err);
}

/*
 * Determine where a partition starts on a disk in the current
 * configuration.
 */
static diskaddr_t
find_start_block(nvlist_t *config)
{
	nvlist_t **child;
	uint_t c, children;
	diskaddr_t sb = MAXOFFSET_T;
	uint64_t wholedisk;

	if (nvlist_lookup_nvlist_array(config,
	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
		if (nvlist_lookup_uint64(config,
		    ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) != 0 || !wholedisk) {
			return (MAXOFFSET_T);
		}
		if (read_efi_label(config, &sb) < 0)
			sb = MAXOFFSET_T;
		return (sb);
	}

	for (c = 0; c < children; c++) {
		sb = find_start_block(child[c]);
		if (sb != MAXOFFSET_T) {
			return (sb);
		}
	}
	return (MAXOFFSET_T);
}

/*
 * Label an individual disk.  The name provided is the short name,
 * stripped of any leading /dev path.
 */
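/*
 * For example, a hypothetical zpool_label_disk(hdl, zhp, "c0t0d0") writes
 * an EFI label with slice 0 spanning the usable blocks and slice 8 holding
 * the EFI_MIN_RESV_SIZE reserved area, as laid out below.
 */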
int
zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
{
	char path[MAXPATHLEN];
	struct dk_gpt *vtoc;
	int fd;
	size_t resv = EFI_MIN_RESV_SIZE;
	uint64_t slice_size;
	diskaddr_t start_block;
	char errbuf[1024];

	/* prepare an error message just in case */
	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);

	if (zhp) {
		nvlist_t *nvroot;

		if (pool_is_bootable(zhp)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "EFI labeled devices are not supported on root "
			    "pools."));
			return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
		}

		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

		if (zhp->zpool_start_block == 0)
			start_block = find_start_block(nvroot);
		else
			start_block = zhp->zpool_start_block;
		zhp->zpool_start_block = start_block;
	} else {
		/* new pool */
		start_block = NEW_START_BLOCK;
	}

	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
	    BACKUP_SLICE);

	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
		/*
		 * This shouldn't happen.  We've long since verified that this
		 * is a valid device.
		 */
		zfs_error_aux(hdl,
		    dgettext(TEXT_DOMAIN, "unable to open device"));
		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
	}

	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
		/*
		 * The only way this can fail is if we run out of memory, or we
		 * were unable to read the disk's capacity
		 */
		if (errno == ENOMEM)
			(void) no_memory(hdl);

		(void) close(fd);
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "unable to read disk capacity"), name);

		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
	}

	slice_size = vtoc->efi_last_u_lba + 1;
	slice_size -= EFI_MIN_RESV_SIZE;
	if (start_block == MAXOFFSET_T)
		start_block = NEW_START_BLOCK;
	slice_size -= start_block;

	vtoc->efi_parts[0].p_start = start_block;
	vtoc->efi_parts[0].p_size = slice_size;

	/*
	 * Why we use V_USR: V_BACKUP confuses users, and is considered
	 * disposable by some EFI utilities (since EFI doesn't have a backup
	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
	 * etc. were all pretty specific.  V_USR is as close to reality as we
	 * can get, in the absence of V_OTHER.
	 */
	vtoc->efi_parts[0].p_tag = V_USR;
	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");

	vtoc->efi_parts[8].p_start = slice_size + start_block;
	vtoc->efi_parts[8].p_size = resv;
	vtoc->efi_parts[8].p_tag = V_RESERVED;

	if (efi_write(fd, vtoc) != 0) {
		/*
		 * Some block drivers (like pcata) may not support EFI GPT
		 * labels.  Print out a helpful error message directing the
		 * user to manually label the disk and give a specific slice.
		 */
		(void) close(fd);
		efi_free(vtoc);

		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "try using fdisk(1M) and then provide a specific slice"));
		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
	}

	(void) close(fd);
	efi_free(vtoc);
	return (0);
}

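/*
 * Recursively verify that every vdev in 'config' is of a type the dump
 * subsystem can handle; raidz, file, log, hole, and missing vdevs are
 * rejected.
 */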
static boolean_t
supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
{
	char *type;
	nvlist_t **child;
	uint_t children, c;

	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "vdev type '%s' is not supported"), type);
		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
		return (B_FALSE);
	}
	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
				return (B_FALSE);
		}
	}
	return (B_TRUE);
}

/*
 * Check if this zvol is allowable for use as a dump device; zero if
 * it is, > 0 if it isn't, < 0 if it isn't a zvol.
 */
int
zvol_check_dump_config(char *arg)
{
	zpool_handle_t *zhp = NULL;
	nvlist_t *config, *nvroot;
	char *p, *volname;
	nvlist_t **top;
	uint_t toplevels;
	libzfs_handle_t *hdl;
	char errbuf[1024];
	char poolname[ZPOOL_MAXNAMELEN];
	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
	int ret = 1;

	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
		return (-1);
	}

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "dump is not supported on device '%s'"), arg);

	if ((hdl = libzfs_init()) == NULL)
		return (1);
	libzfs_print_on_error(hdl, B_TRUE);

	volname = arg + pathlen;

	/* check the configuration of the pool */
	if ((p = strchr(volname, '/')) == NULL) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "malformed dataset name"));
		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
		return (1);
	} else if (p - volname >= ZFS_MAXNAMELEN) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "dataset name is too long"));
		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
		return (1);
	} else {
		(void) strncpy(poolname, volname, p - volname);
		poolname[p - volname] = '\0';
	}

	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "could not open pool '%s'"), poolname);
		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
		goto out;
	}
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "could not obtain vdev configuration for '%s'"), poolname);
		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
		goto out;
	}

	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &top, &toplevels) == 0);
	if (toplevels != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "'%s' has multiple top level vdevs"), poolname);
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
		goto out;
	}

	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
		goto out;
	}
	ret = 0;

out:
	if (zhp)
		zpool_close(zhp);
	libzfs_fini(hdl);
	return (ret);
}