libzfs_pool.c revision 216291
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <assert.h>
30#include <ctype.h>
31#include <errno.h>
32#include <devid.h>
33#include <dirent.h>
34#include <fcntl.h>
35#include <libintl.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <strings.h>
39#include <unistd.h>
40#include <zone.h>
41#include <sys/zfs_ioctl.h>
42#include <sys/zio.h>
43#include <umem.h>
44
45#include "zfs_namecheck.h"
46#include "zfs_prop.h"
47#include "libzfs_impl.h"
48
49static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
50
51#ifdef sun
52#if defined(__i386) || defined(__amd64)
53#define	BOOTCMD	"installgrub(1M)"
54#else
55#define	BOOTCMD	"installboot(1M)"
56#endif
57#endif	/* sun */
58
59/*
60 * ====================================================================
61 *   zpool property functions
62 * ====================================================================
63 */
64
65static int
66zpool_get_all_props(zpool_handle_t *zhp)
67{
68	zfs_cmd_t zc = { 0 };
69	libzfs_handle_t *hdl = zhp->zpool_hdl;
70
71	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
72
73	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
74		return (-1);
75
76	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
77		if (errno == ENOMEM) {
78			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
79				zcmd_free_nvlists(&zc);
80				return (-1);
81			}
82		} else {
83			zcmd_free_nvlists(&zc);
84			return (-1);
85		}
86	}
87
88	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
89		zcmd_free_nvlists(&zc);
90		return (-1);
91	}
92
93	zcmd_free_nvlists(&zc);
94
95	return (0);
96}
97
98static int
99zpool_props_refresh(zpool_handle_t *zhp)
100{
101	nvlist_t *old_props;
102
103	old_props = zhp->zpool_props;
104
105	if (zpool_get_all_props(zhp) != 0)
106		return (-1);
107
108	nvlist_free(old_props);
109	return (0);
110}
111
112static char *
113zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
114    zprop_source_t *src)
115{
116	nvlist_t *nv, *nvl;
117	uint64_t ival;
118	char *value;
119	zprop_source_t source;
120
121	nvl = zhp->zpool_props;
122	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
123		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
124		source = ival;
125		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
126	} else {
127		source = ZPROP_SRC_DEFAULT;
128		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
129			value = "-";
130	}
131
132	if (src)
133		*src = source;
134
135	return (value);
136}
137
138uint64_t
139zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
140{
141	nvlist_t *nv, *nvl;
142	uint64_t value;
143	zprop_source_t source;
144
145	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
146		/*
147		 * zpool_get_all_props() has most likely failed because
148		 * the pool is faulted, but if all we need is the top level
149		 * vdev's guid then get it from the zhp config nvlist.
150		 */
151		if ((prop == ZPOOL_PROP_GUID) &&
152		    (nvlist_lookup_nvlist(zhp->zpool_config,
153		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
154		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
155		    == 0)) {
156			return (value);
157		}
158		return (zpool_prop_default_numeric(prop));
159	}
160
161	nvl = zhp->zpool_props;
162	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
163		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
164		source = value;
165		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
166	} else {
167		source = ZPROP_SRC_DEFAULT;
168		value = zpool_prop_default_numeric(prop);
169	}
170
171	if (src)
172		*src = source;
173
174	return (value);
175}
176
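/*
 * Usage sketch (illustrative, not part of the original source): read a
 * numeric pool property.  Assumes 'zhp' was obtained from zpool_open().
 *
 *	zprop_source_t src;
 *	uint64_t version;
 *
 *	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, &src);
 *	(void) printf("version=%llu (%s)\n", (u_longlong_t)version,
 *	    src == ZPROP_SRC_DEFAULT ? "default" : "local");
 */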
177/*
178 * Map VDEV STATE to printed strings.
179 */
180char *
181zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
182{
183	switch (state) {
184	case VDEV_STATE_CLOSED:
185	case VDEV_STATE_OFFLINE:
186		return (gettext("OFFLINE"));
187	case VDEV_STATE_REMOVED:
188		return (gettext("REMOVED"));
189	case VDEV_STATE_CANT_OPEN:
190		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
191			return (gettext("FAULTED"));
192		else
193			return (gettext("UNAVAIL"));
194	case VDEV_STATE_FAULTED:
195		return (gettext("FAULTED"));
196	case VDEV_STATE_DEGRADED:
197		return (gettext("DEGRADED"));
198	case VDEV_STATE_HEALTHY:
199		return (gettext("ONLINE"));
200	}
201
202	return (gettext("UNKNOWN"));
203}
204
205/*
206 * Get a zpool property value for 'prop' and return the value in
207 * a pre-allocated buffer.
208 */
209int
210zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
211    zprop_source_t *srctype)
212{
213	uint64_t intval;
214	const char *strval;
215	zprop_source_t src = ZPROP_SRC_NONE;
216	nvlist_t *nvroot;
217	vdev_stat_t *vs;
218	uint_t vsc;
219
220	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
221		switch (prop) {
222		case ZPOOL_PROP_NAME:
223			(void) strlcpy(buf, zpool_get_name(zhp), len);
224			break;
225
226		case ZPOOL_PROP_HEALTH:
227			(void) strlcpy(buf, "FAULTED", len);
228			break;
229
230		case ZPOOL_PROP_GUID:
231			intval = zpool_get_prop_int(zhp, prop, &src);
232			(void) snprintf(buf, len, "%llu", intval);
233			break;
234
235		case ZPOOL_PROP_ALTROOT:
236		case ZPOOL_PROP_CACHEFILE:
237			if (zhp->zpool_props != NULL ||
238			    zpool_get_all_props(zhp) == 0) {
239				(void) strlcpy(buf,
240				    zpool_get_prop_string(zhp, prop, &src),
241				    len);
242				if (srctype != NULL)
243					*srctype = src;
244				return (0);
245			}
246			/* FALLTHROUGH */
247		default:
248			(void) strlcpy(buf, "-", len);
249			break;
250		}
251
252		if (srctype != NULL)
253			*srctype = src;
254		return (0);
255	}
256
257	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
258	    prop != ZPOOL_PROP_NAME)
259		return (-1);
260
261	switch (zpool_prop_get_type(prop)) {
262	case PROP_TYPE_STRING:
263		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
264		    len);
265		break;
266
267	case PROP_TYPE_NUMBER:
268		intval = zpool_get_prop_int(zhp, prop, &src);
269
270		switch (prop) {
271		case ZPOOL_PROP_SIZE:
272		case ZPOOL_PROP_USED:
273		case ZPOOL_PROP_AVAILABLE:
274			(void) zfs_nicenum(intval, buf, len);
275			break;
276
277		case ZPOOL_PROP_CAPACITY:
278			(void) snprintf(buf, len, "%llu%%",
279			    (u_longlong_t)intval);
280			break;
281
282		case ZPOOL_PROP_HEALTH:
283			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
284			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
285			verify(nvlist_lookup_uint64_array(nvroot,
286			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
287
288			(void) strlcpy(buf, zpool_state_to_name(intval,
289			    vs->vs_aux), len);
290			break;
291		default:
292			(void) snprintf(buf, len, "%llu", intval);
293		}
294		break;
295
296	case PROP_TYPE_INDEX:
297		intval = zpool_get_prop_int(zhp, prop, &src);
298		if (zpool_prop_index_to_string(prop, intval, &strval)
299		    != 0)
300			return (-1);
301		(void) strlcpy(buf, strval, len);
302		break;
303
304	default:
305		abort();
306	}
307
308	if (srctype)
309		*srctype = src;
310
311	return (0);
312}
313
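/*
 * Usage sketch (illustrative only): format the pool health into a
 * caller-supplied buffer.  Assumes a valid 'zhp'.
 *
 *	char health[ZFS_MAXPROPLEN];
 *
 *	if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
 *	    sizeof (health), NULL) == 0)
 *		(void) printf("%s: %s\n", zpool_get_name(zhp), health);
 */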
314static boolean_t
315pool_is_bootable(zpool_handle_t *zhp)
316{
317	char bootfs[ZPOOL_MAXNAMELEN];
318
319	return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
320	    sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-",
321	    sizeof (bootfs)) != 0);
322}
323
324
325/*
326 * Check that the bootfs name carries the same pool name as the pool it
327 * is being set on.  Assumes bootfs is a syntactically valid dataset name.
328 */
329static boolean_t
330bootfs_name_valid(const char *pool, char *bootfs)
331{
332	int len = strlen(pool);
333
334	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
335		return (B_FALSE);
336
337	if (strncmp(pool, bootfs, len) == 0 &&
338	    (bootfs[len] == '/' || bootfs[len] == '\0'))
339		return (B_TRUE);
340
341	return (B_FALSE);
342}
343
344/*
345 * Inspect the configuration to determine if any of the devices contain
346 * an EFI label.
347 */
348static boolean_t
349pool_uses_efi(nvlist_t *config)
350{
351#ifdef sun
352	nvlist_t **child;
353	uint_t c, children;
354
355	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
356	    &child, &children) != 0)
357		return (read_efi_label(config, NULL) >= 0);
358
359	for (c = 0; c < children; c++) {
360		if (pool_uses_efi(child[c]))
361			return (B_TRUE);
362	}
363#endif	/* sun */
364	return (B_FALSE);
365}
366
367/*
368 * Given an nvlist of zpool properties to be set, validate that they are
369 * correct, and parse any numeric properties (index, boolean, etc) if they are
370 * specified as strings.
371 */
372static nvlist_t *
373zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
374    nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
375{
376	nvpair_t *elem;
377	nvlist_t *retprops;
378	zpool_prop_t prop;
379	char *strval;
380	uint64_t intval;
381	char *slash;
382	struct stat64 statbuf;
383	zpool_handle_t *zhp;
384	nvlist_t *nvroot;
385
386	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
387		(void) no_memory(hdl);
388		return (NULL);
389	}
390
391	elem = NULL;
392	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
393		const char *propname = nvpair_name(elem);
394
395		/*
396		 * Make sure this property is valid and applies to this type.
397		 */
398		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
399			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
400			    "invalid property '%s'"), propname);
401			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
402			goto error;
403		}
404
405		if (zpool_prop_readonly(prop)) {
406			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
407			    "is readonly"), propname);
408			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
409			goto error;
410		}
411
412		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
413		    &strval, &intval, errbuf) != 0)
414			goto error;
415
416		/*
417		 * Perform additional checking for specific properties.
418		 */
419		switch (prop) {
420		case ZPOOL_PROP_VERSION:
421			if (intval < version || intval > SPA_VERSION) {
422				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
423				    "property '%s' number %llu is invalid."),
424				    propname, (u_longlong_t)intval);
425				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
426				goto error;
427			}
428			break;
429
430		case ZPOOL_PROP_BOOTFS:
431			if (create_or_import) {
432				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
433				    "property '%s' cannot be set at creation "
434				    "or import time"), propname);
435				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
436				goto error;
437			}
438
439			if (version < SPA_VERSION_BOOTFS) {
440				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
441				    "pool must be upgraded to support "
442				    "'%s' property"), propname);
443				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
444				goto error;
445			}
446
447			/*
448			 * The bootfs property value has to be a dataset name and
449			 * the dataset has to be in the same pool that it is set on.
450			 */
451			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
452			    strval)) {
453				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
454				    "is an invalid name"), strval);
455				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
456				goto error;
457			}
458
459			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
460				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
461				    "could not open pool '%s'"), poolname);
462				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
463				goto error;
464			}
465			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
466			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
467
468#if defined(sun)
469			/*
470			 * bootfs property cannot be set on a disk which has
471			 * been EFI labeled.
472			 */
473			if (pool_uses_efi(nvroot)) {
474				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
475				    "property '%s' not supported on "
476				    "EFI labeled devices"), propname);
477				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
478				zpool_close(zhp);
479				goto error;
480			}
481#endif
482			zpool_close(zhp);
483			break;
484
485		case ZPOOL_PROP_ALTROOT:
486			if (!create_or_import) {
487				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
488				    "property '%s' can only be set during pool "
489				    "creation or import"), propname);
490				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
491				goto error;
492			}
493
494			if (strval[0] != '/') {
495				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
496				    "bad alternate root '%s'"), strval);
497				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
498				goto error;
499			}
500			break;
501
502		case ZPOOL_PROP_CACHEFILE:
503			if (strval[0] == '\0')
504				break;
505
506			if (strcmp(strval, "none") == 0)
507				break;
508
509			if (strval[0] != '/') {
510				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
511				    "property '%s' must be empty, an "
512				    "absolute path, or 'none'"), propname);
513				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
514				goto error;
515			}
516
517			slash = strrchr(strval, '/');
518
519			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
520			    strcmp(slash, "/..") == 0) {
521				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
522				    "'%s' is not a valid file"), strval);
523				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
524				goto error;
525			}
526
527			*slash = '\0';
528
529			if (strval[0] != '\0' &&
530			    (stat64(strval, &statbuf) != 0 ||
531			    !S_ISDIR(statbuf.st_mode))) {
532				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
533				    "'%s' is not a valid directory"),
534				    strval);
535				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
536				goto error;
537			}
538
539			*slash = '/';
540			break;
541		}
542	}
543
544	return (retprops);
545error:
546	nvlist_free(retprops);
547	return (NULL);
548}
549
550/*
551 * Set zpool property : propname=propval.
552 */
553int
554zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
555{
556	zfs_cmd_t zc = { 0 };
557	int ret = -1;
558	char errbuf[1024];
559	nvlist_t *nvl = NULL;
560	nvlist_t *realprops;
561	uint64_t version;
562
563	(void) snprintf(errbuf, sizeof (errbuf),
564	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
565	    zhp->zpool_name);
566
567	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
568		return (no_memory(zhp->zpool_hdl));
569
570	if (nvlist_add_string(nvl, propname, propval) != 0) {
571		nvlist_free(nvl);
572		return (no_memory(zhp->zpool_hdl));
573	}
574
575	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
576	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
577	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
578		nvlist_free(nvl);
579		return (-1);
580	}
581
582	nvlist_free(nvl);
583	nvl = realprops;
584
585	/*
586	 * Execute the corresponding ioctl() to set this property.
587	 */
588	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
589
590	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
591		nvlist_free(nvl);
592		return (-1);
593	}
594
595	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
596
597	zcmd_free_nvlists(&zc);
598	nvlist_free(nvl);
599
600	if (ret)
601		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
602	else
603		(void) zpool_props_refresh(zhp);
604
605	return (ret);
606}
607
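/*
 * Usage sketch (illustrative only): set a single property and rely on
 * the error already recorded on the handle if it fails.  Assumes a
 * valid 'zhp'.
 *
 *	if (zpool_set_prop(zhp, "autoreplace", "on") != 0)
 *		return (-1);	-- error text is on zhp->zpool_hdl
 */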
608int
609zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
610{
611	libzfs_handle_t *hdl = zhp->zpool_hdl;
612	zprop_list_t *entry;
613	char buf[ZFS_MAXPROPLEN];
614
615	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
616		return (-1);
617
618	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
619
620		if (entry->pl_fixed)
621			continue;
622
623		if (entry->pl_prop != ZPROP_INVAL &&
624		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
625		    NULL) == 0) {
626			if (strlen(buf) > entry->pl_width)
627				entry->pl_width = strlen(buf);
628		}
629	}
630
631	return (0);
632}
633
634
635/*
636 * Validate the given pool name, reporting any extended error information
637 * through 'hdl' when it is non-NULL.
638 */
639boolean_t
640zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
641{
642	namecheck_err_t why;
643	char what;
644	int ret;
645
646	ret = pool_namecheck(pool, &why, &what);
647
648	/*
649	 * The rules for reserved pool names were extended at a later point.
650	 * But we need to support users with existing pools that may now be
651	 * invalid.  So we only check for this expanded set of names during a
652	 * create (or import), and only in userland.
653	 */
654	if (ret == 0 && !isopen &&
655	    (strncmp(pool, "mirror", 6) == 0 ||
656	    strncmp(pool, "raidz", 5) == 0 ||
657	    strncmp(pool, "spare", 5) == 0 ||
658	    strcmp(pool, "log") == 0)) {
659		if (hdl != NULL)
660			zfs_error_aux(hdl,
661			    dgettext(TEXT_DOMAIN, "name is reserved"));
662		return (B_FALSE);
663	}
664
665
666	if (ret != 0) {
667		if (hdl != NULL) {
668			switch (why) {
669			case NAME_ERR_TOOLONG:
670				zfs_error_aux(hdl,
671				    dgettext(TEXT_DOMAIN, "name is too long"));
672				break;
673
674			case NAME_ERR_INVALCHAR:
675				zfs_error_aux(hdl,
676				    dgettext(TEXT_DOMAIN, "invalid character "
677				    "'%c' in pool name"), what);
678				break;
679
680			case NAME_ERR_NOLETTER:
681				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
682				    "name must begin with a letter"));
683				break;
684
685			case NAME_ERR_RESERVED:
686				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
687				    "name is reserved"));
688				break;
689
690			case NAME_ERR_DISKLIKE:
691				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
692				    "pool name is reserved"));
693				break;
694
695			case NAME_ERR_LEADING_SLASH:
696				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
697				    "leading slash in name"));
698				break;
699
700			case NAME_ERR_EMPTY_COMPONENT:
701				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
702				    "empty component in name"));
703				break;
704
705			case NAME_ERR_TRAILING_SLASH:
706				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
707				    "trailing slash in name"));
708				break;
709
710			case NAME_ERR_MULTIPLE_AT:
711				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
712				    "multiple '@' delimiters in name"));
713				break;
714
715			}
716		}
717		return (B_FALSE);
718	}
719
720	return (B_TRUE);
721}
722
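/*
 * Usage sketch (illustrative only): reject a reserved name up front.
 * Note that "mirror" only fails the extended check when 'isopen' is
 * B_FALSE (create/import time).
 *
 *	if (!zpool_name_valid(hdl, B_FALSE, "mirror"))
 *		(void) fprintf(stderr, "reserved or invalid pool name\n");
 */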
723/*
724 * Open a handle to the given pool, even if the pool is currently in the FAULTED
725 * state.
726 */
727zpool_handle_t *
728zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
729{
730	zpool_handle_t *zhp;
731	boolean_t missing;
732
733	/*
734	 * Make sure the pool name is valid.
735	 */
736	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
737		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
738		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
739		    pool);
740		return (NULL);
741	}
742
743	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
744		return (NULL);
745
746	zhp->zpool_hdl = hdl;
747	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
748
749	if (zpool_refresh_stats(zhp, &missing) != 0) {
750		zpool_close(zhp);
751		return (NULL);
752	}
753
754	if (missing) {
755		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
756		(void) zfs_error_fmt(hdl, EZFS_NOENT,
757		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
758		zpool_close(zhp);
759		return (NULL);
760	}
761
762	return (zhp);
763}
764
765/*
766 * Like the above, but silent on error.  Used when iterating over pools (because
767 * the configuration cache may be out of date).
768 */
769int
770zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
771{
772	zpool_handle_t *zhp;
773	boolean_t missing;
774
775	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
776		return (-1);
777
778	zhp->zpool_hdl = hdl;
779	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
780
781	if (zpool_refresh_stats(zhp, &missing) != 0) {
782		zpool_close(zhp);
783		return (-1);
784	}
785
786	if (missing) {
787		zpool_close(zhp);
788		*ret = NULL;
789		return (0);
790	}
791
792	*ret = zhp;
793	return (0);
794}
795
796/*
797 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
798 * state.
799 */
800zpool_handle_t *
801zpool_open(libzfs_handle_t *hdl, const char *pool)
802{
803	zpool_handle_t *zhp;
804
805	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
806		return (NULL);
807
808	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
809		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
810		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
811		zpool_close(zhp);
812		return (NULL);
813	}
814
815	return (zhp);
816}
817
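/*
 * Usage sketch (illustrative only): open a pool, query its state, and
 * release the handle.  Assumes an initialized 'hdl'; the pool name
 * "tank" is hypothetical.
 *
 *	zpool_handle_t *zhp;
 *
 *	if ((zhp = zpool_open(hdl, "tank")) == NULL)
 *		return (-1);
 *	(void) printf("state=%d\n", zpool_get_state(zhp));
 *	zpool_close(zhp);
 */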
818/*
819 * Close the handle.  Simply frees the memory associated with the handle.
820 */
821void
822zpool_close(zpool_handle_t *zhp)
823{
824	if (zhp->zpool_config)
825		nvlist_free(zhp->zpool_config);
826	if (zhp->zpool_old_config)
827		nvlist_free(zhp->zpool_old_config);
828	if (zhp->zpool_props)
829		nvlist_free(zhp->zpool_props);
830	free(zhp);
831}
832
833/*
834 * Return the name of the pool.
835 */
836const char *
837zpool_get_name(zpool_handle_t *zhp)
838{
839	return (zhp->zpool_name);
840}
841
842
843/*
844 * Return the state of the pool (ACTIVE or UNAVAILABLE).
845 */
846int
847zpool_get_state(zpool_handle_t *zhp)
848{
849	return (zhp->zpool_state);
850}
851
852/*
853 * Create the named pool, using the provided vdev list.  It is assumed
854 * that the consumer has already validated the contents of the nvlist, so we
855 * don't have to worry about error semantics.
856 */
857int
858zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
859    nvlist_t *props, nvlist_t *fsprops)
860{
861	zfs_cmd_t zc = { 0 };
862	nvlist_t *zc_fsprops = NULL;
863	nvlist_t *zc_props = NULL;
864	char msg[1024];
865	char *altroot;
866	int ret = -1;
867
868	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
869	    "cannot create '%s'"), pool);
870
871	if (!zpool_name_valid(hdl, B_FALSE, pool))
872		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
873
874	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
875		return (-1);
876
877	if (props) {
878		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
879		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
880			goto create_failed;
881		}
882	}
883
884	if (fsprops) {
885		uint64_t zoned;
886		char *zonestr;
887
888		zoned = ((nvlist_lookup_string(fsprops,
889		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
890		    strcmp(zonestr, "on") == 0);
891
892		if ((zc_fsprops = zfs_valid_proplist(hdl,
893		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
894			goto create_failed;
895		}
896		if (!zc_props &&
897		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
898			goto create_failed;
899		}
900		if (nvlist_add_nvlist(zc_props,
901		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
902			goto create_failed;
903		}
904	}
905
906	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
907		goto create_failed;
908
909	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
910
911	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
912
913		zcmd_free_nvlists(&zc);
914		nvlist_free(zc_props);
915		nvlist_free(zc_fsprops);
916
917		switch (errno) {
918		case EBUSY:
919			/*
920			 * This can happen if the user has specified the same
921			 * device multiple times.  We can't reliably detect this
922			 * until we try to add it and see we already have a
923			 * label.
924			 */
925			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
926			    "one or more vdevs refer to the same device"));
927			return (zfs_error(hdl, EZFS_BADDEV, msg));
928
929		case EOVERFLOW:
930			/*
931			 * This occurs when one of the devices is below
932			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
933			 * device was the problem device since there's no
934			 * reliable way to determine device size from userland.
935			 */
936			{
937				char buf[64];
938
939				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
940
941				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
942				    "one or more devices is less than the "
943				    "minimum size (%s)"), buf);
944			}
945			return (zfs_error(hdl, EZFS_BADDEV, msg));
946
947		case ENOSPC:
948			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
949			    "one or more devices is out of space"));
950			return (zfs_error(hdl, EZFS_BADDEV, msg));
951
952		case ENOTBLK:
953			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
954			    "cache device must be a disk or disk slice"));
955			return (zfs_error(hdl, EZFS_BADDEV, msg));
956
957		default:
958			return (zpool_standard_error(hdl, errno, msg));
959		}
960	}
961
962	/*
963	 * If this is an alternate root pool, then we automatically set the
964	 * mountpoint of the root dataset to be '/'.
965	 */
966	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
967	    &altroot) == 0) {
968		zfs_handle_t *zhp;
969
970		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
971		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
972		    "/") == 0);
973
974		zfs_close(zhp);
975	}
976
977create_failed:
978	zcmd_free_nvlists(&zc);
979	nvlist_free(zc_props);
980	nvlist_free(zc_fsprops);
981	return (ret);
982}
983
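/*
 * Usage sketch (illustrative only): create a single-disk pool.  The
 * vdev nvlist here is hand-built and simplified; the zpool(1M) command
 * normally constructs it, and the pool/device names are hypothetical.
 *
 *	nvlist_t *nvroot, *disk;
 *
 *	verify(nvlist_alloc(&disk, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(disk, ZPOOL_CONFIG_TYPE,
 *	    VDEV_TYPE_DISK) == 0);
 *	verify(nvlist_add_string(disk, ZPOOL_CONFIG_PATH,
 *	    "/dev/da0") == 0);
 *	verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
 *	verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
 *	    VDEV_TYPE_ROOT) == 0);
 *	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 *	    &disk, 1) == 0);
 *
 *	if (zpool_create(hdl, "tank", nvroot, NULL, NULL) != 0)
 *		(void) fprintf(stderr, "create failed\n");
 *	nvlist_free(disk);
 *	nvlist_free(nvroot);
 */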
984/*
985 * Destroy the given pool.  It is up to the caller to ensure that there are no
986 * datasets left in the pool.
987 */
988int
989zpool_destroy(zpool_handle_t *zhp)
990{
991	zfs_cmd_t zc = { 0 };
992	zfs_handle_t *zfp = NULL;
993	libzfs_handle_t *hdl = zhp->zpool_hdl;
994	char msg[1024];
995
996	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
997	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
998	    ZFS_TYPE_FILESYSTEM)) == NULL)
999		return (-1);
1000
1001	if (zpool_remove_zvol_links(zhp) != 0)
1002		return (-1);
1003
1004	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1005
1006	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1007		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1008		    "cannot destroy '%s'"), zhp->zpool_name);
1009
1010		if (errno == EROFS) {
1011			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1012			    "one or more devices is read only"));
1013			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1014		} else {
1015			(void) zpool_standard_error(hdl, errno, msg);
1016		}
1017
1018		if (zfp)
1019			zfs_close(zfp);
1020		return (-1);
1021	}
1022
1023	if (zfp) {
1024		remove_mountpoint(zfp);
1025		zfs_close(zfp);
1026	}
1027
1028	return (0);
1029}
1030
1031/*
1032 * Add the given vdevs to the pool.  The caller must have already performed the
1033 * necessary verification to ensure that the vdev specification is well-formed.
1034 */
1035int
1036zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1037{
1038	zfs_cmd_t zc = { 0 };
1039	int ret;
1040	libzfs_handle_t *hdl = zhp->zpool_hdl;
1041	char msg[1024];
1042	nvlist_t **spares, **l2cache;
1043	uint_t nspares, nl2cache;
1044
1045	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1046	    "cannot add to '%s'"), zhp->zpool_name);
1047
1048	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1049	    SPA_VERSION_SPARES &&
1050	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1051	    &spares, &nspares) == 0) {
1052		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1053		    "upgraded to add hot spares"));
1054		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1055	}
1056
1057	if (pool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
1058	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1059		uint64_t s;
1060
1061		for (s = 0; s < nspares; s++) {
1062			char *path;
1063
1064			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1065			    &path) == 0 && pool_uses_efi(spares[s])) {
1066				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1067				    "device '%s' contains an EFI label and "
1068				    "cannot be used on root pools."),
1069				    zpool_vdev_name(hdl, NULL, spares[s]));
1070				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1071			}
1072		}
1073	}
1074
1075	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1076	    SPA_VERSION_L2CACHE &&
1077	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1078	    &l2cache, &nl2cache) == 0) {
1079		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1080		    "upgraded to add cache devices"));
1081		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1082	}
1083
1084	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1085		return (-1);
1086	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1087
1088	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1089		switch (errno) {
1090		case EBUSY:
1091			/*
1092			 * This can happen if the user has specified the same
1093			 * device multiple times.  We can't reliably detect this
1094			 * until we try to add it and see we already have a
1095			 * label.
1096			 */
1097			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1098			    "one or more vdevs refer to the same device"));
1099			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1100			break;
1101
1102		case EOVERFLOW:
1103			/*
1104			 * This occurs when one of the devices is below
1105			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1106			 * device was the problem device since there's no
1107			 * reliable way to determine device size from userland.
1108			 */
1109			{
1110				char buf[64];
1111
1112				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1113
1114				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1115				    "device is less than the minimum "
1116				    "size (%s)"), buf);
1117			}
1118			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1119			break;
1120
1121		case ENOTSUP:
1122			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1123			    "pool must be upgraded to add these vdevs"));
1124			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1125			break;
1126
1127		case EDOM:
1128			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1129			    "root pool cannot have multiple vdevs"
1130			    " or separate logs"));
1131			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1132			break;
1133
1134		case ENOTBLK:
1135			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1136			    "cache device must be a disk or disk slice"));
1137			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1138			break;
1139
1140		default:
1141			(void) zpool_standard_error(hdl, errno, msg);
1142		}
1143
1144		ret = -1;
1145	} else {
1146		ret = 0;
1147	}
1148
1149	zcmd_free_nvlists(&zc);
1150
1151	return (ret);
1152}
1153
1154/*
1155 * Exports the pool from the system.  The caller must ensure that there are no
1156 * mounted datasets in the pool.
1157 */
1158int
1159zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
1160{
1161	zfs_cmd_t zc = { 0 };
1162	char msg[1024];
1163
1164	if (zpool_remove_zvol_links(zhp) != 0)
1165		return (-1);
1166
1167	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1168	    "cannot export '%s'"), zhp->zpool_name);
1169
1170	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1171	zc.zc_cookie = force;
1172	zc.zc_guid = hardforce;
1173
1174	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1175		switch (errno) {
1176		case EXDEV:
1177			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1178			    "use '-f' to override the following errors:\n"
1179			    "'%s' has an active shared spare which could be"
1180			    " used by other pools once '%s' is exported."),
1181			    zhp->zpool_name, zhp->zpool_name);
1182			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1183			    msg));
1184		default:
1185			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1186			    msg));
1187		}
1188	}
1189
1190	return (0);
1191}
1192
1193int
1194zpool_export(zpool_handle_t *zhp, boolean_t force)
1195{
1196	return (zpool_export_common(zhp, force, B_FALSE));
1197}
1198
1199int
1200zpool_export_force(zpool_handle_t *zhp)
1201{
1202	return (zpool_export_common(zhp, B_TRUE, B_TRUE));
1203}
1204
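/*
 * Usage sketch (illustrative only): a plain export, retried with force
 * if the first attempt fails.  Assumes a valid 'zhp'.
 *
 *	if (zpool_export(zhp, B_FALSE) != 0 &&
 *	    zpool_export(zhp, B_TRUE) != 0)
 *		return (-1);
 */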
1205/*
1206 * zpool_import() is a contracted interface; its signature should be kept
1207 * stable if at all possible.
1208 *
1209 * Applications should use zpool_import_props() to import a pool with
1210 * new property values to be set.
1211 */
1212int
1213zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1214    char *altroot)
1215{
1216	nvlist_t *props = NULL;
1217	int ret;
1218
1219	if (altroot != NULL) {
1220		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1221			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1222			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1223			    newname));
1224		}
1225
1226		if (nvlist_add_string(props,
1227		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1228		    nvlist_add_string(props,
1229		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1230			nvlist_free(props);
1231			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1232			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1233			    newname));
1234		}
1235	}
1236
1237	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
1238	if (props)
1239		nvlist_free(props);
1240	return (ret);
1241}
1242
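/*
 * Usage sketch (illustrative only): import a discovered pool under an
 * alternate root, keeping its original name.  'config' would come from
 * zpool_find_import(); "/mnt" is hypothetical.
 *
 *	if (zpool_import(hdl, config, NULL, "/mnt") != 0)
 *		return (-1);
 */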
1243/*
1244 * Import the given pool using the known configuration and a list of
1245 * properties to be set. The configuration should have come from
1246 * zpool_find_import(). The 'newname' parameter controls whether the pool
1247 * is imported with a different name.
1248 */
1249int
1250zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1251    nvlist_t *props, boolean_t importfaulted)
1252{
1253	zfs_cmd_t zc = { 0 };
1254	char *thename;
1255	char *origname;
1256	int ret;
1257	char errbuf[1024];
1258
1259	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1260	    &origname) == 0);
1261
1262	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1263	    "cannot import pool '%s'"), origname);
1264
1265	if (newname != NULL) {
1266		if (!zpool_name_valid(hdl, B_FALSE, newname))
1267			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1268			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1269			    newname));
1270		thename = (char *)newname;
1271	} else {
1272		thename = origname;
1273	}
1274
1275	if (props) {
1276		uint64_t version;
1277
1278		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1279		    &version) == 0);
1280
1281		if ((props = zpool_valid_proplist(hdl, origname,
1282		    props, version, B_TRUE, errbuf)) == NULL) {
1283			return (-1);
1284		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1285			nvlist_free(props);
1286			return (-1);
1287		}
1288	}
1289
1290	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1291
1292	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1293	    &zc.zc_guid) == 0);
1294
1295	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1296		nvlist_free(props);
1297		return (-1);
1298	}
1299
1300	zc.zc_cookie = (uint64_t)importfaulted;
1301	ret = 0;
1302	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1303		char desc[1024];
1304		if (newname == NULL)
1305			(void) snprintf(desc, sizeof (desc),
1306			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1307			    thename);
1308		else
1309			(void) snprintf(desc, sizeof (desc),
1310			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1311			    origname, thename);
1312
1313		switch (errno) {
1314		case ENOTSUP:
1315			/*
1316			 * Unsupported version.
1317			 */
1318			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1319			break;
1320
1321		case EINVAL:
1322			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1323			break;
1324
1325		default:
1326			(void) zpool_standard_error(hdl, errno, desc);
1327		}
1328
1329		ret = -1;
1330	} else {
1331		zpool_handle_t *zhp;
1332
1333		/*
1334		 * This should never fail, but play it safe anyway.
1335		 */
1336		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1337			ret = -1;
1338		} else if (zhp != NULL) {
1339			ret = zpool_create_zvol_links(zhp);
1340			zpool_close(zhp);
1341		}
1342
1343	}
1344
1345	zcmd_free_nvlists(&zc);
1346	nvlist_free(props);
1347
1348	return (ret);
1349}
1350
1351/*
1352 * Scrub the pool.
1353 */
1354int
1355zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1356{
1357	zfs_cmd_t zc = { 0 };
1358	char msg[1024];
1359	libzfs_handle_t *hdl = zhp->zpool_hdl;
1360
1361	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1362	zc.zc_cookie = type;
1363
1364	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1365		return (0);
1366
1367	(void) snprintf(msg, sizeof (msg),
1368	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1369
1370	if (errno == EBUSY)
1371		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1372	else
1373		return (zpool_standard_error(hdl, errno, msg));
1374}
1375
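/*
 * Usage sketch (illustrative only): start a full scrub and later
 * cancel it.  Assumes a valid 'zhp'.
 *
 *	if (zpool_scrub(zhp, POOL_SCRUB_EVERYTHING) != 0)
 *		return (-1);
 *	...
 *	(void) zpool_scrub(zhp, POOL_SCRUB_NONE);
 */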
1376/*
1377 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1378 * spare, but to FALSE if it is an INUSE spare.
1379 */
1380static nvlist_t *
1381vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1382    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1383{
1384	uint_t c, children;
1385	nvlist_t **child;
1386	uint64_t theguid, present;
1387	char *path;
1388	uint64_t wholedisk = 0;
1389	nvlist_t *ret;
1390	uint64_t is_log;
1391
1392	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1393
1394	if (search == NULL &&
1395	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1396		/*
1397		 * If the device has never been present since import, the only
1398		 * reliable way to match the vdev is by GUID.
1399		 */
1400		if (theguid == guid)
1401			return (nv);
1402	} else if (search != NULL &&
1403	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1404		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1405		    &wholedisk);
1406		if (wholedisk) {
1407			/*
1408			 * For whole disks, the internal path has 's0', but the
1409			 * path passed in by the user doesn't.
1410			 */
1411			if (strlen(search) == strlen(path) - 2 &&
1412			    strncmp(search, path, strlen(search)) == 0)
1413				return (nv);
1414		} else if (strcmp(search, path) == 0) {
1415			return (nv);
1416		}
1417	}
1418
1419	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1420	    &child, &children) != 0)
1421		return (NULL);
1422
1423	for (c = 0; c < children; c++) {
1424		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1425		    avail_spare, l2cache, NULL)) != NULL) {
1426			/*
1427			 * The 'is_log' value is only set for the toplevel
1428			 * vdev, not the leaf vdevs.  So we always look up the
1429			 * log device from the root of the vdev tree (where
1430			 * 'log' is non-NULL).
1431			 */
1432			if (log != NULL &&
1433			    nvlist_lookup_uint64(child[c],
1434			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1435			    is_log) {
1436				*log = B_TRUE;
1437			}
1438			return (ret);
1439		}
1440	}
1441
1442	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1443	    &child, &children) == 0) {
1444		for (c = 0; c < children; c++) {
1445			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1446			    avail_spare, l2cache, NULL)) != NULL) {
1447				*avail_spare = B_TRUE;
1448				return (ret);
1449			}
1450		}
1451	}
1452
1453	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1454	    &child, &children) == 0) {
1455		for (c = 0; c < children; c++) {
1456			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1457			    avail_spare, l2cache, NULL)) != NULL) {
1458				*l2cache = B_TRUE;
1459				return (ret);
1460			}
1461		}
1462	}
1463
1464	return (NULL);
1465}
1466
1467nvlist_t *
1468zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1469    boolean_t *l2cache, boolean_t *log)
1470{
1471	char buf[MAXPATHLEN];
1472	const char *search;
1473	char *end;
1474	nvlist_t *nvroot;
1475	uint64_t guid;
1476
1477	guid = strtoull(path, &end, 10);
1478	if (guid != 0 && *end == '\0') {
1479		search = NULL;
1480	} else if (path[0] != '/') {
1481		(void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
1482		search = buf;
1483	} else {
1484		search = path;
1485	}
1486
1487	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1488	    &nvroot) == 0);
1489
1490	*avail_spare = B_FALSE;
1491	*l2cache = B_FALSE;
1492	if (log != NULL)
1493		*log = B_FALSE;
1494	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1495	    l2cache, log));
1496}
1497
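/*
 * Usage sketch (illustrative only): look up a vdev by path and report
 * what kind of device matched.  "/dev/da1" is hypothetical.
 *
 *	boolean_t spare, l2cache, log;
 *	nvlist_t *tgt;
 *
 *	if ((tgt = zpool_find_vdev(zhp, "/dev/da1", &spare, &l2cache,
 *	    &log)) == NULL)
 *		return (-1);
 *	(void) printf("spare=%d l2cache=%d log=%d\n",
 *	    spare, l2cache, log);
 */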
1498static int
1499vdev_online(nvlist_t *nv)
1500{
1501	uint64_t ival;
1502
1503	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1504	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1505	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1506		return (0);
1507
1508	return (1);
1509}
1510
1511/*
1512 * Get the phys_path for a root pool.
1513 * Returns 0 on success; non-zero on failure.
1514 */
1515int
1516zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
1517{
1518	nvlist_t *vdev_root;
1519	nvlist_t **child;
1520	uint_t count;
1521	int i;
1522
1523	/*
1524	 * Make sure this is a root pool, as phys_path doesn't mean
1525	 * anything to a non-root pool.
1526	 */
1527	if (!pool_is_bootable(zhp))
1528		return (-1);
1529
1530	verify(nvlist_lookup_nvlist(zhp->zpool_config,
1531	    ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
1532
1533	if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
1534	    &child, &count) != 0)
1535		return (-2);
1536
1537	for (i = 0; i < count; i++) {
1538		nvlist_t **child2;
1539		uint_t count2;
1540		char *type;
1541		char *tmppath;
1542		int j;
1543
1544		if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
1545		    != 0)
1546			return (-3);
1547
1548		if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1549			if (!vdev_online(child[i]))
1550				return (-8);
1551			verify(nvlist_lookup_string(child[i],
1552			    ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
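			/*
			 * strncpy() with strlen(tmppath) copies no
			 * terminating NUL, so this relies on the caller
			 * passing in a zero-filled physpath buffer.
			 */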
1553			(void) strncpy(physpath, tmppath, strlen(tmppath));
1554		} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1555			if (nvlist_lookup_nvlist_array(child[i],
1556			    ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
1557				return (-4);
1558
1559			for (j = 0; j < count2; j++) {
1560				if (!vdev_online(child2[j]))
1561					return (-8);
1562				if (nvlist_lookup_string(child2[j],
1563				    ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
1564					return (-5);
1565
1566				if ((strlen(physpath) + strlen(tmppath)) >
1567				    MAXNAMELEN)
1568					return (-6);
1569
1570				if (strlen(physpath) == 0) {
1571					(void) strncpy(physpath, tmppath,
1572					    strlen(tmppath));
1573				} else {
1574					(void) strcat(physpath, " ");
1575					(void) strcat(physpath, tmppath);
1576				}
1577			}
1578		} else {
1579			return (-7);
1580		}
1581	}
1582
1583	return (0);
1584}
1585
1586/*
1587 * Returns TRUE if the given guid corresponds to the given type.
1588 * This is used to check for hot spares (INUSE or not), and level 2 cache
1589 * devices.
1590 */
1591static boolean_t
1592is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1593{
1594	uint64_t target_guid;
1595	nvlist_t *nvroot;
1596	nvlist_t **list;
1597	uint_t count;
1598	int i;
1599
1600	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1601	    &nvroot) == 0);
1602	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1603		for (i = 0; i < count; i++) {
1604			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1605			    &target_guid) == 0);
1606			if (guid == target_guid)
1607				return (B_TRUE);
1608		}
1609	}
1610
1611	return (B_FALSE);
1612}
1613
1614/*
1615 * Bring the specified vdev online.  The 'flags' parameter is a set of the
1616 * ZFS_ONLINE_* flags.
1617 */
1618int
1619zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1620    vdev_state_t *newstate)
1621{
1622	zfs_cmd_t zc = { 0 };
1623	char msg[1024];
1624	nvlist_t *tgt;
1625	boolean_t avail_spare, l2cache;
1626	libzfs_handle_t *hdl = zhp->zpool_hdl;
1627
1628	(void) snprintf(msg, sizeof (msg),
1629	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1630
1631	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1632	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1633	    NULL)) == NULL)
1634		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1635
1636	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1637
1638	if (avail_spare ||
1639	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1640		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1641
1642	zc.zc_cookie = VDEV_STATE_ONLINE;
1643	zc.zc_obj = flags;
1644
1645	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1646		return (zpool_standard_error(hdl, errno, msg));
1647
1648	*newstate = zc.zc_cookie;
1649	return (0);
1650}
1651
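/*
 * Usage sketch (illustrative only): bring a device online and report
 * if it came back in a degraded state.  "/dev/da1" is hypothetical.
 *
 *	vdev_state_t newstate;
 *
 *	if (zpool_vdev_online(zhp, "/dev/da1", 0, &newstate) == 0 &&
 *	    newstate != VDEV_STATE_HEALTHY)
 *		(void) printf("onlined, but state is %s\n",
 *		    zpool_state_to_name(newstate, VDEV_AUX_NONE));
 */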
1652/*
1653 * Take the specified vdev offline.
1654 */
1655int
1656zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1657{
1658	zfs_cmd_t zc = { 0 };
1659	char msg[1024];
1660	nvlist_t *tgt;
1661	boolean_t avail_spare, l2cache;
1662	libzfs_handle_t *hdl = zhp->zpool_hdl;
1663
1664	(void) snprintf(msg, sizeof (msg),
1665	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1666
1667	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1668	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1669	    NULL)) == NULL)
1670		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1671
1672	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1673
1674	if (avail_spare ||
1675	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1676		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1677
1678	zc.zc_cookie = VDEV_STATE_OFFLINE;
1679	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1680
1681	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1682		return (0);
1683
1684	switch (errno) {
1685	case EBUSY:
1686
1687		/*
1688		 * There are no other replicas of this device.
1689		 */
1690		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1691
1692	default:
1693		return (zpool_standard_error(hdl, errno, msg));
1694	}
1695}
1696
1697/*
1698 * Mark the given vdev faulted.
1699 */
1700int
1701zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1702{
1703	zfs_cmd_t zc = { 0 };
1704	char msg[1024];
1705	libzfs_handle_t *hdl = zhp->zpool_hdl;
1706
1707	(void) snprintf(msg, sizeof (msg),
1708	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1709
1710	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1711	zc.zc_guid = guid;
1712	zc.zc_cookie = VDEV_STATE_FAULTED;
1713
1714	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1715		return (0);
1716
1717	switch (errno) {
1718	case EBUSY:
1719
1720		/*
1721		 * There are no other replicas of this device.
1722		 */
1723		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1724
1725	case EEXIST:
1726		/*
1727		 * The log device has unplayed logs
1728		 * The log device has unplayed logs.
1729		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
1730
1731	default:
1732		return (zpool_standard_error(hdl, errno, msg));
1733	}
1734
1735}
1736
1737/*
1738 * Mark the given vdev degraded.
1739 */
1740int
1741zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1742{
1743	zfs_cmd_t zc = { 0 };
1744	char msg[1024];
1745	libzfs_handle_t *hdl = zhp->zpool_hdl;
1746
1747	(void) snprintf(msg, sizeof (msg),
1748	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1749
1750	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1751	zc.zc_guid = guid;
1752	zc.zc_cookie = VDEV_STATE_DEGRADED;
1753
1754	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1755		return (0);
1756
1757	return (zpool_standard_error(hdl, errno, msg));
1758}
1759
1760/*
1761 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1762 * a hot spare.
1763 */
1764static boolean_t
1765is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1766{
1767	nvlist_t **child;
1768	uint_t c, children;
1769	char *type;
1770
1771	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1772	    &children) == 0) {
1773		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1774		    &type) == 0);
1775
1776		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1777		    children == 2 && child[which] == tgt)
1778			return (B_TRUE);
1779
1780		for (c = 0; c < children; c++)
1781			if (is_replacing_spare(child[c], tgt, which))
1782				return (B_TRUE);
1783	}
1784
1785	return (B_FALSE);
1786}
1787
1788/*
1789 * Attach new_disk (fully described by nvroot) to old_disk.
1790 * If 'replacing' is specified, the new disk will replace the old one.
1791 */
1792int
1793zpool_vdev_attach(zpool_handle_t *zhp,
1794    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1795{
1796	zfs_cmd_t zc = { 0 };
1797	char msg[1024];
1798	int ret;
1799	nvlist_t *tgt;
1800	boolean_t avail_spare, l2cache, islog;
1801	uint64_t val;
1802	char *path, *newname;
1803	nvlist_t **child;
1804	uint_t children;
1805	nvlist_t *config_root;
1806	libzfs_handle_t *hdl = zhp->zpool_hdl;
1807	boolean_t rootpool = pool_is_bootable(zhp);
1808
1809	if (replacing)
1810		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1811		    "cannot replace %s with %s"), old_disk, new_disk);
1812	else
1813		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1814		    "cannot attach %s to %s"), new_disk, old_disk);
1815
1816	/*
1817	 * If this is a root pool, make sure that we're not attaching an
1818	 * EFI labeled device.
1819	 */
1820	if (rootpool && pool_uses_efi(nvroot)) {
1821		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1822		    "EFI labeled devices are not supported on root pools."));
1823		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1824	}
1825
1826	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1827	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
1828	    &islog)) == 0)
1829		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1830
1831	if (avail_spare)
1832		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1833
1834	if (l2cache)
1835		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1836
1837	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1838	zc.zc_cookie = replacing;
1839
1840	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1841	    &child, &children) != 0 || children != 1) {
1842		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1843		    "new device must be a single disk"));
1844		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1845	}
1846
1847	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1848	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1849
1850	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
1851		return (-1);
1852
1853	/*
1854	 * If the target is a hot spare that has been swapped in, we can only
1855	 * replace it with another hot spare.
1856	 */
1857	if (replacing &&
1858	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1859	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
1860	    NULL) == NULL || !avail_spare) &&
1861	    is_replacing_spare(config_root, tgt, 1)) {
1862		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1863		    "can only be replaced by another hot spare"));
1864		free(newname);
1865		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1866	}
1867
1868	/*
1869	 * If we are attempting to replace a spare, it cannot be applied to an
1870	 * already spared device.
1871	 */
1872	if (replacing &&
1873	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1874	    zpool_find_vdev(zhp, newname, &avail_spare,
1875	    &l2cache, NULL) != NULL && avail_spare &&
1876	    is_replacing_spare(config_root, tgt, 0)) {
1877		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1878		    "device has already been replaced with a spare"));
1879		free(newname);
1880		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1881	}
1882
1883	free(newname);
1884
1885	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1886		return (-1);
1887
1888	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1889
1890	zcmd_free_nvlists(&zc);
1891
1892	if (ret == 0) {
1893#ifdef sun
1894		if (rootpool) {
1895			/*
1896			 * XXX - This should be removed once we can
1897			 * automatically install the bootblocks on the
1898			 * newly attached disk.
1899			 */
1900			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
1901			    "be sure to invoke %s to make '%s' bootable.\n"),
1902			    BOOTCMD, new_disk);
1903		}
1904#endif	/* sun */
1905		return (0);
1906	}
1907
1908	switch (errno) {
1909	case ENOTSUP:
1910		/*
1911		 * Can't attach to or replace this type of vdev.
1912		 */
1913		if (replacing) {
1914			if (islog)
1915				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1916				    "cannot replace a log with a spare"));
1917			else
1918				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1919				    "cannot replace a replacing device"));
1920		} else {
1921			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1922			    "can only attach to mirrors and top-level "
1923			    "disks"));
1924		}
1925		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1926		break;
1927
1928	case EINVAL:
1929		/*
1930		 * The new device must be a single disk.
1931		 */
1932		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1933		    "new device must be a single disk"));
1934		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1935		break;
1936
1937	case EBUSY:
1938		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1939		    new_disk);
1940		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1941		break;
1942
1943	case EOVERFLOW:
1944		/*
1945		 * The new device is too small.
1946		 */
1947		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1948		    "device is too small"));
1949		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1950		break;
1951
1952	case EDOM:
1953		/*
1954		 * The new device has a different alignment requirement.
1955		 */
1956		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1957		    "devices have different sector alignment"));
1958		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1959		break;
1960
1961	case ENAMETOOLONG:
1962		/*
1963		 * The resulting top-level vdev spec won't fit in the label.
1964		 */
1965		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1966		break;
1967
1968	default:
1969		(void) zpool_standard_error(hdl, errno, msg);
1970	}
1971
1972	return (-1);
1973}
1974
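/*
 * Usage sketch (illustrative only): replace one disk with another.
 * 'nvroot' describes the single replacement disk and, as with
 * zpool_create(), is normally built by the zpool(1M) command; the
 * device paths are hypothetical.
 *
 *	if (zpool_vdev_attach(zhp, "/dev/da1", "/dev/da2", nvroot, 1) != 0)
 *		return (-1);
 */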
1975/*
1976 * Detach the specified device.
1977 */
1978int
1979zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1980{
1981	zfs_cmd_t zc = { 0 };
1982	char msg[1024];
1983	nvlist_t *tgt;
1984	boolean_t avail_spare, l2cache;
1985	libzfs_handle_t *hdl = zhp->zpool_hdl;
1986
1987	(void) snprintf(msg, sizeof (msg),
1988	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1989
1990	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1991	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1992	    NULL)) == 0)
1993		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1994
1995	if (avail_spare)
1996		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1997
1998	if (l2cache)
1999		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2000
2001	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2002
2003	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2004		return (0);
2005
2006	switch (errno) {
2007
2008	case ENOTSUP:
2009		/*
2010		 * Can't detach from this type of vdev.
2011		 */
2012		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2013		    "applicable to mirror and replacing vdevs"));
2014		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
2015		break;
2016
2017	case EBUSY:
2018		/*
2019		 * There are no other replicas of this device.
2020		 */
2021		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2022		break;
2023
2024	default:
2025		(void) zpool_standard_error(hdl, errno, msg);
2026	}
2027
2028	return (-1);
2029}
2030
2031/*
2032 * Remove the given device.  Currently, this is supported only for hot spares
2033 * and level 2 cache devices.
2034 */
2035int
2036zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
2037{
2038	zfs_cmd_t zc = { 0 };
2039	char msg[1024];
2040	nvlist_t *tgt;
2041	boolean_t avail_spare, l2cache;
2042	libzfs_handle_t *hdl = zhp->zpool_hdl;
2043
2044	(void) snprintf(msg, sizeof (msg),
2045	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
2046
2047	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2048	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2049	    NULL)) == NULL)
2050		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2051
2052	if (!avail_spare && !l2cache) {
2053		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2054		    "only inactive hot spares or cache devices "
2055		    "can be removed"));
2056		return (zfs_error(hdl, EZFS_NODEVICE, msg));
2057	}
2058
2059	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2060
2061	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
2062		return (0);
2063
2064	return (zpool_standard_error(hdl, errno, msg));
2065}
2066
2067/*
2068 * Clear the errors for the pool, or the particular device if specified.
2069 */
2070int
2071zpool_clear(zpool_handle_t *zhp, const char *path)
2072{
2073	zfs_cmd_t zc = { 0 };
2074	char msg[1024];
2075	nvlist_t *tgt;
2076	boolean_t avail_spare, l2cache;
2077	libzfs_handle_t *hdl = zhp->zpool_hdl;
2078
2079	if (path)
2080		(void) snprintf(msg, sizeof (msg),
2081		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2082		    path);
2083	else
2084		(void) snprintf(msg, sizeof (msg),
2085		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
2086		    zhp->zpool_name);
2087
2088	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2089	if (path) {
2090		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
2091		    &l2cache, NULL)) == 0)
2092	    &l2cache, NULL)) == NULL)
2093
2094		/*
2095		 * Don't allow error clearing for hot spares.  Do allow
2096		 * error clearing for l2cache devices.
2097		 */
2098		if (avail_spare)
2099			return (zfs_error(hdl, EZFS_ISSPARE, msg));
2100
2101		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2102		    &zc.zc_guid) == 0);
2103	}
2104
2105	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
2106		return (0);
2107
2108	return (zpool_standard_error(hdl, errno, msg));
2109}
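
/*
 * Usage sketch for zpool_clear() (not compiled): a NULL path clears error
 * counts for the entire pool, while a vdev path clears a single device.
 * The helper name is an assumption for illustration.
 */
#if 0
#include <libzfs.h>

static int
clear_pool_errors(libzfs_handle_t *hdl, const char *pool)
{
	zpool_handle_t *zhp;
	int ret;

	if ((zhp = zpool_open(hdl, pool)) == NULL)
		return (-1);
	ret = zpool_clear(zhp, NULL);	/* NULL == whole pool */
	zpool_close(zhp);
	return (ret);
}
#endif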
2110
2111/*
2112 * Similar to zpool_clear(), but takes a GUID (used by fmd).
2113 */
2114int
2115zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2116{
2117	zfs_cmd_t zc = { 0 };
2118	char msg[1024];
2119	libzfs_handle_t *hdl = zhp->zpool_hdl;
2120
2121	(void) snprintf(msg, sizeof (msg),
2122	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2123	    (u_longlong_t)guid);
2124
2125	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2126	zc.zc_guid = guid;
2127
2128	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2129		return (0);
2130
2131	return (zpool_standard_error(hdl, errno, msg));
2132}
2133
2134/*
2135 * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
2136 * hierarchy.
2137 */
2138int
2139zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
2140    void *data)
2141{
2142	libzfs_handle_t *hdl = zhp->zpool_hdl;
2143	char (*paths)[MAXPATHLEN];
2144	char path[MAXPATHLEN];
2145	size_t size = 4;
2146	int curr, fd, base, ret = 0;
2147	DIR *dirp;
2148	struct dirent *dp;
2149	struct stat st;
2150
2151	if ((base = open(ZVOL_FULL_DEV_DIR, O_RDONLY)) < 0)
2152		return (errno == ENOENT ? 0 : -1);
2153
2154	(void) snprintf(path, sizeof (path), "%s/%s", ZVOL_FULL_DEV_DIR,
2155	    zhp->zpool_name);
2156	if (stat(path, &st) != 0) {
2157		int err = errno;
2158		(void) close(base);
2159		return (err == ENOENT ? 0 : -1);
2160	}
2161
2162	/*
2163	 * Oddly this wasn't a directory -- ignore that failure since we
2164	 * know there are no links lower in the (non-existent) hierarchy.
2165	 */
2166	if (!S_ISDIR(st.st_mode)) {
2167		(void) close(base);
2168		return (0);
2169	}
2170
2171	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
2172		(void) close(base);
2173		return (-1);
2174	}
2175
2176	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
2177	curr = 0;
2178
2179	while (curr >= 0) {
2180		(void) snprintf(path, sizeof (path), "%s/%s", ZVOL_FULL_DEV_DIR,
2181		    paths[curr]);
2182		if (lstat(path, &st) != 0)
2183			goto err;
2184
2185		if (S_ISDIR(st.st_mode)) {
2186			if ((dirp = opendir(path)) == NULL) {
2187				goto err;
2188			}
2189
2190			while ((dp = readdir(dirp)) != NULL) {
2191				if (dp->d_name[0] == '.')
2192					continue;
2193
2194				if (curr + 1 == size) {
2195					paths = zfs_realloc(hdl, paths,
2196					    size * sizeof (paths[0]),
2197					    size * 2 * sizeof (paths[0]));
2198					if (paths == NULL) {
2199						(void) closedir(dirp);
2200						goto err;
2201					}
2202
2203					size *= 2;
2204				}
2205
2206				(void) strlcpy(paths[curr + 1], paths[curr],
2207				    sizeof (paths[curr + 1]));
2208				(void) strlcat(paths[curr], "/",
2209				    sizeof (paths[curr]));
2210				(void) strlcat(paths[curr], dp->d_name,
2211				    sizeof (paths[curr]));
2212				curr++;
2213			}
2214
2215			(void) closedir(dirp);
2216
2217		} else {
2218			if ((ret = cb(paths[curr], data)) != 0)
2219				break;
2220		}
2221
2222		curr--;
2223	}
2224
2225	free(paths);
2226	(void) close(base);
2227
2228	return (ret);
2229
2230err:
2231	free(paths);
2232	(void) close(base);
2233	return (-1);
2234}
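
/*
 * Callback sketch for zpool_iter_zvol() (not compiled): the walk hands the
 * callback each dataset name found under /dev/zvol/dsk/<pool>; returning
 * nonzero stops the iteration and is passed back to the caller.
 */
#if 0
#include <libzfs.h>
#include <stdio.h>

/*ARGSUSED*/
static int
print_zvol(const char *dataset, void *data)
{
	(void) printf("zvol: %s\n", dataset);
	return (0);		/* keep walking */
}

/* invoked as: ret = zpool_iter_zvol(zhp, print_zvol, NULL); */
#endif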
2235
2236typedef struct zvol_cb {
2237	zpool_handle_t *zcb_pool;
2238	boolean_t zcb_create;
2239} zvol_cb_t;
2240
2241/*ARGSUSED*/
2242static int
2243do_zvol_create(zfs_handle_t *zhp, void *data)
2244{
2245	int ret = 0;
2246
2247	if (ZFS_IS_VOLUME(zhp)) {
2248		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2249		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
2250	}
2251
2252	if (ret == 0)
2253		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
2254
2255	zfs_close(zhp);
2256
2257	return (ret);
2258}
2259
2260/*
2261 * Iterate over all zvols in the pool and make any necessary minor nodes.
2262 */
2263int
2264zpool_create_zvol_links(zpool_handle_t *zhp)
2265{
2266	zfs_handle_t *zfp;
2267	int ret;
2268
2269	/*
2270	 * If the pool is unavailable, just return success.
2271	 */
2272	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
2273	    zhp->zpool_name)) == NULL)
2274		return (0);
2275
2276	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
2277
2278	zfs_close(zfp);
2279	return (ret);
2280}
2281
2282static int
2283do_zvol_remove(const char *dataset, void *data)
2284{
2285	zpool_handle_t *zhp = data;
2286
2287	return (zvol_remove_link(zhp->zpool_hdl, dataset));
2288}
2289
2290/*
2291 * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
2292 * by examining the /dev links so that a corrupted pool doesn't impede this
2293 * operation.
2294 */
2295int
2296zpool_remove_zvol_links(zpool_handle_t *zhp)
2297{
2298	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
2299}
2300
2301/*
2302 * Convert from a devid string to a path.
2303 */
2304static char *
2305devid_to_path(char *devid_str)
2306{
2307	ddi_devid_t devid;
2308	char *minor;
2309	char *path;
2310	devid_nmlist_t *list = NULL;
2311	int ret;
2312
2313	if (devid_str_decode(devid_str, &devid, &minor) != 0)
2314		return (NULL);
2315
2316	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2317
2318	devid_str_free(minor);
2319	devid_free(devid);
2320
2321	if (ret != 0)
2322		return (NULL);
2323
2324	/* copy the name out before freeing the list it points into */
2325	path = strdup(list[0].devname);
2326
2327	devid_free_nmlist(list);
2328
2329	return (path);
2330}
2331
2332/*
2333 * Convert from a path to a devid string.
2334 */
2335static char *
2336path_to_devid(const char *path)
2337{
2338	int fd;
2339	ddi_devid_t devid;
2340	char *minor, *ret;
2341
2342	if ((fd = open(path, O_RDONLY)) < 0)
2343		return (NULL);
2344
2345	minor = NULL;
2346	ret = NULL;
2347	if (devid_get(fd, &devid) == 0) {
2348		if (devid_get_minor_name(fd, &minor) == 0)
2349			ret = devid_str_encode(devid, minor);
2350		if (minor != NULL)
2351			devid_str_free(minor);
2352		devid_free(devid);
2353	}
2354	(void) close(fd);
2355
2356	return (ret);
2357}
2358
2359/*
2360 * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2361 * ignore any failure here, since a common case is for an unprivileged user to
2362 * type 'zpool status', and we'll display the correct information anyway.
2363 */
2364static void
2365set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2366{
2367	zfs_cmd_t zc = { 0 };
2368
2369	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2370	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
2371	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2372	    &zc.zc_guid) == 0);
2373
2374	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2375}
2376
2377/*
2378 * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2379 * we use that, stripping any leading _PATH_DEV prefix; if not, we use the type.
2380 * We also check if this is a whole disk, in which case we strip off the
2381 * trailing 's0' slice name.
2382 *
2383 * This routine is also responsible for identifying when disks have been
2384 * reconfigured in a new location.  The kernel will have opened the device by
2385 * devid, but the path will still refer to the old location.  To catch this, we
2386 * first do a path -> devid translation (which is fast for the common case).  If
2387 * the devid matches, we're done.  If not, we do a reverse devid -> path
2388 * translation and issue the appropriate ioctl() to update the path of the vdev.
2389 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2390 * of these checks.
2391 */
2392char *
2393zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2394{
2395	char *path, *devid;
2396	uint64_t value;
2397	char buf[64];
2398	vdev_stat_t *vs;
2399	uint_t vsc;
2400
2401	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2402	    &value) == 0) {
2403		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2404		    &value) == 0);
2405		(void) snprintf(buf, sizeof (buf), "%llu",
2406		    (u_longlong_t)value);
2407		path = buf;
2408	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2409
2410		/*
2411		 * If the device is dead (faulted, offline, etc) then don't
2412		 * bother opening it.  Otherwise we may be forcing the user to
2413		 * open a misbehaving device, which can have undesirable
2414		 * effects.
2415		 */
2416		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2417		    (uint64_t **)&vs, &vsc) != 0 ||
2418		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2419		    zhp != NULL &&
2420		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2421			/*
2422			 * Determine if the current path is correct.
2423			 */
2424			char *newdevid = path_to_devid(path);
2425
2426			if (newdevid == NULL ||
2427			    strcmp(devid, newdevid) != 0) {
2428				char *newpath;
2429
2430				if ((newpath = devid_to_path(devid)) != NULL) {
2431					/*
2432					 * Update the path appropriately.
2433					 */
2434					set_path(zhp, nv, newpath);
2435					if (nvlist_add_string(nv,
2436					    ZPOOL_CONFIG_PATH, newpath) == 0)
2437						verify(nvlist_lookup_string(nv,
2438						    ZPOOL_CONFIG_PATH,
2439						    &path) == 0);
2440					free(newpath);
2441				}
2442			}
2443
2444			if (newdevid)
2445				devid_str_free(newdevid);
2446		}
2447
2448		if (strncmp(path, _PATH_DEV, sizeof (_PATH_DEV) - 1) == 0)
2449			path += sizeof (_PATH_DEV) - 1;
2450
2451		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2452		    &value) == 0 && value) {
2453			char *tmp = zfs_strdup(hdl, path);
2454			if (tmp == NULL)
2455				return (NULL);
2456			tmp[strlen(path) - 2] = '\0';
2457			return (tmp);
2458		}
2459	} else {
2460		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2461
2462		/*
2463		 * If it's a raidz device, we need to stick in the parity level.
2464		 */
2465		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2466			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2467			    &value) == 0);
2468			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2469			    (u_longlong_t)value);
2470			path = buf;
2471		}
2472	}
2473
2474	return (zfs_strdup(hdl, path));
2475}
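
/*
 * Usage sketch for zpool_vdev_name() (not compiled): the returned string is
 * allocated with zfs_strdup() and must be freed by the caller.  The helper
 * name below is an assumption for illustration.
 */
#if 0
#include <libzfs.h>
#include <stdio.h>
#include <stdlib.h>

static void
print_root_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp)
{
	nvlist_t *config, *nvroot;
	char *name;

	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0)
		return;
	if ((name = zpool_vdev_name(hdl, zhp, nvroot)) != NULL) {
		(void) printf("%s\n", name);
		free(name);
	}
}
#endif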
2476
2477static int
2478zbookmark_compare(const void *a, const void *b)
2479{
2480	return (memcmp(a, b, sizeof (zbookmark_t)));
2481}
2482
2483/*
2484 * Retrieve the persistent error log, uniquify the members, and return to the
2485 * caller.
2486 */
2487int
2488zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2489{
2490	zfs_cmd_t zc = { 0 };
2491	uint64_t count;
2492	zbookmark_t *zb = NULL;
2493	int i;
2494
2495	/*
2496	 * Retrieve the raw error list from the kernel.  If the number of errors
2497	 * has increased, allocate more space and continue until we get the
2498	 * entire list.
2499	 */
2500	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2501	    &count) == 0);
2502	if (count == 0)
2503		return (0);
2504	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2505	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2506		return (-1);
2507	zc.zc_nvlist_dst_size = count;
2508	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2509	for (;;) {
2510		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2511		    &zc) != 0) {
2512			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2513			if (errno == ENOMEM) {
2514				count = zc.zc_nvlist_dst_size;
2515				if ((zc.zc_nvlist_dst = (uintptr_t)
2516				    zfs_alloc(zhp->zpool_hdl, count *
2517				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2518					return (-1);
2519			} else {
2520				return (-1);
2521			}
2522		} else {
2523			break;
2524		}
2525	}
2526
2527	/*
2528	 * Sort the resulting bookmarks.  This is a little confusing due to the
2529	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2530	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
2531	 * _not_ copied as part of the process.  So we point the start of our
2532	 * array appropriately and decrement the total number of elements.
2533	 */
2534	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2535	    zc.zc_nvlist_dst_size;
2536	count -= zc.zc_nvlist_dst_size;
2537
2538	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2539
2540	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2541
2542	/*
2543	 * Fill in the nverrlistp with nvlists of dataset and object numbers.
2544	 */
2545	for (i = 0; i < count; i++) {
2546		nvlist_t *nv;
2547
2548		/* ignoring zb_blkid and zb_level for now */
2549		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2550		    zb[i-1].zb_object == zb[i].zb_object)
2551			continue;
2552
2553		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2554			goto nomem;
2555		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2556		    zb[i].zb_objset) != 0) {
2557			nvlist_free(nv);
2558			goto nomem;
2559		}
2560		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2561		    zb[i].zb_object) != 0) {
2562			nvlist_free(nv);
2563			goto nomem;
2564		}
2565		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2566			nvlist_free(nv);
2567			goto nomem;
2568		}
2569		nvlist_free(nv);
2570	}
2571
2572	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2573	return (0);
2574
2575nomem:
2576	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2577	return (no_memory(zhp->zpool_hdl));
2578}
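
/*
 * Consumption sketch for zpool_get_errlog() (not compiled): each member of
 * the returned nvlist carries ZPOOL_ERR_DATASET and ZPOOL_ERR_OBJECT
 * numbers, which zpool_obj_to_path() below can turn into a pathname.
 */
#if 0
#include <libzfs.h>
#include <stdio.h>

static void
print_errlog(zpool_handle_t *zhp)
{
	nvlist_t *nverrlist = NULL;
	nvpair_t *elem = NULL;

	if (zpool_get_errlog(zhp, &nverrlist) != 0)
		return;
	while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) {
		nvlist_t *nv;
		uint64_t ds, obj;

		if (nvpair_value_nvlist(elem, &nv) == 0 &&
		    nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, &ds) == 0 &&
		    nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, &obj) == 0)
			(void) printf("<0x%llx>:<0x%llx>\n",
			    (u_longlong_t)ds, (u_longlong_t)obj);
	}
	nvlist_free(nverrlist);
}
#endif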
2579
2580/*
2581 * Upgrade a ZFS pool to the latest on-disk version.
2582 */
2583int
2584zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2585{
2586	zfs_cmd_t zc = { 0 };
2587	libzfs_handle_t *hdl = zhp->zpool_hdl;
2588
2589	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2590	zc.zc_cookie = new_version;
2591
2592	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2593		return (zpool_standard_error_fmt(hdl, errno,
2594		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2595		    zhp->zpool_name));
2596	return (0);
2597}
2598
2599void
2600zpool_set_history_str(const char *subcommand, int argc, char **argv,
2601    char *history_str)
2602{
2603	int i;
2604
2605	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2606	for (i = 1; i < argc; i++) {
2607		if (strlen(history_str) + 1 + strlen(argv[i]) >
2608		    HIS_MAX_RECORD_LEN)
2609			break;
2610		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2611		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2612	}
2613}
2614
2615/*
2616 * Stage command history for logging.
2617 */
2618int
2619zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2620{
2621	if (history_str == NULL)
2622		return (EINVAL);
2623
2624	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2625		return (EINVAL);
2626
2627	if (hdl->libzfs_log_str != NULL)
2628		free(hdl->libzfs_log_str);
2629
2630	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2631		return (no_memory(hdl));
2632
2633	return (0);
2634}
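
/*
 * Usage sketch (not compiled): how a command-line tool might stage its
 * invocation for the pool history before issuing ioctls.  The helper name
 * is an assumption; HIS_MAX_RECORD_LEN comes from sys/spa.h.
 */
#if 0
#include <libzfs.h>
#include <sys/spa.h>

static int
stage_command(libzfs_handle_t *hdl, int argc, char **argv)
{
	char history_str[HIS_MAX_RECORD_LEN];

	zpool_set_history_str("zpool", argc, argv, history_str);
	return (zpool_stage_history(hdl, history_str));
}
#endif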
2635
2636/*
2637 * Perform ioctl to get some command history of a pool.
2638 *
2639 * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2640 * logical offset of the history buffer to start reading from.
2641 *
2642 * Upon return, 'off' is the next logical offset to read from and
2643	 * 'len' is the actual number of bytes read into 'buf'.
2644 */
2645static int
2646get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2647{
2648	zfs_cmd_t zc = { 0 };
2649	libzfs_handle_t *hdl = zhp->zpool_hdl;
2650
2651	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2652
2653	zc.zc_history = (uint64_t)(uintptr_t)buf;
2654	zc.zc_history_len = *len;
2655	zc.zc_history_offset = *off;
2656
2657	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2658		switch (errno) {
2659		case EPERM:
2660			return (zfs_error_fmt(hdl, EZFS_PERM,
2661			    dgettext(TEXT_DOMAIN,
2662			    "cannot show history for pool '%s'"),
2663			    zhp->zpool_name));
2664		case ENOENT:
2665			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2666			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2667			    "'%s'"), zhp->zpool_name));
2668		case ENOTSUP:
2669			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2670			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2671			    "'%s', pool must be upgraded"), zhp->zpool_name));
2672		default:
2673			return (zpool_standard_error_fmt(hdl, errno,
2674			    dgettext(TEXT_DOMAIN,
2675			    "cannot get history for '%s'"), zhp->zpool_name));
2676		}
2677	}
2678
2679	*len = zc.zc_history_len;
2680	*off = zc.zc_history_offset;
2681
2682	return (0);
2683}
2684
2685/*
2686 * Process the buffer of nvlists, unpacking and storing each nvlist record
2687 * into 'records'.  'leftover' is set to the number of bytes that weren't
2688 * processed as there wasn't a complete record.
2689 */
2690static int
2691zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2692    nvlist_t ***records, uint_t *numrecords)
2693{
2694	uint64_t reclen;
2695	nvlist_t *nv;
2696	int i;
2697
2698	while (bytes_read > sizeof (reclen)) {
2699
2700		/* get length of packed record (stored as little endian) */
2701		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2702			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2703
2704		if (bytes_read < sizeof (reclen) + reclen)
2705			break;
2706
2707		/* unpack record */
2708		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2709			return (ENOMEM);
2710		bytes_read -= sizeof (reclen) + reclen;
2711		buf += sizeof (reclen) + reclen;
2712
2713		/* add record to nvlist array */
2714		(*numrecords)++;
2715		if (ISP2(*numrecords + 1)) {
2716			nvlist_t **tmp = realloc(*records,
2717			    *numrecords * 2 * sizeof (nvlist_t *));
2718			if (tmp == NULL) {
				/* keep the old array intact for the caller */
				(*numrecords)--;
				nvlist_free(nv);
				return (ENOMEM);
			}
			*records = tmp;
		}
2719		(*records)[*numrecords - 1] = nv;
2720	}
2721
2722	*leftover = bytes_read;
2723	return (0);
2724}
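
/*
 * Worked example of the record framing above (for illustration): a record
 * whose packed nvlist occupies 42 (0x2a) bytes is preceded by the eight
 * little-endian length bytes 2a 00 00 00 00 00 00 00, so the loop computes
 * reclen = 0x2a + (0x00 << 8) + ... = 42 and then unpacks the following
 * 42 bytes as one nvlist.
 */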
2725
2726#define	HIS_BUF_LEN	(128*1024)
2727
2728/*
2729 * Retrieve the command history of a pool.
2730 */
2731int
2732zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2733{
2734	char buf[HIS_BUF_LEN];
2735	uint64_t off = 0;
2736	nvlist_t **records = NULL;
2737	uint_t numrecords = 0;
2738	int err, i;
2739
2740	do {
2741		uint64_t bytes_read = sizeof (buf);
2742		uint64_t leftover;
2743
2744		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2745			break;
2746
2747		/* if nothing else was read in, we're at EOF, just return */
2748		if (!bytes_read)
2749			break;
2750
2751		if ((err = zpool_history_unpack(buf, bytes_read,
2752		    &leftover, &records, &numrecords)) != 0)
2753			break;
2754		off -= leftover;
2755
2756		/* CONSTCOND */
2757	} while (1);
2758
2759	if (!err) {
2760		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2761		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2762		    records, numrecords) == 0);
2763	}
2764	for (i = 0; i < numrecords; i++)
2765		nvlist_free(records[i]);
2766	free(records);
2767
2768	return (err);
2769}
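
/*
 * Consumption sketch for zpool_get_history() (not compiled): the records
 * come back as an nvlist array under ZPOOL_HIST_RECORD; ZPOOL_HIST_TIME and
 * ZPOOL_HIST_CMD are the fields 'zpool history' prints.
 */
#if 0
#include <libzfs.h>
#include <stdio.h>

static void
print_history(zpool_handle_t *zhp)
{
	nvlist_t *nvhis, **records;
	uint_t nrec, i;
	uint64_t tsec;
	char *cmd;

	if (zpool_get_history(zhp, &nvhis) != 0)
		return;
	if (nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
	    &records, &nrec) == 0) {
		for (i = 0; i < nrec; i++) {
			if (nvlist_lookup_uint64(records[i],
			    ZPOOL_HIST_TIME, &tsec) == 0 &&
			    nvlist_lookup_string(records[i],
			    ZPOOL_HIST_CMD, &cmd) == 0)
				(void) printf("%llu %s\n",
				    (u_longlong_t)tsec, cmd);
		}
	}
	nvlist_free(nvhis);
}
#endif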
2770
2771void
2772zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2773    char *pathname, size_t len)
2774{
2775	zfs_cmd_t zc = { 0 };
2776	boolean_t mounted = B_FALSE;
2777	char *mntpnt = NULL;
2778	char dsname[MAXNAMELEN];
2779
2780	if (dsobj == 0) {
2781		/* special case for the MOS */
2782		(void) snprintf(pathname, len, "<metadata>:<0x%llx>",
		    (u_longlong_t)obj);
2783		return;
2784	}
2785
2786	/* get the dataset's name */
2787	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2788	zc.zc_obj = dsobj;
2789	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2790	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2791		/* just write out a path of two object numbers */
2792		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2793		    (u_longlong_t)dsobj, (u_longlong_t)obj);
2794		return;
2795	}
2796	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2797
2798	/* find out if the dataset is mounted */
2799	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2800
2801	/* get the corrupted object's path */
2802	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2803	zc.zc_obj = obj;
2804	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2805	    &zc) == 0) {
2806		if (mounted) {
2807			(void) snprintf(pathname, len, "%s%s", mntpnt,
2808			    zc.zc_value);
2809		} else {
2810			(void) snprintf(pathname, len, "%s:%s",
2811			    dsname, zc.zc_value);
2812		}
2813	} else {
2814		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
		    (u_longlong_t)obj);
2815	}
2816	free(mntpnt);
2817}
2818
2819#define	RDISK_ROOT	"/dev/rdsk"
2820#define	BACKUP_SLICE	"s2"
2821/*
2822 * Don't start the slice at the default block of 34; many storage
2823 * devices will use a stripe width of 128k, so start there instead.
2824 */
2825#define	NEW_START_BLOCK	256
2826
2827#if defined(sun)
2828/*
2829 * Read the EFI label from the config, if a label does not exist then
2830 * pass back the error to the caller. If the caller has passed a non-NULL
2831 * diskaddr argument then we set it to the starting address of the EFI
2832 * partition.
2833 */
2834static int
2835read_efi_label(nvlist_t *config, diskaddr_t *sb)
2836{
2837	char *path;
2838	int fd;
2839	char diskname[MAXPATHLEN];
2840	int err = -1;
2841
2842	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
2843		return (err);
2844
2845	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
2846	    strrchr(path, '/'));
2847	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2848		struct dk_gpt *vtoc;
2849
2850		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
2851			if (sb != NULL)
2852				*sb = vtoc->efi_parts[0].p_start;
2853			efi_free(vtoc);
2854		}
2855		(void) close(fd);
2856	}
2857	return (err);
2858}
2859
2860/*
2861 * Determine where a partition starts on a disk in the current
2862 * configuration.
2863 */
2864static diskaddr_t
2865find_start_block(nvlist_t *config)
2866{
2867	nvlist_t **child;
2868	uint_t c, children;
2869	diskaddr_t sb = MAXOFFSET_T;
2870	uint64_t wholedisk;
2871
2872	if (nvlist_lookup_nvlist_array(config,
2873	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2874		if (nvlist_lookup_uint64(config,
2875		    ZPOOL_CONFIG_WHOLE_DISK,
2876		    &wholedisk) != 0 || !wholedisk) {
2877			return (MAXOFFSET_T);
2878		}
2879		if (read_efi_label(config, &sb) < 0)
2880			sb = MAXOFFSET_T;
2881		return (sb);
2882	}
2883
2884	for (c = 0; c < children; c++) {
2885		sb = find_start_block(child[c]);
2886		if (sb != MAXOFFSET_T) {
2887			return (sb);
2888		}
2889	}
2890	return (MAXOFFSET_T);
2891}
2892#endif /* sun */
2893
2894/*
2895 * Label an individual disk.  The name provided is the short name,
2896 * stripped of any leading /dev path.
2897 */
2898int
2899zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2900{
2901#if defined(sun)
2902	char path[MAXPATHLEN];
2903	struct dk_gpt *vtoc;
2904	int fd;
2905	size_t resv = EFI_MIN_RESV_SIZE;
2906	uint64_t slice_size;
2907	diskaddr_t start_block;
2908	char errbuf[1024];
2909
2910	/* prepare an error message just in case */
2911	(void) snprintf(errbuf, sizeof (errbuf),
2912	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2913
2914	if (zhp) {
2915		nvlist_t *nvroot;
2916
2917		if (pool_is_bootable(zhp)) {
2918			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2919			    "EFI labeled devices are not supported on root "
2920			    "pools."));
2921			return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
2922		}
2923
2924		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2925		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2926
2927		if (zhp->zpool_start_block == 0)
2928			start_block = find_start_block(nvroot);
2929		else
2930			start_block = zhp->zpool_start_block;
2931		zhp->zpool_start_block = start_block;
2932	} else {
2933		/* new pool */
2934		start_block = NEW_START_BLOCK;
2935	}
2936
2937	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2938	    BACKUP_SLICE);
2939
2940	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2941		/*
2942		 * This shouldn't happen.  We've long since verified that this
2943		 * is a valid device.
2944		 */
2945		zfs_error_aux(hdl,
2946		    dgettext(TEXT_DOMAIN, "unable to open device"));
2947		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2948	}
2949
2950	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2951		/*
2952		 * The only way this can fail is if we run out of memory, or we
2953		 * were unable to read the disk's capacity
2954		 */
2955		if (errno == ENOMEM)
2956			(void) no_memory(hdl);
2957
2958		(void) close(fd);
2959		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2960		    "unable to read disk capacity of '%s'"), name);
2961
2962		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2963	}
2964
2965	slice_size = vtoc->efi_last_u_lba + 1;
2966	slice_size -= EFI_MIN_RESV_SIZE;
2967	if (start_block == MAXOFFSET_T)
2968		start_block = NEW_START_BLOCK;
2969	slice_size -= start_block;
2970
2971	vtoc->efi_parts[0].p_start = start_block;
2972	vtoc->efi_parts[0].p_size = slice_size;
2973
2974	/*
2975	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2976	 * disposable by some EFI utilities (since EFI doesn't have a backup
2977	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2978	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2979	 * etc. were all pretty specific.  V_USR is as close to reality as we
2980	 * can get, in the absence of V_OTHER.
2981	 */
2982	vtoc->efi_parts[0].p_tag = V_USR;
2983	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2984
2985	vtoc->efi_parts[8].p_start = slice_size + start_block;
2986	vtoc->efi_parts[8].p_size = resv;
2987	vtoc->efi_parts[8].p_tag = V_RESERVED;
2988
2989	if (efi_write(fd, vtoc) != 0) {
2990		/*
2991		 * Some block drivers (like pcata) may not support EFI
2992		 * GPT labels.  Print out a helpful error message directing
2993		 * the user to manually label the disk and give a
2994		 * specific slice.
2995		 */
2996		(void) close(fd);
2997		efi_free(vtoc);
2998
2999		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3000		    "try using fdisk(1M) and then provide a specific slice"));
3001		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
3002	}
3003
3004	(void) close(fd);
3005	efi_free(vtoc);
3006#endif /* sun */
3007	return (0);
3008}
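
/*
 * Usage sketch for zpool_label_disk() (not compiled): a NULL pool handle
 * means the disk is destined for a brand-new pool; "c1t0d0" is an assumed
 * Solaris-style short disk name, already stripped of any /dev prefix.
 */
#if 0
#include <libzfs.h>

static int
label_new_disk(libzfs_handle_t *hdl)
{
	return (zpool_label_disk(hdl, NULL, "c1t0d0"));
}
#endif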
3009
3010static boolean_t
3011supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
3012{
3013	char *type;
3014	nvlist_t **child;
3015	uint_t children, c;
3016
3017	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
3018	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
3019	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
3020	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
3021	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
3022		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3023		    "vdev type '%s' is not supported"), type);
3024		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
3025		return (B_FALSE);
3026	}
3027	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
3028	    &child, &children) == 0) {
3029		for (c = 0; c < children; c++) {
3030			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
3031				return (B_FALSE);
3032		}
3033	}
3034	return (B_TRUE);
3035}
3036
3037/*
3038 * check if this zvol is allowable for use as a dump device; zero if
3039 * it is, > 0 if it isn't, < 0 if it isn't a zvol
3040 */
3041int
3042zvol_check_dump_config(char *arg)
3043{
3044	zpool_handle_t *zhp = NULL;
3045	nvlist_t *config, *nvroot;
3046	char *p, *volname;
3047	nvlist_t **top;
3048	uint_t toplevels;
3049	libzfs_handle_t *hdl;
3050	char errbuf[1024];
3051	char poolname[ZPOOL_MAXNAMELEN];
3052	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
3053	int ret = 1;
3054
3055	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
3056		return (-1);
3057	}
3058
3059	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3060	    "dump is not supported on device '%s'"), arg);
3061
3062	if ((hdl = libzfs_init()) == NULL)
3063		return (1);
3064	libzfs_print_on_error(hdl, B_TRUE);
3065
3066	volname = arg + pathlen;
3067
3068	/* check the configuration of the pool */
3069	if ((p = strchr(volname, '/')) == NULL) {
3070		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3071		    "malformed dataset name"));
3072		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3073		goto out;
3074	} else if (p - volname >= ZFS_MAXNAMELEN) {
3075		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3076		    "dataset name is too long"));
3077		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
3078		goto out;
3079	} else {
3080		(void) strncpy(poolname, volname, p - volname);
3081		poolname[p - volname] = '\0';
3082	}
3083
3084	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
3085		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3086		    "could not open pool '%s'"), poolname);
3087		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
3088		goto out;
3089	}
3090	config = zpool_get_config(zhp, NULL);
3091	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
3092	    &nvroot) != 0) {
3093		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3094		    "could not obtain vdev configuration for '%s'"), poolname);
3095		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
3096		goto out;
3097	}
3098
3099	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
3100	    &top, &toplevels) == 0);
3101	if (toplevels != 1) {
3102		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3103		    "'%s' has multiple top level vdevs"), poolname);
3104		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
3105		goto out;
3106	}
3107
3108	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
3109		goto out;
3110	}
3111	ret = 0;
3112
3113out:
3114	if (zhp)
3115		zpool_close(zhp);
3116	libzfs_fini(hdl);
3117	return (ret);
3118}
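
/*
 * Usage sketch for zvol_check_dump_config() (not compiled): a dump
 * administration utility would call this before accepting a zvol as the
 * dump device; the helper name and path are assumptions.
 */
#if 0
#include <libzfs.h>

static int
dump_device_ok(char *path)
{
	/* 0: usable, > 0: unsupported configuration, < 0: not a zvol */
	return (zvol_check_dump_config(path) == 0);
}
#endif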
3119