Deleted Added
full compact
libzfs_import.c (224171) libzfs_import.c (228103)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2011 by Delphix. All rights reserved.
23 */
24
25/*
26 * Pool import support functions.
27 *
28 * To import a pool, we rely on reading the configuration information from the
29 * ZFS label of each device. If we successfully read the label, then we
30 * organize the configuration information in the following hierarchy:
31 *
32 * pool guid -> toplevel vdev guid -> label txg
33 *
34 * Duplicate entries matching this same tuple will be discarded. Once we have
35 * examined every device, we pick the best label txg config for each toplevel
36 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
37 * update any paths that have changed. Finally, we attempt to import the pool
38 * using our derived config, and record the results.
39 */
40
41#include <ctype.h>
42#include <devid.h>
43#include <dirent.h>
44#include <errno.h>
45#include <libintl.h>
46#include <stddef.h>
47#include <stdlib.h>
48#include <string.h>
49#include <sys/stat.h>
50#include <unistd.h>
51#include <fcntl.h>
52#include <thread_pool.h>
53#include <libgeom.h>
54
55#include <sys/vdev_impl.h>
56
57#include "libzfs.h"
58#include "libzfs_impl.h"
59
60/*
61 * Intermediate structures used to gather configuration information.
62 */
63typedef struct config_entry {
64 uint64_t ce_txg;
65 nvlist_t *ce_config;
66 struct config_entry *ce_next;
67} config_entry_t;
68
69typedef struct vdev_entry {
70 uint64_t ve_guid;
71 config_entry_t *ve_configs;
72 struct vdev_entry *ve_next;
73} vdev_entry_t;
74
75typedef struct pool_entry {
76 uint64_t pe_guid;
77 vdev_entry_t *pe_vdevs;
78 struct pool_entry *pe_next;
79} pool_entry_t;
80
81typedef struct name_entry {
82 char *ne_name;
83 uint64_t ne_guid;
84 struct name_entry *ne_next;
85} name_entry_t;
86
87typedef struct pool_list {
88 pool_entry_t *pools;
89 name_entry_t *names;
90} pool_list_t;
91
92static char *
93get_devid(const char *path)
94{
95 int fd;
96 ddi_devid_t devid;
97 char *minor, *ret;
98
99 if ((fd = open(path, O_RDONLY)) < 0)
100 return (NULL);
101
102 minor = NULL;
103 ret = NULL;
104 if (devid_get(fd, &devid) == 0) {
105 if (devid_get_minor_name(fd, &minor) == 0)
106 ret = devid_str_encode(devid, minor);
107 if (minor != NULL)
108 devid_str_free(minor);
109 devid_free(devid);
110 }
111 (void) close(fd);
112
113 return (ret);
114}
115
116
117/*
118 * Go through and fix up any path and/or devid information for the given vdev
119 * configuration.
120 */
121static int
122fix_paths(nvlist_t *nv, name_entry_t *names)
123{
124 nvlist_t **child;
125 uint_t c, children;
126 uint64_t guid;
127 name_entry_t *ne, *best;
128 char *path, *devid;
129 int matched;
130
131 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
132 &child, &children) == 0) {
133 for (c = 0; c < children; c++)
134 if (fix_paths(child[c], names) != 0)
135 return (-1);
136 return (0);
137 }
138
139 /*
140 * This is a leaf (file or disk) vdev. In either case, go through
141 * the name list and see if we find a matching guid. If so, replace
142 * the path and see if we can calculate a new devid.
143 *
144 * There may be multiple names associated with a particular guid, in
145 * which case we have overlapping slices or multiple paths to the same
146 * disk. If this is the case, then we want to pick the path that is
147 * the most similar to the original, where "most similar" is the number
148 * of matching characters starting from the end of the path. This will
149 * preserve slice numbers even if the disks have been reorganized, and
150 * will also catch preferred disk names if multiple paths exist.
151 */
152 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
153 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
154 path = NULL;
155
156 matched = 0;
157 best = NULL;
158 for (ne = names; ne != NULL; ne = ne->ne_next) {
159 if (ne->ne_guid == guid) {
160 const char *src, *dst;
161 int count;
162
163 if (path == NULL) {
164 best = ne;
165 break;
166 }
167
168 src = ne->ne_name + strlen(ne->ne_name) - 1;
169 dst = path + strlen(path) - 1;
170 for (count = 0; src >= ne->ne_name && dst >= path;
171 src--, dst--, count++)
172 if (*src != *dst)
173 break;
174
175 /*
176 * At this point, 'count' is the number of characters
177 * matched from the end.
178 */
179 if (count > matched || best == NULL) {
180 best = ne;
181 matched = count;
182 }
183 }
184 }
185
186 if (best == NULL)
187 return (0);
188
189 if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
190 return (-1);
191
192 if ((devid = get_devid(best->ne_name)) == NULL) {
193 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
194 } else {
195 if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
196 return (-1);
197 devid_str_free(devid);
198 }
199
200 return (0);
201}
202
203/*
204 * Add the given configuration to the list of known devices.
205 */
206static int
207add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
208 nvlist_t *config)
209{
210 uint64_t pool_guid, vdev_guid, top_guid, txg, state;
211 pool_entry_t *pe;
212 vdev_entry_t *ve;
213 config_entry_t *ce;
214 name_entry_t *ne;
215
216 /*
217 * If this is a hot spare not currently in use or level 2 cache
218 * device, add it to the list of names to translate, but don't do
219 * anything else.
220 */
221 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
222 &state) == 0 &&
223 (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
224 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
225 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
226 return (-1);
227
228 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
229 free(ne);
230 return (-1);
231 }
232 ne->ne_guid = vdev_guid;
233 ne->ne_next = pl->names;
234 pl->names = ne;
235 return (0);
236 }
237
238 /*
239 * If we have a valid config but cannot read any of these fields, then
240 * it means we have a half-initialized label. In vdev_label_init()
241 * we write a label with txg == 0 so that we can identify the device
242 * in case the user refers to the same disk later on. If we fail to
243 * create the pool, we'll be left with a label in this state
244 * which should not be considered part of a valid pool.
245 */
246 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
247 &pool_guid) != 0 ||
248 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
249 &vdev_guid) != 0 ||
250 nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
251 &top_guid) != 0 ||
252 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
253 &txg) != 0 || txg == 0) {
254 nvlist_free(config);
255 return (0);
256 }
257
258 /*
259 * First, see if we know about this pool. If not, then add it to the
260 * list of known pools.
261 */
262 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
263 if (pe->pe_guid == pool_guid)
264 break;
265 }
266
267 if (pe == NULL) {
268 if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
269 nvlist_free(config);
270 return (-1);
271 }
272 pe->pe_guid = pool_guid;
273 pe->pe_next = pl->pools;
274 pl->pools = pe;
275 }
276
277 /*
278 * Second, see if we know about this toplevel vdev. Add it if its
279 * missing.
280 */
281 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
282 if (ve->ve_guid == top_guid)
283 break;
284 }
285
286 if (ve == NULL) {
287 if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
288 nvlist_free(config);
289 return (-1);
290 }
291 ve->ve_guid = top_guid;
292 ve->ve_next = pe->pe_vdevs;
293 pe->pe_vdevs = ve;
294 }
295
296 /*
297 * Third, see if we have a config with a matching transaction group. If
298 * so, then we do nothing. Otherwise, add it to the list of known
299 * configs.
300 */
301 for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
302 if (ce->ce_txg == txg)
303 break;
304 }
305
306 if (ce == NULL) {
307 if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
308 nvlist_free(config);
309 return (-1);
310 }
311 ce->ce_txg = txg;
312 ce->ce_config = config;
313 ce->ce_next = ve->ve_configs;
314 ve->ve_configs = ce;
315 } else {
316 nvlist_free(config);
317 }
318
319 /*
320 * At this point we've successfully added our config to the list of
321 * known configs. The last thing to do is add the vdev guid -> path
322 * mappings so that we can fix up the configuration as necessary before
323 * doing the import.
324 */
325 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
326 return (-1);
327
328 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
329 free(ne);
330 return (-1);
331 }
332
333 ne->ne_guid = vdev_guid;
334 ne->ne_next = pl->names;
335 pl->names = ne;
336
337 return (0);
338}
339
340/*
341 * Returns true if the named pool matches the given GUID.
342 */
343static int
344pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
345 boolean_t *isactive)
346{
347 zpool_handle_t *zhp;
348 uint64_t theguid;
349
350 if (zpool_open_silent(hdl, name, &zhp) != 0)
351 return (-1);
352
353 if (zhp == NULL) {
354 *isactive = B_FALSE;
355 return (0);
356 }
357
358 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
359 &theguid) == 0);
360
361 zpool_close(zhp);
362
363 *isactive = (theguid == guid);
364 return (0);
365}
366
367static nvlist_t *
368refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
369{
370 nvlist_t *nvl;
371 zfs_cmd_t zc = { 0 };
372 int err;
373
374 if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
375 return (NULL);
376
377 if (zcmd_alloc_dst_nvlist(hdl, &zc,
378 zc.zc_nvlist_conf_size * 2) != 0) {
379 zcmd_free_nvlists(&zc);
380 return (NULL);
381 }
382
383 while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
384 &zc)) != 0 && errno == ENOMEM) {
385 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
386 zcmd_free_nvlists(&zc);
387 return (NULL);
388 }
389 }
390
391 if (err) {
392 zcmd_free_nvlists(&zc);
393 return (NULL);
394 }
395
396 if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
397 zcmd_free_nvlists(&zc);
398 return (NULL);
399 }
400
401 zcmd_free_nvlists(&zc);
402 return (nvl);
403}
404
405/*
406 * Determine if the vdev id is a hole in the namespace.
407 */
408boolean_t
409vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
410{
411 for (int c = 0; c < holes; c++) {
412
413 /* Top-level is a hole */
414 if (hole_array[c] == id)
415 return (B_TRUE);
416 }
417 return (B_FALSE);
418}
419
420/*
421 * Convert our list of pools into the definitive set of configurations. We
422 * start by picking the best config for each toplevel vdev. Once that's done,
423 * we assemble the toplevel vdevs into a full config for the pool. We make a
424 * pass to fix up any incorrect paths, and then add it to the main list to
425 * return to the user.
426 */
427static nvlist_t *
428get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
429{
430 pool_entry_t *pe;
431 vdev_entry_t *ve;
432 config_entry_t *ce;
433 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
434 nvlist_t **spares, **l2cache;
435 uint_t i, nspares, nl2cache;
436 boolean_t config_seen;
437 uint64_t best_txg;
25 */
26
27/*
28 * Pool import support functions.
29 *
30 * To import a pool, we rely on reading the configuration information from the
31 * ZFS label of each device. If we successfully read the label, then we
32 * organize the configuration information in the following hierarchy:
33 *
34 * pool guid -> toplevel vdev guid -> label txg
35 *
36 * Duplicate entries matching this same tuple will be discarded. Once we have
37 * examined every device, we pick the best label txg config for each toplevel
38 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
39 * update any paths that have changed. Finally, we attempt to import the pool
40 * using our derived config, and record the results.
41 */
42
43#include <ctype.h>
44#include <devid.h>
45#include <dirent.h>
46#include <errno.h>
47#include <libintl.h>
48#include <stddef.h>
49#include <stdlib.h>
50#include <string.h>
51#include <sys/stat.h>
52#include <unistd.h>
53#include <fcntl.h>
54#include <thread_pool.h>
55#include <libgeom.h>
56
57#include <sys/vdev_impl.h>
58
59#include "libzfs.h"
60#include "libzfs_impl.h"
61
62/*
63 * Intermediate structures used to gather configuration information.
64 */
65typedef struct config_entry {
66 uint64_t ce_txg;
67 nvlist_t *ce_config;
68 struct config_entry *ce_next;
69} config_entry_t;
70
71typedef struct vdev_entry {
72 uint64_t ve_guid;
73 config_entry_t *ve_configs;
74 struct vdev_entry *ve_next;
75} vdev_entry_t;
76
77typedef struct pool_entry {
78 uint64_t pe_guid;
79 vdev_entry_t *pe_vdevs;
80 struct pool_entry *pe_next;
81} pool_entry_t;
82
83typedef struct name_entry {
84 char *ne_name;
85 uint64_t ne_guid;
86 struct name_entry *ne_next;
87} name_entry_t;
88
89typedef struct pool_list {
90 pool_entry_t *pools;
91 name_entry_t *names;
92} pool_list_t;
93
94static char *
95get_devid(const char *path)
96{
97 int fd;
98 ddi_devid_t devid;
99 char *minor, *ret;
100
101 if ((fd = open(path, O_RDONLY)) < 0)
102 return (NULL);
103
104 minor = NULL;
105 ret = NULL;
106 if (devid_get(fd, &devid) == 0) {
107 if (devid_get_minor_name(fd, &minor) == 0)
108 ret = devid_str_encode(devid, minor);
109 if (minor != NULL)
110 devid_str_free(minor);
111 devid_free(devid);
112 }
113 (void) close(fd);
114
115 return (ret);
116}
117
118
119/*
120 * Go through and fix up any path and/or devid information for the given vdev
121 * configuration.
122 */
123static int
124fix_paths(nvlist_t *nv, name_entry_t *names)
125{
126 nvlist_t **child;
127 uint_t c, children;
128 uint64_t guid;
129 name_entry_t *ne, *best;
130 char *path, *devid;
131 int matched;
132
133 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
134 &child, &children) == 0) {
135 for (c = 0; c < children; c++)
136 if (fix_paths(child[c], names) != 0)
137 return (-1);
138 return (0);
139 }
140
141 /*
142 * This is a leaf (file or disk) vdev. In either case, go through
143 * the name list and see if we find a matching guid. If so, replace
144 * the path and see if we can calculate a new devid.
145 *
146 * There may be multiple names associated with a particular guid, in
147 * which case we have overlapping slices or multiple paths to the same
148 * disk. If this is the case, then we want to pick the path that is
149 * the most similar to the original, where "most similar" is the number
150 * of matching characters starting from the end of the path. This will
151 * preserve slice numbers even if the disks have been reorganized, and
152 * will also catch preferred disk names if multiple paths exist.
153 */
154 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
155 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
156 path = NULL;
157
158 matched = 0;
159 best = NULL;
160 for (ne = names; ne != NULL; ne = ne->ne_next) {
161 if (ne->ne_guid == guid) {
162 const char *src, *dst;
163 int count;
164
165 if (path == NULL) {
166 best = ne;
167 break;
168 }
169
170 src = ne->ne_name + strlen(ne->ne_name) - 1;
171 dst = path + strlen(path) - 1;
172 for (count = 0; src >= ne->ne_name && dst >= path;
173 src--, dst--, count++)
174 if (*src != *dst)
175 break;
176
177 /*
178 * At this point, 'count' is the number of characters
179 * matched from the end.
180 */
181 if (count > matched || best == NULL) {
182 best = ne;
183 matched = count;
184 }
185 }
186 }
187
188 if (best == NULL)
189 return (0);
190
191 if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
192 return (-1);
193
194 if ((devid = get_devid(best->ne_name)) == NULL) {
195 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
196 } else {
197 if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
198 return (-1);
199 devid_str_free(devid);
200 }
201
202 return (0);
203}
204
205/*
206 * Add the given configuration to the list of known devices.
207 */
208static int
209add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
210 nvlist_t *config)
211{
212 uint64_t pool_guid, vdev_guid, top_guid, txg, state;
213 pool_entry_t *pe;
214 vdev_entry_t *ve;
215 config_entry_t *ce;
216 name_entry_t *ne;
217
218 /*
219 * If this is a hot spare not currently in use or level 2 cache
220 * device, add it to the list of names to translate, but don't do
221 * anything else.
222 */
223 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
224 &state) == 0 &&
225 (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
226 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
227 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
228 return (-1);
229
230 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
231 free(ne);
232 return (-1);
233 }
234 ne->ne_guid = vdev_guid;
235 ne->ne_next = pl->names;
236 pl->names = ne;
237 return (0);
238 }
239
240 /*
241 * If we have a valid config but cannot read any of these fields, then
242 * it means we have a half-initialized label. In vdev_label_init()
243 * we write a label with txg == 0 so that we can identify the device
244 * in case the user refers to the same disk later on. If we fail to
245 * create the pool, we'll be left with a label in this state
246 * which should not be considered part of a valid pool.
247 */
248 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
249 &pool_guid) != 0 ||
250 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
251 &vdev_guid) != 0 ||
252 nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
253 &top_guid) != 0 ||
254 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
255 &txg) != 0 || txg == 0) {
256 nvlist_free(config);
257 return (0);
258 }
259
260 /*
261 * First, see if we know about this pool. If not, then add it to the
262 * list of known pools.
263 */
264 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
265 if (pe->pe_guid == pool_guid)
266 break;
267 }
268
269 if (pe == NULL) {
270 if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
271 nvlist_free(config);
272 return (-1);
273 }
274 pe->pe_guid = pool_guid;
275 pe->pe_next = pl->pools;
276 pl->pools = pe;
277 }
278
279 /*
280 * Second, see if we know about this toplevel vdev. Add it if its
281 * missing.
282 */
283 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
284 if (ve->ve_guid == top_guid)
285 break;
286 }
287
288 if (ve == NULL) {
289 if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
290 nvlist_free(config);
291 return (-1);
292 }
293 ve->ve_guid = top_guid;
294 ve->ve_next = pe->pe_vdevs;
295 pe->pe_vdevs = ve;
296 }
297
298 /*
299 * Third, see if we have a config with a matching transaction group. If
300 * so, then we do nothing. Otherwise, add it to the list of known
301 * configs.
302 */
303 for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
304 if (ce->ce_txg == txg)
305 break;
306 }
307
308 if (ce == NULL) {
309 if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
310 nvlist_free(config);
311 return (-1);
312 }
313 ce->ce_txg = txg;
314 ce->ce_config = config;
315 ce->ce_next = ve->ve_configs;
316 ve->ve_configs = ce;
317 } else {
318 nvlist_free(config);
319 }
320
321 /*
322 * At this point we've successfully added our config to the list of
323 * known configs. The last thing to do is add the vdev guid -> path
324 * mappings so that we can fix up the configuration as necessary before
325 * doing the import.
326 */
327 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
328 return (-1);
329
330 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
331 free(ne);
332 return (-1);
333 }
334
335 ne->ne_guid = vdev_guid;
336 ne->ne_next = pl->names;
337 pl->names = ne;
338
339 return (0);
340}
341
342/*
343 * Returns true if the named pool matches the given GUID.
344 */
345static int
346pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
347 boolean_t *isactive)
348{
349 zpool_handle_t *zhp;
350 uint64_t theguid;
351
352 if (zpool_open_silent(hdl, name, &zhp) != 0)
353 return (-1);
354
355 if (zhp == NULL) {
356 *isactive = B_FALSE;
357 return (0);
358 }
359
360 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
361 &theguid) == 0);
362
363 zpool_close(zhp);
364
365 *isactive = (theguid == guid);
366 return (0);
367}
368
369static nvlist_t *
370refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
371{
372 nvlist_t *nvl;
373 zfs_cmd_t zc = { 0 };
374 int err;
375
376 if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
377 return (NULL);
378
379 if (zcmd_alloc_dst_nvlist(hdl, &zc,
380 zc.zc_nvlist_conf_size * 2) != 0) {
381 zcmd_free_nvlists(&zc);
382 return (NULL);
383 }
384
385 while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
386 &zc)) != 0 && errno == ENOMEM) {
387 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
388 zcmd_free_nvlists(&zc);
389 return (NULL);
390 }
391 }
392
393 if (err) {
394 zcmd_free_nvlists(&zc);
395 return (NULL);
396 }
397
398 if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
399 zcmd_free_nvlists(&zc);
400 return (NULL);
401 }
402
403 zcmd_free_nvlists(&zc);
404 return (nvl);
405}
406
407/*
408 * Determine if the vdev id is a hole in the namespace.
409 */
410boolean_t
411vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
412{
413 for (int c = 0; c < holes; c++) {
414
415 /* Top-level is a hole */
416 if (hole_array[c] == id)
417 return (B_TRUE);
418 }
419 return (B_FALSE);
420}
421
422/*
423 * Convert our list of pools into the definitive set of configurations. We
424 * start by picking the best config for each toplevel vdev. Once that's done,
425 * we assemble the toplevel vdevs into a full config for the pool. We make a
426 * pass to fix up any incorrect paths, and then add it to the main list to
427 * return to the user.
428 */
429static nvlist_t *
430get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
431{
432 pool_entry_t *pe;
433 vdev_entry_t *ve;
434 config_entry_t *ce;
435 nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
436 nvlist_t **spares, **l2cache;
437 uint_t i, nspares, nl2cache;
438 boolean_t config_seen;
439 uint64_t best_txg;
438 char *name, *hostname;
440 char *name, *hostname, *comment;
439 uint64_t version, guid;
440 uint_t children = 0;
441 nvlist_t **child = NULL;
442 uint_t holes;
443 uint64_t *hole_array, max_id;
444 uint_t c;
445 boolean_t isactive;
446 uint64_t hostid;
447 nvlist_t *nvl;
448 boolean_t found_one = B_FALSE;
449 boolean_t valid_top_config = B_FALSE;
450
451 if (nvlist_alloc(&ret, 0, 0) != 0)
452 goto nomem;
453
454 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
455 uint64_t id, max_txg = 0;
456
457 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
458 goto nomem;
459 config_seen = B_FALSE;
460
461 /*
462 * Iterate over all toplevel vdevs. Grab the pool configuration
463 * from the first one we find, and then go through the rest and
464 * add them as necessary to the 'vdevs' member of the config.
465 */
466 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
467
468 /*
469 * Determine the best configuration for this vdev by
470 * selecting the config with the latest transaction
471 * group.
472 */
473 best_txg = 0;
474 for (ce = ve->ve_configs; ce != NULL;
475 ce = ce->ce_next) {
476
477 if (ce->ce_txg > best_txg) {
478 tmp = ce->ce_config;
479 best_txg = ce->ce_txg;
480 }
481 }
482
483 /*
484 * We rely on the fact that the max txg for the
485 * pool will contain the most up-to-date information
486 * about the valid top-levels in the vdev namespace.
487 */
488 if (best_txg > max_txg) {
489 (void) nvlist_remove(config,
490 ZPOOL_CONFIG_VDEV_CHILDREN,
491 DATA_TYPE_UINT64);
492 (void) nvlist_remove(config,
493 ZPOOL_CONFIG_HOLE_ARRAY,
494 DATA_TYPE_UINT64_ARRAY);
495
496 max_txg = best_txg;
497 hole_array = NULL;
498 holes = 0;
499 max_id = 0;
500 valid_top_config = B_FALSE;
501
502 if (nvlist_lookup_uint64(tmp,
503 ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
504 verify(nvlist_add_uint64(config,
505 ZPOOL_CONFIG_VDEV_CHILDREN,
506 max_id) == 0);
507 valid_top_config = B_TRUE;
508 }
509
510 if (nvlist_lookup_uint64_array(tmp,
511 ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
512 &holes) == 0) {
513 verify(nvlist_add_uint64_array(config,
514 ZPOOL_CONFIG_HOLE_ARRAY,
515 hole_array, holes) == 0);
516 }
517 }
518
519 if (!config_seen) {
520 /*
521 * Copy the relevant pieces of data to the pool
522 * configuration:
523 *
524 * version
525 * pool guid
526 * name
441 uint64_t version, guid;
442 uint_t children = 0;
443 nvlist_t **child = NULL;
444 uint_t holes;
445 uint64_t *hole_array, max_id;
446 uint_t c;
447 boolean_t isactive;
448 uint64_t hostid;
449 nvlist_t *nvl;
450 boolean_t found_one = B_FALSE;
451 boolean_t valid_top_config = B_FALSE;
452
453 if (nvlist_alloc(&ret, 0, 0) != 0)
454 goto nomem;
455
456 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
457 uint64_t id, max_txg = 0;
458
459 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
460 goto nomem;
461 config_seen = B_FALSE;
462
463 /*
464 * Iterate over all toplevel vdevs. Grab the pool configuration
465 * from the first one we find, and then go through the rest and
466 * add them as necessary to the 'vdevs' member of the config.
467 */
468 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
469
470 /*
471 * Determine the best configuration for this vdev by
472 * selecting the config with the latest transaction
473 * group.
474 */
475 best_txg = 0;
476 for (ce = ve->ve_configs; ce != NULL;
477 ce = ce->ce_next) {
478
479 if (ce->ce_txg > best_txg) {
480 tmp = ce->ce_config;
481 best_txg = ce->ce_txg;
482 }
483 }
484
485 /*
486 * We rely on the fact that the max txg for the
487 * pool will contain the most up-to-date information
488 * about the valid top-levels in the vdev namespace.
489 */
490 if (best_txg > max_txg) {
491 (void) nvlist_remove(config,
492 ZPOOL_CONFIG_VDEV_CHILDREN,
493 DATA_TYPE_UINT64);
494 (void) nvlist_remove(config,
495 ZPOOL_CONFIG_HOLE_ARRAY,
496 DATA_TYPE_UINT64_ARRAY);
497
498 max_txg = best_txg;
499 hole_array = NULL;
500 holes = 0;
501 max_id = 0;
502 valid_top_config = B_FALSE;
503
504 if (nvlist_lookup_uint64(tmp,
505 ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
506 verify(nvlist_add_uint64(config,
507 ZPOOL_CONFIG_VDEV_CHILDREN,
508 max_id) == 0);
509 valid_top_config = B_TRUE;
510 }
511
512 if (nvlist_lookup_uint64_array(tmp,
513 ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
514 &holes) == 0) {
515 verify(nvlist_add_uint64_array(config,
516 ZPOOL_CONFIG_HOLE_ARRAY,
517 hole_array, holes) == 0);
518 }
519 }
520
521 if (!config_seen) {
522 /*
523 * Copy the relevant pieces of data to the pool
524 * configuration:
525 *
526 * version
527 * pool guid
528 * name
529 * comment (if available)
527 * pool state
528 * hostid (if available)
529 * hostname (if available)
530 */
531 uint64_t state;
532
533 verify(nvlist_lookup_uint64(tmp,
534 ZPOOL_CONFIG_VERSION, &version) == 0);
535 if (nvlist_add_uint64(config,
536 ZPOOL_CONFIG_VERSION, version) != 0)
537 goto nomem;
538 verify(nvlist_lookup_uint64(tmp,
539 ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
540 if (nvlist_add_uint64(config,
541 ZPOOL_CONFIG_POOL_GUID, guid) != 0)
542 goto nomem;
543 verify(nvlist_lookup_string(tmp,
544 ZPOOL_CONFIG_POOL_NAME, &name) == 0);
545 if (nvlist_add_string(config,
546 ZPOOL_CONFIG_POOL_NAME, name) != 0)
547 goto nomem;
530 * pool state
531 * hostid (if available)
532 * hostname (if available)
533 */
534 uint64_t state;
535
536 verify(nvlist_lookup_uint64(tmp,
537 ZPOOL_CONFIG_VERSION, &version) == 0);
538 if (nvlist_add_uint64(config,
539 ZPOOL_CONFIG_VERSION, version) != 0)
540 goto nomem;
541 verify(nvlist_lookup_uint64(tmp,
542 ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
543 if (nvlist_add_uint64(config,
544 ZPOOL_CONFIG_POOL_GUID, guid) != 0)
545 goto nomem;
546 verify(nvlist_lookup_string(tmp,
547 ZPOOL_CONFIG_POOL_NAME, &name) == 0);
548 if (nvlist_add_string(config,
549 ZPOOL_CONFIG_POOL_NAME, name) != 0)
550 goto nomem;
551
552 /*
553 * COMMENT is optional, don't bail if it's not
554 * there, instead, set it to NULL.
555 */
556 if (nvlist_lookup_string(tmp,
557 ZPOOL_CONFIG_COMMENT, &comment) != 0)
558 comment = NULL;
559 else if (nvlist_add_string(config,
560 ZPOOL_CONFIG_COMMENT, comment) != 0)
561 goto nomem;
562
548 verify(nvlist_lookup_uint64(tmp,
549 ZPOOL_CONFIG_POOL_STATE, &state) == 0);
550 if (nvlist_add_uint64(config,
551 ZPOOL_CONFIG_POOL_STATE, state) != 0)
552 goto nomem;
563 verify(nvlist_lookup_uint64(tmp,
564 ZPOOL_CONFIG_POOL_STATE, &state) == 0);
565 if (nvlist_add_uint64(config,
566 ZPOOL_CONFIG_POOL_STATE, state) != 0)
567 goto nomem;
568
553 hostid = 0;
554 if (nvlist_lookup_uint64(tmp,
555 ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
556 if (nvlist_add_uint64(config,
557 ZPOOL_CONFIG_HOSTID, hostid) != 0)
558 goto nomem;
559 verify(nvlist_lookup_string(tmp,
560 ZPOOL_CONFIG_HOSTNAME,
561 &hostname) == 0);
562 if (nvlist_add_string(config,
563 ZPOOL_CONFIG_HOSTNAME,
564 hostname) != 0)
565 goto nomem;
566 }
567
568 config_seen = B_TRUE;
569 }
570
571 /*
572 * Add this top-level vdev to the child array.
573 */
574 verify(nvlist_lookup_nvlist(tmp,
575 ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
576 verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
577 &id) == 0);
578
579 if (id >= children) {
580 nvlist_t **newchild;
581
582 newchild = zfs_alloc(hdl, (id + 1) *
583 sizeof (nvlist_t *));
584 if (newchild == NULL)
585 goto nomem;
586
587 for (c = 0; c < children; c++)
588 newchild[c] = child[c];
589
590 free(child);
591 child = newchild;
592 children = id + 1;
593 }
594 if (nvlist_dup(nvtop, &child[id], 0) != 0)
595 goto nomem;
596
597 }
598
599 /*
600 * If we have information about all the top-levels then
601 * clean up the nvlist which we've constructed. This
602 * means removing any extraneous devices that are
603 * beyond the valid range or adding devices to the end
604 * of our array which appear to be missing.
605 */
606 if (valid_top_config) {
607 if (max_id < children) {
608 for (c = max_id; c < children; c++)
609 nvlist_free(child[c]);
610 children = max_id;
611 } else if (max_id > children) {
612 nvlist_t **newchild;
613
614 newchild = zfs_alloc(hdl, (max_id) *
615 sizeof (nvlist_t *));
616 if (newchild == NULL)
617 goto nomem;
618
619 for (c = 0; c < children; c++)
620 newchild[c] = child[c];
621
622 free(child);
623 child = newchild;
624 children = max_id;
625 }
626 }
627
628 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
629 &guid) == 0);
630
631 /*
632 * The vdev namespace may contain holes as a result of
633 * device removal. We must add them back into the vdev
634 * tree before we process any missing devices.
635 */
636 if (holes > 0) {
637 ASSERT(valid_top_config);
638
639 for (c = 0; c < children; c++) {
640 nvlist_t *holey;
641
642 if (child[c] != NULL ||
643 !vdev_is_hole(hole_array, holes, c))
644 continue;
645
646 if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
647 0) != 0)
648 goto nomem;
649
650 /*
651 * Holes in the namespace are treated as
652 * "hole" top-level vdevs and have a
653 * special flag set on them.
654 */
655 if (nvlist_add_string(holey,
656 ZPOOL_CONFIG_TYPE,
657 VDEV_TYPE_HOLE) != 0 ||
658 nvlist_add_uint64(holey,
659 ZPOOL_CONFIG_ID, c) != 0 ||
660 nvlist_add_uint64(holey,
661 ZPOOL_CONFIG_GUID, 0ULL) != 0)
662 goto nomem;
663 child[c] = holey;
664 }
665 }
666
667 /*
668 * Look for any missing top-level vdevs. If this is the case,
669 * create a faked up 'missing' vdev as a placeholder. We cannot
670 * simply compress the child array, because the kernel performs
671 * certain checks to make sure the vdev IDs match their location
672 * in the configuration.
673 */
674 for (c = 0; c < children; c++) {
675 if (child[c] == NULL) {
676 nvlist_t *missing;
677 if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
678 0) != 0)
679 goto nomem;
680 if (nvlist_add_string(missing,
681 ZPOOL_CONFIG_TYPE,
682 VDEV_TYPE_MISSING) != 0 ||
683 nvlist_add_uint64(missing,
684 ZPOOL_CONFIG_ID, c) != 0 ||
685 nvlist_add_uint64(missing,
686 ZPOOL_CONFIG_GUID, 0ULL) != 0) {
687 nvlist_free(missing);
688 goto nomem;
689 }
690 child[c] = missing;
691 }
692 }
693
694 /*
695 * Put all of this pool's top-level vdevs into a root vdev.
696 */
697 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
698 goto nomem;
699 if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
700 VDEV_TYPE_ROOT) != 0 ||
701 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
702 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
703 nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
704 child, children) != 0) {
705 nvlist_free(nvroot);
706 goto nomem;
707 }
708
709 for (c = 0; c < children; c++)
710 nvlist_free(child[c]);
711 free(child);
712 children = 0;
713 child = NULL;
714
715 /*
716 * Go through and fix up any paths and/or devids based on our
717 * known list of vdev GUID -> path mappings.
718 */
719 if (fix_paths(nvroot, pl->names) != 0) {
720 nvlist_free(nvroot);
721 goto nomem;
722 }
723
724 /*
725 * Add the root vdev to this pool's configuration.
726 */
727 if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
728 nvroot) != 0) {
729 nvlist_free(nvroot);
730 goto nomem;
731 }
732 nvlist_free(nvroot);
733
734 /*
735 * zdb uses this path to report on active pools that were
736 * imported or created using -R.
737 */
738 if (active_ok)
739 goto add_pool;
740
741 /*
742 * Determine if this pool is currently active, in which case we
743 * can't actually import it.
744 */
745 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
746 &name) == 0);
747 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
748 &guid) == 0);
749
750 if (pool_active(hdl, name, guid, &isactive) != 0)
751 goto error;
752
753 if (isactive) {
754 nvlist_free(config);
755 config = NULL;
756 continue;
757 }
758
759 if ((nvl = refresh_config(hdl, config)) == NULL) {
760 nvlist_free(config);
761 config = NULL;
762 continue;
763 }
764
765 nvlist_free(config);
766 config = nvl;
767
768 /*
769 * Go through and update the paths for spares, now that we have
770 * them.
771 */
772 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
773 &nvroot) == 0);
774 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
775 &spares, &nspares) == 0) {
776 for (i = 0; i < nspares; i++) {
777 if (fix_paths(spares[i], pl->names) != 0)
778 goto nomem;
779 }
780 }
781
782 /*
783 * Update the paths for l2cache devices.
784 */
785 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
786 &l2cache, &nl2cache) == 0) {
787 for (i = 0; i < nl2cache; i++) {
788 if (fix_paths(l2cache[i], pl->names) != 0)
789 goto nomem;
790 }
791 }
792
793 /*
794 * Restore the original information read from the actual label.
795 */
796 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
797 DATA_TYPE_UINT64);
798 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
799 DATA_TYPE_STRING);
800 if (hostid != 0) {
801 verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
802 hostid) == 0);
803 verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
804 hostname) == 0);
805 }
806
807add_pool:
808 /*
809 * Add this pool to the list of configs.
810 */
811 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
812 &name) == 0);
813 if (nvlist_add_nvlist(ret, name, config) != 0)
814 goto nomem;
815
816 found_one = B_TRUE;
817 nvlist_free(config);
818 config = NULL;
819 }
820
821 if (!found_one) {
822 nvlist_free(ret);
823 ret = NULL;
824 }
825
826 return (ret);
827
828nomem:
829 (void) no_memory(hdl);
830error:
831 nvlist_free(config);
832 nvlist_free(ret);
833 for (c = 0; c < children; c++)
834 nvlist_free(child[c]);
835 free(child);
836
837 return (NULL);
838}
839
840/*
841 * Return the offset of the given label.
842 */
843static uint64_t
844label_offset(uint64_t size, int l)
845{
846 ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
847 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
848 0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
849}
850
851/*
852 * Given a file descriptor, read the label information and return an nvlist
853 * describing the configuration, if there is one.
854 */
855int
856zpool_read_label(int fd, nvlist_t **config)
857{
858 struct stat64 statbuf;
859 int l;
860 vdev_label_t *label;
861 uint64_t state, txg, size;
862
863 *config = NULL;
864
865 if (fstat64(fd, &statbuf) == -1)
866 return (0);
867 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
868
869 if ((label = malloc(sizeof (vdev_label_t))) == NULL)
870 return (-1);
871
872 for (l = 0; l < VDEV_LABELS; l++) {
873 if (pread64(fd, label, sizeof (vdev_label_t),
874 label_offset(size, l)) != sizeof (vdev_label_t))
875 continue;
876
877 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
878 sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
879 continue;
880
881 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
882 &state) != 0 || state > POOL_STATE_L2CACHE) {
883 nvlist_free(*config);
884 continue;
885 }
886
887 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
888 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
889 &txg) != 0 || txg == 0)) {
890 nvlist_free(*config);
891 continue;
892 }
893
894 free(label);
895 return (0);
896 }
897
898 free(label);
899 *config = NULL;
900 return (0);
901}
902
903typedef struct rdsk_node {
904 char *rn_name;
905 int rn_dfd;
906 libzfs_handle_t *rn_hdl;
907 nvlist_t *rn_config;
908 avl_tree_t *rn_avl;
909 avl_node_t rn_node;
910 boolean_t rn_nozpool;
911} rdsk_node_t;
912
913static int
914slice_cache_compare(const void *arg1, const void *arg2)
915{
916 const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
917 const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
918 char *nm1slice, *nm2slice;
919 int rv;
920
921 /*
922 * slices zero and two are the most likely to provide results,
923 * so put those first
924 */
925 nm1slice = strstr(nm1, "s0");
926 nm2slice = strstr(nm2, "s0");
927 if (nm1slice && !nm2slice) {
928 return (-1);
929 }
930 if (!nm1slice && nm2slice) {
931 return (1);
932 }
933 nm1slice = strstr(nm1, "s2");
934 nm2slice = strstr(nm2, "s2");
935 if (nm1slice && !nm2slice) {
936 return (-1);
937 }
938 if (!nm1slice && nm2slice) {
939 return (1);
940 }
941
942 rv = strcmp(nm1, nm2);
943 if (rv == 0)
944 return (0);
945 return (rv > 0 ? 1 : -1);
946}
947
#ifdef sun
/*
 * Look up the slice named "<diskname><partno>" in the slice cache 'r' and,
 * if it exists and its 'size' (in blocks of 'blksz' bytes) is below
 * SPA_MINDEVSIZE, flag it as unable to hold a pool.
 */
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	char slicename[MAXNAMELEN];
	rdsk_node_t key;
	rdsk_node_t *found;

	/* Only rn_name is consulted by the tree's comparator. */
	(void) snprintf(slicename, sizeof (slicename), "%s%u",
	    diskname, partno);
	key.rn_name = slicename;

	/*
	 * A disk label may carry a bogus sector size of zero; substitute
	 * the default so the division below is safe.
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;

	/* Large enough to contain a zpool: nothing to mark. */
	if (size >= (SPA_MINDEVSIZE / blksz))
		return;

	found = avl_find(r, &key, NULL);
	if (found != NULL)
		found->rn_nozpool = B_TRUE;
}
#endif /* sun */
972
973static void
974nozpool_all_slices(avl_tree_t *r, const char *sname)
975{
976#ifdef sun
977 char diskname[MAXNAMELEN];
978 char *ptr;
979 int i;
980
981 (void) strncpy(diskname, sname, MAXNAMELEN);
982 if (((ptr = strrchr(diskname, 's')) == NULL) &&
983 ((ptr = strrchr(diskname, 'p')) == NULL))
984 return;
985 ptr[0] = 's';
986 ptr[1] = '\0';
987 for (i = 0; i < NDKMAP; i++)
988 check_one_slice(r, diskname, i, 0, 1);
989 ptr[0] = 'p';
990 for (i = 0; i <= FD_NUMPART; i++)
991 check_one_slice(r, diskname, i, 0, 1);
992#endif /* sun */
993}
994
995static void
996check_slices(avl_tree_t *r, int fd, const char *sname)
997{
998#ifdef sun
999 struct extvtoc vtoc;
1000 struct dk_gpt *gpt;
1001 char diskname[MAXNAMELEN];
1002 char *ptr;
1003 int i;
1004
1005 (void) strncpy(diskname, sname, MAXNAMELEN);
1006 if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
1007 return;
1008 ptr[1] = '\0';
1009
1010 if (read_extvtoc(fd, &vtoc) >= 0) {
1011 for (i = 0; i < NDKMAP; i++)
1012 check_one_slice(r, diskname, i,
1013 vtoc.v_part[i].p_size, vtoc.v_sectorsz);
1014 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
1015 /*
1016 * on x86 we'll still have leftover links that point
1017 * to slices s[9-15], so use NDKMAP instead
1018 */
1019 for (i = 0; i < NDKMAP; i++)
1020 check_one_slice(r, diskname, i,
1021 gpt->efi_parts[i].p_size, gpt->efi_lbasize);
1022 /* nodes p[1-4] are never used with EFI labels */
1023 ptr[0] = 'p';
1024 for (i = 1; i <= FD_NUMPART; i++)
1025 check_one_slice(r, diskname, i, 0, 1);
1026 efi_free(gpt);
1027 }
1028#endif /* sun */
1029}
1030
1031static void
1032zpool_open_func(void *arg)
1033{
1034 rdsk_node_t *rn = arg;
1035 struct stat64 statbuf;
1036 nvlist_t *config;
1037 int fd;
1038
1039 if (rn->rn_nozpool)
1040 return;
1041 if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1042 /* symlink to a device that's no longer there */
1043 if (errno == ENOENT)
1044 nozpool_all_slices(rn->rn_avl, rn->rn_name);
1045 return;
1046 }
1047 /*
1048 * Ignore failed stats. We only want regular
1049 * files, character devs and block devs.
1050 */
1051 if (fstat64(fd, &statbuf) != 0 ||
1052 (!S_ISREG(statbuf.st_mode) &&
1053 !S_ISCHR(statbuf.st_mode) &&
1054 !S_ISBLK(statbuf.st_mode))) {
1055 (void) close(fd);
1056 return;
1057 }
1058 /* this file is too small to hold a zpool */
1059 if (S_ISREG(statbuf.st_mode) &&
1060 statbuf.st_size < SPA_MINDEVSIZE) {
1061 (void) close(fd);
1062 return;
1063 } else if (!S_ISREG(statbuf.st_mode)) {
1064 /*
1065 * Try to read the disk label first so we don't have to
1066 * open a bunch of minor nodes that can't have a zpool.
1067 */
1068 check_slices(rn->rn_avl, fd, rn->rn_name);
1069 }
1070
1071 if ((zpool_read_label(fd, &config)) != 0) {
1072 (void) close(fd);
1073 (void) no_memory(rn->rn_hdl);
1074 return;
1075 }
1076 (void) close(fd);
1077
1078
1079 rn->rn_config = config;
1080 if (config != NULL) {
1081 assert(rn->rn_nozpool == B_FALSE);
1082 }
1083}
1084
1085/*
1086 * Given a file descriptor, clear (zero) the label information. This function
1087 * is used in the appliance stack as part of the ZFS sysevent module and
1088 * to implement the "zpool labelclear" command.
1089 */
1090int
1091zpool_clear_label(int fd)
1092{
1093 struct stat64 statbuf;
1094 int l;
1095 vdev_label_t *label;
1096 uint64_t size;
1097
1098 if (fstat64(fd, &statbuf) == -1)
1099 return (0);
1100 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1101
1102 if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
1103 return (-1);
1104
1105 for (l = 0; l < VDEV_LABELS; l++) {
1106 if (pwrite64(fd, label, sizeof (vdev_label_t),
1107 label_offset(size, l)) != sizeof (vdev_label_t))
1108 return (-1);
1109 }
1110
1111 free(label);
1112 return (0);
1113}
1114
1115/*
1116 * Given a list of directories to search, find all pools stored on disk. This
1117 * includes partial pools which are not available to import. If no args are
1118 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1119 * poolname or guid (but not both) are provided by the caller when trying
1120 * to import a specific pool.
1121 */
1122static nvlist_t *
1123zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1124{
1125 int i, dirs = iarg->paths;
1126 DIR *dirp = NULL;
1127 struct dirent64 *dp;
1128 char path[MAXPATHLEN];
1129 char *end, **dir = iarg->path;
1130 size_t pathleft;
1131 nvlist_t *ret = NULL;
1132 static char *default_dir = "/dev/dsk";
1133 pool_list_t pools = { 0 };
1134 pool_entry_t *pe, *penext;
1135 vdev_entry_t *ve, *venext;
1136 config_entry_t *ce, *cenext;
1137 name_entry_t *ne, *nenext;
1138 avl_tree_t slice_cache;
1139 rdsk_node_t *slice;
1140 void *cookie;
1141
1142 if (dirs == 0) {
1143 dirs = 1;
1144 dir = &default_dir;
1145 }
1146
1147 /*
1148 * Go through and read the label configuration information from every
1149 * possible device, organizing the information according to pool GUID
1150 * and toplevel GUID.
1151 */
1152 for (i = 0; i < dirs; i++) {
1153 tpool_t *t;
1154 char *rdsk;
1155 int dfd;
1156
1157 /* use realpath to normalize the path */
1158 if (realpath(dir[i], path) == 0) {
1159 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1160 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1161 goto error;
1162 }
1163 end = &path[strlen(path)];
1164 *end++ = '/';
1165 *end = 0;
1166 pathleft = &path[sizeof (path)] - end;
1167
1168 /*
1169 * Using raw devices instead of block devices when we're
1170 * reading the labels skips a bunch of slow operations during
1171 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1172 */
1173 if (strcmp(path, "/dev/dsk/") == 0)
1174 rdsk = "/dev/";
1175 else
1176 rdsk = path;
1177
1178 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1179 (dirp = fdopendir(dfd)) == NULL) {
1180 zfs_error_aux(hdl, strerror(errno));
1181 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1182 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1183 rdsk);
1184 goto error;
1185 }
1186
1187 avl_create(&slice_cache, slice_cache_compare,
1188 sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1189
1190 if (strcmp(rdsk, "/dev/") == 0) {
1191 struct gmesh mesh;
1192 struct gclass *mp;
1193 struct ggeom *gp;
1194 struct gprovider *pp;
1195
1196 errno = geom_gettree(&mesh);
1197 if (errno != 0) {
1198 zfs_error_aux(hdl, strerror(errno));
1199 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1200 dgettext(TEXT_DOMAIN, "cannot get GEOM tree"));
1201 goto error;
1202 }
1203
1204 LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
1205 LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
1206 LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
1207 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1208 slice->rn_name = zfs_strdup(hdl, pp->lg_name);
1209 slice->rn_avl = &slice_cache;
1210 slice->rn_dfd = dfd;
1211 slice->rn_hdl = hdl;
1212 slice->rn_nozpool = B_FALSE;
1213 avl_add(&slice_cache, slice);
1214 }
1215 }
1216 }
1217
1218 geom_deletetree(&mesh);
1219 goto skipdir;
1220 }
1221
1222 /*
1223 * This is not MT-safe, but we have no MT consumers of libzfs
1224 */
1225 while ((dp = readdir64(dirp)) != NULL) {
1226 const char *name = dp->d_name;
1227 if (name[0] == '.' &&
1228 (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1229 continue;
1230
1231 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1232 slice->rn_name = zfs_strdup(hdl, name);
1233 slice->rn_avl = &slice_cache;
1234 slice->rn_dfd = dfd;
1235 slice->rn_hdl = hdl;
1236 slice->rn_nozpool = B_FALSE;
1237 avl_add(&slice_cache, slice);
1238 }
1239skipdir:
1240 /*
1241 * create a thread pool to do all of this in parallel;
1242 * rn_nozpool is not protected, so this is racy in that
1243 * multiple tasks could decide that the same slice can
1244 * not hold a zpool, which is benign. Also choose
1245 * double the number of processors; we hold a lot of
1246 * locks in the kernel, so going beyond this doesn't
1247 * buy us much.
1248 */
1249 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1250 0, NULL);
1251 for (slice = avl_first(&slice_cache); slice;
1252 (slice = avl_walk(&slice_cache, slice,
1253 AVL_AFTER)))
1254 (void) tpool_dispatch(t, zpool_open_func, slice);
1255 tpool_wait(t);
1256 tpool_destroy(t);
1257
1258 cookie = NULL;
1259 while ((slice = avl_destroy_nodes(&slice_cache,
1260 &cookie)) != NULL) {
1261 if (slice->rn_config != NULL) {
1262 nvlist_t *config = slice->rn_config;
1263 boolean_t matched = B_TRUE;
1264
1265 if (iarg->poolname != NULL) {
1266 char *pname;
1267
1268 matched = nvlist_lookup_string(config,
1269 ZPOOL_CONFIG_POOL_NAME,
1270 &pname) == 0 &&
1271 strcmp(iarg->poolname, pname) == 0;
1272 } else if (iarg->guid != 0) {
1273 uint64_t this_guid;
1274
1275 matched = nvlist_lookup_uint64(config,
1276 ZPOOL_CONFIG_POOL_GUID,
1277 &this_guid) == 0 &&
1278 iarg->guid == this_guid;
1279 }
1280 if (!matched) {
1281 nvlist_free(config);
1282 config = NULL;
1283 continue;
1284 }
1285 /* use the non-raw path for the config */
1286 (void) strlcpy(end, slice->rn_name, pathleft);
1287 if (add_config(hdl, &pools, path, config) != 0)
1288 goto error;
1289 }
1290 free(slice->rn_name);
1291 free(slice);
1292 }
1293 avl_destroy(&slice_cache);
1294
1295 (void) closedir(dirp);
1296 dirp = NULL;
1297 }
1298
1299 ret = get_configs(hdl, &pools, iarg->can_be_active);
1300
1301error:
1302 for (pe = pools.pools; pe != NULL; pe = penext) {
1303 penext = pe->pe_next;
1304 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1305 venext = ve->ve_next;
1306 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1307 cenext = ce->ce_next;
1308 if (ce->ce_config)
1309 nvlist_free(ce->ce_config);
1310 free(ce);
1311 }
1312 free(ve);
1313 }
1314 free(pe);
1315 }
1316
1317 for (ne = pools.names; ne != NULL; ne = nenext) {
1318 nenext = ne->ne_next;
1319 if (ne->ne_name)
1320 free(ne->ne_name);
1321 free(ne);
1322 }
1323
1324 if (dirp)
1325 (void) closedir(dirp);
1326
1327 return (ret);
1328}
1329
1330nvlist_t *
1331zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
1332{
1333 importargs_t iarg = { 0 };
1334
1335 iarg.paths = argc;
1336 iarg.path = argv;
1337
1338 return (zpool_find_import_impl(hdl, &iarg));
1339}
1340
1341/*
1342 * Given a cache file, return the contents as a list of importable pools.
1343 * poolname or guid (but not both) are provided by the caller when trying
1344 * to import a specific pool.
1345 */
1346nvlist_t *
1347zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
1348 char *poolname, uint64_t guid)
1349{
1350 char *buf;
1351 int fd;
1352 struct stat64 statbuf;
1353 nvlist_t *raw, *src, *dst;
1354 nvlist_t *pools;
1355 nvpair_t *elem;
1356 char *name;
1357 uint64_t this_guid;
1358 boolean_t active;
1359
1360 verify(poolname == NULL || guid == 0);
1361
1362 if ((fd = open(cachefile, O_RDONLY)) < 0) {
1363 zfs_error_aux(hdl, "%s", strerror(errno));
1364 (void) zfs_error(hdl, EZFS_BADCACHE,
1365 dgettext(TEXT_DOMAIN, "failed to open cache file"));
1366 return (NULL);
1367 }
1368
1369 if (fstat64(fd, &statbuf) != 0) {
1370 zfs_error_aux(hdl, "%s", strerror(errno));
1371 (void) close(fd);
1372 (void) zfs_error(hdl, EZFS_BADCACHE,
1373 dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
1374 return (NULL);
1375 }
1376
1377 if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
1378 (void) close(fd);
1379 return (NULL);
1380 }
1381
1382 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1383 (void) close(fd);
1384 free(buf);
1385 (void) zfs_error(hdl, EZFS_BADCACHE,
1386 dgettext(TEXT_DOMAIN,
1387 "failed to read cache file contents"));
1388 return (NULL);
1389 }
1390
1391 (void) close(fd);
1392
1393 if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
1394 free(buf);
1395 (void) zfs_error(hdl, EZFS_BADCACHE,
1396 dgettext(TEXT_DOMAIN,
1397 "invalid or corrupt cache file contents"));
1398 return (NULL);
1399 }
1400
1401 free(buf);
1402
1403 /*
1404 * Go through and get the current state of the pools and refresh their
1405 * state.
1406 */
1407 if (nvlist_alloc(&pools, 0, 0) != 0) {
1408 (void) no_memory(hdl);
1409 nvlist_free(raw);
1410 return (NULL);
1411 }
1412
1413 elem = NULL;
1414 while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
1415 verify(nvpair_value_nvlist(elem, &src) == 0);
1416
1417 verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
1418 &name) == 0);
1419 if (poolname != NULL && strcmp(poolname, name) != 0)
1420 continue;
1421
1422 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
1423 &this_guid) == 0);
1424 if (guid != 0) {
1425 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
1426 &this_guid) == 0);
1427 if (guid != this_guid)
1428 continue;
1429 }
1430
1431 if (pool_active(hdl, name, this_guid, &active) != 0) {
1432 nvlist_free(raw);
1433 nvlist_free(pools);
1434 return (NULL);
1435 }
1436
1437 if (active)
1438 continue;
1439
1440 if ((dst = refresh_config(hdl, src)) == NULL) {
1441 nvlist_free(raw);
1442 nvlist_free(pools);
1443 return (NULL);
1444 }
1445
1446 if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
1447 (void) no_memory(hdl);
1448 nvlist_free(dst);
1449 nvlist_free(raw);
1450 nvlist_free(pools);
1451 return (NULL);
1452 }
1453 nvlist_free(dst);
1454 }
1455
1456 nvlist_free(raw);
1457 return (pools);
1458}
1459
1460static int
1461name_or_guid_exists(zpool_handle_t *zhp, void *data)
1462{
1463 importargs_t *import = data;
1464 int found = 0;
1465
1466 if (import->poolname != NULL) {
1467 char *pool_name;
1468
1469 verify(nvlist_lookup_string(zhp->zpool_config,
1470 ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
1471 if (strcmp(pool_name, import->poolname) == 0)
1472 found = 1;
1473 } else {
1474 uint64_t pool_guid;
1475
1476 verify(nvlist_lookup_uint64(zhp->zpool_config,
1477 ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
1478 if (pool_guid == import->guid)
1479 found = 1;
1480 }
1481
1482 zpool_close(zhp);
1483 return (found);
1484}
1485
1486nvlist_t *
1487zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
1488{
1489 verify(import->poolname == NULL || import->guid == 0);
1490
1491 if (import->unique)
1492 import->exists = zpool_iter(hdl, name_or_guid_exists, import);
1493
1494 if (import->cachefile != NULL)
1495 return (zpool_find_import_cached(hdl, import->cachefile,
1496 import->poolname, import->guid));
1497
1498 return (zpool_find_import_impl(hdl, import));
1499}
1500
1501boolean_t
1502find_guid(nvlist_t *nv, uint64_t guid)
1503{
1504 uint64_t tmp;
1505 nvlist_t **child;
1506 uint_t c, children;
1507
1508 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
1509 if (tmp == guid)
1510 return (B_TRUE);
1511
1512 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1513 &child, &children) == 0) {
1514 for (c = 0; c < children; c++)
1515 if (find_guid(child[c], guid))
1516 return (B_TRUE);
1517 }
1518
1519 return (B_FALSE);
1520}
1521
1522typedef struct aux_cbdata {
1523 const char *cb_type;
1524 uint64_t cb_guid;
1525 zpool_handle_t *cb_zhp;
1526} aux_cbdata_t;
1527
1528static int
1529find_aux(zpool_handle_t *zhp, void *data)
1530{
1531 aux_cbdata_t *cbp = data;
1532 nvlist_t **list;
1533 uint_t i, count;
1534 uint64_t guid;
1535 nvlist_t *nvroot;
1536
1537 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1538 &nvroot) == 0);
1539
1540 if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
1541 &list, &count) == 0) {
1542 for (i = 0; i < count; i++) {
1543 verify(nvlist_lookup_uint64(list[i],
1544 ZPOOL_CONFIG_GUID, &guid) == 0);
1545 if (guid == cbp->cb_guid) {
1546 cbp->cb_zhp = zhp;
1547 return (1);
1548 }
1549 }
1550 }
1551
1552 zpool_close(zhp);
1553 return (0);
1554}
1555
1556/*
1557 * Determines if the pool is in use. If so, it returns true and the state of
1558 * the pool as well as the name of the pool. Both strings are allocated and
1559 * must be freed by the caller.
1560 */
1561int
1562zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
1563 boolean_t *inuse)
1564{
1565 nvlist_t *config;
1566 char *name;
1567 boolean_t ret;
1568 uint64_t guid, vdev_guid;
1569 zpool_handle_t *zhp;
1570 nvlist_t *pool_config;
1571 uint64_t stateval, isspare;
1572 aux_cbdata_t cb = { 0 };
1573 boolean_t isactive;
1574
1575 *inuse = B_FALSE;
1576
1577 if (zpool_read_label(fd, &config) != 0) {
1578 (void) no_memory(hdl);
1579 return (-1);
1580 }
1581
1582 if (config == NULL)
1583 return (0);
1584
1585 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1586 &stateval) == 0);
1587 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
1588 &vdev_guid) == 0);
1589
1590 if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
1591 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1592 &name) == 0);
1593 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1594 &guid) == 0);
1595 }
1596
1597 switch (stateval) {
1598 case POOL_STATE_EXPORTED:
1599 /*
1600 * A pool with an exported state may in fact be imported
1601 * read-only, so check the in-core state to see if it's
1602 * active and imported read-only. If it is, set
1603 * its state to active.
1604 */
1605 if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
1606 (zhp = zpool_open_canfail(hdl, name)) != NULL &&
1607 zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
1608 stateval = POOL_STATE_ACTIVE;
1609
1610 ret = B_TRUE;
1611 break;
1612
1613 case POOL_STATE_ACTIVE:
1614 /*
1615 * For an active pool, we have to determine if it's really part
1616 * of a currently active pool (in which case the pool will exist
1617 * and the guid will be the same), or whether it's part of an
1618 * active pool that was disconnected without being explicitly
1619 * exported.
1620 */
1621 if (pool_active(hdl, name, guid, &isactive) != 0) {
1622 nvlist_free(config);
1623 return (-1);
1624 }
1625
1626 if (isactive) {
1627 /*
1628 * Because the device may have been removed while
1629 * offlined, we only report it as active if the vdev is
1630 * still present in the config. Otherwise, pretend like
1631 * it's not in use.
1632 */
1633 if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
1634 (pool_config = zpool_get_config(zhp, NULL))
1635 != NULL) {
1636 nvlist_t *nvroot;
1637
1638 verify(nvlist_lookup_nvlist(pool_config,
1639 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1640 ret = find_guid(nvroot, vdev_guid);
1641 } else {
1642 ret = B_FALSE;
1643 }
1644
1645 /*
1646 * If this is an active spare within another pool, we
1647 * treat it like an unused hot spare. This allows the
1648 * user to create a pool with a hot spare that currently
1649 * in use within another pool. Since we return B_TRUE,
1650 * libdiskmgt will continue to prevent generic consumers
1651 * from using the device.
1652 */
1653 if (ret && nvlist_lookup_uint64(config,
1654 ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
1655 stateval = POOL_STATE_SPARE;
1656
1657 if (zhp != NULL)
1658 zpool_close(zhp);
1659 } else {
1660 stateval = POOL_STATE_POTENTIALLY_ACTIVE;
1661 ret = B_TRUE;
1662 }
1663 break;
1664
1665 case POOL_STATE_SPARE:
1666 /*
1667 * For a hot spare, it can be either definitively in use, or
1668 * potentially active. To determine if it's in use, we iterate
1669 * over all pools in the system and search for one with a spare
1670 * with a matching guid.
1671 *
1672 * Due to the shared nature of spares, we don't actually report
1673 * the potentially active case as in use. This means the user
1674 * can freely create pools on the hot spares of exported pools,
1675 * but to do otherwise makes the resulting code complicated, and
1676 * we end up having to deal with this case anyway.
1677 */
1678 cb.cb_zhp = NULL;
1679 cb.cb_guid = vdev_guid;
1680 cb.cb_type = ZPOOL_CONFIG_SPARES;
1681 if (zpool_iter(hdl, find_aux, &cb) == 1) {
1682 name = (char *)zpool_get_name(cb.cb_zhp);
1683 ret = TRUE;
1684 } else {
1685 ret = FALSE;
1686 }
1687 break;
1688
1689 case POOL_STATE_L2CACHE:
1690
1691 /*
1692 * Check if any pool is currently using this l2cache device.
1693 */
1694 cb.cb_zhp = NULL;
1695 cb.cb_guid = vdev_guid;
1696 cb.cb_type = ZPOOL_CONFIG_L2CACHE;
1697 if (zpool_iter(hdl, find_aux, &cb) == 1) {
1698 name = (char *)zpool_get_name(cb.cb_zhp);
1699 ret = TRUE;
1700 } else {
1701 ret = FALSE;
1702 }
1703 break;
1704
1705 default:
1706 ret = B_FALSE;
1707 }
1708
1709
1710 if (ret) {
1711 if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
1712 if (cb.cb_zhp)
1713 zpool_close(cb.cb_zhp);
1714 nvlist_free(config);
1715 return (-1);
1716 }
1717 *state = (pool_state_t)stateval;
1718 }
1719
1720 if (cb.cb_zhp)
1721 zpool_close(cb.cb_zhp);
1722
1723 nvlist_free(config);
1724 *inuse = ret;
1725 return (0);
1726}
569 hostid = 0;
570 if (nvlist_lookup_uint64(tmp,
571 ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
572 if (nvlist_add_uint64(config,
573 ZPOOL_CONFIG_HOSTID, hostid) != 0)
574 goto nomem;
575 verify(nvlist_lookup_string(tmp,
576 ZPOOL_CONFIG_HOSTNAME,
577 &hostname) == 0);
578 if (nvlist_add_string(config,
579 ZPOOL_CONFIG_HOSTNAME,
580 hostname) != 0)
581 goto nomem;
582 }
583
584 config_seen = B_TRUE;
585 }
586
587 /*
588 * Add this top-level vdev to the child array.
589 */
590 verify(nvlist_lookup_nvlist(tmp,
591 ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
592 verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
593 &id) == 0);
594
595 if (id >= children) {
596 nvlist_t **newchild;
597
598 newchild = zfs_alloc(hdl, (id + 1) *
599 sizeof (nvlist_t *));
600 if (newchild == NULL)
601 goto nomem;
602
603 for (c = 0; c < children; c++)
604 newchild[c] = child[c];
605
606 free(child);
607 child = newchild;
608 children = id + 1;
609 }
610 if (nvlist_dup(nvtop, &child[id], 0) != 0)
611 goto nomem;
612
613 }
614
615 /*
616 * If we have information about all the top-levels then
617 * clean up the nvlist which we've constructed. This
618 * means removing any extraneous devices that are
619 * beyond the valid range or adding devices to the end
620 * of our array which appear to be missing.
621 */
622 if (valid_top_config) {
623 if (max_id < children) {
624 for (c = max_id; c < children; c++)
625 nvlist_free(child[c]);
626 children = max_id;
627 } else if (max_id > children) {
628 nvlist_t **newchild;
629
630 newchild = zfs_alloc(hdl, (max_id) *
631 sizeof (nvlist_t *));
632 if (newchild == NULL)
633 goto nomem;
634
635 for (c = 0; c < children; c++)
636 newchild[c] = child[c];
637
638 free(child);
639 child = newchild;
640 children = max_id;
641 }
642 }
643
644 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
645 &guid) == 0);
646
647 /*
648 * The vdev namespace may contain holes as a result of
649 * device removal. We must add them back into the vdev
650 * tree before we process any missing devices.
651 */
652 if (holes > 0) {
653 ASSERT(valid_top_config);
654
655 for (c = 0; c < children; c++) {
656 nvlist_t *holey;
657
658 if (child[c] != NULL ||
659 !vdev_is_hole(hole_array, holes, c))
660 continue;
661
662 if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
663 0) != 0)
664 goto nomem;
665
666 /*
667 * Holes in the namespace are treated as
668 * "hole" top-level vdevs and have a
669 * special flag set on them.
670 */
671 if (nvlist_add_string(holey,
672 ZPOOL_CONFIG_TYPE,
673 VDEV_TYPE_HOLE) != 0 ||
674 nvlist_add_uint64(holey,
675 ZPOOL_CONFIG_ID, c) != 0 ||
676 nvlist_add_uint64(holey,
677 ZPOOL_CONFIG_GUID, 0ULL) != 0)
678 goto nomem;
679 child[c] = holey;
680 }
681 }
682
683 /*
684 * Look for any missing top-level vdevs. If this is the case,
685 * create a faked up 'missing' vdev as a placeholder. We cannot
686 * simply compress the child array, because the kernel performs
687 * certain checks to make sure the vdev IDs match their location
688 * in the configuration.
689 */
690 for (c = 0; c < children; c++) {
691 if (child[c] == NULL) {
692 nvlist_t *missing;
693 if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
694 0) != 0)
695 goto nomem;
696 if (nvlist_add_string(missing,
697 ZPOOL_CONFIG_TYPE,
698 VDEV_TYPE_MISSING) != 0 ||
699 nvlist_add_uint64(missing,
700 ZPOOL_CONFIG_ID, c) != 0 ||
701 nvlist_add_uint64(missing,
702 ZPOOL_CONFIG_GUID, 0ULL) != 0) {
703 nvlist_free(missing);
704 goto nomem;
705 }
706 child[c] = missing;
707 }
708 }
709
710 /*
711 * Put all of this pool's top-level vdevs into a root vdev.
712 */
713 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
714 goto nomem;
715 if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
716 VDEV_TYPE_ROOT) != 0 ||
717 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
718 nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
719 nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
720 child, children) != 0) {
721 nvlist_free(nvroot);
722 goto nomem;
723 }
724
725 for (c = 0; c < children; c++)
726 nvlist_free(child[c]);
727 free(child);
728 children = 0;
729 child = NULL;
730
731 /*
732 * Go through and fix up any paths and/or devids based on our
733 * known list of vdev GUID -> path mappings.
734 */
735 if (fix_paths(nvroot, pl->names) != 0) {
736 nvlist_free(nvroot);
737 goto nomem;
738 }
739
740 /*
741 * Add the root vdev to this pool's configuration.
742 */
743 if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
744 nvroot) != 0) {
745 nvlist_free(nvroot);
746 goto nomem;
747 }
748 nvlist_free(nvroot);
749
750 /*
751 * zdb uses this path to report on active pools that were
752 * imported or created using -R.
753 */
754 if (active_ok)
755 goto add_pool;
756
757 /*
758 * Determine if this pool is currently active, in which case we
759 * can't actually import it.
760 */
761 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
762 &name) == 0);
763 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
764 &guid) == 0);
765
766 if (pool_active(hdl, name, guid, &isactive) != 0)
767 goto error;
768
769 if (isactive) {
770 nvlist_free(config);
771 config = NULL;
772 continue;
773 }
774
775 if ((nvl = refresh_config(hdl, config)) == NULL) {
776 nvlist_free(config);
777 config = NULL;
778 continue;
779 }
780
781 nvlist_free(config);
782 config = nvl;
783
784 /*
785 * Go through and update the paths for spares, now that we have
786 * them.
787 */
788 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
789 &nvroot) == 0);
790 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
791 &spares, &nspares) == 0) {
792 for (i = 0; i < nspares; i++) {
793 if (fix_paths(spares[i], pl->names) != 0)
794 goto nomem;
795 }
796 }
797
798 /*
799 * Update the paths for l2cache devices.
800 */
801 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
802 &l2cache, &nl2cache) == 0) {
803 for (i = 0; i < nl2cache; i++) {
804 if (fix_paths(l2cache[i], pl->names) != 0)
805 goto nomem;
806 }
807 }
808
809 /*
810 * Restore the original information read from the actual label.
811 */
812 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
813 DATA_TYPE_UINT64);
814 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
815 DATA_TYPE_STRING);
816 if (hostid != 0) {
817 verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
818 hostid) == 0);
819 verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
820 hostname) == 0);
821 }
822
823add_pool:
824 /*
825 * Add this pool to the list of configs.
826 */
827 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
828 &name) == 0);
829 if (nvlist_add_nvlist(ret, name, config) != 0)
830 goto nomem;
831
832 found_one = B_TRUE;
833 nvlist_free(config);
834 config = NULL;
835 }
836
837 if (!found_one) {
838 nvlist_free(ret);
839 ret = NULL;
840 }
841
842 return (ret);
843
844nomem:
845 (void) no_memory(hdl);
846error:
847 nvlist_free(config);
848 nvlist_free(ret);
849 for (c = 0; c < children; c++)
850 nvlist_free(child[c]);
851 free(child);
852
853 return (NULL);
854}
855
856/*
857 * Return the offset of the given label.
858 */
859static uint64_t
860label_offset(uint64_t size, int l)
861{
862 ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
863 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
864 0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
865}
866
867/*
868 * Given a file descriptor, read the label information and return an nvlist
869 * describing the configuration, if there is one.
870 */
871int
872zpool_read_label(int fd, nvlist_t **config)
873{
874 struct stat64 statbuf;
875 int l;
876 vdev_label_t *label;
877 uint64_t state, txg, size;
878
879 *config = NULL;
880
881 if (fstat64(fd, &statbuf) == -1)
882 return (0);
883 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
884
885 if ((label = malloc(sizeof (vdev_label_t))) == NULL)
886 return (-1);
887
888 for (l = 0; l < VDEV_LABELS; l++) {
889 if (pread64(fd, label, sizeof (vdev_label_t),
890 label_offset(size, l)) != sizeof (vdev_label_t))
891 continue;
892
893 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
894 sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
895 continue;
896
897 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
898 &state) != 0 || state > POOL_STATE_L2CACHE) {
899 nvlist_free(*config);
900 continue;
901 }
902
903 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
904 (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
905 &txg) != 0 || txg == 0)) {
906 nvlist_free(*config);
907 continue;
908 }
909
910 free(label);
911 return (0);
912 }
913
914 free(label);
915 *config = NULL;
916 return (0);
917}
918
919typedef struct rdsk_node {
920 char *rn_name;
921 int rn_dfd;
922 libzfs_handle_t *rn_hdl;
923 nvlist_t *rn_config;
924 avl_tree_t *rn_avl;
925 avl_node_t rn_node;
926 boolean_t rn_nozpool;
927} rdsk_node_t;
928
929static int
930slice_cache_compare(const void *arg1, const void *arg2)
931{
932 const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
933 const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
934 char *nm1slice, *nm2slice;
935 int rv;
936
937 /*
938 * slices zero and two are the most likely to provide results,
939 * so put those first
940 */
941 nm1slice = strstr(nm1, "s0");
942 nm2slice = strstr(nm2, "s0");
943 if (nm1slice && !nm2slice) {
944 return (-1);
945 }
946 if (!nm1slice && nm2slice) {
947 return (1);
948 }
949 nm1slice = strstr(nm1, "s2");
950 nm2slice = strstr(nm2, "s2");
951 if (nm1slice && !nm2slice) {
952 return (-1);
953 }
954 if (!nm1slice && nm2slice) {
955 return (1);
956 }
957
958 rv = strcmp(nm1, nm2);
959 if (rv == 0)
960 return (0);
961 return (rv > 0 ? 1 : -1);
962}
963
#ifdef sun
/*
 * Look up the cache node named "<diskname><partno>" and, if the partition
 * ('size' blocks of 'blksz' bytes) is smaller than SPA_MINDEVSIZE, flag the
 * node so that it is not opened later.
 */
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	char slicename[MAXNAMELEN];
	rdsk_node_t key;
	rdsk_node_t *match;

	(void) snprintf(slicename, MAXNAMELEN, "%s%u", diskname, partno);
	key.rn_name = slicename;

	/*
	 * A label with a bogus (zero) sector size would otherwise make us
	 * divide by zero below.
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;

	/* Big enough to hold a pool: nothing to flag. */
	if (size >= (SPA_MINDEVSIZE / blksz))
		return;

	match = avl_find(r, &key, NULL);
	if (match != NULL)
		match->rn_nozpool = B_TRUE;
}
#endif	/* sun */
988
989static void
990nozpool_all_slices(avl_tree_t *r, const char *sname)
991{
992#ifdef sun
993 char diskname[MAXNAMELEN];
994 char *ptr;
995 int i;
996
997 (void) strncpy(diskname, sname, MAXNAMELEN);
998 if (((ptr = strrchr(diskname, 's')) == NULL) &&
999 ((ptr = strrchr(diskname, 'p')) == NULL))
1000 return;
1001 ptr[0] = 's';
1002 ptr[1] = '\0';
1003 for (i = 0; i < NDKMAP; i++)
1004 check_one_slice(r, diskname, i, 0, 1);
1005 ptr[0] = 'p';
1006 for (i = 0; i <= FD_NUMPART; i++)
1007 check_one_slice(r, diskname, i, 0, 1);
1008#endif /* sun */
1009}
1010
1011static void
1012check_slices(avl_tree_t *r, int fd, const char *sname)
1013{
1014#ifdef sun
1015 struct extvtoc vtoc;
1016 struct dk_gpt *gpt;
1017 char diskname[MAXNAMELEN];
1018 char *ptr;
1019 int i;
1020
1021 (void) strncpy(diskname, sname, MAXNAMELEN);
1022 if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
1023 return;
1024 ptr[1] = '\0';
1025
1026 if (read_extvtoc(fd, &vtoc) >= 0) {
1027 for (i = 0; i < NDKMAP; i++)
1028 check_one_slice(r, diskname, i,
1029 vtoc.v_part[i].p_size, vtoc.v_sectorsz);
1030 } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
1031 /*
1032 * on x86 we'll still have leftover links that point
1033 * to slices s[9-15], so use NDKMAP instead
1034 */
1035 for (i = 0; i < NDKMAP; i++)
1036 check_one_slice(r, diskname, i,
1037 gpt->efi_parts[i].p_size, gpt->efi_lbasize);
1038 /* nodes p[1-4] are never used with EFI labels */
1039 ptr[0] = 'p';
1040 for (i = 1; i <= FD_NUMPART; i++)
1041 check_one_slice(r, diskname, i, 0, 1);
1042 efi_free(gpt);
1043 }
1044#endif /* sun */
1045}
1046
1047static void
1048zpool_open_func(void *arg)
1049{
1050 rdsk_node_t *rn = arg;
1051 struct stat64 statbuf;
1052 nvlist_t *config;
1053 int fd;
1054
1055 if (rn->rn_nozpool)
1056 return;
1057 if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
1058 /* symlink to a device that's no longer there */
1059 if (errno == ENOENT)
1060 nozpool_all_slices(rn->rn_avl, rn->rn_name);
1061 return;
1062 }
1063 /*
1064 * Ignore failed stats. We only want regular
1065 * files, character devs and block devs.
1066 */
1067 if (fstat64(fd, &statbuf) != 0 ||
1068 (!S_ISREG(statbuf.st_mode) &&
1069 !S_ISCHR(statbuf.st_mode) &&
1070 !S_ISBLK(statbuf.st_mode))) {
1071 (void) close(fd);
1072 return;
1073 }
1074 /* this file is too small to hold a zpool */
1075 if (S_ISREG(statbuf.st_mode) &&
1076 statbuf.st_size < SPA_MINDEVSIZE) {
1077 (void) close(fd);
1078 return;
1079 } else if (!S_ISREG(statbuf.st_mode)) {
1080 /*
1081 * Try to read the disk label first so we don't have to
1082 * open a bunch of minor nodes that can't have a zpool.
1083 */
1084 check_slices(rn->rn_avl, fd, rn->rn_name);
1085 }
1086
1087 if ((zpool_read_label(fd, &config)) != 0) {
1088 (void) close(fd);
1089 (void) no_memory(rn->rn_hdl);
1090 return;
1091 }
1092 (void) close(fd);
1093
1094
1095 rn->rn_config = config;
1096 if (config != NULL) {
1097 assert(rn->rn_nozpool == B_FALSE);
1098 }
1099}
1100
1101/*
1102 * Given a file descriptor, clear (zero) the label information. This function
1103 * is used in the appliance stack as part of the ZFS sysevent module and
1104 * to implement the "zpool labelclear" command.
1105 */
1106int
1107zpool_clear_label(int fd)
1108{
1109 struct stat64 statbuf;
1110 int l;
1111 vdev_label_t *label;
1112 uint64_t size;
1113
1114 if (fstat64(fd, &statbuf) == -1)
1115 return (0);
1116 size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
1117
1118 if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
1119 return (-1);
1120
1121 for (l = 0; l < VDEV_LABELS; l++) {
1122 if (pwrite64(fd, label, sizeof (vdev_label_t),
1123 label_offset(size, l)) != sizeof (vdev_label_t))
1124 return (-1);
1125 }
1126
1127 free(label);
1128 return (0);
1129}
1130
1131/*
1132 * Given a list of directories to search, find all pools stored on disk. This
1133 * includes partial pools which are not available to import. If no args are
1134 * given (argc is 0), then the default directory (/dev/dsk) is searched.
1135 * poolname or guid (but not both) are provided by the caller when trying
1136 * to import a specific pool.
1137 */
1138static nvlist_t *
1139zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
1140{
1141 int i, dirs = iarg->paths;
1142 DIR *dirp = NULL;
1143 struct dirent64 *dp;
1144 char path[MAXPATHLEN];
1145 char *end, **dir = iarg->path;
1146 size_t pathleft;
1147 nvlist_t *ret = NULL;
1148 static char *default_dir = "/dev/dsk";
1149 pool_list_t pools = { 0 };
1150 pool_entry_t *pe, *penext;
1151 vdev_entry_t *ve, *venext;
1152 config_entry_t *ce, *cenext;
1153 name_entry_t *ne, *nenext;
1154 avl_tree_t slice_cache;
1155 rdsk_node_t *slice;
1156 void *cookie;
1157
1158 if (dirs == 0) {
1159 dirs = 1;
1160 dir = &default_dir;
1161 }
1162
1163 /*
1164 * Go through and read the label configuration information from every
1165 * possible device, organizing the information according to pool GUID
1166 * and toplevel GUID.
1167 */
1168 for (i = 0; i < dirs; i++) {
1169 tpool_t *t;
1170 char *rdsk;
1171 int dfd;
1172
1173 /* use realpath to normalize the path */
1174 if (realpath(dir[i], path) == 0) {
1175 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1176 dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
1177 goto error;
1178 }
1179 end = &path[strlen(path)];
1180 *end++ = '/';
1181 *end = 0;
1182 pathleft = &path[sizeof (path)] - end;
1183
1184 /*
1185 * Using raw devices instead of block devices when we're
1186 * reading the labels skips a bunch of slow operations during
1187 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
1188 */
1189 if (strcmp(path, "/dev/dsk/") == 0)
1190 rdsk = "/dev/";
1191 else
1192 rdsk = path;
1193
1194 if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
1195 (dirp = fdopendir(dfd)) == NULL) {
1196 zfs_error_aux(hdl, strerror(errno));
1197 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1198 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1199 rdsk);
1200 goto error;
1201 }
1202
1203 avl_create(&slice_cache, slice_cache_compare,
1204 sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
1205
1206 if (strcmp(rdsk, "/dev/") == 0) {
1207 struct gmesh mesh;
1208 struct gclass *mp;
1209 struct ggeom *gp;
1210 struct gprovider *pp;
1211
1212 errno = geom_gettree(&mesh);
1213 if (errno != 0) {
1214 zfs_error_aux(hdl, strerror(errno));
1215 (void) zfs_error_fmt(hdl, EZFS_BADPATH,
1216 dgettext(TEXT_DOMAIN, "cannot get GEOM tree"));
1217 goto error;
1218 }
1219
1220 LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
1221 LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
1222 LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
1223 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1224 slice->rn_name = zfs_strdup(hdl, pp->lg_name);
1225 slice->rn_avl = &slice_cache;
1226 slice->rn_dfd = dfd;
1227 slice->rn_hdl = hdl;
1228 slice->rn_nozpool = B_FALSE;
1229 avl_add(&slice_cache, slice);
1230 }
1231 }
1232 }
1233
1234 geom_deletetree(&mesh);
1235 goto skipdir;
1236 }
1237
1238 /*
1239 * This is not MT-safe, but we have no MT consumers of libzfs
1240 */
1241 while ((dp = readdir64(dirp)) != NULL) {
1242 const char *name = dp->d_name;
1243 if (name[0] == '.' &&
1244 (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1245 continue;
1246
1247 slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
1248 slice->rn_name = zfs_strdup(hdl, name);
1249 slice->rn_avl = &slice_cache;
1250 slice->rn_dfd = dfd;
1251 slice->rn_hdl = hdl;
1252 slice->rn_nozpool = B_FALSE;
1253 avl_add(&slice_cache, slice);
1254 }
1255skipdir:
1256 /*
1257 * create a thread pool to do all of this in parallel;
1258 * rn_nozpool is not protected, so this is racy in that
1259 * multiple tasks could decide that the same slice can
1260 * not hold a zpool, which is benign. Also choose
1261 * double the number of processors; we hold a lot of
1262 * locks in the kernel, so going beyond this doesn't
1263 * buy us much.
1264 */
1265 t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
1266 0, NULL);
1267 for (slice = avl_first(&slice_cache); slice;
1268 (slice = avl_walk(&slice_cache, slice,
1269 AVL_AFTER)))
1270 (void) tpool_dispatch(t, zpool_open_func, slice);
1271 tpool_wait(t);
1272 tpool_destroy(t);
1273
1274 cookie = NULL;
1275 while ((slice = avl_destroy_nodes(&slice_cache,
1276 &cookie)) != NULL) {
1277 if (slice->rn_config != NULL) {
1278 nvlist_t *config = slice->rn_config;
1279 boolean_t matched = B_TRUE;
1280
1281 if (iarg->poolname != NULL) {
1282 char *pname;
1283
1284 matched = nvlist_lookup_string(config,
1285 ZPOOL_CONFIG_POOL_NAME,
1286 &pname) == 0 &&
1287 strcmp(iarg->poolname, pname) == 0;
1288 } else if (iarg->guid != 0) {
1289 uint64_t this_guid;
1290
1291 matched = nvlist_lookup_uint64(config,
1292 ZPOOL_CONFIG_POOL_GUID,
1293 &this_guid) == 0 &&
1294 iarg->guid == this_guid;
1295 }
1296 if (!matched) {
1297 nvlist_free(config);
1298 config = NULL;
1299 continue;
1300 }
1301 /* use the non-raw path for the config */
1302 (void) strlcpy(end, slice->rn_name, pathleft);
1303 if (add_config(hdl, &pools, path, config) != 0)
1304 goto error;
1305 }
1306 free(slice->rn_name);
1307 free(slice);
1308 }
1309 avl_destroy(&slice_cache);
1310
1311 (void) closedir(dirp);
1312 dirp = NULL;
1313 }
1314
1315 ret = get_configs(hdl, &pools, iarg->can_be_active);
1316
1317error:
1318 for (pe = pools.pools; pe != NULL; pe = penext) {
1319 penext = pe->pe_next;
1320 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
1321 venext = ve->ve_next;
1322 for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
1323 cenext = ce->ce_next;
1324 if (ce->ce_config)
1325 nvlist_free(ce->ce_config);
1326 free(ce);
1327 }
1328 free(ve);
1329 }
1330 free(pe);
1331 }
1332
1333 for (ne = pools.names; ne != NULL; ne = nenext) {
1334 nenext = ne->ne_next;
1335 if (ne->ne_name)
1336 free(ne->ne_name);
1337 free(ne);
1338 }
1339
1340 if (dirp)
1341 (void) closedir(dirp);
1342
1343 return (ret);
1344}
1345
1346nvlist_t *
1347zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
1348{
1349 importargs_t iarg = { 0 };
1350
1351 iarg.paths = argc;
1352 iarg.path = argv;
1353
1354 return (zpool_find_import_impl(hdl, &iarg));
1355}
1356
1357/*
1358 * Given a cache file, return the contents as a list of importable pools.
1359 * poolname or guid (but not both) are provided by the caller when trying
1360 * to import a specific pool.
1361 */
1362nvlist_t *
1363zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
1364 char *poolname, uint64_t guid)
1365{
1366 char *buf;
1367 int fd;
1368 struct stat64 statbuf;
1369 nvlist_t *raw, *src, *dst;
1370 nvlist_t *pools;
1371 nvpair_t *elem;
1372 char *name;
1373 uint64_t this_guid;
1374 boolean_t active;
1375
1376 verify(poolname == NULL || guid == 0);
1377
1378 if ((fd = open(cachefile, O_RDONLY)) < 0) {
1379 zfs_error_aux(hdl, "%s", strerror(errno));
1380 (void) zfs_error(hdl, EZFS_BADCACHE,
1381 dgettext(TEXT_DOMAIN, "failed to open cache file"));
1382 return (NULL);
1383 }
1384
1385 if (fstat64(fd, &statbuf) != 0) {
1386 zfs_error_aux(hdl, "%s", strerror(errno));
1387 (void) close(fd);
1388 (void) zfs_error(hdl, EZFS_BADCACHE,
1389 dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
1390 return (NULL);
1391 }
1392
1393 if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
1394 (void) close(fd);
1395 return (NULL);
1396 }
1397
1398 if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1399 (void) close(fd);
1400 free(buf);
1401 (void) zfs_error(hdl, EZFS_BADCACHE,
1402 dgettext(TEXT_DOMAIN,
1403 "failed to read cache file contents"));
1404 return (NULL);
1405 }
1406
1407 (void) close(fd);
1408
1409 if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
1410 free(buf);
1411 (void) zfs_error(hdl, EZFS_BADCACHE,
1412 dgettext(TEXT_DOMAIN,
1413 "invalid or corrupt cache file contents"));
1414 return (NULL);
1415 }
1416
1417 free(buf);
1418
1419 /*
1420 * Go through and get the current state of the pools and refresh their
1421 * state.
1422 */
1423 if (nvlist_alloc(&pools, 0, 0) != 0) {
1424 (void) no_memory(hdl);
1425 nvlist_free(raw);
1426 return (NULL);
1427 }
1428
1429 elem = NULL;
1430 while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
1431 verify(nvpair_value_nvlist(elem, &src) == 0);
1432
1433 verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
1434 &name) == 0);
1435 if (poolname != NULL && strcmp(poolname, name) != 0)
1436 continue;
1437
1438 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
1439 &this_guid) == 0);
1440 if (guid != 0) {
1441 verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
1442 &this_guid) == 0);
1443 if (guid != this_guid)
1444 continue;
1445 }
1446
1447 if (pool_active(hdl, name, this_guid, &active) != 0) {
1448 nvlist_free(raw);
1449 nvlist_free(pools);
1450 return (NULL);
1451 }
1452
1453 if (active)
1454 continue;
1455
1456 if ((dst = refresh_config(hdl, src)) == NULL) {
1457 nvlist_free(raw);
1458 nvlist_free(pools);
1459 return (NULL);
1460 }
1461
1462 if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
1463 (void) no_memory(hdl);
1464 nvlist_free(dst);
1465 nvlist_free(raw);
1466 nvlist_free(pools);
1467 return (NULL);
1468 }
1469 nvlist_free(dst);
1470 }
1471
1472 nvlist_free(raw);
1473 return (pools);
1474}
1475
1476static int
1477name_or_guid_exists(zpool_handle_t *zhp, void *data)
1478{
1479 importargs_t *import = data;
1480 int found = 0;
1481
1482 if (import->poolname != NULL) {
1483 char *pool_name;
1484
1485 verify(nvlist_lookup_string(zhp->zpool_config,
1486 ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
1487 if (strcmp(pool_name, import->poolname) == 0)
1488 found = 1;
1489 } else {
1490 uint64_t pool_guid;
1491
1492 verify(nvlist_lookup_uint64(zhp->zpool_config,
1493 ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
1494 if (pool_guid == import->guid)
1495 found = 1;
1496 }
1497
1498 zpool_close(zhp);
1499 return (found);
1500}
1501
1502nvlist_t *
1503zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
1504{
1505 verify(import->poolname == NULL || import->guid == 0);
1506
1507 if (import->unique)
1508 import->exists = zpool_iter(hdl, name_or_guid_exists, import);
1509
1510 if (import->cachefile != NULL)
1511 return (zpool_find_import_cached(hdl, import->cachefile,
1512 import->poolname, import->guid));
1513
1514 return (zpool_find_import_impl(hdl, import));
1515}
1516
1517boolean_t
1518find_guid(nvlist_t *nv, uint64_t guid)
1519{
1520 uint64_t tmp;
1521 nvlist_t **child;
1522 uint_t c, children;
1523
1524 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
1525 if (tmp == guid)
1526 return (B_TRUE);
1527
1528 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1529 &child, &children) == 0) {
1530 for (c = 0; c < children; c++)
1531 if (find_guid(child[c], guid))
1532 return (B_TRUE);
1533 }
1534
1535 return (B_FALSE);
1536}
1537
1538typedef struct aux_cbdata {
1539 const char *cb_type;
1540 uint64_t cb_guid;
1541 zpool_handle_t *cb_zhp;
1542} aux_cbdata_t;
1543
1544static int
1545find_aux(zpool_handle_t *zhp, void *data)
1546{
1547 aux_cbdata_t *cbp = data;
1548 nvlist_t **list;
1549 uint_t i, count;
1550 uint64_t guid;
1551 nvlist_t *nvroot;
1552
1553 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1554 &nvroot) == 0);
1555
1556 if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
1557 &list, &count) == 0) {
1558 for (i = 0; i < count; i++) {
1559 verify(nvlist_lookup_uint64(list[i],
1560 ZPOOL_CONFIG_GUID, &guid) == 0);
1561 if (guid == cbp->cb_guid) {
1562 cbp->cb_zhp = zhp;
1563 return (1);
1564 }
1565 }
1566 }
1567
1568 zpool_close(zhp);
1569 return (0);
1570}
1571
1572/*
1573 * Determines if the pool is in use. If so, it returns true and the state of
1574 * the pool as well as the name of the pool. Both strings are allocated and
1575 * must be freed by the caller.
1576 */
1577int
1578zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
1579 boolean_t *inuse)
1580{
1581 nvlist_t *config;
1582 char *name;
1583 boolean_t ret;
1584 uint64_t guid, vdev_guid;
1585 zpool_handle_t *zhp;
1586 nvlist_t *pool_config;
1587 uint64_t stateval, isspare;
1588 aux_cbdata_t cb = { 0 };
1589 boolean_t isactive;
1590
1591 *inuse = B_FALSE;
1592
1593 if (zpool_read_label(fd, &config) != 0) {
1594 (void) no_memory(hdl);
1595 return (-1);
1596 }
1597
1598 if (config == NULL)
1599 return (0);
1600
1601 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
1602 &stateval) == 0);
1603 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
1604 &vdev_guid) == 0);
1605
1606 if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
1607 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1608 &name) == 0);
1609 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1610 &guid) == 0);
1611 }
1612
1613 switch (stateval) {
1614 case POOL_STATE_EXPORTED:
1615 /*
1616 * A pool with an exported state may in fact be imported
1617 * read-only, so check the in-core state to see if it's
1618 * active and imported read-only. If it is, set
1619 * its state to active.
1620 */
1621 if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
1622 (zhp = zpool_open_canfail(hdl, name)) != NULL &&
1623 zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
1624 stateval = POOL_STATE_ACTIVE;
1625
1626 ret = B_TRUE;
1627 break;
1628
1629 case POOL_STATE_ACTIVE:
1630 /*
1631 * For an active pool, we have to determine if it's really part
1632 * of a currently active pool (in which case the pool will exist
1633 * and the guid will be the same), or whether it's part of an
1634 * active pool that was disconnected without being explicitly
1635 * exported.
1636 */
1637 if (pool_active(hdl, name, guid, &isactive) != 0) {
1638 nvlist_free(config);
1639 return (-1);
1640 }
1641
1642 if (isactive) {
1643 /*
1644 * Because the device may have been removed while
1645 * offlined, we only report it as active if the vdev is
1646 * still present in the config. Otherwise, pretend like
1647 * it's not in use.
1648 */
1649 if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
1650 (pool_config = zpool_get_config(zhp, NULL))
1651 != NULL) {
1652 nvlist_t *nvroot;
1653
1654 verify(nvlist_lookup_nvlist(pool_config,
1655 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1656 ret = find_guid(nvroot, vdev_guid);
1657 } else {
1658 ret = B_FALSE;
1659 }
1660
1661 /*
1662 * If this is an active spare within another pool, we
1663 * treat it like an unused hot spare. This allows the
1664 * user to create a pool with a hot spare that currently
1665 * in use within another pool. Since we return B_TRUE,
1666 * libdiskmgt will continue to prevent generic consumers
1667 * from using the device.
1668 */
1669 if (ret && nvlist_lookup_uint64(config,
1670 ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
1671 stateval = POOL_STATE_SPARE;
1672
1673 if (zhp != NULL)
1674 zpool_close(zhp);
1675 } else {
1676 stateval = POOL_STATE_POTENTIALLY_ACTIVE;
1677 ret = B_TRUE;
1678 }
1679 break;
1680
1681 case POOL_STATE_SPARE:
1682 /*
1683 * For a hot spare, it can be either definitively in use, or
1684 * potentially active. To determine if it's in use, we iterate
1685 * over all pools in the system and search for one with a spare
1686 * with a matching guid.
1687 *
1688 * Due to the shared nature of spares, we don't actually report
1689 * the potentially active case as in use. This means the user
1690 * can freely create pools on the hot spares of exported pools,
1691 * but to do otherwise makes the resulting code complicated, and
1692 * we end up having to deal with this case anyway.
1693 */
1694 cb.cb_zhp = NULL;
1695 cb.cb_guid = vdev_guid;
1696 cb.cb_type = ZPOOL_CONFIG_SPARES;
1697 if (zpool_iter(hdl, find_aux, &cb) == 1) {
1698 name = (char *)zpool_get_name(cb.cb_zhp);
1699 ret = TRUE;
1700 } else {
1701 ret = FALSE;
1702 }
1703 break;
1704
1705 case POOL_STATE_L2CACHE:
1706
1707 /*
1708 * Check if any pool is currently using this l2cache device.
1709 */
1710 cb.cb_zhp = NULL;
1711 cb.cb_guid = vdev_guid;
1712 cb.cb_type = ZPOOL_CONFIG_L2CACHE;
1713 if (zpool_iter(hdl, find_aux, &cb) == 1) {
1714 name = (char *)zpool_get_name(cb.cb_zhp);
1715 ret = TRUE;
1716 } else {
1717 ret = FALSE;
1718 }
1719 break;
1720
1721 default:
1722 ret = B_FALSE;
1723 }
1724
1725
1726 if (ret) {
1727 if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
1728 if (cb.cb_zhp)
1729 zpool_close(cb.cb_zhp);
1730 nvlist_free(config);
1731 return (-1);
1732 }
1733 *state = (pool_state_t)stateval;
1734 }
1735
1736 if (cb.cb_zhp)
1737 zpool_close(cb.cb_zhp);
1738
1739 nvlist_free(config);
1740 *inuse = ret;
1741 return (0);
1742}