zpool_vdev.c: revision 168404 (old) vs. revision 169303 (new). Code common to
both revisions is shown once, with revision 168404 line numbers; the two
changed hunks are shown for each revision.
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident "%Z%%M% %I% %E% SMI"
28
29/*
30 * Functions to convert between a list of vdevs and an nvlist representing the
31 * configuration. Each entry in the list can be one of:
32 *
33 * Device vdevs
34 * disk=(path=..., devid=...)
35 * file=(path=...)
36 *
37 * Group vdevs
38 * raidz[1|2]=(...)
39 * mirror=(...)
40 *
41 * Hot spares
42 *
43 * While the underlying implementation supports it, group vdevs cannot contain
44 * other group vdevs. All userland verification of devices is contained within
45 * this file. If successful, the nvlist returned can be passed directly to the
46 * kernel; we've done as much verification as possible in userland.
47 *
48 * Hot spares are a special case, and passed down as an array of disk vdevs, at
49 * the same level as the root of the vdev tree.
50 *
 51 * The primary function exported by this file is 'make_root_vdev'. The
 52 * function performs several passes:
 53 *
 54 * 1. Construct the vdev specification. Performs syntax validation and
 55 * makes sure each device is valid.
 56 * 2. Check for devices in use. Makes sure that none of the devices are
 57 * already in use. Some errors can be overridden using the 'force'
 58 * flag, others cannot.
 59 * 3. Check for replication errors if the 'force' flag is not specified.
 60 * Validates that the replication level is consistent across the
 61 * entire pool.
62 */
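For illustration (hypothetical providers da0 and da1), a command line such as

	zpool create tank mirror da0 da1

is translated by this file into an nvlist tree of roughly the form

	root
	    mirror
	        disk=(path=/dev/da0)
	        disk=(path=/dev/da1)

where the actual keys are the ZPOOL_CONFIG_* strings used in the code below.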
63
64#include <assert.h>
65#include <devid.h>
66#include <errno.h>
67#include <fcntl.h>
68#include <libintl.h>
69#include <libnvpair.h>
70#include <stdio.h>
71#include <string.h>
72#include <unistd.h>
73#include <paths.h>
74#include <sys/stat.h>
75#include <sys/disk.h>
76#include <sys/mntent.h>
77#include <libgeom.h>
78
79#include <libzfs.h>
80
81#include "zpool_util.h"
82
83/*
84 * For any given vdev specification, we can have multiple errors. The
85 * vdev_error() function keeps track of whether we have seen an error yet, and
 86 * prints out a header if it's the first error we've seen.
87 */
88boolean_t error_seen;
89boolean_t is_force;
90
91/*PRINTFLIKE1*/
92static void
93vdev_error(const char *fmt, ...)
94{
95 va_list ap;
96
97 if (!error_seen) {
98 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
99 if (!is_force)
100 (void) fprintf(stderr, gettext("use '-f' to override "
101 "the following errors:\n"));
102 else
103 (void) fprintf(stderr, gettext("the following errors "
104 "must be manually repaired:\n"));
105 error_seen = B_TRUE;
106 }
107
108 va_start(ap, fmt);
109 (void) vfprintf(stderr, fmt, ap);
110 va_end(ap);
111}
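vdev_error() takes printf(3)-style arguments; a typical call from the checks
below is

	vdev_error("%s is in use (%s)\n", name, pp->lg_mode);

and only the first error in a run prints the header above.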
112
113/*
114 * Validate a GEOM provider.
115 */
116static int
117check_provider(const char *name, boolean_t force, boolean_t isspare)
118{
119 struct gmesh mesh;
120 struct gclass *mp;
121 struct ggeom *gp;
122 struct gprovider *pp;
123 int rv;
124
125 /* XXX: What to do with isspare? */
126
127 if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
128 name += sizeof(_PATH_DEV) - 1;
129
130 rv = geom_gettree(&mesh);
131 assert(rv == 0);
132
133 pp = NULL;
134 LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
135 LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
136 LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
137 if (strcmp(pp->lg_name, name) == 0)
138 goto out;
139 }
140 }
141 }
142out:
143 rv = -1;
144 if (pp == NULL)
145 vdev_error("no such provider %s\n", name);
146 else {
147 int acr, acw, ace;
148
149 VERIFY(sscanf(pp->lg_mode, "r%dw%de%d", &acr, &acw, &ace) == 3);
150 if (acw == 0 && ace == 0)
151 rv = 0;
152 else
153 vdev_error("%s is in use (%s)\n", name, pp->lg_mode);
154 }
155 geom_deletetree(&mesh);
156 return (rv);
157}
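The sscanf() in check_provider() decodes GEOM access counts: a mode string
such as "r1w1e1" reports one reader, one writer and one exclusive consumer,
so only providers with no writers and no exclusive holds are accepted. A
minimal standalone sketch of the same test, assuming only that lg_mode always
has the "r%dw%de%d" form:

	static int
	mode_is_busy(const char *lg_mode)
	{
		int acr, acw, ace;

		/* GEOM publishes access counts as "r<readers>w<writers>e<excl>". */
		if (sscanf(lg_mode, "r%dw%de%d", &acr, &acw, &ace) != 3)
			return (1);	/* unparsable: treat as busy */
		return (acw != 0 || ace != 0);
	}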
158
159static boolean_t
160is_provider(const char *name)
161{
revision 168404:
 162 off_t mediasize;
 163 int fd;
 164
 165 fd = open(name, O_RDONLY);
 166 if (fd == -1)
 167 return (B_FALSE);
 168 if (ioctl(fd, DIOCGMEDIASIZE, &mediasize) == -1) {
 169 close(fd);
 170 return (B_FALSE);
 171 }
 172 close(fd);
 173 return (B_TRUE);
 174
 175}

revision 169303:
 162 int fd;
 163
 164 fd = g_open(name, 0);
 165 if (fd >= 0) {
 166 g_close(fd);
 167 return (B_TRUE);
 168 }
 169 return (B_FALSE);
 170
 171}
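Revision 169303 drops the DIOCGMEDIASIZE probe in favor of libgeom:
g_open(name, 0) opens the named provider without write access and fails for
device nodes that are not GEOM providers, which is exactly the property
is_provider() needs.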
Common to both revisions:
 176/*
177 * Create a leaf vdev. Determine if this is a GEOM provider.
178 * Valid forms for a leaf vdev are:
179 *
180 * /dev/xxx Complete path to a GEOM provider
181 * xxx Shorthand for /dev/xxx
182 */
183nvlist_t *
184make_leaf_vdev(const char *arg)
185{
revision 168404:
 186 char path[MAXPATHLEN];
 187 nvlist_t *vdev = NULL;
 188 char *type = NULL;

revision 169303:
 182 char ident[DISK_IDENT_SIZE], path[MAXPATHLEN];
 183 struct stat64 statbuf;
 184 nvlist_t *vdev = NULL;
 185 char *type = NULL;
 186 boolean_t wholedisk = B_FALSE;

Common to both revisions:
189
190 if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
191 strlcpy(path, arg, sizeof (path));
192 else
193 snprintf(path, sizeof (path), "%s%s", _PATH_DEV, arg);
194
195 if (is_provider(path))
196 type = VDEV_TYPE_DISK;
197 else {
198 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
199 "GEOM provider\n"), path);
200 return (NULL);
201 }
202
203 /*
204 * Finally, we have the complete device or file, and we know that it is
205 * acceptable to use. Construct the nvlist to describe this vdev. All
206 * vdevs have a 'path' element, and devices also have a 'devid' element.
207 */
208 verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
209 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
210 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
211 if (strcmp(type, VDEV_TYPE_DISK) == 0)
212 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
213 (uint64_t)B_FALSE) == 0);
214
Added in revision 169303 (new lines 213-247):
 213 /*
214 * For a whole disk, defer getting its devid until after labeling it.
215 */
216 if (1 || (S_ISBLK(statbuf.st_mode) && !wholedisk)) {
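	/*
	 * Note: the "1 ||" short-circuits the Solaris whole-disk test, so
	 * the uninitialized 'statbuf' is never examined here and the devid
	 * lookup is attempted for every device.
	 */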
217 /*
218 * Get the devid for the device.
219 */
220 int fd;
221 ddi_devid_t devid;
222 char *minor = NULL, *devid_str = NULL;
223
224 if ((fd = open(path, O_RDONLY)) < 0) {
225 (void) fprintf(stderr, gettext("cannot open '%s': "
226 "%s\n"), path, strerror(errno));
227 nvlist_free(vdev);
228 return (NULL);
229 }
230
231 if (devid_get(fd, &devid) == 0) {
232 if (devid_get_minor_name(fd, &minor) == 0 &&
233 (devid_str = devid_str_encode(devid, minor)) !=
234 NULL) {
235 verify(nvlist_add_string(vdev,
236 ZPOOL_CONFIG_DEVID, devid_str) == 0);
237 }
238 if (devid_str != NULL)
239 devid_str_free(devid_str);
240 if (minor != NULL)
241 devid_str_free(minor);
242 devid_free(devid);
243 }
244
245 (void) close(fd);
246 }
247
Common to both revisions (old lines 215-216 = new lines 248-249):
 215 return (vdev);
 216}
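For a hypothetical provider da0, the nvlist returned here carries
path=/dev/da0, type=VDEV_TYPE_DISK and whole_disk=0; in revision 169303 a
'devid' string is attached as well whenever devid_get() can supply one.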
217
218/*
219 * Go through and verify the replication level of the pool is consistent.
220 * Performs the following checks:
221 *
222 * For the new spec, verifies that devices in mirrors and raidz are the
223 * same size.
224 *
225 * If the current configuration already has inconsistent replication
226 * levels, ignore any other potential problems in the new spec.
227 *
228 * Otherwise, make sure that the current spec (if there is one) and the new
229 * spec have consistent replication levels.
230 */
231typedef struct replication_level {
232 char *zprl_type;
233 uint64_t zprl_children;
234 uint64_t zprl_parity;
235} replication_level_t;
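Sample values for two hypothetical toplevels: a 2-way mirror yields
{ zprl_type = "mirror", zprl_children = 2, zprl_parity = 0 }, while a
5-disk raidz2 yields { zprl_type = "raidz", zprl_children = 5,
zprl_parity = 2 }.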
236
237/*
238 * Given a list of toplevel vdevs, return the current replication level. If
239 * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
240 * an error message will be displayed for each self-inconsistent vdev.
241 */
242replication_level_t *
243get_replication(nvlist_t *nvroot, boolean_t fatal)
244{
245 nvlist_t **top;
246 uint_t t, toplevels;
247 nvlist_t **child;
248 uint_t c, children;
249 nvlist_t *nv;
250 char *type;
251 replication_level_t lastrep, rep, *ret;
252 boolean_t dontreport;
253
254 ret = safe_malloc(sizeof (replication_level_t));
255
256 verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
257 &top, &toplevels) == 0);
258
259 lastrep.zprl_type = NULL;
260 for (t = 0; t < toplevels; t++) {
261 nv = top[t];
262
263 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
264
265 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
266 &child, &children) != 0) {
267 /*
268 * This is a 'file' or 'disk' vdev.
269 */
270 rep.zprl_type = type;
271 rep.zprl_children = 1;
272 rep.zprl_parity = 0;
273 } else {
274 uint64_t vdev_size;
275
276 /*
277 * This is a mirror or RAID-Z vdev. Go through and make
278 * sure the contents are all the same (files vs. disks),
279 * keeping track of the number of elements in the
280 * process.
281 *
282 * We also check that the size of each vdev (if it can
283 * be determined) is the same.
284 */
285 rep.zprl_type = type;
286 rep.zprl_children = 0;
287
288 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
289 verify(nvlist_lookup_uint64(nv,
290 ZPOOL_CONFIG_NPARITY,
291 &rep.zprl_parity) == 0);
292 assert(rep.zprl_parity != 0);
293 } else {
294 rep.zprl_parity = 0;
295 }
296
297 /*
 298 * The 'dontreport' variable indicates that we've
299 * already reported an error for this spec, so don't
300 * bother doing it again.
301 */
302 type = NULL;
303 dontreport = 0;
304 vdev_size = -1ULL;
305 for (c = 0; c < children; c++) {
306 nvlist_t *cnv = child[c];
307 char *path;
308 struct stat64 statbuf;
309 uint64_t size = -1ULL;
310 char *childtype;
311 int fd, err;
312
313 rep.zprl_children++;
314
315 verify(nvlist_lookup_string(cnv,
316 ZPOOL_CONFIG_TYPE, &childtype) == 0);
317
318 /*
 319 * If this is a replacing or spare vdev, then
320 * get the real first child of the vdev.
321 */
322 if (strcmp(childtype,
323 VDEV_TYPE_REPLACING) == 0 ||
324 strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
325 nvlist_t **rchild;
326 uint_t rchildren;
327
328 verify(nvlist_lookup_nvlist_array(cnv,
329 ZPOOL_CONFIG_CHILDREN, &rchild,
330 &rchildren) == 0);
331 assert(rchildren == 2);
332 cnv = rchild[0];
333
334 verify(nvlist_lookup_string(cnv,
335 ZPOOL_CONFIG_TYPE,
336 &childtype) == 0);
337 }
338
339 verify(nvlist_lookup_string(cnv,
340 ZPOOL_CONFIG_PATH, &path) == 0);
341
342 /*
343 * If we have a raidz/mirror that combines disks
344 * with files, report it as an error.
345 */
346 if (!dontreport && type != NULL &&
347 strcmp(type, childtype) != 0) {
348 if (ret != NULL)
349 free(ret);
350 ret = NULL;
351 if (fatal)
352 vdev_error(gettext(
353 "mismatched replication "
354 "level: %s contains both "
355 "files and devices\n"),
356 rep.zprl_type);
357 else
358 return (NULL);
359 dontreport = B_TRUE;
360 }
361
362 /*
363 * According to stat(2), the value of 'st_size'
364 * is undefined for block devices and character
365 * devices. But there is no effective way to
366 * determine the real size in userland.
367 *
368 * Instead, we'll take advantage of an
369 * implementation detail of spec_size(). If the
370 * device is currently open, then we (should)
371 * return a valid size.
372 *
373 * If we still don't get a valid size (indicated
374 * by a size of 0 or MAXOFFSET_T), then ignore
375 * this device altogether.
376 */
377 if ((fd = open(path, O_RDONLY)) >= 0) {
378 err = fstat64(fd, &statbuf);
379 (void) close(fd);
380 } else {
381 err = stat64(path, &statbuf);
382 }
383
384 if (err != 0 || statbuf.st_size == 0)
385 continue;
386
387 size = statbuf.st_size;
388
389 /*
390 * Also check the size of each device. If they
391 * differ, then report an error.
392 */
393 if (!dontreport && vdev_size != -1ULL &&
394 size != vdev_size) {
395 if (ret != NULL)
396 free(ret);
397 ret = NULL;
398 if (fatal)
399 vdev_error(gettext(
400 "%s contains devices of "
401 "different sizes\n"),
402 rep.zprl_type);
403 else
404 return (NULL);
405 dontreport = B_TRUE;
406 }
407
408 type = childtype;
409 vdev_size = size;
410 }
411 }
412
413 /*
414 * At this point, we have the replication of the last toplevel
 415 * vdev in 'rep'. Compare it to 'lastrep' to see if it's
416 * different.
417 */
418 if (lastrep.zprl_type != NULL) {
419 if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
420 if (ret != NULL)
421 free(ret);
422 ret = NULL;
423 if (fatal)
424 vdev_error(gettext(
425 "mismatched replication level: "
426 "both %s and %s vdevs are "
427 "present\n"),
428 lastrep.zprl_type, rep.zprl_type);
429 else
430 return (NULL);
431 } else if (lastrep.zprl_parity != rep.zprl_parity) {
432 if (ret)
433 free(ret);
434 ret = NULL;
435 if (fatal)
436 vdev_error(gettext(
437 "mismatched replication level: "
438 "both %llu and %llu device parity "
439 "%s vdevs are present\n"),
440 lastrep.zprl_parity,
441 rep.zprl_parity,
442 rep.zprl_type);
443 else
444 return (NULL);
445 } else if (lastrep.zprl_children != rep.zprl_children) {
446 if (ret)
447 free(ret);
448 ret = NULL;
449 if (fatal)
450 vdev_error(gettext(
451 "mismatched replication level: "
452 "both %llu-way and %llu-way %s "
453 "vdevs are present\n"),
454 lastrep.zprl_children,
455 rep.zprl_children,
456 rep.zprl_type);
457 else
458 return (NULL);
459 }
460 }
461 lastrep = rep;
462 }
463
464 if (ret != NULL)
465 *ret = rep;
466
467 return (ret);
468}
469
470/*
 471 * Check the replication level of the vdev spec against the current pool. Calls
 472 * get_replication() to make sure the new spec is self-consistent. If the pool
 473 * already has an inconsistent replication level, then we ignore any errors in
 474 * the new spec. Otherwise, report any difference between the two.
475 */
476int
477check_replication(nvlist_t *config, nvlist_t *newroot)
478{
479 replication_level_t *current = NULL, *new;
480 int ret;
481
482 /*
483 * If we have a current pool configuration, check to see if it's
484 * self-consistent. If not, simply return success.
485 */
486 if (config != NULL) {
487 nvlist_t *nvroot;
488
489 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
490 &nvroot) == 0);
491 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
492 return (0);
493 }
494
495 /*
496 * Get the replication level of the new vdev spec, reporting any
497 * inconsistencies found.
498 */
499 if ((new = get_replication(newroot, B_TRUE)) == NULL) {
500 free(current);
501 return (-1);
502 }
503
504 /*
505 * Check to see if the new vdev spec matches the replication level of
506 * the current pool.
507 */
508 ret = 0;
509 if (current != NULL) {
510 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
511 vdev_error(gettext(
512 "mismatched replication level: pool uses %s "
513 "and new vdev is %s\n"),
514 current->zprl_type, new->zprl_type);
515 ret = -1;
516 } else if (current->zprl_parity != new->zprl_parity) {
517 vdev_error(gettext(
518 "mismatched replication level: pool uses %llu "
519 "device parity and new vdev uses %llu\n"),
520 current->zprl_parity, new->zprl_parity);
521 ret = -1;
522 } else if (current->zprl_children != new->zprl_children) {
523 vdev_error(gettext(
524 "mismatched replication level: pool uses %llu-way "
525 "%s and new vdev uses %llu-way %s\n"),
526 current->zprl_children, current->zprl_type,
527 new->zprl_children, new->zprl_type);
528 ret = -1;
529 }
530 }
531
532 free(new);
533 if (current != NULL)
534 free(current);
535
536 return (ret);
537}
538
539/*
540 * Determine if the given path is a hot spare within the given configuration.
541 */
542static boolean_t
543is_spare(nvlist_t *config, const char *path)
544{
545 int fd;
546 pool_state_t state;
547 char *name = NULL;
548 nvlist_t *label;
549 uint64_t guid, spareguid;
550 nvlist_t *nvroot;
551 nvlist_t **spares;
552 uint_t i, nspares;
553 boolean_t inuse;
554
555 if ((fd = open(path, O_RDONLY)) < 0)
556 return (B_FALSE);
557
558 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
559 !inuse ||
560 state != POOL_STATE_SPARE ||
561 zpool_read_label(fd, &label) != 0) {
562 free(name);
563 (void) close(fd);
564 return (B_FALSE);
565 }
566 free(name);
567
568 (void) close(fd);
569 verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
570 nvlist_free(label);
571
572 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
573 &nvroot) == 0);
574 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
575 &spares, &nspares) == 0) {
576 for (i = 0; i < nspares; i++) {
577 verify(nvlist_lookup_uint64(spares[i],
578 ZPOOL_CONFIG_GUID, &spareguid) == 0);
579 if (spareguid == guid)
580 return (B_TRUE);
581 }
582 }
583
584 return (B_FALSE);
585}
586
587/*
 588 * Go through and find any devices that are in use. On FreeBSD we rely on the
 589 * GEOM checks in check_provider() for the majority of this task.
590 */
591int
592check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
593 int isspare)
594{
595 nvlist_t **child;
596 uint_t c, children;
597 char *type, *path;
 598 int ret = 0;
599 char buf[MAXPATHLEN];
600 uint64_t wholedisk;
601
602 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
603
604 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
605 &child, &children) != 0) {
606
607 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
608
609 /*
610 * As a generic check, we look to see if this is a replace of a
611 * hot spare within the same pool. If so, we allow it
 612 * regardless of what check_provider() or zpool_in_use() says.
613 */
614 if (isreplacing) {
615 (void) strlcpy(buf, path, sizeof (buf));
616 if (is_spare(config, buf))
617 return (0);
618 }
619
620 if (strcmp(type, VDEV_TYPE_DISK) == 0)
621 ret = check_provider(path, force, isspare);
622
623 return (ret);
624 }
625
626 for (c = 0; c < children; c++)
627 if ((ret = check_in_use(config, child[c], force,
628 isreplacing, B_FALSE)) != 0)
629 return (ret);
630
631 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
632 &child, &children) == 0)
633 for (c = 0; c < children; c++)
634 if ((ret = check_in_use(config, child[c], force,
635 isreplacing, B_TRUE)) != 0)
636 return (ret);
637
638 return (0);
639}
640
641const char *
642is_grouping(const char *type, int *mindev)
643{
644 if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
645 if (mindev != NULL)
646 *mindev = 2;
647 return (VDEV_TYPE_RAIDZ);
648 }
649
650 if (strcmp(type, "raidz2") == 0) {
651 if (mindev != NULL)
652 *mindev = 3;
653 return (VDEV_TYPE_RAIDZ);
654 }
655
656 if (strcmp(type, "mirror") == 0) {
657 if (mindev != NULL)
658 *mindev = 2;
659 return (VDEV_TYPE_MIRROR);
660 }
661
662 if (strcmp(type, "spare") == 0) {
663 if (mindev != NULL)
664 *mindev = 1;
665 return (VDEV_TYPE_SPARE);
666 }
667
668 return (NULL);
669}
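The mindev values encode the smallest sensible group, i.e. parity devices
plus one: raidz/raidz1 -> 2, raidz2 -> 3, mirror -> 2, spare -> 1.
construct_spec() below also derives ZPOOL_CONFIG_NPARITY as mindev - 1 for
raidz groups.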
670
671/*
672 * Construct a syntactically valid vdev specification,
673 * and ensure that all devices and files exist and can be opened.
674 * Note: we don't bother freeing anything in the error paths
675 * because the program is just going to exit anyway.
676 */
677nvlist_t *
678construct_spec(int argc, char **argv)
679{
680 nvlist_t *nvroot, *nv, **top, **spares;
681 int t, toplevels, mindev, nspares;
682 const char *type;
683
684 top = NULL;
685 toplevels = 0;
686 spares = NULL;
687 nspares = 0;
688
689 while (argc > 0) {
690 nv = NULL;
691
692 /*
693 * If it's a mirror or raidz, the subsequent arguments are
694 * its leaves -- until we encounter the next mirror or raidz.
695 */
696 if ((type = is_grouping(argv[0], &mindev)) != NULL) {
697 nvlist_t **child = NULL;
698 int c, children = 0;
699
700 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
701 spares != NULL) {
702 (void) fprintf(stderr, gettext("invalid vdev "
703 "specification: 'spare' can be "
704 "specified only once\n"));
705 return (NULL);
706 }
707
708 for (c = 1; c < argc; c++) {
709 if (is_grouping(argv[c], NULL) != NULL)
710 break;
711 children++;
712 child = realloc(child,
713 children * sizeof (nvlist_t *));
714 if (child == NULL)
715 zpool_no_memory();
716 if ((nv = make_leaf_vdev(argv[c])) == NULL)
717 return (NULL);
718 child[children - 1] = nv;
719 }
720
721 if (children < mindev) {
722 (void) fprintf(stderr, gettext("invalid vdev "
723 "specification: %s requires at least %d "
724 "devices\n"), argv[0], mindev);
725 return (NULL);
726 }
727
728 argc -= c;
729 argv += c;
730
731 if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
732 spares = child;
733 nspares = children;
734 continue;
735 } else {
736 verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
737 0) == 0);
738 verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
739 type) == 0);
740 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
741 verify(nvlist_add_uint64(nv,
742 ZPOOL_CONFIG_NPARITY,
743 mindev - 1) == 0);
744 }
745 verify(nvlist_add_nvlist_array(nv,
746 ZPOOL_CONFIG_CHILDREN, child,
747 children) == 0);
748
749 for (c = 0; c < children; c++)
750 nvlist_free(child[c]);
751 free(child);
752 }
753 } else {
754 /*
755 * We have a device. Pass off to make_leaf_vdev() to
756 * construct the appropriate nvlist describing the vdev.
757 */
758 if ((nv = make_leaf_vdev(argv[0])) == NULL)
759 return (NULL);
760 argc--;
761 argv++;
762 }
763
764 toplevels++;
765 top = realloc(top, toplevels * sizeof (nvlist_t *));
766 if (top == NULL)
767 zpool_no_memory();
768 top[toplevels - 1] = nv;
769 }
770
771 if (toplevels == 0 && nspares == 0) {
772 (void) fprintf(stderr, gettext("invalid vdev "
773 "specification: at least one toplevel vdev must be "
774 "specified\n"));
775 return (NULL);
776 }
777
778 /*
779 * Finally, create nvroot and add all top-level vdevs to it.
780 */
781 verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
782 verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
783 VDEV_TYPE_ROOT) == 0);
784 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
785 top, toplevels) == 0);
786 if (nspares != 0)
787 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
788 spares, nspares) == 0);
789
790 for (t = 0; t < toplevels; t++)
791 nvlist_free(top[t]);
792 for (t = 0; t < nspares; t++)
793 nvlist_free(spares[t]);
794 if (spares)
795 free(spares);
796 free(top);
797
798 return (nvroot);
799}
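As a worked example (hypothetical providers), the argument list

	mirror da0 da1 raidz da2 da3 da4

yields a root vdev with two toplevels, a 2-way mirror and a single-parity
3-disk raidz; a trailing "spare da5" group would instead be attached as the
ZPOOL_CONFIG_SPARES array.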
800
801/*
802 * Get and validate the contents of the given vdev specification. This ensures
803 * that the nvlist returned is well-formed, that all the devices exist, and that
804 * they are not currently in use by any other known consumer. The 'poolconfig'
 805 * parameter is the current configuration of the pool when adding devices to an
 806 * existing pool; it is used to perform additional checks, such as catching an
 807 * attempted change of the pool's replication level. It can be 'NULL' to
 808 * indicate that this is a new pool. The 'force' flag controls whether devices
 809 * should be forcefully added, even if they appear to be in use.
810 */
811nvlist_t *
812make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
813 boolean_t isreplacing, int argc, char **argv)
814{
815 nvlist_t *newroot;
816
817 is_force = force;
818
819 /*
820 * Construct the vdev specification. If this is successful, we know
821 * that we have a valid specification, and that all devices can be
822 * opened.
823 */
824 if ((newroot = construct_spec(argc, argv)) == NULL)
825 return (NULL);
826
827 /*
 828 * Validate each device to make sure that it's not shared with another
829 * subsystem. We do this even if 'force' is set, because there are some
830 * uses (such as a dedicated dump device) that even '-f' cannot
831 * override.
832 */
833 if (check_in_use(poolconfig, newroot, force, isreplacing,
834 B_FALSE) != 0) {
835 nvlist_free(newroot);
836 return (NULL);
837 }
838
839 /*
840 * Check the replication level of the given vdevs and report any errors
841 * found. We include the existing pool spec, if any, as we need to
842 * catch changes against the existing replication level.
843 */
844 if (check_rep && check_replication(poolconfig, newroot) != 0) {
845 nvlist_free(newroot);
846 return (NULL);
847 }
848
849 return (newroot);
850}
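A sketch of how a caller such as 'zpool create' might drive this entry point
(hypothetical surrounding code; 'force', 'argc' and 'argv' come from the
caller, and error handling is elided):

	nvlist_t *newroot;

	/*
	 * NULL poolconfig: a brand-new pool, so there is no existing
	 * replication level to compare against.
	 */
	newroot = make_root_vdev(NULL, force, B_TRUE, B_FALSE, argc, argv);
	if (newroot == NULL)
		exit(1);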
(The revision 169303 copy of the remainder, new lines 248-883, matched old
lines 215-850 verbatim and is omitted here; only is_provider() and
make_leaf_vdev() differ between the two revisions.)