/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

/*
 * Pool import support functions.
 *
 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
 * these commands are expected to run in the global zone, we can assume
 * that the devices are all readable when called.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device. If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded. Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed. Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/dktp/fdisk.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include <libzfs.h>

#include "zutil_import.h"

#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * Skip devices with well known prefixes:
 * there can be side effects when opening devices which need to be avoided.
 *
 * hpet        - High Precision Event Timer
 * watchdog[N] - Watchdog must be closed in a special way.
 */
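/*
 * For example, "hpet", "watchdog", and "watchdog0" are skipped, while a name
 * such as "sda" is not.  Callers pass only the basename of the device path.
 */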
static boolean_t
should_skip_dev(const char *dev)
{
	return ((strcmp(dev, "watchdog") == 0) ||
	    (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
	    (strcmp(dev, "hpet") == 0));
}

int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, BLKFLSBUF));
}

void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	libpc_handle_t *hdl = rn->rn_hdl;
	struct stat64 statbuf;
	nvlist_t *config;
	uint64_t vdev_guid = 0;
	int error;
	int num_labels = 0;
	int fd;

	if (should_skip_dev(zfs_basename(rn->rn_name)))
		return;

	/*
	 * Ignore failed stats.  We only want regular files and block devices.
	 * Ignore files that are too small to hold a zpool.
	 */
	if (stat64(rn->rn_name, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
	    (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
		return;

	/*
	 * Preferentially open using O_DIRECT to bypass the block device
	 * cache which may be stale for multipath devices.  An EINVAL errno
	 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
	 */
	fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
	if ((fd < 0) && (errno == EINVAL))
		fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
	if ((fd < 0) && (errno == EACCES))
		hdl->lpc_open_access_error = B_TRUE;
	if (fd < 0)
		return;

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	/*
	 * Check that the vdev is for the expected guid.  Additional entries
	 * are speculatively added based on the paths stored in the labels.
	 * Entries with valid paths but incorrect guids must be removed.
	 */
	error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
	if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	rn->rn_config = config;
	rn->rn_num_labels = num_labels;

	/*
	 * Add additional entries for paths described by this label.
	 */
	if (rn->rn_labelpaths) {
		const char *path = NULL;
		const char *devid = NULL;
		rdsk_node_t *slice;
		avl_index_t where;
		int error;

		if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
			return;

		/*
		 * Allow devlinks to stabilize so all paths are available.
		 */
		zpool_disk_wait(rn->rn_name);
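
		/*
		 * The slices added below are speculative: zpool_open_func()
		 * is re-run on each one and rejects it (no config is
		 * attached) when the label guid does not match.  Each added
		 * slice has rn_labelpaths set to B_FALSE, so this recursion
		 * is at most one level deep.
		 */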
		if (path != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, path);
			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_1;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}

		if (devid != NULL) {
			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			error = asprintf(&slice->rn_name, "%s%s",
			    DEV_BYID_PATH, devid);
			if (error == -1) {
				free(slice);
				return;
			}

			slice->rn_vdev_guid = vdev_guid;
			slice->rn_avl = rn->rn_avl;
			slice->rn_hdl = hdl;
			slice->rn_order = IMPORT_ORDER_PREFERRED_2;
			slice->rn_labelpaths = B_FALSE;
			pthread_mutex_lock(rn->rn_lock);
			if (avl_find(rn->rn_avl, slice, &where)) {
				pthread_mutex_unlock(rn->rn_lock);
				free(slice->rn_name);
				free(slice);
			} else {
				avl_insert(rn->rn_avl, slice, where);
				pthread_mutex_unlock(rn->rn_lock);
				zpool_open_func(slice);
			}
		}
	}
}

static const char * const
zpool_default_import_path[] = {
	"/dev/disk/by-vdev",	/* Custom rules, use first if they exist */
	"/dev/mapper",		/* Use multipath devices before components */
	"/dev/disk/by-partlabel", /* Single unique entry set by user */
	"/dev/disk/by-partuuid", /* Generated partition uuid */
	"/dev/disk/by-label",	/* Custom persistent labels */
	"/dev/disk/by-uuid",	/* Single unique entry and persistent */
	"/dev/disk/by-id",	/* May be multiple entries and persistent */
	"/dev/disk/by-path",	/* Encodes physical location and persistent */
	"/dev"			/* UNSAFE device names will change */
};

const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}

/*
 * Given a full path to a device determine if that device appears in the
 * import search path.  If it does return the first match and store the
 * index in the passed 'order' variable, otherwise return an error.
 */
static int
zfs_path_order(const char *name, int *order)
{
	const char *env = getenv("ZPOOL_IMPORT_PATH");

	if (env) {
		for (int i = 0; ; ++i) {
			env += strspn(env, ":");
			size_t dirlen = strcspn(env, ":");
			if (dirlen) {
				if (strncmp(name, env, dirlen) == 0) {
					*order = i;
					return (0);
				}

				env += dirlen;
			} else
				break;
		}
	} else {
		for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
		    ++i) {
			if (strncmp(name, zpool_default_import_path[i],
			    strlen(zpool_default_import_path[i])) == 0) {
				*order = i;
				return (0);
			}
		}
	}

	return (ENOENT);
}
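
/*
 * Worked example (hypothetical setting): with
 * ZPOOL_IMPORT_PATH="/dev/disk/by-vdev:/dev/mapper", the name
 * "/dev/mapper/mpatha" matches the second colon-separated entry, so *order
 * is set to 1; a name outside both directories returns ENOENT.
 */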

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	rdsk_node_t *slice;
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	avl_index_t where;
	int error;

	*slice_cache = NULL;

	error = blkid_get_cache(&cache, NULL);
	if (error != 0)
		return (error);

	error = blkid_probe_all_new(cache);
	if (error != 0) {
		blkid_put_cache(cache);
		return (error);
	}

	iter = blkid_dev_iterate_begin(cache);
	if (iter == NULL) {
		blkid_put_cache(cache);
		return (EINVAL);
	}

	/* Only const char *s since 2.32 */
	error = blkid_dev_set_search(iter,
	    (char *)"TYPE", (char *)"zfs_member");
	if (error != 0) {
		blkid_dev_iterate_end(iter);
		blkid_put_cache(cache);
		return (error);
	}

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

	while (blkid_dev_next(iter, &dev) == 0) {
		slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		slice->rn_labelpaths = B_TRUE;

		error = zfs_path_order(slice->rn_name, &slice->rn_order);
		if (error == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where)) {
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	blkid_dev_iterate_end(iter);
	blkid_put_cache(cache);

	return (0);
}

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */

typedef struct vdev_dev_strs {
	char vds_devid[128];
	char vds_devphys[128];
} vdev_dev_strs_t;
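
/*
 * Example contents (taken from the update_vdev_config_dev_strs() comment
 * below):
 *	vds_devid:   'scsi-MG03SCA300_350000494a8cb3d67-part1'
 *	vds_devphys: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 */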

#ifdef HAVE_LIBUDEV

/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	char devbyid[MAXPATHLEN];

	/* The bus based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes use the persistent uuid based identifier
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes use the persistent /dev/zvol/dataset identifier
		 */
		entry = udev_device_get_devlinks_list_entry(dev);
		while (entry != NULL) {
			const char *name;

			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
			entry = udev_list_entry_get_next(entry);
		}

		/*
		 * NVME 'by-id' symlinks are similar to bus case
		 */
		struct udev_device *parent;

		parent = udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL);
		if (parent != NULL)
			bus = "nvme";	/* continue with bus symlink search */
		else
			return (ENODATA);
	}

	/*
	 * locate the bus specific by-id link
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		const char *name;

		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
			name += strlen(DEV_BYID_PATH);
			(void) strlcpy(bufptr, name, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	const char *physpath = NULL;
	struct udev_list_entry *entry;

	/*
	 * Normal disks use ID_PATH for their physical path.
	 */
	physpath = udev_device_get_property_value(dev, "ID_PATH");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * Device mapper devices are virtual and don't have a physical
	 * path.  For them we use ID_VDEV instead, which is setup via the
	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
	 * to a virtual device.  If you don't have vdev_id.conf setup,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	physpath = udev_device_get_property_value(dev, "ID_VDEV");
	if (physpath != NULL && strlen(physpath) > 0) {
		(void) strlcpy(bufptr, physpath, buflen);
		return (0);
	}

	/*
	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	/*
	 * For all other devices fallback to using the by-uuid name.
	 */
	entry = udev_device_get_devlinks_list_entry(dev);
	while (entry != NULL) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
		entry = udev_list_entry_get_next(entry);
	}

	return (ENODATA);
}

/*
 * A disk is considered a multipath whole disk when:
 *	DEVNAME key value has "dm-"
 *	DM_NAME key value has "mpath" prefix
 *	DM_UUID key exists
 *	ID_PART_TABLE_TYPE key does not exist or is not gpt
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
	    ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
	    (uuid != NULL)) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif
}

#else

int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev, (void) bufptr, (void) buflen;
	return (ENODATA);
}

#endif /* HAVE_LIBUDEV */

/*
 * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device can be accessed reliably.  Depending on
 * the complexity of the udev rules this process could take several seconds.
 */
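
/*
 * For example, with the defaults hard-coded below the node is polled every
 * 10 ms and must pass the checks (node present, all device links stat-able)
 * on consecutive polls spanning at least 50 ms (settle_ms) before 0 is
 * returned.
 */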
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname = NULL;
	int ret = ENODEV;
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	start = gethrtime();
	settle = 0;

	do {
		if (sysname == NULL) {
			if (realpath(path, nodepath) != NULL) {
				sysname = strrchr(nodepath, '/') + 1;
			} else {
				(void) usleep(sleep_ms * MILLISEC);
				continue;
			}
		}

		dev = udev_device_new_from_subsystem_sysname(udev,
		    "block", sysname);
		if ((dev != NULL) && udev_device_is_ready(dev)) {
			struct udev_list_entry *links, *link = NULL;

			ret = 0;
			links = udev_device_get_devlinks_list_entry(dev);

			udev_list_entry_foreach(link, links) {
				struct stat64 statbuf;
				const char *name;

				name = udev_list_entry_get_name(link);
				errno = 0;
				if (stat64(name, &statbuf) == 0 && errno == 0)
					continue;

				settle = 0;
				ret = ENODEV;
				break;
			}

			if (ret == 0) {
				if (settle == 0) {
					settle = gethrtime();
				} else if (NSEC2MSEC(gethrtime() - settle) >=
				    settle_ms) {
					udev_device_unref(dev);
					break;
				}
			}
		}

		udev_device_unref(dev);
		(void) usleep(sleep_ms * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	udev_unref(udev);

	return (ret);
#else
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
#endif /* HAVE_LIBUDEV */
}

/*
 * Simplified version of zpool_label_disk_wait() where we wait for a device
 * to appear using the default timeouts.
 */
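
/*
 * The timeout defaults to DISK_LABEL_WAIT and may be overridden through the
 * environment in milliseconds, e.g. (assumed usage):
 *	ZPOOL_IMPORT_UDEV_TIMEOUT_MS=5000 zpool import dozer
 */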
int
zpool_disk_wait(const char *path)
{
	int timeout;
	timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
	    DISK_LABEL_WAIT);

	return (zpool_label_disk_wait(path, timeout));
}

/*
 * Encode the persistent device strings
 * used for the vdev disk label
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;	/* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	(void) path;
	(void) ds;
	(void) wholedisk;
	return (ENOENT);
#endif
}

/*
 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
 * in the nvlist (if applicable).  Like:
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 *
 * If an old path was in the nvlist, and the rescan can not find a new path,
 * then keep the old path, since the disk may have been removed.
 *
 * path: The vdev path (value from ZPOOL_CONFIG_PATH)
 * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
 */
void
update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
    const char *key)
{
	char *upath, *spath;
	const char *oldpath = NULL;

	(void) nvlist_lookup_string(nv, key, &oldpath);

	/* Add enclosure sysfs path (if disk is in an enclosure). */
	upath = zfs_get_underlying_path(path);
	spath = zfs_get_enclosure_sysfs_path(upath);

	if (spath) {
		(void) nvlist_add_string(nv, key, spath);
	} else {
		/*
		 * We couldn't dynamically scan the disk's enclosure sysfs
		 * path.  This could be because the disk went away.  If there's
		 * an old enclosure sysfs path in the nvlist, then keep using
		 * it.
		 */
		if (!oldpath) {
			(void) nvlist_remove_all(nv, key);
		}
	}

	free(upath);
	free(spath);
}
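
/*
 * Example (hypothetical vdev path):
 *	update_vdev_config_dev_sysfs_path(nv, "/dev/sda1",
 *	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
 * stores a value such as '/sys/class/enclosure/11:0:1:0/SLOT 4' when the
 * disk sits in an enclosure.
 */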

/*
 * This will get called for each leaf vdev.
 */
static int
sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
{
	(void) hdl_data, (void) data;

	const char *path = NULL;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return (1);

	/* Rescan our enclosure sysfs path for this vdev */
	update_vdev_config_dev_sysfs_path(nv, path,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	return (0);
}

/*
 * Given an nvlist for our pool (with vdev tree), iterate over all the
 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
 */
void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
	nvlist_t *nvroot = NULL;
	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
}

/*
 * Update a leaf vdev's persistent device strings
 *
 * - only applies for a dedicated leaf vdev (aka whole disk)
 * - updated during pool create|add|attach|import
 * - used for device matching during auto-{online,expand,replace}
 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
 *
 * single device node example:
 *	devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
 *	phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 *	devid: 'dm-uuid-mpath-35000c5006304de3f'
 *
 * We also store the enclosure sysfs path for turning on enclosure LEDs
 * (if applicable):
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	const char *env, *type, *path;
	uint64_t wholedisk = 0;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 *	Older OpenZFS implementations had issues when attempting to
	 *	display pool config VDEV names if a "devid" NVP value is
	 *	present in the pool's config.
	 *
	 *	For example, a pool that originated on illumos platform would
	 *	have a devid value in the config and "zpool status" would fail
	 *	when listing the config.
	 *
	 *	A pool can be stripped of any "devid" values on import or
	 *	prevented from adding them on zpool create|add by setting
	 *	ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * Update device string values in the config nvlist.
	 */
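	/*
	 * Note that even when the wholedisk flag is not set,
	 * encode_device_strings() still produces the strings for multipath
	 * dm- devices (see udev_mpath_whole_disk()).
	 */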
	if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
		if (vds.vds_devphys[0] != '\0') {
			(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
			    vds.vds_devphys);
		}
		update_vdev_config_dev_sysfs_path(nv, path,
		    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	} else {
		/* Clear out any stale entries. */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}
}