// case_file.cc revision 329792
1300906Sasomers/*- 2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3300906Sasomers * All rights reserved. 4300906Sasomers * 5300906Sasomers * Redistribution and use in source and binary forms, with or without 6300906Sasomers * modification, are permitted provided that the following conditions 7300906Sasomers * are met: 8300906Sasomers * 1. Redistributions of source code must retain the above copyright 9300906Sasomers * notice, this list of conditions, and the following disclaimer, 10300906Sasomers * without modification. 11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12300906Sasomers * substantially similar to the "NO WARRANTY" disclaimer below 13300906Sasomers * ("Disclaimer") and any redistribution must be conditioned upon 14300906Sasomers * including a substantially similar Disclaimer requirement for further 15300906Sasomers * binary redistribution. 16300906Sasomers * 17300906Sasomers * NO WARRANTY 18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28300906Sasomers * POSSIBILITY OF SUCH DAMAGES. 29300906Sasomers * 30300906Sasomers * Authors: Justin T. 
Gibbs (Spectra Logic Corporation) 31300906Sasomers */ 32300906Sasomers 33300906Sasomers/** 34300906Sasomers * \file case_file.cc 35300906Sasomers * 36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state. 37300906Sasomers * However, we only serialize to disk those events that need to be preserved 38300906Sasomers * across reboots. For now, this is just a log of soft errors which we 39300906Sasomers * accumulate in order to mark a device as degraded. 40300906Sasomers */ 41300906Sasomers#include <sys/cdefs.h> 42300906Sasomers#include <sys/time.h> 43300906Sasomers 44300906Sasomers#include <sys/fs/zfs.h> 45300906Sasomers 46300906Sasomers#include <dirent.h> 47300906Sasomers#include <iomanip> 48300906Sasomers#include <fstream> 49300906Sasomers#include <functional> 50300906Sasomers#include <sstream> 51300906Sasomers#include <syslog.h> 52300906Sasomers#include <unistd.h> 53300906Sasomers 54300906Sasomers#include <libzfs.h> 55300906Sasomers 56300906Sasomers#include <list> 57300906Sasomers#include <map> 58300906Sasomers#include <string> 59300906Sasomers 60300906Sasomers#include <devdctl/guid.h> 61300906Sasomers#include <devdctl/event.h> 62300906Sasomers#include <devdctl/event_factory.h> 63300906Sasomers#include <devdctl/exception.h> 64300906Sasomers#include <devdctl/consumer.h> 65300906Sasomers 66300906Sasomers#include "callout.h" 67300906Sasomers#include "vdev_iterator.h" 68300906Sasomers#include "zfsd_event.h" 69300906Sasomers#include "case_file.h" 70300906Sasomers#include "vdev.h" 71300906Sasomers#include "zfsd.h" 72300906Sasomers#include "zfsd_exception.h" 73300906Sasomers#include "zpool_list.h" 74300906Sasomers 75300906Sasomers__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 329792 2018-02-22 02:16:44Z asomers $"); 76300906Sasomers 77300906Sasomers/*============================ Namespace Control =============================*/ 78300906Sasomersusing std::auto_ptr; 79300906Sasomersusing std::hex; 80300906Sasomersusing 
std::ifstream; 81300906Sasomersusing std::stringstream; 82300906Sasomersusing std::setfill; 83300906Sasomersusing std::setw; 84300906Sasomers 85300906Sasomersusing DevdCtl::Event; 86300906Sasomersusing DevdCtl::EventFactory; 87300906Sasomersusing DevdCtl::EventList; 88300906Sasomersusing DevdCtl::Guid; 89300906Sasomersusing DevdCtl::ParseException; 90300906Sasomers 91300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/ 92300906Sasomers//- CaseFile Static Data ------------------------------------------------------- 93300906Sasomers 94300906SasomersCaseFileList CaseFile::s_activeCases; 95300906Sasomersconst string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97300906Sasomers 98300906Sasomers//- CaseFile Static Public Methods --------------------------------------------- 99300906SasomersCaseFile * 100300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID) 101300906Sasomers{ 102300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 103300906Sasomers curCase != s_activeCases.end(); curCase++) { 104300906Sasomers 105326321Sasomers if (((*curCase)->PoolGUID() != poolGUID 106326321Sasomers && Guid::InvalidGuid() != poolGUID) 107300906Sasomers || (*curCase)->VdevGUID() != vdevGUID) 108300906Sasomers continue; 109300906Sasomers 110300906Sasomers /* 111300906Sasomers * We only carry one active case per-vdev. 
112300906Sasomers */ 113300906Sasomers return (*curCase); 114300906Sasomers } 115300906Sasomers return (NULL); 116300906Sasomers} 117300906Sasomers 118300906SasomersCaseFile * 119300906SasomersCaseFile::Find(const string &physPath) 120300906Sasomers{ 121300906Sasomers CaseFile *result = NULL; 122300906Sasomers 123300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 124300906Sasomers curCase != s_activeCases.end(); curCase++) { 125300906Sasomers 126300906Sasomers if ((*curCase)->PhysicalPath() != physPath) 127300906Sasomers continue; 128300906Sasomers 129300906Sasomers if (result != NULL) { 130300906Sasomers syslog(LOG_WARNING, "Multiple casefiles found for " 131300906Sasomers "physical path %s. " 132300906Sasomers "This is most likely a bug in zfsd", 133300906Sasomers physPath.c_str()); 134300906Sasomers } 135300906Sasomers result = *curCase; 136300906Sasomers } 137300906Sasomers return (result); 138300906Sasomers} 139300906Sasomers 140300906Sasomers 141300906Sasomersvoid 142300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143300906Sasomers{ 144300906Sasomers CaseFileList::iterator casefile; 145300906Sasomers for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146300906Sasomers CaseFileList::iterator next = casefile; 147300906Sasomers next++; 148300906Sasomers if (poolGUID == (*casefile)->PoolGUID()) 149300906Sasomers (*casefile)->ReEvaluate(event); 150300906Sasomers casefile = next; 151300906Sasomers } 152300906Sasomers} 153300906Sasomers 154300906SasomersCaseFile & 155300906SasomersCaseFile::Create(Vdev &vdev) 156300906Sasomers{ 157300906Sasomers CaseFile *activeCase; 158300906Sasomers 159300906Sasomers activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160300906Sasomers if (activeCase == NULL) 161300906Sasomers activeCase = new CaseFile(vdev); 162300906Sasomers 163300906Sasomers return (*activeCase); 164300906Sasomers} 165300906Sasomers 166300906Sasomersvoid 
167300906SasomersCaseFile::DeSerialize() 168300906Sasomers{ 169300906Sasomers struct dirent **caseFiles; 170300906Sasomers 171300906Sasomers int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172300906Sasomers DeSerializeSelector, /*compar*/NULL)); 173300906Sasomers 174300906Sasomers if (numCaseFiles == -1) 175300906Sasomers return; 176300906Sasomers if (numCaseFiles == 0) { 177300906Sasomers free(caseFiles); 178300906Sasomers return; 179300906Sasomers } 180300906Sasomers 181300906Sasomers for (int i = 0; i < numCaseFiles; i++) { 182300906Sasomers 183300906Sasomers DeSerializeFile(caseFiles[i]->d_name); 184300906Sasomers free(caseFiles[i]); 185300906Sasomers } 186300906Sasomers free(caseFiles); 187300906Sasomers} 188300906Sasomers 189300906Sasomersvoid 190300906SasomersCaseFile::LogAll() 191300906Sasomers{ 192300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 193300906Sasomers curCase != s_activeCases.end(); curCase++) 194300906Sasomers (*curCase)->Log(); 195300906Sasomers} 196300906Sasomers 197300906Sasomersvoid 198300906SasomersCaseFile::PurgeAll() 199300906Sasomers{ 200300906Sasomers /* 201300906Sasomers * Serialize casefiles before deleting them so that they can be reread 202300906Sasomers * and revalidated during BuildCaseFiles. 203300906Sasomers * CaseFiles remove themselves from this list on destruction. 204300906Sasomers */ 205300906Sasomers while (s_activeCases.size() != 0) { 206300906Sasomers CaseFile *casefile = s_activeCases.front(); 207300906Sasomers casefile->Serialize(); 208300906Sasomers delete casefile; 209300906Sasomers } 210300906Sasomers 211300906Sasomers} 212300906Sasomers 213300906Sasomers//- CaseFile Public Methods ---------------------------------------------------- 214300906Sasomersbool 215300906SasomersCaseFile::RefreshVdevState() 216300906Sasomers{ 217300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 218300906Sasomers zpool_handle_t *casePool(zpl.empty() ? 
NULL : zpl.front()); 219300906Sasomers if (casePool == NULL) 220300906Sasomers return (false); 221300906Sasomers 222300906Sasomers Vdev vd(casePool, CaseVdev(casePool)); 223300906Sasomers if (vd.DoesNotExist()) 224300906Sasomers return (false); 225300906Sasomers 226300906Sasomers m_vdevState = vd.State(); 227300906Sasomers m_vdevPhysPath = vd.PhysicalPath(); 228300906Sasomers return (true); 229300906Sasomers} 230300906Sasomers 231300906Sasomersbool 232300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 233300906Sasomers{ 234300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 235300906Sasomers zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 236300906Sasomers 237300906Sasomers if (pool == NULL || !RefreshVdevState()) { 238300906Sasomers /* 239300906Sasomers * The pool or vdev for this case file is no longer 240300906Sasomers * part of the configuration. This can happen 241300906Sasomers * if we process a device arrival notification 242300906Sasomers * before seeing the ZFS configuration change 243300906Sasomers * event. 244300906Sasomers */ 245300906Sasomers syslog(LOG_INFO, 246300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 247300906Sasomers "Closing\n", 248300906Sasomers PoolGUIDString().c_str(), 249300906Sasomers VdevGUIDString().c_str()); 250300906Sasomers Close(); 251300906Sasomers 252300906Sasomers /* 253300906Sasomers * Since this event was not used to close this 254300906Sasomers * case, do not report it as consumed. 255300906Sasomers */ 256300906Sasomers return (/*consumed*/false); 257300906Sasomers } 258300906Sasomers 259300906Sasomers if (VdevState() > VDEV_STATE_CANT_OPEN) { 260300906Sasomers /* 261300906Sasomers * For now, newly discovered devices only help for 262300906Sasomers * devices that are missing. In the future, we might 263300906Sasomers * use a newly inserted spare to replace a degraded 264300906Sasomers * or faulted device. 
265300906Sasomers */ 266300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 267300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str()); 268300906Sasomers return (/*consumed*/false); 269300906Sasomers } 270300906Sasomers 271300906Sasomers if (vdev != NULL 272326321Sasomers && ( vdev->PoolGUID() == m_poolGUID 273326321Sasomers || vdev->PoolGUID() == Guid::InvalidGuid()) 274300906Sasomers && vdev->GUID() == m_vdevGUID) { 275300906Sasomers 276300906Sasomers zpool_vdev_online(pool, vdev->GUIDString().c_str(), 277300906Sasomers ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 278300906Sasomers &m_vdevState); 279300906Sasomers syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 280300906Sasomers zpool_get_name(pool), vdev->GUIDString().c_str(), 281300906Sasomers devPath.c_str(), 282300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 283300906Sasomers 284300906Sasomers /* 285300906Sasomers * Check the vdev state post the online action to see 286300906Sasomers * if we can retire this case. 287300906Sasomers */ 288300906Sasomers CloseIfSolved(); 289300906Sasomers 290300906Sasomers return (/*consumed*/true); 291300906Sasomers } 292300906Sasomers 293300906Sasomers /* 294300906Sasomers * If the auto-replace policy is enabled, and we have physical 295300906Sasomers * path information, try a physical path replacement. 296300906Sasomers */ 297300906Sasomers if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 298300906Sasomers syslog(LOG_INFO, 299300906Sasomers "CaseFile(%s:%s:%s): AutoReplace not set. 
" 300300906Sasomers "Ignoring device insertion.\n", 301300906Sasomers PoolGUIDString().c_str(), 302300906Sasomers VdevGUIDString().c_str(), 303300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 304300906Sasomers return (/*consumed*/false); 305300906Sasomers } 306300906Sasomers 307300906Sasomers if (PhysicalPath().empty()) { 308300906Sasomers syslog(LOG_INFO, 309300906Sasomers "CaseFile(%s:%s:%s): No physical path information. " 310300906Sasomers "Ignoring device insertion.\n", 311300906Sasomers PoolGUIDString().c_str(), 312300906Sasomers VdevGUIDString().c_str(), 313300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 314300906Sasomers return (/*consumed*/false); 315300906Sasomers } 316300906Sasomers 317300906Sasomers if (physPath != PhysicalPath()) { 318300906Sasomers syslog(LOG_INFO, 319300906Sasomers "CaseFile(%s:%s:%s): Physical path mismatch. " 320300906Sasomers "Ignoring device insertion.\n", 321300906Sasomers PoolGUIDString().c_str(), 322300906Sasomers VdevGUIDString().c_str(), 323300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 324300906Sasomers return (/*consumed*/false); 325300906Sasomers } 326300906Sasomers 327300906Sasomers /* Write a label on the newly inserted disk. 
*/ 328300906Sasomers if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 329300906Sasomers syslog(LOG_ERR, 330300906Sasomers "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 331300906Sasomers zpool_get_name(pool), VdevGUIDString().c_str(), 332300906Sasomers libzfs_error_action(g_zfsHandle), 333300906Sasomers libzfs_error_description(g_zfsHandle)); 334300906Sasomers return (/*consumed*/false); 335300906Sasomers } 336300906Sasomers 337300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 338300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 339300906Sasomers devPath.c_str()); 340300906Sasomers return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 341300906Sasomers} 342300906Sasomers 343300906Sasomersbool 344300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event) 345300906Sasomers{ 346300906Sasomers bool consumed(false); 347300906Sasomers 348300906Sasomers if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 349300906Sasomers /* 350300906Sasomers * The Vdev we represent has been removed from the 351300906Sasomers * configuration. This case is no longer of value. 352300906Sasomers */ 353300906Sasomers Close(); 354300906Sasomers 355300906Sasomers return (/*consumed*/true); 356300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 357300906Sasomers /* This Pool has been destroyed. 
Discard the case */ 358300906Sasomers Close(); 359300906Sasomers 360300906Sasomers return (/*consumed*/true); 361300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 362300906Sasomers RefreshVdevState(); 363300906Sasomers if (VdevState() < VDEV_STATE_HEALTHY) 364300906Sasomers consumed = ActivateSpare(); 365300906Sasomers } 366300906Sasomers 367300906Sasomers 368300906Sasomers if (event.Value("class") == "resource.fs.zfs.removed") { 369300906Sasomers bool spare_activated; 370300906Sasomers 371300906Sasomers if (!RefreshVdevState()) { 372300906Sasomers /* 373300906Sasomers * The pool or vdev for this case file is no longer 374300906Sasomers * part of the configuration. This can happen 375300906Sasomers * if we process a device arrival notification 376300906Sasomers * before seeing the ZFS configuration change 377300906Sasomers * event. 378300906Sasomers */ 379300906Sasomers syslog(LOG_INFO, 380300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 381300906Sasomers "unconfigured. Closing\n", 382300906Sasomers PoolGUIDString().c_str(), 383300906Sasomers VdevGUIDString().c_str()); 384300906Sasomers /* 385300906Sasomers * Close the case now so we won't waste cycles in the 386300906Sasomers * system rescan 387300906Sasomers */ 388300906Sasomers Close(); 389300906Sasomers 390300906Sasomers /* 391300906Sasomers * Since this event was not used to close this 392300906Sasomers * case, do not report it as consumed. 393300906Sasomers */ 394300906Sasomers return (/*consumed*/false); 395300906Sasomers } 396300906Sasomers 397300906Sasomers /* 398300906Sasomers * Discard any tentative I/O error events for 399300906Sasomers * this case. They were most likely caused by the 400300906Sasomers * hot-unplug of this device. 
401300906Sasomers */ 402300906Sasomers PurgeTentativeEvents(); 403300906Sasomers 404300906Sasomers /* Try to activate spares if they are available */ 405300906Sasomers spare_activated = ActivateSpare(); 406300906Sasomers 407300906Sasomers /* 408300906Sasomers * Rescan the drives in the system to see if a recent 409300906Sasomers * drive arrival can be used to solve this case. 410300906Sasomers */ 411300906Sasomers ZfsDaemon::RequestSystemRescan(); 412300906Sasomers 413300906Sasomers /* 414300906Sasomers * Consume the event if we successfully activated a spare. 415300906Sasomers * Otherwise, leave it in the unconsumed events list so that the 416300906Sasomers * future addition of a spare to this pool might be able to 417300906Sasomers * close the case 418300906Sasomers */ 419300906Sasomers consumed = spare_activated; 420300906Sasomers } else if (event.Value("class") == "resource.fs.zfs.statechange") { 421300906Sasomers RefreshVdevState(); 422300906Sasomers /* 423300906Sasomers * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 424300906Sasomers * activate a hotspare. 
Otherwise, ignore the event 425300906Sasomers */ 426300906Sasomers if (VdevState() == VDEV_STATE_FAULTED || 427300906Sasomers VdevState() == VDEV_STATE_DEGRADED || 428300906Sasomers VdevState() == VDEV_STATE_CANT_OPEN) 429300906Sasomers (void) ActivateSpare(); 430300906Sasomers consumed = true; 431300906Sasomers } 432300906Sasomers else if (event.Value("class") == "ereport.fs.zfs.io" || 433300906Sasomers event.Value("class") == "ereport.fs.zfs.checksum") { 434300906Sasomers 435300906Sasomers m_tentativeEvents.push_front(event.DeepCopy()); 436300906Sasomers RegisterCallout(event); 437300906Sasomers consumed = true; 438300906Sasomers } 439300906Sasomers 440300906Sasomers bool closed(CloseIfSolved()); 441300906Sasomers 442300906Sasomers return (consumed || closed); 443300906Sasomers} 444300906Sasomers 445329792Sasomers/* Find a Vdev containing the vdev with the given GUID */ 446329792Sasomersstatic nvlist_t* 447329792Sasomersfind_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) 448329792Sasomers{ 449329792Sasomers nvlist_t **vdevChildren; 450329792Sasomers int error; 451329792Sasomers unsigned ch, numChildren; 452300906Sasomers 453329792Sasomers error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, 454329792Sasomers &vdevChildren, &numChildren); 455329792Sasomers 456329792Sasomers if (error != 0 || numChildren == 0) 457329792Sasomers return (NULL); 458329792Sasomers 459329792Sasomers for (ch = 0; ch < numChildren; ch++) { 460329792Sasomers nvlist *result; 461329792Sasomers Vdev vdev(pool_config, vdevChildren[ch]); 462329792Sasomers 463329792Sasomers if (vdev.GUID() == child_guid) 464329792Sasomers return (config); 465329792Sasomers 466329792Sasomers result = find_parent(pool_config, vdevChildren[ch], child_guid); 467329792Sasomers if (result != NULL) 468329792Sasomers return (result); 469329792Sasomers } 470329792Sasomers 471329792Sasomers return (NULL); 472329792Sasomers} 473329792Sasomers 474300906Sasomersbool 
475300906SasomersCaseFile::ActivateSpare() { 476329792Sasomers nvlist_t *config, *nvroot, *parent_config; 477300906Sasomers nvlist_t **spares; 478300906Sasomers char *devPath, *vdev_type; 479300906Sasomers const char *poolname; 480300906Sasomers u_int nspares, i; 481300906Sasomers int error; 482300906Sasomers 483300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 484300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 485300906Sasomers if (zhp == NULL) { 486300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 487300919Sbdrewery "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 488300906Sasomers return (false); 489300906Sasomers } 490300906Sasomers poolname = zpool_get_name(zhp); 491300906Sasomers config = zpool_get_config(zhp, NULL); 492300906Sasomers if (config == NULL) { 493300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 494300906Sasomers "config for pool %s", poolname); 495300906Sasomers return (false); 496300906Sasomers } 497300906Sasomers error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 498300906Sasomers if (error != 0){ 499300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 500300906Sasomers "tree for pool %s", poolname); 501300906Sasomers return (false); 502300906Sasomers } 503329792Sasomers 504329792Sasomers parent_config = find_parent(config, nvroot, m_vdevGUID); 505329792Sasomers if (parent_config != NULL) { 506329792Sasomers char *parent_type; 507329792Sasomers 508329792Sasomers /* 509329792Sasomers * Don't activate spares for members of a "replacing" vdev. 510329792Sasomers * They're already dealt with. Sparing them will just drag out 511329792Sasomers * the resilver process. 
512329792Sasomers */ 513329792Sasomers error = nvlist_lookup_string(parent_config, 514329792Sasomers ZPOOL_CONFIG_TYPE, &parent_type); 515329792Sasomers if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) 516329792Sasomers return (false); 517329792Sasomers } 518329792Sasomers 519300906Sasomers nspares = 0; 520300906Sasomers nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 521300906Sasomers &nspares); 522300906Sasomers if (nspares == 0) { 523300906Sasomers /* The pool has no spares configured */ 524300906Sasomers syslog(LOG_INFO, "CaseFile::ActivateSpare: " 525300906Sasomers "No spares available for pool %s", poolname); 526300906Sasomers return (false); 527300906Sasomers } 528300906Sasomers for (i = 0; i < nspares; i++) { 529300906Sasomers uint64_t *nvlist_array; 530300906Sasomers vdev_stat_t *vs; 531300906Sasomers uint_t nstats; 532300906Sasomers 533300906Sasomers if (nvlist_lookup_uint64_array(spares[i], 534300906Sasomers ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 535300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 536300906Sasomers "find vdev stats for pool %s, spare %d", 537300906Sasomers poolname, i); 538300906Sasomers return (false); 539300906Sasomers } 540300906Sasomers vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 541300906Sasomers 542300906Sasomers if ((vs->vs_aux != VDEV_AUX_SPARED) 543300906Sasomers && (vs->vs_state == VDEV_STATE_HEALTHY)) { 544300906Sasomers /* We found a usable spare */ 545300906Sasomers break; 546300906Sasomers } 547300906Sasomers } 548300906Sasomers 549300906Sasomers if (i == nspares) { 550300906Sasomers /* No available spares were found */ 551300906Sasomers return (false); 552300906Sasomers } 553300906Sasomers 554300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 555300906Sasomers if (error != 0) { 556300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 557300906Sasomers "the path of pool %s, spare %d. 
Error %d", 558300906Sasomers poolname, i, error); 559300906Sasomers return (false); 560300906Sasomers } 561300906Sasomers 562300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 563300906Sasomers if (error != 0) { 564300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 565300906Sasomers "the vdev type of pool %s, spare %d. Error %d", 566300906Sasomers poolname, i, error); 567300906Sasomers return (false); 568300906Sasomers } 569300906Sasomers 570300906Sasomers return (Replace(vdev_type, devPath, /*isspare*/true)); 571300906Sasomers} 572300906Sasomers 573300906Sasomersvoid 574300906SasomersCaseFile::RegisterCallout(const Event &event) 575300906Sasomers{ 576300906Sasomers timeval now, countdown, elapsed, timestamp, zero, remaining; 577300906Sasomers 578300906Sasomers gettimeofday(&now, 0); 579300906Sasomers timestamp = event.GetTimestamp(); 580300906Sasomers timersub(&now, ×tamp, &elapsed); 581300906Sasomers timersub(&s_removeGracePeriod, &elapsed, &countdown); 582300906Sasomers /* 583300906Sasomers * If countdown is <= zero, Reset the timer to the 584300906Sasomers * smallest positive time value instead 585300906Sasomers */ 586300906Sasomers timerclear(&zero); 587300906Sasomers if (timercmp(&countdown, &zero, <=)) { 588300906Sasomers timerclear(&countdown); 589300906Sasomers countdown.tv_usec = 1; 590300906Sasomers } 591300906Sasomers 592300906Sasomers remaining = m_tentativeTimer.TimeRemaining(); 593300906Sasomers 594300906Sasomers if (!m_tentativeTimer.IsPending() 595300906Sasomers || timercmp(&countdown, &remaining, <)) 596300906Sasomers m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 597300906Sasomers} 598300906Sasomers 599300906Sasomers 600300906Sasomersbool 601300906SasomersCaseFile::CloseIfSolved() 602300906Sasomers{ 603300906Sasomers if (m_events.empty() 604300906Sasomers && m_tentativeEvents.empty()) { 605300906Sasomers 606300906Sasomers /* 607300906Sasomers * We currently do not track or 
take actions on 608300906Sasomers * devices in the degraded or faulted state. 609300906Sasomers * Once we have support for spare pools, we'll 610300906Sasomers * retain these cases so that any spares added in 611300906Sasomers * the future can be applied to them. 612300906Sasomers */ 613300906Sasomers switch (VdevState()) { 614300906Sasomers case VDEV_STATE_HEALTHY: 615300906Sasomers /* No need to keep cases for healthy vdevs */ 616300906Sasomers Close(); 617300906Sasomers return (true); 618300906Sasomers case VDEV_STATE_REMOVED: 619300906Sasomers case VDEV_STATE_CANT_OPEN: 620300906Sasomers /* 621300906Sasomers * Keep open. We may solve it with a newly inserted 622300906Sasomers * device. 623300906Sasomers */ 624300906Sasomers case VDEV_STATE_FAULTED: 625300906Sasomers case VDEV_STATE_DEGRADED: 626300906Sasomers /* 627300906Sasomers * Keep open. We may solve it with the future 628300906Sasomers * addition of a spare to the pool 629300906Sasomers */ 630300906Sasomers case VDEV_STATE_UNKNOWN: 631300906Sasomers case VDEV_STATE_CLOSED: 632300906Sasomers case VDEV_STATE_OFFLINE: 633300906Sasomers /* 634300906Sasomers * Keep open? This may not be the correct behavior, 635300906Sasomers * but it's what we've always done 636300906Sasomers */ 637300906Sasomers ; 638300906Sasomers } 639300906Sasomers 640300906Sasomers /* 641300906Sasomers * Re-serialize the case in order to remove any 642300906Sasomers * previous event data. 
643300906Sasomers */ 644300906Sasomers Serialize(); 645300906Sasomers } 646300906Sasomers 647300906Sasomers return (false); 648300906Sasomers} 649300906Sasomers 650300906Sasomersvoid 651300906SasomersCaseFile::Log() 652300906Sasomers{ 653300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 654300906Sasomers VdevGUIDString().c_str(), PhysicalPath().c_str()); 655300906Sasomers syslog(LOG_INFO, "\tVdev State = %s\n", 656300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 657300906Sasomers if (m_tentativeEvents.size() != 0) { 658300906Sasomers syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 659300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 660300906Sasomers event != m_tentativeEvents.end(); event++) 661300906Sasomers (*event)->Log(LOG_INFO); 662300906Sasomers } 663300906Sasomers if (m_events.size() != 0) { 664300906Sasomers syslog(LOG_INFO, "\t=== Events ===\n"); 665300906Sasomers for (EventList::iterator event(m_events.begin()); 666300906Sasomers event != m_events.end(); event++) 667300906Sasomers (*event)->Log(LOG_INFO); 668300906Sasomers } 669300906Sasomers} 670300906Sasomers 671300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------ 672300906Sasomersvoid 673300906SasomersCaseFile::OnGracePeriodEnded(void *arg) 674300906Sasomers{ 675300906Sasomers CaseFile &casefile(*static_cast<CaseFile *>(arg)); 676300906Sasomers 677300906Sasomers casefile.OnGracePeriodEnded(); 678300906Sasomers} 679300906Sasomers 680300906Sasomersint 681300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry) 682300906Sasomers{ 683300906Sasomers uint64_t poolGUID; 684300906Sasomers uint64_t vdevGUID; 685300906Sasomers 686300906Sasomers if (dirEntry->d_type == DT_REG 687300919Sbdrewery && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 688300906Sasomers &poolGUID, &vdevGUID) == 2) 689300906Sasomers return (1); 690300906Sasomers return (0); 
691300906Sasomers} 692300906Sasomers 693300906Sasomersvoid 694300906SasomersCaseFile::DeSerializeFile(const char *fileName) 695300906Sasomers{ 696300906Sasomers string fullName(s_caseFilePath + '/' + fileName); 697300906Sasomers CaseFile *existingCaseFile(NULL); 698300906Sasomers CaseFile *caseFile(NULL); 699300906Sasomers 700300906Sasomers try { 701300906Sasomers uint64_t poolGUID; 702300906Sasomers uint64_t vdevGUID; 703300906Sasomers nvlist_t *vdevConf; 704300906Sasomers 705314431Sasomers if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 706314431Sasomers &poolGUID, &vdevGUID) != 2) { 707314431Sasomers throw ZfsdException("CaseFile::DeSerialize: " 708314431Sasomers "Unintelligible CaseFile filename %s.\n", fileName); 709314431Sasomers } 710300906Sasomers existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 711300906Sasomers if (existingCaseFile != NULL) { 712300906Sasomers /* 713300906Sasomers * If the vdev is already degraded or faulted, 714300906Sasomers * there's no point in keeping the state around 715300906Sasomers * that we use to put a drive into the degraded 716300906Sasomers * state. However, if the vdev is simply missing, 717300906Sasomers * preserve the case data in the hopes that it will 718300906Sasomers * return. 719300906Sasomers */ 720300906Sasomers caseFile = existingCaseFile; 721300906Sasomers vdev_state curState(caseFile->VdevState()); 722300906Sasomers if (curState > VDEV_STATE_CANT_OPEN 723300906Sasomers && curState < VDEV_STATE_HEALTHY) { 724300906Sasomers unlink(fileName); 725300906Sasomers return; 726300906Sasomers } 727300906Sasomers } else { 728300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 729300906Sasomers if (zpl.empty() 730300906Sasomers || (vdevConf = VdevIterator(zpl.front()) 731300906Sasomers .Find(vdevGUID)) == NULL) { 732300906Sasomers /* 733300906Sasomers * Either the pool no longer exists 734300906Sasomers * or this vdev is no longer a member of 735300906Sasomers * the pool. 
736300906Sasomers */ 737300906Sasomers unlink(fullName.c_str()); 738300906Sasomers return; 739300906Sasomers } 740300906Sasomers 741300906Sasomers /* 742300906Sasomers * Any vdev we find that does not have a case file 743300906Sasomers * must be in the healthy state and thus worthy of 744300906Sasomers * continued SERD data tracking. 745300906Sasomers */ 746300906Sasomers caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 747300906Sasomers } 748300906Sasomers 749300906Sasomers ifstream caseStream(fullName.c_str()); 750300906Sasomers if (!caseStream) 751300906Sasomers throw ZfsdException("CaseFile::DeSerialize: Unable to " 752300906Sasomers "read %s.\n", fileName); 753300906Sasomers 754300906Sasomers caseFile->DeSerialize(caseStream); 755300906Sasomers } catch (const ParseException &exp) { 756300906Sasomers 757300906Sasomers exp.Log(); 758300906Sasomers if (caseFile != existingCaseFile) 759300906Sasomers delete caseFile; 760300906Sasomers 761300906Sasomers /* 762300906Sasomers * Since we can't parse the file, unlink it so we don't 763300906Sasomers * trip over it again. 
764300906Sasomers */ 765300906Sasomers unlink(fileName); 766300906Sasomers } catch (const ZfsdException &zfsException) { 767300906Sasomers 768300906Sasomers zfsException.Log(); 769300906Sasomers if (caseFile != existingCaseFile) 770300906Sasomers delete caseFile; 771300906Sasomers } 772300906Sasomers} 773300906Sasomers 774300906Sasomers//- CaseFile Protected Methods ------------------------------------------------- 775300906SasomersCaseFile::CaseFile(const Vdev &vdev) 776300906Sasomers : m_poolGUID(vdev.PoolGUID()), 777300906Sasomers m_vdevGUID(vdev.GUID()), 778300906Sasomers m_vdevState(vdev.State()), 779300906Sasomers m_vdevPhysPath(vdev.PhysicalPath()) 780300906Sasomers{ 781300906Sasomers stringstream guidString; 782300906Sasomers 783300906Sasomers guidString << m_vdevGUID; 784300906Sasomers m_vdevGUIDString = guidString.str(); 785300906Sasomers guidString.str(""); 786300906Sasomers guidString << m_poolGUID; 787300906Sasomers m_poolGUIDString = guidString.str(); 788300906Sasomers 789300906Sasomers s_activeCases.push_back(this); 790300906Sasomers 791300906Sasomers syslog(LOG_INFO, "Creating new CaseFile:\n"); 792300906Sasomers Log(); 793300906Sasomers} 794300906Sasomers 795300906SasomersCaseFile::~CaseFile() 796300906Sasomers{ 797300906Sasomers PurgeEvents(); 798300906Sasomers PurgeTentativeEvents(); 799300906Sasomers m_tentativeTimer.Stop(); 800300906Sasomers s_activeCases.remove(this); 801300906Sasomers} 802300906Sasomers 803300906Sasomersvoid 804300906SasomersCaseFile::PurgeEvents() 805300906Sasomers{ 806300906Sasomers for (EventList::iterator event(m_events.begin()); 807300906Sasomers event != m_events.end(); event++) 808300906Sasomers delete *event; 809300906Sasomers 810300906Sasomers m_events.clear(); 811300906Sasomers} 812300906Sasomers 813300906Sasomersvoid 814300906SasomersCaseFile::PurgeTentativeEvents() 815300906Sasomers{ 816300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 817300906Sasomers event != m_tentativeEvents.end(); 
event++) 818300906Sasomers delete *event; 819300906Sasomers 820300906Sasomers m_tentativeEvents.clear(); 821300906Sasomers} 822300906Sasomers 823300906Sasomersvoid 824300906SasomersCaseFile::SerializeEvList(const EventList events, int fd, 825300906Sasomers const char* prefix) const 826300906Sasomers{ 827300906Sasomers if (events.empty()) 828300906Sasomers return; 829300906Sasomers for (EventList::const_iterator curEvent = events.begin(); 830300906Sasomers curEvent != events.end(); curEvent++) { 831300906Sasomers const string &eventString((*curEvent)->GetEventString()); 832300906Sasomers 833300906Sasomers // TODO: replace many write(2) calls with a single writev(2) 834300906Sasomers if (prefix) 835300906Sasomers write(fd, prefix, strlen(prefix)); 836300906Sasomers write(fd, eventString.c_str(), eventString.length()); 837300906Sasomers } 838300906Sasomers} 839300906Sasomers 840300906Sasomersvoid 841300906SasomersCaseFile::Serialize() 842300906Sasomers{ 843300906Sasomers stringstream saveFile; 844300906Sasomers 845300906Sasomers saveFile << setfill('0') 846300906Sasomers << s_caseFilePath << "/" 847300906Sasomers << "pool_" << PoolGUIDString() 848300906Sasomers << "_vdev_" << VdevGUIDString() 849300906Sasomers << ".case"; 850300906Sasomers 851300906Sasomers if (m_events.empty() && m_tentativeEvents.empty()) { 852300906Sasomers unlink(saveFile.str().c_str()); 853300906Sasomers return; 854300906Sasomers } 855300906Sasomers 856300906Sasomers int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 857300906Sasomers if (fd == -1) { 858300906Sasomers syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 859300906Sasomers saveFile.str().c_str()); 860300906Sasomers return; 861300906Sasomers } 862300906Sasomers SerializeEvList(m_events, fd); 863300906Sasomers SerializeEvList(m_tentativeEvents, fd, "tentative "); 864300906Sasomers close(fd); 865300906Sasomers} 866300906Sasomers 867300906Sasomers/* 868300906Sasomers * XXX: This method assumes that events may 
not contain embedded newlines. If 869300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch 870300906Sasomers * serialization formats 871300906Sasomers */ 872300906Sasomersvoid 873300906SasomersCaseFile::DeSerialize(ifstream &caseStream) 874300906Sasomers{ 875300906Sasomers string evString; 876300906Sasomers const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 877300906Sasomers 878300906Sasomers caseStream >> std::noskipws >> std::ws; 879300906Sasomers while (caseStream.good()) { 880300906Sasomers /* 881300906Sasomers * Outline: 882300906Sasomers * read the beginning of a line and check it for 883300906Sasomers * "tentative". If found, discard "tentative". 884300906Sasomers * Create a new event 885300906Sasomers * continue 886300906Sasomers */ 887300906Sasomers EventList* destEvents; 888300906Sasomers const string tentFlag("tentative "); 889300906Sasomers string line; 890300906Sasomers std::stringbuf lineBuf; 891300906Sasomers 892300906Sasomers caseStream.get(lineBuf); 893300906Sasomers caseStream.ignore(); /*discard the newline character*/ 894300906Sasomers line = lineBuf.str(); 895300906Sasomers if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 896300906Sasomers /* Discard "tentative" */ 897300906Sasomers line.erase(0, tentFlag.size()); 898300906Sasomers destEvents = &m_tentativeEvents; 899300906Sasomers } else { 900300906Sasomers destEvents = &m_events; 901300906Sasomers } 902300906Sasomers Event *event(Event::CreateEvent(factory, line)); 903300906Sasomers if (event != NULL) { 904300906Sasomers destEvents->push_back(event); 905300906Sasomers RegisterCallout(*event); 906300906Sasomers } 907300906Sasomers } 908300906Sasomers} 909300906Sasomers 910300906Sasomersvoid 911300906SasomersCaseFile::Close() 912300906Sasomers{ 913300906Sasomers /* 914300906Sasomers * This case is no longer relevant. Clean up our 915300906Sasomers * serialization file, and delete the case. 
916300906Sasomers */ 917300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 918300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 919300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 920300906Sasomers 921300906Sasomers /* 922300906Sasomers * Serialization of a Case with no event data, clears the 923300906Sasomers * Serialization data for that event. 924300906Sasomers */ 925300906Sasomers PurgeEvents(); 926300906Sasomers Serialize(); 927300906Sasomers 928300906Sasomers delete this; 929300906Sasomers} 930300906Sasomers 931300906Sasomersvoid 932300906SasomersCaseFile::OnGracePeriodEnded() 933300906Sasomers{ 934300906Sasomers bool should_fault, should_degrade; 935300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 936300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 937300906Sasomers 938300906Sasomers m_events.splice(m_events.begin(), m_tentativeEvents); 939300906Sasomers should_fault = ShouldFault(); 940300906Sasomers should_degrade = ShouldDegrade(); 941300906Sasomers 942300906Sasomers if (should_fault || should_degrade) { 943300906Sasomers if (zhp == NULL 944300906Sasomers || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 945300906Sasomers /* 946300906Sasomers * Either the pool no longer exists 947300906Sasomers * or this vdev is no longer a member of 948300906Sasomers * the pool. 949300906Sasomers */ 950300906Sasomers Close(); 951300906Sasomers return; 952300906Sasomers } 953300906Sasomers 954300906Sasomers } 955300906Sasomers 956300906Sasomers /* A fault condition has priority over a degrade condition */ 957300906Sasomers if (ShouldFault()) { 958300906Sasomers /* Fault the vdev and close the case. 
*/ 959300906Sasomers if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 960300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 961300906Sasomers syslog(LOG_INFO, "Faulting vdev(%s/%s)", 962300906Sasomers PoolGUIDString().c_str(), 963300906Sasomers VdevGUIDString().c_str()); 964300906Sasomers Close(); 965300906Sasomers return; 966300906Sasomers } 967300906Sasomers else { 968300906Sasomers syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 969300906Sasomers PoolGUIDString().c_str(), 970300906Sasomers VdevGUIDString().c_str(), 971300906Sasomers libzfs_error_action(g_zfsHandle), 972300906Sasomers libzfs_error_description(g_zfsHandle)); 973300906Sasomers } 974300906Sasomers } 975300906Sasomers else if (ShouldDegrade()) { 976300906Sasomers /* Degrade the vdev and close the case. */ 977300906Sasomers if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 978300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 979300906Sasomers syslog(LOG_INFO, "Degrading vdev(%s/%s)", 980300906Sasomers PoolGUIDString().c_str(), 981300906Sasomers VdevGUIDString().c_str()); 982300906Sasomers Close(); 983300906Sasomers return; 984300906Sasomers } 985300906Sasomers else { 986300906Sasomers syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 987300906Sasomers PoolGUIDString().c_str(), 988300906Sasomers VdevGUIDString().c_str(), 989300906Sasomers libzfs_error_action(g_zfsHandle), 990300906Sasomers libzfs_error_description(g_zfsHandle)); 991300906Sasomers } 992300906Sasomers } 993300906Sasomers Serialize(); 994300906Sasomers} 995300906Sasomers 996300906SasomersVdev 997300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 998300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 999300906Sasomers std::list<Vdev> children; 1000300906Sasomers std::list<Vdev>::iterator children_it; 1001300906Sasomers 1002300906Sasomers Vdev parent(vd.Parent()); 1003300906Sasomers Vdev replacing(NonexistentVdev); 1004300906Sasomers 1005300906Sasomers /* 1006300906Sasomers * To determine whether we are being replaced by another spare that 
1007300906Sasomers * is still working, then make sure that it is currently spared and 1008300906Sasomers * that the spare is either resilvering or healthy. If any of these 1009300906Sasomers * conditions fail, then we are not being replaced by a spare. 1010300906Sasomers * 1011300906Sasomers * If the spare is healthy, then the case file should be closed very 1012300906Sasomers * soon after this check. 1013300906Sasomers */ 1014300906Sasomers if (parent.DoesNotExist() 1015300906Sasomers || parent.Name(zhp, /*verbose*/false) != "spare") 1016300906Sasomers return (NonexistentVdev); 1017300906Sasomers 1018300906Sasomers children = parent.Children(); 1019300906Sasomers children_it = children.begin(); 1020300906Sasomers for (;children_it != children.end(); children_it++) { 1021300906Sasomers Vdev child = *children_it; 1022300906Sasomers 1023300906Sasomers /* Skip our vdev. */ 1024300906Sasomers if (child.GUID() == VdevGUID()) 1025300906Sasomers continue; 1026300906Sasomers /* 1027300906Sasomers * Accept the first child that doesn't match our GUID, or 1028300906Sasomers * any resilvering/healthy device if one exists. 1029300906Sasomers */ 1030300906Sasomers if (replacing.DoesNotExist() || child.IsResilvering() 1031300906Sasomers || child.State() == VDEV_STATE_HEALTHY) 1032300906Sasomers replacing = child; 1033300906Sasomers } 1034300906Sasomers 1035300906Sasomers return (replacing); 1036300906Sasomers} 1037300906Sasomers 1038300906Sasomersbool 1039300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 1040300906Sasomers nvlist_t *nvroot, *newvd; 1041300906Sasomers const char *poolname; 1042300906Sasomers string oldstr(VdevGUIDString()); 1043300906Sasomers bool retval = true; 1044300906Sasomers 1045300906Sasomers /* Figure out what pool we're working on */ 1046300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1047300906Sasomers zpool_handle_t *zhp(zpl.empty() ? 
NULL : zpl.front()); 1048300906Sasomers if (zhp == NULL) { 1049300906Sasomers syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1050300919Sbdrewery "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1051300906Sasomers return (false); 1052300906Sasomers } 1053300906Sasomers poolname = zpool_get_name(zhp); 1054300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1055300906Sasomers Vdev replaced(BeingReplacedBy(zhp)); 1056300906Sasomers 1057300906Sasomers if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1058300906Sasomers /* If we are already being replaced by a working spare, pass. */ 1059300906Sasomers if (replaced.IsResilvering() 1060300906Sasomers || replaced.State() == VDEV_STATE_HEALTHY) { 1061300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1062300906Sasomers "replaced", VdevGUIDString().c_str(), path); 1063300906Sasomers return (/*consumed*/false); 1064300906Sasomers } 1065300906Sasomers /* 1066300906Sasomers * If we have already been replaced by a spare, but that spare 1067300906Sasomers * is broken, we must spare the spare, not the original device. 1068300906Sasomers */ 1069300906Sasomers oldstr = replaced.GUIDString(); 1070300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1071300906Sasomers "broken spare %s instead", VdevGUIDString().c_str(), 1072300906Sasomers path, oldstr.c_str()); 1073300906Sasomers } 1074300906Sasomers 1075300906Sasomers /* 1076300906Sasomers * Build a root vdev/leaf vdev configuration suitable for 1077300906Sasomers * zpool_vdev_attach. Only enough data for the kernel to find 1078300906Sasomers * the device (i.e. type and disk device node path) are needed. 
1079300906Sasomers */ 1080300906Sasomers nvroot = NULL; 1081300906Sasomers newvd = NULL; 1082300906Sasomers 1083300906Sasomers if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1084300906Sasomers || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1085300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1086300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1087300906Sasomers if (nvroot != NULL) 1088300906Sasomers nvlist_free(nvroot); 1089300906Sasomers return (false); 1090300906Sasomers } 1091300906Sasomers if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1092300906Sasomers || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1093300906Sasomers || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1094300906Sasomers || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1095300906Sasomers &newvd, 1) != 0) { 1096300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1097300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1098300906Sasomers nvlist_free(newvd); 1099300906Sasomers nvlist_free(nvroot); 1100300906Sasomers return (true); 1101300906Sasomers } 1102300906Sasomers 1103300906Sasomers /* Data was copied when added to the root vdev. 
*/ 1104300906Sasomers nvlist_free(newvd); 1105300906Sasomers 1106300906Sasomers retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1107300906Sasomers /*replace*/B_TRUE) == 0); 1108300906Sasomers if (retval) 1109300906Sasomers syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1110300906Sasomers poolname, oldstr.c_str(), path); 1111300906Sasomers else 1112300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1113300906Sasomers poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1114300906Sasomers libzfs_error_description(g_zfsHandle)); 1115300906Sasomers nvlist_free(nvroot); 1116300906Sasomers 1117300906Sasomers return (retval); 1118300906Sasomers} 1119300906Sasomers 1120300906Sasomers/* Does the argument event refer to a checksum error? */ 1121300906Sasomersstatic bool 1122300906SasomersIsChecksumEvent(const Event* const event) 1123300906Sasomers{ 1124300906Sasomers return ("ereport.fs.zfs.checksum" == event->Value("type")); 1125300906Sasomers} 1126300906Sasomers 1127300906Sasomers/* Does the argument event refer to an IO error? 
*/ 1128300906Sasomersstatic bool 1129300906SasomersIsIOEvent(const Event* const event) 1130300906Sasomers{ 1131300906Sasomers return ("ereport.fs.zfs.io" == event->Value("type")); 1132300906Sasomers} 1133300906Sasomers 1134300906Sasomersbool 1135300906SasomersCaseFile::ShouldDegrade() const 1136300906Sasomers{ 1137300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1138300906Sasomers IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1139300906Sasomers} 1140300906Sasomers 1141300906Sasomersbool 1142300906SasomersCaseFile::ShouldFault() const 1143300906Sasomers{ 1144300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1145300906Sasomers IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1146300906Sasomers} 1147300906Sasomers 1148300906Sasomersnvlist_t * 1149300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const 1150300906Sasomers{ 1151300906Sasomers return (VdevIterator(zhp).Find(VdevGUID())); 1152300906Sasomers} 1153