case_file.cc revision 330733
1300906Sasomers/*- 2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3300906Sasomers * All rights reserved. 4300906Sasomers * 5300906Sasomers * Redistribution and use in source and binary forms, with or without 6300906Sasomers * modification, are permitted provided that the following conditions 7300906Sasomers * are met: 8300906Sasomers * 1. Redistributions of source code must retain the above copyright 9300906Sasomers * notice, this list of conditions, and the following disclaimer, 10300906Sasomers * without modification. 11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12300906Sasomers * substantially similar to the "NO WARRANTY" disclaimer below 13300906Sasomers * ("Disclaimer") and any redistribution must be conditioned upon 14300906Sasomers * including a substantially similar Disclaimer requirement for further 15300906Sasomers * binary redistribution. 16300906Sasomers * 17300906Sasomers * NO WARRANTY 18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28300906Sasomers * POSSIBILITY OF SUCH DAMAGES. 29300906Sasomers * 30300906Sasomers * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31300906Sasomers */ 32300906Sasomers 33300906Sasomers/** 34300906Sasomers * \file case_file.cc 35300906Sasomers * 36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state. 37300906Sasomers * However, we only serialize to disk those events that need to be preserved 38300906Sasomers * across reboots. For now, this is just a log of soft errors which we 39300906Sasomers * accumulate in order to mark a device as degraded. 40300906Sasomers */ 41300906Sasomers#include <sys/cdefs.h> 42300906Sasomers#include <sys/time.h> 43300906Sasomers 44300906Sasomers#include <sys/fs/zfs.h> 45300906Sasomers 46300906Sasomers#include <dirent.h> 47300906Sasomers#include <iomanip> 48300906Sasomers#include <fstream> 49300906Sasomers#include <functional> 50300906Sasomers#include <sstream> 51300906Sasomers#include <syslog.h> 52300906Sasomers#include <unistd.h> 53300906Sasomers 54300906Sasomers#include <libzfs.h> 55300906Sasomers 56300906Sasomers#include <list> 57300906Sasomers#include <map> 58300906Sasomers#include <string> 59300906Sasomers 60300906Sasomers#include <devdctl/guid.h> 61300906Sasomers#include <devdctl/event.h> 62300906Sasomers#include <devdctl/event_factory.h> 63300906Sasomers#include <devdctl/exception.h> 64300906Sasomers#include <devdctl/consumer.h> 65300906Sasomers 66300906Sasomers#include "callout.h" 67300906Sasomers#include "vdev_iterator.h" 68300906Sasomers#include "zfsd_event.h" 69300906Sasomers#include "case_file.h" 70300906Sasomers#include "vdev.h" 71300906Sasomers#include "zfsd.h" 72300906Sasomers#include "zfsd_exception.h" 73300906Sasomers#include "zpool_list.h" 74300906Sasomers 75300906Sasomers__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 330733 2018-03-10 03:34:27Z asomers $"); 76300906Sasomers 77300906Sasomers/*============================ Namespace Control =============================*/ 78300906Sasomersusing std::auto_ptr; 79300906Sasomersusing std::hex; 80300906Sasomersusing std::ifstream; 81300906Sasomersusing std::stringstream; 82300906Sasomersusing std::setfill; 83300906Sasomersusing std::setw; 84300906Sasomers 85300906Sasomersusing DevdCtl::Event; 86300906Sasomersusing DevdCtl::EventFactory; 87300906Sasomersusing DevdCtl::EventList; 88300906Sasomersusing DevdCtl::Guid; 89300906Sasomersusing DevdCtl::ParseException; 90300906Sasomers 91300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/ 92300906Sasomers//- CaseFile Static Data ------------------------------------------------------- 93300906Sasomers 94300906SasomersCaseFileList CaseFile::s_activeCases; 95300906Sasomersconst string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97300906Sasomers 98300906Sasomers//- CaseFile Static Public Methods --------------------------------------------- 99300906SasomersCaseFile * 100300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID) 101300906Sasomers{ 102300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 103300906Sasomers curCase != s_activeCases.end(); curCase++) { 104300906Sasomers 105326321Sasomers if (((*curCase)->PoolGUID() != poolGUID 106326321Sasomers && Guid::InvalidGuid() != poolGUID) 107300906Sasomers || (*curCase)->VdevGUID() != vdevGUID) 108300906Sasomers continue; 109300906Sasomers 110300906Sasomers /* 111300906Sasomers * We only carry one active case per-vdev. 112300906Sasomers */ 113300906Sasomers return (*curCase); 114300906Sasomers } 115300906Sasomers return (NULL); 116300906Sasomers} 117300906Sasomers 118300906SasomersCaseFile * 119300906SasomersCaseFile::Find(const string &physPath) 120300906Sasomers{ 121300906Sasomers CaseFile *result = NULL; 122300906Sasomers 123300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 124300906Sasomers curCase != s_activeCases.end(); curCase++) { 125300906Sasomers 126300906Sasomers if ((*curCase)->PhysicalPath() != physPath) 127300906Sasomers continue; 128300906Sasomers 129300906Sasomers if (result != NULL) { 130300906Sasomers syslog(LOG_WARNING, "Multiple casefiles found for " 131300906Sasomers "physical path %s. " 132300906Sasomers "This is most likely a bug in zfsd", 133300906Sasomers physPath.c_str()); 134300906Sasomers } 135300906Sasomers result = *curCase; 136300906Sasomers } 137300906Sasomers return (result); 138300906Sasomers} 139300906Sasomers 140300906Sasomers 141300906Sasomersvoid 142300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143300906Sasomers{ 144300906Sasomers CaseFileList::iterator casefile; 145300906Sasomers for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146300906Sasomers CaseFileList::iterator next = casefile; 147300906Sasomers next++; 148300906Sasomers if (poolGUID == (*casefile)->PoolGUID()) 149300906Sasomers (*casefile)->ReEvaluate(event); 150300906Sasomers casefile = next; 151300906Sasomers } 152300906Sasomers} 153300906Sasomers 154300906SasomersCaseFile & 155300906SasomersCaseFile::Create(Vdev &vdev) 156300906Sasomers{ 157300906Sasomers CaseFile *activeCase; 158300906Sasomers 159300906Sasomers activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160300906Sasomers if (activeCase == NULL) 161300906Sasomers activeCase = new CaseFile(vdev); 162300906Sasomers 163300906Sasomers return (*activeCase); 164300906Sasomers} 165300906Sasomers 166300906Sasomersvoid 167300906SasomersCaseFile::DeSerialize() 168300906Sasomers{ 169300906Sasomers struct dirent **caseFiles; 170300906Sasomers 171300906Sasomers int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172300906Sasomers DeSerializeSelector, /*compar*/NULL)); 173300906Sasomers 174300906Sasomers if (numCaseFiles == -1) 175300906Sasomers return; 176300906Sasomers if (numCaseFiles == 0) { 177300906Sasomers free(caseFiles); 178300906Sasomers return; 179300906Sasomers } 180300906Sasomers 181300906Sasomers for (int i = 0; i < numCaseFiles; i++) { 182300906Sasomers 183300906Sasomers DeSerializeFile(caseFiles[i]->d_name); 184300906Sasomers free(caseFiles[i]); 185300906Sasomers } 186300906Sasomers free(caseFiles); 187300906Sasomers} 188300906Sasomers 189330733Sasomersbool 190330733SasomersCaseFile::Empty() 191330733Sasomers{ 192330733Sasomers return (s_activeCases.empty()); 193330733Sasomers} 194330733Sasomers 195300906Sasomersvoid 196300906SasomersCaseFile::LogAll() 197300906Sasomers{ 198300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 199300906Sasomers curCase != s_activeCases.end(); curCase++) 200300906Sasomers (*curCase)->Log(); 201300906Sasomers} 202300906Sasomers 203300906Sasomersvoid 204300906SasomersCaseFile::PurgeAll() 205300906Sasomers{ 206300906Sasomers /* 207300906Sasomers * Serialize casefiles before deleting them so that they can be reread 208300906Sasomers * and revalidated during BuildCaseFiles. 209300906Sasomers * CaseFiles remove themselves from this list on destruction. 210300906Sasomers */ 211300906Sasomers while (s_activeCases.size() != 0) { 212300906Sasomers CaseFile *casefile = s_activeCases.front(); 213300906Sasomers casefile->Serialize(); 214300906Sasomers delete casefile; 215300906Sasomers } 216300906Sasomers 217300906Sasomers} 218300906Sasomers 219300906Sasomers//- CaseFile Public Methods ---------------------------------------------------- 220300906Sasomersbool 221300906SasomersCaseFile::RefreshVdevState() 222300906Sasomers{ 223300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 224300906Sasomers zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 225300906Sasomers if (casePool == NULL) 226300906Sasomers return (false); 227300906Sasomers 228300906Sasomers Vdev vd(casePool, CaseVdev(casePool)); 229300906Sasomers if (vd.DoesNotExist()) 230300906Sasomers return (false); 231300906Sasomers 232300906Sasomers m_vdevState = vd.State(); 233300906Sasomers m_vdevPhysPath = vd.PhysicalPath(); 234300906Sasomers return (true); 235300906Sasomers} 236300906Sasomers 237300906Sasomersbool 238300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 239300906Sasomers{ 240300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 241300906Sasomers zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 242300906Sasomers 243300906Sasomers if (pool == NULL || !RefreshVdevState()) { 244300906Sasomers /* 245300906Sasomers * The pool or vdev for this case file is no longer 246300906Sasomers * part of the configuration. This can happen 247300906Sasomers * if we process a device arrival notification 248300906Sasomers * before seeing the ZFS configuration change 249300906Sasomers * event. 250300906Sasomers */ 251300906Sasomers syslog(LOG_INFO, 252300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 253300906Sasomers "Closing\n", 254300906Sasomers PoolGUIDString().c_str(), 255300906Sasomers VdevGUIDString().c_str()); 256300906Sasomers Close(); 257300906Sasomers 258300906Sasomers /* 259300906Sasomers * Since this event was not used to close this 260300906Sasomers * case, do not report it as consumed. 261300906Sasomers */ 262300906Sasomers return (/*consumed*/false); 263300906Sasomers } 264300906Sasomers 265300906Sasomers if (VdevState() > VDEV_STATE_CANT_OPEN) { 266300906Sasomers /* 267300906Sasomers * For now, newly discovered devices only help for 268300906Sasomers * devices that are missing. In the future, we might 269300906Sasomers * use a newly inserted spare to replace a degraded 270300906Sasomers * or faulted device. 271300906Sasomers */ 272300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 273300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str()); 274300906Sasomers return (/*consumed*/false); 275300906Sasomers } 276300906Sasomers 277300906Sasomers if (vdev != NULL 278326321Sasomers && ( vdev->PoolGUID() == m_poolGUID 279326321Sasomers || vdev->PoolGUID() == Guid::InvalidGuid()) 280300906Sasomers && vdev->GUID() == m_vdevGUID) { 281300906Sasomers 282300906Sasomers zpool_vdev_online(pool, vdev->GUIDString().c_str(), 283300906Sasomers ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 284300906Sasomers &m_vdevState); 285300906Sasomers syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 286300906Sasomers zpool_get_name(pool), vdev->GUIDString().c_str(), 287300906Sasomers devPath.c_str(), 288300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 289300906Sasomers 290300906Sasomers /* 291300906Sasomers * Check the vdev state post the online action to see 292300906Sasomers * if we can retire this case. 293300906Sasomers */ 294300906Sasomers CloseIfSolved(); 295300906Sasomers 296300906Sasomers return (/*consumed*/true); 297300906Sasomers } 298300906Sasomers 299300906Sasomers /* 300300906Sasomers * If the auto-replace policy is enabled, and we have physical 301300906Sasomers * path information, try a physical path replacement. 302300906Sasomers */ 303300906Sasomers if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 304300906Sasomers syslog(LOG_INFO, 305300906Sasomers "CaseFile(%s:%s:%s): AutoReplace not set. " 306300906Sasomers "Ignoring device insertion.\n", 307300906Sasomers PoolGUIDString().c_str(), 308300906Sasomers VdevGUIDString().c_str(), 309300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 310300906Sasomers return (/*consumed*/false); 311300906Sasomers } 312300906Sasomers 313300906Sasomers if (PhysicalPath().empty()) { 314300906Sasomers syslog(LOG_INFO, 315300906Sasomers "CaseFile(%s:%s:%s): No physical path information. " 316300906Sasomers "Ignoring device insertion.\n", 317300906Sasomers PoolGUIDString().c_str(), 318300906Sasomers VdevGUIDString().c_str(), 319300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 320300906Sasomers return (/*consumed*/false); 321300906Sasomers } 322300906Sasomers 323300906Sasomers if (physPath != PhysicalPath()) { 324300906Sasomers syslog(LOG_INFO, 325300906Sasomers "CaseFile(%s:%s:%s): Physical path mismatch. " 326300906Sasomers "Ignoring device insertion.\n", 327300906Sasomers PoolGUIDString().c_str(), 328300906Sasomers VdevGUIDString().c_str(), 329300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 330300906Sasomers return (/*consumed*/false); 331300906Sasomers } 332300906Sasomers 333300906Sasomers /* Write a label on the newly inserted disk. */ 334300906Sasomers if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 335300906Sasomers syslog(LOG_ERR, 336300906Sasomers "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 337300906Sasomers zpool_get_name(pool), VdevGUIDString().c_str(), 338300906Sasomers libzfs_error_action(g_zfsHandle), 339300906Sasomers libzfs_error_description(g_zfsHandle)); 340300906Sasomers return (/*consumed*/false); 341300906Sasomers } 342300906Sasomers 343300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 344300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 345300906Sasomers devPath.c_str()); 346300906Sasomers return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 347300906Sasomers} 348300906Sasomers 349300906Sasomersbool 350300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event) 351300906Sasomers{ 352300906Sasomers bool consumed(false); 353300906Sasomers 354300906Sasomers if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 355300906Sasomers /* 356300906Sasomers * The Vdev we represent has been removed from the 357300906Sasomers * configuration. This case is no longer of value. 358300906Sasomers */ 359300906Sasomers Close(); 360300906Sasomers 361300906Sasomers return (/*consumed*/true); 362300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 363300906Sasomers /* This Pool has been destroyed. Discard the case */ 364300906Sasomers Close(); 365300906Sasomers 366300906Sasomers return (/*consumed*/true); 367300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 368300906Sasomers RefreshVdevState(); 369300906Sasomers if (VdevState() < VDEV_STATE_HEALTHY) 370300906Sasomers consumed = ActivateSpare(); 371300906Sasomers } 372300906Sasomers 373300906Sasomers 374300906Sasomers if (event.Value("class") == "resource.fs.zfs.removed") { 375300906Sasomers bool spare_activated; 376300906Sasomers 377300906Sasomers if (!RefreshVdevState()) { 378300906Sasomers /* 379300906Sasomers * The pool or vdev for this case file is no longer 380300906Sasomers * part of the configuration. This can happen 381300906Sasomers * if we process a device arrival notification 382300906Sasomers * before seeing the ZFS configuration change 383300906Sasomers * event. 384300906Sasomers */ 385300906Sasomers syslog(LOG_INFO, 386300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 387300906Sasomers "unconfigured. Closing\n", 388300906Sasomers PoolGUIDString().c_str(), 389300906Sasomers VdevGUIDString().c_str()); 390300906Sasomers /* 391300906Sasomers * Close the case now so we won't waste cycles in the 392300906Sasomers * system rescan 393300906Sasomers */ 394300906Sasomers Close(); 395300906Sasomers 396300906Sasomers /* 397300906Sasomers * Since this event was not used to close this 398300906Sasomers * case, do not report it as consumed. 399300906Sasomers */ 400300906Sasomers return (/*consumed*/false); 401300906Sasomers } 402300906Sasomers 403300906Sasomers /* 404300906Sasomers * Discard any tentative I/O error events for 405300906Sasomers * this case. They were most likely caused by the 406300906Sasomers * hot-unplug of this device. 407300906Sasomers */ 408300906Sasomers PurgeTentativeEvents(); 409300906Sasomers 410300906Sasomers /* Try to activate spares if they are available */ 411300906Sasomers spare_activated = ActivateSpare(); 412300906Sasomers 413300906Sasomers /* 414300906Sasomers * Rescan the drives in the system to see if a recent 415300906Sasomers * drive arrival can be used to solve this case. 416300906Sasomers */ 417300906Sasomers ZfsDaemon::RequestSystemRescan(); 418300906Sasomers 419300906Sasomers /* 420300906Sasomers * Consume the event if we successfully activated a spare. 421300906Sasomers * Otherwise, leave it in the unconsumed events list so that the 422300906Sasomers * future addition of a spare to this pool might be able to 423300906Sasomers * close the case 424300906Sasomers */ 425300906Sasomers consumed = spare_activated; 426300906Sasomers } else if (event.Value("class") == "resource.fs.zfs.statechange") { 427300906Sasomers RefreshVdevState(); 428300906Sasomers /* 429300906Sasomers * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 430300906Sasomers * activate a hotspare. Otherwise, ignore the event 431300906Sasomers */ 432300906Sasomers if (VdevState() == VDEV_STATE_FAULTED || 433300906Sasomers VdevState() == VDEV_STATE_DEGRADED || 434300906Sasomers VdevState() == VDEV_STATE_CANT_OPEN) 435300906Sasomers (void) ActivateSpare(); 436300906Sasomers consumed = true; 437300906Sasomers } 438300906Sasomers else if (event.Value("class") == "ereport.fs.zfs.io" || 439300906Sasomers event.Value("class") == "ereport.fs.zfs.checksum") { 440300906Sasomers 441300906Sasomers m_tentativeEvents.push_front(event.DeepCopy()); 442300906Sasomers RegisterCallout(event); 443300906Sasomers consumed = true; 444300906Sasomers } 445300906Sasomers 446300906Sasomers bool closed(CloseIfSolved()); 447300906Sasomers 448300906Sasomers return (consumed || closed); 449300906Sasomers} 450300906Sasomers 451329792Sasomers/* Find a Vdev containing the vdev with the given GUID */ 452329792Sasomersstatic nvlist_t* 453329792Sasomersfind_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) 454329792Sasomers{ 455329792Sasomers nvlist_t **vdevChildren; 456329792Sasomers int error; 457329792Sasomers unsigned ch, numChildren; 458300906Sasomers 459329792Sasomers error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, 460329792Sasomers &vdevChildren, &numChildren); 461329792Sasomers 462329792Sasomers if (error != 0 || numChildren == 0) 463329792Sasomers return (NULL); 464329792Sasomers 465329792Sasomers for (ch = 0; ch < numChildren; ch++) { 466329792Sasomers nvlist *result; 467329792Sasomers Vdev vdev(pool_config, vdevChildren[ch]); 468329792Sasomers 469329792Sasomers if (vdev.GUID() == child_guid) 470329792Sasomers return (config); 471329792Sasomers 472329792Sasomers result = find_parent(pool_config, vdevChildren[ch], child_guid); 473329792Sasomers if (result != NULL) 474329792Sasomers return (result); 475329792Sasomers } 476329792Sasomers 477329792Sasomers return (NULL); 478329792Sasomers} 479329792Sasomers 480300906Sasomersbool 481300906SasomersCaseFile::ActivateSpare() { 482329792Sasomers nvlist_t *config, *nvroot, *parent_config; 483300906Sasomers nvlist_t **spares; 484300906Sasomers char *devPath, *vdev_type; 485300906Sasomers const char *poolname; 486300906Sasomers u_int nspares, i; 487300906Sasomers int error; 488300906Sasomers 489300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 490300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 491300906Sasomers if (zhp == NULL) { 492300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 493300919Sbdrewery "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 494300906Sasomers return (false); 495300906Sasomers } 496300906Sasomers poolname = zpool_get_name(zhp); 497300906Sasomers config = zpool_get_config(zhp, NULL); 498300906Sasomers if (config == NULL) { 499300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 500300906Sasomers "config for pool %s", poolname); 501300906Sasomers return (false); 502300906Sasomers } 503300906Sasomers error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 504300906Sasomers if (error != 0){ 505300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 506300906Sasomers "tree for pool %s", poolname); 507300906Sasomers return (false); 508300906Sasomers } 509329792Sasomers 510329792Sasomers parent_config = find_parent(config, nvroot, m_vdevGUID); 511329792Sasomers if (parent_config != NULL) { 512329792Sasomers char *parent_type; 513329792Sasomers 514329792Sasomers /* 515329792Sasomers * Don't activate spares for members of a "replacing" vdev. 516329792Sasomers * They're already dealt with. Sparing them will just drag out 517329792Sasomers * the resilver process. 518329792Sasomers */ 519329792Sasomers error = nvlist_lookup_string(parent_config, 520329792Sasomers ZPOOL_CONFIG_TYPE, &parent_type); 521329792Sasomers if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) 522329792Sasomers return (false); 523329792Sasomers } 524329792Sasomers 525300906Sasomers nspares = 0; 526300906Sasomers nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 527300906Sasomers &nspares); 528300906Sasomers if (nspares == 0) { 529300906Sasomers /* The pool has no spares configured */ 530300906Sasomers syslog(LOG_INFO, "CaseFile::ActivateSpare: " 531300906Sasomers "No spares available for pool %s", poolname); 532300906Sasomers return (false); 533300906Sasomers } 534300906Sasomers for (i = 0; i < nspares; i++) { 535300906Sasomers uint64_t *nvlist_array; 536300906Sasomers vdev_stat_t *vs; 537300906Sasomers uint_t nstats; 538300906Sasomers 539300906Sasomers if (nvlist_lookup_uint64_array(spares[i], 540300906Sasomers ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 541300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 542300906Sasomers "find vdev stats for pool %s, spare %d", 543300906Sasomers poolname, i); 544300906Sasomers return (false); 545300906Sasomers } 546300906Sasomers vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 547300906Sasomers 548300906Sasomers if ((vs->vs_aux != VDEV_AUX_SPARED) 549300906Sasomers && (vs->vs_state == VDEV_STATE_HEALTHY)) { 550300906Sasomers /* We found a usable spare */ 551300906Sasomers break; 552300906Sasomers } 553300906Sasomers } 554300906Sasomers 555300906Sasomers if (i == nspares) { 556300906Sasomers /* No available spares were found */ 557300906Sasomers return (false); 558300906Sasomers } 559300906Sasomers 560300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 561300906Sasomers if (error != 0) { 562300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 563300906Sasomers "the path of pool %s, spare %d. Error %d", 564300906Sasomers poolname, i, error); 565300906Sasomers return (false); 566300906Sasomers } 567300906Sasomers 568300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 569300906Sasomers if (error != 0) { 570300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 571300906Sasomers "the vdev type of pool %s, spare %d. Error %d", 572300906Sasomers poolname, i, error); 573300906Sasomers return (false); 574300906Sasomers } 575300906Sasomers 576300906Sasomers return (Replace(vdev_type, devPath, /*isspare*/true)); 577300906Sasomers} 578300906Sasomers 579300906Sasomersvoid 580300906SasomersCaseFile::RegisterCallout(const Event &event) 581300906Sasomers{ 582300906Sasomers timeval now, countdown, elapsed, timestamp, zero, remaining; 583300906Sasomers 584300906Sasomers gettimeofday(&now, 0); 585300906Sasomers timestamp = event.GetTimestamp(); 586300906Sasomers timersub(&now, ×tamp, &elapsed); 587300906Sasomers timersub(&s_removeGracePeriod, &elapsed, &countdown); 588300906Sasomers /* 589300906Sasomers * If countdown is <= zero, Reset the timer to the 590300906Sasomers * smallest positive time value instead 591300906Sasomers */ 592300906Sasomers timerclear(&zero); 593300906Sasomers if (timercmp(&countdown, &zero, <=)) { 594300906Sasomers timerclear(&countdown); 595300906Sasomers countdown.tv_usec = 1; 596300906Sasomers } 597300906Sasomers 598300906Sasomers remaining = m_tentativeTimer.TimeRemaining(); 599300906Sasomers 600300906Sasomers if (!m_tentativeTimer.IsPending() 601300906Sasomers || timercmp(&countdown, &remaining, <)) 602300906Sasomers m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 603300906Sasomers} 604300906Sasomers 605300906Sasomers 606300906Sasomersbool 607300906SasomersCaseFile::CloseIfSolved() 608300906Sasomers{ 609300906Sasomers if (m_events.empty() 610300906Sasomers && m_tentativeEvents.empty()) { 611300906Sasomers 612300906Sasomers /* 613300906Sasomers * We currently do not track or take actions on 614300906Sasomers * devices in the degraded or faulted state. 615300906Sasomers * Once we have support for spare pools, we'll 616300906Sasomers * retain these cases so that any spares added in 617300906Sasomers * the future can be applied to them. 618300906Sasomers */ 619300906Sasomers switch (VdevState()) { 620300906Sasomers case VDEV_STATE_HEALTHY: 621300906Sasomers /* No need to keep cases for healthy vdevs */ 622300906Sasomers Close(); 623300906Sasomers return (true); 624300906Sasomers case VDEV_STATE_REMOVED: 625300906Sasomers case VDEV_STATE_CANT_OPEN: 626300906Sasomers /* 627300906Sasomers * Keep open. We may solve it with a newly inserted 628300906Sasomers * device. 629300906Sasomers */ 630300906Sasomers case VDEV_STATE_FAULTED: 631300906Sasomers case VDEV_STATE_DEGRADED: 632300906Sasomers /* 633300906Sasomers * Keep open. We may solve it with the future 634300906Sasomers * addition of a spare to the pool 635300906Sasomers */ 636300906Sasomers case VDEV_STATE_UNKNOWN: 637300906Sasomers case VDEV_STATE_CLOSED: 638300906Sasomers case VDEV_STATE_OFFLINE: 639300906Sasomers /* 640300906Sasomers * Keep open? This may not be the correct behavior, 641300906Sasomers * but it's what we've always done 642300906Sasomers */ 643300906Sasomers ; 644300906Sasomers } 645300906Sasomers 646300906Sasomers /* 647300906Sasomers * Re-serialize the case in order to remove any 648300906Sasomers * previous event data. 649300906Sasomers */ 650300906Sasomers Serialize(); 651300906Sasomers } 652300906Sasomers 653300906Sasomers return (false); 654300906Sasomers} 655300906Sasomers 656300906Sasomersvoid 657300906SasomersCaseFile::Log() 658300906Sasomers{ 659300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 660300906Sasomers VdevGUIDString().c_str(), PhysicalPath().c_str()); 661300906Sasomers syslog(LOG_INFO, "\tVdev State = %s\n", 662300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 663300906Sasomers if (m_tentativeEvents.size() != 0) { 664300906Sasomers syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 665300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 666300906Sasomers event != m_tentativeEvents.end(); event++) 667300906Sasomers (*event)->Log(LOG_INFO); 668300906Sasomers } 669300906Sasomers if (m_events.size() != 0) { 670300906Sasomers syslog(LOG_INFO, "\t=== Events ===\n"); 671300906Sasomers for (EventList::iterator event(m_events.begin()); 672300906Sasomers event != m_events.end(); event++) 673300906Sasomers (*event)->Log(LOG_INFO); 674300906Sasomers } 675300906Sasomers} 676300906Sasomers 677300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------ 678300906Sasomersvoid 679300906SasomersCaseFile::OnGracePeriodEnded(void *arg) 680300906Sasomers{ 681300906Sasomers CaseFile &casefile(*static_cast<CaseFile *>(arg)); 682300906Sasomers 683300906Sasomers casefile.OnGracePeriodEnded(); 684300906Sasomers} 685300906Sasomers 686300906Sasomersint 687300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry) 688300906Sasomers{ 689300906Sasomers uint64_t poolGUID; 690300906Sasomers uint64_t vdevGUID; 691300906Sasomers 692300906Sasomers if (dirEntry->d_type == DT_REG 693300919Sbdrewery && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 694300906Sasomers &poolGUID, &vdevGUID) == 2) 695300906Sasomers return (1); 696300906Sasomers return (0); 697300906Sasomers} 698300906Sasomers 699300906Sasomersvoid 700300906SasomersCaseFile::DeSerializeFile(const char *fileName) 701300906Sasomers{ 702300906Sasomers string fullName(s_caseFilePath + '/' + fileName); 703300906Sasomers CaseFile *existingCaseFile(NULL); 704300906Sasomers CaseFile *caseFile(NULL); 705300906Sasomers 706300906Sasomers try { 707300906Sasomers uint64_t poolGUID; 708300906Sasomers uint64_t vdevGUID; 709300906Sasomers nvlist_t *vdevConf; 710300906Sasomers 711314431Sasomers if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 712314431Sasomers &poolGUID, &vdevGUID) != 2) { 713314431Sasomers throw ZfsdException("CaseFile::DeSerialize: " 714314431Sasomers "Unintelligible CaseFile filename %s.\n", fileName); 715314431Sasomers } 716300906Sasomers existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 717300906Sasomers if (existingCaseFile != NULL) { 718300906Sasomers /* 719300906Sasomers * If the vdev is already degraded or faulted, 720300906Sasomers * there's no point in keeping the state around 721300906Sasomers * that we use to put a drive into the degraded 722300906Sasomers * state. However, if the vdev is simply missing, 723300906Sasomers * preserve the case data in the hopes that it will 724300906Sasomers * return. 725300906Sasomers */ 726300906Sasomers caseFile = existingCaseFile; 727300906Sasomers vdev_state curState(caseFile->VdevState()); 728300906Sasomers if (curState > VDEV_STATE_CANT_OPEN 729300906Sasomers && curState < VDEV_STATE_HEALTHY) { 730300906Sasomers unlink(fileName); 731300906Sasomers return; 732300906Sasomers } 733300906Sasomers } else { 734300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 735300906Sasomers if (zpl.empty() 736300906Sasomers || (vdevConf = VdevIterator(zpl.front()) 737300906Sasomers .Find(vdevGUID)) == NULL) { 738300906Sasomers /* 739300906Sasomers * Either the pool no longer exists 740300906Sasomers * or this vdev is no longer a member of 741300906Sasomers * the pool. 742300906Sasomers */ 743300906Sasomers unlink(fullName.c_str()); 744300906Sasomers return; 745300906Sasomers } 746300906Sasomers 747300906Sasomers /* 748300906Sasomers * Any vdev we find that does not have a case file 749300906Sasomers * must be in the healthy state and thus worthy of 750300906Sasomers * continued SERD data tracking. 751300906Sasomers */ 752300906Sasomers caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 753300906Sasomers } 754300906Sasomers 755300906Sasomers ifstream caseStream(fullName.c_str()); 756300906Sasomers if (!caseStream) 757300906Sasomers throw ZfsdException("CaseFile::DeSerialize: Unable to " 758300906Sasomers "read %s.\n", fileName); 759300906Sasomers 760300906Sasomers caseFile->DeSerialize(caseStream); 761300906Sasomers } catch (const ParseException &exp) { 762300906Sasomers 763300906Sasomers exp.Log(); 764300906Sasomers if (caseFile != existingCaseFile) 765300906Sasomers delete caseFile; 766300906Sasomers 767300906Sasomers /* 768300906Sasomers * Since we can't parse the file, unlink it so we don't 769300906Sasomers * trip over it again. 770300906Sasomers */ 771300906Sasomers unlink(fileName); 772300906Sasomers } catch (const ZfsdException &zfsException) { 773300906Sasomers 774300906Sasomers zfsException.Log(); 775300906Sasomers if (caseFile != existingCaseFile) 776300906Sasomers delete caseFile; 777300906Sasomers } 778300906Sasomers} 779300906Sasomers 780300906Sasomers//- CaseFile Protected Methods ------------------------------------------------- 781300906SasomersCaseFile::CaseFile(const Vdev &vdev) 782300906Sasomers : m_poolGUID(vdev.PoolGUID()), 783300906Sasomers m_vdevGUID(vdev.GUID()), 784300906Sasomers m_vdevState(vdev.State()), 785300906Sasomers m_vdevPhysPath(vdev.PhysicalPath()) 786300906Sasomers{ 787300906Sasomers stringstream guidString; 788300906Sasomers 789300906Sasomers guidString << m_vdevGUID; 790300906Sasomers m_vdevGUIDString = guidString.str(); 791300906Sasomers guidString.str(""); 792300906Sasomers guidString << m_poolGUID; 793300906Sasomers m_poolGUIDString = guidString.str(); 794300906Sasomers 795300906Sasomers s_activeCases.push_back(this); 796300906Sasomers 797300906Sasomers syslog(LOG_INFO, "Creating new CaseFile:\n"); 798300906Sasomers Log(); 799300906Sasomers} 800300906Sasomers 801300906SasomersCaseFile::~CaseFile() 802300906Sasomers{ 803300906Sasomers PurgeEvents(); 804300906Sasomers PurgeTentativeEvents(); 805300906Sasomers m_tentativeTimer.Stop(); 806300906Sasomers s_activeCases.remove(this); 807300906Sasomers} 808300906Sasomers 809300906Sasomersvoid 810300906SasomersCaseFile::PurgeEvents() 811300906Sasomers{ 812300906Sasomers for (EventList::iterator event(m_events.begin()); 813300906Sasomers event != m_events.end(); event++) 814300906Sasomers delete *event; 815300906Sasomers 816300906Sasomers m_events.clear(); 817300906Sasomers} 818300906Sasomers 819300906Sasomersvoid 820300906SasomersCaseFile::PurgeTentativeEvents() 821300906Sasomers{ 822300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 823300906Sasomers event != m_tentativeEvents.end(); event++) 824300906Sasomers delete *event; 825300906Sasomers 826300906Sasomers m_tentativeEvents.clear(); 827300906Sasomers} 828300906Sasomers 829300906Sasomersvoid 830300906SasomersCaseFile::SerializeEvList(const EventList events, int fd, 831300906Sasomers const char* prefix) const 832300906Sasomers{ 833300906Sasomers if (events.empty()) 834300906Sasomers return; 835300906Sasomers for (EventList::const_iterator curEvent = events.begin(); 836300906Sasomers curEvent != events.end(); curEvent++) { 837300906Sasomers const string &eventString((*curEvent)->GetEventString()); 838300906Sasomers 839300906Sasomers // TODO: replace many write(2) calls with a single writev(2) 840300906Sasomers if (prefix) 841300906Sasomers write(fd, prefix, strlen(prefix)); 842300906Sasomers write(fd, eventString.c_str(), eventString.length()); 843300906Sasomers } 844300906Sasomers} 845300906Sasomers 846300906Sasomersvoid 847300906SasomersCaseFile::Serialize() 848300906Sasomers{ 849300906Sasomers stringstream saveFile; 850300906Sasomers 851300906Sasomers saveFile << setfill('0') 852300906Sasomers << s_caseFilePath << "/" 853300906Sasomers << "pool_" << PoolGUIDString() 854300906Sasomers << "_vdev_" << VdevGUIDString() 855300906Sasomers << ".case"; 856300906Sasomers 857300906Sasomers if (m_events.empty() && m_tentativeEvents.empty()) { 858300906Sasomers unlink(saveFile.str().c_str()); 859300906Sasomers return; 860300906Sasomers } 861300906Sasomers 862300906Sasomers int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 863300906Sasomers if (fd == -1) { 864300906Sasomers syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 865300906Sasomers saveFile.str().c_str()); 866300906Sasomers return; 867300906Sasomers } 868300906Sasomers SerializeEvList(m_events, fd); 869300906Sasomers SerializeEvList(m_tentativeEvents, fd, "tentative "); 870300906Sasomers close(fd); 871300906Sasomers} 872300906Sasomers 873300906Sasomers/* 874300906Sasomers * XXX: This method assumes that events may not contain embedded newlines. If 875300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch 876300906Sasomers * serialization formats 877300906Sasomers */ 878300906Sasomersvoid 879300906SasomersCaseFile::DeSerialize(ifstream &caseStream) 880300906Sasomers{ 881300906Sasomers string evString; 882300906Sasomers const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 883300906Sasomers 884300906Sasomers caseStream >> std::noskipws >> std::ws; 885300906Sasomers while (caseStream.good()) { 886300906Sasomers /* 887300906Sasomers * Outline: 888300906Sasomers * read the beginning of a line and check it for 889300906Sasomers * "tentative". If found, discard "tentative". 890300906Sasomers * Create a new event 891300906Sasomers * continue 892300906Sasomers */ 893300906Sasomers EventList* destEvents; 894300906Sasomers const string tentFlag("tentative "); 895300906Sasomers string line; 896300906Sasomers std::stringbuf lineBuf; 897300906Sasomers 898300906Sasomers caseStream.get(lineBuf); 899300906Sasomers caseStream.ignore(); /*discard the newline character*/ 900300906Sasomers line = lineBuf.str(); 901300906Sasomers if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 902300906Sasomers /* Discard "tentative" */ 903300906Sasomers line.erase(0, tentFlag.size()); 904300906Sasomers destEvents = &m_tentativeEvents; 905300906Sasomers } else { 906300906Sasomers destEvents = &m_events; 907300906Sasomers } 908300906Sasomers Event *event(Event::CreateEvent(factory, line)); 909300906Sasomers if (event != NULL) { 910300906Sasomers destEvents->push_back(event); 911300906Sasomers RegisterCallout(*event); 912300906Sasomers } 913300906Sasomers } 914300906Sasomers} 915300906Sasomers 916300906Sasomersvoid 917300906SasomersCaseFile::Close() 918300906Sasomers{ 919300906Sasomers /* 920300906Sasomers * This case is no longer relevant. Clean up our 921300906Sasomers * serialization file, and delete the case. 922300906Sasomers */ 923300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 924300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 925300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 926300906Sasomers 927300906Sasomers /* 928300906Sasomers * Serialization of a Case with no event data, clears the 929300906Sasomers * Serialization data for that event. 930300906Sasomers */ 931300906Sasomers PurgeEvents(); 932300906Sasomers Serialize(); 933300906Sasomers 934300906Sasomers delete this; 935300906Sasomers} 936300906Sasomers 937300906Sasomersvoid 938300906SasomersCaseFile::OnGracePeriodEnded() 939300906Sasomers{ 940300906Sasomers bool should_fault, should_degrade; 941300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 942300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 943300906Sasomers 944300906Sasomers m_events.splice(m_events.begin(), m_tentativeEvents); 945300906Sasomers should_fault = ShouldFault(); 946300906Sasomers should_degrade = ShouldDegrade(); 947300906Sasomers 948300906Sasomers if (should_fault || should_degrade) { 949300906Sasomers if (zhp == NULL 950300906Sasomers || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 951300906Sasomers /* 952300906Sasomers * Either the pool no longer exists 953300906Sasomers * or this vdev is no longer a member of 954300906Sasomers * the pool. 955300906Sasomers */ 956300906Sasomers Close(); 957300906Sasomers return; 958300906Sasomers } 959300906Sasomers 960300906Sasomers } 961300906Sasomers 962300906Sasomers /* A fault condition has priority over a degrade condition */ 963300906Sasomers if (ShouldFault()) { 964300906Sasomers /* Fault the vdev and close the case. */ 965300906Sasomers if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 966300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 967300906Sasomers syslog(LOG_INFO, "Faulting vdev(%s/%s)", 968300906Sasomers PoolGUIDString().c_str(), 969300906Sasomers VdevGUIDString().c_str()); 970300906Sasomers Close(); 971300906Sasomers return; 972300906Sasomers } 973300906Sasomers else { 974300906Sasomers syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 975300906Sasomers PoolGUIDString().c_str(), 976300906Sasomers VdevGUIDString().c_str(), 977300906Sasomers libzfs_error_action(g_zfsHandle), 978300906Sasomers libzfs_error_description(g_zfsHandle)); 979300906Sasomers } 980300906Sasomers } 981300906Sasomers else if (ShouldDegrade()) { 982300906Sasomers /* Degrade the vdev and close the case. */ 983300906Sasomers if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 984300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 985300906Sasomers syslog(LOG_INFO, "Degrading vdev(%s/%s)", 986300906Sasomers PoolGUIDString().c_str(), 987300906Sasomers VdevGUIDString().c_str()); 988300906Sasomers Close(); 989300906Sasomers return; 990300906Sasomers } 991300906Sasomers else { 992300906Sasomers syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 993300906Sasomers PoolGUIDString().c_str(), 994300906Sasomers VdevGUIDString().c_str(), 995300906Sasomers libzfs_error_action(g_zfsHandle), 996300906Sasomers libzfs_error_description(g_zfsHandle)); 997300906Sasomers } 998300906Sasomers } 999300906Sasomers Serialize(); 1000300906Sasomers} 1001300906Sasomers 1002300906SasomersVdev 1003300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 1004300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1005300906Sasomers std::list<Vdev> children; 1006300906Sasomers std::list<Vdev>::iterator children_it; 1007300906Sasomers 1008300906Sasomers Vdev parent(vd.Parent()); 1009300906Sasomers Vdev replacing(NonexistentVdev); 1010300906Sasomers 1011300906Sasomers /* 1012300906Sasomers * To determine whether we are being replaced by another spare that 1013300906Sasomers * is still working, then make sure that it is currently spared and 1014300906Sasomers * that the spare is either resilvering or healthy. If any of these 1015300906Sasomers * conditions fail, then we are not being replaced by a spare. 1016300906Sasomers * 1017300906Sasomers * If the spare is healthy, then the case file should be closed very 1018300906Sasomers * soon after this check. 1019300906Sasomers */ 1020300906Sasomers if (parent.DoesNotExist() 1021300906Sasomers || parent.Name(zhp, /*verbose*/false) != "spare") 1022300906Sasomers return (NonexistentVdev); 1023300906Sasomers 1024300906Sasomers children = parent.Children(); 1025300906Sasomers children_it = children.begin(); 1026300906Sasomers for (;children_it != children.end(); children_it++) { 1027300906Sasomers Vdev child = *children_it; 1028300906Sasomers 1029300906Sasomers /* Skip our vdev. */ 1030300906Sasomers if (child.GUID() == VdevGUID()) 1031300906Sasomers continue; 1032300906Sasomers /* 1033300906Sasomers * Accept the first child that doesn't match our GUID, or 1034300906Sasomers * any resilvering/healthy device if one exists. 1035300906Sasomers */ 1036300906Sasomers if (replacing.DoesNotExist() || child.IsResilvering() 1037300906Sasomers || child.State() == VDEV_STATE_HEALTHY) 1038300906Sasomers replacing = child; 1039300906Sasomers } 1040300906Sasomers 1041300906Sasomers return (replacing); 1042300906Sasomers} 1043300906Sasomers 1044300906Sasomersbool 1045300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 1046300906Sasomers nvlist_t *nvroot, *newvd; 1047300906Sasomers const char *poolname; 1048300906Sasomers string oldstr(VdevGUIDString()); 1049300906Sasomers bool retval = true; 1050300906Sasomers 1051300906Sasomers /* Figure out what pool we're working on */ 1052300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1053300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1054300906Sasomers if (zhp == NULL) { 1055300906Sasomers syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1056300919Sbdrewery "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1057300906Sasomers return (false); 1058300906Sasomers } 1059300906Sasomers poolname = zpool_get_name(zhp); 1060300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1061300906Sasomers Vdev replaced(BeingReplacedBy(zhp)); 1062300906Sasomers 1063300906Sasomers if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1064300906Sasomers /* If we are already being replaced by a working spare, pass. */ 1065300906Sasomers if (replaced.IsResilvering() 1066300906Sasomers || replaced.State() == VDEV_STATE_HEALTHY) { 1067300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1068300906Sasomers "replaced", VdevGUIDString().c_str(), path); 1069300906Sasomers return (/*consumed*/false); 1070300906Sasomers } 1071300906Sasomers /* 1072300906Sasomers * If we have already been replaced by a spare, but that spare 1073300906Sasomers * is broken, we must spare the spare, not the original device. 1074300906Sasomers */ 1075300906Sasomers oldstr = replaced.GUIDString(); 1076300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1077300906Sasomers "broken spare %s instead", VdevGUIDString().c_str(), 1078300906Sasomers path, oldstr.c_str()); 1079300906Sasomers } 1080300906Sasomers 1081300906Sasomers /* 1082300906Sasomers * Build a root vdev/leaf vdev configuration suitable for 1083300906Sasomers * zpool_vdev_attach. Only enough data for the kernel to find 1084300906Sasomers * the device (i.e. type and disk device node path) are needed. 1085300906Sasomers */ 1086300906Sasomers nvroot = NULL; 1087300906Sasomers newvd = NULL; 1088300906Sasomers 1089300906Sasomers if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1090300906Sasomers || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1091300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1092300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1093300906Sasomers if (nvroot != NULL) 1094300906Sasomers nvlist_free(nvroot); 1095300906Sasomers return (false); 1096300906Sasomers } 1097300906Sasomers if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1098300906Sasomers || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1099300906Sasomers || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1100300906Sasomers || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1101300906Sasomers &newvd, 1) != 0) { 1102300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1103300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1104300906Sasomers nvlist_free(newvd); 1105300906Sasomers nvlist_free(nvroot); 1106300906Sasomers return (true); 1107300906Sasomers } 1108300906Sasomers 1109300906Sasomers /* Data was copied when added to the root vdev. */ 1110300906Sasomers nvlist_free(newvd); 1111300906Sasomers 1112300906Sasomers retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1113300906Sasomers /*replace*/B_TRUE) == 0); 1114300906Sasomers if (retval) 1115300906Sasomers syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1116300906Sasomers poolname, oldstr.c_str(), path); 1117300906Sasomers else 1118300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1119300906Sasomers poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1120300906Sasomers libzfs_error_description(g_zfsHandle)); 1121300906Sasomers nvlist_free(nvroot); 1122300906Sasomers 1123300906Sasomers return (retval); 1124300906Sasomers} 1125300906Sasomers 1126300906Sasomers/* Does the argument event refer to a checksum error? */ 1127300906Sasomersstatic bool 1128300906SasomersIsChecksumEvent(const Event* const event) 1129300906Sasomers{ 1130300906Sasomers return ("ereport.fs.zfs.checksum" == event->Value("type")); 1131300906Sasomers} 1132300906Sasomers 1133300906Sasomers/* Does the argument event refer to an IO error? */ 1134300906Sasomersstatic bool 1135300906SasomersIsIOEvent(const Event* const event) 1136300906Sasomers{ 1137300906Sasomers return ("ereport.fs.zfs.io" == event->Value("type")); 1138300906Sasomers} 1139300906Sasomers 1140300906Sasomersbool 1141300906SasomersCaseFile::ShouldDegrade() const 1142300906Sasomers{ 1143300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1144300906Sasomers IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1145300906Sasomers} 1146300906Sasomers 1147300906Sasomersbool 1148300906SasomersCaseFile::ShouldFault() const 1149300906Sasomers{ 1150300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1151300906Sasomers IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1152300906Sasomers} 1153300906Sasomers 1154300906Sasomersnvlist_t * 1155300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const 1156300906Sasomers{ 1157300906Sasomers return (VdevIterator(zhp).Find(VdevGUID())); 1158300906Sasomers} 1159