case_file.cc revision 314431
1300906Sasomers/*- 2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3300906Sasomers * All rights reserved. 4300906Sasomers * 5300906Sasomers * Redistribution and use in source and binary forms, with or without 6300906Sasomers * modification, are permitted provided that the following conditions 7300906Sasomers * are met: 8300906Sasomers * 1. Redistributions of source code must retain the above copyright 9300906Sasomers * notice, this list of conditions, and the following disclaimer, 10300906Sasomers * without modification. 11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12300906Sasomers * substantially similar to the "NO WARRANTY" disclaimer below 13300906Sasomers * ("Disclaimer") and any redistribution must be conditioned upon 14300906Sasomers * including a substantially similar Disclaimer requirement for further 15300906Sasomers * binary redistribution. 16300906Sasomers * 17300906Sasomers * NO WARRANTY 18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28300906Sasomers * POSSIBILITY OF SUCH DAMAGES. 29300906Sasomers * 30300906Sasomers * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31300906Sasomers */ 32300906Sasomers 33300906Sasomers/** 34300906Sasomers * \file case_file.cc 35300906Sasomers * 36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state. 37300906Sasomers * However, we only serialize to disk those events that need to be preserved 38300906Sasomers * across reboots. For now, this is just a log of soft errors which we 39300906Sasomers * accumulate in order to mark a device as degraded. 40300906Sasomers */ 41300906Sasomers#include <sys/cdefs.h> 42300906Sasomers#include <sys/time.h> 43300906Sasomers 44300906Sasomers#include <sys/fs/zfs.h> 45300906Sasomers 46300906Sasomers#include <dirent.h> 47300906Sasomers#include <iomanip> 48300906Sasomers#include <fstream> 49300906Sasomers#include <functional> 50300906Sasomers#include <sstream> 51300906Sasomers#include <syslog.h> 52300906Sasomers#include <unistd.h> 53300906Sasomers 54300906Sasomers#include <libzfs.h> 55300906Sasomers 56300906Sasomers#include <list> 57300906Sasomers#include <map> 58300906Sasomers#include <string> 59300906Sasomers 60300906Sasomers#include <devdctl/guid.h> 61300906Sasomers#include <devdctl/event.h> 62300906Sasomers#include <devdctl/event_factory.h> 63300906Sasomers#include <devdctl/exception.h> 64300906Sasomers#include <devdctl/consumer.h> 65300906Sasomers 66300906Sasomers#include "callout.h" 67300906Sasomers#include "vdev_iterator.h" 68300906Sasomers#include "zfsd_event.h" 69300906Sasomers#include "case_file.h" 70300906Sasomers#include "vdev.h" 71300906Sasomers#include "zfsd.h" 72300906Sasomers#include "zfsd_exception.h" 73300906Sasomers#include "zpool_list.h" 74300906Sasomers 75300906Sasomers__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 314431 2017-02-28 23:03:51Z asomers $"); 76300906Sasomers 77300906Sasomers/*============================ Namespace Control =============================*/ 78300906Sasomersusing std::auto_ptr; 79300906Sasomersusing std::hex; 80300906Sasomersusing std::ifstream; 81300906Sasomersusing std::stringstream; 82300906Sasomersusing std::setfill; 83300906Sasomersusing std::setw; 84300906Sasomers 85300906Sasomersusing DevdCtl::Event; 86300906Sasomersusing DevdCtl::EventFactory; 87300906Sasomersusing DevdCtl::EventList; 88300906Sasomersusing DevdCtl::Guid; 89300906Sasomersusing DevdCtl::ParseException; 90300906Sasomers 91300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/ 92300906Sasomers//- CaseFile Static Data ------------------------------------------------------- 93300906Sasomers 94300906SasomersCaseFileList CaseFile::s_activeCases; 95300906Sasomersconst string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97300906Sasomers 98300906Sasomers//- CaseFile Static Public Methods --------------------------------------------- 99300906SasomersCaseFile * 100300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID) 101300906Sasomers{ 102300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 103300906Sasomers curCase != s_activeCases.end(); curCase++) { 104300906Sasomers 105300906Sasomers if ((*curCase)->PoolGUID() != poolGUID 106300906Sasomers || (*curCase)->VdevGUID() != vdevGUID) 107300906Sasomers continue; 108300906Sasomers 109300906Sasomers /* 110300906Sasomers * We only carry one active case per-vdev. 111300906Sasomers */ 112300906Sasomers return (*curCase); 113300906Sasomers } 114300906Sasomers return (NULL); 115300906Sasomers} 116300906Sasomers 117300906SasomersCaseFile * 118300906SasomersCaseFile::Find(const string &physPath) 119300906Sasomers{ 120300906Sasomers CaseFile *result = NULL; 121300906Sasomers 122300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 123300906Sasomers curCase != s_activeCases.end(); curCase++) { 124300906Sasomers 125300906Sasomers if ((*curCase)->PhysicalPath() != physPath) 126300906Sasomers continue; 127300906Sasomers 128300906Sasomers if (result != NULL) { 129300906Sasomers syslog(LOG_WARNING, "Multiple casefiles found for " 130300906Sasomers "physical path %s. " 131300906Sasomers "This is most likely a bug in zfsd", 132300906Sasomers physPath.c_str()); 133300906Sasomers } 134300906Sasomers result = *curCase; 135300906Sasomers } 136300906Sasomers return (result); 137300906Sasomers} 138300906Sasomers 139300906Sasomers 140300906Sasomersvoid 141300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 142300906Sasomers{ 143300906Sasomers CaseFileList::iterator casefile; 144300906Sasomers for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 145300906Sasomers CaseFileList::iterator next = casefile; 146300906Sasomers next++; 147300906Sasomers if (poolGUID == (*casefile)->PoolGUID()) 148300906Sasomers (*casefile)->ReEvaluate(event); 149300906Sasomers casefile = next; 150300906Sasomers } 151300906Sasomers} 152300906Sasomers 153300906SasomersCaseFile & 154300906SasomersCaseFile::Create(Vdev &vdev) 155300906Sasomers{ 156300906Sasomers CaseFile *activeCase; 157300906Sasomers 158300906Sasomers activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 159300906Sasomers if (activeCase == NULL) 160300906Sasomers activeCase = new CaseFile(vdev); 161300906Sasomers 162300906Sasomers return (*activeCase); 163300906Sasomers} 164300906Sasomers 165300906Sasomersvoid 166300906SasomersCaseFile::DeSerialize() 167300906Sasomers{ 168300906Sasomers struct dirent **caseFiles; 169300906Sasomers 170300906Sasomers int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 171300906Sasomers DeSerializeSelector, /*compar*/NULL)); 172300906Sasomers 173300906Sasomers if (numCaseFiles == -1) 174300906Sasomers return; 175300906Sasomers if (numCaseFiles == 0) { 176300906Sasomers free(caseFiles); 177300906Sasomers return; 178300906Sasomers } 179300906Sasomers 180300906Sasomers for (int i = 0; i < numCaseFiles; i++) { 181300906Sasomers 182300906Sasomers DeSerializeFile(caseFiles[i]->d_name); 183300906Sasomers free(caseFiles[i]); 184300906Sasomers } 185300906Sasomers free(caseFiles); 186300906Sasomers} 187300906Sasomers 188300906Sasomersvoid 189300906SasomersCaseFile::LogAll() 190300906Sasomers{ 191300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 192300906Sasomers curCase != s_activeCases.end(); curCase++) 193300906Sasomers (*curCase)->Log(); 194300906Sasomers} 195300906Sasomers 196300906Sasomersvoid 197300906SasomersCaseFile::PurgeAll() 198300906Sasomers{ 199300906Sasomers /* 200300906Sasomers * Serialize casefiles before deleting them so that they can be reread 201300906Sasomers * and revalidated during BuildCaseFiles. 202300906Sasomers * CaseFiles remove themselves from this list on destruction. 203300906Sasomers */ 204300906Sasomers while (s_activeCases.size() != 0) { 205300906Sasomers CaseFile *casefile = s_activeCases.front(); 206300906Sasomers casefile->Serialize(); 207300906Sasomers delete casefile; 208300906Sasomers } 209300906Sasomers 210300906Sasomers} 211300906Sasomers 212300906Sasomers//- CaseFile Public Methods ---------------------------------------------------- 213300906Sasomersbool 214300906SasomersCaseFile::RefreshVdevState() 215300906Sasomers{ 216300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 217300906Sasomers zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 218300906Sasomers if (casePool == NULL) 219300906Sasomers return (false); 220300906Sasomers 221300906Sasomers Vdev vd(casePool, CaseVdev(casePool)); 222300906Sasomers if (vd.DoesNotExist()) 223300906Sasomers return (false); 224300906Sasomers 225300906Sasomers m_vdevState = vd.State(); 226300906Sasomers m_vdevPhysPath = vd.PhysicalPath(); 227300906Sasomers return (true); 228300906Sasomers} 229300906Sasomers 230300906Sasomersbool 231300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 232300906Sasomers{ 233300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 234300906Sasomers zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 235300906Sasomers 236300906Sasomers if (pool == NULL || !RefreshVdevState()) { 237300906Sasomers /* 238300906Sasomers * The pool or vdev for this case file is no longer 239300906Sasomers * part of the configuration. This can happen 240300906Sasomers * if we process a device arrival notification 241300906Sasomers * before seeing the ZFS configuration change 242300906Sasomers * event. 243300906Sasomers */ 244300906Sasomers syslog(LOG_INFO, 245300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 246300906Sasomers "Closing\n", 247300906Sasomers PoolGUIDString().c_str(), 248300906Sasomers VdevGUIDString().c_str()); 249300906Sasomers Close(); 250300906Sasomers 251300906Sasomers /* 252300906Sasomers * Since this event was not used to close this 253300906Sasomers * case, do not report it as consumed. 254300906Sasomers */ 255300906Sasomers return (/*consumed*/false); 256300906Sasomers } 257300906Sasomers 258300906Sasomers if (VdevState() > VDEV_STATE_CANT_OPEN) { 259300906Sasomers /* 260300906Sasomers * For now, newly discovered devices only help for 261300906Sasomers * devices that are missing. In the future, we might 262300906Sasomers * use a newly inserted spare to replace a degraded 263300906Sasomers * or faulted device. 264300906Sasomers */ 265300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 266300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str()); 267300906Sasomers return (/*consumed*/false); 268300906Sasomers } 269300906Sasomers 270300906Sasomers if (vdev != NULL 271300906Sasomers && vdev->PoolGUID() == m_poolGUID 272300906Sasomers && vdev->GUID() == m_vdevGUID) { 273300906Sasomers 274300906Sasomers zpool_vdev_online(pool, vdev->GUIDString().c_str(), 275300906Sasomers ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 276300906Sasomers &m_vdevState); 277300906Sasomers syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 278300906Sasomers zpool_get_name(pool), vdev->GUIDString().c_str(), 279300906Sasomers devPath.c_str(), 280300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 281300906Sasomers 282300906Sasomers /* 283300906Sasomers * Check the vdev state post the online action to see 284300906Sasomers * if we can retire this case. 285300906Sasomers */ 286300906Sasomers CloseIfSolved(); 287300906Sasomers 288300906Sasomers return (/*consumed*/true); 289300906Sasomers } 290300906Sasomers 291300906Sasomers /* 292300906Sasomers * If the auto-replace policy is enabled, and we have physical 293300906Sasomers * path information, try a physical path replacement. 294300906Sasomers */ 295300906Sasomers if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 296300906Sasomers syslog(LOG_INFO, 297300906Sasomers "CaseFile(%s:%s:%s): AutoReplace not set. " 298300906Sasomers "Ignoring device insertion.\n", 299300906Sasomers PoolGUIDString().c_str(), 300300906Sasomers VdevGUIDString().c_str(), 301300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 302300906Sasomers return (/*consumed*/false); 303300906Sasomers } 304300906Sasomers 305300906Sasomers if (PhysicalPath().empty()) { 306300906Sasomers syslog(LOG_INFO, 307300906Sasomers "CaseFile(%s:%s:%s): No physical path information. " 308300906Sasomers "Ignoring device insertion.\n", 309300906Sasomers PoolGUIDString().c_str(), 310300906Sasomers VdevGUIDString().c_str(), 311300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 312300906Sasomers return (/*consumed*/false); 313300906Sasomers } 314300906Sasomers 315300906Sasomers if (physPath != PhysicalPath()) { 316300906Sasomers syslog(LOG_INFO, 317300906Sasomers "CaseFile(%s:%s:%s): Physical path mismatch. " 318300906Sasomers "Ignoring device insertion.\n", 319300906Sasomers PoolGUIDString().c_str(), 320300906Sasomers VdevGUIDString().c_str(), 321300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 322300906Sasomers return (/*consumed*/false); 323300906Sasomers } 324300906Sasomers 325300906Sasomers /* Write a label on the newly inserted disk. */ 326300906Sasomers if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 327300906Sasomers syslog(LOG_ERR, 328300906Sasomers "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 329300906Sasomers zpool_get_name(pool), VdevGUIDString().c_str(), 330300906Sasomers libzfs_error_action(g_zfsHandle), 331300906Sasomers libzfs_error_description(g_zfsHandle)); 332300906Sasomers return (/*consumed*/false); 333300906Sasomers } 334300906Sasomers 335300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 336300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 337300906Sasomers devPath.c_str()); 338300906Sasomers return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 339300906Sasomers} 340300906Sasomers 341300906Sasomersbool 342300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event) 343300906Sasomers{ 344300906Sasomers bool consumed(false); 345300906Sasomers 346300906Sasomers if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 347300906Sasomers /* 348300906Sasomers * The Vdev we represent has been removed from the 349300906Sasomers * configuration. This case is no longer of value. 350300906Sasomers */ 351300906Sasomers Close(); 352300906Sasomers 353300906Sasomers return (/*consumed*/true); 354300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 355300906Sasomers /* This Pool has been destroyed. Discard the case */ 356300906Sasomers Close(); 357300906Sasomers 358300906Sasomers return (/*consumed*/true); 359300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 360300906Sasomers RefreshVdevState(); 361300906Sasomers if (VdevState() < VDEV_STATE_HEALTHY) 362300906Sasomers consumed = ActivateSpare(); 363300906Sasomers } 364300906Sasomers 365300906Sasomers 366300906Sasomers if (event.Value("class") == "resource.fs.zfs.removed") { 367300906Sasomers bool spare_activated; 368300906Sasomers 369300906Sasomers if (!RefreshVdevState()) { 370300906Sasomers /* 371300906Sasomers * The pool or vdev for this case file is no longer 372300906Sasomers * part of the configuration. This can happen 373300906Sasomers * if we process a device arrival notification 374300906Sasomers * before seeing the ZFS configuration change 375300906Sasomers * event. 376300906Sasomers */ 377300906Sasomers syslog(LOG_INFO, 378300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 379300906Sasomers "unconfigured. Closing\n", 380300906Sasomers PoolGUIDString().c_str(), 381300906Sasomers VdevGUIDString().c_str()); 382300906Sasomers /* 383300906Sasomers * Close the case now so we won't waste cycles in the 384300906Sasomers * system rescan 385300906Sasomers */ 386300906Sasomers Close(); 387300906Sasomers 388300906Sasomers /* 389300906Sasomers * Since this event was not used to close this 390300906Sasomers * case, do not report it as consumed. 391300906Sasomers */ 392300906Sasomers return (/*consumed*/false); 393300906Sasomers } 394300906Sasomers 395300906Sasomers /* 396300906Sasomers * Discard any tentative I/O error events for 397300906Sasomers * this case. They were most likely caused by the 398300906Sasomers * hot-unplug of this device. 399300906Sasomers */ 400300906Sasomers PurgeTentativeEvents(); 401300906Sasomers 402300906Sasomers /* Try to activate spares if they are available */ 403300906Sasomers spare_activated = ActivateSpare(); 404300906Sasomers 405300906Sasomers /* 406300906Sasomers * Rescan the drives in the system to see if a recent 407300906Sasomers * drive arrival can be used to solve this case. 408300906Sasomers */ 409300906Sasomers ZfsDaemon::RequestSystemRescan(); 410300906Sasomers 411300906Sasomers /* 412300906Sasomers * Consume the event if we successfully activated a spare. 413300906Sasomers * Otherwise, leave it in the unconsumed events list so that the 414300906Sasomers * future addition of a spare to this pool might be able to 415300906Sasomers * close the case 416300906Sasomers */ 417300906Sasomers consumed = spare_activated; 418300906Sasomers } else if (event.Value("class") == "resource.fs.zfs.statechange") { 419300906Sasomers RefreshVdevState(); 420300906Sasomers /* 421300906Sasomers * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 422300906Sasomers * activate a hotspare. Otherwise, ignore the event 423300906Sasomers */ 424300906Sasomers if (VdevState() == VDEV_STATE_FAULTED || 425300906Sasomers VdevState() == VDEV_STATE_DEGRADED || 426300906Sasomers VdevState() == VDEV_STATE_CANT_OPEN) 427300906Sasomers (void) ActivateSpare(); 428300906Sasomers consumed = true; 429300906Sasomers } 430300906Sasomers else if (event.Value("class") == "ereport.fs.zfs.io" || 431300906Sasomers event.Value("class") == "ereport.fs.zfs.checksum") { 432300906Sasomers 433300906Sasomers m_tentativeEvents.push_front(event.DeepCopy()); 434300906Sasomers RegisterCallout(event); 435300906Sasomers consumed = true; 436300906Sasomers } 437300906Sasomers 438300906Sasomers bool closed(CloseIfSolved()); 439300906Sasomers 440300906Sasomers return (consumed || closed); 441300906Sasomers} 442300906Sasomers 443300906Sasomers 444300906Sasomersbool 445300906SasomersCaseFile::ActivateSpare() { 446300906Sasomers nvlist_t *config, *nvroot; 447300906Sasomers nvlist_t **spares; 448300906Sasomers char *devPath, *vdev_type; 449300906Sasomers const char *poolname; 450300906Sasomers u_int nspares, i; 451300906Sasomers int error; 452300906Sasomers 453300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 454300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 455300906Sasomers if (zhp == NULL) { 456300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 457300919Sbdrewery "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 458300906Sasomers return (false); 459300906Sasomers } 460300906Sasomers poolname = zpool_get_name(zhp); 461300906Sasomers config = zpool_get_config(zhp, NULL); 462300906Sasomers if (config == NULL) { 463300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 464300906Sasomers "config for pool %s", poolname); 465300906Sasomers return (false); 466300906Sasomers } 467300906Sasomers error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 468300906Sasomers if (error != 0){ 469300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 470300906Sasomers "tree for pool %s", poolname); 471300906Sasomers return (false); 472300906Sasomers } 473300906Sasomers nspares = 0; 474300906Sasomers nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 475300906Sasomers &nspares); 476300906Sasomers if (nspares == 0) { 477300906Sasomers /* The pool has no spares configured */ 478300906Sasomers syslog(LOG_INFO, "CaseFile::ActivateSpare: " 479300906Sasomers "No spares available for pool %s", poolname); 480300906Sasomers return (false); 481300906Sasomers } 482300906Sasomers for (i = 0; i < nspares; i++) { 483300906Sasomers uint64_t *nvlist_array; 484300906Sasomers vdev_stat_t *vs; 485300906Sasomers uint_t nstats; 486300906Sasomers 487300906Sasomers if (nvlist_lookup_uint64_array(spares[i], 488300906Sasomers ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 489300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 490300906Sasomers "find vdev stats for pool %s, spare %d", 491300906Sasomers poolname, i); 492300906Sasomers return (false); 493300906Sasomers } 494300906Sasomers vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 495300906Sasomers 496300906Sasomers if ((vs->vs_aux != VDEV_AUX_SPARED) 497300906Sasomers && (vs->vs_state == VDEV_STATE_HEALTHY)) { 498300906Sasomers /* We found a usable spare */ 499300906Sasomers break; 500300906Sasomers } 501300906Sasomers } 502300906Sasomers 503300906Sasomers if (i == nspares) { 504300906Sasomers /* No available spares were found */ 505300906Sasomers return (false); 506300906Sasomers } 507300906Sasomers 508300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 509300906Sasomers if (error != 0) { 510300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 511300906Sasomers "the path of pool %s, spare %d. Error %d", 512300906Sasomers poolname, i, error); 513300906Sasomers return (false); 514300906Sasomers } 515300906Sasomers 516300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 517300906Sasomers if (error != 0) { 518300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 519300906Sasomers "the vdev type of pool %s, spare %d. Error %d", 520300906Sasomers poolname, i, error); 521300906Sasomers return (false); 522300906Sasomers } 523300906Sasomers 524300906Sasomers return (Replace(vdev_type, devPath, /*isspare*/true)); 525300906Sasomers} 526300906Sasomers 527300906Sasomersvoid 528300906SasomersCaseFile::RegisterCallout(const Event &event) 529300906Sasomers{ 530300906Sasomers timeval now, countdown, elapsed, timestamp, zero, remaining; 531300906Sasomers 532300906Sasomers gettimeofday(&now, 0); 533300906Sasomers timestamp = event.GetTimestamp(); 534300906Sasomers timersub(&now, ×tamp, &elapsed); 535300906Sasomers timersub(&s_removeGracePeriod, &elapsed, &countdown); 536300906Sasomers /* 537300906Sasomers * If countdown is <= zero, Reset the timer to the 538300906Sasomers * smallest positive time value instead 539300906Sasomers */ 540300906Sasomers timerclear(&zero); 541300906Sasomers if (timercmp(&countdown, &zero, <=)) { 542300906Sasomers timerclear(&countdown); 543300906Sasomers countdown.tv_usec = 1; 544300906Sasomers } 545300906Sasomers 546300906Sasomers remaining = m_tentativeTimer.TimeRemaining(); 547300906Sasomers 548300906Sasomers if (!m_tentativeTimer.IsPending() 549300906Sasomers || timercmp(&countdown, &remaining, <)) 550300906Sasomers m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 551300906Sasomers} 552300906Sasomers 553300906Sasomers 554300906Sasomersbool 555300906SasomersCaseFile::CloseIfSolved() 556300906Sasomers{ 557300906Sasomers if (m_events.empty() 558300906Sasomers && m_tentativeEvents.empty()) { 559300906Sasomers 560300906Sasomers /* 561300906Sasomers * We currently do not track or take actions on 562300906Sasomers * devices in the degraded or faulted state. 563300906Sasomers * Once we have support for spare pools, we'll 564300906Sasomers * retain these cases so that any spares added in 565300906Sasomers * the future can be applied to them. 566300906Sasomers */ 567300906Sasomers switch (VdevState()) { 568300906Sasomers case VDEV_STATE_HEALTHY: 569300906Sasomers /* No need to keep cases for healthy vdevs */ 570300906Sasomers Close(); 571300906Sasomers return (true); 572300906Sasomers case VDEV_STATE_REMOVED: 573300906Sasomers case VDEV_STATE_CANT_OPEN: 574300906Sasomers /* 575300906Sasomers * Keep open. We may solve it with a newly inserted 576300906Sasomers * device. 577300906Sasomers */ 578300906Sasomers case VDEV_STATE_FAULTED: 579300906Sasomers case VDEV_STATE_DEGRADED: 580300906Sasomers /* 581300906Sasomers * Keep open. We may solve it with the future 582300906Sasomers * addition of a spare to the pool 583300906Sasomers */ 584300906Sasomers case VDEV_STATE_UNKNOWN: 585300906Sasomers case VDEV_STATE_CLOSED: 586300906Sasomers case VDEV_STATE_OFFLINE: 587300906Sasomers /* 588300906Sasomers * Keep open? This may not be the correct behavior, 589300906Sasomers * but it's what we've always done 590300906Sasomers */ 591300906Sasomers ; 592300906Sasomers } 593300906Sasomers 594300906Sasomers /* 595300906Sasomers * Re-serialize the case in order to remove any 596300906Sasomers * previous event data. 597300906Sasomers */ 598300906Sasomers Serialize(); 599300906Sasomers } 600300906Sasomers 601300906Sasomers return (false); 602300906Sasomers} 603300906Sasomers 604300906Sasomersvoid 605300906SasomersCaseFile::Log() 606300906Sasomers{ 607300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 608300906Sasomers VdevGUIDString().c_str(), PhysicalPath().c_str()); 609300906Sasomers syslog(LOG_INFO, "\tVdev State = %s\n", 610300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 611300906Sasomers if (m_tentativeEvents.size() != 0) { 612300906Sasomers syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 613300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 614300906Sasomers event != m_tentativeEvents.end(); event++) 615300906Sasomers (*event)->Log(LOG_INFO); 616300906Sasomers } 617300906Sasomers if (m_events.size() != 0) { 618300906Sasomers syslog(LOG_INFO, "\t=== Events ===\n"); 619300906Sasomers for (EventList::iterator event(m_events.begin()); 620300906Sasomers event != m_events.end(); event++) 621300906Sasomers (*event)->Log(LOG_INFO); 622300906Sasomers } 623300906Sasomers} 624300906Sasomers 625300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------ 626300906Sasomersvoid 627300906SasomersCaseFile::OnGracePeriodEnded(void *arg) 628300906Sasomers{ 629300906Sasomers CaseFile &casefile(*static_cast<CaseFile *>(arg)); 630300906Sasomers 631300906Sasomers casefile.OnGracePeriodEnded(); 632300906Sasomers} 633300906Sasomers 634300906Sasomersint 635300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry) 636300906Sasomers{ 637300906Sasomers uint64_t poolGUID; 638300906Sasomers uint64_t vdevGUID; 639300906Sasomers 640300906Sasomers if (dirEntry->d_type == DT_REG 641300919Sbdrewery && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 642300906Sasomers &poolGUID, &vdevGUID) == 2) 643300906Sasomers return (1); 644300906Sasomers return (0); 645300906Sasomers} 646300906Sasomers 647300906Sasomersvoid 648300906SasomersCaseFile::DeSerializeFile(const char *fileName) 649300906Sasomers{ 650300906Sasomers string fullName(s_caseFilePath + '/' + fileName); 651300906Sasomers CaseFile *existingCaseFile(NULL); 652300906Sasomers CaseFile *caseFile(NULL); 653300906Sasomers 654300906Sasomers try { 655300906Sasomers uint64_t poolGUID; 656300906Sasomers uint64_t vdevGUID; 657300906Sasomers nvlist_t *vdevConf; 658300906Sasomers 659314431Sasomers if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 660314431Sasomers &poolGUID, &vdevGUID) != 2) { 661314431Sasomers throw ZfsdException("CaseFile::DeSerialize: " 662314431Sasomers "Unintelligible CaseFile filename %s.\n", fileName); 663314431Sasomers } 664300906Sasomers existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 665300906Sasomers if (existingCaseFile != NULL) { 666300906Sasomers /* 667300906Sasomers * If the vdev is already degraded or faulted, 668300906Sasomers * there's no point in keeping the state around 669300906Sasomers * that we use to put a drive into the degraded 670300906Sasomers * state. However, if the vdev is simply missing, 671300906Sasomers * preserve the case data in the hopes that it will 672300906Sasomers * return. 673300906Sasomers */ 674300906Sasomers caseFile = existingCaseFile; 675300906Sasomers vdev_state curState(caseFile->VdevState()); 676300906Sasomers if (curState > VDEV_STATE_CANT_OPEN 677300906Sasomers && curState < VDEV_STATE_HEALTHY) { 678300906Sasomers unlink(fileName); 679300906Sasomers return; 680300906Sasomers } 681300906Sasomers } else { 682300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 683300906Sasomers if (zpl.empty() 684300906Sasomers || (vdevConf = VdevIterator(zpl.front()) 685300906Sasomers .Find(vdevGUID)) == NULL) { 686300906Sasomers /* 687300906Sasomers * Either the pool no longer exists 688300906Sasomers * or this vdev is no longer a member of 689300906Sasomers * the pool. 690300906Sasomers */ 691300906Sasomers unlink(fullName.c_str()); 692300906Sasomers return; 693300906Sasomers } 694300906Sasomers 695300906Sasomers /* 696300906Sasomers * Any vdev we find that does not have a case file 697300906Sasomers * must be in the healthy state and thus worthy of 698300906Sasomers * continued SERD data tracking. 699300906Sasomers */ 700300906Sasomers caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 701300906Sasomers } 702300906Sasomers 703300906Sasomers ifstream caseStream(fullName.c_str()); 704300906Sasomers if (!caseStream) 705300906Sasomers throw ZfsdException("CaseFile::DeSerialize: Unable to " 706300906Sasomers "read %s.\n", fileName); 707300906Sasomers 708300906Sasomers caseFile->DeSerialize(caseStream); 709300906Sasomers } catch (const ParseException &exp) { 710300906Sasomers 711300906Sasomers exp.Log(); 712300906Sasomers if (caseFile != existingCaseFile) 713300906Sasomers delete caseFile; 714300906Sasomers 715300906Sasomers /* 716300906Sasomers * Since we can't parse the file, unlink it so we don't 717300906Sasomers * trip over it again. 718300906Sasomers */ 719300906Sasomers unlink(fileName); 720300906Sasomers } catch (const ZfsdException &zfsException) { 721300906Sasomers 722300906Sasomers zfsException.Log(); 723300906Sasomers if (caseFile != existingCaseFile) 724300906Sasomers delete caseFile; 725300906Sasomers } 726300906Sasomers} 727300906Sasomers 728300906Sasomers//- CaseFile Protected Methods ------------------------------------------------- 729300906SasomersCaseFile::CaseFile(const Vdev &vdev) 730300906Sasomers : m_poolGUID(vdev.PoolGUID()), 731300906Sasomers m_vdevGUID(vdev.GUID()), 732300906Sasomers m_vdevState(vdev.State()), 733300906Sasomers m_vdevPhysPath(vdev.PhysicalPath()) 734300906Sasomers{ 735300906Sasomers stringstream guidString; 736300906Sasomers 737300906Sasomers guidString << m_vdevGUID; 738300906Sasomers m_vdevGUIDString = guidString.str(); 739300906Sasomers guidString.str(""); 740300906Sasomers guidString << m_poolGUID; 741300906Sasomers m_poolGUIDString = guidString.str(); 742300906Sasomers 743300906Sasomers s_activeCases.push_back(this); 744300906Sasomers 745300906Sasomers syslog(LOG_INFO, "Creating new CaseFile:\n"); 746300906Sasomers Log(); 747300906Sasomers} 748300906Sasomers 749300906SasomersCaseFile::~CaseFile() 750300906Sasomers{ 751300906Sasomers PurgeEvents(); 752300906Sasomers PurgeTentativeEvents(); 753300906Sasomers m_tentativeTimer.Stop(); 754300906Sasomers s_activeCases.remove(this); 755300906Sasomers} 756300906Sasomers 757300906Sasomersvoid 758300906SasomersCaseFile::PurgeEvents() 759300906Sasomers{ 760300906Sasomers for (EventList::iterator event(m_events.begin()); 761300906Sasomers event != m_events.end(); event++) 762300906Sasomers delete *event; 763300906Sasomers 764300906Sasomers m_events.clear(); 765300906Sasomers} 766300906Sasomers 767300906Sasomersvoid 768300906SasomersCaseFile::PurgeTentativeEvents() 769300906Sasomers{ 770300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 771300906Sasomers event != m_tentativeEvents.end(); event++) 772300906Sasomers delete *event; 773300906Sasomers 774300906Sasomers m_tentativeEvents.clear(); 775300906Sasomers} 776300906Sasomers 777300906Sasomersvoid 778300906SasomersCaseFile::SerializeEvList(const EventList events, int fd, 779300906Sasomers const char* prefix) const 780300906Sasomers{ 781300906Sasomers if (events.empty()) 782300906Sasomers return; 783300906Sasomers for (EventList::const_iterator curEvent = events.begin(); 784300906Sasomers curEvent != events.end(); curEvent++) { 785300906Sasomers const string &eventString((*curEvent)->GetEventString()); 786300906Sasomers 787300906Sasomers // TODO: replace many write(2) calls with a single writev(2) 788300906Sasomers if (prefix) 789300906Sasomers write(fd, prefix, strlen(prefix)); 790300906Sasomers write(fd, eventString.c_str(), eventString.length()); 791300906Sasomers } 792300906Sasomers} 793300906Sasomers 794300906Sasomersvoid 795300906SasomersCaseFile::Serialize() 796300906Sasomers{ 797300906Sasomers stringstream saveFile; 798300906Sasomers 799300906Sasomers saveFile << setfill('0') 800300906Sasomers << s_caseFilePath << "/" 801300906Sasomers << "pool_" << PoolGUIDString() 802300906Sasomers << "_vdev_" << VdevGUIDString() 803300906Sasomers << ".case"; 804300906Sasomers 805300906Sasomers if (m_events.empty() && m_tentativeEvents.empty()) { 806300906Sasomers unlink(saveFile.str().c_str()); 807300906Sasomers return; 808300906Sasomers } 809300906Sasomers 810300906Sasomers int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 811300906Sasomers if (fd == -1) { 812300906Sasomers syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 813300906Sasomers saveFile.str().c_str()); 814300906Sasomers return; 815300906Sasomers } 816300906Sasomers SerializeEvList(m_events, fd); 817300906Sasomers SerializeEvList(m_tentativeEvents, fd, "tentative "); 818300906Sasomers close(fd); 819300906Sasomers} 820300906Sasomers 821300906Sasomers/* 822300906Sasomers * XXX: This method assumes that events may not contain embedded newlines. If 823300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch 824300906Sasomers * serialization formats 825300906Sasomers */ 826300906Sasomersvoid 827300906SasomersCaseFile::DeSerialize(ifstream &caseStream) 828300906Sasomers{ 829300906Sasomers string evString; 830300906Sasomers const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 831300906Sasomers 832300906Sasomers caseStream >> std::noskipws >> std::ws; 833300906Sasomers while (caseStream.good()) { 834300906Sasomers /* 835300906Sasomers * Outline: 836300906Sasomers * read the beginning of a line and check it for 837300906Sasomers * "tentative". If found, discard "tentative". 838300906Sasomers * Create a new event 839300906Sasomers * continue 840300906Sasomers */ 841300906Sasomers EventList* destEvents; 842300906Sasomers const string tentFlag("tentative "); 843300906Sasomers string line; 844300906Sasomers std::stringbuf lineBuf; 845300906Sasomers 846300906Sasomers caseStream.get(lineBuf); 847300906Sasomers caseStream.ignore(); /*discard the newline character*/ 848300906Sasomers line = lineBuf.str(); 849300906Sasomers if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 850300906Sasomers /* Discard "tentative" */ 851300906Sasomers line.erase(0, tentFlag.size()); 852300906Sasomers destEvents = &m_tentativeEvents; 853300906Sasomers } else { 854300906Sasomers destEvents = &m_events; 855300906Sasomers } 856300906Sasomers Event *event(Event::CreateEvent(factory, line)); 857300906Sasomers if (event != NULL) { 858300906Sasomers destEvents->push_back(event); 859300906Sasomers RegisterCallout(*event); 860300906Sasomers } 861300906Sasomers } 862300906Sasomers} 863300906Sasomers 864300906Sasomersvoid 865300906SasomersCaseFile::Close() 866300906Sasomers{ 867300906Sasomers /* 868300906Sasomers * This case is no longer relevant. Clean up our 869300906Sasomers * serialization file, and delete the case. 870300906Sasomers */ 871300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 872300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 873300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 874300906Sasomers 875300906Sasomers /* 876300906Sasomers * Serialization of a Case with no event data, clears the 877300906Sasomers * Serialization data for that event. 878300906Sasomers */ 879300906Sasomers PurgeEvents(); 880300906Sasomers Serialize(); 881300906Sasomers 882300906Sasomers delete this; 883300906Sasomers} 884300906Sasomers 885300906Sasomersvoid 886300906SasomersCaseFile::OnGracePeriodEnded() 887300906Sasomers{ 888300906Sasomers bool should_fault, should_degrade; 889300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 890300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 891300906Sasomers 892300906Sasomers m_events.splice(m_events.begin(), m_tentativeEvents); 893300906Sasomers should_fault = ShouldFault(); 894300906Sasomers should_degrade = ShouldDegrade(); 895300906Sasomers 896300906Sasomers if (should_fault || should_degrade) { 897300906Sasomers if (zhp == NULL 898300906Sasomers || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 899300906Sasomers /* 900300906Sasomers * Either the pool no longer exists 901300906Sasomers * or this vdev is no longer a member of 902300906Sasomers * the pool. 903300906Sasomers */ 904300906Sasomers Close(); 905300906Sasomers return; 906300906Sasomers } 907300906Sasomers 908300906Sasomers } 909300906Sasomers 910300906Sasomers /* A fault condition has priority over a degrade condition */ 911300906Sasomers if (ShouldFault()) { 912300906Sasomers /* Fault the vdev and close the case. */ 913300906Sasomers if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 914300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 915300906Sasomers syslog(LOG_INFO, "Faulting vdev(%s/%s)", 916300906Sasomers PoolGUIDString().c_str(), 917300906Sasomers VdevGUIDString().c_str()); 918300906Sasomers Close(); 919300906Sasomers return; 920300906Sasomers } 921300906Sasomers else { 922300906Sasomers syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 923300906Sasomers PoolGUIDString().c_str(), 924300906Sasomers VdevGUIDString().c_str(), 925300906Sasomers libzfs_error_action(g_zfsHandle), 926300906Sasomers libzfs_error_description(g_zfsHandle)); 927300906Sasomers } 928300906Sasomers } 929300906Sasomers else if (ShouldDegrade()) { 930300906Sasomers /* Degrade the vdev and close the case. */ 931300906Sasomers if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 932300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 933300906Sasomers syslog(LOG_INFO, "Degrading vdev(%s/%s)", 934300906Sasomers PoolGUIDString().c_str(), 935300906Sasomers VdevGUIDString().c_str()); 936300906Sasomers Close(); 937300906Sasomers return; 938300906Sasomers } 939300906Sasomers else { 940300906Sasomers syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 941300906Sasomers PoolGUIDString().c_str(), 942300906Sasomers VdevGUIDString().c_str(), 943300906Sasomers libzfs_error_action(g_zfsHandle), 944300906Sasomers libzfs_error_description(g_zfsHandle)); 945300906Sasomers } 946300906Sasomers } 947300906Sasomers Serialize(); 948300906Sasomers} 949300906Sasomers 950300906SasomersVdev 951300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 952300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 953300906Sasomers std::list<Vdev> children; 954300906Sasomers std::list<Vdev>::iterator children_it; 955300906Sasomers 956300906Sasomers Vdev parent(vd.Parent()); 957300906Sasomers Vdev replacing(NonexistentVdev); 958300906Sasomers 959300906Sasomers /* 960300906Sasomers * To determine whether we are being replaced by another spare that 961300906Sasomers * is still working, then make sure that it is currently spared and 962300906Sasomers * that the spare is either resilvering or healthy. If any of these 963300906Sasomers * conditions fail, then we are not being replaced by a spare. 964300906Sasomers * 965300906Sasomers * If the spare is healthy, then the case file should be closed very 966300906Sasomers * soon after this check. 967300906Sasomers */ 968300906Sasomers if (parent.DoesNotExist() 969300906Sasomers || parent.Name(zhp, /*verbose*/false) != "spare") 970300906Sasomers return (NonexistentVdev); 971300906Sasomers 972300906Sasomers children = parent.Children(); 973300906Sasomers children_it = children.begin(); 974300906Sasomers for (;children_it != children.end(); children_it++) { 975300906Sasomers Vdev child = *children_it; 976300906Sasomers 977300906Sasomers /* Skip our vdev. */ 978300906Sasomers if (child.GUID() == VdevGUID()) 979300906Sasomers continue; 980300906Sasomers /* 981300906Sasomers * Accept the first child that doesn't match our GUID, or 982300906Sasomers * any resilvering/healthy device if one exists. 983300906Sasomers */ 984300906Sasomers if (replacing.DoesNotExist() || child.IsResilvering() 985300906Sasomers || child.State() == VDEV_STATE_HEALTHY) 986300906Sasomers replacing = child; 987300906Sasomers } 988300906Sasomers 989300906Sasomers return (replacing); 990300906Sasomers} 991300906Sasomers 992300906Sasomersbool 993300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 994300906Sasomers nvlist_t *nvroot, *newvd; 995300906Sasomers const char *poolname; 996300906Sasomers string oldstr(VdevGUIDString()); 997300906Sasomers bool retval = true; 998300906Sasomers 999300906Sasomers /* Figure out what pool we're working on */ 1000300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1001300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1002300906Sasomers if (zhp == NULL) { 1003300906Sasomers syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1004300919Sbdrewery "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1005300906Sasomers return (false); 1006300906Sasomers } 1007300906Sasomers poolname = zpool_get_name(zhp); 1008300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1009300906Sasomers Vdev replaced(BeingReplacedBy(zhp)); 1010300906Sasomers 1011300906Sasomers if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1012300906Sasomers /* If we are already being replaced by a working spare, pass. */ 1013300906Sasomers if (replaced.IsResilvering() 1014300906Sasomers || replaced.State() == VDEV_STATE_HEALTHY) { 1015300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1016300906Sasomers "replaced", VdevGUIDString().c_str(), path); 1017300906Sasomers return (/*consumed*/false); 1018300906Sasomers } 1019300906Sasomers /* 1020300906Sasomers * If we have already been replaced by a spare, but that spare 1021300906Sasomers * is broken, we must spare the spare, not the original device. 1022300906Sasomers */ 1023300906Sasomers oldstr = replaced.GUIDString(); 1024300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1025300906Sasomers "broken spare %s instead", VdevGUIDString().c_str(), 1026300906Sasomers path, oldstr.c_str()); 1027300906Sasomers } 1028300906Sasomers 1029300906Sasomers /* 1030300906Sasomers * Build a root vdev/leaf vdev configuration suitable for 1031300906Sasomers * zpool_vdev_attach. Only enough data for the kernel to find 1032300906Sasomers * the device (i.e. type and disk device node path) are needed. 1033300906Sasomers */ 1034300906Sasomers nvroot = NULL; 1035300906Sasomers newvd = NULL; 1036300906Sasomers 1037300906Sasomers if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1038300906Sasomers || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1039300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1040300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1041300906Sasomers if (nvroot != NULL) 1042300906Sasomers nvlist_free(nvroot); 1043300906Sasomers return (false); 1044300906Sasomers } 1045300906Sasomers if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1046300906Sasomers || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1047300906Sasomers || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1048300906Sasomers || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1049300906Sasomers &newvd, 1) != 0) { 1050300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1051300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1052300906Sasomers nvlist_free(newvd); 1053300906Sasomers nvlist_free(nvroot); 1054300906Sasomers return (true); 1055300906Sasomers } 1056300906Sasomers 1057300906Sasomers /* Data was copied when added to the root vdev. */ 1058300906Sasomers nvlist_free(newvd); 1059300906Sasomers 1060300906Sasomers retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1061300906Sasomers /*replace*/B_TRUE) == 0); 1062300906Sasomers if (retval) 1063300906Sasomers syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1064300906Sasomers poolname, oldstr.c_str(), path); 1065300906Sasomers else 1066300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1067300906Sasomers poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1068300906Sasomers libzfs_error_description(g_zfsHandle)); 1069300906Sasomers nvlist_free(nvroot); 1070300906Sasomers 1071300906Sasomers return (retval); 1072300906Sasomers} 1073300906Sasomers 1074300906Sasomers/* Does the argument event refer to a checksum error? */ 1075300906Sasomersstatic bool 1076300906SasomersIsChecksumEvent(const Event* const event) 1077300906Sasomers{ 1078300906Sasomers return ("ereport.fs.zfs.checksum" == event->Value("type")); 1079300906Sasomers} 1080300906Sasomers 1081300906Sasomers/* Does the argument event refer to an IO error? */ 1082300906Sasomersstatic bool 1083300906SasomersIsIOEvent(const Event* const event) 1084300906Sasomers{ 1085300906Sasomers return ("ereport.fs.zfs.io" == event->Value("type")); 1086300906Sasomers} 1087300906Sasomers 1088300906Sasomersbool 1089300906SasomersCaseFile::ShouldDegrade() const 1090300906Sasomers{ 1091300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1092300906Sasomers IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1093300906Sasomers} 1094300906Sasomers 1095300906Sasomersbool 1096300906SasomersCaseFile::ShouldFault() const 1097300906Sasomers{ 1098300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1099300906Sasomers IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1100300906Sasomers} 1101300906Sasomers 1102300906Sasomersnvlist_t * 1103300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const 1104300906Sasomers{ 1105300906Sasomers return (VdevIterator(zhp).Find(VdevGUID())); 1106300906Sasomers} 1107