case_file.cc revision 300906
1300906Sasomers/*- 2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3300906Sasomers * All rights reserved. 4300906Sasomers * 5300906Sasomers * Redistribution and use in source and binary forms, with or without 6300906Sasomers * modification, are permitted provided that the following conditions 7300906Sasomers * are met: 8300906Sasomers * 1. Redistributions of source code must retain the above copyright 9300906Sasomers * notice, this list of conditions, and the following disclaimer, 10300906Sasomers * without modification. 11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12300906Sasomers * substantially similar to the "NO WARRANTY" disclaimer below 13300906Sasomers * ("Disclaimer") and any redistribution must be conditioned upon 14300906Sasomers * including a substantially similar Disclaimer requirement for further 15300906Sasomers * binary redistribution. 16300906Sasomers * 17300906Sasomers * NO WARRANTY 18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28300906Sasomers * POSSIBILITY OF SUCH DAMAGES. 29300906Sasomers * 30300906Sasomers * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31300906Sasomers */ 32300906Sasomers 33300906Sasomers/** 34300906Sasomers * \file case_file.cc 35300906Sasomers * 36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state. 37300906Sasomers * However, we only serialize to disk those events that need to be preserved 38300906Sasomers * across reboots. For now, this is just a log of soft errors which we 39300906Sasomers * accumulate in order to mark a device as degraded. 40300906Sasomers */ 41300906Sasomers#include <sys/cdefs.h> 42300906Sasomers#include <sys/time.h> 43300906Sasomers 44300906Sasomers#include <sys/fs/zfs.h> 45300906Sasomers 46300906Sasomers#include <dirent.h> 47300906Sasomers#include <iomanip> 48300906Sasomers#include <fstream> 49300906Sasomers#include <functional> 50300906Sasomers#include <sstream> 51300906Sasomers#include <syslog.h> 52300906Sasomers#include <unistd.h> 53300906Sasomers 54300906Sasomers#include <libzfs.h> 55300906Sasomers 56300906Sasomers#include <list> 57300906Sasomers#include <map> 58300906Sasomers#include <string> 59300906Sasomers 60300906Sasomers#include <devdctl/guid.h> 61300906Sasomers#include <devdctl/event.h> 62300906Sasomers#include <devdctl/event_factory.h> 63300906Sasomers#include <devdctl/exception.h> 64300906Sasomers#include <devdctl/consumer.h> 65300906Sasomers 66300906Sasomers#include "callout.h" 67300906Sasomers#include "vdev_iterator.h" 68300906Sasomers#include "zfsd_event.h" 69300906Sasomers#include "case_file.h" 70300906Sasomers#include "vdev.h" 71300906Sasomers#include "zfsd.h" 72300906Sasomers#include "zfsd_exception.h" 73300906Sasomers#include "zpool_list.h" 74300906Sasomers 75300906Sasomers__FBSDID("$FreeBSD: head/cddl/usr.sbin/zfsd/case_file.cc 300906 2016-05-28 17:43:40Z asomers $"); 76300906Sasomers 77300906Sasomers/*============================ Namespace Control =============================*/ 78300906Sasomersusing std::auto_ptr; 79300906Sasomersusing std::hex; 80300906Sasomersusing std::ifstream; 81300906Sasomersusing std::stringstream; 82300906Sasomersusing std::setfill; 83300906Sasomersusing std::setw; 84300906Sasomers 85300906Sasomersusing DevdCtl::Event; 86300906Sasomersusing DevdCtl::EventBuffer; 87300906Sasomersusing DevdCtl::EventFactory; 88300906Sasomersusing DevdCtl::EventList; 89300906Sasomersusing DevdCtl::Guid; 90300906Sasomersusing DevdCtl::ParseException; 91300906Sasomers 92300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/ 93300906Sasomers//- CaseFile Static Data ------------------------------------------------------- 94300906Sasomers 95300906SasomersCaseFileList CaseFile::s_activeCases; 96300906Sasomersconst string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 97300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 98300906Sasomers 99300906Sasomers//- CaseFile Static Public Methods --------------------------------------------- 100300906SasomersCaseFile * 101300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID) 102300906Sasomers{ 103300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 104300906Sasomers curCase != s_activeCases.end(); curCase++) { 105300906Sasomers 106300906Sasomers if ((*curCase)->PoolGUID() != poolGUID 107300906Sasomers || (*curCase)->VdevGUID() != vdevGUID) 108300906Sasomers continue; 109300906Sasomers 110300906Sasomers /* 111300906Sasomers * We only carry one active case per-vdev. 112300906Sasomers */ 113300906Sasomers return (*curCase); 114300906Sasomers } 115300906Sasomers return (NULL); 116300906Sasomers} 117300906Sasomers 118300906SasomersCaseFile * 119300906SasomersCaseFile::Find(const string &physPath) 120300906Sasomers{ 121300906Sasomers CaseFile *result = NULL; 122300906Sasomers 123300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 124300906Sasomers curCase != s_activeCases.end(); curCase++) { 125300906Sasomers 126300906Sasomers if ((*curCase)->PhysicalPath() != physPath) 127300906Sasomers continue; 128300906Sasomers 129300906Sasomers if (result != NULL) { 130300906Sasomers syslog(LOG_WARNING, "Multiple casefiles found for " 131300906Sasomers "physical path %s. " 132300906Sasomers "This is most likely a bug in zfsd", 133300906Sasomers physPath.c_str()); 134300906Sasomers } 135300906Sasomers result = *curCase; 136300906Sasomers } 137300906Sasomers return (result); 138300906Sasomers} 139300906Sasomers 140300906Sasomers 141300906Sasomersvoid 142300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143300906Sasomers{ 144300906Sasomers CaseFileList::iterator casefile; 145300906Sasomers for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146300906Sasomers CaseFileList::iterator next = casefile; 147300906Sasomers next++; 148300906Sasomers if (poolGUID == (*casefile)->PoolGUID()) 149300906Sasomers (*casefile)->ReEvaluate(event); 150300906Sasomers casefile = next; 151300906Sasomers } 152300906Sasomers} 153300906Sasomers 154300906SasomersCaseFile & 155300906SasomersCaseFile::Create(Vdev &vdev) 156300906Sasomers{ 157300906Sasomers CaseFile *activeCase; 158300906Sasomers 159300906Sasomers activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160300906Sasomers if (activeCase == NULL) 161300906Sasomers activeCase = new CaseFile(vdev); 162300906Sasomers 163300906Sasomers return (*activeCase); 164300906Sasomers} 165300906Sasomers 166300906Sasomersvoid 167300906SasomersCaseFile::DeSerialize() 168300906Sasomers{ 169300906Sasomers struct dirent **caseFiles; 170300906Sasomers 171300906Sasomers int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172300906Sasomers DeSerializeSelector, /*compar*/NULL)); 173300906Sasomers 174300906Sasomers if (numCaseFiles == -1) 175300906Sasomers return; 176300906Sasomers if (numCaseFiles == 0) { 177300906Sasomers free(caseFiles); 178300906Sasomers return; 179300906Sasomers } 180300906Sasomers 181300906Sasomers for (int i = 0; i < numCaseFiles; i++) { 182300906Sasomers 183300906Sasomers DeSerializeFile(caseFiles[i]->d_name); 184300906Sasomers free(caseFiles[i]); 185300906Sasomers } 186300906Sasomers free(caseFiles); 187300906Sasomers} 188300906Sasomers 189300906Sasomersvoid 190300906SasomersCaseFile::LogAll() 191300906Sasomers{ 192300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 193300906Sasomers curCase != s_activeCases.end(); curCase++) 194300906Sasomers (*curCase)->Log(); 195300906Sasomers} 196300906Sasomers 197300906Sasomersvoid 198300906SasomersCaseFile::PurgeAll() 199300906Sasomers{ 200300906Sasomers /* 201300906Sasomers * Serialize casefiles before deleting them so that they can be reread 202300906Sasomers * and revalidated during BuildCaseFiles. 203300906Sasomers * CaseFiles remove themselves from this list on destruction. 204300906Sasomers */ 205300906Sasomers while (s_activeCases.size() != 0) { 206300906Sasomers CaseFile *casefile = s_activeCases.front(); 207300906Sasomers casefile->Serialize(); 208300906Sasomers delete casefile; 209300906Sasomers } 210300906Sasomers 211300906Sasomers} 212300906Sasomers 213300906Sasomers//- CaseFile Public Methods ---------------------------------------------------- 214300906Sasomersbool 215300906SasomersCaseFile::RefreshVdevState() 216300906Sasomers{ 217300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 218300906Sasomers zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 219300906Sasomers if (casePool == NULL) 220300906Sasomers return (false); 221300906Sasomers 222300906Sasomers Vdev vd(casePool, CaseVdev(casePool)); 223300906Sasomers if (vd.DoesNotExist()) 224300906Sasomers return (false); 225300906Sasomers 226300906Sasomers m_vdevState = vd.State(); 227300906Sasomers m_vdevPhysPath = vd.PhysicalPath(); 228300906Sasomers return (true); 229300906Sasomers} 230300906Sasomers 231300906Sasomersbool 232300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 233300906Sasomers{ 234300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 235300906Sasomers zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 236300906Sasomers 237300906Sasomers if (pool == NULL || !RefreshVdevState()) { 238300906Sasomers /* 239300906Sasomers * The pool or vdev for this case file is no longer 240300906Sasomers * part of the configuration. This can happen 241300906Sasomers * if we process a device arrival notification 242300906Sasomers * before seeing the ZFS configuration change 243300906Sasomers * event. 244300906Sasomers */ 245300906Sasomers syslog(LOG_INFO, 246300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 247300906Sasomers "Closing\n", 248300906Sasomers PoolGUIDString().c_str(), 249300906Sasomers VdevGUIDString().c_str()); 250300906Sasomers Close(); 251300906Sasomers 252300906Sasomers /* 253300906Sasomers * Since this event was not used to close this 254300906Sasomers * case, do not report it as consumed. 255300906Sasomers */ 256300906Sasomers return (/*consumed*/false); 257300906Sasomers } 258300906Sasomers 259300906Sasomers if (VdevState() > VDEV_STATE_CANT_OPEN) { 260300906Sasomers /* 261300906Sasomers * For now, newly discovered devices only help for 262300906Sasomers * devices that are missing. In the future, we might 263300906Sasomers * use a newly inserted spare to replace a degraded 264300906Sasomers * or faulted device. 265300906Sasomers */ 266300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 267300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str()); 268300906Sasomers return (/*consumed*/false); 269300906Sasomers } 270300906Sasomers 271300906Sasomers if (vdev != NULL 272300906Sasomers && vdev->PoolGUID() == m_poolGUID 273300906Sasomers && vdev->GUID() == m_vdevGUID) { 274300906Sasomers 275300906Sasomers zpool_vdev_online(pool, vdev->GUIDString().c_str(), 276300906Sasomers ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 277300906Sasomers &m_vdevState); 278300906Sasomers syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 279300906Sasomers zpool_get_name(pool), vdev->GUIDString().c_str(), 280300906Sasomers devPath.c_str(), 281300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 282300906Sasomers 283300906Sasomers /* 284300906Sasomers * Check the vdev state post the online action to see 285300906Sasomers * if we can retire this case. 286300906Sasomers */ 287300906Sasomers CloseIfSolved(); 288300906Sasomers 289300906Sasomers return (/*consumed*/true); 290300906Sasomers } 291300906Sasomers 292300906Sasomers /* 293300906Sasomers * If the auto-replace policy is enabled, and we have physical 294300906Sasomers * path information, try a physical path replacement. 295300906Sasomers */ 296300906Sasomers if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 297300906Sasomers syslog(LOG_INFO, 298300906Sasomers "CaseFile(%s:%s:%s): AutoReplace not set. " 299300906Sasomers "Ignoring device insertion.\n", 300300906Sasomers PoolGUIDString().c_str(), 301300906Sasomers VdevGUIDString().c_str(), 302300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 303300906Sasomers return (/*consumed*/false); 304300906Sasomers } 305300906Sasomers 306300906Sasomers if (PhysicalPath().empty()) { 307300906Sasomers syslog(LOG_INFO, 308300906Sasomers "CaseFile(%s:%s:%s): No physical path information. " 309300906Sasomers "Ignoring device insertion.\n", 310300906Sasomers PoolGUIDString().c_str(), 311300906Sasomers VdevGUIDString().c_str(), 312300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 313300906Sasomers return (/*consumed*/false); 314300906Sasomers } 315300906Sasomers 316300906Sasomers if (physPath != PhysicalPath()) { 317300906Sasomers syslog(LOG_INFO, 318300906Sasomers "CaseFile(%s:%s:%s): Physical path mismatch. " 319300906Sasomers "Ignoring device insertion.\n", 320300906Sasomers PoolGUIDString().c_str(), 321300906Sasomers VdevGUIDString().c_str(), 322300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 323300906Sasomers return (/*consumed*/false); 324300906Sasomers } 325300906Sasomers 326300906Sasomers /* Write a label on the newly inserted disk. */ 327300906Sasomers if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { 328300906Sasomers syslog(LOG_ERR, 329300906Sasomers "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 330300906Sasomers zpool_get_name(pool), VdevGUIDString().c_str(), 331300906Sasomers libzfs_error_action(g_zfsHandle), 332300906Sasomers libzfs_error_description(g_zfsHandle)); 333300906Sasomers return (/*consumed*/false); 334300906Sasomers } 335300906Sasomers 336300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 337300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 338300906Sasomers devPath.c_str()); 339300906Sasomers return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 340300906Sasomers} 341300906Sasomers 342300906Sasomersbool 343300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event) 344300906Sasomers{ 345300906Sasomers bool consumed(false); 346300906Sasomers 347300906Sasomers if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 348300906Sasomers /* 349300906Sasomers * The Vdev we represent has been removed from the 350300906Sasomers * configuration. This case is no longer of value. 351300906Sasomers */ 352300906Sasomers Close(); 353300906Sasomers 354300906Sasomers return (/*consumed*/true); 355300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 356300906Sasomers /* This Pool has been destroyed. Discard the case */ 357300906Sasomers Close(); 358300906Sasomers 359300906Sasomers return (/*consumed*/true); 360300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 361300906Sasomers RefreshVdevState(); 362300906Sasomers if (VdevState() < VDEV_STATE_HEALTHY) 363300906Sasomers consumed = ActivateSpare(); 364300906Sasomers } 365300906Sasomers 366300906Sasomers 367300906Sasomers if (event.Value("class") == "resource.fs.zfs.removed") { 368300906Sasomers bool spare_activated; 369300906Sasomers 370300906Sasomers if (!RefreshVdevState()) { 371300906Sasomers /* 372300906Sasomers * The pool or vdev for this case file is no longer 373300906Sasomers * part of the configuration. This can happen 374300906Sasomers * if we process a device arrival notification 375300906Sasomers * before seeing the ZFS configuration change 376300906Sasomers * event. 377300906Sasomers */ 378300906Sasomers syslog(LOG_INFO, 379300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 380300906Sasomers "unconfigured. Closing\n", 381300906Sasomers PoolGUIDString().c_str(), 382300906Sasomers VdevGUIDString().c_str()); 383300906Sasomers /* 384300906Sasomers * Close the case now so we won't waste cycles in the 385300906Sasomers * system rescan 386300906Sasomers */ 387300906Sasomers Close(); 388300906Sasomers 389300906Sasomers /* 390300906Sasomers * Since this event was not used to close this 391300906Sasomers * case, do not report it as consumed. 392300906Sasomers */ 393300906Sasomers return (/*consumed*/false); 394300906Sasomers } 395300906Sasomers 396300906Sasomers /* 397300906Sasomers * Discard any tentative I/O error events for 398300906Sasomers * this case. They were most likely caused by the 399300906Sasomers * hot-unplug of this device. 400300906Sasomers */ 401300906Sasomers PurgeTentativeEvents(); 402300906Sasomers 403300906Sasomers /* Try to activate spares if they are available */ 404300906Sasomers spare_activated = ActivateSpare(); 405300906Sasomers 406300906Sasomers /* 407300906Sasomers * Rescan the drives in the system to see if a recent 408300906Sasomers * drive arrival can be used to solve this case. 409300906Sasomers */ 410300906Sasomers ZfsDaemon::RequestSystemRescan(); 411300906Sasomers 412300906Sasomers /* 413300906Sasomers * Consume the event if we successfully activated a spare. 414300906Sasomers * Otherwise, leave it in the unconsumed events list so that the 415300906Sasomers * future addition of a spare to this pool might be able to 416300906Sasomers * close the case 417300906Sasomers */ 418300906Sasomers consumed = spare_activated; 419300906Sasomers } else if (event.Value("class") == "resource.fs.zfs.statechange") { 420300906Sasomers RefreshVdevState(); 421300906Sasomers /* 422300906Sasomers * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 423300906Sasomers * activate a hotspare. Otherwise, ignore the event 424300906Sasomers */ 425300906Sasomers if (VdevState() == VDEV_STATE_FAULTED || 426300906Sasomers VdevState() == VDEV_STATE_DEGRADED || 427300906Sasomers VdevState() == VDEV_STATE_CANT_OPEN) 428300906Sasomers (void) ActivateSpare(); 429300906Sasomers consumed = true; 430300906Sasomers } 431300906Sasomers else if (event.Value("class") == "ereport.fs.zfs.io" || 432300906Sasomers event.Value("class") == "ereport.fs.zfs.checksum") { 433300906Sasomers 434300906Sasomers m_tentativeEvents.push_front(event.DeepCopy()); 435300906Sasomers RegisterCallout(event); 436300906Sasomers consumed = true; 437300906Sasomers } 438300906Sasomers 439300906Sasomers bool closed(CloseIfSolved()); 440300906Sasomers 441300906Sasomers return (consumed || closed); 442300906Sasomers} 443300906Sasomers 444300906Sasomers 445300906Sasomersbool 446300906SasomersCaseFile::ActivateSpare() { 447300906Sasomers nvlist_t *config, *nvroot; 448300906Sasomers nvlist_t **spares; 449300906Sasomers char *devPath, *vdev_type; 450300906Sasomers const char *poolname; 451300906Sasomers u_int nspares, i; 452300906Sasomers int error; 453300906Sasomers 454300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 455300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 456300906Sasomers if (zhp == NULL) { 457300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 458300906Sasomers "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID); 459300906Sasomers return (false); 460300906Sasomers } 461300906Sasomers poolname = zpool_get_name(zhp); 462300906Sasomers config = zpool_get_config(zhp, NULL); 463300906Sasomers if (config == NULL) { 464300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 465300906Sasomers "config for pool %s", poolname); 466300906Sasomers return (false); 467300906Sasomers } 468300906Sasomers error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 469300906Sasomers if (error != 0){ 470300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 471300906Sasomers "tree for pool %s", poolname); 472300906Sasomers return (false); 473300906Sasomers } 474300906Sasomers nspares = 0; 475300906Sasomers nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 476300906Sasomers &nspares); 477300906Sasomers if (nspares == 0) { 478300906Sasomers /* The pool has no spares configured */ 479300906Sasomers syslog(LOG_INFO, "CaseFile::ActivateSpare: " 480300906Sasomers "No spares available for pool %s", poolname); 481300906Sasomers return (false); 482300906Sasomers } 483300906Sasomers for (i = 0; i < nspares; i++) { 484300906Sasomers uint64_t *nvlist_array; 485300906Sasomers vdev_stat_t *vs; 486300906Sasomers uint_t nstats; 487300906Sasomers 488300906Sasomers if (nvlist_lookup_uint64_array(spares[i], 489300906Sasomers ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 490300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 491300906Sasomers "find vdev stats for pool %s, spare %d", 492300906Sasomers poolname, i); 493300906Sasomers return (false); 494300906Sasomers } 495300906Sasomers vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 496300906Sasomers 497300906Sasomers if ((vs->vs_aux != VDEV_AUX_SPARED) 498300906Sasomers && (vs->vs_state == VDEV_STATE_HEALTHY)) { 499300906Sasomers /* We found a usable spare */ 500300906Sasomers break; 501300906Sasomers } 502300906Sasomers } 503300906Sasomers 504300906Sasomers if (i == nspares) { 505300906Sasomers /* No available spares were found */ 506300906Sasomers return (false); 507300906Sasomers } 508300906Sasomers 509300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 510300906Sasomers if (error != 0) { 511300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 512300906Sasomers "the path of pool %s, spare %d. Error %d", 513300906Sasomers poolname, i, error); 514300906Sasomers return (false); 515300906Sasomers } 516300906Sasomers 517300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 518300906Sasomers if (error != 0) { 519300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 520300906Sasomers "the vdev type of pool %s, spare %d. Error %d", 521300906Sasomers poolname, i, error); 522300906Sasomers return (false); 523300906Sasomers } 524300906Sasomers 525300906Sasomers return (Replace(vdev_type, devPath, /*isspare*/true)); 526300906Sasomers} 527300906Sasomers 528300906Sasomersvoid 529300906SasomersCaseFile::RegisterCallout(const Event &event) 530300906Sasomers{ 531300906Sasomers timeval now, countdown, elapsed, timestamp, zero, remaining; 532300906Sasomers 533300906Sasomers gettimeofday(&now, 0); 534300906Sasomers timestamp = event.GetTimestamp(); 535300906Sasomers timersub(&now, ×tamp, &elapsed); 536300906Sasomers timersub(&s_removeGracePeriod, &elapsed, &countdown); 537300906Sasomers /* 538300906Sasomers * If countdown is <= zero, Reset the timer to the 539300906Sasomers * smallest positive time value instead 540300906Sasomers */ 541300906Sasomers timerclear(&zero); 542300906Sasomers if (timercmp(&countdown, &zero, <=)) { 543300906Sasomers timerclear(&countdown); 544300906Sasomers countdown.tv_usec = 1; 545300906Sasomers } 546300906Sasomers 547300906Sasomers remaining = m_tentativeTimer.TimeRemaining(); 548300906Sasomers 549300906Sasomers if (!m_tentativeTimer.IsPending() 550300906Sasomers || timercmp(&countdown, &remaining, <)) 551300906Sasomers m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 552300906Sasomers} 553300906Sasomers 554300906Sasomers 555300906Sasomersbool 556300906SasomersCaseFile::CloseIfSolved() 557300906Sasomers{ 558300906Sasomers if (m_events.empty() 559300906Sasomers && m_tentativeEvents.empty()) { 560300906Sasomers 561300906Sasomers /* 562300906Sasomers * We currently do not track or take actions on 563300906Sasomers * devices in the degraded or faulted state. 564300906Sasomers * Once we have support for spare pools, we'll 565300906Sasomers * retain these cases so that any spares added in 566300906Sasomers * the future can be applied to them. 567300906Sasomers */ 568300906Sasomers switch (VdevState()) { 569300906Sasomers case VDEV_STATE_HEALTHY: 570300906Sasomers /* No need to keep cases for healthy vdevs */ 571300906Sasomers Close(); 572300906Sasomers return (true); 573300906Sasomers case VDEV_STATE_REMOVED: 574300906Sasomers case VDEV_STATE_CANT_OPEN: 575300906Sasomers /* 576300906Sasomers * Keep open. We may solve it with a newly inserted 577300906Sasomers * device. 578300906Sasomers */ 579300906Sasomers case VDEV_STATE_FAULTED: 580300906Sasomers case VDEV_STATE_DEGRADED: 581300906Sasomers /* 582300906Sasomers * Keep open. We may solve it with the future 583300906Sasomers * addition of a spare to the pool 584300906Sasomers */ 585300906Sasomers case VDEV_STATE_UNKNOWN: 586300906Sasomers case VDEV_STATE_CLOSED: 587300906Sasomers case VDEV_STATE_OFFLINE: 588300906Sasomers /* 589300906Sasomers * Keep open? This may not be the correct behavior, 590300906Sasomers * but it's what we've always done 591300906Sasomers */ 592300906Sasomers ; 593300906Sasomers } 594300906Sasomers 595300906Sasomers /* 596300906Sasomers * Re-serialize the case in order to remove any 597300906Sasomers * previous event data. 598300906Sasomers */ 599300906Sasomers Serialize(); 600300906Sasomers } 601300906Sasomers 602300906Sasomers return (false); 603300906Sasomers} 604300906Sasomers 605300906Sasomersvoid 606300906SasomersCaseFile::Log() 607300906Sasomers{ 608300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 609300906Sasomers VdevGUIDString().c_str(), PhysicalPath().c_str()); 610300906Sasomers syslog(LOG_INFO, "\tVdev State = %s\n", 611300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 612300906Sasomers if (m_tentativeEvents.size() != 0) { 613300906Sasomers syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 614300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 615300906Sasomers event != m_tentativeEvents.end(); event++) 616300906Sasomers (*event)->Log(LOG_INFO); 617300906Sasomers } 618300906Sasomers if (m_events.size() != 0) { 619300906Sasomers syslog(LOG_INFO, "\t=== Events ===\n"); 620300906Sasomers for (EventList::iterator event(m_events.begin()); 621300906Sasomers event != m_events.end(); event++) 622300906Sasomers (*event)->Log(LOG_INFO); 623300906Sasomers } 624300906Sasomers} 625300906Sasomers 626300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------ 627300906Sasomersvoid 628300906SasomersCaseFile::OnGracePeriodEnded(void *arg) 629300906Sasomers{ 630300906Sasomers CaseFile &casefile(*static_cast<CaseFile *>(arg)); 631300906Sasomers 632300906Sasomers casefile.OnGracePeriodEnded(); 633300906Sasomers} 634300906Sasomers 635300906Sasomersint 636300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry) 637300906Sasomers{ 638300906Sasomers uint64_t poolGUID; 639300906Sasomers uint64_t vdevGUID; 640300906Sasomers 641300906Sasomers if (dirEntry->d_type == DT_REG 642300906Sasomers && sscanf(dirEntry->d_name, "pool_%"PRIu64"_vdev_%"PRIu64".case", 643300906Sasomers &poolGUID, &vdevGUID) == 2) 644300906Sasomers return (1); 645300906Sasomers return (0); 646300906Sasomers} 647300906Sasomers 648300906Sasomersvoid 649300906SasomersCaseFile::DeSerializeFile(const char *fileName) 650300906Sasomers{ 651300906Sasomers string fullName(s_caseFilePath + '/' + fileName); 652300906Sasomers CaseFile *existingCaseFile(NULL); 653300906Sasomers CaseFile *caseFile(NULL); 654300906Sasomers 655300906Sasomers try { 656300906Sasomers uint64_t poolGUID; 657300906Sasomers uint64_t vdevGUID; 658300906Sasomers nvlist_t *vdevConf; 659300906Sasomers 660300906Sasomers sscanf(fileName, "pool_%"PRIu64"_vdev_%"PRIu64".case", 661300906Sasomers &poolGUID, &vdevGUID); 662300906Sasomers existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 663300906Sasomers if (existingCaseFile != NULL) { 664300906Sasomers /* 665300906Sasomers * If the vdev is already degraded or faulted, 666300906Sasomers * there's no point in keeping the state around 667300906Sasomers * that we use to put a drive into the degraded 668300906Sasomers * state. However, if the vdev is simply missing, 669300906Sasomers * preserve the case data in the hopes that it will 670300906Sasomers * return. 671300906Sasomers */ 672300906Sasomers caseFile = existingCaseFile; 673300906Sasomers vdev_state curState(caseFile->VdevState()); 674300906Sasomers if (curState > VDEV_STATE_CANT_OPEN 675300906Sasomers && curState < VDEV_STATE_HEALTHY) { 676300906Sasomers unlink(fileName); 677300906Sasomers return; 678300906Sasomers } 679300906Sasomers } else { 680300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 681300906Sasomers if (zpl.empty() 682300906Sasomers || (vdevConf = VdevIterator(zpl.front()) 683300906Sasomers .Find(vdevGUID)) == NULL) { 684300906Sasomers /* 685300906Sasomers * Either the pool no longer exists 686300906Sasomers * or this vdev is no longer a member of 687300906Sasomers * the pool. 688300906Sasomers */ 689300906Sasomers unlink(fullName.c_str()); 690300906Sasomers return; 691300906Sasomers } 692300906Sasomers 693300906Sasomers /* 694300906Sasomers * Any vdev we find that does not have a case file 695300906Sasomers * must be in the healthy state and thus worthy of 696300906Sasomers * continued SERD data tracking. 697300906Sasomers */ 698300906Sasomers caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 699300906Sasomers } 700300906Sasomers 701300906Sasomers ifstream caseStream(fullName.c_str()); 702300906Sasomers if (!caseStream) 703300906Sasomers throw ZfsdException("CaseFile::DeSerialize: Unable to " 704300906Sasomers "read %s.\n", fileName); 705300906Sasomers 706300906Sasomers caseFile->DeSerialize(caseStream); 707300906Sasomers } catch (const ParseException &exp) { 708300906Sasomers 709300906Sasomers exp.Log(); 710300906Sasomers if (caseFile != existingCaseFile) 711300906Sasomers delete caseFile; 712300906Sasomers 713300906Sasomers /* 714300906Sasomers * Since we can't parse the file, unlink it so we don't 715300906Sasomers * trip over it again. 716300906Sasomers */ 717300906Sasomers unlink(fileName); 718300906Sasomers } catch (const ZfsdException &zfsException) { 719300906Sasomers 720300906Sasomers zfsException.Log(); 721300906Sasomers if (caseFile != existingCaseFile) 722300906Sasomers delete caseFile; 723300906Sasomers } 724300906Sasomers} 725300906Sasomers 726300906Sasomers//- CaseFile Protected Methods ------------------------------------------------- 727300906SasomersCaseFile::CaseFile(const Vdev &vdev) 728300906Sasomers : m_poolGUID(vdev.PoolGUID()), 729300906Sasomers m_vdevGUID(vdev.GUID()), 730300906Sasomers m_vdevState(vdev.State()), 731300906Sasomers m_vdevPhysPath(vdev.PhysicalPath()) 732300906Sasomers{ 733300906Sasomers stringstream guidString; 734300906Sasomers 735300906Sasomers guidString << m_vdevGUID; 736300906Sasomers m_vdevGUIDString = guidString.str(); 737300906Sasomers guidString.str(""); 738300906Sasomers guidString << m_poolGUID; 739300906Sasomers m_poolGUIDString = guidString.str(); 740300906Sasomers 741300906Sasomers s_activeCases.push_back(this); 742300906Sasomers 743300906Sasomers syslog(LOG_INFO, "Creating new CaseFile:\n"); 744300906Sasomers Log(); 745300906Sasomers} 746300906Sasomers 747300906SasomersCaseFile::~CaseFile() 748300906Sasomers{ 749300906Sasomers PurgeEvents(); 750300906Sasomers PurgeTentativeEvents(); 751300906Sasomers m_tentativeTimer.Stop(); 752300906Sasomers s_activeCases.remove(this); 753300906Sasomers} 754300906Sasomers 755300906Sasomersvoid 756300906SasomersCaseFile::PurgeEvents() 757300906Sasomers{ 758300906Sasomers for (EventList::iterator event(m_events.begin()); 759300906Sasomers event != m_events.end(); event++) 760300906Sasomers delete *event; 761300906Sasomers 762300906Sasomers m_events.clear(); 763300906Sasomers} 764300906Sasomers 765300906Sasomersvoid 766300906SasomersCaseFile::PurgeTentativeEvents() 767300906Sasomers{ 768300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 769300906Sasomers event != m_tentativeEvents.end(); event++) 770300906Sasomers delete *event; 771300906Sasomers 772300906Sasomers m_tentativeEvents.clear(); 773300906Sasomers} 774300906Sasomers 775300906Sasomersvoid 776300906SasomersCaseFile::SerializeEvList(const EventList events, int fd, 777300906Sasomers const char* prefix) const 778300906Sasomers{ 779300906Sasomers if (events.empty()) 780300906Sasomers return; 781300906Sasomers for (EventList::const_iterator curEvent = events.begin(); 782300906Sasomers curEvent != events.end(); curEvent++) { 783300906Sasomers const string &eventString((*curEvent)->GetEventString()); 784300906Sasomers 785300906Sasomers // TODO: replace many write(2) calls with a single writev(2) 786300906Sasomers if (prefix) 787300906Sasomers write(fd, prefix, strlen(prefix)); 788300906Sasomers write(fd, eventString.c_str(), eventString.length()); 789300906Sasomers } 790300906Sasomers} 791300906Sasomers 792300906Sasomersvoid 793300906SasomersCaseFile::Serialize() 794300906Sasomers{ 795300906Sasomers stringstream saveFile; 796300906Sasomers 797300906Sasomers saveFile << setfill('0') 798300906Sasomers << s_caseFilePath << "/" 799300906Sasomers << "pool_" << PoolGUIDString() 800300906Sasomers << "_vdev_" << VdevGUIDString() 801300906Sasomers << ".case"; 802300906Sasomers 803300906Sasomers if (m_events.empty() && m_tentativeEvents.empty()) { 804300906Sasomers unlink(saveFile.str().c_str()); 805300906Sasomers return; 806300906Sasomers } 807300906Sasomers 808300906Sasomers int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 809300906Sasomers if (fd == -1) { 810300906Sasomers syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 811300906Sasomers saveFile.str().c_str()); 812300906Sasomers return; 813300906Sasomers } 814300906Sasomers SerializeEvList(m_events, fd); 815300906Sasomers SerializeEvList(m_tentativeEvents, fd, "tentative "); 816300906Sasomers close(fd); 817300906Sasomers} 818300906Sasomers 819300906Sasomers/* 820300906Sasomers * XXX: This method assumes that events may not contain embedded newlines. If 821300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch 822300906Sasomers * serialization formats 823300906Sasomers */ 824300906Sasomersvoid 825300906SasomersCaseFile::DeSerialize(ifstream &caseStream) 826300906Sasomers{ 827300906Sasomers string evString; 828300906Sasomers const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 829300906Sasomers 830300906Sasomers caseStream >> std::noskipws >> std::ws; 831300906Sasomers while (caseStream.good()) { 832300906Sasomers /* 833300906Sasomers * Outline: 834300906Sasomers * read the beginning of a line and check it for 835300906Sasomers * "tentative". If found, discard "tentative". 836300906Sasomers * Create a new event 837300906Sasomers * continue 838300906Sasomers */ 839300906Sasomers EventList* destEvents; 840300906Sasomers const string tentFlag("tentative "); 841300906Sasomers string line; 842300906Sasomers std::stringbuf lineBuf; 843300906Sasomers 844300906Sasomers caseStream.get(lineBuf); 845300906Sasomers caseStream.ignore(); /*discard the newline character*/ 846300906Sasomers line = lineBuf.str(); 847300906Sasomers if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 848300906Sasomers /* Discard "tentative" */ 849300906Sasomers line.erase(0, tentFlag.size()); 850300906Sasomers destEvents = &m_tentativeEvents; 851300906Sasomers } else { 852300906Sasomers destEvents = &m_events; 853300906Sasomers } 854300906Sasomers Event *event(Event::CreateEvent(factory, line)); 855300906Sasomers if (event != NULL) { 856300906Sasomers destEvents->push_back(event); 857300906Sasomers RegisterCallout(*event); 858300906Sasomers } 859300906Sasomers } 860300906Sasomers} 861300906Sasomers 862300906Sasomersvoid 863300906SasomersCaseFile::Close() 864300906Sasomers{ 865300906Sasomers /* 866300906Sasomers * This case is no longer relevant. Clean up our 867300906Sasomers * serialization file, and delete the case. 868300906Sasomers */ 869300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 870300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 871300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 872300906Sasomers 873300906Sasomers /* 874300906Sasomers * Serialization of a Case with no event data, clears the 875300906Sasomers * Serialization data for that event. 876300906Sasomers */ 877300906Sasomers PurgeEvents(); 878300906Sasomers Serialize(); 879300906Sasomers 880300906Sasomers delete this; 881300906Sasomers} 882300906Sasomers 883300906Sasomersvoid 884300906SasomersCaseFile::OnGracePeriodEnded() 885300906Sasomers{ 886300906Sasomers bool should_fault, should_degrade; 887300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 888300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 889300906Sasomers 890300906Sasomers m_events.splice(m_events.begin(), m_tentativeEvents); 891300906Sasomers should_fault = ShouldFault(); 892300906Sasomers should_degrade = ShouldDegrade(); 893300906Sasomers 894300906Sasomers if (should_fault || should_degrade) { 895300906Sasomers if (zhp == NULL 896300906Sasomers || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 897300906Sasomers /* 898300906Sasomers * Either the pool no longer exists 899300906Sasomers * or this vdev is no longer a member of 900300906Sasomers * the pool. 901300906Sasomers */ 902300906Sasomers Close(); 903300906Sasomers return; 904300906Sasomers } 905300906Sasomers 906300906Sasomers } 907300906Sasomers 908300906Sasomers /* A fault condition has priority over a degrade condition */ 909300906Sasomers if (ShouldFault()) { 910300906Sasomers /* Fault the vdev and close the case. */ 911300906Sasomers if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 912300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 913300906Sasomers syslog(LOG_INFO, "Faulting vdev(%s/%s)", 914300906Sasomers PoolGUIDString().c_str(), 915300906Sasomers VdevGUIDString().c_str()); 916300906Sasomers Close(); 917300906Sasomers return; 918300906Sasomers } 919300906Sasomers else { 920300906Sasomers syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 921300906Sasomers PoolGUIDString().c_str(), 922300906Sasomers VdevGUIDString().c_str(), 923300906Sasomers libzfs_error_action(g_zfsHandle), 924300906Sasomers libzfs_error_description(g_zfsHandle)); 925300906Sasomers } 926300906Sasomers } 927300906Sasomers else if (ShouldDegrade()) { 928300906Sasomers /* Degrade the vdev and close the case. */ 929300906Sasomers if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 930300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 931300906Sasomers syslog(LOG_INFO, "Degrading vdev(%s/%s)", 932300906Sasomers PoolGUIDString().c_str(), 933300906Sasomers VdevGUIDString().c_str()); 934300906Sasomers Close(); 935300906Sasomers return; 936300906Sasomers } 937300906Sasomers else { 938300906Sasomers syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 939300906Sasomers PoolGUIDString().c_str(), 940300906Sasomers VdevGUIDString().c_str(), 941300906Sasomers libzfs_error_action(g_zfsHandle), 942300906Sasomers libzfs_error_description(g_zfsHandle)); 943300906Sasomers } 944300906Sasomers } 945300906Sasomers Serialize(); 946300906Sasomers} 947300906Sasomers 948300906SasomersVdev 949300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 950300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 951300906Sasomers std::list<Vdev> children; 952300906Sasomers std::list<Vdev>::iterator children_it; 953300906Sasomers 954300906Sasomers Vdev parent(vd.Parent()); 955300906Sasomers Vdev replacing(NonexistentVdev); 956300906Sasomers 957300906Sasomers /* 958300906Sasomers * To determine whether we are being replaced by another spare that 959300906Sasomers * is still working, then make sure that it is currently spared and 960300906Sasomers * that the spare is either resilvering or healthy. If any of these 961300906Sasomers * conditions fail, then we are not being replaced by a spare. 962300906Sasomers * 963300906Sasomers * If the spare is healthy, then the case file should be closed very 964300906Sasomers * soon after this check. 965300906Sasomers */ 966300906Sasomers if (parent.DoesNotExist() 967300906Sasomers || parent.Name(zhp, /*verbose*/false) != "spare") 968300906Sasomers return (NonexistentVdev); 969300906Sasomers 970300906Sasomers children = parent.Children(); 971300906Sasomers children_it = children.begin(); 972300906Sasomers for (;children_it != children.end(); children_it++) { 973300906Sasomers Vdev child = *children_it; 974300906Sasomers 975300906Sasomers /* Skip our vdev. */ 976300906Sasomers if (child.GUID() == VdevGUID()) 977300906Sasomers continue; 978300906Sasomers /* 979300906Sasomers * Accept the first child that doesn't match our GUID, or 980300906Sasomers * any resilvering/healthy device if one exists. 981300906Sasomers */ 982300906Sasomers if (replacing.DoesNotExist() || child.IsResilvering() 983300906Sasomers || child.State() == VDEV_STATE_HEALTHY) 984300906Sasomers replacing = child; 985300906Sasomers } 986300906Sasomers 987300906Sasomers return (replacing); 988300906Sasomers} 989300906Sasomers 990300906Sasomersbool 991300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 992300906Sasomers nvlist_t *nvroot, *newvd; 993300906Sasomers const char *poolname; 994300906Sasomers string oldstr(VdevGUIDString()); 995300906Sasomers bool retval = true; 996300906Sasomers 997300906Sasomers /* Figure out what pool we're working on */ 998300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 999300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1000300906Sasomers if (zhp == NULL) { 1001300906Sasomers syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1002300906Sasomers "pool_guid %"PRIu64".", (uint64_t)m_poolGUID); 1003300906Sasomers return (false); 1004300906Sasomers } 1005300906Sasomers poolname = zpool_get_name(zhp); 1006300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1007300906Sasomers Vdev replaced(BeingReplacedBy(zhp)); 1008300906Sasomers 1009300906Sasomers if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1010300906Sasomers /* If we are already being replaced by a working spare, pass. */ 1011300906Sasomers if (replaced.IsResilvering() 1012300906Sasomers || replaced.State() == VDEV_STATE_HEALTHY) { 1013300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1014300906Sasomers "replaced", VdevGUIDString().c_str(), path); 1015300906Sasomers return (/*consumed*/false); 1016300906Sasomers } 1017300906Sasomers /* 1018300906Sasomers * If we have already been replaced by a spare, but that spare 1019300906Sasomers * is broken, we must spare the spare, not the original device. 1020300906Sasomers */ 1021300906Sasomers oldstr = replaced.GUIDString(); 1022300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1023300906Sasomers "broken spare %s instead", VdevGUIDString().c_str(), 1024300906Sasomers path, oldstr.c_str()); 1025300906Sasomers } 1026300906Sasomers 1027300906Sasomers /* 1028300906Sasomers * Build a root vdev/leaf vdev configuration suitable for 1029300906Sasomers * zpool_vdev_attach. Only enough data for the kernel to find 1030300906Sasomers * the device (i.e. type and disk device node path) are needed. 1031300906Sasomers */ 1032300906Sasomers nvroot = NULL; 1033300906Sasomers newvd = NULL; 1034300906Sasomers 1035300906Sasomers if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1036300906Sasomers || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1037300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1038300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1039300906Sasomers if (nvroot != NULL) 1040300906Sasomers nvlist_free(nvroot); 1041300906Sasomers return (false); 1042300906Sasomers } 1043300906Sasomers if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1044300906Sasomers || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1045300906Sasomers || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1046300906Sasomers || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1047300906Sasomers &newvd, 1) != 0) { 1048300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1049300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1050300906Sasomers nvlist_free(newvd); 1051300906Sasomers nvlist_free(nvroot); 1052300906Sasomers return (true); 1053300906Sasomers } 1054300906Sasomers 1055300906Sasomers /* Data was copied when added to the root vdev. */ 1056300906Sasomers nvlist_free(newvd); 1057300906Sasomers 1058300906Sasomers retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1059300906Sasomers /*replace*/B_TRUE) == 0); 1060300906Sasomers if (retval) 1061300906Sasomers syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1062300906Sasomers poolname, oldstr.c_str(), path); 1063300906Sasomers else 1064300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1065300906Sasomers poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1066300906Sasomers libzfs_error_description(g_zfsHandle)); 1067300906Sasomers nvlist_free(nvroot); 1068300906Sasomers 1069300906Sasomers return (retval); 1070300906Sasomers} 1071300906Sasomers 1072300906Sasomers/* Does the argument event refer to a checksum error? */ 1073300906Sasomersstatic bool 1074300906SasomersIsChecksumEvent(const Event* const event) 1075300906Sasomers{ 1076300906Sasomers return ("ereport.fs.zfs.checksum" == event->Value("type")); 1077300906Sasomers} 1078300906Sasomers 1079300906Sasomers/* Does the argument event refer to an IO error? */ 1080300906Sasomersstatic bool 1081300906SasomersIsIOEvent(const Event* const event) 1082300906Sasomers{ 1083300906Sasomers return ("ereport.fs.zfs.io" == event->Value("type")); 1084300906Sasomers} 1085300906Sasomers 1086300906Sasomersbool 1087300906SasomersCaseFile::ShouldDegrade() const 1088300906Sasomers{ 1089300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1090300906Sasomers IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1091300906Sasomers} 1092300906Sasomers 1093300906Sasomersbool 1094300906SasomersCaseFile::ShouldFault() const 1095300906Sasomers{ 1096300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1097300906Sasomers IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1098300906Sasomers} 1099300906Sasomers 1100300906Sasomersnvlist_t * 1101300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const 1102300906Sasomers{ 1103300906Sasomers return (VdevIterator(zhp).Find(VdevGUID())); 1104300906Sasomers} 1105