1300906Sasomers/*- 2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation 3300906Sasomers * All rights reserved. 4300906Sasomers * 5300906Sasomers * Redistribution and use in source and binary forms, with or without 6300906Sasomers * modification, are permitted provided that the following conditions 7300906Sasomers * are met: 8300906Sasomers * 1. Redistributions of source code must retain the above copyright 9300906Sasomers * notice, this list of conditions, and the following disclaimer, 10300906Sasomers * without modification. 11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer 12300906Sasomers * substantially similar to the "NO WARRANTY" disclaimer below 13300906Sasomers * ("Disclaimer") and any redistribution must be conditioned upon 14300906Sasomers * including a substantially similar Disclaimer requirement for further 15300906Sasomers * binary redistribution. 16300906Sasomers * 17300906Sasomers * NO WARRANTY 18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28300906Sasomers * POSSIBILITY OF SUCH DAMAGES. 29300906Sasomers * 30300906Sasomers * Authors: Justin T. Gibbs (Spectra Logic Corporation) 31300906Sasomers */ 32300906Sasomers 33300906Sasomers/** 34300906Sasomers * \file case_file.cc 35300906Sasomers * 36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state. 37300906Sasomers * However, we only serialize to disk those events that need to be preserved 38300906Sasomers * across reboots. For now, this is just a log of soft errors which we 39300906Sasomers * accumulate in order to mark a device as degraded. 40300906Sasomers */ 41300906Sasomers#include <sys/cdefs.h> 42300906Sasomers#include <sys/time.h> 43300906Sasomers 44300906Sasomers#include <sys/fs/zfs.h> 45300906Sasomers 46300906Sasomers#include <dirent.h> 47300906Sasomers#include <iomanip> 48300906Sasomers#include <fstream> 49300906Sasomers#include <functional> 50300906Sasomers#include <sstream> 51300906Sasomers#include <syslog.h> 52300906Sasomers#include <unistd.h> 53300906Sasomers 54300906Sasomers#include <libzfs.h> 55300906Sasomers 56300906Sasomers#include <list> 57300906Sasomers#include <map> 58300906Sasomers#include <string> 59300906Sasomers 60300906Sasomers#include <devdctl/guid.h> 61300906Sasomers#include <devdctl/event.h> 62300906Sasomers#include <devdctl/event_factory.h> 63300906Sasomers#include <devdctl/exception.h> 64300906Sasomers#include <devdctl/consumer.h> 65300906Sasomers 66300906Sasomers#include "callout.h" 67300906Sasomers#include "vdev_iterator.h" 68300906Sasomers#include "zfsd_event.h" 69300906Sasomers#include "case_file.h" 70300906Sasomers#include "vdev.h" 71300906Sasomers#include "zfsd.h" 72300906Sasomers#include "zfsd_exception.h" 73300906Sasomers#include "zpool_list.h" 74300906Sasomers 75300906Sasomers__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 331395 2018-03-22 23:54:14Z mav $"); 76300906Sasomers 77300906Sasomers/*============================ Namespace Control =============================*/ 78300906Sasomersusing std::auto_ptr; 79300906Sasomersusing std::hex; 80300906Sasomersusing std::ifstream; 81300906Sasomersusing std::stringstream; 82300906Sasomersusing std::setfill; 83300906Sasomersusing std::setw; 84300906Sasomers 85300906Sasomersusing DevdCtl::Event; 86300906Sasomersusing DevdCtl::EventFactory; 87300906Sasomersusing DevdCtl::EventList; 88300906Sasomersusing DevdCtl::Guid; 89300906Sasomersusing DevdCtl::ParseException; 90300906Sasomers 91300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/ 92300906Sasomers//- CaseFile Static Data ------------------------------------------------------- 93300906Sasomers 94300906SasomersCaseFileList CaseFile::s_activeCases; 95300906Sasomersconst string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; 96300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; 97300906Sasomers 98300906Sasomers//- CaseFile Static Public Methods --------------------------------------------- 99300906SasomersCaseFile * 100300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID) 101300906Sasomers{ 102300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 103300906Sasomers curCase != s_activeCases.end(); curCase++) { 104300906Sasomers 105326321Sasomers if (((*curCase)->PoolGUID() != poolGUID 106326321Sasomers && Guid::InvalidGuid() != poolGUID) 107300906Sasomers || (*curCase)->VdevGUID() != vdevGUID) 108300906Sasomers continue; 109300906Sasomers 110300906Sasomers /* 111300906Sasomers * We only carry one active case per-vdev. 112300906Sasomers */ 113300906Sasomers return (*curCase); 114300906Sasomers } 115300906Sasomers return (NULL); 116300906Sasomers} 117300906Sasomers 118300906SasomersCaseFile * 119300906SasomersCaseFile::Find(const string &physPath) 120300906Sasomers{ 121300906Sasomers CaseFile *result = NULL; 122300906Sasomers 123300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 124300906Sasomers curCase != s_activeCases.end(); curCase++) { 125300906Sasomers 126300906Sasomers if ((*curCase)->PhysicalPath() != physPath) 127300906Sasomers continue; 128300906Sasomers 129300906Sasomers if (result != NULL) { 130300906Sasomers syslog(LOG_WARNING, "Multiple casefiles found for " 131300906Sasomers "physical path %s. " 132300906Sasomers "This is most likely a bug in zfsd", 133300906Sasomers physPath.c_str()); 134300906Sasomers } 135300906Sasomers result = *curCase; 136300906Sasomers } 137300906Sasomers return (result); 138300906Sasomers} 139300906Sasomers 140300906Sasomers 141300906Sasomersvoid 142300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) 143300906Sasomers{ 144300906Sasomers CaseFileList::iterator casefile; 145300906Sasomers for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ 146300906Sasomers CaseFileList::iterator next = casefile; 147300906Sasomers next++; 148300906Sasomers if (poolGUID == (*casefile)->PoolGUID()) 149300906Sasomers (*casefile)->ReEvaluate(event); 150300906Sasomers casefile = next; 151300906Sasomers } 152300906Sasomers} 153300906Sasomers 154300906SasomersCaseFile & 155300906SasomersCaseFile::Create(Vdev &vdev) 156300906Sasomers{ 157300906Sasomers CaseFile *activeCase; 158300906Sasomers 159300906Sasomers activeCase = Find(vdev.PoolGUID(), vdev.GUID()); 160300906Sasomers if (activeCase == NULL) 161300906Sasomers activeCase = new CaseFile(vdev); 162300906Sasomers 163300906Sasomers return (*activeCase); 164300906Sasomers} 165300906Sasomers 166300906Sasomersvoid 167300906SasomersCaseFile::DeSerialize() 168300906Sasomers{ 169300906Sasomers struct dirent **caseFiles; 170300906Sasomers 171300906Sasomers int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, 172300906Sasomers DeSerializeSelector, /*compar*/NULL)); 173300906Sasomers 174300906Sasomers if (numCaseFiles == -1) 175300906Sasomers return; 176300906Sasomers if (numCaseFiles == 0) { 177300906Sasomers free(caseFiles); 178300906Sasomers return; 179300906Sasomers } 180300906Sasomers 181300906Sasomers for (int i = 0; i < numCaseFiles; i++) { 182300906Sasomers 183300906Sasomers DeSerializeFile(caseFiles[i]->d_name); 184300906Sasomers free(caseFiles[i]); 185300906Sasomers } 186300906Sasomers free(caseFiles); 187300906Sasomers} 188300906Sasomers 189330733Sasomersbool 190330733SasomersCaseFile::Empty() 191330733Sasomers{ 192330733Sasomers return (s_activeCases.empty()); 193330733Sasomers} 194330733Sasomers 195300906Sasomersvoid 196300906SasomersCaseFile::LogAll() 197300906Sasomers{ 198300906Sasomers for (CaseFileList::iterator curCase = s_activeCases.begin(); 199300906Sasomers curCase != s_activeCases.end(); curCase++) 200300906Sasomers (*curCase)->Log(); 201300906Sasomers} 202300906Sasomers 203300906Sasomersvoid 204300906SasomersCaseFile::PurgeAll() 205300906Sasomers{ 206300906Sasomers /* 207300906Sasomers * Serialize casefiles before deleting them so that they can be reread 208300906Sasomers * and revalidated during BuildCaseFiles. 209300906Sasomers * CaseFiles remove themselves from this list on destruction. 210300906Sasomers */ 211300906Sasomers while (s_activeCases.size() != 0) { 212300906Sasomers CaseFile *casefile = s_activeCases.front(); 213300906Sasomers casefile->Serialize(); 214300906Sasomers delete casefile; 215300906Sasomers } 216300906Sasomers 217300906Sasomers} 218300906Sasomers 219300906Sasomers//- CaseFile Public Methods ---------------------------------------------------- 220300906Sasomersbool 221300906SasomersCaseFile::RefreshVdevState() 222300906Sasomers{ 223300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 224300906Sasomers zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); 225300906Sasomers if (casePool == NULL) 226300906Sasomers return (false); 227300906Sasomers 228300906Sasomers Vdev vd(casePool, CaseVdev(casePool)); 229300906Sasomers if (vd.DoesNotExist()) 230300906Sasomers return (false); 231300906Sasomers 232300906Sasomers m_vdevState = vd.State(); 233300906Sasomers m_vdevPhysPath = vd.PhysicalPath(); 234300906Sasomers return (true); 235300906Sasomers} 236300906Sasomers 237300906Sasomersbool 238300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) 239300906Sasomers{ 240300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 241300906Sasomers zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); 242331395Smav zpool_boot_label_t boot_type; 243331395Smav uint64_t boot_size; 244300906Sasomers 245300906Sasomers if (pool == NULL || !RefreshVdevState()) { 246300906Sasomers /* 247300906Sasomers * The pool or vdev for this case file is no longer 248300906Sasomers * part of the configuration. This can happen 249300906Sasomers * if we process a device arrival notification 250300906Sasomers * before seeing the ZFS configuration change 251300906Sasomers * event. 252300906Sasomers */ 253300906Sasomers syslog(LOG_INFO, 254300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " 255300906Sasomers "Closing\n", 256300906Sasomers PoolGUIDString().c_str(), 257300906Sasomers VdevGUIDString().c_str()); 258300906Sasomers Close(); 259300906Sasomers 260300906Sasomers /* 261300906Sasomers * Since this event was not used to close this 262300906Sasomers * case, do not report it as consumed. 263300906Sasomers */ 264300906Sasomers return (/*consumed*/false); 265300906Sasomers } 266300906Sasomers 267300906Sasomers if (VdevState() > VDEV_STATE_CANT_OPEN) { 268300906Sasomers /* 269300906Sasomers * For now, newly discovered devices only help for 270300906Sasomers * devices that are missing. In the future, we might 271300906Sasomers * use a newly inserted spare to replace a degraded 272300906Sasomers * or faulted device. 273300906Sasomers */ 274300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", 275300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str()); 276300906Sasomers return (/*consumed*/false); 277300906Sasomers } 278300906Sasomers 279300906Sasomers if (vdev != NULL 280326321Sasomers && ( vdev->PoolGUID() == m_poolGUID 281326321Sasomers || vdev->PoolGUID() == Guid::InvalidGuid()) 282300906Sasomers && vdev->GUID() == m_vdevGUID) { 283300906Sasomers 284300906Sasomers zpool_vdev_online(pool, vdev->GUIDString().c_str(), 285300906Sasomers ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, 286300906Sasomers &m_vdevState); 287300906Sasomers syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", 288300906Sasomers zpool_get_name(pool), vdev->GUIDString().c_str(), 289300906Sasomers devPath.c_str(), 290300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 291300906Sasomers 292300906Sasomers /* 293300906Sasomers * Check the vdev state post the online action to see 294300906Sasomers * if we can retire this case. 295300906Sasomers */ 296300906Sasomers CloseIfSolved(); 297300906Sasomers 298300906Sasomers return (/*consumed*/true); 299300906Sasomers } 300300906Sasomers 301300906Sasomers /* 302300906Sasomers * If the auto-replace policy is enabled, and we have physical 303300906Sasomers * path information, try a physical path replacement. 304300906Sasomers */ 305300906Sasomers if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { 306300906Sasomers syslog(LOG_INFO, 307300906Sasomers "CaseFile(%s:%s:%s): AutoReplace not set. " 308300906Sasomers "Ignoring device insertion.\n", 309300906Sasomers PoolGUIDString().c_str(), 310300906Sasomers VdevGUIDString().c_str(), 311300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 312300906Sasomers return (/*consumed*/false); 313300906Sasomers } 314300906Sasomers 315300906Sasomers if (PhysicalPath().empty()) { 316300906Sasomers syslog(LOG_INFO, 317300906Sasomers "CaseFile(%s:%s:%s): No physical path information. " 318300906Sasomers "Ignoring device insertion.\n", 319300906Sasomers PoolGUIDString().c_str(), 320300906Sasomers VdevGUIDString().c_str(), 321300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 322300906Sasomers return (/*consumed*/false); 323300906Sasomers } 324300906Sasomers 325300906Sasomers if (physPath != PhysicalPath()) { 326300906Sasomers syslog(LOG_INFO, 327300906Sasomers "CaseFile(%s:%s:%s): Physical path mismatch. " 328300906Sasomers "Ignoring device insertion.\n", 329300906Sasomers PoolGUIDString().c_str(), 330300906Sasomers VdevGUIDString().c_str(), 331300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 332300906Sasomers return (/*consumed*/false); 333300906Sasomers } 334300906Sasomers 335300906Sasomers /* Write a label on the newly inserted disk. */ 336331395Smav if (zpool_is_bootable(pool)) 337331395Smav boot_type = ZPOOL_COPY_BOOT_LABEL; 338331395Smav else 339331395Smav boot_type = ZPOOL_NO_BOOT_LABEL; 340331395Smav boot_size = zpool_get_prop_int(pool, ZPOOL_PROP_BOOTSIZE, NULL); 341331395Smav if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str(), 342331395Smav boot_type, boot_size, NULL) != 0) { 343300906Sasomers syslog(LOG_ERR, 344300906Sasomers "Replace vdev(%s/%s) by physical path (label): %s: %s\n", 345300906Sasomers zpool_get_name(pool), VdevGUIDString().c_str(), 346300906Sasomers libzfs_error_action(g_zfsHandle), 347300906Sasomers libzfs_error_description(g_zfsHandle)); 348300906Sasomers return (/*consumed*/false); 349300906Sasomers } 350300906Sasomers 351300906Sasomers syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", 352300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 353300906Sasomers devPath.c_str()); 354300906Sasomers return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); 355300906Sasomers} 356300906Sasomers 357300906Sasomersbool 358300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event) 359300906Sasomers{ 360300906Sasomers bool consumed(false); 361300906Sasomers 362300906Sasomers if (event.Value("type") == "misc.fs.zfs.vdev_remove") { 363300906Sasomers /* 364300906Sasomers * The Vdev we represent has been removed from the 365300906Sasomers * configuration. This case is no longer of value. 366300906Sasomers */ 367300906Sasomers Close(); 368300906Sasomers 369300906Sasomers return (/*consumed*/true); 370300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { 371300906Sasomers /* This Pool has been destroyed. Discard the case */ 372300906Sasomers Close(); 373300906Sasomers 374300906Sasomers return (/*consumed*/true); 375300906Sasomers } else if (event.Value("type") == "misc.fs.zfs.config_sync") { 376300906Sasomers RefreshVdevState(); 377300906Sasomers if (VdevState() < VDEV_STATE_HEALTHY) 378300906Sasomers consumed = ActivateSpare(); 379300906Sasomers } 380300906Sasomers 381300906Sasomers 382300906Sasomers if (event.Value("class") == "resource.fs.zfs.removed") { 383300906Sasomers bool spare_activated; 384300906Sasomers 385300906Sasomers if (!RefreshVdevState()) { 386300906Sasomers /* 387300906Sasomers * The pool or vdev for this case file is no longer 388300906Sasomers * part of the configuration. This can happen 389300906Sasomers * if we process a device arrival notification 390300906Sasomers * before seeing the ZFS configuration change 391300906Sasomers * event. 392300906Sasomers */ 393300906Sasomers syslog(LOG_INFO, 394300906Sasomers "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " 395300906Sasomers "unconfigured. Closing\n", 396300906Sasomers PoolGUIDString().c_str(), 397300906Sasomers VdevGUIDString().c_str()); 398300906Sasomers /* 399300906Sasomers * Close the case now so we won't waste cycles in the 400300906Sasomers * system rescan 401300906Sasomers */ 402300906Sasomers Close(); 403300906Sasomers 404300906Sasomers /* 405300906Sasomers * Since this event was not used to close this 406300906Sasomers * case, do not report it as consumed. 407300906Sasomers */ 408300906Sasomers return (/*consumed*/false); 409300906Sasomers } 410300906Sasomers 411300906Sasomers /* 412300906Sasomers * Discard any tentative I/O error events for 413300906Sasomers * this case. They were most likely caused by the 414300906Sasomers * hot-unplug of this device. 415300906Sasomers */ 416300906Sasomers PurgeTentativeEvents(); 417300906Sasomers 418300906Sasomers /* Try to activate spares if they are available */ 419300906Sasomers spare_activated = ActivateSpare(); 420300906Sasomers 421300906Sasomers /* 422300906Sasomers * Rescan the drives in the system to see if a recent 423300906Sasomers * drive arrival can be used to solve this case. 424300906Sasomers */ 425300906Sasomers ZfsDaemon::RequestSystemRescan(); 426300906Sasomers 427300906Sasomers /* 428300906Sasomers * Consume the event if we successfully activated a spare. 429300906Sasomers * Otherwise, leave it in the unconsumed events list so that the 430300906Sasomers * future addition of a spare to this pool might be able to 431300906Sasomers * close the case 432300906Sasomers */ 433300906Sasomers consumed = spare_activated; 434300906Sasomers } else if (event.Value("class") == "resource.fs.zfs.statechange") { 435300906Sasomers RefreshVdevState(); 436300906Sasomers /* 437300906Sasomers * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to 438300906Sasomers * activate a hotspare. Otherwise, ignore the event 439300906Sasomers */ 440300906Sasomers if (VdevState() == VDEV_STATE_FAULTED || 441300906Sasomers VdevState() == VDEV_STATE_DEGRADED || 442300906Sasomers VdevState() == VDEV_STATE_CANT_OPEN) 443300906Sasomers (void) ActivateSpare(); 444300906Sasomers consumed = true; 445300906Sasomers } 446300906Sasomers else if (event.Value("class") == "ereport.fs.zfs.io" || 447300906Sasomers event.Value("class") == "ereport.fs.zfs.checksum") { 448300906Sasomers 449300906Sasomers m_tentativeEvents.push_front(event.DeepCopy()); 450300906Sasomers RegisterCallout(event); 451300906Sasomers consumed = true; 452300906Sasomers } 453300906Sasomers 454300906Sasomers bool closed(CloseIfSolved()); 455300906Sasomers 456300906Sasomers return (consumed || closed); 457300906Sasomers} 458300906Sasomers 459329792Sasomers/* Find a Vdev containing the vdev with the given GUID */ 460329792Sasomersstatic nvlist_t* 461329792Sasomersfind_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) 462329792Sasomers{ 463329792Sasomers nvlist_t **vdevChildren; 464329792Sasomers int error; 465329792Sasomers unsigned ch, numChildren; 466300906Sasomers 467329792Sasomers error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, 468329792Sasomers &vdevChildren, &numChildren); 469329792Sasomers 470329792Sasomers if (error != 0 || numChildren == 0) 471329792Sasomers return (NULL); 472329792Sasomers 473329792Sasomers for (ch = 0; ch < numChildren; ch++) { 474329792Sasomers nvlist *result; 475329792Sasomers Vdev vdev(pool_config, vdevChildren[ch]); 476329792Sasomers 477329792Sasomers if (vdev.GUID() == child_guid) 478329792Sasomers return (config); 479329792Sasomers 480329792Sasomers result = find_parent(pool_config, vdevChildren[ch], child_guid); 481329792Sasomers if (result != NULL) 482329792Sasomers return (result); 483329792Sasomers } 484329792Sasomers 485329792Sasomers return (NULL); 486329792Sasomers} 487329792Sasomers 488300906Sasomersbool 489300906SasomersCaseFile::ActivateSpare() { 490329792Sasomers nvlist_t *config, *nvroot, *parent_config; 491300906Sasomers nvlist_t **spares; 492300906Sasomers char *devPath, *vdev_type; 493300906Sasomers const char *poolname; 494300906Sasomers u_int nspares, i; 495300906Sasomers int error; 496300906Sasomers 497300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 498300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 499300906Sasomers if (zhp == NULL) { 500300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 501300919Sbdrewery "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); 502300906Sasomers return (false); 503300906Sasomers } 504300906Sasomers poolname = zpool_get_name(zhp); 505300906Sasomers config = zpool_get_config(zhp, NULL); 506300906Sasomers if (config == NULL) { 507300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " 508300906Sasomers "config for pool %s", poolname); 509300906Sasomers return (false); 510300906Sasomers } 511300906Sasomers error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); 512300906Sasomers if (error != 0){ 513300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " 514300906Sasomers "tree for pool %s", poolname); 515300906Sasomers return (false); 516300906Sasomers } 517329792Sasomers 518329792Sasomers parent_config = find_parent(config, nvroot, m_vdevGUID); 519329792Sasomers if (parent_config != NULL) { 520329792Sasomers char *parent_type; 521329792Sasomers 522329792Sasomers /* 523329792Sasomers * Don't activate spares for members of a "replacing" vdev. 524329792Sasomers * They're already dealt with. Sparing them will just drag out 525329792Sasomers * the resilver process. 526329792Sasomers */ 527329792Sasomers error = nvlist_lookup_string(parent_config, 528329792Sasomers ZPOOL_CONFIG_TYPE, &parent_type); 529329792Sasomers if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) 530329792Sasomers return (false); 531329792Sasomers } 532329792Sasomers 533300906Sasomers nspares = 0; 534300906Sasomers nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 535300906Sasomers &nspares); 536300906Sasomers if (nspares == 0) { 537300906Sasomers /* The pool has no spares configured */ 538300906Sasomers syslog(LOG_INFO, "CaseFile::ActivateSpare: " 539300906Sasomers "No spares available for pool %s", poolname); 540300906Sasomers return (false); 541300906Sasomers } 542300906Sasomers for (i = 0; i < nspares; i++) { 543300906Sasomers uint64_t *nvlist_array; 544300906Sasomers vdev_stat_t *vs; 545300906Sasomers uint_t nstats; 546300906Sasomers 547300906Sasomers if (nvlist_lookup_uint64_array(spares[i], 548300906Sasomers ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { 549300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " 550300906Sasomers "find vdev stats for pool %s, spare %d", 551300906Sasomers poolname, i); 552300906Sasomers return (false); 553300906Sasomers } 554300906Sasomers vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); 555300906Sasomers 556300906Sasomers if ((vs->vs_aux != VDEV_AUX_SPARED) 557300906Sasomers && (vs->vs_state == VDEV_STATE_HEALTHY)) { 558300906Sasomers /* We found a usable spare */ 559300906Sasomers break; 560300906Sasomers } 561300906Sasomers } 562300906Sasomers 563300906Sasomers if (i == nspares) { 564300906Sasomers /* No available spares were found */ 565300906Sasomers return (false); 566300906Sasomers } 567300906Sasomers 568300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); 569300906Sasomers if (error != 0) { 570300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 571300906Sasomers "the path of pool %s, spare %d. Error %d", 572300906Sasomers poolname, i, error); 573300906Sasomers return (false); 574300906Sasomers } 575300906Sasomers 576300906Sasomers error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); 577300906Sasomers if (error != 0) { 578300906Sasomers syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " 579300906Sasomers "the vdev type of pool %s, spare %d. Error %d", 580300906Sasomers poolname, i, error); 581300906Sasomers return (false); 582300906Sasomers } 583300906Sasomers 584300906Sasomers return (Replace(vdev_type, devPath, /*isspare*/true)); 585300906Sasomers} 586300906Sasomers 587300906Sasomersvoid 588300906SasomersCaseFile::RegisterCallout(const Event &event) 589300906Sasomers{ 590300906Sasomers timeval now, countdown, elapsed, timestamp, zero, remaining; 591300906Sasomers 592300906Sasomers gettimeofday(&now, 0); 593300906Sasomers timestamp = event.GetTimestamp(); 594300906Sasomers timersub(&now, ×tamp, &elapsed); 595300906Sasomers timersub(&s_removeGracePeriod, &elapsed, &countdown); 596300906Sasomers /* 597300906Sasomers * If countdown is <= zero, Reset the timer to the 598300906Sasomers * smallest positive time value instead 599300906Sasomers */ 600300906Sasomers timerclear(&zero); 601300906Sasomers if (timercmp(&countdown, &zero, <=)) { 602300906Sasomers timerclear(&countdown); 603300906Sasomers countdown.tv_usec = 1; 604300906Sasomers } 605300906Sasomers 606300906Sasomers remaining = m_tentativeTimer.TimeRemaining(); 607300906Sasomers 608300906Sasomers if (!m_tentativeTimer.IsPending() 609300906Sasomers || timercmp(&countdown, &remaining, <)) 610300906Sasomers m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); 611300906Sasomers} 612300906Sasomers 613300906Sasomers 614300906Sasomersbool 615300906SasomersCaseFile::CloseIfSolved() 616300906Sasomers{ 617300906Sasomers if (m_events.empty() 618300906Sasomers && m_tentativeEvents.empty()) { 619300906Sasomers 620300906Sasomers /* 621300906Sasomers * We currently do not track or take actions on 622300906Sasomers * devices in the degraded or faulted state. 623300906Sasomers * Once we have support for spare pools, we'll 624300906Sasomers * retain these cases so that any spares added in 625300906Sasomers * the future can be applied to them. 626300906Sasomers */ 627300906Sasomers switch (VdevState()) { 628300906Sasomers case VDEV_STATE_HEALTHY: 629300906Sasomers /* No need to keep cases for healthy vdevs */ 630300906Sasomers Close(); 631300906Sasomers return (true); 632300906Sasomers case VDEV_STATE_REMOVED: 633300906Sasomers case VDEV_STATE_CANT_OPEN: 634300906Sasomers /* 635300906Sasomers * Keep open. We may solve it with a newly inserted 636300906Sasomers * device. 637300906Sasomers */ 638300906Sasomers case VDEV_STATE_FAULTED: 639300906Sasomers case VDEV_STATE_DEGRADED: 640300906Sasomers /* 641300906Sasomers * Keep open. We may solve it with the future 642300906Sasomers * addition of a spare to the pool 643300906Sasomers */ 644300906Sasomers case VDEV_STATE_UNKNOWN: 645300906Sasomers case VDEV_STATE_CLOSED: 646300906Sasomers case VDEV_STATE_OFFLINE: 647300906Sasomers /* 648300906Sasomers * Keep open? This may not be the correct behavior, 649300906Sasomers * but it's what we've always done 650300906Sasomers */ 651300906Sasomers ; 652300906Sasomers } 653300906Sasomers 654300906Sasomers /* 655300906Sasomers * Re-serialize the case in order to remove any 656300906Sasomers * previous event data. 657300906Sasomers */ 658300906Sasomers Serialize(); 659300906Sasomers } 660300906Sasomers 661300906Sasomers return (false); 662300906Sasomers} 663300906Sasomers 664300906Sasomersvoid 665300906SasomersCaseFile::Log() 666300906Sasomers{ 667300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), 668300906Sasomers VdevGUIDString().c_str(), PhysicalPath().c_str()); 669300906Sasomers syslog(LOG_INFO, "\tVdev State = %s\n", 670300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 671300906Sasomers if (m_tentativeEvents.size() != 0) { 672300906Sasomers syslog(LOG_INFO, "\t=== Tentative Events ===\n"); 673300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 674300906Sasomers event != m_tentativeEvents.end(); event++) 675300906Sasomers (*event)->Log(LOG_INFO); 676300906Sasomers } 677300906Sasomers if (m_events.size() != 0) { 678300906Sasomers syslog(LOG_INFO, "\t=== Events ===\n"); 679300906Sasomers for (EventList::iterator event(m_events.begin()); 680300906Sasomers event != m_events.end(); event++) 681300906Sasomers (*event)->Log(LOG_INFO); 682300906Sasomers } 683300906Sasomers} 684300906Sasomers 685300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------ 686300906Sasomersvoid 687300906SasomersCaseFile::OnGracePeriodEnded(void *arg) 688300906Sasomers{ 689300906Sasomers CaseFile &casefile(*static_cast<CaseFile *>(arg)); 690300906Sasomers 691300906Sasomers casefile.OnGracePeriodEnded(); 692300906Sasomers} 693300906Sasomers 694300906Sasomersint 695300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry) 696300906Sasomers{ 697300906Sasomers uint64_t poolGUID; 698300906Sasomers uint64_t vdevGUID; 699300906Sasomers 700300906Sasomers if (dirEntry->d_type == DT_REG 701300919Sbdrewery && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 702300906Sasomers &poolGUID, &vdevGUID) == 2) 703300906Sasomers return (1); 704300906Sasomers return (0); 705300906Sasomers} 706300906Sasomers 707300906Sasomersvoid 708300906SasomersCaseFile::DeSerializeFile(const char *fileName) 709300906Sasomers{ 710300906Sasomers string fullName(s_caseFilePath + '/' + fileName); 711300906Sasomers CaseFile *existingCaseFile(NULL); 712300906Sasomers CaseFile *caseFile(NULL); 713300906Sasomers 714300906Sasomers try { 715300906Sasomers uint64_t poolGUID; 716300906Sasomers uint64_t vdevGUID; 717300906Sasomers nvlist_t *vdevConf; 718300906Sasomers 719314431Sasomers if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", 720314431Sasomers &poolGUID, &vdevGUID) != 2) { 721314431Sasomers throw ZfsdException("CaseFile::DeSerialize: " 722314431Sasomers "Unintelligible CaseFile filename %s.\n", fileName); 723314431Sasomers } 724300906Sasomers existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); 725300906Sasomers if (existingCaseFile != NULL) { 726300906Sasomers /* 727300906Sasomers * If the vdev is already degraded or faulted, 728300906Sasomers * there's no point in keeping the state around 729300906Sasomers * that we use to put a drive into the degraded 730300906Sasomers * state. However, if the vdev is simply missing, 731300906Sasomers * preserve the case data in the hopes that it will 732300906Sasomers * return. 733300906Sasomers */ 734300906Sasomers caseFile = existingCaseFile; 735300906Sasomers vdev_state curState(caseFile->VdevState()); 736300906Sasomers if (curState > VDEV_STATE_CANT_OPEN 737300906Sasomers && curState < VDEV_STATE_HEALTHY) { 738300906Sasomers unlink(fileName); 739300906Sasomers return; 740300906Sasomers } 741300906Sasomers } else { 742300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); 743300906Sasomers if (zpl.empty() 744300906Sasomers || (vdevConf = VdevIterator(zpl.front()) 745300906Sasomers .Find(vdevGUID)) == NULL) { 746300906Sasomers /* 747300906Sasomers * Either the pool no longer exists 748300906Sasomers * or this vdev is no longer a member of 749300906Sasomers * the pool. 750300906Sasomers */ 751300906Sasomers unlink(fullName.c_str()); 752300906Sasomers return; 753300906Sasomers } 754300906Sasomers 755300906Sasomers /* 756300906Sasomers * Any vdev we find that does not have a case file 757300906Sasomers * must be in the healthy state and thus worthy of 758300906Sasomers * continued SERD data tracking. 759300906Sasomers */ 760300906Sasomers caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); 761300906Sasomers } 762300906Sasomers 763300906Sasomers ifstream caseStream(fullName.c_str()); 764300906Sasomers if (!caseStream) 765300906Sasomers throw ZfsdException("CaseFile::DeSerialize: Unable to " 766300906Sasomers "read %s.\n", fileName); 767300906Sasomers 768300906Sasomers caseFile->DeSerialize(caseStream); 769300906Sasomers } catch (const ParseException &exp) { 770300906Sasomers 771300906Sasomers exp.Log(); 772300906Sasomers if (caseFile != existingCaseFile) 773300906Sasomers delete caseFile; 774300906Sasomers 775300906Sasomers /* 776300906Sasomers * Since we can't parse the file, unlink it so we don't 777300906Sasomers * trip over it again. 778300906Sasomers */ 779300906Sasomers unlink(fileName); 780300906Sasomers } catch (const ZfsdException &zfsException) { 781300906Sasomers 782300906Sasomers zfsException.Log(); 783300906Sasomers if (caseFile != existingCaseFile) 784300906Sasomers delete caseFile; 785300906Sasomers } 786300906Sasomers} 787300906Sasomers 788300906Sasomers//- CaseFile Protected Methods ------------------------------------------------- 789300906SasomersCaseFile::CaseFile(const Vdev &vdev) 790300906Sasomers : m_poolGUID(vdev.PoolGUID()), 791300906Sasomers m_vdevGUID(vdev.GUID()), 792300906Sasomers m_vdevState(vdev.State()), 793300906Sasomers m_vdevPhysPath(vdev.PhysicalPath()) 794300906Sasomers{ 795300906Sasomers stringstream guidString; 796300906Sasomers 797300906Sasomers guidString << m_vdevGUID; 798300906Sasomers m_vdevGUIDString = guidString.str(); 799300906Sasomers guidString.str(""); 800300906Sasomers guidString << m_poolGUID; 801300906Sasomers m_poolGUIDString = guidString.str(); 802300906Sasomers 803300906Sasomers s_activeCases.push_back(this); 804300906Sasomers 805300906Sasomers syslog(LOG_INFO, "Creating new CaseFile:\n"); 806300906Sasomers Log(); 807300906Sasomers} 808300906Sasomers 809300906SasomersCaseFile::~CaseFile() 810300906Sasomers{ 811300906Sasomers PurgeEvents(); 812300906Sasomers PurgeTentativeEvents(); 813300906Sasomers m_tentativeTimer.Stop(); 814300906Sasomers s_activeCases.remove(this); 815300906Sasomers} 816300906Sasomers 817300906Sasomersvoid 818300906SasomersCaseFile::PurgeEvents() 819300906Sasomers{ 820300906Sasomers for (EventList::iterator event(m_events.begin()); 821300906Sasomers event != m_events.end(); event++) 822300906Sasomers delete *event; 823300906Sasomers 824300906Sasomers m_events.clear(); 825300906Sasomers} 826300906Sasomers 827300906Sasomersvoid 828300906SasomersCaseFile::PurgeTentativeEvents() 829300906Sasomers{ 830300906Sasomers for (EventList::iterator event(m_tentativeEvents.begin()); 831300906Sasomers event != m_tentativeEvents.end(); event++) 832300906Sasomers delete *event; 833300906Sasomers 834300906Sasomers m_tentativeEvents.clear(); 835300906Sasomers} 836300906Sasomers 837300906Sasomersvoid 838300906SasomersCaseFile::SerializeEvList(const EventList events, int fd, 839300906Sasomers const char* prefix) const 840300906Sasomers{ 841300906Sasomers if (events.empty()) 842300906Sasomers return; 843300906Sasomers for (EventList::const_iterator curEvent = events.begin(); 844300906Sasomers curEvent != events.end(); curEvent++) { 845300906Sasomers const string &eventString((*curEvent)->GetEventString()); 846300906Sasomers 847300906Sasomers // TODO: replace many write(2) calls with a single writev(2) 848300906Sasomers if (prefix) 849300906Sasomers write(fd, prefix, strlen(prefix)); 850300906Sasomers write(fd, eventString.c_str(), eventString.length()); 851300906Sasomers } 852300906Sasomers} 853300906Sasomers 854300906Sasomersvoid 855300906SasomersCaseFile::Serialize() 856300906Sasomers{ 857300906Sasomers stringstream saveFile; 858300906Sasomers 859300906Sasomers saveFile << setfill('0') 860300906Sasomers << s_caseFilePath << "/" 861300906Sasomers << "pool_" << PoolGUIDString() 862300906Sasomers << "_vdev_" << VdevGUIDString() 863300906Sasomers << ".case"; 864300906Sasomers 865300906Sasomers if (m_events.empty() && m_tentativeEvents.empty()) { 866300906Sasomers unlink(saveFile.str().c_str()); 867300906Sasomers return; 868300906Sasomers } 869300906Sasomers 870300906Sasomers int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); 871300906Sasomers if (fd == -1) { 872300906Sasomers syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", 873300906Sasomers saveFile.str().c_str()); 874300906Sasomers return; 875300906Sasomers } 876300906Sasomers SerializeEvList(m_events, fd); 877300906Sasomers SerializeEvList(m_tentativeEvents, fd, "tentative "); 878300906Sasomers close(fd); 879300906Sasomers} 880300906Sasomers 881300906Sasomers/* 882300906Sasomers * XXX: This method assumes that events may not contain embedded newlines. If 883300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch 884300906Sasomers * serialization formats 885300906Sasomers */ 886300906Sasomersvoid 887300906SasomersCaseFile::DeSerialize(ifstream &caseStream) 888300906Sasomers{ 889300906Sasomers string evString; 890300906Sasomers const EventFactory &factory(ZfsDaemon::Get().GetFactory()); 891300906Sasomers 892300906Sasomers caseStream >> std::noskipws >> std::ws; 893300906Sasomers while (caseStream.good()) { 894300906Sasomers /* 895300906Sasomers * Outline: 896300906Sasomers * read the beginning of a line and check it for 897300906Sasomers * "tentative". If found, discard "tentative". 898300906Sasomers * Create a new event 899300906Sasomers * continue 900300906Sasomers */ 901300906Sasomers EventList* destEvents; 902300906Sasomers const string tentFlag("tentative "); 903300906Sasomers string line; 904300906Sasomers std::stringbuf lineBuf; 905300906Sasomers 906300906Sasomers caseStream.get(lineBuf); 907300906Sasomers caseStream.ignore(); /*discard the newline character*/ 908300906Sasomers line = lineBuf.str(); 909300906Sasomers if (line.compare(0, tentFlag.size(), tentFlag) == 0) { 910300906Sasomers /* Discard "tentative" */ 911300906Sasomers line.erase(0, tentFlag.size()); 912300906Sasomers destEvents = &m_tentativeEvents; 913300906Sasomers } else { 914300906Sasomers destEvents = &m_events; 915300906Sasomers } 916300906Sasomers Event *event(Event::CreateEvent(factory, line)); 917300906Sasomers if (event != NULL) { 918300906Sasomers destEvents->push_back(event); 919300906Sasomers RegisterCallout(*event); 920300906Sasomers } 921300906Sasomers } 922300906Sasomers} 923300906Sasomers 924300906Sasomersvoid 925300906SasomersCaseFile::Close() 926300906Sasomers{ 927300906Sasomers /* 928300906Sasomers * This case is no longer relevant. Clean up our 929300906Sasomers * serialization file, and delete the case. 930300906Sasomers */ 931300906Sasomers syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", 932300906Sasomers PoolGUIDString().c_str(), VdevGUIDString().c_str(), 933300906Sasomers zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); 934300906Sasomers 935300906Sasomers /* 936300906Sasomers * Serialization of a Case with no event data, clears the 937300906Sasomers * Serialization data for that event. 938300906Sasomers */ 939300906Sasomers PurgeEvents(); 940300906Sasomers Serialize(); 941300906Sasomers 942300906Sasomers delete this; 943300906Sasomers} 944300906Sasomers 945300906Sasomersvoid 946300906SasomersCaseFile::OnGracePeriodEnded() 947300906Sasomers{ 948300906Sasomers bool should_fault, should_degrade; 949300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 950300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 951300906Sasomers 952300906Sasomers m_events.splice(m_events.begin(), m_tentativeEvents); 953300906Sasomers should_fault = ShouldFault(); 954300906Sasomers should_degrade = ShouldDegrade(); 955300906Sasomers 956300906Sasomers if (should_fault || should_degrade) { 957300906Sasomers if (zhp == NULL 958300906Sasomers || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { 959300906Sasomers /* 960300906Sasomers * Either the pool no longer exists 961300906Sasomers * or this vdev is no longer a member of 962300906Sasomers * the pool. 963300906Sasomers */ 964300906Sasomers Close(); 965300906Sasomers return; 966300906Sasomers } 967300906Sasomers 968300906Sasomers } 969300906Sasomers 970300906Sasomers /* A fault condition has priority over a degrade condition */ 971300906Sasomers if (ShouldFault()) { 972300906Sasomers /* Fault the vdev and close the case. */ 973300906Sasomers if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, 974300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 975300906Sasomers syslog(LOG_INFO, "Faulting vdev(%s/%s)", 976300906Sasomers PoolGUIDString().c_str(), 977300906Sasomers VdevGUIDString().c_str()); 978300906Sasomers Close(); 979300906Sasomers return; 980300906Sasomers } 981300906Sasomers else { 982300906Sasomers syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", 983300906Sasomers PoolGUIDString().c_str(), 984300906Sasomers VdevGUIDString().c_str(), 985300906Sasomers libzfs_error_action(g_zfsHandle), 986300906Sasomers libzfs_error_description(g_zfsHandle)); 987300906Sasomers } 988300906Sasomers } 989300906Sasomers else if (ShouldDegrade()) { 990300906Sasomers /* Degrade the vdev and close the case. */ 991300906Sasomers if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, 992300906Sasomers VDEV_AUX_ERR_EXCEEDED) == 0) { 993300906Sasomers syslog(LOG_INFO, "Degrading vdev(%s/%s)", 994300906Sasomers PoolGUIDString().c_str(), 995300906Sasomers VdevGUIDString().c_str()); 996300906Sasomers Close(); 997300906Sasomers return; 998300906Sasomers } 999300906Sasomers else { 1000300906Sasomers syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", 1001300906Sasomers PoolGUIDString().c_str(), 1002300906Sasomers VdevGUIDString().c_str(), 1003300906Sasomers libzfs_error_action(g_zfsHandle), 1004300906Sasomers libzfs_error_description(g_zfsHandle)); 1005300906Sasomers } 1006300906Sasomers } 1007300906Sasomers Serialize(); 1008300906Sasomers} 1009300906Sasomers 1010300906SasomersVdev 1011300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) { 1012300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1013300906Sasomers std::list<Vdev> children; 1014300906Sasomers std::list<Vdev>::iterator children_it; 1015300906Sasomers 1016300906Sasomers Vdev parent(vd.Parent()); 1017300906Sasomers Vdev replacing(NonexistentVdev); 1018300906Sasomers 1019300906Sasomers /* 1020300906Sasomers * To determine whether we are being replaced by another spare that 1021300906Sasomers * is still working, then make sure that it is currently spared and 1022300906Sasomers * that the spare is either resilvering or healthy. If any of these 1023300906Sasomers * conditions fail, then we are not being replaced by a spare. 1024300906Sasomers * 1025300906Sasomers * If the spare is healthy, then the case file should be closed very 1026300906Sasomers * soon after this check. 1027300906Sasomers */ 1028300906Sasomers if (parent.DoesNotExist() 1029300906Sasomers || parent.Name(zhp, /*verbose*/false) != "spare") 1030300906Sasomers return (NonexistentVdev); 1031300906Sasomers 1032300906Sasomers children = parent.Children(); 1033300906Sasomers children_it = children.begin(); 1034300906Sasomers for (;children_it != children.end(); children_it++) { 1035300906Sasomers Vdev child = *children_it; 1036300906Sasomers 1037300906Sasomers /* Skip our vdev. */ 1038300906Sasomers if (child.GUID() == VdevGUID()) 1039300906Sasomers continue; 1040300906Sasomers /* 1041300906Sasomers * Accept the first child that doesn't match our GUID, or 1042300906Sasomers * any resilvering/healthy device if one exists. 1043300906Sasomers */ 1044300906Sasomers if (replacing.DoesNotExist() || child.IsResilvering() 1045300906Sasomers || child.State() == VDEV_STATE_HEALTHY) 1046300906Sasomers replacing = child; 1047300906Sasomers } 1048300906Sasomers 1049300906Sasomers return (replacing); 1050300906Sasomers} 1051300906Sasomers 1052300906Sasomersbool 1053300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { 1054300906Sasomers nvlist_t *nvroot, *newvd; 1055300906Sasomers const char *poolname; 1056300906Sasomers string oldstr(VdevGUIDString()); 1057300906Sasomers bool retval = true; 1058300906Sasomers 1059300906Sasomers /* Figure out what pool we're working on */ 1060300906Sasomers ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); 1061300906Sasomers zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); 1062300906Sasomers if (zhp == NULL) { 1063300906Sasomers syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " 1064300919Sbdrewery "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); 1065300906Sasomers return (false); 1066300906Sasomers } 1067300906Sasomers poolname = zpool_get_name(zhp); 1068300906Sasomers Vdev vd(zhp, CaseVdev(zhp)); 1069300906Sasomers Vdev replaced(BeingReplacedBy(zhp)); 1070300906Sasomers 1071300906Sasomers if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { 1072300906Sasomers /* If we are already being replaced by a working spare, pass. */ 1073300906Sasomers if (replaced.IsResilvering() 1074300906Sasomers || replaced.State() == VDEV_STATE_HEALTHY) { 1075300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " 1076300906Sasomers "replaced", VdevGUIDString().c_str(), path); 1077300906Sasomers return (/*consumed*/false); 1078300906Sasomers } 1079300906Sasomers /* 1080300906Sasomers * If we have already been replaced by a spare, but that spare 1081300906Sasomers * is broken, we must spare the spare, not the original device. 1082300906Sasomers */ 1083300906Sasomers oldstr = replaced.GUIDString(); 1084300906Sasomers syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " 1085300906Sasomers "broken spare %s instead", VdevGUIDString().c_str(), 1086300906Sasomers path, oldstr.c_str()); 1087300906Sasomers } 1088300906Sasomers 1089300906Sasomers /* 1090300906Sasomers * Build a root vdev/leaf vdev configuration suitable for 1091300906Sasomers * zpool_vdev_attach. Only enough data for the kernel to find 1092300906Sasomers * the device (i.e. type and disk device node path) are needed. 1093300906Sasomers */ 1094300906Sasomers nvroot = NULL; 1095300906Sasomers newvd = NULL; 1096300906Sasomers 1097300906Sasomers if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 1098300906Sasomers || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { 1099300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " 1100300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1101300906Sasomers if (nvroot != NULL) 1102300906Sasomers nvlist_free(nvroot); 1103300906Sasomers return (false); 1104300906Sasomers } 1105300906Sasomers if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 1106300906Sasomers || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 1107300906Sasomers || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 1108300906Sasomers || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 1109300906Sasomers &newvd, 1) != 0) { 1110300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " 1111300906Sasomers "configuration data.", poolname, oldstr.c_str()); 1112300906Sasomers nvlist_free(newvd); 1113300906Sasomers nvlist_free(nvroot); 1114300906Sasomers return (true); 1115300906Sasomers } 1116300906Sasomers 1117300906Sasomers /* Data was copied when added to the root vdev. */ 1118300906Sasomers nvlist_free(newvd); 1119300906Sasomers 1120300906Sasomers retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, 1121300906Sasomers /*replace*/B_TRUE) == 0); 1122300906Sasomers if (retval) 1123300906Sasomers syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", 1124300906Sasomers poolname, oldstr.c_str(), path); 1125300906Sasomers else 1126300906Sasomers syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", 1127300906Sasomers poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), 1128300906Sasomers libzfs_error_description(g_zfsHandle)); 1129300906Sasomers nvlist_free(nvroot); 1130300906Sasomers 1131300906Sasomers return (retval); 1132300906Sasomers} 1133300906Sasomers 1134300906Sasomers/* Does the argument event refer to a checksum error? */ 1135300906Sasomersstatic bool 1136300906SasomersIsChecksumEvent(const Event* const event) 1137300906Sasomers{ 1138300906Sasomers return ("ereport.fs.zfs.checksum" == event->Value("type")); 1139300906Sasomers} 1140300906Sasomers 1141300906Sasomers/* Does the argument event refer to an IO error? */ 1142300906Sasomersstatic bool 1143300906SasomersIsIOEvent(const Event* const event) 1144300906Sasomers{ 1145300906Sasomers return ("ereport.fs.zfs.io" == event->Value("type")); 1146300906Sasomers} 1147300906Sasomers 1148300906Sasomersbool 1149300906SasomersCaseFile::ShouldDegrade() const 1150300906Sasomers{ 1151300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1152300906Sasomers IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); 1153300906Sasomers} 1154300906Sasomers 1155300906Sasomersbool 1156300906SasomersCaseFile::ShouldFault() const 1157300906Sasomers{ 1158300906Sasomers return (std::count_if(m_events.begin(), m_events.end(), 1159300906Sasomers IsIOEvent) > ZFS_DEGRADE_IO_COUNT); 1160300906Sasomers} 1161300906Sasomers 1162300906Sasomersnvlist_t * 1163300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const 1164300906Sasomers{ 1165300906Sasomers return (VdevIterator(zhp).Find(VdevGUID())); 1166300906Sasomers} 1167