1300906Sasomers/*-
2300906Sasomers * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3300906Sasomers * All rights reserved.
4300906Sasomers *
5300906Sasomers * Redistribution and use in source and binary forms, with or without
6300906Sasomers * modification, are permitted provided that the following conditions
7300906Sasomers * are met:
8300906Sasomers * 1. Redistributions of source code must retain the above copyright
9300906Sasomers *    notice, this list of conditions, and the following disclaimer,
10300906Sasomers *    without modification.
11300906Sasomers * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12300906Sasomers *    substantially similar to the "NO WARRANTY" disclaimer below
13300906Sasomers *    ("Disclaimer") and any redistribution must be conditioned upon
14300906Sasomers *    including a substantially similar Disclaimer requirement for further
15300906Sasomers *    binary redistribution.
16300906Sasomers *
17300906Sasomers * NO WARRANTY
18300906Sasomers * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19300906Sasomers * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20300906Sasomers * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21300906Sasomers * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22300906Sasomers * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23300906Sasomers * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24300906Sasomers * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25300906Sasomers * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26300906Sasomers * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27300906Sasomers * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28300906Sasomers * POSSIBILITY OF SUCH DAMAGES.
29300906Sasomers *
30300906Sasomers * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31300906Sasomers */
32300906Sasomers
33300906Sasomers/**
34300906Sasomers * \file case_file.cc
35300906Sasomers *
36300906Sasomers * We keep case files for any leaf vdev that is not in the optimal state.
37300906Sasomers * However, we only serialize to disk those events that need to be preserved
38300906Sasomers * across reboots.  For now, this is just a log of soft errors which we
39300906Sasomers * accumulate in order to mark a device as degraded.
40300906Sasomers */
41300906Sasomers#include <sys/cdefs.h>
42300906Sasomers#include <sys/time.h>
43300906Sasomers
44300906Sasomers#include <sys/fs/zfs.h>
45300906Sasomers
46300906Sasomers#include <dirent.h>
47300906Sasomers#include <iomanip>
48300906Sasomers#include <fstream>
49300906Sasomers#include <functional>
50300906Sasomers#include <sstream>
51300906Sasomers#include <syslog.h>
52300906Sasomers#include <unistd.h>
53300906Sasomers
54300906Sasomers#include <libzfs.h>
55300906Sasomers
56300906Sasomers#include <list>
57300906Sasomers#include <map>
58300906Sasomers#include <string>
59300906Sasomers
60300906Sasomers#include <devdctl/guid.h>
61300906Sasomers#include <devdctl/event.h>
62300906Sasomers#include <devdctl/event_factory.h>
63300906Sasomers#include <devdctl/exception.h>
64300906Sasomers#include <devdctl/consumer.h>
65300906Sasomers
66300906Sasomers#include "callout.h"
67300906Sasomers#include "vdev_iterator.h"
68300906Sasomers#include "zfsd_event.h"
69300906Sasomers#include "case_file.h"
70300906Sasomers#include "vdev.h"
71300906Sasomers#include "zfsd.h"
72300906Sasomers#include "zfsd_exception.h"
73300906Sasomers#include "zpool_list.h"
74300906Sasomers
75300906Sasomers__FBSDID("$FreeBSD: stable/11/cddl/usr.sbin/zfsd/case_file.cc 331395 2018-03-22 23:54:14Z mav $");
76300906Sasomers
77300906Sasomers/*============================ Namespace Control =============================*/
78300906Sasomersusing std::auto_ptr;
79300906Sasomersusing std::hex;
80300906Sasomersusing std::ifstream;
81300906Sasomersusing std::stringstream;
82300906Sasomersusing std::setfill;
83300906Sasomersusing std::setw;
84300906Sasomers
85300906Sasomersusing DevdCtl::Event;
86300906Sasomersusing DevdCtl::EventFactory;
87300906Sasomersusing DevdCtl::EventList;
88300906Sasomersusing DevdCtl::Guid;
89300906Sasomersusing DevdCtl::ParseException;
90300906Sasomers
91300906Sasomers/*--------------------------------- CaseFile ---------------------------------*/
92300906Sasomers//- CaseFile Static Data -------------------------------------------------------
93300906Sasomers
94300906SasomersCaseFileList  CaseFile::s_activeCases;
95300906Sasomersconst string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96300906Sasomersconst timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
97300906Sasomers
98300906Sasomers//- CaseFile Static Public Methods ---------------------------------------------
99300906SasomersCaseFile *
100300906SasomersCaseFile::Find(Guid poolGUID, Guid vdevGUID)
101300906Sasomers{
102300906Sasomers	for (CaseFileList::iterator curCase = s_activeCases.begin();
103300906Sasomers	     curCase != s_activeCases.end(); curCase++) {
104300906Sasomers
105326321Sasomers		if (((*curCase)->PoolGUID() != poolGUID
106326321Sasomers		  && Guid::InvalidGuid() != poolGUID)
107300906Sasomers		 || (*curCase)->VdevGUID() != vdevGUID)
108300906Sasomers			continue;
109300906Sasomers
110300906Sasomers		/*
111300906Sasomers		 * We only carry one active case per-vdev.
112300906Sasomers		 */
113300906Sasomers		return (*curCase);
114300906Sasomers	}
115300906Sasomers	return (NULL);
116300906Sasomers}
117300906Sasomers
118300906SasomersCaseFile *
119300906SasomersCaseFile::Find(const string &physPath)
120300906Sasomers{
121300906Sasomers	CaseFile *result = NULL;
122300906Sasomers
123300906Sasomers	for (CaseFileList::iterator curCase = s_activeCases.begin();
124300906Sasomers	     curCase != s_activeCases.end(); curCase++) {
125300906Sasomers
126300906Sasomers		if ((*curCase)->PhysicalPath() != physPath)
127300906Sasomers			continue;
128300906Sasomers
129300906Sasomers		if (result != NULL) {
130300906Sasomers			syslog(LOG_WARNING, "Multiple casefiles found for "
131300906Sasomers			    "physical path %s.  "
132300906Sasomers			    "This is most likely a bug in zfsd",
133300906Sasomers			    physPath.c_str());
134300906Sasomers		}
135300906Sasomers		result = *curCase;
136300906Sasomers	}
137300906Sasomers	return (result);
138300906Sasomers}
139300906Sasomers
140300906Sasomers
141300906Sasomersvoid
142300906SasomersCaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
143300906Sasomers{
144300906Sasomers	CaseFileList::iterator casefile;
145300906Sasomers	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
146300906Sasomers		CaseFileList::iterator next = casefile;
147300906Sasomers		next++;
148300906Sasomers		if (poolGUID == (*casefile)->PoolGUID())
149300906Sasomers			(*casefile)->ReEvaluate(event);
150300906Sasomers		casefile = next;
151300906Sasomers	}
152300906Sasomers}
153300906Sasomers
154300906SasomersCaseFile &
155300906SasomersCaseFile::Create(Vdev &vdev)
156300906Sasomers{
157300906Sasomers	CaseFile *activeCase;
158300906Sasomers
159300906Sasomers	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
160300906Sasomers	if (activeCase == NULL)
161300906Sasomers		activeCase = new CaseFile(vdev);
162300906Sasomers
163300906Sasomers	return (*activeCase);
164300906Sasomers}
165300906Sasomers
166300906Sasomersvoid
167300906SasomersCaseFile::DeSerialize()
168300906Sasomers{
169300906Sasomers	struct dirent **caseFiles;
170300906Sasomers
171300906Sasomers	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
172300906Sasomers			 DeSerializeSelector, /*compar*/NULL));
173300906Sasomers
174300906Sasomers	if (numCaseFiles == -1)
175300906Sasomers		return;
176300906Sasomers	if (numCaseFiles == 0) {
177300906Sasomers		free(caseFiles);
178300906Sasomers		return;
179300906Sasomers	}
180300906Sasomers
181300906Sasomers	for (int i = 0; i < numCaseFiles; i++) {
182300906Sasomers
183300906Sasomers		DeSerializeFile(caseFiles[i]->d_name);
184300906Sasomers		free(caseFiles[i]);
185300906Sasomers	}
186300906Sasomers	free(caseFiles);
187300906Sasomers}
188300906Sasomers
189330733Sasomersbool
190330733SasomersCaseFile::Empty()
191330733Sasomers{
192330733Sasomers	return (s_activeCases.empty());
193330733Sasomers}
194330733Sasomers
195300906Sasomersvoid
196300906SasomersCaseFile::LogAll()
197300906Sasomers{
198300906Sasomers	for (CaseFileList::iterator curCase = s_activeCases.begin();
199300906Sasomers	     curCase != s_activeCases.end(); curCase++)
200300906Sasomers		(*curCase)->Log();
201300906Sasomers}
202300906Sasomers
203300906Sasomersvoid
204300906SasomersCaseFile::PurgeAll()
205300906Sasomers{
206300906Sasomers	/*
207300906Sasomers	 * Serialize casefiles before deleting them so that they can be reread
208300906Sasomers	 * and revalidated during BuildCaseFiles.
209300906Sasomers	 * CaseFiles remove themselves from this list on destruction.
210300906Sasomers	 */
211300906Sasomers	while (s_activeCases.size() != 0) {
212300906Sasomers		CaseFile *casefile = s_activeCases.front();
213300906Sasomers		casefile->Serialize();
214300906Sasomers		delete casefile;
215300906Sasomers	}
216300906Sasomers
217300906Sasomers}
218300906Sasomers
219300906Sasomers//- CaseFile Public Methods ----------------------------------------------------
220300906Sasomersbool
221300906SasomersCaseFile::RefreshVdevState()
222300906Sasomers{
223300906Sasomers	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
224300906Sasomers	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
225300906Sasomers	if (casePool == NULL)
226300906Sasomers		return (false);
227300906Sasomers
228300906Sasomers	Vdev vd(casePool, CaseVdev(casePool));
229300906Sasomers	if (vd.DoesNotExist())
230300906Sasomers		return (false);
231300906Sasomers
232300906Sasomers	m_vdevState    = vd.State();
233300906Sasomers	m_vdevPhysPath = vd.PhysicalPath();
234300906Sasomers	return (true);
235300906Sasomers}
236300906Sasomers
237300906Sasomersbool
238300906SasomersCaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
239300906Sasomers{
240300906Sasomers	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
241300906Sasomers	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
242331395Smav	zpool_boot_label_t boot_type;
243331395Smav	uint64_t boot_size;
244300906Sasomers
245300906Sasomers	if (pool == NULL || !RefreshVdevState()) {
246300906Sasomers		/*
247300906Sasomers		 * The pool or vdev for this case file is no longer
248300906Sasomers		 * part of the configuration.  This can happen
249300906Sasomers		 * if we process a device arrival notification
250300906Sasomers		 * before seeing the ZFS configuration change
251300906Sasomers		 * event.
252300906Sasomers		 */
253300906Sasomers		syslog(LOG_INFO,
254300906Sasomers		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
255300906Sasomers		       "Closing\n",
256300906Sasomers		       PoolGUIDString().c_str(),
257300906Sasomers		       VdevGUIDString().c_str());
258300906Sasomers		Close();
259300906Sasomers
260300906Sasomers		/*
261300906Sasomers		 * Since this event was not used to close this
262300906Sasomers		 * case, do not report it as consumed.
263300906Sasomers		 */
264300906Sasomers		return (/*consumed*/false);
265300906Sasomers	}
266300906Sasomers
267300906Sasomers	if (VdevState() > VDEV_STATE_CANT_OPEN) {
268300906Sasomers		/*
269300906Sasomers		 * For now, newly discovered devices only help for
270300906Sasomers		 * devices that are missing.  In the future, we might
271300906Sasomers		 * use a newly inserted spare to replace a degraded
272300906Sasomers		 * or faulted device.
273300906Sasomers		 */
274300906Sasomers		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
275300906Sasomers		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
276300906Sasomers		return (/*consumed*/false);
277300906Sasomers	}
278300906Sasomers
279300906Sasomers	if (vdev != NULL
280326321Sasomers	 && ( vdev->PoolGUID() == m_poolGUID
281326321Sasomers	   || vdev->PoolGUID() == Guid::InvalidGuid())
282300906Sasomers	 && vdev->GUID() == m_vdevGUID) {
283300906Sasomers
284300906Sasomers		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
285300906Sasomers				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
286300906Sasomers				  &m_vdevState);
287300906Sasomers		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
288300906Sasomers		       zpool_get_name(pool), vdev->GUIDString().c_str(),
289300906Sasomers		       devPath.c_str(),
290300906Sasomers		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
291300906Sasomers
292300906Sasomers		/*
293300906Sasomers		 * Check the vdev state post the online action to see
294300906Sasomers		 * if we can retire this case.
295300906Sasomers		 */
296300906Sasomers		CloseIfSolved();
297300906Sasomers
298300906Sasomers		return (/*consumed*/true);
299300906Sasomers	}
300300906Sasomers
301300906Sasomers	/*
302300906Sasomers	 * If the auto-replace policy is enabled, and we have physical
303300906Sasomers	 * path information, try a physical path replacement.
304300906Sasomers	 */
305300906Sasomers	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
306300906Sasomers		syslog(LOG_INFO,
307300906Sasomers		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
308300906Sasomers		       "Ignoring device insertion.\n",
309300906Sasomers		       PoolGUIDString().c_str(),
310300906Sasomers		       VdevGUIDString().c_str(),
311300906Sasomers		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
312300906Sasomers		return (/*consumed*/false);
313300906Sasomers	}
314300906Sasomers
315300906Sasomers	if (PhysicalPath().empty()) {
316300906Sasomers		syslog(LOG_INFO,
317300906Sasomers		       "CaseFile(%s:%s:%s): No physical path information.  "
318300906Sasomers		       "Ignoring device insertion.\n",
319300906Sasomers		       PoolGUIDString().c_str(),
320300906Sasomers		       VdevGUIDString().c_str(),
321300906Sasomers		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
322300906Sasomers		return (/*consumed*/false);
323300906Sasomers	}
324300906Sasomers
325300906Sasomers	if (physPath != PhysicalPath()) {
326300906Sasomers		syslog(LOG_INFO,
327300906Sasomers		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
328300906Sasomers		       "Ignoring device insertion.\n",
329300906Sasomers		       PoolGUIDString().c_str(),
330300906Sasomers		       VdevGUIDString().c_str(),
331300906Sasomers		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
332300906Sasomers		return (/*consumed*/false);
333300906Sasomers	}
334300906Sasomers
335300906Sasomers	/* Write a label on the newly inserted disk. */
336331395Smav	if (zpool_is_bootable(pool))
337331395Smav		boot_type = ZPOOL_COPY_BOOT_LABEL;
338331395Smav	else
339331395Smav		boot_type = ZPOOL_NO_BOOT_LABEL;
340331395Smav	boot_size = zpool_get_prop_int(pool, ZPOOL_PROP_BOOTSIZE, NULL);
341331395Smav	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str(),
342331395Smav	    boot_type, boot_size, NULL) != 0) {
343300906Sasomers		syslog(LOG_ERR,
344300906Sasomers		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
345300906Sasomers		       zpool_get_name(pool), VdevGUIDString().c_str(),
346300906Sasomers		       libzfs_error_action(g_zfsHandle),
347300906Sasomers		       libzfs_error_description(g_zfsHandle));
348300906Sasomers		return (/*consumed*/false);
349300906Sasomers	}
350300906Sasomers
351300906Sasomers	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
352300906Sasomers	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
353300906Sasomers	    devPath.c_str());
354300906Sasomers	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
355300906Sasomers}
356300906Sasomers
357300906Sasomersbool
358300906SasomersCaseFile::ReEvaluate(const ZfsEvent &event)
359300906Sasomers{
360300906Sasomers	bool consumed(false);
361300906Sasomers
362300906Sasomers	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
363300906Sasomers		/*
364300906Sasomers		 * The Vdev we represent has been removed from the
365300906Sasomers		 * configuration.  This case is no longer of value.
366300906Sasomers		 */
367300906Sasomers		Close();
368300906Sasomers
369300906Sasomers		return (/*consumed*/true);
370300906Sasomers	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
371300906Sasomers		/* This Pool has been destroyed.  Discard the case */
372300906Sasomers		Close();
373300906Sasomers
374300906Sasomers		return (/*consumed*/true);
375300906Sasomers	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
376300906Sasomers		RefreshVdevState();
377300906Sasomers		if (VdevState() < VDEV_STATE_HEALTHY)
378300906Sasomers			consumed = ActivateSpare();
379300906Sasomers	}
380300906Sasomers
381300906Sasomers
382300906Sasomers	if (event.Value("class") == "resource.fs.zfs.removed") {
383300906Sasomers		bool spare_activated;
384300906Sasomers
385300906Sasomers		if (!RefreshVdevState()) {
386300906Sasomers			/*
387300906Sasomers			 * The pool or vdev for this case file is no longer
388300906Sasomers			 * part of the configuration.  This can happen
389300906Sasomers			 * if we process a device arrival notification
390300906Sasomers			 * before seeing the ZFS configuration change
391300906Sasomers			 * event.
392300906Sasomers			 */
393300906Sasomers			syslog(LOG_INFO,
394300906Sasomers			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
395300906Sasomers			       "unconfigured.  Closing\n",
396300906Sasomers			       PoolGUIDString().c_str(),
397300906Sasomers			       VdevGUIDString().c_str());
398300906Sasomers			/*
399300906Sasomers			 * Close the case now so we won't waste cycles in the
400300906Sasomers			 * system rescan
401300906Sasomers			 */
402300906Sasomers			Close();
403300906Sasomers
404300906Sasomers			/*
405300906Sasomers			 * Since this event was not used to close this
406300906Sasomers			 * case, do not report it as consumed.
407300906Sasomers			 */
408300906Sasomers			return (/*consumed*/false);
409300906Sasomers		}
410300906Sasomers
411300906Sasomers		/*
412300906Sasomers		 * Discard any tentative I/O error events for
413300906Sasomers		 * this case.  They were most likely caused by the
414300906Sasomers		 * hot-unplug of this device.
415300906Sasomers		 */
416300906Sasomers		PurgeTentativeEvents();
417300906Sasomers
418300906Sasomers		/* Try to activate spares if they are available */
419300906Sasomers		spare_activated = ActivateSpare();
420300906Sasomers
421300906Sasomers		/*
422300906Sasomers		 * Rescan the drives in the system to see if a recent
423300906Sasomers		 * drive arrival can be used to solve this case.
424300906Sasomers		 */
425300906Sasomers		ZfsDaemon::RequestSystemRescan();
426300906Sasomers
427300906Sasomers		/*
428300906Sasomers		 * Consume the event if we successfully activated a spare.
429300906Sasomers		 * Otherwise, leave it in the unconsumed events list so that the
430300906Sasomers		 * future addition of a spare to this pool might be able to
431300906Sasomers		 * close the case
432300906Sasomers		 */
433300906Sasomers		consumed = spare_activated;
434300906Sasomers	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
435300906Sasomers		RefreshVdevState();
436300906Sasomers		/*
437300906Sasomers		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
438300906Sasomers		 * activate a hotspare.  Otherwise, ignore the event
439300906Sasomers		 */
440300906Sasomers		if (VdevState() == VDEV_STATE_FAULTED ||
441300906Sasomers		    VdevState() == VDEV_STATE_DEGRADED ||
442300906Sasomers		    VdevState() == VDEV_STATE_CANT_OPEN)
443300906Sasomers			(void) ActivateSpare();
444300906Sasomers		consumed = true;
445300906Sasomers	}
446300906Sasomers	else if (event.Value("class") == "ereport.fs.zfs.io" ||
447300906Sasomers	         event.Value("class") == "ereport.fs.zfs.checksum") {
448300906Sasomers
449300906Sasomers		m_tentativeEvents.push_front(event.DeepCopy());
450300906Sasomers		RegisterCallout(event);
451300906Sasomers		consumed = true;
452300906Sasomers	}
453300906Sasomers
454300906Sasomers	bool closed(CloseIfSolved());
455300906Sasomers
456300906Sasomers	return (consumed || closed);
457300906Sasomers}
458300906Sasomers
459329792Sasomers/* Find a Vdev containing the vdev with the given GUID */
460329792Sasomersstatic nvlist_t*
461329792Sasomersfind_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
462329792Sasomers{
463329792Sasomers	nvlist_t **vdevChildren;
464329792Sasomers	int        error;
465329792Sasomers	unsigned   ch, numChildren;
466300906Sasomers
467329792Sasomers	error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
468329792Sasomers					   &vdevChildren, &numChildren);
469329792Sasomers
470329792Sasomers	if (error != 0 || numChildren == 0)
471329792Sasomers		return (NULL);
472329792Sasomers
473329792Sasomers	for (ch = 0; ch < numChildren; ch++) {
474329792Sasomers		nvlist *result;
475329792Sasomers		Vdev vdev(pool_config, vdevChildren[ch]);
476329792Sasomers
477329792Sasomers		if (vdev.GUID() == child_guid)
478329792Sasomers			return (config);
479329792Sasomers
480329792Sasomers		result = find_parent(pool_config, vdevChildren[ch], child_guid);
481329792Sasomers		if (result != NULL)
482329792Sasomers			return (result);
483329792Sasomers	}
484329792Sasomers
485329792Sasomers	return (NULL);
486329792Sasomers}
487329792Sasomers
488300906Sasomersbool
489300906SasomersCaseFile::ActivateSpare() {
490329792Sasomers	nvlist_t	*config, *nvroot, *parent_config;
491300906Sasomers	nvlist_t       **spares;
492300906Sasomers	char		*devPath, *vdev_type;
493300906Sasomers	const char	*poolname;
494300906Sasomers	u_int		 nspares, i;
495300906Sasomers	int		 error;
496300906Sasomers
497300906Sasomers	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
498300906Sasomers	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
499300906Sasomers	if (zhp == NULL) {
500300906Sasomers		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
501300919Sbdrewery		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
502300906Sasomers		return (false);
503300906Sasomers	}
504300906Sasomers	poolname = zpool_get_name(zhp);
505300906Sasomers	config = zpool_get_config(zhp, NULL);
506300906Sasomers	if (config == NULL) {
507300906Sasomers		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
508300906Sasomers		       "config for pool %s", poolname);
509300906Sasomers		return (false);
510300906Sasomers	}
511300906Sasomers	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
512300906Sasomers	if (error != 0){
513300906Sasomers		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
514300906Sasomers		       "tree for pool %s", poolname);
515300906Sasomers		return (false);
516300906Sasomers	}
517329792Sasomers
518329792Sasomers	parent_config = find_parent(config, nvroot, m_vdevGUID);
519329792Sasomers	if (parent_config != NULL) {
520329792Sasomers		char *parent_type;
521329792Sasomers
522329792Sasomers		/*
523329792Sasomers		 * Don't activate spares for members of a "replacing" vdev.
524329792Sasomers		 * They're already dealt with.  Sparing them will just drag out
525329792Sasomers		 * the resilver process.
526329792Sasomers		 */
527329792Sasomers		error = nvlist_lookup_string(parent_config,
528329792Sasomers		    ZPOOL_CONFIG_TYPE, &parent_type);
529329792Sasomers		if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
530329792Sasomers			return (false);
531329792Sasomers	}
532329792Sasomers
533300906Sasomers	nspares = 0;
534300906Sasomers	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
535300906Sasomers				   &nspares);
536300906Sasomers	if (nspares == 0) {
537300906Sasomers		/* The pool has no spares configured */
538300906Sasomers		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
539300906Sasomers		       "No spares available for pool %s", poolname);
540300906Sasomers		return (false);
541300906Sasomers	}
542300906Sasomers	for (i = 0; i < nspares; i++) {
543300906Sasomers		uint64_t    *nvlist_array;
544300906Sasomers		vdev_stat_t *vs;
545300906Sasomers		uint_t	     nstats;
546300906Sasomers
547300906Sasomers		if (nvlist_lookup_uint64_array(spares[i],
548300906Sasomers		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
549300906Sasomers			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
550300906Sasomers			       "find vdev stats for pool %s, spare %d",
551300906Sasomers			       poolname, i);
552300906Sasomers			return (false);
553300906Sasomers		}
554300906Sasomers		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
555300906Sasomers
556300906Sasomers		if ((vs->vs_aux != VDEV_AUX_SPARED)
557300906Sasomers		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
558300906Sasomers			/* We found a usable spare */
559300906Sasomers			break;
560300906Sasomers		}
561300906Sasomers	}
562300906Sasomers
563300906Sasomers	if (i == nspares) {
564300906Sasomers		/* No available spares were found */
565300906Sasomers		return (false);
566300906Sasomers	}
567300906Sasomers
568300906Sasomers	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
569300906Sasomers	if (error != 0) {
570300906Sasomers		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
571300906Sasomers		       "the path of pool %s, spare %d. Error %d",
572300906Sasomers		       poolname, i, error);
573300906Sasomers		return (false);
574300906Sasomers	}
575300906Sasomers
576300906Sasomers	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
577300906Sasomers	if (error != 0) {
578300906Sasomers		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
579300906Sasomers		       "the vdev type of pool %s, spare %d. Error %d",
580300906Sasomers		       poolname, i, error);
581300906Sasomers		return (false);
582300906Sasomers	}
583300906Sasomers
584300906Sasomers	return (Replace(vdev_type, devPath, /*isspare*/true));
585300906Sasomers}
586300906Sasomers
587300906Sasomersvoid
588300906SasomersCaseFile::RegisterCallout(const Event &event)
589300906Sasomers{
590300906Sasomers	timeval now, countdown, elapsed, timestamp, zero, remaining;
591300906Sasomers
592300906Sasomers	gettimeofday(&now, 0);
593300906Sasomers	timestamp = event.GetTimestamp();
594300906Sasomers	timersub(&now, &timestamp, &elapsed);
595300906Sasomers	timersub(&s_removeGracePeriod, &elapsed, &countdown);
596300906Sasomers	/*
597300906Sasomers	 * If countdown is <= zero, Reset the timer to the
598300906Sasomers	 * smallest positive time value instead
599300906Sasomers	 */
600300906Sasomers	timerclear(&zero);
601300906Sasomers	if (timercmp(&countdown, &zero, <=)) {
602300906Sasomers		timerclear(&countdown);
603300906Sasomers		countdown.tv_usec = 1;
604300906Sasomers	}
605300906Sasomers
606300906Sasomers	remaining = m_tentativeTimer.TimeRemaining();
607300906Sasomers
608300906Sasomers	if (!m_tentativeTimer.IsPending()
609300906Sasomers	 || timercmp(&countdown, &remaining, <))
610300906Sasomers		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
611300906Sasomers}
612300906Sasomers
613300906Sasomers
614300906Sasomersbool
615300906SasomersCaseFile::CloseIfSolved()
616300906Sasomers{
617300906Sasomers	if (m_events.empty()
618300906Sasomers	 && m_tentativeEvents.empty()) {
619300906Sasomers
620300906Sasomers		/*
621300906Sasomers		 * We currently do not track or take actions on
622300906Sasomers		 * devices in the degraded or faulted state.
623300906Sasomers		 * Once we have support for spare pools, we'll
624300906Sasomers		 * retain these cases so that any spares added in
625300906Sasomers		 * the future can be applied to them.
626300906Sasomers		 */
627300906Sasomers		switch (VdevState()) {
628300906Sasomers		case VDEV_STATE_HEALTHY:
629300906Sasomers			/* No need to keep cases for healthy vdevs */
630300906Sasomers			Close();
631300906Sasomers			return (true);
632300906Sasomers		case VDEV_STATE_REMOVED:
633300906Sasomers		case VDEV_STATE_CANT_OPEN:
634300906Sasomers			/*
635300906Sasomers			 * Keep open.  We may solve it with a newly inserted
636300906Sasomers			 * device.
637300906Sasomers			 */
638300906Sasomers		case VDEV_STATE_FAULTED:
639300906Sasomers		case VDEV_STATE_DEGRADED:
640300906Sasomers			/*
641300906Sasomers			 * Keep open.  We may solve it with the future
642300906Sasomers			 * addition of a spare to the pool
643300906Sasomers			 */
644300906Sasomers		case VDEV_STATE_UNKNOWN:
645300906Sasomers		case VDEV_STATE_CLOSED:
646300906Sasomers		case VDEV_STATE_OFFLINE:
647300906Sasomers			/*
648300906Sasomers			 * Keep open?  This may not be the correct behavior,
649300906Sasomers			 * but it's what we've always done
650300906Sasomers			 */
651300906Sasomers			;
652300906Sasomers		}
653300906Sasomers
654300906Sasomers		/*
655300906Sasomers		 * Re-serialize the case in order to remove any
656300906Sasomers		 * previous event data.
657300906Sasomers		 */
658300906Sasomers		Serialize();
659300906Sasomers	}
660300906Sasomers
661300906Sasomers	return (false);
662300906Sasomers}
663300906Sasomers
664300906Sasomersvoid
665300906SasomersCaseFile::Log()
666300906Sasomers{
667300906Sasomers	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
668300906Sasomers	       VdevGUIDString().c_str(), PhysicalPath().c_str());
669300906Sasomers	syslog(LOG_INFO, "\tVdev State = %s\n",
670300906Sasomers	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
671300906Sasomers	if (m_tentativeEvents.size() != 0) {
672300906Sasomers		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
673300906Sasomers		for (EventList::iterator event(m_tentativeEvents.begin());
674300906Sasomers		     event != m_tentativeEvents.end(); event++)
675300906Sasomers			(*event)->Log(LOG_INFO);
676300906Sasomers	}
677300906Sasomers	if (m_events.size() != 0) {
678300906Sasomers		syslog(LOG_INFO, "\t=== Events ===\n");
679300906Sasomers		for (EventList::iterator event(m_events.begin());
680300906Sasomers		     event != m_events.end(); event++)
681300906Sasomers			(*event)->Log(LOG_INFO);
682300906Sasomers	}
683300906Sasomers}
684300906Sasomers
685300906Sasomers//- CaseFile Static Protected Methods ------------------------------------------
686300906Sasomersvoid
687300906SasomersCaseFile::OnGracePeriodEnded(void *arg)
688300906Sasomers{
689300906Sasomers	CaseFile &casefile(*static_cast<CaseFile *>(arg));
690300906Sasomers
691300906Sasomers	casefile.OnGracePeriodEnded();
692300906Sasomers}
693300906Sasomers
694300906Sasomersint
695300906SasomersCaseFile::DeSerializeSelector(const struct dirent *dirEntry)
696300906Sasomers{
697300906Sasomers	uint64_t poolGUID;
698300906Sasomers	uint64_t vdevGUID;
699300906Sasomers
700300906Sasomers	if (dirEntry->d_type == DT_REG
701300919Sbdrewery	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
702300906Sasomers		   &poolGUID, &vdevGUID) == 2)
703300906Sasomers		return (1);
704300906Sasomers	return (0);
705300906Sasomers}
706300906Sasomers
707300906Sasomersvoid
708300906SasomersCaseFile::DeSerializeFile(const char *fileName)
709300906Sasomers{
710300906Sasomers	string	  fullName(s_caseFilePath + '/' + fileName);
711300906Sasomers	CaseFile *existingCaseFile(NULL);
712300906Sasomers	CaseFile *caseFile(NULL);
713300906Sasomers
714300906Sasomers	try {
715300906Sasomers		uint64_t poolGUID;
716300906Sasomers		uint64_t vdevGUID;
717300906Sasomers		nvlist_t *vdevConf;
718300906Sasomers
719314431Sasomers		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
720314431Sasomers		       &poolGUID, &vdevGUID) != 2) {
721314431Sasomers			throw ZfsdException("CaseFile::DeSerialize: "
722314431Sasomers			    "Unintelligible CaseFile filename %s.\n", fileName);
723314431Sasomers		}
724300906Sasomers		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
725300906Sasomers		if (existingCaseFile != NULL) {
726300906Sasomers			/*
727300906Sasomers			 * If the vdev is already degraded or faulted,
728300906Sasomers			 * there's no point in keeping the state around
729300906Sasomers			 * that we use to put a drive into the degraded
730300906Sasomers			 * state.  However, if the vdev is simply missing,
731300906Sasomers			 * preserve the case data in the hopes that it will
732300906Sasomers			 * return.
733300906Sasomers			 */
734300906Sasomers			caseFile = existingCaseFile;
735300906Sasomers			vdev_state curState(caseFile->VdevState());
736300906Sasomers			if (curState > VDEV_STATE_CANT_OPEN
737300906Sasomers			 && curState < VDEV_STATE_HEALTHY) {
738300906Sasomers				unlink(fileName);
739300906Sasomers				return;
740300906Sasomers			}
741300906Sasomers		} else {
742300906Sasomers			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
743300906Sasomers			if (zpl.empty()
744300906Sasomers			 || (vdevConf = VdevIterator(zpl.front())
745300906Sasomers						    .Find(vdevGUID)) == NULL) {
746300906Sasomers				/*
747300906Sasomers				 * Either the pool no longer exists
748300906Sasomers				 * or this vdev is no longer a member of
749300906Sasomers				 * the pool.
750300906Sasomers				 */
751300906Sasomers				unlink(fullName.c_str());
752300906Sasomers				return;
753300906Sasomers			}
754300906Sasomers
755300906Sasomers			/*
756300906Sasomers			 * Any vdev we find that does not have a case file
757300906Sasomers			 * must be in the healthy state and thus worthy of
758300906Sasomers			 * continued SERD data tracking.
759300906Sasomers			 */
760300906Sasomers			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
761300906Sasomers		}
762300906Sasomers
763300906Sasomers		ifstream caseStream(fullName.c_str());
764300906Sasomers		if (!caseStream)
765300906Sasomers			throw ZfsdException("CaseFile::DeSerialize: Unable to "
766300906Sasomers					    "read %s.\n", fileName);
767300906Sasomers
768300906Sasomers		caseFile->DeSerialize(caseStream);
769300906Sasomers	} catch (const ParseException &exp) {
770300906Sasomers
771300906Sasomers		exp.Log();
772300906Sasomers		if (caseFile != existingCaseFile)
773300906Sasomers			delete caseFile;
774300906Sasomers
775300906Sasomers		/*
776300906Sasomers		 * Since we can't parse the file, unlink it so we don't
777300906Sasomers		 * trip over it again.
778300906Sasomers		 */
779300906Sasomers		unlink(fileName);
780300906Sasomers	} catch (const ZfsdException &zfsException) {
781300906Sasomers
782300906Sasomers		zfsException.Log();
783300906Sasomers		if (caseFile != existingCaseFile)
784300906Sasomers			delete caseFile;
785300906Sasomers	}
786300906Sasomers}
787300906Sasomers
788300906Sasomers//- CaseFile Protected Methods -------------------------------------------------
789300906SasomersCaseFile::CaseFile(const Vdev &vdev)
790300906Sasomers : m_poolGUID(vdev.PoolGUID()),
791300906Sasomers   m_vdevGUID(vdev.GUID()),
792300906Sasomers   m_vdevState(vdev.State()),
793300906Sasomers   m_vdevPhysPath(vdev.PhysicalPath())
794300906Sasomers{
795300906Sasomers	stringstream guidString;
796300906Sasomers
797300906Sasomers	guidString << m_vdevGUID;
798300906Sasomers	m_vdevGUIDString = guidString.str();
799300906Sasomers	guidString.str("");
800300906Sasomers	guidString << m_poolGUID;
801300906Sasomers	m_poolGUIDString = guidString.str();
802300906Sasomers
803300906Sasomers	s_activeCases.push_back(this);
804300906Sasomers
805300906Sasomers	syslog(LOG_INFO, "Creating new CaseFile:\n");
806300906Sasomers	Log();
807300906Sasomers}
808300906Sasomers
/*
 * Destroy a case: free all events it still holds, stop its tentative
 * timer, and remove it from the list of active cases.
 */
809300906SasomersCaseFile::~CaseFile()
810300906Sasomers{
	/* Delete every Event still held in m_events and m_tentativeEvents. */
811300906Sasomers	PurgeEvents();
812300906Sasomers	PurgeTentativeEvents();
	/*
	 * NOTE(review): presumably stopped so the timer cannot call back
	 * into this object after it is gone -- confirm.
	 */
813300906Sasomers	m_tentativeTimer.Stop();
	/* Undo the push_back() performed by the constructor. */
814300906Sasomers	s_activeCases.remove(this);
815300906Sasomers}
816300906Sasomers
817300906Sasomersvoid
818300906SasomersCaseFile::PurgeEvents()
819300906Sasomers{
820300906Sasomers	for (EventList::iterator event(m_events.begin());
821300906Sasomers	     event != m_events.end(); event++)
822300906Sasomers		delete *event;
823300906Sasomers
824300906Sasomers	m_events.clear();
825300906Sasomers}
826300906Sasomers
827300906Sasomersvoid
828300906SasomersCaseFile::PurgeTentativeEvents()
829300906Sasomers{
830300906Sasomers	for (EventList::iterator event(m_tentativeEvents.begin());
831300906Sasomers	     event != m_tentativeEvents.end(); event++)
832300906Sasomers		delete *event;
833300906Sasomers
834300906Sasomers	m_tentativeEvents.clear();
835300906Sasomers}
836300906Sasomers
837300906Sasomersvoid
838300906SasomersCaseFile::SerializeEvList(const EventList events, int fd,
839300906Sasomers		const char* prefix) const
840300906Sasomers{
841300906Sasomers	if (events.empty())
842300906Sasomers		return;
843300906Sasomers	for (EventList::const_iterator curEvent = events.begin();
844300906Sasomers	     curEvent != events.end(); curEvent++) {
845300906Sasomers		const string &eventString((*curEvent)->GetEventString());
846300906Sasomers
847300906Sasomers		// TODO: replace many write(2) calls with a single writev(2)
848300906Sasomers		if (prefix)
849300906Sasomers			write(fd, prefix, strlen(prefix));
850300906Sasomers		write(fd, eventString.c_str(), eventString.length());
851300906Sasomers	}
852300906Sasomers}
853300906Sasomers
854300906Sasomersvoid
855300906SasomersCaseFile::Serialize()
856300906Sasomers{
857300906Sasomers	stringstream saveFile;
858300906Sasomers
859300906Sasomers	saveFile << setfill('0')
860300906Sasomers		 << s_caseFilePath << "/"
861300906Sasomers		 << "pool_" << PoolGUIDString()
862300906Sasomers		 << "_vdev_" << VdevGUIDString()
863300906Sasomers		 << ".case";
864300906Sasomers
865300906Sasomers	if (m_events.empty() && m_tentativeEvents.empty()) {
866300906Sasomers		unlink(saveFile.str().c_str());
867300906Sasomers		return;
868300906Sasomers	}
869300906Sasomers
870300906Sasomers	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
871300906Sasomers	if (fd == -1) {
872300906Sasomers		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
873300906Sasomers		       saveFile.str().c_str());
874300906Sasomers		return;
875300906Sasomers	}
876300906Sasomers	SerializeEvList(m_events, fd);
877300906Sasomers	SerializeEvList(m_tentativeEvents, fd, "tentative ");
878300906Sasomers	close(fd);
879300906Sasomers}
880300906Sasomers
881300906Sasomers/*
882300906Sasomers * XXX: This method assumes that events may not contain embedded newlines.  If
883300906Sasomers * ever events can contain embedded newlines, then CaseFile must switch
884300906Sasomers * serialization formats
885300906Sasomers */
886300906Sasomersvoid
887300906SasomersCaseFile::DeSerialize(ifstream &caseStream)
888300906Sasomers{
889300906Sasomers	string	      evString;
890300906Sasomers	const EventFactory &factory(ZfsDaemon::Get().GetFactory());
891300906Sasomers
892300906Sasomers	caseStream >> std::noskipws >> std::ws;
893300906Sasomers	while (caseStream.good()) {
894300906Sasomers		/*
895300906Sasomers		 * Outline:
896300906Sasomers		 * read the beginning of a line and check it for
897300906Sasomers		 * "tentative".  If found, discard "tentative".
898300906Sasomers		 * Create a new event
899300906Sasomers		 * continue
900300906Sasomers		 */
901300906Sasomers		EventList* destEvents;
902300906Sasomers		const string tentFlag("tentative ");
903300906Sasomers		string line;
904300906Sasomers		std::stringbuf lineBuf;
905300906Sasomers
906300906Sasomers		caseStream.get(lineBuf);
907300906Sasomers		caseStream.ignore();  /*discard the newline character*/
908300906Sasomers		line = lineBuf.str();
909300906Sasomers		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
910300906Sasomers			/* Discard "tentative" */
911300906Sasomers			line.erase(0, tentFlag.size());
912300906Sasomers			destEvents = &m_tentativeEvents;
913300906Sasomers		} else {
914300906Sasomers			destEvents = &m_events;
915300906Sasomers		}
916300906Sasomers		Event *event(Event::CreateEvent(factory, line));
917300906Sasomers		if (event != NULL) {
918300906Sasomers			destEvents->push_back(event);
919300906Sasomers			RegisterCallout(*event);
920300906Sasomers		}
921300906Sasomers	}
922300906Sasomers}
923300906Sasomers
924300906Sasomersvoid
925300906SasomersCaseFile::Close()
926300906Sasomers{
927300906Sasomers	/*
928300906Sasomers	 * This case is no longer relevant.  Clean up our
929300906Sasomers	 * serialization file, and delete the case.
930300906Sasomers	 */
931300906Sasomers	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
932300906Sasomers	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
933300906Sasomers	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
934300906Sasomers
935300906Sasomers	/*
936300906Sasomers	 * Serialization of a Case with no event data, clears the
937300906Sasomers	 * Serialization data for that event.
938300906Sasomers	 */
939300906Sasomers	PurgeEvents();
940300906Sasomers	Serialize();
941300906Sasomers
942300906Sasomers	delete this;
943300906Sasomers}
944300906Sasomers
945300906Sasomersvoid
946300906SasomersCaseFile::OnGracePeriodEnded()
947300906Sasomers{
948300906Sasomers	bool should_fault, should_degrade;
949300906Sasomers	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
950300906Sasomers	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
951300906Sasomers
952300906Sasomers	m_events.splice(m_events.begin(), m_tentativeEvents);
953300906Sasomers	should_fault = ShouldFault();
954300906Sasomers	should_degrade = ShouldDegrade();
955300906Sasomers
956300906Sasomers	if (should_fault || should_degrade) {
957300906Sasomers		if (zhp == NULL
958300906Sasomers		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
959300906Sasomers			/*
960300906Sasomers			 * Either the pool no longer exists
961300906Sasomers			 * or this vdev is no longer a member of
962300906Sasomers			 * the pool.
963300906Sasomers			 */
964300906Sasomers			Close();
965300906Sasomers			return;
966300906Sasomers		}
967300906Sasomers
968300906Sasomers	}
969300906Sasomers
970300906Sasomers	/* A fault condition has priority over a degrade condition */
971300906Sasomers	if (ShouldFault()) {
972300906Sasomers		/* Fault the vdev and close the case. */
973300906Sasomers		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
974300906Sasomers				       VDEV_AUX_ERR_EXCEEDED) == 0) {
975300906Sasomers			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
976300906Sasomers			       PoolGUIDString().c_str(),
977300906Sasomers			       VdevGUIDString().c_str());
978300906Sasomers			Close();
979300906Sasomers			return;
980300906Sasomers		}
981300906Sasomers		else {
982300906Sasomers			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
983300906Sasomers			       PoolGUIDString().c_str(),
984300906Sasomers			       VdevGUIDString().c_str(),
985300906Sasomers			       libzfs_error_action(g_zfsHandle),
986300906Sasomers			       libzfs_error_description(g_zfsHandle));
987300906Sasomers		}
988300906Sasomers	}
989300906Sasomers	else if (ShouldDegrade()) {
990300906Sasomers		/* Degrade the vdev and close the case. */
991300906Sasomers		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
992300906Sasomers				       VDEV_AUX_ERR_EXCEEDED) == 0) {
993300906Sasomers			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
994300906Sasomers			       PoolGUIDString().c_str(),
995300906Sasomers			       VdevGUIDString().c_str());
996300906Sasomers			Close();
997300906Sasomers			return;
998300906Sasomers		}
999300906Sasomers		else {
1000300906Sasomers			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
1001300906Sasomers			       PoolGUIDString().c_str(),
1002300906Sasomers			       VdevGUIDString().c_str(),
1003300906Sasomers			       libzfs_error_action(g_zfsHandle),
1004300906Sasomers			       libzfs_error_description(g_zfsHandle));
1005300906Sasomers		}
1006300906Sasomers	}
1007300906Sasomers	Serialize();
1008300906Sasomers}
1009300906Sasomers
1010300906SasomersVdev
1011300906SasomersCaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
1012300906Sasomers	Vdev vd(zhp, CaseVdev(zhp));
1013300906Sasomers	std::list<Vdev> children;
1014300906Sasomers	std::list<Vdev>::iterator children_it;
1015300906Sasomers
1016300906Sasomers	Vdev parent(vd.Parent());
1017300906Sasomers	Vdev replacing(NonexistentVdev);
1018300906Sasomers
1019300906Sasomers	/*
1020300906Sasomers	 * To determine whether we are being replaced by another spare that
1021300906Sasomers	 * is still working, then make sure that it is currently spared and
1022300906Sasomers	 * that the spare is either resilvering or healthy.  If any of these
1023300906Sasomers	 * conditions fail, then we are not being replaced by a spare.
1024300906Sasomers	 *
1025300906Sasomers	 * If the spare is healthy, then the case file should be closed very
1026300906Sasomers	 * soon after this check.
1027300906Sasomers	 */
1028300906Sasomers	if (parent.DoesNotExist()
1029300906Sasomers	 || parent.Name(zhp, /*verbose*/false) != "spare")
1030300906Sasomers		return (NonexistentVdev);
1031300906Sasomers
1032300906Sasomers	children = parent.Children();
1033300906Sasomers	children_it = children.begin();
1034300906Sasomers	for (;children_it != children.end(); children_it++) {
1035300906Sasomers		Vdev child = *children_it;
1036300906Sasomers
1037300906Sasomers		/* Skip our vdev. */
1038300906Sasomers		if (child.GUID() == VdevGUID())
1039300906Sasomers			continue;
1040300906Sasomers		/*
1041300906Sasomers		 * Accept the first child that doesn't match our GUID, or
1042300906Sasomers		 * any resilvering/healthy device if one exists.
1043300906Sasomers		 */
1044300906Sasomers		if (replacing.DoesNotExist() || child.IsResilvering()
1045300906Sasomers		 || child.State() == VDEV_STATE_HEALTHY)
1046300906Sasomers			replacing = child;
1047300906Sasomers	}
1048300906Sasomers
1049300906Sasomers	return (replacing);
1050300906Sasomers}
1051300906Sasomers
1052300906Sasomersbool
1053300906SasomersCaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
1054300906Sasomers	nvlist_t *nvroot, *newvd;
1055300906Sasomers	const char *poolname;
1056300906Sasomers	string oldstr(VdevGUIDString());
1057300906Sasomers	bool retval = true;
1058300906Sasomers
1059300906Sasomers	/* Figure out what pool we're working on */
1060300906Sasomers	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1061300906Sasomers	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1062300906Sasomers	if (zhp == NULL) {
1063300906Sasomers		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1064300919Sbdrewery		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1065300906Sasomers		return (false);
1066300906Sasomers	}
1067300906Sasomers	poolname = zpool_get_name(zhp);
1068300906Sasomers	Vdev vd(zhp, CaseVdev(zhp));
1069300906Sasomers	Vdev replaced(BeingReplacedBy(zhp));
1070300906Sasomers
1071300906Sasomers	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1072300906Sasomers		/* If we are already being replaced by a working spare, pass. */
1073300906Sasomers		if (replaced.IsResilvering()
1074300906Sasomers		 || replaced.State() == VDEV_STATE_HEALTHY) {
1075300906Sasomers			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1076300906Sasomers			    "replaced", VdevGUIDString().c_str(), path);
1077300906Sasomers			return (/*consumed*/false);
1078300906Sasomers		}
1079300906Sasomers		/*
1080300906Sasomers		 * If we have already been replaced by a spare, but that spare
1081300906Sasomers		 * is broken, we must spare the spare, not the original device.
1082300906Sasomers		 */
1083300906Sasomers		oldstr = replaced.GUIDString();
1084300906Sasomers		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1085300906Sasomers		    "broken spare %s instead", VdevGUIDString().c_str(),
1086300906Sasomers		    path, oldstr.c_str());
1087300906Sasomers	}
1088300906Sasomers
1089300906Sasomers	/*
1090300906Sasomers	 * Build a root vdev/leaf vdev configuration suitable for
1091300906Sasomers	 * zpool_vdev_attach. Only enough data for the kernel to find
1092300906Sasomers	 * the device (i.e. type and disk device node path) are needed.
1093300906Sasomers	 */
1094300906Sasomers	nvroot = NULL;
1095300906Sasomers	newvd = NULL;
1096300906Sasomers
1097300906Sasomers	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1098300906Sasomers	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1099300906Sasomers		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1100300906Sasomers		    "configuration data.", poolname, oldstr.c_str());
1101300906Sasomers		if (nvroot != NULL)
1102300906Sasomers			nvlist_free(nvroot);
1103300906Sasomers		return (false);
1104300906Sasomers	}
1105300906Sasomers	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1106300906Sasomers	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1107300906Sasomers	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1108300906Sasomers	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1109300906Sasomers				    &newvd, 1) != 0) {
1110300906Sasomers		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1111300906Sasomers		    "configuration data.", poolname, oldstr.c_str());
1112300906Sasomers		nvlist_free(newvd);
1113300906Sasomers		nvlist_free(nvroot);
1114300906Sasomers		return (true);
1115300906Sasomers	}
1116300906Sasomers
1117300906Sasomers	/* Data was copied when added to the root vdev. */
1118300906Sasomers	nvlist_free(newvd);
1119300906Sasomers
1120300906Sasomers	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1121300906Sasomers	    /*replace*/B_TRUE) == 0);
1122300906Sasomers	if (retval)
1123300906Sasomers		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1124300906Sasomers		    poolname, oldstr.c_str(), path);
1125300906Sasomers	else
1126300906Sasomers		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1127300906Sasomers		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1128300906Sasomers		    libzfs_error_description(g_zfsHandle));
1129300906Sasomers	nvlist_free(nvroot);
1130300906Sasomers
1131300906Sasomers	return (retval);
1132300906Sasomers}
1133300906Sasomers
1134300906Sasomers/* Does the argument event refer to a checksum error? */
1135300906Sasomersstatic bool
1136300906SasomersIsChecksumEvent(const Event* const event)
1137300906Sasomers{
1138300906Sasomers	return ("ereport.fs.zfs.checksum" == event->Value("type"));
1139300906Sasomers}
1140300906Sasomers
1141300906Sasomers/* Does the argument event refer to an IO error? */
1142300906Sasomersstatic bool
1143300906SasomersIsIOEvent(const Event* const event)
1144300906Sasomers{
1145300906Sasomers	return ("ereport.fs.zfs.io" == event->Value("type"));
1146300906Sasomers}
1147300906Sasomers
1148300906Sasomersbool
1149300906SasomersCaseFile::ShouldDegrade() const
1150300906Sasomers{
1151300906Sasomers	return (std::count_if(m_events.begin(), m_events.end(),
1152300906Sasomers			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
1153300906Sasomers}
1154300906Sasomers
1155300906Sasomersbool
1156300906SasomersCaseFile::ShouldFault() const
1157300906Sasomers{
1158300906Sasomers	return (std::count_if(m_events.begin(), m_events.end(),
1159300906Sasomers			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
1160300906Sasomers}
1161300906Sasomers
1162300906Sasomersnvlist_t *
1163300906SasomersCaseFile::CaseVdev(zpool_handle_t *zhp) const
1164300906Sasomers{
1165300906Sasomers	return (VdevIterator(zhp).Find(VdevGUID()));
1166300906Sasomers}
1167