case_file.cc revision 300919
/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file case_file.cc
 *
 * We keep case files for any leaf vdev that is not in the optimal state.
 * However, we only serialize to disk those events that need to be preserved
 * across reboots.  For now, this is just a log of soft errors which we
 * accumulate in order to mark a device as degraded.
 */
#include <sys/cdefs.h>
#include <sys/time.h>

#include <sys/fs/zfs.h>

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <iomanip>
#include <fstream>
#include <functional>
#include <sstream>
#include <syslog.h>
#include <unistd.h>

#include <libzfs.h>

#include <algorithm>
#include <cstring>
#include <list>
#include <map>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD: head/cddl/usr.sbin/zfsd/case_file.cc 300919 2016-05-29 00:40:29Z bdrewery $");

/*============================ Namespace Control =============================*/
using std::auto_ptr;
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;

using DevdCtl::Event;
using DevdCtl::EventBuffer;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;

/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------

CaseFileList  CaseFile::s_activeCases;
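/*
 * Case files are persisted under s_caseFilePath using names of the
 * form "pool_<pool GUID>_vdev_<vdev GUID>.case" (see
 * DeSerializeSelector() and Serialize() below).
 */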
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
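/*
 * The grace period during which tentative I/O and checksum error
 * events may be cancelled by the removal of their device before zfsd
 * acts on them (see RegisterCallout() and OnGracePeriodEnded()).
 */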
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};

//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++) {

		if ((*curCase)->PoolGUID() != poolGUID
		 || (*curCase)->VdevGUID() != vdevGUID)
			continue;

		/*
		 * We only carry one active case per-vdev.
		 */
		return (*curCase);
	}
	return (NULL);
}

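/*
 * Find a case file by the physical path of its vdev.  Physical paths
 * are not guaranteed to be unique, so all active cases are scanned
 * and the last match is returned.
 */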
CaseFile *
CaseFile::Find(const string &physPath)
{
	CaseFile *result = NULL;

	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++) {

		if ((*curCase)->PhysicalPath() != physPath)
			continue;

		if (result != NULL) {
			syslog(LOG_WARNING, "Multiple casefiles found for "
			    "physical path %s.  "
			    "This is most likely a bug in zfsd",
			    physPath.c_str());
		}
		result = *curCase;
	}
	return (result);
}

void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
	CaseFileList::iterator casefile;
	for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
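		/*
		 * Advance the iterator before re-evaluating: ReEvaluate()
		 * may close the case, removing it from s_activeCases and
		 * invalidating any iterator that still points at it.
		 */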
		CaseFileList::iterator next = casefile;
		next++;
		if (poolGUID == (*casefile)->PoolGUID())
			(*casefile)->ReEvaluate(event);
		casefile = next;
	}
}

CaseFile &
CaseFile::Create(Vdev &vdev)
{
	CaseFile *activeCase;

	activeCase = Find(vdev.PoolGUID(), vdev.GUID());
	if (activeCase == NULL)
		activeCase = new CaseFile(vdev);

	return (*activeCase);
}

void
CaseFile::DeSerialize()
{
	struct dirent **caseFiles;

	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
			 DeSerializeSelector, /*compar*/NULL));

	if (numCaseFiles == -1)
		return;
	if (numCaseFiles == 0) {
		free(caseFiles);
		return;
	}

	for (int i = 0; i < numCaseFiles; i++) {

		DeSerializeFile(caseFiles[i]->d_name);
		free(caseFiles[i]);
	}
	free(caseFiles);
}

void
CaseFile::LogAll()
{
	for (CaseFileList::iterator curCase = s_activeCases.begin();
	     curCase != s_activeCases.end(); curCase++)
		(*curCase)->Log();
}

void
CaseFile::PurgeAll()
{
	/*
	 * Serialize casefiles before deleting them so that they can be reread
	 * and revalidated during BuildCaseFiles.
	 * CaseFiles remove themselves from this list on destruction.
	 */
	while (s_activeCases.size() != 0) {
		CaseFile *casefile = s_activeCases.front();
		casefile->Serialize();
		delete casefile;
	}
}

//- CaseFile Public Methods ----------------------------------------------------
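/*
 * Update the cached vdev state and physical path from the pool's
 * current configuration.  Returns false if the pool or the vdev is
 * no longer present.
 */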
bool
CaseFile::RefreshVdevState()
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
	if (casePool == NULL)
		return (false);

	Vdev vd(casePool, CaseVdev(casePool));
	if (vd.DoesNotExist())
		return (false);

	m_vdevState    = vd.State();
	m_vdevPhysPath = vd.PhysicalPath();
	return (true);
}

bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

	if (pool == NULL || !RefreshVdevState()) {
		/*
		 * The pool or vdev for this case file is no longer
		 * part of the configuration.  This can happen
		 * if we process a device arrival notification
		 * before seeing the ZFS configuration change
		 * event.
		 */
		syslog(LOG_INFO,
		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
		       "Closing\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str());
		Close();

		/*
		 * The arrival event did not solve this case; the case
		 * closed because its pool or vdev is gone.  Do not
		 * report the event as consumed.
		 */
		return (/*consumed*/false);
	}

	if (VdevState() > VDEV_STATE_CANT_OPEN) {
		/*
		 * For now, newly discovered devices only help for
		 * devices that are missing.  In the future, we might
		 * use a newly inserted spare to replace a degraded
		 * or faulted device.
		 */
		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
		return (/*consumed*/false);
	}

	if (vdev != NULL
	 && vdev->PoolGUID() == m_poolGUID
	 && vdev->GUID() == m_vdevGUID) {

		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
				  &m_vdevState);
		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
		       zpool_get_name(pool), vdev->GUIDString().c_str(),
		       devPath.c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

		/*
		 * Check the vdev state post the online action to see
		 * if we can retire this case.
		 */
		CloseIfSolved();

		return (/*consumed*/true);
	}

	/*
	 * If the auto-replace policy is enabled, and we have physical
	 * path information, try a physical path replacement.
	 */
	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (PhysicalPath().empty()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): No physical path information.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (physPath != PhysicalPath()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	/* Write a label on the newly inserted disk. */
	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
		syslog(LOG_ERR,
		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
		       zpool_get_name(pool), VdevGUIDString().c_str(),
		       libzfs_error_action(g_zfsHandle),
		       libzfs_error_description(g_zfsHandle));
		return (/*consumed*/false);
	}

	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	    devPath.c_str());
	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}

bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
	bool consumed(false);

	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
		/*
		 * The Vdev we represent has been removed from the
		 * configuration.  This case is no longer of value.
		 */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
		/* This Pool has been destroyed.  Discard the case */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
		RefreshVdevState();
		if (VdevState() < VDEV_STATE_HEALTHY)
			consumed = ActivateSpare();
	}

	if (event.Value("class") == "resource.fs.zfs.removed") {
		bool spare_activated;

		if (!RefreshVdevState()) {
			/*
			 * The pool or vdev for this case file is no longer
			 * part of the configuration.  This can happen
			 * if we process a device arrival notification
			 * before seeing the ZFS configuration change
			 * event.
			 */
			syslog(LOG_INFO,
			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
			       "unconfigured.  Closing\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			/*
			 * Close the case now so we won't waste cycles in the
			 * system rescan
			 */
			Close();

			/*
			 * The removal event did not solve this case;
			 * the case closed because its pool or vdev is
			 * gone.  Do not report the event as consumed.
			 */
			return (/*consumed*/false);
		}

		/*
		 * Discard any tentative I/O error events for
		 * this case.  They were most likely caused by the
		 * hot-unplug of this device.
		 */
		PurgeTentativeEvents();

		/* Try to activate spares if they are available */
		spare_activated = ActivateSpare();

		/*
		 * Rescan the drives in the system to see if a recent
		 * drive arrival can be used to solve this case.
		 */
		ZfsDaemon::RequestSystemRescan();

		/*
		 * Consume the event if we successfully activated a spare.
		 * Otherwise, leave it in the unconsumed events list so that the
		 * future addition of a spare to this pool might be able to
		 * close the case
		 */
		consumed = spare_activated;
	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
		RefreshVdevState();
		/*
		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
		 * activate a hotspare.  Otherwise, ignore the event
		 */
		if (VdevState() == VDEV_STATE_FAULTED ||
		    VdevState() == VDEV_STATE_DEGRADED ||
		    VdevState() == VDEV_STATE_CANT_OPEN)
			(void) ActivateSpare();
		consumed = true;
	} else if (event.Value("class") == "ereport.fs.zfs.io" ||
	    event.Value("class") == "ereport.fs.zfs.checksum") {

		m_tentativeEvents.push_front(event.DeepCopy());
		RegisterCallout(event);
		consumed = true;
	}

	bool closed(CloseIfSolved());

	return (consumed || closed);
}

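/*
 * Attempt to replace this case's vdev with the first configured hot
 * spare that is healthy and not already in use.  Returns true if a
 * replacement was successfully initiated.
 */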
bool
CaseFile::ActivateSpare()
{
	nvlist_t	*config, *nvroot;
	nvlist_t       **spares;
	char		*devPath, *vdev_type;
	const char	*poolname;
	u_int		 nspares, i;
	int		 error;

	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	config = zpool_get_config(zhp, NULL);
	if (config == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "config for pool %s", poolname);
		return (false);
	}
	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
		       "tree for pool %s", poolname);
		return (false);
	}
	nspares = 0;
	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
				   &nspares);
	if (nspares == 0) {
		/* The pool has no spares configured */
		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
		       "No spares available for pool %s", poolname);
		return (false);
	}
	for (i = 0; i < nspares; i++) {
		uint64_t    *nvlist_array;
		vdev_stat_t *vs;
		uint_t	     nstats;

		if (nvlist_lookup_uint64_array(spares[i],
		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
			       "find vdev stats for pool %s, spare %d",
			       poolname, i);
			return (false);
		}
		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

		if ((vs->vs_aux != VDEV_AUX_SPARED)
		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
			/* We found a usable spare */
			break;
		}
	}

	if (i == nspares) {
		/* No available spares were found */
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the path of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the vdev type of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	return (Replace(vdev_type, devPath, /*isspare*/true));
}

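/*
 * (Re)arm the grace period timer for this case's tentative events.
 * The countdown is measured from the triggering event's timestamp, so
 * an event that has already aged past the grace period fires the
 * callout almost immediately.  The timer is only ever shortened here,
 * never lengthened.
 */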
void
CaseFile::RegisterCallout(const Event &event)
{
	timeval now, countdown, elapsed, timestamp, zero, remaining;

	gettimeofday(&now, 0);
	timestamp = event.GetTimestamp();
	timersub(&now, &timestamp, &elapsed);
	timersub(&s_removeGracePeriod, &elapsed, &countdown);
	/*
	 * If the countdown is <= zero, reset the timer to the
	 * smallest positive time value instead.
	 */
	timerclear(&zero);
	if (timercmp(&countdown, &zero, <=)) {
		timerclear(&countdown);
		countdown.tv_usec = 1;
	}

	remaining = m_tentativeTimer.TimeRemaining();

	if (!m_tentativeTimer.IsPending()
	 || timercmp(&countdown, &remaining, <))
		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}

bool
CaseFile::CloseIfSolved()
{
	if (m_events.empty()
	 && m_tentativeEvents.empty()) {

		/*
		 * We currently do not track or take actions on
		 * devices in the degraded or faulted state.
		 * Once we have support for spare pools, we'll
		 * retain these cases so that any spares added in
		 * the future can be applied to them.
		 */
		switch (VdevState()) {
		case VDEV_STATE_HEALTHY:
			/* No need to keep cases for healthy vdevs */
			Close();
			return (true);
		case VDEV_STATE_REMOVED:
		case VDEV_STATE_CANT_OPEN:
			/*
			 * Keep open.  We may solve it with a newly inserted
			 * device.
			 */
		case VDEV_STATE_FAULTED:
		case VDEV_STATE_DEGRADED:
			/*
			 * Keep open.  We may solve it with the future
			 * addition of a spare to the pool
			 */
		case VDEV_STATE_UNKNOWN:
		case VDEV_STATE_CLOSED:
		case VDEV_STATE_OFFLINE:
			/*
			 * Keep open?  This may not be the correct behavior,
			 * but it's what we've always done
			 */
			;
		}

		/*
		 * Re-serialize the case in order to remove any
		 * previous event data.
		 */
		Serialize();
	}

	return (false);
}

void
CaseFile::Log()
{
	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
	       VdevGUIDString().c_str(), PhysicalPath().c_str());
	syslog(LOG_INFO, "\tVdev State = %s\n",
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
	if (m_tentativeEvents.size() != 0) {
		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
		for (EventList::iterator event(m_tentativeEvents.begin());
		     event != m_tentativeEvents.end(); event++)
			(*event)->Log(LOG_INFO);
	}
	if (m_events.size() != 0) {
		syslog(LOG_INFO, "\t=== Events ===\n");
		for (EventList::iterator event(m_events.begin());
		     event != m_events.end(); event++)
			(*event)->Log(LOG_INFO);
	}
}

//- CaseFile Static Protected Methods ------------------------------------------
void
CaseFile::OnGracePeriodEnded(void *arg)
{
	CaseFile &casefile(*static_cast<CaseFile *>(arg));

	casefile.OnGracePeriodEnded();
}

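/*
 * scandir(3) selector for DeSerialize(): accept only regular files
 * whose names follow the serialized case naming convention,
 * e.g. "pool_1234_vdev_5678.case" (the GUIDs here are illustrative).
 */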
int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
	uint64_t poolGUID;
	uint64_t vdevGUID;

	if (dirEntry->d_type == DT_REG
	 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
		   &poolGUID, &vdevGUID) == 2)
		return (1);
	return (0);
}

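/*
 * Reload a single serialized case.  The pool and vdev GUIDs embedded
 * in the file name identify the case; files for pools or vdevs that
 * no longer exist are unlinked rather than deserialized.
 */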
void
CaseFile::DeSerializeFile(const char *fileName)
{
	string	  fullName(s_caseFilePath + '/' + fileName);
	CaseFile *existingCaseFile(NULL);
	CaseFile *caseFile(NULL);

	try {
		uint64_t poolGUID;
		uint64_t vdevGUID;
		nvlist_t *vdevConf;

		sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
		       &poolGUID, &vdevGUID);
		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
		if (existingCaseFile != NULL) {
			/*
			 * If the vdev is already degraded or faulted,
			 * there's no point in keeping the state around
			 * that we use to put a drive into the degraded
			 * state.  However, if the vdev is simply missing,
			 * preserve the case data in the hopes that it will
			 * return.
			 */
			caseFile = existingCaseFile;
			vdev_state curState(caseFile->VdevState());
			if (curState > VDEV_STATE_CANT_OPEN
			 && curState < VDEV_STATE_HEALTHY) {
				unlink(fullName.c_str());
				return;
			}
		} else {
			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
			if (zpl.empty()
			 || (vdevConf = VdevIterator(zpl.front())
						    .Find(vdevGUID)) == NULL) {
				/*
				 * Either the pool no longer exists
				 * or this vdev is no longer a member of
				 * the pool.
				 */
				unlink(fullName.c_str());
				return;
			}

			/*
			 * Any vdev we find that does not have a case file
			 * must be in the healthy state and thus worthy of
			 * continued SERD data tracking.
			 */
			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
		}

		ifstream caseStream(fullName.c_str());
		if (!caseStream)
			throw ZfsdException("CaseFile::DeSerialize: Unable to "
					    "read %s.\n", fileName);

		caseFile->DeSerialize(caseStream);
	} catch (const ParseException &exp) {

		exp.Log();
		if (caseFile != existingCaseFile)
			delete caseFile;

		/*
		 * Since we can't parse the file, unlink it so we don't
		 * trip over it again.
		 */
		unlink(fullName.c_str());
	} catch (const ZfsdException &zfsException) {

		zfsException.Log();
		if (caseFile != existingCaseFile)
			delete caseFile;
	}
}

//- CaseFile Protected Methods -------------------------------------------------
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath())
{
	stringstream guidString;

	guidString << m_vdevGUID;
	m_vdevGUIDString = guidString.str();
	guidString.str("");
	guidString << m_poolGUID;
	m_poolGUIDString = guidString.str();

	s_activeCases.push_back(this);

	syslog(LOG_INFO, "Creating new CaseFile:\n");
	Log();
}

CaseFile::~CaseFile()
{
	PurgeEvents();
	PurgeTentativeEvents();
	m_tentativeTimer.Stop();
	s_activeCases.remove(this);
}

void
CaseFile::PurgeEvents()
{
	for (EventList::iterator event(m_events.begin());
	     event != m_events.end(); event++)
		delete *event;

	m_events.clear();
}

void
CaseFile::PurgeTentativeEvents()
{
	for (EventList::iterator event(m_tentativeEvents.begin());
	     event != m_tentativeEvents.end(); event++)
		delete *event;

	m_tentativeEvents.clear();
}

void
CaseFile::SerializeEvList(const EventList events, int fd,
		const char* prefix) const
{
	if (events.empty())
		return;
	for (EventList::const_iterator curEvent = events.begin();
	     curEvent != events.end(); curEvent++) {
		const string &eventString((*curEvent)->GetEventString());

		// TODO: replace many write(2) calls with a single writev(2)
		if (prefix)
			write(fd, prefix, strlen(prefix));
		write(fd, eventString.c_str(), eventString.length());
	}
}

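/*
 * Write this case to its backing file, one event per line.  Tentative
 * events are prefixed with "tentative " so that DeSerialize() can
 * restore them to the correct list.  A case with no events has its
 * backing file removed instead.
 */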
void
CaseFile::Serialize()
{
	stringstream saveFile;

	saveFile << setfill('0')
		 << s_caseFilePath << "/"
		 << "pool_" << PoolGUIDString()
		 << "_vdev_" << VdevGUIDString()
		 << ".case";

	if (m_events.empty() && m_tentativeEvents.empty()) {
		unlink(saveFile.str().c_str());
		return;
	}

	int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
	if (fd == -1) {
		syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
		       saveFile.str().c_str());
		return;
	}
	SerializeEvList(m_events, fd);
	SerializeEvList(m_tentativeEvents, fd, "tentative ");
	close(fd);
}

/*
 * XXX: This method assumes that events cannot contain embedded
 * newlines.  If that assumption is ever broken, CaseFile must switch
 * to a different serialization format.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
	string	      evString;
	const EventFactory &factory(ZfsDaemon::Get().GetFactory());

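	/*
	 * Consume any leading whitespace, then disable whitespace
	 * skipping so that get() sees each serialized event line
	 * verbatim.
	 */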
	caseStream >> std::noskipws >> std::ws;
	while (caseStream.good()) {
		/*
		 * Outline:
		 * read the beginning of a line and check it for
		 * "tentative".  If found, discard "tentative".
		 * Create a new event
		 * continue
		 */
		EventList* destEvents;
		const string tentFlag("tentative ");
		string line;
		std::stringbuf lineBuf;

		caseStream.get(lineBuf);
		caseStream.ignore();  /*discard the newline character*/
		line = lineBuf.str();
		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
			/* Discard "tentative" */
			line.erase(0, tentFlag.size());
			destEvents = &m_tentativeEvents;
		} else {
			destEvents = &m_events;
		}
		Event *event(Event::CreateEvent(factory, line));
		if (event != NULL) {
			destEvents->push_back(event);
			RegisterCallout(*event);
		}
	}
}

void
CaseFile::Close()
{
	/*
	 * This case is no longer relevant.  Clean up our
	 * serialization file, and delete the case.
	 */
	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	/*
	 * Serializing a case with no event data removes its
	 * serialization file.
	 */
	PurgeEvents();
	Serialize();

	delete this;
}

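/*
 * The grace period expired without the tentative events being
 * invalidated by a removal notification.  Promote them to full events
 * and apply the fault/degrade policy.
 */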
void
CaseFile::OnGracePeriodEnded()
{
	bool should_fault, should_degrade;
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

	m_events.splice(m_events.begin(), m_tentativeEvents);
	should_fault = ShouldFault();
	should_degrade = ShouldDegrade();

	if (should_fault || should_degrade) {
		if (zhp == NULL
		 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
			/*
			 * Either the pool no longer exists
			 * or this vdev is no longer a member of
			 * the pool.
			 */
			Close();
			return;
		}
	}
	/* A fault condition has priority over a degrade condition */
	if (should_fault) {
		/* Fault the vdev and close the case. */
		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
				       VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			Close();
			return;
		} else {
			syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str(),
			       libzfs_error_action(g_zfsHandle),
			       libzfs_error_description(g_zfsHandle));
		}
	} else if (should_degrade) {
		/* Degrade the vdev and close the case. */
		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
				       VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			Close();
			return;
		} else {
			syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str(),
			       libzfs_error_action(g_zfsHandle),
			       libzfs_error_description(g_zfsHandle));
		}
	}
	Serialize();
}

Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
	Vdev vd(zhp, CaseVdev(zhp));
	std::list<Vdev> children;
	std::list<Vdev>::iterator children_it;

	Vdev parent(vd.Parent());
	Vdev replacing(NonexistentVdev);

	/*
	 * To determine whether we are being replaced by another spare
	 * that is still working, make sure that the vdev is currently
	 * spared and that the spare is either resilvering or healthy.
	 * If any of these conditions fail, then we are not being
	 * replaced by a spare.
	 *
	 * If the spare is healthy, then the case file should be closed
	 * very soon after this check.
	 */
	if (parent.DoesNotExist()
	 || parent.Name(zhp, /*verbose*/false) != "spare")
		return (NonexistentVdev);

	children = parent.Children();
	children_it = children.begin();
	for (;children_it != children.end(); children_it++) {
		Vdev child = *children_it;

		/* Skip our vdev. */
		if (child.GUID() == VdevGUID())
			continue;
		/*
		 * Accept the first child that doesn't match our GUID, or
		 * any resilvering/healthy device if one exists.
		 */
		if (replacing.DoesNotExist() || child.IsResilvering()
		 || child.State() == VDEV_STATE_HEALTHY)
			replacing = child;
	}

	return (replacing);
}

bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare)
{
	nvlist_t *nvroot, *newvd;
	const char *poolname;
	string oldstr(VdevGUIDString());
	bool retval = true;

	/* Figure out what pool we're working on */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	Vdev vd(zhp, CaseVdev(zhp));
	Vdev replaced(BeingReplacedBy(zhp));

	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
		/* If we are already being replaced by a working spare, pass. */
		if (replaced.IsResilvering()
		 || replaced.State() == VDEV_STATE_HEALTHY) {
			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
			    "replaced", VdevGUIDString().c_str(), path);
			return (/*consumed*/false);
		}
		/*
		 * If we have already been replaced by a spare, but that spare
		 * is broken, we must spare the spare, not the original device.
		 */
		oldstr = replaced.GUIDString();
		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
		    "broken spare %s instead", VdevGUIDString().c_str(),
		    path, oldstr.c_str());
	}

	/*
	 * Build a root vdev/leaf vdev configuration suitable for
	 * zpool_vdev_attach. Only enough data for the kernel to find
	 * the device (i.e. type and disk device node path) are needed.
	 */
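	/*
	 * The result is the minimal tree zpool_vdev_attach() expects,
	 * conceptually:
	 *
	 *   root (type = "root")
	 *     child[0]: type = vdev_type, path = path
	 */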
	nvroot = NULL;
	newvd = NULL;

	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
		    "configuration data.", poolname, oldstr.c_str());
		if (nvroot != NULL)
			nvlist_free(nvroot);
		return (false);
	}
	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
				    &newvd, 1) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
		    "configuration data.", poolname, oldstr.c_str());
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return (false);
	}

	/* Data was copied when added to the root vdev. */
	nvlist_free(newvd);

	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
	    /*replace*/B_TRUE) == 0);
	if (retval)
		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
		    poolname, oldstr.c_str(), path);
	else
		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	nvlist_free(nvroot);

	return (retval);
}

/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
	return ("ereport.fs.zfs.checksum" == event->Value("type"));
}

/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
	return ("ereport.fs.zfs.io" == event->Value("type"));
}

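/*
 * Policy thresholds: a case degrades its vdev after more than
 * ZFS_DEGRADE_IO_COUNT checksum errors and faults it after more than
 * ZFS_DEGRADE_IO_COUNT I/O errors have accumulated in m_events.
 */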
bool
CaseFile::ShouldDegrade() const
{
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}

bool
CaseFile::ShouldFault() const
{
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}

nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
	return (VdevIterator(zhp).Find(VdevGUID()));
}
