/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
 */

/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool, as well as the status of
 * exported pools.  Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool.  This status is independent (to a certain degree) of
 * the state of the pool.  A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data.  The status
 * describes the overall status of devices.  A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to a FMA message ID.  The explanation
 * is left up to the caller, depending on whether it is a live pool or an
 * import.
 */

#include <libzfs.h>
#include <libzutil.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/systeminfo.h>
#include "libzfs_impl.h"
#include "zfeature_common.h"

/*
 * Message ID table.  This must be kept in sync with the ZPOOL_STATUS_* defines
 * in include/libzfs.h.  Note that there are some status results which go past
 * the end of this table, and hence have no associated message ID.
 */
static const char *const zfs_msgid_table[] = {
	"ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */
	"ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */
	"ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */
	"ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */
	"ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */
	"ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */
	"ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */
	"ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */
	"ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */
	"ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */
	"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */
	"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */
	"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */
	"ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */
	"ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */
	"ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */
	"ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */
	"ZFS-8000-ER", /* ZPOOL_STATUS_ERRATA */
	/*
	 * The following results have no message ID.
	 *	ZPOOL_STATUS_UNSUP_FEAT_READ
	 *	ZPOOL_STATUS_UNSUP_FEAT_WRITE
	 *	ZPOOL_STATUS_FAULTED_DEV_R
	 *	ZPOOL_STATUS_FAULTED_DEV_NR
	 *	ZPOOL_STATUS_VERSION_OLDER
	 *	ZPOOL_STATUS_FEAT_DISABLED
	 *	ZPOOL_STATUS_RESILVERING
	 *	ZPOOL_STATUS_OFFLINE_DEV
	 *	ZPOOL_STATUS_REMOVED_DEV
	 *	ZPOOL_STATUS_REBUILDING
	 *	ZPOOL_STATUS_REBUILD_SCRUB
	 *	ZPOOL_STATUS_NON_NATIVE_ASHIFT
	 *	ZPOOL_STATUS_COMPATIBILITY_ERR
	 *	ZPOOL_STATUS_INCOMPATIBLE_FEAT
	 *	ZPOOL_STATUS_OK
	 */
};

#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))

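/*
 * Leaf-vdev predicates.  Each takes a vdev's stats (and the size of the stat
 * array) and returns nonzero if the vdev exhibits the condition of interest;
 * find_vdev_problem() below applies them to every leaf vdev in the config.
 */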
static int
vdev_missing(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_OPEN_FAILED);
}

static int
vdev_faulted(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_FAULTED);
}

static int
vdev_errors(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_DEGRADED ||
	    vs->vs_read_errors != 0 || vs->vs_write_errors != 0 ||
	    vs->vs_checksum_errors != 0);
}

static int
vdev_broken(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_CANT_OPEN);
}

static int
vdev_offlined(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_OFFLINE);
}

static int
vdev_removed(vdev_stat_t *vs, uint_t vsc)
{
	(void) vsc;
	return (vs->vs_state == VDEV_STATE_REMOVED);
}

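/*
 * The vdev was configured with an ashift smaller than the device's reported
 * physical sector size.  This check can be suppressed by setting the
 * ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE environment variable.
 */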
static int
vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc)
{
	if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL)
		return (0);

	return (VDEV_STAT_VALID(vs_physical_ashift, vsc) &&
	    vs->vs_configured_ashift < vs->vs_physical_ashift);
}

/*
 * Detect whether any leaf devices have seen errors or could not be opened.
 */
static boolean_t
find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t),
    boolean_t ignore_replacing)
{
	nvlist_t **child;
	uint_t c, children;

	/*
	 * Ignore problems within a 'replacing' vdev, since we're presumably in
	 * the process of repairing any such errors, and don't want to call them
	 * out again.  We'll pick up the fact that a resilver is happening
	 * later.
	 */
	if (ignore_replacing == B_TRUE) {
		const char *type = fnvlist_lookup_string(vdev,
		    ZPOOL_CONFIG_TYPE);
		if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
			return (B_FALSE);
	}

	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_vdev_problem(child[c], func, ignore_replacing))
				return (B_TRUE);
	} else {
		uint_t vsc;
		vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array(
		    vdev, ZPOOL_CONFIG_VDEV_STATS, &vsc);
		if (func(vs, vsc) != 0)
			return (B_TRUE);
	}

	/*
	 * Check any L2 cache devs
	 */
	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child,
	    &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_vdev_problem(child[c], func, ignore_replacing))
				return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Active pool health status.
 *
 * To determine the status for a pool, we make several passes over the config,
 * picking the most egregious error we find.  In order of importance, we do the
 * following:
 *
 *	- Check for a complete and valid configuration
 *	- Look for any faulted or missing devices in a non-replicated config
 *	- Check for any data errors
 *	- Check for any faulted or missing devices in a replicated config
 *	- Look for any devices showing errors
 *	- Check for any resilvering or rebuilding devices
 *
 * There can obviously be multiple errors within a single pool, so this routine
 * only picks the most damaging of all the current errors to report.
 */
static zpool_status_t
check_status(nvlist_t *config, boolean_t isimport,
    zpool_errata_t *erratap, const char *compat)
{
	pool_scan_stat_t *ps = NULL;
	uint_t vsc, psc;
	uint64_t suspended;
	uint64_t hostid = 0;
	uint64_t errata = 0;
	unsigned long system_hostid = get_system_hostid();

	uint64_t version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION);
	nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE);
	vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array(nvroot,
	    ZPOOL_CONFIG_VDEV_STATS, &vsc);
	uint64_t stateval = fnvlist_lookup_uint64(config,
	    ZPOOL_CONFIG_POOL_STATE);

	/*
	 * Currently resilvering a vdev
	 */
	(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
	    (uint64_t **)&ps, &psc);
	if (ps != NULL && ps->pss_func == POOL_SCAN_RESILVER &&
	    ps->pss_state == DSS_SCANNING)
		return (ZPOOL_STATUS_RESILVERING);

	/*
	 * Currently rebuilding a vdev, check top-level vdevs.
	 */
	vdev_rebuild_stat_t *vrs = NULL;
	nvlist_t **child;
	uint_t c, i, children;
	uint64_t rebuild_end_time = 0;
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((nvlist_lookup_uint64_array(child[c],
			    ZPOOL_CONFIG_REBUILD_STATS,
			    (uint64_t **)&vrs, &i) == 0) && (vrs != NULL)) {
				uint64_t state = vrs->vrs_state;

				if (state == VDEV_REBUILD_ACTIVE) {
					return (ZPOOL_STATUS_REBUILDING);
				} else if (state == VDEV_REBUILD_COMPLETE &&
				    vrs->vrs_end_time > rebuild_end_time) {
					rebuild_end_time = vrs->vrs_end_time;
				}
			}
		}

		/*
		 * If we can determine when the last scrub was run, and it
		 * was before the last rebuild completed, then recommend
		 * that the pool be scrubbed to verify all checksums.  When
		 * ps is NULL we can infer the pool has never been scrubbed.
		 */
		if (rebuild_end_time > 0) {
			if (ps != NULL) {
				if ((ps->pss_state == DSS_FINISHED &&
				    ps->pss_func == POOL_SCAN_SCRUB &&
				    rebuild_end_time > ps->pss_end_time) ||
				    ps->pss_state == DSS_NONE)
					return (ZPOOL_STATUS_REBUILD_SCRUB);
			} else {
				return (ZPOOL_STATUS_REBUILD_SCRUB);
			}
		}
	}

	/*
	 * The multihost property is set and the pool may be active.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_ACTIVE) {
		mmp_state_t mmp_state;
		nvlist_t *nvinfo;

		nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
		mmp_state = fnvlist_lookup_uint64(nvinfo,
		    ZPOOL_CONFIG_MMP_STATE);

		if (mmp_state == MMP_STATE_ACTIVE)
			return (ZPOOL_STATUS_HOSTID_ACTIVE);
		else if (mmp_state == MMP_STATE_NO_HOSTID)
			return (ZPOOL_STATUS_HOSTID_REQUIRED);
		else
			return (ZPOOL_STATUS_HOSTID_MISMATCH);
	}

	/*
	 * Pool last accessed by another system.
	 */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
	if (hostid != 0 && (unsigned long)hostid != system_hostid &&
	    stateval == POOL_STATE_ACTIVE)
		return (ZPOOL_STATUS_HOSTID_MISMATCH);

	/*
	 * Newer on-disk version.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
		return (ZPOOL_STATUS_VERSION_NEWER);

	/*
	 * Unsupported feature(s).
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
		nvlist_t *nvinfo = fnvlist_lookup_nvlist(config,
		    ZPOOL_CONFIG_LOAD_INFO);
		if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
			return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
		return (ZPOOL_STATUS_UNSUP_FEAT_READ);
	}

	/*
	 * Check that the config is complete.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
		return (ZPOOL_STATUS_BAD_GUID_SUM);

	/*
	 * Check whether the pool has been suspended.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
	    &suspended) == 0) {
		uint64_t reason;

		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
		    &reason) == 0 && reason == ZIO_SUSPEND_MMP)
			return (ZPOOL_STATUS_IO_FAILURE_MMP);

		if (suspended == ZIO_FAILURE_MODE_CONTINUE)
			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
		return (ZPOOL_STATUS_IO_FAILURE_WAIT);
	}

	/*
	 * Could not read a log.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
		return (ZPOOL_STATUS_BAD_LOG);
	}

	/*
	 * Bad devices in non-replicated config.
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
		return (ZPOOL_STATUS_FAULTED_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_missing, B_TRUE))
		return (ZPOOL_STATUS_MISSING_DEV_NR);

	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    find_vdev_problem(nvroot, vdev_broken, B_TRUE))
		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);

	/*
	 * Corrupted pool metadata
	 */
	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
		return (ZPOOL_STATUS_CORRUPT_POOL);

	/*
	 * Persistent data errors.
	 */
	if (!isimport) {
		uint64_t nerr;
		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
		    &nerr) == 0 && nerr != 0)
			return (ZPOOL_STATUS_CORRUPT_DATA);
	}

	/*
	 * Faulted or missing devices in a replicated config.
	 */
	if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE))
		return (ZPOOL_STATUS_FAULTED_DEV_R);
	if (find_vdev_problem(nvroot, vdev_missing, B_TRUE))
		return (ZPOOL_STATUS_MISSING_DEV_R);
	if (find_vdev_problem(nvroot, vdev_broken, B_TRUE))
		return (ZPOOL_STATUS_CORRUPT_LABEL_R);

	/*
	 * Devices with errors
	 */
	if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE))
		return (ZPOOL_STATUS_FAILING_DEV);

	/*
	 * Offlined devices
	 */
	if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE))
		return (ZPOOL_STATUS_OFFLINE_DEV);

	/*
	 * Removed device
	 */
	if (find_vdev_problem(nvroot, vdev_removed, B_TRUE))
		return (ZPOOL_STATUS_REMOVED_DEV);

	/*
	 * Suboptimal, but usable, ashift configuration.
	 */
	if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE))
		return (ZPOOL_STATUS_NON_NATIVE_ASHIFT);

	/*
	 * Informational errata available.
	 */
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata);
	if (errata) {
		*erratap = errata;
		return (ZPOOL_STATUS_ERRATA);
	}

	/*
	 * Outdated, but usable, version
	 */
	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) {
		/* "legacy" compatibility disables old version reporting */
		if (compat != NULL && strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0)
			return (ZPOOL_STATUS_OK);
		else
			return (ZPOOL_STATUS_VERSION_OLDER);
	}

	/*
	 * Usable pool with disabled or superfluous features
	 * (superfluous = beyond what's requested by 'compatibility')
	 */
	if (version >= SPA_VERSION_FEATURES) {
		int i;
		nvlist_t *feat;

		if (isimport) {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_LOAD_INFO);
			if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT))
				feat = fnvlist_lookup_nvlist(feat,
				    ZPOOL_CONFIG_ENABLED_FEAT);
		} else {
			feat = fnvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_FEATURE_STATS);
		}

		/* check against all features, or limited set? */
		boolean_t c_features[SPA_FEATURES];

		switch (zpool_load_compat(compat, c_features, NULL, 0)) {
		case ZPOOL_COMPATIBILITY_OK:
		case ZPOOL_COMPATIBILITY_WARNTOKEN:
			break;
		default:
			return (ZPOOL_STATUS_COMPATIBILITY_ERR);
		}
		for (i = 0; i < SPA_FEATURES; i++) {
			zfeature_info_t *fi = &spa_feature_table[i];
			if (!fi->fi_zfs_mod_supported)
				continue;
			if (c_features[i] && !nvlist_exists(feat, fi->fi_guid))
				return (ZPOOL_STATUS_FEAT_DISABLED);
			if (!c_features[i] && nvlist_exists(feat, fi->fi_guid))
				return (ZPOOL_STATUS_INCOMPATIBLE_FEAT);
		}
	}

	return (ZPOOL_STATUS_OK);
}

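/*
 * Determine the status of an active pool, along with the FMA message ID
 * (if any) that describes the most severe fault found.
 */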
zpool_status_t
zpool_get_status(zpool_handle_t *zhp, const char **msgid,
    zpool_errata_t *errata)
{
	/*
	 * Pass in the desired feature set, as it affects the check for
	 * disabled features.
	 */
	char compatibility[ZFS_MAXPROPLEN];
	if (zpool_get_prop(zhp, ZPOOL_PROP_COMPATIBILITY, compatibility,
	    ZFS_MAXPROPLEN, NULL, B_FALSE) != 0)
		compatibility[0] = '\0';

	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata,
	    compatibility);

	if (msgid != NULL) {
		if (ret >= NMSGID)
			*msgid = NULL;
		else
			*msgid = zfs_msgid_table[ret];
	}
	return (ret);
}

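/*
 * Determine the status of a pool from its exported config (as seen at import
 * time), along with the FMA message ID (if any) that describes the fault.
 */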
zpool_status_t
zpool_import_status(nvlist_t *config, const char **msgid,
    zpool_errata_t *errata)
{
	zpool_status_t ret = check_status(config, B_TRUE, errata, NULL);

	if (ret >= NMSGID)
		*msgid = NULL;
	else
		*msgid = zfs_msgid_table[ret];

	return (ret);
}