/* libzfs_status.c revision 245479 */
1132720Skan/* 2132720Skan * CDDL HEADER START 3132720Skan * 4132720Skan * The contents of this file are subject to the terms of the 5132720Skan * Common Development and Distribution License (the "License"). 6132720Skan * You may not use this file except in compliance with the License. 7132720Skan * 8132720Skan * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9132720Skan * or http://www.opensolaris.org/os/licensing. 10132720Skan * See the License for the specific language governing permissions 11132720Skan * and limitations under the License. 12132720Skan * 13132720Skan * When distributing Covered Code, include this CDDL HEADER in each 14132720Skan * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15132720Skan * If applicable, add the following below this CDDL HEADER, with the 16132720Skan * fields enclosed by brackets "[]" replaced with your own identifying 17132720Skan * information: Portions Copyright [yyyy] [name of copyright owner] 18132720Skan * 19132720Skan * CDDL HEADER END 20132720Skan */ 21169691Skan 22132720Skan/* 23132720Skan * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24132720Skan * Copyright (c) 2012 by Delphix. All rights reserved. 25132720Skan */ 26132720Skan 27132720Skan/* 28132720Skan * This file contains the functions which analyze the status of a pool. This 29132720Skan * include both the status of an active pool, as well as the status exported 30132720Skan * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 31132720Skan * the pool. This status is independent (to a certain degree) from the state of 32132720Skan * the pool. A pool's state describes only whether or not it is capable of 33169691Skan * providing the necessary fault tolerance for data. The status describes the 34132720Skan * overall status of devices. A pool that is online can still have a device 35132720Skan * that is experiencing errors. 
36132720Skan * 37169691Skan * Only a subset of the possible faults can be detected using 'zpool status', 38169691Skan * and not all possible errors correspond to a FMA message ID. The explanation 39132720Skan * is left up to the caller, depending on whether it is a live pool or an 40132720Skan * import. 41132720Skan */ 42132720Skan 43132720Skan#include <libzfs.h> 44132720Skan#include <string.h> 45132720Skan#include <unistd.h> 46132720Skan#include "libzfs_impl.h" 47132720Skan#include "zfeature_common.h" 48132720Skan 49132720Skan/* 50132720Skan * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 51132720Skan * in libzfs.h. Note that there are some status results which go past the end 52169691Skan * of this table, and hence have no associated message ID. 53169691Skan */ 54132720Skanstatic char *zfs_msgid_table[] = { 55132720Skan "ZFS-8000-14", 56132720Skan "ZFS-8000-2Q", 57132720Skan "ZFS-8000-3C", 58132720Skan "ZFS-8000-4J", 59132720Skan "ZFS-8000-5E", 60169691Skan "ZFS-8000-6X", 61169691Skan "ZFS-8000-72", 62169691Skan "ZFS-8000-8A", 63169691Skan "ZFS-8000-9P", 64132720Skan "ZFS-8000-A5", 65169691Skan "ZFS-8000-EY", 66132720Skan "ZFS-8000-HC", 67132720Skan "ZFS-8000-JQ", 68132720Skan "ZFS-8000-K4", 69132720Skan}; 70 71#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 72 73/* ARGSUSED */ 74static int 75vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 76{ 77 return (state == VDEV_STATE_CANT_OPEN && 78 aux == VDEV_AUX_OPEN_FAILED); 79} 80 81/* ARGSUSED */ 82static int 83vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 84{ 85 return (state == VDEV_STATE_FAULTED); 86} 87 88/* ARGSUSED */ 89static int 90vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 91{ 92 return (state == VDEV_STATE_DEGRADED || errs != 0); 93} 94 95/* ARGSUSED */ 96static int 97vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 98{ 99 return (state == VDEV_STATE_CANT_OPEN); 100} 101 102/* ARGSUSED */ 103static int 
104vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 105{ 106 return (state == VDEV_STATE_OFFLINE); 107} 108 109/* ARGSUSED */ 110static int 111vdev_removed(uint64_t state, uint64_t aux, uint64_t errs) 112{ 113 return (state == VDEV_STATE_REMOVED); 114} 115 116/* 117 * Detect if any leaf devices that have seen errors or could not be opened. 118 */ 119static boolean_t 120find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 121{ 122 nvlist_t **child; 123 vdev_stat_t *vs; 124 uint_t c, children; 125 char *type; 126 127 /* 128 * Ignore problems within a 'replacing' vdev, since we're presumably in 129 * the process of repairing any such errors, and don't want to call them 130 * out again. We'll pick up the fact that a resilver is happening 131 * later. 132 */ 133 verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 134 if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 135 return (B_FALSE); 136 137 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 138 &children) == 0) { 139 for (c = 0; c < children; c++) 140 if (find_vdev_problem(child[c], func)) 141 return (B_TRUE); 142 } else { 143 verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, 144 (uint64_t **)&vs, &c) == 0); 145 146 if (func(vs->vs_state, vs->vs_aux, 147 vs->vs_read_errors + 148 vs->vs_write_errors + 149 vs->vs_checksum_errors)) 150 return (B_TRUE); 151 } 152 153 /* 154 * Check any L2 cache devs 155 */ 156 if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, 157 &children) == 0) { 158 for (c = 0; c < children; c++) 159 if (find_vdev_problem(child[c], func)) 160 return (B_TRUE); 161 } 162 163 return (B_FALSE); 164} 165 166/* 167 * Active pool health status. 168 * 169 * To determine the status for a pool, we make several passes over the config, 170 * picking the most egregious error we find. 
In order of importance, we do the 171 * following: 172 * 173 * - Check for a complete and valid configuration 174 * - Look for any faulted or missing devices in a non-replicated config 175 * - Check for any data errors 176 * - Check for any faulted or missing devices in a replicated config 177 * - Look for any devices showing errors 178 * - Check for any resilvering devices 179 * 180 * There can obviously be multiple errors within a single pool, so this routine 181 * only picks the most damaging of all the current errors to report. 182 */ 183static zpool_status_t 184check_status(nvlist_t *config, boolean_t isimport) 185{ 186 nvlist_t *nvroot; 187 vdev_stat_t *vs; 188 pool_scan_stat_t *ps = NULL; 189 uint_t vsc, psc; 190 uint64_t nerr; 191 uint64_t version; 192 uint64_t stateval; 193 uint64_t suspended; 194 uint64_t hostid = 0; 195 196 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 197 &version) == 0); 198 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 199 &nvroot) == 0); 200 verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, 201 (uint64_t **)&vs, &vsc) == 0); 202 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 203 &stateval) == 0); 204 205 /* 206 * Currently resilvering a vdev 207 */ 208 (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, 209 (uint64_t **)&ps, &psc); 210 if (ps && ps->pss_func == POOL_SCAN_RESILVER && 211 ps->pss_state == DSS_SCANNING) 212 return (ZPOOL_STATUS_RESILVERING); 213 214 /* 215 * Pool last accessed by another system. 216 */ 217 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 218 if (hostid != 0 && (unsigned long)hostid != gethostid() && 219 stateval == POOL_STATE_ACTIVE) 220 return (ZPOOL_STATUS_HOSTID_MISMATCH); 221 222 /* 223 * Newer on-disk version. 224 */ 225 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 226 vs->vs_aux == VDEV_AUX_VERSION_NEWER) 227 return (ZPOOL_STATUS_VERSION_NEWER); 228 229 /* 230 * Unsupported feature(s). 
231 */ 232 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 233 vs->vs_aux == VDEV_AUX_UNSUP_FEAT) { 234 nvlist_t *nvinfo; 235 236 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, 237 &nvinfo) == 0); 238 if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) 239 return (ZPOOL_STATUS_UNSUP_FEAT_WRITE); 240 return (ZPOOL_STATUS_UNSUP_FEAT_READ); 241 } 242 243 /* 244 * Check that the config is complete. 245 */ 246 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 247 vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 248 return (ZPOOL_STATUS_BAD_GUID_SUM); 249 250 /* 251 * Check whether the pool has suspended due to failed I/O. 252 */ 253 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, 254 &suspended) == 0) { 255 if (suspended == ZIO_FAILURE_MODE_CONTINUE) 256 return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 257 return (ZPOOL_STATUS_IO_FAILURE_WAIT); 258 } 259 260 /* 261 * Could not read a log. 262 */ 263 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 264 vs->vs_aux == VDEV_AUX_BAD_LOG) { 265 return (ZPOOL_STATUS_BAD_LOG); 266 } 267 268 /* 269 * Bad devices in non-replicated config. 270 */ 271 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 272 find_vdev_problem(nvroot, vdev_faulted)) 273 return (ZPOOL_STATUS_FAULTED_DEV_NR); 274 275 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 276 find_vdev_problem(nvroot, vdev_missing)) 277 return (ZPOOL_STATUS_MISSING_DEV_NR); 278 279 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 280 find_vdev_problem(nvroot, vdev_broken)) 281 return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 282 283 /* 284 * Corrupted pool metadata 285 */ 286 if (vs->vs_state == VDEV_STATE_CANT_OPEN && 287 vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 288 return (ZPOOL_STATUS_CORRUPT_POOL); 289 290 /* 291 * Persistent data errors. 292 */ 293 if (!isimport) { 294 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 295 &nerr) == 0 && nerr != 0) 296 return (ZPOOL_STATUS_CORRUPT_DATA); 297 } 298 299 /* 300 * Missing devices in a replicated config. 
301 */ 302 if (find_vdev_problem(nvroot, vdev_faulted)) 303 return (ZPOOL_STATUS_FAULTED_DEV_R); 304 if (find_vdev_problem(nvroot, vdev_missing)) 305 return (ZPOOL_STATUS_MISSING_DEV_R); 306 if (find_vdev_problem(nvroot, vdev_broken)) 307 return (ZPOOL_STATUS_CORRUPT_LABEL_R); 308 309 /* 310 * Devices with errors 311 */ 312 if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 313 return (ZPOOL_STATUS_FAILING_DEV); 314 315 /* 316 * Offlined devices 317 */ 318 if (find_vdev_problem(nvroot, vdev_offlined)) 319 return (ZPOOL_STATUS_OFFLINE_DEV); 320 321 /* 322 * Removed device 323 */ 324 if (find_vdev_problem(nvroot, vdev_removed)) 325 return (ZPOOL_STATUS_REMOVED_DEV); 326 327 /* 328 * Outdated, but usable, version 329 */ 330 if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) 331 return (ZPOOL_STATUS_VERSION_OLDER); 332 333 /* 334 * Usable pool with disabled features 335 */ 336 if (version >= SPA_VERSION_FEATURES) { 337 int i; 338 nvlist_t *feat; 339 340 if (isimport) { 341 feat = fnvlist_lookup_nvlist(config, 342 ZPOOL_CONFIG_LOAD_INFO); 343 feat = fnvlist_lookup_nvlist(feat, 344 ZPOOL_CONFIG_ENABLED_FEAT); 345 } else { 346 feat = fnvlist_lookup_nvlist(config, 347 ZPOOL_CONFIG_FEATURE_STATS); 348 } 349 350 for (i = 0; i < SPA_FEATURES; i++) { 351 zfeature_info_t *fi = &spa_feature_table[i]; 352 if (!nvlist_exists(feat, fi->fi_guid)) 353 return (ZPOOL_STATUS_FEAT_DISABLED); 354 } 355 } 356 357 return (ZPOOL_STATUS_OK); 358} 359 360zpool_status_t 361zpool_get_status(zpool_handle_t *zhp, char **msgid) 362{ 363 zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 364 365 if (ret >= NMSGID) 366 *msgid = NULL; 367 else 368 *msgid = zfs_msgid_table[ret]; 369 370 return (ret); 371} 372 373zpool_status_t 374zpool_import_status(nvlist_t *config, char **msgid) 375{ 376 zpool_status_t ret = check_status(config, B_TRUE); 377 378 if (ret >= NMSGID) 379 *msgid = NULL; 380 else 381 *msgid = zfs_msgid_table[ret]; 382 383 return (ret); 384} 385 386static 
void 387dump_ddt_stat(const ddt_stat_t *dds, int h) 388{ 389 char refcnt[6]; 390 char blocks[6], lsize[6], psize[6], dsize[6]; 391 char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; 392 393 if (dds == NULL || dds->dds_blocks == 0) 394 return; 395 396 if (h == -1) 397 (void) strcpy(refcnt, "Total"); 398 else 399 zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); 400 401 zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); 402 zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); 403 zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); 404 zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); 405 zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); 406 zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); 407 zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); 408 zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); 409 410 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 411 refcnt, 412 blocks, lsize, psize, dsize, 413 ref_blocks, ref_lsize, ref_psize, ref_dsize); 414} 415 416/* 417 * Print the DDT histogram and the column totals. 418 */ 419void 420zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) 421{ 422 int h; 423 424 (void) printf("\n"); 425 426 (void) printf("bucket " 427 " allocated " 428 " referenced \n"); 429 (void) printf("______ " 430 "______________________________ " 431 "______________________________\n"); 432 433 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 434 "refcnt", 435 "blocks", "LSIZE", "PSIZE", "DSIZE", 436 "blocks", "LSIZE", "PSIZE", "DSIZE"); 437 438 (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", 439 "------", 440 "------", "-----", "-----", "-----", 441 "------", "-----", "-----", "-----"); 442 443 for (h = 0; h < 64; h++) 444 dump_ddt_stat(&ddh->ddh_stat[h], h); 445 446 dump_ddt_stat(dds_total, -1); 447 448 (void) printf("\n"); 449} 450