/* zio_inject.c revision 10921:8aac17999e4d */
1276478Sngie/* 2272343Sngie * CDDL HEADER START 3272343Sngie * 4272343Sngie * The contents of this file are subject to the terms of the 5272343Sngie * Common Development and Distribution License (the "License"). 6272343Sngie * You may not use this file except in compliance with the License. 7272343Sngie * 8272343Sngie * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9272343Sngie * or http://www.opensolaris.org/os/licensing. 10272343Sngie * See the License for the specific language governing permissions 11272343Sngie * and limitations under the License. 12272343Sngie * 13272343Sngie * When distributing Covered Code, include this CDDL HEADER in each 14272343Sngie * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15272343Sngie * If applicable, add the following below this CDDL HEADER, with the 16272343Sngie * fields enclosed by brackets "[]" replaced with your own identifying 17272343Sngie * information: Portions Copyright [yyyy] [name of copyright owner] 18272343Sngie * 19272343Sngie * CDDL HEADER END 20272343Sngie */ 21272343Sngie/* 22272343Sngie * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23272343Sngie * Use is subject to license terms. 24272343Sngie */ 25272343Sngie 26272343Sngie/* 27272343Sngie * ZFS fault injection 28272343Sngie * 29272343Sngie * To handle fault injection, we keep track of a series of zinject_record_t 30272343Sngie * structures which describe which logical block(s) should be injected with a 31272343Sngie * fault. These are kept in a global list. Each record corresponds to a given 32276478Sngie * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 33272343Sngie * or exported while the injection record exists. 34272343Sngie * 35272343Sngie * Device level injection is done using the 'zi_guid' field. If this is set, it 36272343Sngie * means that the error is destined for a particular device, not a piece of 37272343Sngie * data. 
38272343Sngie * 39272343Sngie * This is a rather poor data structure and algorithm, but we don't expect more 40272343Sngie * than a few faults at any one time, so it should be sufficient for our needs. 41272343Sngie */ 42272343Sngie 43272343Sngie#include <sys/arc.h> 44272343Sngie#include <sys/zio_impl.h> 45272343Sngie#include <sys/zfs_ioctl.h> 46272343Sngie#include <sys/spa_impl.h> 47272343Sngie#include <sys/vdev_impl.h> 48272343Sngie#include <sys/fs/zfs.h> 49272343Sngie 50272343Sngieuint32_t zio_injection_enabled; 51272343Sngie 52272343Sngietypedef struct inject_handler { 53272343Sngie int zi_id; 54272343Sngie spa_t *zi_spa; 55272343Sngie zinject_record_t zi_record; 56272343Sngie list_node_t zi_link; 57272343Sngie} inject_handler_t; 58272343Sngie 59272343Sngiestatic list_t inject_handlers; 60272343Sngiestatic krwlock_t inject_lock; 61272343Sngiestatic int inject_next_id = 1; 62272343Sngie 63272343Sngie/* 64272343Sngie * Returns true if the given record matches the I/O in progress. 65272343Sngie */ 66272343Sngiestatic boolean_t 67272343Sngiezio_match_handler(zbookmark_t *zb, uint64_t type, 68272343Sngie zinject_record_t *record, int error) 69272343Sngie{ 70272343Sngie /* 71272343Sngie * Check for a match against the MOS, which is based on type 72272343Sngie */ 73272343Sngie if (zb->zb_objset == 0 && record->zi_objset == 0 && 74272343Sngie record->zi_object == 0) { 75272343Sngie if (record->zi_type == DMU_OT_NONE || 76272343Sngie type == record->zi_type) 77272343Sngie return (record->zi_freq == 0 || 78272343Sngie spa_get_random(100) < record->zi_freq); 79272343Sngie else 80272343Sngie return (B_FALSE); 81272343Sngie } 82272343Sngie 83272343Sngie /* 84272343Sngie * Check for an exact match. 
85272343Sngie */ 86272343Sngie if (zb->zb_objset == record->zi_objset && 87272343Sngie zb->zb_object == record->zi_object && 88272343Sngie zb->zb_level == record->zi_level && 89272343Sngie zb->zb_blkid >= record->zi_start && 90272343Sngie zb->zb_blkid <= record->zi_end && 91272343Sngie error == record->zi_error) 92272343Sngie return (record->zi_freq == 0 || 93272343Sngie spa_get_random(100) < record->zi_freq); 94272343Sngie 95272343Sngie return (B_FALSE); 96272343Sngie} 97272343Sngie 98272343Sngie/* 99272343Sngie * Panic the system when a config change happens in the function 100272343Sngie * specified by tag. 101272343Sngie */ 102272343Sngievoid 103272343Sngiezio_handle_panic_injection(spa_t *spa, char *tag) 104272343Sngie{ 105272343Sngie inject_handler_t *handler; 106272343Sngie 107272343Sngie rw_enter(&inject_lock, RW_READER); 108272343Sngie 109272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 110272343Sngie handler = list_next(&inject_handlers, handler)) { 111272343Sngie 112272343Sngie if (spa != handler->zi_spa) 113272343Sngie continue; 114272343Sngie 115272343Sngie if (strcmp(tag, handler->zi_record.zi_func) == 0) 116272343Sngie panic("Panic requested in function %s\n", tag); 117272343Sngie } 118272343Sngie 119272343Sngie rw_exit(&inject_lock); 120272343Sngie} 121272343Sngie 122272343Sngie/* 123272343Sngie * Determine if the I/O in question should return failure. Returns the errno 124272343Sngie * to be returned to the caller. 125272343Sngie */ 126272343Sngieint 127272343Sngiezio_handle_fault_injection(zio_t *zio, int error) 128272343Sngie{ 129272343Sngie int ret = 0; 130272343Sngie inject_handler_t *handler; 131272343Sngie 132272343Sngie /* 133272343Sngie * Ignore I/O not associated with any logical data. 134272343Sngie */ 135272343Sngie if (zio->io_logical == NULL) 136272343Sngie return (0); 137272343Sngie 138272343Sngie /* 139272343Sngie * Currently, we only support fault injection on reads. 
140272343Sngie */ 141272343Sngie if (zio->io_type != ZIO_TYPE_READ) 142272343Sngie return (0); 143272343Sngie 144272343Sngie rw_enter(&inject_lock, RW_READER); 145272343Sngie 146272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 147272343Sngie handler = list_next(&inject_handlers, handler)) { 148272343Sngie 149272343Sngie /* Ignore errors not destined for this pool */ 150272343Sngie if (zio->io_spa != handler->zi_spa) 151272343Sngie continue; 152272343Sngie 153272343Sngie /* Ignore device errors and panic injection */ 154272343Sngie if (handler->zi_record.zi_guid != 0 || 155272343Sngie handler->zi_record.zi_func[0] != '\0' || 156272343Sngie handler->zi_record.zi_duration != 0) 157272343Sngie continue; 158272343Sngie 159272343Sngie /* If this handler matches, return EIO */ 160272343Sngie if (zio_match_handler(&zio->io_logical->io_bookmark, 161272343Sngie zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 162272343Sngie &handler->zi_record, error)) { 163272343Sngie ret = error; 164272343Sngie break; 165272343Sngie } 166272343Sngie } 167272343Sngie 168272343Sngie rw_exit(&inject_lock); 169272343Sngie 170272343Sngie return (ret); 171272343Sngie} 172272343Sngie 173272343Sngie/* 174272343Sngie * Determine if the zio is part of a label update and has an injection 175272343Sngie * handler associated with that portion of the label. Currently, we 176272343Sngie * allow error injection in either the nvlist or the uberblock region of 177272343Sngie * of the vdev label. 
178272343Sngie */ 179272343Sngieint 180272343Sngiezio_handle_label_injection(zio_t *zio, int error) 181272343Sngie{ 182272343Sngie inject_handler_t *handler; 183272343Sngie vdev_t *vd = zio->io_vd; 184272343Sngie uint64_t offset = zio->io_offset; 185272343Sngie int label; 186272343Sngie int ret = 0; 187272343Sngie 188272343Sngie if (offset >= VDEV_LABEL_START_SIZE && 189272343Sngie offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) 190272343Sngie return (0); 191272343Sngie 192272343Sngie rw_enter(&inject_lock, RW_READER); 193272343Sngie 194272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 195272343Sngie handler = list_next(&inject_handlers, handler)) { 196272343Sngie uint64_t start = handler->zi_record.zi_start; 197272343Sngie uint64_t end = handler->zi_record.zi_end; 198272343Sngie 199272343Sngie /* Ignore device only faults or panic injection */ 200272343Sngie if (handler->zi_record.zi_start == 0 || 201272343Sngie handler->zi_record.zi_func[0] != '\0' || 202272343Sngie handler->zi_record.zi_duration != 0) 203272343Sngie continue; 204272343Sngie 205272343Sngie /* 206272343Sngie * The injection region is the relative offsets within a 207272343Sngie * vdev label. We must determine the label which is being 208272343Sngie * updated and adjust our region accordingly. 
209272343Sngie */ 210272343Sngie label = vdev_label_number(vd->vdev_psize, offset); 211272343Sngie start = vdev_label_offset(vd->vdev_psize, label, start); 212272343Sngie end = vdev_label_offset(vd->vdev_psize, label, end); 213272343Sngie 214272343Sngie if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && 215272343Sngie (offset >= start && offset <= end)) { 216272343Sngie ret = error; 217272343Sngie break; 218272343Sngie } 219272343Sngie } 220272343Sngie rw_exit(&inject_lock); 221272343Sngie return (ret); 222272343Sngie} 223272343Sngie 224272343Sngie 225272343Sngieint 226272343Sngiezio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) 227272343Sngie{ 228272343Sngie inject_handler_t *handler; 229272343Sngie int ret = 0; 230272343Sngie 231272343Sngie /* 232272343Sngie * We skip over faults in the labels unless it's during 233272343Sngie * device open (i.e. zio == NULL). 234272343Sngie */ 235272343Sngie if (zio != NULL) { 236272343Sngie uint64_t offset = zio->io_offset; 237272343Sngie 238272343Sngie if (offset < VDEV_LABEL_START_SIZE || 239272343Sngie offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) 240272343Sngie return (0); 241272343Sngie } 242272343Sngie 243272343Sngie rw_enter(&inject_lock, RW_READER); 244272343Sngie 245272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 246272343Sngie handler = list_next(&inject_handlers, handler)) { 247272343Sngie 248272343Sngie /* 249272343Sngie * Ignore label specific faults, panic injection 250272343Sngie * or fake writes 251272343Sngie */ 252272343Sngie if (handler->zi_record.zi_start != 0 || 253272343Sngie handler->zi_record.zi_func[0] != '\0' || 254272343Sngie handler->zi_record.zi_duration != 0) 255272343Sngie continue; 256272343Sngie 257272343Sngie if (vd->vdev_guid == handler->zi_record.zi_guid) { 258272343Sngie if (handler->zi_record.zi_failfast && 259272343Sngie (zio == NULL || (zio->io_flags & 260272343Sngie (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { 261272343Sngie continue; 
262272343Sngie } 263272343Sngie 264272343Sngie /* Handle type specific I/O failures */ 265272343Sngie if (zio != NULL && 266272343Sngie handler->zi_record.zi_iotype != ZIO_TYPES && 267272343Sngie handler->zi_record.zi_iotype != zio->io_type) 268272343Sngie continue; 269272343Sngie 270272343Sngie if (handler->zi_record.zi_error == error) { 271272343Sngie /* 272272343Sngie * For a failed open, pretend like the device 273272343Sngie * has gone away. 274272343Sngie */ 275272343Sngie if (error == ENXIO) 276272343Sngie vd->vdev_stat.vs_aux = 277272343Sngie VDEV_AUX_OPEN_FAILED; 278272343Sngie ret = error; 279272343Sngie break; 280272343Sngie } 281272343Sngie if (handler->zi_record.zi_error == ENXIO) { 282272343Sngie ret = EIO; 283272343Sngie break; 284272343Sngie } 285272343Sngie } 286272343Sngie } 287272343Sngie 288272343Sngie rw_exit(&inject_lock); 289272343Sngie 290272343Sngie return (ret); 291272343Sngie} 292272343Sngie 293272343Sngie/* 294272343Sngie * Simulate hardware that ignores cache flushes. For requested number 295272343Sngie * of seconds nix the actual writing to disk. 
296272343Sngie */ 297272343Sngievoid 298272343Sngiezio_handle_ignored_writes(zio_t *zio) 299272343Sngie{ 300272343Sngie inject_handler_t *handler; 301272343Sngie 302272343Sngie rw_enter(&inject_lock, RW_READER); 303272343Sngie 304272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 305272343Sngie handler = list_next(&inject_handlers, handler)) { 306272343Sngie 307272343Sngie /* Ignore errors not destined for this pool */ 308272343Sngie if (zio->io_spa != handler->zi_spa) 309272343Sngie continue; 310272343Sngie 311272343Sngie if (handler->zi_record.zi_duration == 0) 312272343Sngie continue; 313272343Sngie 314272343Sngie /* 315272343Sngie * Positive duration implies # of seconds, negative 316272343Sngie * a number of txgs 317272343Sngie */ 318272343Sngie if (handler->zi_record.zi_timer == 0) { 319272343Sngie if (handler->zi_record.zi_duration > 0) 320272343Sngie handler->zi_record.zi_timer = lbolt64; 321272343Sngie else 322272343Sngie handler->zi_record.zi_timer = zio->io_txg; 323272343Sngie } 324272343Sngie zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; 325272343Sngie break; 326272343Sngie } 327272343Sngie 328272343Sngie rw_exit(&inject_lock); 329272343Sngie} 330272343Sngie 331272343Sngievoid 332272343Sngiespa_handle_ignored_writes(spa_t *spa) 333272343Sngie{ 334272343Sngie inject_handler_t *handler; 335272343Sngie 336272343Sngie if (zio_injection_enabled == 0) 337272343Sngie return; 338272343Sngie 339272343Sngie rw_enter(&inject_lock, RW_READER); 340272343Sngie 341272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 342272343Sngie handler = list_next(&inject_handlers, handler)) { 343272343Sngie 344272343Sngie /* Ignore errors not destined for this pool */ 345272343Sngie if (spa != handler->zi_spa) 346272343Sngie continue; 347272343Sngie 348272343Sngie if (handler->zi_record.zi_duration == 0) 349272343Sngie continue; 350272343Sngie 351272343Sngie if (handler->zi_record.zi_duration > 0) { 352272343Sngie 
VERIFY(handler->zi_record.zi_timer == 0 || 353272343Sngie handler->zi_record.zi_timer + 354272343Sngie handler->zi_record.zi_duration * hz > lbolt64); 355272343Sngie } else { 356272343Sngie /* duration is negative so the subtraction here adds */ 357272343Sngie VERIFY(handler->zi_record.zi_timer == 0 || 358272343Sngie handler->zi_record.zi_timer - 359272343Sngie handler->zi_record.zi_duration >= 360272343Sngie spa->spa_syncing_txg); 361272343Sngie } 362272343Sngie } 363272343Sngie 364272343Sngie rw_exit(&inject_lock); 365272343Sngie} 366272343Sngie 367272343Sngie/* 368272343Sngie * Create a new handler for the given record. We add it to the list, adding 369272343Sngie * a reference to the spa_t in the process. We increment zio_injection_enabled, 370272343Sngie * which is the switch to trigger all fault injection. 371272343Sngie */ 372272343Sngieint 373272343Sngiezio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 374272343Sngie{ 375272343Sngie inject_handler_t *handler; 376272343Sngie int error; 377272343Sngie spa_t *spa; 378272343Sngie 379272343Sngie /* 380272343Sngie * If this is pool-wide metadata, make sure we unload the corresponding 381272343Sngie * spa_t, so that the next attempt to load it will trigger the fault. 382272343Sngie * We call spa_reset() to unload the pool appropriately. 383272343Sngie */ 384272343Sngie if (flags & ZINJECT_UNLOAD_SPA) 385272343Sngie if ((error = spa_reset(name)) != 0) 386272343Sngie return (error); 387272343Sngie 388272343Sngie if (!(flags & ZINJECT_NULL)) { 389272343Sngie /* 390272343Sngie * spa_inject_ref() will add an injection reference, which will 391272343Sngie * prevent the pool from being removed from the namespace while 392272343Sngie * still allowing it to be unloaded. 
393272343Sngie */ 394272343Sngie if ((spa = spa_inject_addref(name)) == NULL) 395272343Sngie return (ENOENT); 396272343Sngie 397272343Sngie handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 398272343Sngie 399272343Sngie rw_enter(&inject_lock, RW_WRITER); 400272343Sngie 401272343Sngie *id = handler->zi_id = inject_next_id++; 402272343Sngie handler->zi_spa = spa; 403272343Sngie handler->zi_record = *record; 404272343Sngie list_insert_tail(&inject_handlers, handler); 405272343Sngie atomic_add_32(&zio_injection_enabled, 1); 406272343Sngie 407272343Sngie rw_exit(&inject_lock); 408272343Sngie } 409272343Sngie 410272343Sngie /* 411272343Sngie * Flush the ARC, so that any attempts to read this data will end up 412272343Sngie * going to the ZIO layer. Note that this is a little overkill, but 413272343Sngie * we don't have the necessary ARC interfaces to do anything else, and 414272343Sngie * fault injection isn't a performance critical path. 415272343Sngie */ 416272343Sngie if (flags & ZINJECT_FLUSH_ARC) 417272343Sngie arc_flush(NULL); 418272343Sngie 419272343Sngie return (0); 420272343Sngie} 421272343Sngie 422272343Sngie/* 423272343Sngie * Returns the next record with an ID greater than that supplied to the 424272343Sngie * function. Used to iterate over all handlers in the system. 
425272343Sngie */ 426272343Sngieint 427272343Sngiezio_inject_list_next(int *id, char *name, size_t buflen, 428272343Sngie zinject_record_t *record) 429272343Sngie{ 430272343Sngie inject_handler_t *handler; 431272343Sngie int ret; 432272343Sngie 433272343Sngie mutex_enter(&spa_namespace_lock); 434272343Sngie rw_enter(&inject_lock, RW_READER); 435272343Sngie 436272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 437272343Sngie handler = list_next(&inject_handlers, handler)) 438272343Sngie if (handler->zi_id > *id) 439272343Sngie break; 440272343Sngie 441272343Sngie if (handler) { 442272343Sngie *record = handler->zi_record; 443272343Sngie *id = handler->zi_id; 444272343Sngie (void) strncpy(name, spa_name(handler->zi_spa), buflen); 445272343Sngie ret = 0; 446272343Sngie } else { 447272343Sngie ret = ENOENT; 448272343Sngie } 449272343Sngie 450272343Sngie rw_exit(&inject_lock); 451272343Sngie mutex_exit(&spa_namespace_lock); 452272343Sngie 453272343Sngie return (ret); 454272343Sngie} 455272343Sngie 456272343Sngie/* 457272343Sngie * Clear the fault handler with the given identifier, or return ENOENT if none 458272343Sngie * exists. 
459272343Sngie */ 460272343Sngieint 461272343Sngiezio_clear_fault(int id) 462272343Sngie{ 463272343Sngie inject_handler_t *handler; 464272343Sngie int ret; 465272343Sngie 466272343Sngie rw_enter(&inject_lock, RW_WRITER); 467272343Sngie 468272343Sngie for (handler = list_head(&inject_handlers); handler != NULL; 469272343Sngie handler = list_next(&inject_handlers, handler)) 470272343Sngie if (handler->zi_id == id) 471272343Sngie break; 472272343Sngie 473272343Sngie if (handler == NULL) { 474272343Sngie ret = ENOENT; 475272343Sngie } else { 476272343Sngie list_remove(&inject_handlers, handler); 477272343Sngie spa_inject_delref(handler->zi_spa); 478272343Sngie kmem_free(handler, sizeof (inject_handler_t)); 479272343Sngie atomic_add_32(&zio_injection_enabled, -1); 480272343Sngie ret = 0; 481272343Sngie } 482272343Sngie 483272343Sngie rw_exit(&inject_lock); 484272343Sngie 485272343Sngie return (ret); 486272343Sngie} 487272343Sngie 488272343Sngievoid 489272343Sngiezio_inject_init(void) 490272343Sngie{ 491272343Sngie rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); 492272343Sngie list_create(&inject_handlers, sizeof (inject_handler_t), 493272343Sngie offsetof(inject_handler_t, zi_link)); 494272343Sngie} 495272343Sngie 496272343Sngievoid 497272343Sngiezio_inject_fini(void) 498272343Sngie{ 499272343Sngie list_destroy(&inject_handlers); 500272343Sngie rw_destroy(&inject_lock); 501272343Sngie} 502272343Sngie