zio_inject.c revision 168404
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22168404Spjd * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 27168404Spjd 28168404Spjd/* 29168404Spjd * ZFS fault injection 30168404Spjd * 31168404Spjd * To handle fault injection, we keep track of a series of zinject_record_t 32168404Spjd * structures which describe which logical block(s) should be injected with a 33168404Spjd * fault. These are kept in a global list. Each record corresponds to a given 34168404Spjd * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 35168404Spjd * or exported while the injection record exists. 36168404Spjd * 37168404Spjd * Device level injection is done using the 'zi_guid' field. If this is set, it 38168404Spjd * means that the error is destined for a particular device, not a piece of 39168404Spjd * data. 40168404Spjd * 41168404Spjd * This is a rather poor data structure and algorithm, but we don't expect more 42168404Spjd * than a few faults at any one time, so it should be sufficient for our needs. 43168404Spjd */ 44168404Spjd 45168404Spjd#include <sys/arc.h> 46168404Spjd#include <sys/zio_impl.h> 47168404Spjd#include <sys/zfs_ioctl.h> 48168404Spjd#include <sys/spa_impl.h> 49168404Spjd#include <sys/vdev_impl.h> 50168404Spjd 51168404Spjduint32_t zio_injection_enabled; 52168404Spjd 53168404Spjdtypedef struct inject_handler { 54168404Spjd int zi_id; 55168404Spjd spa_t *zi_spa; 56168404Spjd zinject_record_t zi_record; 57168404Spjd list_node_t zi_link; 58168404Spjd} inject_handler_t; 59168404Spjd 60168404Spjdstatic list_t inject_handlers; 61168404Spjdstatic krwlock_t inject_lock; 62168404Spjdstatic int inject_next_id = 1; 63168404Spjd 64168404Spjd/* 65168404Spjd * Returns true if the given record matches the I/O in progress. 66168404Spjd */ 67168404Spjdstatic boolean_t 68168404Spjdzio_match_handler(zbookmark_t *zb, uint64_t type, 69168404Spjd zinject_record_t *record, int error) 70168404Spjd{ 71168404Spjd /* 72168404Spjd * Check for a match against the MOS, which is based on type 73168404Spjd */ 74168404Spjd if (zb->zb_objset == 0 && record->zi_objset == 0 && 75168404Spjd record->zi_object == 0) { 76168404Spjd if (record->zi_type == DMU_OT_NONE || 77168404Spjd type == record->zi_type) 78168404Spjd return (record->zi_freq == 0 || 79168404Spjd spa_get_random(100) < record->zi_freq); 80168404Spjd else 81168404Spjd return (B_FALSE); 82168404Spjd } 83168404Spjd 84168404Spjd /* 85168404Spjd * Check for an exact match. 86168404Spjd */ 87168404Spjd if (zb->zb_objset == record->zi_objset && 88168404Spjd zb->zb_object == record->zi_object && 89168404Spjd zb->zb_level == record->zi_level && 90168404Spjd zb->zb_blkid >= record->zi_start && 91168404Spjd zb->zb_blkid <= record->zi_end && 92168404Spjd error == record->zi_error) 93168404Spjd return (record->zi_freq == 0 || 94168404Spjd spa_get_random(100) < record->zi_freq); 95168404Spjd 96168404Spjd return (B_FALSE); 97168404Spjd} 98168404Spjd 99168404Spjd/* 100168404Spjd * Determine if the I/O in question should return failure. Returns the errno 101168404Spjd * to be returned to the caller. 102168404Spjd */ 103168404Spjdint 104168404Spjdzio_handle_fault_injection(zio_t *zio, int error) 105168404Spjd{ 106168404Spjd int ret = 0; 107168404Spjd inject_handler_t *handler; 108168404Spjd 109168404Spjd /* 110168404Spjd * Ignore I/O not associated with any logical data. 111168404Spjd */ 112168404Spjd if (zio->io_logical == NULL) 113168404Spjd return (0); 114168404Spjd 115168404Spjd /* 116168404Spjd * Currently, we only support fault injection on reads. 117168404Spjd */ 118168404Spjd if (zio->io_type != ZIO_TYPE_READ) 119168404Spjd return (0); 120168404Spjd 121168404Spjd rw_enter(&inject_lock, RW_READER); 122168404Spjd 123168404Spjd for (handler = list_head(&inject_handlers); handler != NULL; 124168404Spjd handler = list_next(&inject_handlers, handler)) { 125168404Spjd 126168404Spjd /* Ignore errors not destined for this pool */ 127168404Spjd if (zio->io_spa != handler->zi_spa) 128168404Spjd continue; 129168404Spjd 130168404Spjd /* Ignore device errors */ 131168404Spjd if (handler->zi_record.zi_guid != 0) 132168404Spjd continue; 133168404Spjd 134168404Spjd /* If this handler matches, return EIO */ 135168404Spjd if (zio_match_handler(&zio->io_logical->io_bookmark, 136168404Spjd zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 137168404Spjd &handler->zi_record, error)) { 138168404Spjd ret = error; 139168404Spjd break; 140168404Spjd } 141168404Spjd } 142168404Spjd 143168404Spjd rw_exit(&inject_lock); 144168404Spjd 145168404Spjd return (ret); 146168404Spjd} 147168404Spjd 148168404Spjdint 149168404Spjdzio_handle_device_injection(vdev_t *vd, int error) 150168404Spjd{ 151168404Spjd inject_handler_t *handler; 152168404Spjd int ret = 0; 153168404Spjd 154168404Spjd rw_enter(&inject_lock, RW_READER); 155168404Spjd 156168404Spjd for (handler = list_head(&inject_handlers); handler != NULL; 157168404Spjd handler = list_next(&inject_handlers, handler)) { 158168404Spjd 159168404Spjd if (vd->vdev_guid == handler->zi_record.zi_guid) { 160168404Spjd if (handler->zi_record.zi_error == error) { 161168404Spjd /* 162168404Spjd * For a failed open, pretend like the device 163168404Spjd * has gone away. 164168404Spjd */ 165168404Spjd if (error == ENXIO) 166168404Spjd vd->vdev_stat.vs_aux = 167168404Spjd VDEV_AUX_OPEN_FAILED; 168168404Spjd ret = error; 169168404Spjd break; 170168404Spjd } 171168404Spjd if (handler->zi_record.zi_error == ENXIO) { 172168404Spjd ret = EIO; 173168404Spjd break; 174168404Spjd } 175168404Spjd } 176168404Spjd } 177168404Spjd 178168404Spjd rw_exit(&inject_lock); 179168404Spjd 180168404Spjd return (ret); 181168404Spjd} 182168404Spjd 183168404Spjd/* 184168404Spjd * Create a new handler for the given record. We add it to the list, adding 185168404Spjd * a reference to the spa_t in the process. We increment zio_injection_enabled, 186168404Spjd * which is the switch to trigger all fault injection. 187168404Spjd */ 188168404Spjdint 189168404Spjdzio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 190168404Spjd{ 191168404Spjd inject_handler_t *handler; 192168404Spjd int error; 193168404Spjd spa_t *spa; 194168404Spjd 195168404Spjd /* 196168404Spjd * If this is pool-wide metadata, make sure we unload the corresponding 197168404Spjd * spa_t, so that the next attempt to load it will trigger the fault. 198168404Spjd * We call spa_reset() to unload the pool appropriately. 199168404Spjd */ 200168404Spjd if (flags & ZINJECT_UNLOAD_SPA) 201168404Spjd if ((error = spa_reset(name)) != 0) 202168404Spjd return (error); 203168404Spjd 204168404Spjd if (!(flags & ZINJECT_NULL)) { 205168404Spjd /* 206168404Spjd * spa_inject_ref() will add an injection reference, which will 207168404Spjd * prevent the pool from being removed from the namespace while 208168404Spjd * still allowing it to be unloaded. 209168404Spjd */ 210168404Spjd if ((spa = spa_inject_addref(name)) == NULL) 211168404Spjd return (ENOENT); 212168404Spjd 213168404Spjd handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 214168404Spjd 215168404Spjd rw_enter(&inject_lock, RW_WRITER); 216168404Spjd 217168404Spjd *id = handler->zi_id = inject_next_id++; 218168404Spjd handler->zi_spa = spa; 219168404Spjd handler->zi_record = *record; 220168404Spjd list_insert_tail(&inject_handlers, handler); 221168404Spjd atomic_add_32(&zio_injection_enabled, 1); 222168404Spjd 223168404Spjd rw_exit(&inject_lock); 224168404Spjd } 225168404Spjd 226168404Spjd /* 227168404Spjd * Flush the ARC, so that any attempts to read this data will end up 228168404Spjd * going to the ZIO layer. Note that this is a little overkill, but 229168404Spjd * we don't have the necessary ARC interfaces to do anything else, and 230168404Spjd * fault injection isn't a performance critical path. 231168404Spjd */ 232168404Spjd if (flags & ZINJECT_FLUSH_ARC) 233168404Spjd arc_flush(); 234168404Spjd 235168404Spjd return (0); 236168404Spjd} 237168404Spjd 238168404Spjd/* 239168404Spjd * Returns the next record with an ID greater than that supplied to the 240168404Spjd * function. Used to iterate over all handlers in the system. 241168404Spjd */ 242168404Spjdint 243168404Spjdzio_inject_list_next(int *id, char *name, size_t buflen, 244168404Spjd zinject_record_t *record) 245168404Spjd{ 246168404Spjd inject_handler_t *handler; 247168404Spjd int ret; 248168404Spjd 249168404Spjd mutex_enter(&spa_namespace_lock); 250168404Spjd rw_enter(&inject_lock, RW_READER); 251168404Spjd 252168404Spjd for (handler = list_head(&inject_handlers); handler != NULL; 253168404Spjd handler = list_next(&inject_handlers, handler)) 254168404Spjd if (handler->zi_id > *id) 255168404Spjd break; 256168404Spjd 257168404Spjd if (handler) { 258168404Spjd *record = handler->zi_record; 259168404Spjd *id = handler->zi_id; 260168404Spjd (void) strncpy(name, spa_name(handler->zi_spa), buflen); 261168404Spjd ret = 0; 262168404Spjd } else { 263168404Spjd ret = ENOENT; 264168404Spjd } 265168404Spjd 266168404Spjd rw_exit(&inject_lock); 267168404Spjd mutex_exit(&spa_namespace_lock); 268168404Spjd 269168404Spjd return (ret); 270168404Spjd} 271168404Spjd 272168404Spjd/* 273168404Spjd * Clear the fault handler with the given identifier, or return ENOENT if none 274168404Spjd * exists. 275168404Spjd */ 276168404Spjdint 277168404Spjdzio_clear_fault(int id) 278168404Spjd{ 279168404Spjd inject_handler_t *handler; 280168404Spjd int ret; 281168404Spjd 282168404Spjd rw_enter(&inject_lock, RW_WRITER); 283168404Spjd 284168404Spjd for (handler = list_head(&inject_handlers); handler != NULL; 285168404Spjd handler = list_next(&inject_handlers, handler)) 286168404Spjd if (handler->zi_id == id) 287168404Spjd break; 288168404Spjd 289168404Spjd if (handler == NULL) { 290168404Spjd ret = ENOENT; 291168404Spjd } else { 292168404Spjd list_remove(&inject_handlers, handler); 293168404Spjd spa_inject_delref(handler->zi_spa); 294168404Spjd kmem_free(handler, sizeof (inject_handler_t)); 295168404Spjd atomic_add_32(&zio_injection_enabled, -1); 296168404Spjd ret = 0; 297168404Spjd } 298168404Spjd 299168404Spjd rw_exit(&inject_lock); 300168404Spjd 301168404Spjd return (ret); 302168404Spjd} 303168404Spjd 304168404Spjdvoid 305168404Spjdzio_inject_init(void) 306168404Spjd{ 307168404Spjd list_create(&inject_handlers, sizeof (inject_handler_t), 308168404Spjd offsetof(inject_handler_t, zi_link)); 309168404Spjd} 310168404Spjd 311168404Spjdvoid 312168404Spjdzio_inject_fini(void) 313168404Spjd{ 314168404Spjd list_destroy(&inject_handlers); 315168404Spjd} 316