zio_inject.c revision 168404
1157016Sdes/* 2124208Sdes * CDDL HEADER START 3124208Sdes * 4124208Sdes * The contents of this file are subject to the terms of the 5124208Sdes * Common Development and Distribution License (the "License"). 6124208Sdes * You may not use this file except in compliance with the License. 7124208Sdes * 8124208Sdes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9124208Sdes * or http://www.opensolaris.org/os/licensing. 10124208Sdes * See the License for the specific language governing permissions 11124208Sdes * and limitations under the License. 12124208Sdes * 13124208Sdes * When distributing Covered Code, include this CDDL HEADER in each 14124208Sdes * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15124208Sdes * If applicable, add the following below this CDDL HEADER, with the 16124208Sdes * fields enclosed by brackets "[]" replaced with your own identifying 17124208Sdes * information: Portions Copyright [yyyy] [name of copyright owner] 18124208Sdes * 19124208Sdes * CDDL HEADER END 20124208Sdes */ 21124208Sdes/* 22124208Sdes * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23124208Sdes * Use is subject to license terms. 24124208Sdes */ 25124208Sdes 26124208Sdes#pragma ident "%Z%%M% %I% %E% SMI" 27124208Sdes 28124208Sdes/* 29124208Sdes * ZFS fault injection 30124208Sdes * 31124208Sdes * To handle fault injection, we keep track of a series of zinject_record_t 32124208Sdes * structures which describe which logical block(s) should be injected with a 33124208Sdes * fault. These are kept in a global list. Each record corresponds to a given 34124208Sdes * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 35124208Sdes * or exported while the injection record exists. 36124208Sdes * 37124208Sdes * Device level injection is done using the 'zi_guid' field. If this is set, it 38124208Sdes * means that the error is destined for a particular device, not a piece of 39124208Sdes * data. 40124208Sdes * 41124208Sdes * This is a rather poor data structure and algorithm, but we don't expect more 42124208Sdes * than a few faults at any one time, so it should be sufficient for our needs. 43124208Sdes */ 44124208Sdes 45126274Sdes#include <sys/arc.h> 46124208Sdes#include <sys/zio_impl.h> 47124208Sdes#include <sys/zfs_ioctl.h> 48124208Sdes#include <sys/spa_impl.h> 49124208Sdes#include <sys/vdev_impl.h> 50124208Sdes 51157016Sdesuint32_t zio_injection_enabled; 52124208Sdes 53124208Sdestypedef struct inject_handler { 54124208Sdes int zi_id; 55124208Sdes spa_t *zi_spa; 56137015Sdes zinject_record_t zi_record; 57124208Sdes list_node_t zi_link; 58124208Sdes} inject_handler_t; 59124208Sdes 60124208Sdesstatic list_t inject_handlers; 61124208Sdesstatic krwlock_t inject_lock; 62124208Sdesstatic int inject_next_id = 1; 63149749Sdes 64124208Sdes/* 65124208Sdes * Returns true if the given record matches the I/O in progress. 66124208Sdes */ 67124208Sdesstatic boolean_t 68124208Sdeszio_match_handler(zbookmark_t *zb, uint64_t type, 69124208Sdes zinject_record_t *record, int error) 70124208Sdes{ 71124208Sdes /* 72124208Sdes * Check for a match against the MOS, which is based on type 73124208Sdes */ 74124208Sdes if (zb->zb_objset == 0 && record->zi_objset == 0 && 75124208Sdes record->zi_object == 0) { 76124208Sdes if (record->zi_type == DMU_OT_NONE || 77124208Sdes type == record->zi_type) 78124208Sdes return (record->zi_freq == 0 || 79124208Sdes spa_get_random(100) < record->zi_freq); 80124208Sdes else 81126274Sdes return (B_FALSE); 82124208Sdes } 83124208Sdes 84149749Sdes /* 85149749Sdes * Check for an exact match. 86137015Sdes */ 87137015Sdes if (zb->zb_objset == record->zi_objset && 88137015Sdes zb->zb_object == record->zi_object && 89126274Sdes zb->zb_level == record->zi_level && 90126274Sdes zb->zb_blkid >= record->zi_start && 91126274Sdes zb->zb_blkid <= record->zi_end && 92124208Sdes error == record->zi_error) 93124208Sdes return (record->zi_freq == 0 || 94124208Sdes spa_get_random(100) < record->zi_freq); 95124208Sdes 96124208Sdes return (B_FALSE); 97124208Sdes} 98124208Sdes 99124208Sdes/* 100124208Sdes * Determine if the I/O in question should return failure. Returns the errno 101124208Sdes * to be returned to the caller. 102137015Sdes */ 103124208Sdesint 104124208Sdeszio_handle_fault_injection(zio_t *zio, int error) 105124208Sdes{ 106124208Sdes int ret = 0; 107157016Sdes inject_handler_t *handler; 108124208Sdes 109124208Sdes /* 110124208Sdes * Ignore I/O not associated with any logical data. 111126274Sdes */ 112124208Sdes if (zio->io_logical == NULL) 113124208Sdes return (0); 114124208Sdes 115124208Sdes /* 116124208Sdes * Currently, we only support fault injection on reads. 117124208Sdes */ 118124208Sdes if (zio->io_type != ZIO_TYPE_READ) 119124208Sdes return (0); 120124208Sdes 121124208Sdes rw_enter(&inject_lock, RW_READER); 122124208Sdes 123124208Sdes for (handler = list_head(&inject_handlers); handler != NULL; 124124208Sdes handler = list_next(&inject_handlers, handler)) { 125124208Sdes 126124208Sdes /* Ignore errors not destined for this pool */ 127124208Sdes if (zio->io_spa != handler->zi_spa) 128124208Sdes continue; 129124208Sdes 130124208Sdes /* Ignore device errors */ 131126274Sdes if (handler->zi_record.zi_guid != 0) 132124208Sdes continue; 133124208Sdes 134124208Sdes /* If this handler matches, return EIO */ 135124208Sdes if (zio_match_handler(&zio->io_logical->io_bookmark, 136124208Sdes zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 137124208Sdes &handler->zi_record, error)) { 138124208Sdes ret = error; 139124208Sdes break; 140124208Sdes } 141124208Sdes } 142124208Sdes 143124208Sdes rw_exit(&inject_lock); 144126274Sdes 145124208Sdes return (ret); 146124208Sdes} 147124208Sdes 148124208Sdesint 149124208Sdeszio_handle_device_injection(vdev_t *vd, int error) 150124208Sdes{ 151124208Sdes inject_handler_t *handler; 152124208Sdes int ret = 0; 153124208Sdes 154124208Sdes rw_enter(&inject_lock, RW_READER); 155124208Sdes 156126274Sdes for (handler = list_head(&inject_handlers); handler != NULL; 157124208Sdes handler = list_next(&inject_handlers, handler)) { 158124208Sdes 159124208Sdes if (vd->vdev_guid == handler->zi_record.zi_guid) { 160124208Sdes if (handler->zi_record.zi_error == error) { 161124208Sdes /* 162124208Sdes * For a failed open, pretend like the device 163124208Sdes * has gone away. 164124208Sdes */ 165126274Sdes if (error == ENXIO) 166126274Sdes vd->vdev_stat.vs_aux = 167126274Sdes VDEV_AUX_OPEN_FAILED; 168126274Sdes ret = error; 169126274Sdes break; 170126274Sdes } 171126274Sdes if (handler->zi_record.zi_error == ENXIO) { 172124208Sdes ret = EIO; 173124208Sdes break; 174124208Sdes } 175124208Sdes } 176124208Sdes } 177124208Sdes 178124208Sdes rw_exit(&inject_lock); 179124208Sdes 180124208Sdes return (ret); 181124208Sdes} 182124208Sdes 183124208Sdes/* 184124208Sdes * Create a new handler for the given record. We add it to the list, adding 185124208Sdes * a reference to the spa_t in the process. We increment zio_injection_enabled, 186124208Sdes * which is the switch to trigger all fault injection. 187124208Sdes */ 188124208Sdesint 189124208Sdeszio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 190124208Sdes{ 191124208Sdes inject_handler_t *handler; 192124208Sdes int error; 193124208Sdes spa_t *spa; 194124208Sdes 195124208Sdes /* 196124208Sdes * If this is pool-wide metadata, make sure we unload the corresponding 197124208Sdes * spa_t, so that the next attempt to load it will trigger the fault. 198124208Sdes * We call spa_reset() to unload the pool appropriately. 199124208Sdes */ 200124208Sdes if (flags & ZINJECT_UNLOAD_SPA) 201124208Sdes if ((error = spa_reset(name)) != 0) 202124208Sdes return (error); 203124208Sdes 204124208Sdes if (!(flags & ZINJECT_NULL)) { 205124208Sdes /* 206124208Sdes * spa_inject_ref() will add an injection reference, which will 207124208Sdes * prevent the pool from being removed from the namespace while 208124208Sdes * still allowing it to be unloaded. 209124208Sdes */ 210124208Sdes if ((spa = spa_inject_addref(name)) == NULL) 211124208Sdes return (ENOENT); 212124208Sdes 213124208Sdes handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 214124208Sdes 215124208Sdes rw_enter(&inject_lock, RW_WRITER); 216124208Sdes 217124208Sdes *id = handler->zi_id = inject_next_id++; 218124208Sdes handler->zi_spa = spa; 219124208Sdes handler->zi_record = *record; 220124208Sdes list_insert_tail(&inject_handlers, handler); 221124208Sdes atomic_add_32(&zio_injection_enabled, 1); 222124208Sdes 223124208Sdes rw_exit(&inject_lock); 224124208Sdes } 225124208Sdes 226124208Sdes /* 227124208Sdes * Flush the ARC, so that any attempts to read this data will end up 228124208Sdes * going to the ZIO layer. Note that this is a little overkill, but 229124208Sdes * we don't have the necessary ARC interfaces to do anything else, and 230124208Sdes * fault injection isn't a performance critical path. 231126274Sdes */ 232126274Sdes if (flags & ZINJECT_FLUSH_ARC) 233124208Sdes arc_flush(); 234124208Sdes 235124208Sdes return (0); 236124208Sdes} 237124208Sdes 238124208Sdes/* 239124208Sdes * Returns the next record with an ID greater than that supplied to the 240124208Sdes * function. Used to iterate over all handlers in the system. 241124208Sdes */ 242126274Sdesint 243124208Sdeszio_inject_list_next(int *id, char *name, size_t buflen, 244126274Sdes zinject_record_t *record) 245124208Sdes{ 246124208Sdes inject_handler_t *handler; 247126274Sdes int ret; 248126274Sdes 249126274Sdes mutex_enter(&spa_namespace_lock); 250126274Sdes rw_enter(&inject_lock, RW_READER); 251126274Sdes 252126274Sdes for (handler = list_head(&inject_handlers); handler != NULL; 253126274Sdes handler = list_next(&inject_handlers, handler)) 254126274Sdes if (handler->zi_id > *id) 255126274Sdes break; 256126274Sdes 257126274Sdes if (handler) { 258126274Sdes *record = handler->zi_record; 259126274Sdes *id = handler->zi_id; 260126274Sdes (void) strncpy(name, spa_name(handler->zi_spa), buflen); 261126274Sdes ret = 0; 262126274Sdes } else { 263126274Sdes ret = ENOENT; 264126274Sdes } 265126274Sdes 266126274Sdes rw_exit(&inject_lock); 267126274Sdes mutex_exit(&spa_namespace_lock); 268126274Sdes 269126274Sdes return (ret); 270126274Sdes} 271126274Sdes 272126274Sdes/* 273126274Sdes * Clear the fault handler with the given identifier, or return ENOENT if none 274126274Sdes * exists. 275126274Sdes */ 276126274Sdesint 277126274Sdeszio_clear_fault(int id) 278126274Sdes{ 279126274Sdes inject_handler_t *handler; 280126274Sdes int ret; 281126274Sdes 282126274Sdes rw_enter(&inject_lock, RW_WRITER); 283126274Sdes 284126274Sdes for (handler = list_head(&inject_handlers); handler != NULL; 285126274Sdes handler = list_next(&inject_handlers, handler)) 286126274Sdes if (handler->zi_id == id) 287124208Sdes break; 288126274Sdes 289124208Sdes if (handler == NULL) { 290124208Sdes ret = ENOENT; 291124208Sdes } else { 292124208Sdes list_remove(&inject_handlers, handler); 293124208Sdes spa_inject_delref(handler->zi_spa); 294 kmem_free(handler, sizeof (inject_handler_t)); 295 atomic_add_32(&zio_injection_enabled, -1); 296 ret = 0; 297 } 298 299 rw_exit(&inject_lock); 300 301 return (ret); 302} 303 304void 305zio_inject_init(void) 306{ 307 list_create(&inject_handlers, sizeof (inject_handler_t), 308 offsetof(inject_handler_t, zi_link)); 309} 310 311void 312zio_inject_fini(void) 313{ 314 list_destroy(&inject_handlers); 315} 316