zio_inject.c revision 168404
1157016Sdes/*
2124208Sdes * CDDL HEADER START
3124208Sdes *
4124208Sdes * The contents of this file are subject to the terms of the
5124208Sdes * Common Development and Distribution License (the "License").
6124208Sdes * You may not use this file except in compliance with the License.
7124208Sdes *
8124208Sdes * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9124208Sdes * or http://www.opensolaris.org/os/licensing.
10124208Sdes * See the License for the specific language governing permissions
11124208Sdes * and limitations under the License.
12124208Sdes *
13124208Sdes * When distributing Covered Code, include this CDDL HEADER in each
14124208Sdes * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15124208Sdes * If applicable, add the following below this CDDL HEADER, with the
16124208Sdes * fields enclosed by brackets "[]" replaced with your own identifying
17124208Sdes * information: Portions Copyright [yyyy] [name of copyright owner]
18124208Sdes *
19124208Sdes * CDDL HEADER END
20124208Sdes */
21124208Sdes/*
22124208Sdes * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23124208Sdes * Use is subject to license terms.
24124208Sdes */
25124208Sdes
26124208Sdes#pragma ident	"%Z%%M%	%I%	%E% SMI"
27124208Sdes
/*
 * ZFS fault injection
 *
 * To handle fault injection, we keep track of a series of zinject_record_t
 * structures which describe which logical block(s) should be injected with a
 * fault.  These are kept in a global list.  Each record corresponds to a given
 * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
 * or exported while the injection record exists.
 *
 * Device level injection is done using the 'zi_guid' field.  If this is set, it
 * means that the error is destined for a particular device, not a piece of
 * data.
 *
 * This is a rather poor data structure and algorithm, but we don't expect more
 * than a few faults at any one time, so it should be sufficient for our needs.
 */
44124208Sdes
45126274Sdes#include <sys/arc.h>
46124208Sdes#include <sys/zio_impl.h>
47124208Sdes#include <sys/zfs_ioctl.h>
48124208Sdes#include <sys/spa_impl.h>
49124208Sdes#include <sys/vdev_impl.h>
50124208Sdes
51157016Sdesuint32_t zio_injection_enabled;
52124208Sdes
53124208Sdestypedef struct inject_handler {
54124208Sdes	int			zi_id;
55124208Sdes	spa_t			*zi_spa;
56137015Sdes	zinject_record_t	zi_record;
57124208Sdes	list_node_t		zi_link;
58124208Sdes} inject_handler_t;
59124208Sdes
60124208Sdesstatic list_t inject_handlers;
61124208Sdesstatic krwlock_t inject_lock;
62124208Sdesstatic int inject_next_id = 1;
63149749Sdes
64124208Sdes/*
65124208Sdes * Returns true if the given record matches the I/O in progress.
66124208Sdes */
67124208Sdesstatic boolean_t
68124208Sdeszio_match_handler(zbookmark_t *zb, uint64_t type,
69124208Sdes    zinject_record_t *record, int error)
70124208Sdes{
71124208Sdes	/*
72124208Sdes	 * Check for a match against the MOS, which is based on type
73124208Sdes	 */
74124208Sdes	if (zb->zb_objset == 0 && record->zi_objset == 0 &&
75124208Sdes	    record->zi_object == 0) {
76124208Sdes		if (record->zi_type == DMU_OT_NONE ||
77124208Sdes		    type == record->zi_type)
78124208Sdes			return (record->zi_freq == 0 ||
79124208Sdes			    spa_get_random(100) < record->zi_freq);
80124208Sdes		else
81126274Sdes			return (B_FALSE);
82124208Sdes	}
83124208Sdes
84149749Sdes	/*
85149749Sdes	 * Check for an exact match.
86137015Sdes	 */
87137015Sdes	if (zb->zb_objset == record->zi_objset &&
88137015Sdes	    zb->zb_object == record->zi_object &&
89126274Sdes	    zb->zb_level == record->zi_level &&
90126274Sdes	    zb->zb_blkid >= record->zi_start &&
91126274Sdes	    zb->zb_blkid <= record->zi_end &&
92124208Sdes	    error == record->zi_error)
93124208Sdes		return (record->zi_freq == 0 ||
94124208Sdes		    spa_get_random(100) < record->zi_freq);
95124208Sdes
96124208Sdes	return (B_FALSE);
97124208Sdes}
98124208Sdes
99124208Sdes/*
100124208Sdes * Determine if the I/O in question should return failure.  Returns the errno
101124208Sdes * to be returned to the caller.
102137015Sdes */
103124208Sdesint
104124208Sdeszio_handle_fault_injection(zio_t *zio, int error)
105124208Sdes{
106124208Sdes	int ret = 0;
107157016Sdes	inject_handler_t *handler;
108124208Sdes
109124208Sdes	/*
110124208Sdes	 * Ignore I/O not associated with any logical data.
111126274Sdes	 */
112124208Sdes	if (zio->io_logical == NULL)
113124208Sdes		return (0);
114124208Sdes
115124208Sdes	/*
116124208Sdes	 * Currently, we only support fault injection on reads.
117124208Sdes	 */
118124208Sdes	if (zio->io_type != ZIO_TYPE_READ)
119124208Sdes		return (0);
120124208Sdes
121124208Sdes	rw_enter(&inject_lock, RW_READER);
122124208Sdes
123124208Sdes	for (handler = list_head(&inject_handlers); handler != NULL;
124124208Sdes	    handler = list_next(&inject_handlers, handler)) {
125124208Sdes
126124208Sdes		/* Ignore errors not destined for this pool */
127124208Sdes		if (zio->io_spa != handler->zi_spa)
128124208Sdes			continue;
129124208Sdes
130124208Sdes		/* Ignore device errors */
131126274Sdes		if (handler->zi_record.zi_guid != 0)
132124208Sdes			continue;
133124208Sdes
134124208Sdes		/* If this handler matches, return EIO */
135124208Sdes		if (zio_match_handler(&zio->io_logical->io_bookmark,
136124208Sdes		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
137124208Sdes		    &handler->zi_record, error)) {
138124208Sdes			ret = error;
139124208Sdes			break;
140124208Sdes		}
141124208Sdes	}
142124208Sdes
143124208Sdes	rw_exit(&inject_lock);
144126274Sdes
145124208Sdes	return (ret);
146124208Sdes}
147124208Sdes
148124208Sdesint
149124208Sdeszio_handle_device_injection(vdev_t *vd, int error)
150124208Sdes{
151124208Sdes	inject_handler_t *handler;
152124208Sdes	int ret = 0;
153124208Sdes
154124208Sdes	rw_enter(&inject_lock, RW_READER);
155124208Sdes
156126274Sdes	for (handler = list_head(&inject_handlers); handler != NULL;
157124208Sdes	    handler = list_next(&inject_handlers, handler)) {
158124208Sdes
159124208Sdes		if (vd->vdev_guid == handler->zi_record.zi_guid) {
160124208Sdes			if (handler->zi_record.zi_error == error) {
161124208Sdes				/*
162124208Sdes				 * For a failed open, pretend like the device
163124208Sdes				 * has gone away.
164124208Sdes				 */
165126274Sdes				if (error == ENXIO)
166126274Sdes					vd->vdev_stat.vs_aux =
167126274Sdes					    VDEV_AUX_OPEN_FAILED;
168126274Sdes				ret = error;
169126274Sdes				break;
170126274Sdes			}
171126274Sdes			if (handler->zi_record.zi_error == ENXIO) {
172124208Sdes				ret = EIO;
173124208Sdes				break;
174124208Sdes			}
175124208Sdes		}
176124208Sdes	}
177124208Sdes
178124208Sdes	rw_exit(&inject_lock);
179124208Sdes
180124208Sdes	return (ret);
181124208Sdes}
182124208Sdes
183124208Sdes/*
184124208Sdes * Create a new handler for the given record.  We add it to the list, adding
185124208Sdes * a reference to the spa_t in the process.  We increment zio_injection_enabled,
186124208Sdes * which is the switch to trigger all fault injection.
187124208Sdes */
188124208Sdesint
189124208Sdeszio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
190124208Sdes{
191124208Sdes	inject_handler_t *handler;
192124208Sdes	int error;
193124208Sdes	spa_t *spa;
194124208Sdes
195124208Sdes	/*
196124208Sdes	 * If this is pool-wide metadata, make sure we unload the corresponding
197124208Sdes	 * spa_t, so that the next attempt to load it will trigger the fault.
198124208Sdes	 * We call spa_reset() to unload the pool appropriately.
199124208Sdes	 */
200124208Sdes	if (flags & ZINJECT_UNLOAD_SPA)
201124208Sdes		if ((error = spa_reset(name)) != 0)
202124208Sdes			return (error);
203124208Sdes
204124208Sdes	if (!(flags & ZINJECT_NULL)) {
205124208Sdes		/*
206124208Sdes		 * spa_inject_ref() will add an injection reference, which will
207124208Sdes		 * prevent the pool from being removed from the namespace while
208124208Sdes		 * still allowing it to be unloaded.
209124208Sdes		 */
210124208Sdes		if ((spa = spa_inject_addref(name)) == NULL)
211124208Sdes			return (ENOENT);
212124208Sdes
213124208Sdes		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
214124208Sdes
215124208Sdes		rw_enter(&inject_lock, RW_WRITER);
216124208Sdes
217124208Sdes		*id = handler->zi_id = inject_next_id++;
218124208Sdes		handler->zi_spa = spa;
219124208Sdes		handler->zi_record = *record;
220124208Sdes		list_insert_tail(&inject_handlers, handler);
221124208Sdes		atomic_add_32(&zio_injection_enabled, 1);
222124208Sdes
223124208Sdes		rw_exit(&inject_lock);
224124208Sdes	}
225124208Sdes
226124208Sdes	/*
227124208Sdes	 * Flush the ARC, so that any attempts to read this data will end up
228124208Sdes	 * going to the ZIO layer.  Note that this is a little overkill, but
229124208Sdes	 * we don't have the necessary ARC interfaces to do anything else, and
230124208Sdes	 * fault injection isn't a performance critical path.
231126274Sdes	 */
232126274Sdes	if (flags & ZINJECT_FLUSH_ARC)
233124208Sdes		arc_flush();
234124208Sdes
235124208Sdes	return (0);
236124208Sdes}
237124208Sdes
238124208Sdes/*
239124208Sdes * Returns the next record with an ID greater than that supplied to the
240124208Sdes * function.  Used to iterate over all handlers in the system.
241124208Sdes */
242126274Sdesint
243124208Sdeszio_inject_list_next(int *id, char *name, size_t buflen,
244126274Sdes    zinject_record_t *record)
245124208Sdes{
246124208Sdes	inject_handler_t *handler;
247126274Sdes	int ret;
248126274Sdes
249126274Sdes	mutex_enter(&spa_namespace_lock);
250126274Sdes	rw_enter(&inject_lock, RW_READER);
251126274Sdes
252126274Sdes	for (handler = list_head(&inject_handlers); handler != NULL;
253126274Sdes	    handler = list_next(&inject_handlers, handler))
254126274Sdes		if (handler->zi_id > *id)
255126274Sdes			break;
256126274Sdes
257126274Sdes	if (handler) {
258126274Sdes		*record = handler->zi_record;
259126274Sdes		*id = handler->zi_id;
260126274Sdes		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
261126274Sdes		ret = 0;
262126274Sdes	} else {
263126274Sdes		ret = ENOENT;
264126274Sdes	}
265126274Sdes
266126274Sdes	rw_exit(&inject_lock);
267126274Sdes	mutex_exit(&spa_namespace_lock);
268126274Sdes
269126274Sdes	return (ret);
270126274Sdes}
271126274Sdes
272126274Sdes/*
273126274Sdes * Clear the fault handler with the given identifier, or return ENOENT if none
274126274Sdes * exists.
275126274Sdes */
276126274Sdesint
277126274Sdeszio_clear_fault(int id)
278126274Sdes{
279126274Sdes	inject_handler_t *handler;
280126274Sdes	int ret;
281126274Sdes
282126274Sdes	rw_enter(&inject_lock, RW_WRITER);
283126274Sdes
284126274Sdes	for (handler = list_head(&inject_handlers); handler != NULL;
285126274Sdes	    handler = list_next(&inject_handlers, handler))
286126274Sdes		if (handler->zi_id == id)
287124208Sdes			break;
288126274Sdes
289124208Sdes	if (handler == NULL) {
290124208Sdes		ret = ENOENT;
291124208Sdes	} else {
292124208Sdes		list_remove(&inject_handlers, handler);
293124208Sdes		spa_inject_delref(handler->zi_spa);
294		kmem_free(handler, sizeof (inject_handler_t));
295		atomic_add_32(&zio_injection_enabled, -1);
296		ret = 0;
297	}
298
299	rw_exit(&inject_lock);
300
301	return (ret);
302}
303
304void
305zio_inject_init(void)
306{
307	list_create(&inject_handlers, sizeof (inject_handler_t),
308	    offsetof(inject_handler_t, zi_link));
309}
310
311void
312zio_inject_fini(void)
313{
314	list_destroy(&inject_handlers);
315}
316