zio_inject.c revision 168404
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22168404Spjd * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24168404Spjd */
25168404Spjd
26168404Spjd#pragma ident	"%Z%%M%	%I%	%E% SMI"
27168404Spjd
28168404Spjd/*
29168404Spjd * ZFS fault injection
30168404Spjd *
31168404Spjd * To handle fault injection, we keep track of a series of zinject_record_t
32168404Spjd * structures which describe which logical block(s) should be injected with a
33168404Spjd * fault.  These are kept in a global list.  Each record corresponds to a given
34168404Spjd * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
35168404Spjd * or exported while the injection record exists.
36168404Spjd *
37168404Spjd * Device level injection is done using the 'zi_guid' field.  If this is set, it
38168404Spjd * means that the error is destined for a particular device, not a piece of
39168404Spjd * data.
40168404Spjd *
41168404Spjd * This is a rather poor data structure and algorithm, but we don't expect more
42168404Spjd * than a few faults at any one time, so it should be sufficient for our needs.
43168404Spjd */
44168404Spjd
45168404Spjd#include <sys/arc.h>
46168404Spjd#include <sys/zio_impl.h>
47168404Spjd#include <sys/zfs_ioctl.h>
48168404Spjd#include <sys/spa_impl.h>
49168404Spjd#include <sys/vdev_impl.h>
50168404Spjd
/*
 * Count of registered fault handlers.  It is raised atomically each time
 * zio_inject_fault() adds a handler and lowered when zio_clear_fault()
 * removes one; a non-zero value is the switch that turns fault injection on.
 */
uint32_t zio_injection_enabled;
52168404Spjd
53168404Spjdtypedef struct inject_handler {
54168404Spjd	int			zi_id;
55168404Spjd	spa_t			*zi_spa;
56168404Spjd	zinject_record_t	zi_record;
57168404Spjd	list_node_t		zi_link;
58168404Spjd} inject_handler_t;
59168404Spjd
60168404Spjdstatic list_t inject_handlers;
61168404Spjdstatic krwlock_t inject_lock;
62168404Spjdstatic int inject_next_id = 1;
63168404Spjd
64168404Spjd/*
65168404Spjd * Returns true if the given record matches the I/O in progress.
66168404Spjd */
67168404Spjdstatic boolean_t
68168404Spjdzio_match_handler(zbookmark_t *zb, uint64_t type,
69168404Spjd    zinject_record_t *record, int error)
70168404Spjd{
71168404Spjd	/*
72168404Spjd	 * Check for a match against the MOS, which is based on type
73168404Spjd	 */
74168404Spjd	if (zb->zb_objset == 0 && record->zi_objset == 0 &&
75168404Spjd	    record->zi_object == 0) {
76168404Spjd		if (record->zi_type == DMU_OT_NONE ||
77168404Spjd		    type == record->zi_type)
78168404Spjd			return (record->zi_freq == 0 ||
79168404Spjd			    spa_get_random(100) < record->zi_freq);
80168404Spjd		else
81168404Spjd			return (B_FALSE);
82168404Spjd	}
83168404Spjd
84168404Spjd	/*
85168404Spjd	 * Check for an exact match.
86168404Spjd	 */
87168404Spjd	if (zb->zb_objset == record->zi_objset &&
88168404Spjd	    zb->zb_object == record->zi_object &&
89168404Spjd	    zb->zb_level == record->zi_level &&
90168404Spjd	    zb->zb_blkid >= record->zi_start &&
91168404Spjd	    zb->zb_blkid <= record->zi_end &&
92168404Spjd	    error == record->zi_error)
93168404Spjd		return (record->zi_freq == 0 ||
94168404Spjd		    spa_get_random(100) < record->zi_freq);
95168404Spjd
96168404Spjd	return (B_FALSE);
97168404Spjd}
98168404Spjd
99168404Spjd/*
100168404Spjd * Determine if the I/O in question should return failure.  Returns the errno
101168404Spjd * to be returned to the caller.
102168404Spjd */
103168404Spjdint
104168404Spjdzio_handle_fault_injection(zio_t *zio, int error)
105168404Spjd{
106168404Spjd	int ret = 0;
107168404Spjd	inject_handler_t *handler;
108168404Spjd
109168404Spjd	/*
110168404Spjd	 * Ignore I/O not associated with any logical data.
111168404Spjd	 */
112168404Spjd	if (zio->io_logical == NULL)
113168404Spjd		return (0);
114168404Spjd
115168404Spjd	/*
116168404Spjd	 * Currently, we only support fault injection on reads.
117168404Spjd	 */
118168404Spjd	if (zio->io_type != ZIO_TYPE_READ)
119168404Spjd		return (0);
120168404Spjd
121168404Spjd	rw_enter(&inject_lock, RW_READER);
122168404Spjd
123168404Spjd	for (handler = list_head(&inject_handlers); handler != NULL;
124168404Spjd	    handler = list_next(&inject_handlers, handler)) {
125168404Spjd
126168404Spjd		/* Ignore errors not destined for this pool */
127168404Spjd		if (zio->io_spa != handler->zi_spa)
128168404Spjd			continue;
129168404Spjd
130168404Spjd		/* Ignore device errors */
131168404Spjd		if (handler->zi_record.zi_guid != 0)
132168404Spjd			continue;
133168404Spjd
134168404Spjd		/* If this handler matches, return EIO */
135168404Spjd		if (zio_match_handler(&zio->io_logical->io_bookmark,
136168404Spjd		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
137168404Spjd		    &handler->zi_record, error)) {
138168404Spjd			ret = error;
139168404Spjd			break;
140168404Spjd		}
141168404Spjd	}
142168404Spjd
143168404Spjd	rw_exit(&inject_lock);
144168404Spjd
145168404Spjd	return (ret);
146168404Spjd}
147168404Spjd
148168404Spjdint
149168404Spjdzio_handle_device_injection(vdev_t *vd, int error)
150168404Spjd{
151168404Spjd	inject_handler_t *handler;
152168404Spjd	int ret = 0;
153168404Spjd
154168404Spjd	rw_enter(&inject_lock, RW_READER);
155168404Spjd
156168404Spjd	for (handler = list_head(&inject_handlers); handler != NULL;
157168404Spjd	    handler = list_next(&inject_handlers, handler)) {
158168404Spjd
159168404Spjd		if (vd->vdev_guid == handler->zi_record.zi_guid) {
160168404Spjd			if (handler->zi_record.zi_error == error) {
161168404Spjd				/*
162168404Spjd				 * For a failed open, pretend like the device
163168404Spjd				 * has gone away.
164168404Spjd				 */
165168404Spjd				if (error == ENXIO)
166168404Spjd					vd->vdev_stat.vs_aux =
167168404Spjd					    VDEV_AUX_OPEN_FAILED;
168168404Spjd				ret = error;
169168404Spjd				break;
170168404Spjd			}
171168404Spjd			if (handler->zi_record.zi_error == ENXIO) {
172168404Spjd				ret = EIO;
173168404Spjd				break;
174168404Spjd			}
175168404Spjd		}
176168404Spjd	}
177168404Spjd
178168404Spjd	rw_exit(&inject_lock);
179168404Spjd
180168404Spjd	return (ret);
181168404Spjd}
182168404Spjd
183168404Spjd/*
184168404Spjd * Create a new handler for the given record.  We add it to the list, adding
185168404Spjd * a reference to the spa_t in the process.  We increment zio_injection_enabled,
186168404Spjd * which is the switch to trigger all fault injection.
187168404Spjd */
188168404Spjdint
189168404Spjdzio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
190168404Spjd{
191168404Spjd	inject_handler_t *handler;
192168404Spjd	int error;
193168404Spjd	spa_t *spa;
194168404Spjd
195168404Spjd	/*
196168404Spjd	 * If this is pool-wide metadata, make sure we unload the corresponding
197168404Spjd	 * spa_t, so that the next attempt to load it will trigger the fault.
198168404Spjd	 * We call spa_reset() to unload the pool appropriately.
199168404Spjd	 */
200168404Spjd	if (flags & ZINJECT_UNLOAD_SPA)
201168404Spjd		if ((error = spa_reset(name)) != 0)
202168404Spjd			return (error);
203168404Spjd
204168404Spjd	if (!(flags & ZINJECT_NULL)) {
205168404Spjd		/*
206168404Spjd		 * spa_inject_ref() will add an injection reference, which will
207168404Spjd		 * prevent the pool from being removed from the namespace while
208168404Spjd		 * still allowing it to be unloaded.
209168404Spjd		 */
210168404Spjd		if ((spa = spa_inject_addref(name)) == NULL)
211168404Spjd			return (ENOENT);
212168404Spjd
213168404Spjd		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
214168404Spjd
215168404Spjd		rw_enter(&inject_lock, RW_WRITER);
216168404Spjd
217168404Spjd		*id = handler->zi_id = inject_next_id++;
218168404Spjd		handler->zi_spa = spa;
219168404Spjd		handler->zi_record = *record;
220168404Spjd		list_insert_tail(&inject_handlers, handler);
221168404Spjd		atomic_add_32(&zio_injection_enabled, 1);
222168404Spjd
223168404Spjd		rw_exit(&inject_lock);
224168404Spjd	}
225168404Spjd
226168404Spjd	/*
227168404Spjd	 * Flush the ARC, so that any attempts to read this data will end up
228168404Spjd	 * going to the ZIO layer.  Note that this is a little overkill, but
229168404Spjd	 * we don't have the necessary ARC interfaces to do anything else, and
230168404Spjd	 * fault injection isn't a performance critical path.
231168404Spjd	 */
232168404Spjd	if (flags & ZINJECT_FLUSH_ARC)
233168404Spjd		arc_flush();
234168404Spjd
235168404Spjd	return (0);
236168404Spjd}
237168404Spjd
238168404Spjd/*
239168404Spjd * Returns the next record with an ID greater than that supplied to the
240168404Spjd * function.  Used to iterate over all handlers in the system.
241168404Spjd */
242168404Spjdint
243168404Spjdzio_inject_list_next(int *id, char *name, size_t buflen,
244168404Spjd    zinject_record_t *record)
245168404Spjd{
246168404Spjd	inject_handler_t *handler;
247168404Spjd	int ret;
248168404Spjd
249168404Spjd	mutex_enter(&spa_namespace_lock);
250168404Spjd	rw_enter(&inject_lock, RW_READER);
251168404Spjd
252168404Spjd	for (handler = list_head(&inject_handlers); handler != NULL;
253168404Spjd	    handler = list_next(&inject_handlers, handler))
254168404Spjd		if (handler->zi_id > *id)
255168404Spjd			break;
256168404Spjd
257168404Spjd	if (handler) {
258168404Spjd		*record = handler->zi_record;
259168404Spjd		*id = handler->zi_id;
260168404Spjd		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
261168404Spjd		ret = 0;
262168404Spjd	} else {
263168404Spjd		ret = ENOENT;
264168404Spjd	}
265168404Spjd
266168404Spjd	rw_exit(&inject_lock);
267168404Spjd	mutex_exit(&spa_namespace_lock);
268168404Spjd
269168404Spjd	return (ret);
270168404Spjd}
271168404Spjd
272168404Spjd/*
273168404Spjd * Clear the fault handler with the given identifier, or return ENOENT if none
274168404Spjd * exists.
275168404Spjd */
276168404Spjdint
277168404Spjdzio_clear_fault(int id)
278168404Spjd{
279168404Spjd	inject_handler_t *handler;
280168404Spjd	int ret;
281168404Spjd
282168404Spjd	rw_enter(&inject_lock, RW_WRITER);
283168404Spjd
284168404Spjd	for (handler = list_head(&inject_handlers); handler != NULL;
285168404Spjd	    handler = list_next(&inject_handlers, handler))
286168404Spjd		if (handler->zi_id == id)
287168404Spjd			break;
288168404Spjd
289168404Spjd	if (handler == NULL) {
290168404Spjd		ret = ENOENT;
291168404Spjd	} else {
292168404Spjd		list_remove(&inject_handlers, handler);
293168404Spjd		spa_inject_delref(handler->zi_spa);
294168404Spjd		kmem_free(handler, sizeof (inject_handler_t));
295168404Spjd		atomic_add_32(&zio_injection_enabled, -1);
296168404Spjd		ret = 0;
297168404Spjd	}
298168404Spjd
299168404Spjd	rw_exit(&inject_lock);
300168404Spjd
301168404Spjd	return (ret);
302168404Spjd}
303168404Spjd
304168404Spjdvoid
305168404Spjdzio_inject_init(void)
306168404Spjd{
307168404Spjd	list_create(&inject_handlers, sizeof (inject_handler_t),
308168404Spjd	    offsetof(inject_handler_t, zi_link));
309168404Spjd}
310168404Spjd
311168404Spjdvoid
312168404Spjdzio_inject_fini(void)
313168404Spjd{
314168404Spjd	list_destroy(&inject_handlers);
315168404Spjd}
316