zio_inject.c revision 10921:8aac17999e4d
1276478Sngie/*
2272343Sngie * CDDL HEADER START
3272343Sngie *
4272343Sngie * The contents of this file are subject to the terms of the
5272343Sngie * Common Development and Distribution License (the "License").
6272343Sngie * You may not use this file except in compliance with the License.
7272343Sngie *
8272343Sngie * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9272343Sngie * or http://www.opensolaris.org/os/licensing.
10272343Sngie * See the License for the specific language governing permissions
11272343Sngie * and limitations under the License.
12272343Sngie *
13272343Sngie * When distributing Covered Code, include this CDDL HEADER in each
14272343Sngie * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15272343Sngie * If applicable, add the following below this CDDL HEADER, with the
16272343Sngie * fields enclosed by brackets "[]" replaced with your own identifying
17272343Sngie * information: Portions Copyright [yyyy] [name of copyright owner]
18272343Sngie *
19272343Sngie * CDDL HEADER END
20272343Sngie */
21272343Sngie/*
22272343Sngie * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23272343Sngie * Use is subject to license terms.
24272343Sngie */
25272343Sngie
26272343Sngie/*
27272343Sngie * ZFS fault injection
28272343Sngie *
29272343Sngie * To handle fault injection, we keep track of a series of zinject_record_t
30272343Sngie * structures which describe which logical block(s) should be injected with a
31272343Sngie * fault.  These are kept in a global list.  Each record corresponds to a given
32276478Sngie * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
33272343Sngie * or exported while the injection record exists.
34272343Sngie *
35272343Sngie * Device level injection is done using the 'zi_guid' field.  If this is set, it
36272343Sngie * means that the error is destined for a particular device, not a piece of
37272343Sngie * data.
38272343Sngie *
39272343Sngie * This is a rather poor data structure and algorithm, but we don't expect more
40272343Sngie * than a few faults at any one time, so it should be sufficient for our needs.
41272343Sngie */
42272343Sngie
43272343Sngie#include <sys/arc.h>
44272343Sngie#include <sys/zio_impl.h>
45272343Sngie#include <sys/zfs_ioctl.h>
46272343Sngie#include <sys/spa_impl.h>
47272343Sngie#include <sys/vdev_impl.h>
48272343Sngie#include <sys/fs/zfs.h>
49272343Sngie
/*
 * Count of registered injection handlers.  zio_inject_fault() increments it
 * for every handler it adds and zio_clear_fault() decrements it on removal,
 * so a non-zero value means at least one handler is installed.
 */
uint32_t zio_injection_enabled;
51272343Sngie
/*
 * One registered injection request.  Handlers live on the global
 * inject_handlers list and are created by zio_inject_fault().
 */
typedef struct inject_handler {
	int			zi_id;		/* ID handed back to the caller */
	spa_t			*zi_spa;	/* pool this handler applies to */
	zinject_record_t	zi_record;	/* copy of the caller's record */
	list_node_t		zi_link;	/* linkage on inject_handlers */
} inject_handler_t;
58272343Sngie
/*
 * Global handler state.  Every walk of inject_handlers in this file is done
 * with inject_lock held (reader for lookups, writer for insert/remove).
 * inject_next_id supplies the ID for the next handler and is advanced under
 * the writer lock.
 */
static list_t inject_handlers;
static krwlock_t inject_lock;
static int inject_next_id = 1;
62272343Sngie
63272343Sngie/*
64272343Sngie * Returns true if the given record matches the I/O in progress.
65272343Sngie */
66272343Sngiestatic boolean_t
67272343Sngiezio_match_handler(zbookmark_t *zb, uint64_t type,
68272343Sngie    zinject_record_t *record, int error)
69272343Sngie{
70272343Sngie	/*
71272343Sngie	 * Check for a match against the MOS, which is based on type
72272343Sngie	 */
73272343Sngie	if (zb->zb_objset == 0 && record->zi_objset == 0 &&
74272343Sngie	    record->zi_object == 0) {
75272343Sngie		if (record->zi_type == DMU_OT_NONE ||
76272343Sngie		    type == record->zi_type)
77272343Sngie			return (record->zi_freq == 0 ||
78272343Sngie			    spa_get_random(100) < record->zi_freq);
79272343Sngie		else
80272343Sngie			return (B_FALSE);
81272343Sngie	}
82272343Sngie
83272343Sngie	/*
84272343Sngie	 * Check for an exact match.
85272343Sngie	 */
86272343Sngie	if (zb->zb_objset == record->zi_objset &&
87272343Sngie	    zb->zb_object == record->zi_object &&
88272343Sngie	    zb->zb_level == record->zi_level &&
89272343Sngie	    zb->zb_blkid >= record->zi_start &&
90272343Sngie	    zb->zb_blkid <= record->zi_end &&
91272343Sngie	    error == record->zi_error)
92272343Sngie		return (record->zi_freq == 0 ||
93272343Sngie		    spa_get_random(100) < record->zi_freq);
94272343Sngie
95272343Sngie	return (B_FALSE);
96272343Sngie}
97272343Sngie
98272343Sngie/*
99272343Sngie * Panic the system when a config change happens in the function
100272343Sngie * specified by tag.
101272343Sngie */
102272343Sngievoid
103272343Sngiezio_handle_panic_injection(spa_t *spa, char *tag)
104272343Sngie{
105272343Sngie	inject_handler_t *handler;
106272343Sngie
107272343Sngie	rw_enter(&inject_lock, RW_READER);
108272343Sngie
109272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
110272343Sngie	    handler = list_next(&inject_handlers, handler)) {
111272343Sngie
112272343Sngie		if (spa != handler->zi_spa)
113272343Sngie			continue;
114272343Sngie
115272343Sngie		if (strcmp(tag, handler->zi_record.zi_func) == 0)
116272343Sngie			panic("Panic requested in function %s\n", tag);
117272343Sngie	}
118272343Sngie
119272343Sngie	rw_exit(&inject_lock);
120272343Sngie}
121272343Sngie
122272343Sngie/*
123272343Sngie * Determine if the I/O in question should return failure.  Returns the errno
124272343Sngie * to be returned to the caller.
125272343Sngie */
126272343Sngieint
127272343Sngiezio_handle_fault_injection(zio_t *zio, int error)
128272343Sngie{
129272343Sngie	int ret = 0;
130272343Sngie	inject_handler_t *handler;
131272343Sngie
132272343Sngie	/*
133272343Sngie	 * Ignore I/O not associated with any logical data.
134272343Sngie	 */
135272343Sngie	if (zio->io_logical == NULL)
136272343Sngie		return (0);
137272343Sngie
138272343Sngie	/*
139272343Sngie	 * Currently, we only support fault injection on reads.
140272343Sngie	 */
141272343Sngie	if (zio->io_type != ZIO_TYPE_READ)
142272343Sngie		return (0);
143272343Sngie
144272343Sngie	rw_enter(&inject_lock, RW_READER);
145272343Sngie
146272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
147272343Sngie	    handler = list_next(&inject_handlers, handler)) {
148272343Sngie
149272343Sngie		/* Ignore errors not destined for this pool */
150272343Sngie		if (zio->io_spa != handler->zi_spa)
151272343Sngie			continue;
152272343Sngie
153272343Sngie		/* Ignore device errors and panic injection */
154272343Sngie		if (handler->zi_record.zi_guid != 0 ||
155272343Sngie		    handler->zi_record.zi_func[0] != '\0' ||
156272343Sngie		    handler->zi_record.zi_duration != 0)
157272343Sngie			continue;
158272343Sngie
159272343Sngie		/* If this handler matches, return EIO */
160272343Sngie		if (zio_match_handler(&zio->io_logical->io_bookmark,
161272343Sngie		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
162272343Sngie		    &handler->zi_record, error)) {
163272343Sngie			ret = error;
164272343Sngie			break;
165272343Sngie		}
166272343Sngie	}
167272343Sngie
168272343Sngie	rw_exit(&inject_lock);
169272343Sngie
170272343Sngie	return (ret);
171272343Sngie}
172272343Sngie
173272343Sngie/*
174272343Sngie * Determine if the zio is part of a label update and has an injection
175272343Sngie * handler associated with that portion of the label. Currently, we
176272343Sngie * allow error injection in either the nvlist or the uberblock region of
177272343Sngie * of the vdev label.
178272343Sngie */
179272343Sngieint
180272343Sngiezio_handle_label_injection(zio_t *zio, int error)
181272343Sngie{
182272343Sngie	inject_handler_t *handler;
183272343Sngie	vdev_t *vd = zio->io_vd;
184272343Sngie	uint64_t offset = zio->io_offset;
185272343Sngie	int label;
186272343Sngie	int ret = 0;
187272343Sngie
188272343Sngie	if (offset >= VDEV_LABEL_START_SIZE &&
189272343Sngie	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
190272343Sngie		return (0);
191272343Sngie
192272343Sngie	rw_enter(&inject_lock, RW_READER);
193272343Sngie
194272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
195272343Sngie	    handler = list_next(&inject_handlers, handler)) {
196272343Sngie		uint64_t start = handler->zi_record.zi_start;
197272343Sngie		uint64_t end = handler->zi_record.zi_end;
198272343Sngie
199272343Sngie		/* Ignore device only faults or panic injection */
200272343Sngie		if (handler->zi_record.zi_start == 0 ||
201272343Sngie		    handler->zi_record.zi_func[0] != '\0' ||
202272343Sngie		    handler->zi_record.zi_duration != 0)
203272343Sngie			continue;
204272343Sngie
205272343Sngie		/*
206272343Sngie		 * The injection region is the relative offsets within a
207272343Sngie		 * vdev label. We must determine the label which is being
208272343Sngie		 * updated and adjust our region accordingly.
209272343Sngie		 */
210272343Sngie		label = vdev_label_number(vd->vdev_psize, offset);
211272343Sngie		start = vdev_label_offset(vd->vdev_psize, label, start);
212272343Sngie		end = vdev_label_offset(vd->vdev_psize, label, end);
213272343Sngie
214272343Sngie		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
215272343Sngie		    (offset >= start && offset <= end)) {
216272343Sngie			ret = error;
217272343Sngie			break;
218272343Sngie		}
219272343Sngie	}
220272343Sngie	rw_exit(&inject_lock);
221272343Sngie	return (ret);
222272343Sngie}
223272343Sngie
224272343Sngie
225272343Sngieint
226272343Sngiezio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
227272343Sngie{
228272343Sngie	inject_handler_t *handler;
229272343Sngie	int ret = 0;
230272343Sngie
231272343Sngie	/*
232272343Sngie	 * We skip over faults in the labels unless it's during
233272343Sngie	 * device open (i.e. zio == NULL).
234272343Sngie	 */
235272343Sngie	if (zio != NULL) {
236272343Sngie		uint64_t offset = zio->io_offset;
237272343Sngie
238272343Sngie		if (offset < VDEV_LABEL_START_SIZE ||
239272343Sngie		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
240272343Sngie		return (0);
241272343Sngie	}
242272343Sngie
243272343Sngie	rw_enter(&inject_lock, RW_READER);
244272343Sngie
245272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
246272343Sngie	    handler = list_next(&inject_handlers, handler)) {
247272343Sngie
248272343Sngie		/*
249272343Sngie		 * Ignore label specific faults, panic injection
250272343Sngie		 * or fake writes
251272343Sngie		 */
252272343Sngie		if (handler->zi_record.zi_start != 0 ||
253272343Sngie		    handler->zi_record.zi_func[0] != '\0' ||
254272343Sngie		    handler->zi_record.zi_duration != 0)
255272343Sngie			continue;
256272343Sngie
257272343Sngie		if (vd->vdev_guid == handler->zi_record.zi_guid) {
258272343Sngie			if (handler->zi_record.zi_failfast &&
259272343Sngie			    (zio == NULL || (zio->io_flags &
260272343Sngie			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
261272343Sngie				continue;
262272343Sngie			}
263272343Sngie
264272343Sngie			/* Handle type specific I/O failures */
265272343Sngie			if (zio != NULL &&
266272343Sngie			    handler->zi_record.zi_iotype != ZIO_TYPES &&
267272343Sngie			    handler->zi_record.zi_iotype != zio->io_type)
268272343Sngie				continue;
269272343Sngie
270272343Sngie			if (handler->zi_record.zi_error == error) {
271272343Sngie				/*
272272343Sngie				 * For a failed open, pretend like the device
273272343Sngie				 * has gone away.
274272343Sngie				 */
275272343Sngie				if (error == ENXIO)
276272343Sngie					vd->vdev_stat.vs_aux =
277272343Sngie					    VDEV_AUX_OPEN_FAILED;
278272343Sngie				ret = error;
279272343Sngie				break;
280272343Sngie			}
281272343Sngie			if (handler->zi_record.zi_error == ENXIO) {
282272343Sngie				ret = EIO;
283272343Sngie				break;
284272343Sngie			}
285272343Sngie		}
286272343Sngie	}
287272343Sngie
288272343Sngie	rw_exit(&inject_lock);
289272343Sngie
290272343Sngie	return (ret);
291272343Sngie}
292272343Sngie
293272343Sngie/*
294272343Sngie * Simulate hardware that ignores cache flushes.  For requested number
295272343Sngie * of seconds nix the actual writing to disk.
296272343Sngie */
297272343Sngievoid
298272343Sngiezio_handle_ignored_writes(zio_t *zio)
299272343Sngie{
300272343Sngie	inject_handler_t *handler;
301272343Sngie
302272343Sngie	rw_enter(&inject_lock, RW_READER);
303272343Sngie
304272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
305272343Sngie	    handler = list_next(&inject_handlers, handler)) {
306272343Sngie
307272343Sngie		/* Ignore errors not destined for this pool */
308272343Sngie		if (zio->io_spa != handler->zi_spa)
309272343Sngie			continue;
310272343Sngie
311272343Sngie		if (handler->zi_record.zi_duration == 0)
312272343Sngie			continue;
313272343Sngie
314272343Sngie		/*
315272343Sngie		 * Positive duration implies # of seconds, negative
316272343Sngie		 * a number of txgs
317272343Sngie		 */
318272343Sngie		if (handler->zi_record.zi_timer == 0) {
319272343Sngie			if (handler->zi_record.zi_duration > 0)
320272343Sngie				handler->zi_record.zi_timer = lbolt64;
321272343Sngie			else
322272343Sngie				handler->zi_record.zi_timer = zio->io_txg;
323272343Sngie		}
324272343Sngie		zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
325272343Sngie		break;
326272343Sngie	}
327272343Sngie
328272343Sngie	rw_exit(&inject_lock);
329272343Sngie}
330272343Sngie
331272343Sngievoid
332272343Sngiespa_handle_ignored_writes(spa_t *spa)
333272343Sngie{
334272343Sngie	inject_handler_t *handler;
335272343Sngie
336272343Sngie	if (zio_injection_enabled == 0)
337272343Sngie		return;
338272343Sngie
339272343Sngie	rw_enter(&inject_lock, RW_READER);
340272343Sngie
341272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
342272343Sngie	    handler = list_next(&inject_handlers, handler)) {
343272343Sngie
344272343Sngie		/* Ignore errors not destined for this pool */
345272343Sngie		if (spa != handler->zi_spa)
346272343Sngie			continue;
347272343Sngie
348272343Sngie		if (handler->zi_record.zi_duration == 0)
349272343Sngie			continue;
350272343Sngie
351272343Sngie		if (handler->zi_record.zi_duration > 0) {
352272343Sngie			VERIFY(handler->zi_record.zi_timer == 0 ||
353272343Sngie			    handler->zi_record.zi_timer +
354272343Sngie			    handler->zi_record.zi_duration * hz > lbolt64);
355272343Sngie		} else {
356272343Sngie			/* duration is negative so the subtraction here adds */
357272343Sngie			VERIFY(handler->zi_record.zi_timer == 0 ||
358272343Sngie			    handler->zi_record.zi_timer -
359272343Sngie			    handler->zi_record.zi_duration >=
360272343Sngie			    spa->spa_syncing_txg);
361272343Sngie		}
362272343Sngie	}
363272343Sngie
364272343Sngie	rw_exit(&inject_lock);
365272343Sngie}
366272343Sngie
367272343Sngie/*
368272343Sngie * Create a new handler for the given record.  We add it to the list, adding
369272343Sngie * a reference to the spa_t in the process.  We increment zio_injection_enabled,
370272343Sngie * which is the switch to trigger all fault injection.
371272343Sngie */
372272343Sngieint
373272343Sngiezio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
374272343Sngie{
375272343Sngie	inject_handler_t *handler;
376272343Sngie	int error;
377272343Sngie	spa_t *spa;
378272343Sngie
379272343Sngie	/*
380272343Sngie	 * If this is pool-wide metadata, make sure we unload the corresponding
381272343Sngie	 * spa_t, so that the next attempt to load it will trigger the fault.
382272343Sngie	 * We call spa_reset() to unload the pool appropriately.
383272343Sngie	 */
384272343Sngie	if (flags & ZINJECT_UNLOAD_SPA)
385272343Sngie		if ((error = spa_reset(name)) != 0)
386272343Sngie			return (error);
387272343Sngie
388272343Sngie	if (!(flags & ZINJECT_NULL)) {
389272343Sngie		/*
390272343Sngie		 * spa_inject_ref() will add an injection reference, which will
391272343Sngie		 * prevent the pool from being removed from the namespace while
392272343Sngie		 * still allowing it to be unloaded.
393272343Sngie		 */
394272343Sngie		if ((spa = spa_inject_addref(name)) == NULL)
395272343Sngie			return (ENOENT);
396272343Sngie
397272343Sngie		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
398272343Sngie
399272343Sngie		rw_enter(&inject_lock, RW_WRITER);
400272343Sngie
401272343Sngie		*id = handler->zi_id = inject_next_id++;
402272343Sngie		handler->zi_spa = spa;
403272343Sngie		handler->zi_record = *record;
404272343Sngie		list_insert_tail(&inject_handlers, handler);
405272343Sngie		atomic_add_32(&zio_injection_enabled, 1);
406272343Sngie
407272343Sngie		rw_exit(&inject_lock);
408272343Sngie	}
409272343Sngie
410272343Sngie	/*
411272343Sngie	 * Flush the ARC, so that any attempts to read this data will end up
412272343Sngie	 * going to the ZIO layer.  Note that this is a little overkill, but
413272343Sngie	 * we don't have the necessary ARC interfaces to do anything else, and
414272343Sngie	 * fault injection isn't a performance critical path.
415272343Sngie	 */
416272343Sngie	if (flags & ZINJECT_FLUSH_ARC)
417272343Sngie		arc_flush(NULL);
418272343Sngie
419272343Sngie	return (0);
420272343Sngie}
421272343Sngie
422272343Sngie/*
423272343Sngie * Returns the next record with an ID greater than that supplied to the
424272343Sngie * function.  Used to iterate over all handlers in the system.
425272343Sngie */
426272343Sngieint
427272343Sngiezio_inject_list_next(int *id, char *name, size_t buflen,
428272343Sngie    zinject_record_t *record)
429272343Sngie{
430272343Sngie	inject_handler_t *handler;
431272343Sngie	int ret;
432272343Sngie
433272343Sngie	mutex_enter(&spa_namespace_lock);
434272343Sngie	rw_enter(&inject_lock, RW_READER);
435272343Sngie
436272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
437272343Sngie	    handler = list_next(&inject_handlers, handler))
438272343Sngie		if (handler->zi_id > *id)
439272343Sngie			break;
440272343Sngie
441272343Sngie	if (handler) {
442272343Sngie		*record = handler->zi_record;
443272343Sngie		*id = handler->zi_id;
444272343Sngie		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
445272343Sngie		ret = 0;
446272343Sngie	} else {
447272343Sngie		ret = ENOENT;
448272343Sngie	}
449272343Sngie
450272343Sngie	rw_exit(&inject_lock);
451272343Sngie	mutex_exit(&spa_namespace_lock);
452272343Sngie
453272343Sngie	return (ret);
454272343Sngie}
455272343Sngie
456272343Sngie/*
457272343Sngie * Clear the fault handler with the given identifier, or return ENOENT if none
458272343Sngie * exists.
459272343Sngie */
460272343Sngieint
461272343Sngiezio_clear_fault(int id)
462272343Sngie{
463272343Sngie	inject_handler_t *handler;
464272343Sngie	int ret;
465272343Sngie
466272343Sngie	rw_enter(&inject_lock, RW_WRITER);
467272343Sngie
468272343Sngie	for (handler = list_head(&inject_handlers); handler != NULL;
469272343Sngie	    handler = list_next(&inject_handlers, handler))
470272343Sngie		if (handler->zi_id == id)
471272343Sngie			break;
472272343Sngie
473272343Sngie	if (handler == NULL) {
474272343Sngie		ret = ENOENT;
475272343Sngie	} else {
476272343Sngie		list_remove(&inject_handlers, handler);
477272343Sngie		spa_inject_delref(handler->zi_spa);
478272343Sngie		kmem_free(handler, sizeof (inject_handler_t));
479272343Sngie		atomic_add_32(&zio_injection_enabled, -1);
480272343Sngie		ret = 0;
481272343Sngie	}
482272343Sngie
483272343Sngie	rw_exit(&inject_lock);
484272343Sngie
485272343Sngie	return (ret);
486272343Sngie}
487272343Sngie
/*
 * Set up the injection subsystem: initialize inject_lock and create the
 * (empty) inject_handlers list, linked through inject_handler_t's zi_link.
 */
void
zio_inject_init(void)
{
	rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
	list_create(&inject_handlers, sizeof (inject_handler_t),
	    offsetof(inject_handler_t, zi_link));
}
495272343Sngie
/*
 * Tear down the injection subsystem: destroy the handler list and then the
 * lock that protected it.  No handlers are freed here.
 */
void
zio_inject_fini(void)
{
	list_destroy(&inject_handlers);
	rw_destroy(&inject_lock);
}
502272343Sngie