1185029Spjd/*
2185029Spjd * CDDL HEADER START
3185029Spjd *
4185029Spjd * The contents of this file are subject to the terms of the
5185029Spjd * Common Development and Distribution License (the "License").
6185029Spjd * You may not use this file except in compliance with the License.
7185029Spjd *
8185029Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9185029Spjd * or http://www.opensolaris.org/os/licensing.
10185029Spjd * See the License for the specific language governing permissions
11185029Spjd * and limitations under the License.
12185029Spjd *
13185029Spjd * When distributing Covered Code, include this CDDL HEADER in each
14185029Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15185029Spjd * If applicable, add the following below this CDDL HEADER, with the
16185029Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17185029Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18185029Spjd *
19185029Spjd * CDDL HEADER END
20185029Spjd */
21185029Spjd/*
22219089Spjd * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23247265Smm * Copyright (c) 2012 by Delphix. All rights reserved.
24185029Spjd */
25185029Spjd
26185029Spjd#include <libzfs.h>
27185029Spjd
28185029Spjd#include <sys/zfs_context.h>
29185029Spjd
30185029Spjd#include <errno.h>
31185029Spjd#include <fcntl.h>
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd#include <stdio.h>
35185029Spjd#include <stdlib.h>
36185029Spjd#include <strings.h>
37185029Spjd#include <sys/file.h>
38185029Spjd#include <sys/mntent.h>
39185029Spjd#include <sys/mnttab.h>
40185029Spjd#include <sys/param.h>
41185029Spjd#include <sys/stat.h>
42185029Spjd
43185029Spjd#include <sys/dmu.h>
44185029Spjd#include <sys/dmu_objset.h>
45185029Spjd#include <sys/dnode.h>
46185029Spjd#include <sys/vdev_impl.h>
47185029Spjd
48185029Spjd#include "zinject.h"
49185029Spjd
50185029Spjdextern void kernel_init(int);
51185029Spjdextern void kernel_fini(void);
52185029Spjd
/* Non-zero enables the diagnostic output produced by ziprintf(). */
static int debug;

/*
 * printf(3)-style tracing helper.  The formatted message is written to
 * stdout only while 'debug' is non-zero; otherwise the call does nothing.
 */
static void
ziprintf(const char *fmt, ...)
{
	if (debug) {
		va_list args;

		va_start(args, fmt);
		(void) vprintf(fmt, args);
		va_end(args);
	}
}
67185029Spjd
/*
 * Copy 'src' into 'dest', collapsing every run of consecutive '/'
 * characters into a single '/'.  'dest' is always NUL-terminated.
 *
 * No bounds checking is performed: 'dest' must have room for
 * strlen(src) + 1 bytes.  The output is never longer than the input.
 */
static void
compress_slashes(const char *src, char *dest)
{
	const char *in;
	char *out = dest;

	for (in = src; *in != '\0'; in++) {
		/* Drop a slash that is immediately followed by another one. */
		if (in[0] == '/' && in[1] == '/')
			continue;
		*out++ = *in;
	}
	*out = '\0';
}
79219089Spjd
80185029Spjd/*
81185029Spjd * Given a full path to a file, translate into a dataset name and a relative
82185029Spjd * path within the dataset.  'dataset' must be at least MAXNAMELEN characters,
83185029Spjd * and 'relpath' must be at least MAXPATHLEN characters.  We also pass a stat64
84185029Spjd * buffer, which we need later to get the object ID.
85185029Spjd */
86185029Spjdstatic int
87219089Spjdparse_pathname(const char *inpath, char *dataset, char *relpath,
88185029Spjd    struct stat64 *statbuf)
89185029Spjd{
90185029Spjd	struct statfs sfs;
91185029Spjd	const char *rel;
92219089Spjd	char fullpath[MAXPATHLEN];
93185029Spjd
94219089Spjd	compress_slashes(inpath, fullpath);
95219089Spjd
96185029Spjd	if (fullpath[0] != '/') {
97185029Spjd		(void) fprintf(stderr, "invalid object '%s': must be full "
98185029Spjd		    "path\n", fullpath);
99185029Spjd		usage();
100185029Spjd		return (-1);
101185029Spjd	}
102185029Spjd
103185029Spjd	if (strlen(fullpath) >= MAXPATHLEN) {
104185029Spjd		(void) fprintf(stderr, "invalid object; pathname too long\n");
105185029Spjd		return (-1);
106185029Spjd	}
107185029Spjd
108185029Spjd	if (stat64(fullpath, statbuf) != 0) {
109185029Spjd		(void) fprintf(stderr, "cannot open '%s': %s\n",
110185029Spjd		    fullpath, strerror(errno));
111185029Spjd		return (-1);
112185029Spjd	}
113185029Spjd
114185029Spjd	if (statfs(fullpath, &sfs) == -1) {
115185029Spjd		(void) fprintf(stderr, "cannot find mountpoint for '%s': %s\n",
116185029Spjd		    fullpath, strerror(errno));
117185029Spjd		return (-1);
118185029Spjd	}
119185029Spjd
120185029Spjd	if (strcmp(sfs.f_fstypename, MNTTYPE_ZFS) != 0) {
121185029Spjd		(void) fprintf(stderr, "invalid path '%s': not a ZFS "
122185029Spjd		    "filesystem\n", fullpath);
123185029Spjd		return (-1);
124185029Spjd	}
125185029Spjd
126185029Spjd	if (strncmp(fullpath, sfs.f_mntonname, strlen(sfs.f_mntonname)) != 0) {
127185029Spjd		(void) fprintf(stderr, "invalid path '%s': mountpoint "
128185029Spjd		    "doesn't match path\n", fullpath);
129185029Spjd		return (-1);
130185029Spjd	}
131185029Spjd
132185029Spjd	(void) strcpy(dataset, sfs.f_mntfromname);
133185029Spjd
134185029Spjd	rel = fullpath + strlen(sfs.f_mntonname);
135185029Spjd	if (rel[0] == '/')
136185029Spjd		rel++;
137185029Spjd	(void) strcpy(relpath, rel);
138185029Spjd
139185029Spjd	return (0);
140185029Spjd}
141185029Spjd
142185029Spjd/*
143185029Spjd * Convert from a (dataset, path) pair into a (objset, object) pair.  Note that
144185029Spjd * we grab the object number from the inode number, since looking this up via
145185029Spjd * libzpool is a real pain.
146185029Spjd */
147185029Spjd/* ARGSUSED */
148185029Spjdstatic int
149185029Spjdobject_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
150185029Spjd    zinject_record_t *record)
151185029Spjd{
152185029Spjd	objset_t *os;
153185029Spjd	int err;
154185029Spjd
155185029Spjd	/*
156185029Spjd	 * Before doing any libzpool operations, call sync() to ensure that the
157185029Spjd	 * on-disk state is consistent with the in-core state.
158185029Spjd	 */
159185029Spjd	sync();
160185029Spjd
161219089Spjd	err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
162219089Spjd	if (err != 0) {
163185029Spjd		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
164185029Spjd		    dataset, strerror(err));
165185029Spjd		return (-1);
166185029Spjd	}
167185029Spjd
168185029Spjd	record->zi_objset = dmu_objset_id(os);
169185029Spjd	record->zi_object = statbuf->st_ino;
170185029Spjd
171219089Spjd	dmu_objset_disown(os, FTAG);
172185029Spjd
173185029Spjd	return (0);
174185029Spjd}
175185029Spjd
176185029Spjd/*
177185029Spjd * Calculate the real range based on the type, level, and range given.
178185029Spjd */
179185029Spjdstatic int
180185029Spjdcalculate_range(const char *dataset, err_type_t type, int level, char *range,
181185029Spjd    zinject_record_t *record)
182185029Spjd{
183185029Spjd	objset_t *os = NULL;
184185029Spjd	dnode_t *dn = NULL;
185185029Spjd	int err;
186185029Spjd	int ret = -1;
187185029Spjd
188185029Spjd	/*
189185029Spjd	 * Determine the numeric range from the string.
190185029Spjd	 */
191185029Spjd	if (range == NULL) {
192185029Spjd		/*
193185029Spjd		 * If range is unspecified, set the range to [0,-1], which
194185029Spjd		 * indicates that the whole object should be treated as an
195185029Spjd		 * error.
196185029Spjd		 */
197185029Spjd		record->zi_start = 0;
198185029Spjd		record->zi_end = -1ULL;
199185029Spjd	} else {
200185029Spjd		char *end;
201185029Spjd
202185029Spjd		/* XXX add support for suffixes */
203185029Spjd		record->zi_start = strtoull(range, &end, 10);
204185029Spjd
205185029Spjd
206185029Spjd		if (*end == '\0')
207185029Spjd			record->zi_end = record->zi_start + 1;
208185029Spjd		else if (*end == ',')
209185029Spjd			record->zi_end = strtoull(end + 1, &end, 10);
210185029Spjd
211185029Spjd		if (*end != '\0') {
212185029Spjd			(void) fprintf(stderr, "invalid range '%s': must be "
213185029Spjd			    "a numeric range of the form 'start[,end]'\n",
214185029Spjd			    range);
215185029Spjd			goto out;
216185029Spjd		}
217185029Spjd	}
218185029Spjd
219185029Spjd	switch (type) {
220185029Spjd	case TYPE_DATA:
221185029Spjd		break;
222185029Spjd
223185029Spjd	case TYPE_DNODE:
224185029Spjd		/*
225185029Spjd		 * If this is a request to inject faults into the dnode, then we
226185029Spjd		 * must translate the current (objset,object) pair into an
227185029Spjd		 * offset within the metadnode for the objset.  Specifying any
228185029Spjd		 * kind of range with type 'dnode' is illegal.
229185029Spjd		 */
230185029Spjd		if (range != NULL) {
231185029Spjd			(void) fprintf(stderr, "range cannot be specified when "
232185029Spjd			    "type is 'dnode'\n");
233185029Spjd			goto out;
234185029Spjd		}
235185029Spjd
236185029Spjd		record->zi_start = record->zi_object * sizeof (dnode_phys_t);
237185029Spjd		record->zi_end = record->zi_start + sizeof (dnode_phys_t);
238185029Spjd		record->zi_object = 0;
239185029Spjd		break;
240185029Spjd	}
241185029Spjd
242185029Spjd	/*
243185029Spjd	 * Get the dnode associated with object, so we can calculate the block
244185029Spjd	 * size.
245185029Spjd	 */
246219089Spjd	if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
247219089Spjd	    B_TRUE, FTAG, &os)) != 0) {
248185029Spjd		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
249185029Spjd		    dataset, strerror(err));
250185029Spjd		goto out;
251185029Spjd	}
252185029Spjd
253185029Spjd	if (record->zi_object == 0) {
254219089Spjd		dn = DMU_META_DNODE(os);
255185029Spjd	} else {
256219089Spjd		err = dnode_hold(os, record->zi_object, FTAG, &dn);
257185029Spjd		if (err != 0) {
258185029Spjd			(void) fprintf(stderr, "failed to hold dnode "
259185029Spjd			    "for object %llu\n",
260185029Spjd			    (u_longlong_t)record->zi_object);
261185029Spjd			goto out;
262185029Spjd		}
263185029Spjd	}
264185029Spjd
265185029Spjd
266185029Spjd	ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
267185029Spjd	ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
268185029Spjd
269185029Spjd	/*
270185029Spjd	 * Translate range into block IDs.
271185029Spjd	 */
272185029Spjd	if (record->zi_start != 0 || record->zi_end != -1ULL) {
273185029Spjd		record->zi_start >>= dn->dn_datablkshift;
274185029Spjd		record->zi_end >>= dn->dn_datablkshift;
275185029Spjd	}
276185029Spjd
277185029Spjd	/*
278185029Spjd	 * Check level, and then translate level 0 blkids into ranges
279185029Spjd	 * appropriate for level of indirection.
280185029Spjd	 */
281185029Spjd	record->zi_level = level;
282185029Spjd	if (level > 0) {
283185029Spjd		ziprintf("level 0 blkid range: [%llu, %llu]\n",
284185029Spjd		    record->zi_start, record->zi_end);
285185029Spjd
286185029Spjd		if (level >= dn->dn_nlevels) {
287185029Spjd			(void) fprintf(stderr, "level %d exceeds max level "
288185029Spjd			    "of object (%d)\n", level, dn->dn_nlevels - 1);
289185029Spjd			goto out;
290185029Spjd		}
291185029Spjd
292185029Spjd		if (record->zi_start != 0 || record->zi_end != 0) {
293185029Spjd			int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
294185029Spjd
295185029Spjd			for (; level > 0; level--) {
296185029Spjd				record->zi_start >>= shift;
297185029Spjd				record->zi_end >>= shift;
298185029Spjd			}
299185029Spjd		}
300185029Spjd	}
301185029Spjd
302185029Spjd	ret = 0;
303185029Spjdout:
304185029Spjd	if (dn) {
305219089Spjd		if (dn != DMU_META_DNODE(os))
306185029Spjd			dnode_rele(dn, FTAG);
307185029Spjd	}
308185029Spjd	if (os)
309219089Spjd		dmu_objset_disown(os, FTAG);
310185029Spjd
311185029Spjd	return (ret);
312185029Spjd}
313185029Spjd
314185029Spjdint
315185029Spjdtranslate_record(err_type_t type, const char *object, const char *range,
316185029Spjd    int level, zinject_record_t *record, char *poolname, char *dataset)
317185029Spjd{
318185029Spjd	char path[MAXPATHLEN];
319185029Spjd	char *slash;
320185029Spjd	struct stat64 statbuf;
321185029Spjd	int ret = -1;
322185029Spjd
323185029Spjd	kernel_init(FREAD);
324185029Spjd
325185029Spjd	debug = (getenv("ZINJECT_DEBUG") != NULL);
326185029Spjd
327185029Spjd	ziprintf("translating: %s\n", object);
328185029Spjd
329185029Spjd	if (MOS_TYPE(type)) {
330185029Spjd		/*
331185029Spjd		 * MOS objects are treated specially.
332185029Spjd		 */
333185029Spjd		switch (type) {
334185029Spjd		case TYPE_MOS:
335185029Spjd			record->zi_type = 0;
336185029Spjd			break;
337185029Spjd		case TYPE_MOSDIR:
338185029Spjd			record->zi_type = DMU_OT_OBJECT_DIRECTORY;
339185029Spjd			break;
340185029Spjd		case TYPE_METASLAB:
341185029Spjd			record->zi_type = DMU_OT_OBJECT_ARRAY;
342185029Spjd			break;
343185029Spjd		case TYPE_CONFIG:
344185029Spjd			record->zi_type = DMU_OT_PACKED_NVLIST;
345185029Spjd			break;
346219089Spjd		case TYPE_BPOBJ:
347219089Spjd			record->zi_type = DMU_OT_BPOBJ;
348185029Spjd			break;
349185029Spjd		case TYPE_SPACEMAP:
350185029Spjd			record->zi_type = DMU_OT_SPACE_MAP;
351185029Spjd			break;
352185029Spjd		case TYPE_ERRLOG:
353185029Spjd			record->zi_type = DMU_OT_ERROR_LOG;
354185029Spjd			break;
355185029Spjd		}
356185029Spjd
357185029Spjd		dataset[0] = '\0';
358185029Spjd		(void) strcpy(poolname, object);
359185029Spjd		return (0);
360185029Spjd	}
361185029Spjd
362185029Spjd	/*
363185029Spjd	 * Convert a full path into a (dataset, file) pair.
364185029Spjd	 */
365185029Spjd	if (parse_pathname(object, dataset, path, &statbuf) != 0)
366185029Spjd		goto err;
367185029Spjd
368185029Spjd	ziprintf("   dataset: %s\n", dataset);
369185029Spjd	ziprintf("      path: %s\n", path);
370185029Spjd
371185029Spjd	/*
372185029Spjd	 * Convert (dataset, file) into (objset, object)
373185029Spjd	 */
374185029Spjd	if (object_from_path(dataset, path, &statbuf, record) != 0)
375185029Spjd		goto err;
376185029Spjd
377185029Spjd	ziprintf("raw objset: %llu\n", record->zi_objset);
378185029Spjd	ziprintf("raw object: %llu\n", record->zi_object);
379185029Spjd
380185029Spjd	/*
381185029Spjd	 * For the given object, calculate the real (type, level, range)
382185029Spjd	 */
383185029Spjd	if (calculate_range(dataset, type, level, (char *)range, record) != 0)
384185029Spjd		goto err;
385185029Spjd
386185029Spjd	ziprintf("    objset: %llu\n", record->zi_objset);
387185029Spjd	ziprintf("    object: %llu\n", record->zi_object);
388185029Spjd	if (record->zi_start == 0 &&
389185029Spjd	    record->zi_end == -1ULL)
390185029Spjd		ziprintf("     range: all\n");
391185029Spjd	else
392185029Spjd		ziprintf("     range: [%llu, %llu]\n", record->zi_start,
393185029Spjd		    record->zi_end);
394185029Spjd
395185029Spjd	/*
396185029Spjd	 * Copy the pool name
397185029Spjd	 */
398185029Spjd	(void) strcpy(poolname, dataset);
399185029Spjd	if ((slash = strchr(poolname, '/')) != NULL)
400185029Spjd		*slash = '\0';
401185029Spjd
402185029Spjd	ret = 0;
403185029Spjd
404185029Spjderr:
405185029Spjd	kernel_fini();
406185029Spjd	return (ret);
407185029Spjd}
408185029Spjd
409185029Spjdint
410185029Spjdtranslate_raw(const char *str, zinject_record_t *record)
411185029Spjd{
412185029Spjd	/*
413185029Spjd	 * A raw bookmark of the form objset:object:level:blkid, where each
414185029Spjd	 * number is a hexidecimal value.
415185029Spjd	 */
416185029Spjd	if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
417185029Spjd	    (u_longlong_t *)&record->zi_object, &record->zi_level,
418185029Spjd	    (u_longlong_t *)&record->zi_start) != 4) {
419185029Spjd		(void) fprintf(stderr, "bad raw spec '%s': must be of the form "
420185029Spjd		    "'objset:object:level:blkid'\n", str);
421185029Spjd		return (-1);
422185029Spjd	}
423185029Spjd
424185029Spjd	record->zi_end = record->zi_start;
425185029Spjd
426185029Spjd	return (0);
427185029Spjd}
428185029Spjd
429185029Spjdint
430185029Spjdtranslate_device(const char *pool, const char *device, err_type_t label_type,
431185029Spjd    zinject_record_t *record)
432185029Spjd{
433185029Spjd	char *end;
434185029Spjd	zpool_handle_t *zhp;
435185029Spjd	nvlist_t *tgt;
436185029Spjd	boolean_t isspare, iscache;
437185029Spjd
438185029Spjd	/*
439185029Spjd	 * Given a device name or GUID, create an appropriate injection record
440185029Spjd	 * with zi_guid set.
441185029Spjd	 */
442185029Spjd	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
443185029Spjd		return (-1);
444185029Spjd
445185029Spjd	record->zi_guid = strtoull(device, &end, 16);
446185029Spjd	if (record->zi_guid == 0 || *end != '\0') {
447185029Spjd		tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL);
448185029Spjd
449185029Spjd		if (tgt == NULL) {
450185029Spjd			(void) fprintf(stderr, "cannot find device '%s' in "
451185029Spjd			    "pool '%s'\n", device, pool);
452185029Spjd			return (-1);
453185029Spjd		}
454185029Spjd
455185029Spjd		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
456185029Spjd		    &record->zi_guid) == 0);
457185029Spjd	}
458185029Spjd
459247265Smm	/*
460247265Smm	 * Device faults can take on three different forms:
461247265Smm	 * 1). delayed or hanging I/O
462247265Smm	 * 2). zfs label faults
463247265Smm	 * 3). generic disk faults
464247265Smm	 */
465247265Smm	if (record->zi_timer != 0) {
466247265Smm		record->zi_cmd = ZINJECT_DELAY_IO;
467247265Smm	} else if (label_type != TYPE_INVAL) {
468247265Smm		record->zi_cmd = ZINJECT_LABEL_FAULT;
469247265Smm	} else {
470247265Smm		record->zi_cmd = ZINJECT_DEVICE_FAULT;
471247265Smm	}
472247265Smm
473185029Spjd	switch (label_type) {
474185029Spjd	case TYPE_LABEL_UBERBLOCK:
475185029Spjd		record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
476185029Spjd		record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
477185029Spjd		break;
478185029Spjd	case TYPE_LABEL_NVLIST:
479185029Spjd		record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
480185029Spjd		record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
481185029Spjd		break;
482219089Spjd	case TYPE_LABEL_PAD1:
483219089Spjd		record->zi_start = offsetof(vdev_label_t, vl_pad1);
484219089Spjd		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
485219089Spjd		break;
486219089Spjd	case TYPE_LABEL_PAD2:
487219089Spjd		record->zi_start = offsetof(vdev_label_t, vl_pad2);
488219089Spjd		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
489219089Spjd		break;
490185029Spjd	}
491185029Spjd	return (0);
492185029Spjd}
493