1185029Spjd/* 2185029Spjd * CDDL HEADER START 3185029Spjd * 4185029Spjd * The contents of this file are subject to the terms of the 5185029Spjd * Common Development and Distribution License (the "License"). 6185029Spjd * You may not use this file except in compliance with the License. 7185029Spjd * 8185029Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9185029Spjd * or http://www.opensolaris.org/os/licensing. 10185029Spjd * See the License for the specific language governing permissions 11185029Spjd * and limitations under the License. 12185029Spjd * 13185029Spjd * When distributing Covered Code, include this CDDL HEADER in each 14185029Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15185029Spjd * If applicable, add the following below this CDDL HEADER, with the 16185029Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17185029Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18185029Spjd * 19185029Spjd * CDDL HEADER END 20185029Spjd */ 21185029Spjd/* 22219089Spjd * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23247265Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24185029Spjd */ 25185029Spjd 26185029Spjd#include <libzfs.h> 27185029Spjd 28185029Spjd#include <sys/zfs_context.h> 29185029Spjd 30185029Spjd#include <errno.h> 31185029Spjd#include <fcntl.h> 32185029Spjd#include <stdarg.h> 33185029Spjd#include <stddef.h> 34185029Spjd#include <stdio.h> 35185029Spjd#include <stdlib.h> 36185029Spjd#include <strings.h> 37185029Spjd#include <sys/file.h> 38185029Spjd#include <sys/mntent.h> 39185029Spjd#include <sys/mnttab.h> 40185029Spjd#include <sys/param.h> 41185029Spjd#include <sys/stat.h> 42185029Spjd 43185029Spjd#include <sys/dmu.h> 44185029Spjd#include <sys/dmu_objset.h> 45185029Spjd#include <sys/dnode.h> 46185029Spjd#include <sys/vdev_impl.h> 47185029Spjd 48185029Spjd#include "zinject.h" 49185029Spjd 50185029Spjdextern void kernel_init(int); 51185029Spjdextern void kernel_fini(void); 52185029Spjd 53185029Spjdstatic int debug; 54185029Spjd 55185029Spjdstatic void 56185029Spjdziprintf(const char *fmt, ...) 57185029Spjd{ 58185029Spjd va_list ap; 59185029Spjd 60185029Spjd if (!debug) 61185029Spjd return; 62185029Spjd 63185029Spjd va_start(ap, fmt); 64185029Spjd (void) vprintf(fmt, ap); 65185029Spjd va_end(ap); 66185029Spjd} 67185029Spjd 68219089Spjdstatic void 69219089Spjdcompress_slashes(const char *src, char *dest) 70219089Spjd{ 71219089Spjd while (*src != '\0') { 72219089Spjd *dest = *src++; 73219089Spjd while (*dest == '/' && *src == '/') 74219089Spjd ++src; 75219089Spjd ++dest; 76219089Spjd } 77219089Spjd *dest = '\0'; 78219089Spjd} 79219089Spjd 80185029Spjd/* 81185029Spjd * Given a full path to a file, translate into a dataset name and a relative 82185029Spjd * path within the dataset. 'dataset' must be at least MAXNAMELEN characters, 83185029Spjd * and 'relpath' must be at least MAXPATHLEN characters. We also pass a stat64 84185029Spjd * buffer, which we need later to get the object ID. 85185029Spjd */ 86185029Spjdstatic int 87219089Spjdparse_pathname(const char *inpath, char *dataset, char *relpath, 88185029Spjd struct stat64 *statbuf) 89185029Spjd{ 90185029Spjd struct statfs sfs; 91185029Spjd const char *rel; 92219089Spjd char fullpath[MAXPATHLEN]; 93185029Spjd 94219089Spjd compress_slashes(inpath, fullpath); 95219089Spjd 96185029Spjd if (fullpath[0] != '/') { 97185029Spjd (void) fprintf(stderr, "invalid object '%s': must be full " 98185029Spjd "path\n", fullpath); 99185029Spjd usage(); 100185029Spjd return (-1); 101185029Spjd } 102185029Spjd 103185029Spjd if (strlen(fullpath) >= MAXPATHLEN) { 104185029Spjd (void) fprintf(stderr, "invalid object; pathname too long\n"); 105185029Spjd return (-1); 106185029Spjd } 107185029Spjd 108185029Spjd if (stat64(fullpath, statbuf) != 0) { 109185029Spjd (void) fprintf(stderr, "cannot open '%s': %s\n", 110185029Spjd fullpath, strerror(errno)); 111185029Spjd return (-1); 112185029Spjd } 113185029Spjd 114185029Spjd if (statfs(fullpath, &sfs) == -1) { 115185029Spjd (void) fprintf(stderr, "cannot find mountpoint for '%s': %s\n", 116185029Spjd fullpath, strerror(errno)); 117185029Spjd return (-1); 118185029Spjd } 119185029Spjd 120185029Spjd if (strcmp(sfs.f_fstypename, MNTTYPE_ZFS) != 0) { 121185029Spjd (void) fprintf(stderr, "invalid path '%s': not a ZFS " 122185029Spjd "filesystem\n", fullpath); 123185029Spjd return (-1); 124185029Spjd } 125185029Spjd 126185029Spjd if (strncmp(fullpath, sfs.f_mntonname, strlen(sfs.f_mntonname)) != 0) { 127185029Spjd (void) fprintf(stderr, "invalid path '%s': mountpoint " 128185029Spjd "doesn't match path\n", fullpath); 129185029Spjd return (-1); 130185029Spjd } 131185029Spjd 132185029Spjd (void) strcpy(dataset, sfs.f_mntfromname); 133185029Spjd 134185029Spjd rel = fullpath + strlen(sfs.f_mntonname); 135185029Spjd if (rel[0] == '/') 136185029Spjd rel++; 137185029Spjd (void) strcpy(relpath, rel); 138185029Spjd 139185029Spjd return (0); 140185029Spjd} 141185029Spjd 142185029Spjd/* 143185029Spjd * Convert from a (dataset, path) pair into a (objset, object) pair. Note that 144185029Spjd * we grab the object number from the inode number, since looking this up via 145185029Spjd * libzpool is a real pain. 146185029Spjd */ 147185029Spjd/* ARGSUSED */ 148185029Spjdstatic int 149185029Spjdobject_from_path(const char *dataset, const char *path, struct stat64 *statbuf, 150185029Spjd zinject_record_t *record) 151185029Spjd{ 152185029Spjd objset_t *os; 153185029Spjd int err; 154185029Spjd 155185029Spjd /* 156185029Spjd * Before doing any libzpool operations, call sync() to ensure that the 157185029Spjd * on-disk state is consistent with the in-core state. 158185029Spjd */ 159185029Spjd sync(); 160185029Spjd 161219089Spjd err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); 162219089Spjd if (err != 0) { 163185029Spjd (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 164185029Spjd dataset, strerror(err)); 165185029Spjd return (-1); 166185029Spjd } 167185029Spjd 168185029Spjd record->zi_objset = dmu_objset_id(os); 169185029Spjd record->zi_object = statbuf->st_ino; 170185029Spjd 171219089Spjd dmu_objset_disown(os, FTAG); 172185029Spjd 173185029Spjd return (0); 174185029Spjd} 175185029Spjd 176185029Spjd/* 177185029Spjd * Calculate the real range based on the type, level, and range given. 178185029Spjd */ 179185029Spjdstatic int 180185029Spjdcalculate_range(const char *dataset, err_type_t type, int level, char *range, 181185029Spjd zinject_record_t *record) 182185029Spjd{ 183185029Spjd objset_t *os = NULL; 184185029Spjd dnode_t *dn = NULL; 185185029Spjd int err; 186185029Spjd int ret = -1; 187185029Spjd 188185029Spjd /* 189185029Spjd * Determine the numeric range from the string. 190185029Spjd */ 191185029Spjd if (range == NULL) { 192185029Spjd /* 193185029Spjd * If range is unspecified, set the range to [0,-1], which 194185029Spjd * indicates that the whole object should be treated as an 195185029Spjd * error. 196185029Spjd */ 197185029Spjd record->zi_start = 0; 198185029Spjd record->zi_end = -1ULL; 199185029Spjd } else { 200185029Spjd char *end; 201185029Spjd 202185029Spjd /* XXX add support for suffixes */ 203185029Spjd record->zi_start = strtoull(range, &end, 10); 204185029Spjd 205185029Spjd 206185029Spjd if (*end == '\0') 207185029Spjd record->zi_end = record->zi_start + 1; 208185029Spjd else if (*end == ',') 209185029Spjd record->zi_end = strtoull(end + 1, &end, 10); 210185029Spjd 211185029Spjd if (*end != '\0') { 212185029Spjd (void) fprintf(stderr, "invalid range '%s': must be " 213185029Spjd "a numeric range of the form 'start[,end]'\n", 214185029Spjd range); 215185029Spjd goto out; 216185029Spjd } 217185029Spjd } 218185029Spjd 219185029Spjd switch (type) { 220185029Spjd case TYPE_DATA: 221185029Spjd break; 222185029Spjd 223185029Spjd case TYPE_DNODE: 224185029Spjd /* 225185029Spjd * If this is a request to inject faults into the dnode, then we 226185029Spjd * must translate the current (objset,object) pair into an 227185029Spjd * offset within the metadnode for the objset. Specifying any 228185029Spjd * kind of range with type 'dnode' is illegal. 229185029Spjd */ 230185029Spjd if (range != NULL) { 231185029Spjd (void) fprintf(stderr, "range cannot be specified when " 232185029Spjd "type is 'dnode'\n"); 233185029Spjd goto out; 234185029Spjd } 235185029Spjd 236185029Spjd record->zi_start = record->zi_object * sizeof (dnode_phys_t); 237185029Spjd record->zi_end = record->zi_start + sizeof (dnode_phys_t); 238185029Spjd record->zi_object = 0; 239185029Spjd break; 240185029Spjd } 241185029Spjd 242185029Spjd /* 243185029Spjd * Get the dnode associated with object, so we can calculate the block 244185029Spjd * size. 245185029Spjd */ 246219089Spjd if ((err = dmu_objset_own(dataset, DMU_OST_ANY, 247219089Spjd B_TRUE, FTAG, &os)) != 0) { 248185029Spjd (void) fprintf(stderr, "cannot open dataset '%s': %s\n", 249185029Spjd dataset, strerror(err)); 250185029Spjd goto out; 251185029Spjd } 252185029Spjd 253185029Spjd if (record->zi_object == 0) { 254219089Spjd dn = DMU_META_DNODE(os); 255185029Spjd } else { 256219089Spjd err = dnode_hold(os, record->zi_object, FTAG, &dn); 257185029Spjd if (err != 0) { 258185029Spjd (void) fprintf(stderr, "failed to hold dnode " 259185029Spjd "for object %llu\n", 260185029Spjd (u_longlong_t)record->zi_object); 261185029Spjd goto out; 262185029Spjd } 263185029Spjd } 264185029Spjd 265185029Spjd 266185029Spjd ziprintf("data shift: %d\n", (int)dn->dn_datablkshift); 267185029Spjd ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift); 268185029Spjd 269185029Spjd /* 270185029Spjd * Translate range into block IDs. 271185029Spjd */ 272185029Spjd if (record->zi_start != 0 || record->zi_end != -1ULL) { 273185029Spjd record->zi_start >>= dn->dn_datablkshift; 274185029Spjd record->zi_end >>= dn->dn_datablkshift; 275185029Spjd } 276185029Spjd 277185029Spjd /* 278185029Spjd * Check level, and then translate level 0 blkids into ranges 279185029Spjd * appropriate for level of indirection. 280185029Spjd */ 281185029Spjd record->zi_level = level; 282185029Spjd if (level > 0) { 283185029Spjd ziprintf("level 0 blkid range: [%llu, %llu]\n", 284185029Spjd record->zi_start, record->zi_end); 285185029Spjd 286185029Spjd if (level >= dn->dn_nlevels) { 287185029Spjd (void) fprintf(stderr, "level %d exceeds max level " 288185029Spjd "of object (%d)\n", level, dn->dn_nlevels - 1); 289185029Spjd goto out; 290185029Spjd } 291185029Spjd 292185029Spjd if (record->zi_start != 0 || record->zi_end != 0) { 293185029Spjd int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 294185029Spjd 295185029Spjd for (; level > 0; level--) { 296185029Spjd record->zi_start >>= shift; 297185029Spjd record->zi_end >>= shift; 298185029Spjd } 299185029Spjd } 300185029Spjd } 301185029Spjd 302185029Spjd ret = 0; 303185029Spjdout: 304185029Spjd if (dn) { 305219089Spjd if (dn != DMU_META_DNODE(os)) 306185029Spjd dnode_rele(dn, FTAG); 307185029Spjd } 308185029Spjd if (os) 309219089Spjd dmu_objset_disown(os, FTAG); 310185029Spjd 311185029Spjd return (ret); 312185029Spjd} 313185029Spjd 314185029Spjdint 315185029Spjdtranslate_record(err_type_t type, const char *object, const char *range, 316185029Spjd int level, zinject_record_t *record, char *poolname, char *dataset) 317185029Spjd{ 318185029Spjd char path[MAXPATHLEN]; 319185029Spjd char *slash; 320185029Spjd struct stat64 statbuf; 321185029Spjd int ret = -1; 322185029Spjd 323185029Spjd kernel_init(FREAD); 324185029Spjd 325185029Spjd debug = (getenv("ZINJECT_DEBUG") != NULL); 326185029Spjd 327185029Spjd ziprintf("translating: %s\n", object); 328185029Spjd 329185029Spjd if (MOS_TYPE(type)) { 330185029Spjd /* 331185029Spjd * MOS objects are treated specially. 332185029Spjd */ 333185029Spjd switch (type) { 334185029Spjd case TYPE_MOS: 335185029Spjd record->zi_type = 0; 336185029Spjd break; 337185029Spjd case TYPE_MOSDIR: 338185029Spjd record->zi_type = DMU_OT_OBJECT_DIRECTORY; 339185029Spjd break; 340185029Spjd case TYPE_METASLAB: 341185029Spjd record->zi_type = DMU_OT_OBJECT_ARRAY; 342185029Spjd break; 343185029Spjd case TYPE_CONFIG: 344185029Spjd record->zi_type = DMU_OT_PACKED_NVLIST; 345185029Spjd break; 346219089Spjd case TYPE_BPOBJ: 347219089Spjd record->zi_type = DMU_OT_BPOBJ; 348185029Spjd break; 349185029Spjd case TYPE_SPACEMAP: 350185029Spjd record->zi_type = DMU_OT_SPACE_MAP; 351185029Spjd break; 352185029Spjd case TYPE_ERRLOG: 353185029Spjd record->zi_type = DMU_OT_ERROR_LOG; 354185029Spjd break; 355185029Spjd } 356185029Spjd 357185029Spjd dataset[0] = '\0'; 358185029Spjd (void) strcpy(poolname, object); 359185029Spjd return (0); 360185029Spjd } 361185029Spjd 362185029Spjd /* 363185029Spjd * Convert a full path into a (dataset, file) pair. 364185029Spjd */ 365185029Spjd if (parse_pathname(object, dataset, path, &statbuf) != 0) 366185029Spjd goto err; 367185029Spjd 368185029Spjd ziprintf(" dataset: %s\n", dataset); 369185029Spjd ziprintf(" path: %s\n", path); 370185029Spjd 371185029Spjd /* 372185029Spjd * Convert (dataset, file) into (objset, object) 373185029Spjd */ 374185029Spjd if (object_from_path(dataset, path, &statbuf, record) != 0) 375185029Spjd goto err; 376185029Spjd 377185029Spjd ziprintf("raw objset: %llu\n", record->zi_objset); 378185029Spjd ziprintf("raw object: %llu\n", record->zi_object); 379185029Spjd 380185029Spjd /* 381185029Spjd * For the given object, calculate the real (type, level, range) 382185029Spjd */ 383185029Spjd if (calculate_range(dataset, type, level, (char *)range, record) != 0) 384185029Spjd goto err; 385185029Spjd 386185029Spjd ziprintf(" objset: %llu\n", record->zi_objset); 387185029Spjd ziprintf(" object: %llu\n", record->zi_object); 388185029Spjd if (record->zi_start == 0 && 389185029Spjd record->zi_end == -1ULL) 390185029Spjd ziprintf(" range: all\n"); 391185029Spjd else 392185029Spjd ziprintf(" range: [%llu, %llu]\n", record->zi_start, 393185029Spjd record->zi_end); 394185029Spjd 395185029Spjd /* 396185029Spjd * Copy the pool name 397185029Spjd */ 398185029Spjd (void) strcpy(poolname, dataset); 399185029Spjd if ((slash = strchr(poolname, '/')) != NULL) 400185029Spjd *slash = '\0'; 401185029Spjd 402185029Spjd ret = 0; 403185029Spjd 404185029Spjderr: 405185029Spjd kernel_fini(); 406185029Spjd return (ret); 407185029Spjd} 408185029Spjd 409185029Spjdint 410185029Spjdtranslate_raw(const char *str, zinject_record_t *record) 411185029Spjd{ 412185029Spjd /* 413185029Spjd * A raw bookmark of the form objset:object:level:blkid, where each 414185029Spjd * number is a hexidecimal value. 415185029Spjd */ 416185029Spjd if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset, 417185029Spjd (u_longlong_t *)&record->zi_object, &record->zi_level, 418185029Spjd (u_longlong_t *)&record->zi_start) != 4) { 419185029Spjd (void) fprintf(stderr, "bad raw spec '%s': must be of the form " 420185029Spjd "'objset:object:level:blkid'\n", str); 421185029Spjd return (-1); 422185029Spjd } 423185029Spjd 424185029Spjd record->zi_end = record->zi_start; 425185029Spjd 426185029Spjd return (0); 427185029Spjd} 428185029Spjd 429185029Spjdint 430185029Spjdtranslate_device(const char *pool, const char *device, err_type_t label_type, 431185029Spjd zinject_record_t *record) 432185029Spjd{ 433185029Spjd char *end; 434185029Spjd zpool_handle_t *zhp; 435185029Spjd nvlist_t *tgt; 436185029Spjd boolean_t isspare, iscache; 437185029Spjd 438185029Spjd /* 439185029Spjd * Given a device name or GUID, create an appropriate injection record 440185029Spjd * with zi_guid set. 441185029Spjd */ 442185029Spjd if ((zhp = zpool_open(g_zfs, pool)) == NULL) 443185029Spjd return (-1); 444185029Spjd 445185029Spjd record->zi_guid = strtoull(device, &end, 16); 446185029Spjd if (record->zi_guid == 0 || *end != '\0') { 447185029Spjd tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL); 448185029Spjd 449185029Spjd if (tgt == NULL) { 450185029Spjd (void) fprintf(stderr, "cannot find device '%s' in " 451185029Spjd "pool '%s'\n", device, pool); 452185029Spjd return (-1); 453185029Spjd } 454185029Spjd 455185029Spjd verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, 456185029Spjd &record->zi_guid) == 0); 457185029Spjd } 458185029Spjd 459247265Smm /* 460247265Smm * Device faults can take on three different forms: 461247265Smm * 1). delayed or hanging I/O 462247265Smm * 2). zfs label faults 463247265Smm * 3). generic disk faults 464247265Smm */ 465247265Smm if (record->zi_timer != 0) { 466247265Smm record->zi_cmd = ZINJECT_DELAY_IO; 467247265Smm } else if (label_type != TYPE_INVAL) { 468247265Smm record->zi_cmd = ZINJECT_LABEL_FAULT; 469247265Smm } else { 470247265Smm record->zi_cmd = ZINJECT_DEVICE_FAULT; 471247265Smm } 472247265Smm 473185029Spjd switch (label_type) { 474185029Spjd case TYPE_LABEL_UBERBLOCK: 475185029Spjd record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]); 476185029Spjd record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1; 477185029Spjd break; 478185029Spjd case TYPE_LABEL_NVLIST: 479185029Spjd record->zi_start = offsetof(vdev_label_t, vl_vdev_phys); 480185029Spjd record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1; 481185029Spjd break; 482219089Spjd case TYPE_LABEL_PAD1: 483219089Spjd record->zi_start = offsetof(vdev_label_t, vl_pad1); 484219089Spjd record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 485219089Spjd break; 486219089Spjd case TYPE_LABEL_PAD2: 487219089Spjd record->zi_start = offsetof(vdev_label_t, vl_pad2); 488219089Spjd record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1; 489219089Spjd break; 490185029Spjd } 491185029Spjd return (0); 492185029Spjd} 493