ztest.c revision 207910
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22207910Smm * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd/* 27168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 28168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 29168404Spjd * 30168404Spjd * The overall design of the ztest program is as follows: 31168404Spjd * 32168404Spjd * (1) For each major functional area (e.g. adding vdevs to a pool, 33168404Spjd * creating and destroying datasets, reading and writing objects, etc) 34168404Spjd * we have a simple routine to test that functionality. These 35168404Spjd * individual routines do not have to do anything "stressful". 36168404Spjd * 37168404Spjd * (2) We turn these simple functionality tests into a stress test by 38168404Spjd * running them all in parallel, with as many threads as desired, 39168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 40168404Spjd * 41168404Spjd * (3) While all this is happening, we inject faults into the pool to 42168404Spjd * verify that self-healing data really works. 43168404Spjd * 44168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 45168404Spjd * functions. Thus even individual objects vary from block to block 46168404Spjd * in which checksum they use and whether they're compressed. 47168404Spjd * 48168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 49168404Spjd * we run the entire test in a child of the main process. 50168404Spjd * At random times, the child self-immolates with a SIGKILL. 51168404Spjd * This is the software equivalent of pulling the power cord. 52168404Spjd * The parent then runs the test again, using the existing 53168404Spjd * storage pool, as many times as desired. 54168404Spjd * 55168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 56168404Spjd * many of the functional tests record the transaction group number 57168404Spjd * as part of their data. When reading old data, they verify that 58168404Spjd * the transaction group number is less than the current, open txg. 59168404Spjd * If you add a new test, please do this if applicable. 60168404Spjd * 61168404Spjd * When run with no arguments, ztest runs for about five minutes and 62168404Spjd * produces no output if successful. To get a little bit of information, 63168404Spjd * specify -V. To get more information, specify -VV, and so on. 64168404Spjd * 65168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 66168404Spjd * 67168404Spjd * You can ask more more vdevs [-v], datasets [-d], or threads [-t] 68168404Spjd * to increase the pool capacity, fanout, and overall stress level. 69168404Spjd * 70168404Spjd * The -N(okill) option will suppress kills, so each child runs to completion. 71168404Spjd * This can be useful when you're trying to distinguish temporal incursions 72168404Spjd * from plain old race conditions. 73168404Spjd */ 74168404Spjd 75168404Spjd#include <sys/zfs_context.h> 76168404Spjd#include <sys/spa.h> 77168404Spjd#include <sys/dmu.h> 78168404Spjd#include <sys/txg.h> 79168404Spjd#include <sys/zap.h> 80168404Spjd#include <sys/dmu_traverse.h> 81168404Spjd#include <sys/dmu_objset.h> 82168404Spjd#include <sys/poll.h> 83168404Spjd#include <sys/stat.h> 84168404Spjd#include <sys/time.h> 85168404Spjd#include <sys/wait.h> 86168404Spjd#include <sys/mman.h> 87168404Spjd#include <sys/resource.h> 88168404Spjd#include <sys/zio.h> 89168404Spjd#include <sys/zio_checksum.h> 90168404Spjd#include <sys/zio_compress.h> 91168404Spjd#include <sys/zil.h> 92168404Spjd#include <sys/vdev_impl.h> 93185029Spjd#include <sys/vdev_file.h> 94168404Spjd#include <sys/spa_impl.h> 95168404Spjd#include <sys/dsl_prop.h> 96207910Smm#include <sys/dsl_dataset.h> 97168404Spjd#include <sys/refcount.h> 98168404Spjd#include <stdio.h> 99168404Spjd#include <stdio_ext.h> 100168404Spjd#include <stdlib.h> 101168404Spjd#include <unistd.h> 102168404Spjd#include <signal.h> 103168404Spjd#include <umem.h> 104168404Spjd#include <dlfcn.h> 105168404Spjd#include <ctype.h> 106168404Spjd#include <math.h> 107168404Spjd#include <errno.h> 108168404Spjd#include <sys/fs/zfs.h> 109168404Spjd 110168404Spjdstatic char cmdname[] = "ztest"; 111168404Spjdstatic char *zopt_pool = cmdname; 112168404Spjdstatic char *progname; 113168404Spjd 114168404Spjdstatic uint64_t zopt_vdevs = 5; 115168404Spjdstatic uint64_t zopt_vdevtime; 116168404Spjdstatic int zopt_ashift = SPA_MINBLOCKSHIFT; 117168404Spjdstatic int zopt_mirrors = 2; 118168404Spjdstatic int zopt_raidz = 4; 119168404Spjdstatic int zopt_raidz_parity = 1; 120168404Spjdstatic size_t zopt_vdev_size = SPA_MINDEVSIZE; 121168404Spjdstatic int zopt_datasets = 7; 122168404Spjdstatic int zopt_threads = 23; 123168404Spjdstatic uint64_t zopt_passtime = 60; /* 60 seconds */ 124168404Spjdstatic uint64_t zopt_killrate = 70; /* 70% kill rate */ 125168404Spjdstatic int zopt_verbose = 0; 126168404Spjdstatic int zopt_init = 1; 127168404Spjdstatic char *zopt_dir = "/tmp"; 128168404Spjdstatic uint64_t zopt_time = 300; /* 5 minutes */ 129168404Spjdstatic int zopt_maxfaults; 130168404Spjd 131185029Spjdtypedef struct ztest_block_tag { 132185029Spjd uint64_t bt_objset; 133185029Spjd uint64_t bt_object; 134185029Spjd uint64_t bt_offset; 135185029Spjd uint64_t bt_txg; 136185029Spjd uint64_t bt_thread; 137185029Spjd uint64_t bt_seq; 138185029Spjd} ztest_block_tag_t; 139185029Spjd 140168404Spjdtypedef struct ztest_args { 141185029Spjd char za_pool[MAXNAMELEN]; 142185029Spjd spa_t *za_spa; 143168404Spjd objset_t *za_os; 144168404Spjd zilog_t *za_zilog; 145168404Spjd thread_t za_thread; 146168404Spjd uint64_t za_instance; 147168404Spjd uint64_t za_random; 148168404Spjd uint64_t za_diroff; 149168404Spjd uint64_t za_diroff_shared; 150168404Spjd uint64_t za_zil_seq; 151168404Spjd hrtime_t za_start; 152168404Spjd hrtime_t za_stop; 153168404Spjd hrtime_t za_kill; 154168404Spjd traverse_handle_t *za_th; 155185029Spjd /* 156185029Spjd * Thread-local variables can go here to aid debugging. 157185029Spjd */ 158185029Spjd ztest_block_tag_t za_rbt; 159185029Spjd ztest_block_tag_t za_wbt; 160185029Spjd dmu_object_info_t za_doi; 161185029Spjd dmu_buf_t *za_dbuf; 162168404Spjd} ztest_args_t; 163168404Spjd 164168404Spjdtypedef void ztest_func_t(ztest_args_t *); 165168404Spjd 166168404Spjd/* 167168404Spjd * Note: these aren't static because we want dladdr() to work. 168168404Spjd */ 169168404Spjdztest_func_t ztest_dmu_read_write; 170168404Spjdztest_func_t ztest_dmu_write_parallel; 171168404Spjdztest_func_t ztest_dmu_object_alloc_free; 172168404Spjdztest_func_t ztest_zap; 173168404Spjdztest_func_t ztest_zap_parallel; 174168404Spjdztest_func_t ztest_traverse; 175168404Spjdztest_func_t ztest_dsl_prop_get_set; 176168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 177168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 178207910Smmztest_func_t ztest_dsl_dataset_promote_busy; 179168404Spjdztest_func_t ztest_spa_create_destroy; 180168404Spjdztest_func_t ztest_fault_inject; 181185029Spjdztest_func_t ztest_spa_rename; 182168404Spjdztest_func_t ztest_vdev_attach_detach; 183168404Spjdztest_func_t ztest_vdev_LUN_growth; 184168404Spjdztest_func_t ztest_vdev_add_remove; 185185029Spjdztest_func_t ztest_vdev_aux_add_remove; 186168404Spjdztest_func_t ztest_scrub; 187168404Spjd 188168404Spjdtypedef struct ztest_info { 189168404Spjd ztest_func_t *zi_func; /* test function */ 190185029Spjd uint64_t zi_iters; /* iterations per execution */ 191168404Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 192168404Spjd uint64_t zi_calls; /* per-pass count */ 193168404Spjd uint64_t zi_call_time; /* per-pass time */ 194168404Spjd uint64_t zi_call_total; /* cumulative total */ 195168404Spjd uint64_t zi_call_target; /* target cumulative total */ 196168404Spjd} ztest_info_t; 197168404Spjd 198168404Spjduint64_t zopt_always = 0; /* all the time */ 199168404Spjduint64_t zopt_often = 1; /* every second */ 200168404Spjduint64_t zopt_sometimes = 10; /* every 10 seconds */ 201168404Spjduint64_t zopt_rarely = 60; /* every 60 seconds */ 202168404Spjd 203168404Spjdztest_info_t ztest_info[] = { 204185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 205185029Spjd { ztest_dmu_write_parallel, 30, &zopt_always }, 206185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 207185029Spjd { ztest_zap, 30, &zopt_always }, 208185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 209185029Spjd { ztest_traverse, 1, &zopt_often }, 210185029Spjd { ztest_dsl_prop_get_set, 1, &zopt_sometimes }, 211185029Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes }, 212185029Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 213207910Smm { ztest_dsl_dataset_promote_busy, 1, &zopt_sometimes }, 214185029Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 215185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 216185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 217185029Spjd { ztest_vdev_attach_detach, 1, &zopt_rarely }, 218185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 219185029Spjd { ztest_vdev_add_remove, 1, &zopt_vdevtime }, 220185029Spjd { ztest_vdev_aux_add_remove, 1, &zopt_vdevtime }, 221185029Spjd { ztest_scrub, 1, &zopt_vdevtime }, 222168404Spjd}; 223168404Spjd 224168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 225168404Spjd 226168404Spjd#define ZTEST_SYNC_LOCKS 16 227168404Spjd 228168404Spjd/* 229168404Spjd * Stuff we need to share writably between parent and child. 230168404Spjd */ 231168404Spjdtypedef struct ztest_shared { 232168404Spjd mutex_t zs_vdev_lock; 233168404Spjd rwlock_t zs_name_lock; 234168404Spjd uint64_t zs_vdev_primaries; 235185029Spjd uint64_t zs_vdev_aux; 236168404Spjd uint64_t zs_enospc_count; 237168404Spjd hrtime_t zs_start_time; 238168404Spjd hrtime_t zs_stop_time; 239168404Spjd uint64_t zs_alloc; 240168404Spjd uint64_t zs_space; 241168404Spjd ztest_info_t zs_info[ZTEST_FUNCS]; 242168404Spjd mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS]; 243168404Spjd uint64_t zs_seq[ZTEST_SYNC_LOCKS]; 244168404Spjd} ztest_shared_t; 245168404Spjd 246168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 247185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 248168404Spjdstatic ztest_shared_t *ztest_shared; 249168404Spjd 250168404Spjdstatic int ztest_random_fd; 251168404Spjdstatic int ztest_dump_core = 1; 252168404Spjd 253185029Spjdstatic boolean_t ztest_exiting; 254168404Spjd 255185029Spjdextern uint64_t metaslab_gang_bang; 256185029Spjd 257168404Spjd#define ZTEST_DIROBJ 1 258168404Spjd#define ZTEST_MICROZAP_OBJ 2 259168404Spjd#define ZTEST_FATZAP_OBJ 3 260168404Spjd 261168404Spjd#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10) 262168404Spjd#define ZTEST_DIRSIZE 256 263168404Spjd 264168676Spjdstatic void usage(boolean_t) __NORETURN; 265168498Spjd 266168404Spjd/* 267168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 268168404Spjd * debugging facilities. 269168404Spjd */ 270168404Spjdconst char * 271168404Spjd_umem_debug_init() 272168404Spjd{ 273168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 274168404Spjd} 275168404Spjd 276168404Spjdconst char * 277168404Spjd_umem_logging_init(void) 278168404Spjd{ 279168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 280168404Spjd} 281168404Spjd 282168404Spjd#define FATAL_MSG_SZ 1024 283168404Spjd 284168404Spjdchar *fatal_msg; 285168404Spjd 286168404Spjdstatic void 287168404Spjdfatal(int do_perror, char *message, ...) 288168404Spjd{ 289168404Spjd va_list args; 290168404Spjd int save_errno = errno; 291168404Spjd char buf[FATAL_MSG_SZ]; 292168404Spjd 293168404Spjd (void) fflush(stdout); 294168404Spjd 295168404Spjd va_start(args, message); 296168404Spjd (void) sprintf(buf, "ztest: "); 297168404Spjd /* LINTED */ 298168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 299168404Spjd va_end(args); 300168404Spjd if (do_perror) { 301168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 302168404Spjd ": %s", strerror(save_errno)); 303168404Spjd } 304168404Spjd (void) fprintf(stderr, "%s\n", buf); 305168404Spjd fatal_msg = buf; /* to ease debugging */ 306168404Spjd if (ztest_dump_core) 307168404Spjd abort(); 308168404Spjd exit(3); 309168404Spjd} 310168404Spjd 311168404Spjdstatic int 312168404Spjdstr2shift(const char *buf) 313168404Spjd{ 314168404Spjd const char *ends = "BKMGTPEZ"; 315168404Spjd int i; 316168404Spjd 317168404Spjd if (buf[0] == '\0') 318168404Spjd return (0); 319168404Spjd for (i = 0; i < strlen(ends); i++) { 320168404Spjd if (toupper(buf[0]) == ends[i]) 321168404Spjd break; 322168404Spjd } 323168498Spjd if (i == strlen(ends)) { 324168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 325168498Spjd buf); 326168498Spjd usage(B_FALSE); 327168498Spjd } 328168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 329168404Spjd return (10*i); 330168404Spjd } 331168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 332168498Spjd usage(B_FALSE); 333168498Spjd /* NOTREACHED */ 334168404Spjd} 335168404Spjd 336168404Spjdstatic uint64_t 337168404Spjdnicenumtoull(const char *buf) 338168404Spjd{ 339168404Spjd char *end; 340168404Spjd uint64_t val; 341168404Spjd 342168404Spjd val = strtoull(buf, &end, 0); 343168404Spjd if (end == buf) { 344168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 345168498Spjd usage(B_FALSE); 346168404Spjd } else if (end[0] == '.') { 347168404Spjd double fval = strtod(buf, &end); 348168404Spjd fval *= pow(2, str2shift(end)); 349168498Spjd if (fval > UINT64_MAX) { 350168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 351168498Spjd buf); 352168498Spjd usage(B_FALSE); 353168498Spjd } 354168404Spjd val = (uint64_t)fval; 355168404Spjd } else { 356168404Spjd int shift = str2shift(end); 357168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 358168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 359168498Spjd buf); 360168498Spjd usage(B_FALSE); 361168498Spjd } 362168404Spjd val <<= shift; 363168404Spjd } 364168404Spjd return (val); 365168404Spjd} 366168404Spjd 367168404Spjdstatic void 368168498Spjdusage(boolean_t requested) 369168404Spjd{ 370168404Spjd char nice_vdev_size[10]; 371168404Spjd char nice_gang_bang[10]; 372168498Spjd FILE *fp = requested ? stdout : stderr; 373168404Spjd 374168404Spjd nicenum(zopt_vdev_size, nice_vdev_size); 375185029Spjd nicenum(metaslab_gang_bang, nice_gang_bang); 376168404Spjd 377168498Spjd (void) fprintf(fp, "Usage: %s\n" 378168404Spjd "\t[-v vdevs (default: %llu)]\n" 379168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 380168404Spjd "\t[-a alignment_shift (default: %d) (use 0 for random)]\n" 381168404Spjd "\t[-m mirror_copies (default: %d)]\n" 382168404Spjd "\t[-r raidz_disks (default: %d)]\n" 383168404Spjd "\t[-R raidz_parity (default: %d)]\n" 384168404Spjd "\t[-d datasets (default: %d)]\n" 385168404Spjd "\t[-t threads (default: %d)]\n" 386168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 387168404Spjd "\t[-i initialize pool i times (default: %d)]\n" 388168404Spjd "\t[-k kill percentage (default: %llu%%)]\n" 389168404Spjd "\t[-p pool_name (default: %s)]\n" 390168404Spjd "\t[-f file directory for vdev files (default: %s)]\n" 391168404Spjd "\t[-V(erbose)] (use multiple times for ever more blather)\n" 392168404Spjd "\t[-E(xisting)] (use existing pool instead of creating new one)\n" 393168404Spjd "\t[-T time] total run time (default: %llu sec)\n" 394168404Spjd "\t[-P passtime] time per pass (default: %llu sec)\n" 395168498Spjd "\t[-h] (print help)\n" 396168404Spjd "", 397168404Spjd cmdname, 398185029Spjd (u_longlong_t)zopt_vdevs, /* -v */ 399185029Spjd nice_vdev_size, /* -s */ 400185029Spjd zopt_ashift, /* -a */ 401185029Spjd zopt_mirrors, /* -m */ 402185029Spjd zopt_raidz, /* -r */ 403185029Spjd zopt_raidz_parity, /* -R */ 404185029Spjd zopt_datasets, /* -d */ 405185029Spjd zopt_threads, /* -t */ 406185029Spjd nice_gang_bang, /* -g */ 407185029Spjd zopt_init, /* -i */ 408185029Spjd (u_longlong_t)zopt_killrate, /* -k */ 409185029Spjd zopt_pool, /* -p */ 410185029Spjd zopt_dir, /* -f */ 411185029Spjd (u_longlong_t)zopt_time, /* -T */ 412185029Spjd (u_longlong_t)zopt_passtime); /* -P */ 413168498Spjd exit(requested ? 0 : 1); 414168404Spjd} 415168404Spjd 416168404Spjdstatic uint64_t 417168404Spjdztest_random(uint64_t range) 418168404Spjd{ 419168404Spjd uint64_t r; 420168404Spjd 421168404Spjd if (range == 0) 422168404Spjd return (0); 423168404Spjd 424168404Spjd if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r)) 425168404Spjd fatal(1, "short read from /dev/urandom"); 426168404Spjd 427168404Spjd return (r % range); 428168404Spjd} 429168404Spjd 430168404Spjdstatic void 431168404Spjdztest_record_enospc(char *s) 432168404Spjd{ 433168404Spjd dprintf("ENOSPC doing: %s\n", s ? s : "<unknown>"); 434168404Spjd ztest_shared->zs_enospc_count++; 435168404Spjd} 436168404Spjd 437168404Spjdstatic void 438168404Spjdprocess_options(int argc, char **argv) 439168404Spjd{ 440168404Spjd int opt; 441168404Spjd uint64_t value; 442168404Spjd 443168404Spjd /* Remember program name. */ 444168404Spjd progname = argv[0]; 445168404Spjd 446168404Spjd /* By default, test gang blocks for blocks 32K and greater */ 447185029Spjd metaslab_gang_bang = 32 << 10; 448168404Spjd 449168404Spjd while ((opt = getopt(argc, argv, 450185029Spjd "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:h")) != EOF) { 451168404Spjd value = 0; 452168404Spjd switch (opt) { 453185029Spjd case 'v': 454185029Spjd case 's': 455185029Spjd case 'a': 456185029Spjd case 'm': 457185029Spjd case 'r': 458185029Spjd case 'R': 459185029Spjd case 'd': 460185029Spjd case 't': 461185029Spjd case 'g': 462185029Spjd case 'i': 463185029Spjd case 'k': 464185029Spjd case 'T': 465185029Spjd case 'P': 466168404Spjd value = nicenumtoull(optarg); 467168404Spjd } 468168404Spjd switch (opt) { 469185029Spjd case 'v': 470168404Spjd zopt_vdevs = value; 471168404Spjd break; 472185029Spjd case 's': 473168404Spjd zopt_vdev_size = MAX(SPA_MINDEVSIZE, value); 474168404Spjd break; 475185029Spjd case 'a': 476168404Spjd zopt_ashift = value; 477168404Spjd break; 478185029Spjd case 'm': 479168404Spjd zopt_mirrors = value; 480168404Spjd break; 481185029Spjd case 'r': 482168404Spjd zopt_raidz = MAX(1, value); 483168404Spjd break; 484185029Spjd case 'R': 485168404Spjd zopt_raidz_parity = MIN(MAX(value, 1), 2); 486168404Spjd break; 487185029Spjd case 'd': 488168404Spjd zopt_datasets = MAX(1, value); 489168404Spjd break; 490185029Spjd case 't': 491168404Spjd zopt_threads = MAX(1, value); 492168404Spjd break; 493185029Spjd case 'g': 494185029Spjd metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value); 495168404Spjd break; 496185029Spjd case 'i': 497168404Spjd zopt_init = value; 498168404Spjd break; 499185029Spjd case 'k': 500168404Spjd zopt_killrate = value; 501168404Spjd break; 502185029Spjd case 'p': 503168404Spjd zopt_pool = strdup(optarg); 504168404Spjd break; 505185029Spjd case 'f': 506168404Spjd zopt_dir = strdup(optarg); 507168404Spjd break; 508185029Spjd case 'V': 509168404Spjd zopt_verbose++; 510168404Spjd break; 511185029Spjd case 'E': 512168404Spjd zopt_init = 0; 513168404Spjd break; 514185029Spjd case 'T': 515168404Spjd zopt_time = value; 516168404Spjd break; 517185029Spjd case 'P': 518168404Spjd zopt_passtime = MAX(1, value); 519168404Spjd break; 520185029Spjd case 'h': 521168498Spjd usage(B_TRUE); 522168498Spjd break; 523185029Spjd case '?': 524185029Spjd default: 525168498Spjd usage(B_FALSE); 526168404Spjd break; 527168404Spjd } 528168404Spjd } 529168404Spjd 530168404Spjd zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1); 531168404Spjd 532168404Spjd zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX); 533168404Spjd zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1; 534168404Spjd} 535168404Spjd 536168404Spjdstatic uint64_t 537168404Spjdztest_get_ashift(void) 538168404Spjd{ 539168404Spjd if (zopt_ashift == 0) 540168404Spjd return (SPA_MINBLOCKSHIFT + ztest_random(3)); 541168404Spjd return (zopt_ashift); 542168404Spjd} 543168404Spjd 544168404Spjdstatic nvlist_t * 545185029Spjdmake_vdev_file(char *path, char *aux, size_t size, uint64_t ashift) 546168404Spjd{ 547185029Spjd char pathbuf[MAXPATHLEN]; 548168404Spjd uint64_t vdev; 549168404Spjd nvlist_t *file; 550168404Spjd 551185029Spjd if (ashift == 0) 552185029Spjd ashift = ztest_get_ashift(); 553168404Spjd 554185029Spjd if (path == NULL) { 555185029Spjd path = pathbuf; 556185029Spjd 557185029Spjd if (aux != NULL) { 558185029Spjd vdev = ztest_shared->zs_vdev_aux; 559185029Spjd (void) sprintf(path, ztest_aux_template, 560185029Spjd zopt_dir, zopt_pool, aux, vdev); 561185029Spjd } else { 562185029Spjd vdev = ztest_shared->zs_vdev_primaries++; 563185029Spjd (void) sprintf(path, ztest_dev_template, 564185029Spjd zopt_dir, zopt_pool, vdev); 565185029Spjd } 566185029Spjd } 567185029Spjd 568185029Spjd if (size != 0) { 569185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 570168404Spjd if (fd == -1) 571185029Spjd fatal(1, "can't open %s", path); 572168404Spjd if (ftruncate(fd, size) != 0) 573185029Spjd fatal(1, "can't ftruncate %s", path); 574168404Spjd (void) close(fd); 575168404Spjd } 576168404Spjd 577168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 578168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 579185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 580168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 581168404Spjd 582168404Spjd return (file); 583168404Spjd} 584168404Spjd 585168404Spjdstatic nvlist_t * 586185029Spjdmake_vdev_raidz(char *path, char *aux, size_t size, uint64_t ashift, int r) 587168404Spjd{ 588168404Spjd nvlist_t *raidz, **child; 589168404Spjd int c; 590168404Spjd 591168404Spjd if (r < 2) 592185029Spjd return (make_vdev_file(path, aux, size, ashift)); 593168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 594168404Spjd 595168404Spjd for (c = 0; c < r; c++) 596185029Spjd child[c] = make_vdev_file(path, aux, size, ashift); 597168404Spjd 598168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 599168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 600168404Spjd VDEV_TYPE_RAIDZ) == 0); 601168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 602168404Spjd zopt_raidz_parity) == 0); 603168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 604168404Spjd child, r) == 0); 605168404Spjd 606168404Spjd for (c = 0; c < r; c++) 607168404Spjd nvlist_free(child[c]); 608168404Spjd 609168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 610168404Spjd 611168404Spjd return (raidz); 612168404Spjd} 613168404Spjd 614168404Spjdstatic nvlist_t * 615185029Spjdmake_vdev_mirror(char *path, char *aux, size_t size, uint64_t ashift, 616185029Spjd int r, int m) 617168404Spjd{ 618168404Spjd nvlist_t *mirror, **child; 619168404Spjd int c; 620168404Spjd 621168404Spjd if (m < 1) 622185029Spjd return (make_vdev_raidz(path, aux, size, ashift, r)); 623168404Spjd 624168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 625168404Spjd 626168404Spjd for (c = 0; c < m; c++) 627185029Spjd child[c] = make_vdev_raidz(path, aux, size, ashift, r); 628168404Spjd 629168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 630168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 631168404Spjd VDEV_TYPE_MIRROR) == 0); 632168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 633168404Spjd child, m) == 0); 634168404Spjd 635168404Spjd for (c = 0; c < m; c++) 636168404Spjd nvlist_free(child[c]); 637168404Spjd 638168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 639168404Spjd 640168404Spjd return (mirror); 641168404Spjd} 642168404Spjd 643168404Spjdstatic nvlist_t * 644185029Spjdmake_vdev_root(char *path, char *aux, size_t size, uint64_t ashift, 645185029Spjd int log, int r, int m, int t) 646168404Spjd{ 647168404Spjd nvlist_t *root, **child; 648168404Spjd int c; 649168404Spjd 650168404Spjd ASSERT(t > 0); 651168404Spjd 652168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); 653168404Spjd 654185029Spjd for (c = 0; c < t; c++) { 655185029Spjd child[c] = make_vdev_mirror(path, aux, size, ashift, r, m); 656185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 657185029Spjd log) == 0); 658185029Spjd } 659168404Spjd 660168404Spjd VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); 661168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 662185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 663168404Spjd child, t) == 0); 664168404Spjd 665168404Spjd for (c = 0; c < t; c++) 666168404Spjd nvlist_free(child[c]); 667168404Spjd 668168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 669168404Spjd 670168404Spjd return (root); 671168404Spjd} 672168404Spjd 673168404Spjdstatic void 674168404Spjdztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx) 675168404Spjd{ 676168404Spjd int bs = SPA_MINBLOCKSHIFT + 677168404Spjd ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1); 678168404Spjd int ibs = DN_MIN_INDBLKSHIFT + 679168404Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1); 680168404Spjd int error; 681168404Spjd 682168404Spjd error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx); 683168404Spjd if (error) { 684168404Spjd char osname[300]; 685168404Spjd dmu_objset_name(os, osname); 686168404Spjd fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d", 687168404Spjd osname, object, 1 << bs, ibs, error); 688168404Spjd } 689168404Spjd} 690168404Spjd 691168404Spjdstatic uint8_t 692168404Spjdztest_random_checksum(void) 693168404Spjd{ 694168404Spjd uint8_t checksum; 695168404Spjd 696168404Spjd do { 697168404Spjd checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS); 698168404Spjd } while (zio_checksum_table[checksum].ci_zbt); 699168404Spjd 700168404Spjd if (checksum == ZIO_CHECKSUM_OFF) 701168404Spjd checksum = ZIO_CHECKSUM_ON; 702168404Spjd 703168404Spjd return (checksum); 704168404Spjd} 705168404Spjd 706168404Spjdstatic uint8_t 707168404Spjdztest_random_compress(void) 708168404Spjd{ 709168404Spjd return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS)); 710168404Spjd} 711168404Spjd 712168404Spjdtypedef struct ztest_replay { 713168404Spjd objset_t *zr_os; 714168404Spjd uint64_t zr_assign; 715168404Spjd} ztest_replay_t; 716168404Spjd 717168404Spjdstatic int 718168404Spjdztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap) 719168404Spjd{ 720168404Spjd objset_t *os = zr->zr_os; 721168404Spjd dmu_tx_t *tx; 722168404Spjd int error; 723168404Spjd 724168404Spjd if (byteswap) 725168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 726168404Spjd 727168404Spjd tx = dmu_tx_create(os); 728168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 729168404Spjd error = dmu_tx_assign(tx, zr->zr_assign); 730168404Spjd if (error) { 731168404Spjd dmu_tx_abort(tx); 732168404Spjd return (error); 733168404Spjd } 734168404Spjd 735168404Spjd error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0, 736168404Spjd DMU_OT_NONE, 0, tx); 737168404Spjd ASSERT3U(error, ==, 0); 738168404Spjd dmu_tx_commit(tx); 739168404Spjd 740168404Spjd if (zopt_verbose >= 5) { 741168404Spjd char osname[MAXNAMELEN]; 742168404Spjd dmu_objset_name(os, osname); 743168404Spjd (void) printf("replay create of %s object %llu" 744168404Spjd " in txg %llu = %d\n", 745168404Spjd osname, (u_longlong_t)lr->lr_doid, 746168404Spjd (u_longlong_t)zr->zr_assign, error); 747168404Spjd } 748168404Spjd 749168404Spjd return (error); 750168404Spjd} 751168404Spjd 752168404Spjdstatic int 753168404Spjdztest_replay_remove(ztest_replay_t *zr, lr_remove_t *lr, boolean_t byteswap) 754168404Spjd{ 755168404Spjd objset_t *os = zr->zr_os; 756168404Spjd dmu_tx_t *tx; 757168404Spjd int error; 758168404Spjd 759168404Spjd if (byteswap) 760168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 761168404Spjd 762168404Spjd tx = dmu_tx_create(os); 763168404Spjd dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END); 764168404Spjd error = dmu_tx_assign(tx, zr->zr_assign); 765168404Spjd if (error) { 766168404Spjd dmu_tx_abort(tx); 767168404Spjd return (error); 768168404Spjd } 769168404Spjd 770168404Spjd error = dmu_object_free(os, lr->lr_doid, tx); 771168404Spjd dmu_tx_commit(tx); 772168404Spjd 773168404Spjd return (error); 774168404Spjd} 775168404Spjd 776168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 777168404Spjd NULL, /* 0 no such transaction type */ 778168404Spjd ztest_replay_create, /* TX_CREATE */ 779168404Spjd NULL, /* TX_MKDIR */ 780168404Spjd NULL, /* TX_MKXATTR */ 781168404Spjd NULL, /* TX_SYMLINK */ 782168404Spjd ztest_replay_remove, /* TX_REMOVE */ 783168404Spjd NULL, /* TX_RMDIR */ 784168404Spjd NULL, /* TX_LINK */ 785168404Spjd NULL, /* TX_RENAME */ 786168404Spjd NULL, /* TX_WRITE */ 787168404Spjd NULL, /* TX_TRUNCATE */ 788168404Spjd NULL, /* TX_SETATTR */ 789168404Spjd NULL, /* TX_ACL */ 790168404Spjd}; 791168404Spjd 792168404Spjd/* 793168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 794168404Spjd * or create a pool with a bad vdev spec. 795168404Spjd */ 796168404Spjdvoid 797168404Spjdztest_spa_create_destroy(ztest_args_t *za) 798168404Spjd{ 799168404Spjd int error; 800168404Spjd spa_t *spa; 801168404Spjd nvlist_t *nvroot; 802168404Spjd 803168404Spjd /* 804168404Spjd * Attempt to create using a bad file. 805168404Spjd */ 806185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); 807185029Spjd error = spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL); 808168404Spjd nvlist_free(nvroot); 809168404Spjd if (error != ENOENT) 810168404Spjd fatal(0, "spa_create(bad_file) = %d", error); 811168404Spjd 812168404Spjd /* 813168404Spjd * Attempt to create using a bad mirror. 814168404Spjd */ 815185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1); 816185029Spjd error = spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL); 817168404Spjd nvlist_free(nvroot); 818168404Spjd if (error != ENOENT) 819168404Spjd fatal(0, "spa_create(bad_mirror) = %d", error); 820168404Spjd 821168404Spjd /* 822168404Spjd * Attempt to create an existing pool. It shouldn't matter 823168404Spjd * what's in the nvroot; we should fail with EEXIST. 824168404Spjd */ 825168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 826185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); 827185029Spjd error = spa_create(za->za_pool, nvroot, NULL, NULL, NULL); 828168404Spjd nvlist_free(nvroot); 829168404Spjd if (error != EEXIST) 830168404Spjd fatal(0, "spa_create(whatever) = %d", error); 831168404Spjd 832168404Spjd error = spa_open(za->za_pool, &spa, FTAG); 833168404Spjd if (error) 834168404Spjd fatal(0, "spa_open() = %d", error); 835168404Spjd 836168404Spjd error = spa_destroy(za->za_pool); 837168404Spjd if (error != EBUSY) 838168404Spjd fatal(0, "spa_destroy() = %d", error); 839168404Spjd 840168404Spjd spa_close(spa, FTAG); 841168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 842168404Spjd} 843168404Spjd 844185029Spjdstatic vdev_t * 845185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 846185029Spjd{ 847185029Spjd vdev_t *mvd; 848185029Spjd 849185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 850185029Spjd return (vd); 851185029Spjd 852185029Spjd for (int c = 0; c < vd->vdev_children; c++) 853185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 854185029Spjd NULL) 855185029Spjd return (mvd); 856185029Spjd 857185029Spjd return (NULL); 858185029Spjd} 859185029Spjd 860168404Spjd/* 861168404Spjd * Verify that vdev_add() works as expected. 862168404Spjd */ 863168404Spjdvoid 864168404Spjdztest_vdev_add_remove(ztest_args_t *za) 865168404Spjd{ 866185029Spjd spa_t *spa = za->za_spa; 867168404Spjd uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; 868168404Spjd nvlist_t *nvroot; 869168404Spjd int error; 870168404Spjd 871168404Spjd (void) mutex_lock(&ztest_shared->zs_vdev_lock); 872168404Spjd 873185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 874168404Spjd 875168404Spjd ztest_shared->zs_vdev_primaries = 876168404Spjd spa->spa_root_vdev->vdev_children * leaves; 877168404Spjd 878185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 879168404Spjd 880185029Spjd /* 881185029Spjd * Make 1/4 of the devices be log devices. 882185029Spjd */ 883185029Spjd nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0, 884185029Spjd ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1); 885185029Spjd 886168404Spjd error = spa_vdev_add(spa, nvroot); 887168404Spjd nvlist_free(nvroot); 888168404Spjd 889168404Spjd (void) mutex_unlock(&ztest_shared->zs_vdev_lock); 890168404Spjd 891168404Spjd if (error == ENOSPC) 892168404Spjd ztest_record_enospc("spa_vdev_add"); 893168404Spjd else if (error != 0) 894168404Spjd fatal(0, "spa_vdev_add() = %d", error); 895168404Spjd} 896168404Spjd 897185029Spjd/* 898185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 899185029Spjd */ 900185029Spjdvoid 901185029Spjdztest_vdev_aux_add_remove(ztest_args_t *za) 902168404Spjd{ 903185029Spjd spa_t *spa = za->za_spa; 904185029Spjd vdev_t *rvd = spa->spa_root_vdev; 905185029Spjd spa_aux_vdev_t *sav; 906185029Spjd char *aux; 907185029Spjd uint64_t guid = 0; 908185029Spjd int error; 909168404Spjd 910185029Spjd if (ztest_random(2) == 0) { 911185029Spjd sav = &spa->spa_spares; 912185029Spjd aux = ZPOOL_CONFIG_SPARES; 913185029Spjd } else { 914185029Spjd sav = &spa->spa_l2cache; 915185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 916185029Spjd } 917185029Spjd 918185029Spjd (void) mutex_lock(&ztest_shared->zs_vdev_lock); 919185029Spjd 920185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 921185029Spjd 922185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 923185029Spjd /* 924185029Spjd * Pick a random device to remove. 925185029Spjd */ 926185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 927185029Spjd } else { 928185029Spjd /* 929185029Spjd * Find an unused device we can add. 930185029Spjd */ 931185029Spjd ztest_shared->zs_vdev_aux = 0; 932185029Spjd for (;;) { 933185029Spjd char path[MAXPATHLEN]; 934185029Spjd int c; 935185029Spjd (void) sprintf(path, ztest_aux_template, zopt_dir, 936185029Spjd zopt_pool, aux, ztest_shared->zs_vdev_aux); 937185029Spjd for (c = 0; c < sav->sav_count; c++) 938185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 939185029Spjd path) == 0) 940185029Spjd break; 941185029Spjd if (c == sav->sav_count && 942185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 943185029Spjd break; 944185029Spjd ztest_shared->zs_vdev_aux++; 945168404Spjd } 946168404Spjd } 947168404Spjd 948185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 949168404Spjd 950185029Spjd if (guid == 0) { 951185029Spjd /* 952185029Spjd * Add a new device. 953185029Spjd */ 954185029Spjd nvlist_t *nvroot = make_vdev_root(NULL, aux, 955185029Spjd (zopt_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 956185029Spjd error = spa_vdev_add(spa, nvroot); 957185029Spjd if (error != 0) 958185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 959185029Spjd nvlist_free(nvroot); 960185029Spjd } else { 961185029Spjd /* 962185029Spjd * Remove an existing device. Sometimes, dirty its 963185029Spjd * vdev state first to make sure we handle removal 964185029Spjd * of devices that have pending state changes. 965185029Spjd */ 966185029Spjd if (ztest_random(2) == 0) 967185029Spjd (void) vdev_online(spa, guid, B_FALSE, NULL); 968185029Spjd 969185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 970185029Spjd if (error != 0 && error != EBUSY) 971185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 972185029Spjd } 973185029Spjd 974185029Spjd (void) mutex_unlock(&ztest_shared->zs_vdev_lock); 975168404Spjd} 976168404Spjd 977168404Spjd/* 978168404Spjd * Verify that we can attach and detach devices. 979168404Spjd */ 980168404Spjdvoid 981168404Spjdztest_vdev_attach_detach(ztest_args_t *za) 982168404Spjd{ 983185029Spjd spa_t *spa = za->za_spa; 984185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 985168404Spjd vdev_t *rvd = spa->spa_root_vdev; 986168404Spjd vdev_t *oldvd, *newvd, *pvd; 987185029Spjd nvlist_t *root; 988168404Spjd uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; 989168404Spjd uint64_t leaf, top; 990168404Spjd uint64_t ashift = ztest_get_ashift(); 991185029Spjd uint64_t oldguid; 992168404Spjd size_t oldsize, newsize; 993168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 994168404Spjd int replacing; 995185029Spjd int oldvd_has_siblings = B_FALSE; 996185029Spjd int newvd_is_spare = B_FALSE; 997185029Spjd int oldvd_is_log; 998168404Spjd int error, expected_error; 999168404Spjd 1000168404Spjd (void) mutex_lock(&ztest_shared->zs_vdev_lock); 1001168404Spjd 1002185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 1003168404Spjd 1004168404Spjd /* 1005168404Spjd * Decide whether to do an attach or a replace. 1006168404Spjd */ 1007168404Spjd replacing = ztest_random(2); 1008168404Spjd 1009168404Spjd /* 1010168404Spjd * Pick a random top-level vdev. 1011168404Spjd */ 1012168404Spjd top = ztest_random(rvd->vdev_children); 1013168404Spjd 1014168404Spjd /* 1015168404Spjd * Pick a random leaf within it. 1016168404Spjd */ 1017168404Spjd leaf = ztest_random(leaves); 1018168404Spjd 1019168404Spjd /* 1020185029Spjd * Locate this vdev. 1021168404Spjd */ 1022185029Spjd oldvd = rvd->vdev_child[top]; 1023185029Spjd if (zopt_mirrors >= 1) 1024185029Spjd oldvd = oldvd->vdev_child[leaf / zopt_raidz]; 1025185029Spjd if (zopt_raidz > 1) 1026185029Spjd oldvd = oldvd->vdev_child[leaf % zopt_raidz]; 1027168404Spjd 1028168404Spjd /* 1029185029Spjd * If we're already doing an attach or replace, oldvd may be a 1030185029Spjd * mirror vdev -- in which case, pick a random child. 1031168404Spjd */ 1032185029Spjd while (oldvd->vdev_children != 0) { 1033185029Spjd oldvd_has_siblings = B_TRUE; 1034185029Spjd ASSERT(oldvd->vdev_children == 2); 1035185029Spjd oldvd = oldvd->vdev_child[ztest_random(2)]; 1036185029Spjd } 1037168404Spjd 1038185029Spjd oldguid = oldvd->vdev_guid; 1039185029Spjd oldsize = vdev_get_rsize(oldvd); 1040185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 1041185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 1042185029Spjd pvd = oldvd->vdev_parent; 1043185029Spjd 1044168404Spjd /* 1045185029Spjd * If oldvd has siblings, then half of the time, detach it. 1046168404Spjd */ 1047185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 1048185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 1049185029Spjd error = spa_vdev_detach(spa, oldguid, B_FALSE); 1050185029Spjd if (error != 0 && error != ENODEV && error != EBUSY) 1051185029Spjd fatal(0, "detach (%s) returned %d", 1052185029Spjd oldpath, error); 1053185029Spjd (void) mutex_unlock(&ztest_shared->zs_vdev_lock); 1054185029Spjd return; 1055185029Spjd } 1056168404Spjd 1057168404Spjd /* 1058185029Spjd * For the new vdev, choose with equal probability between the two 1059185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 1060168404Spjd */ 1061185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 1062185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 1063185029Spjd newvd_is_spare = B_TRUE; 1064185029Spjd (void) strcpy(newpath, newvd->vdev_path); 1065185029Spjd } else { 1066185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 1067185029Spjd zopt_dir, zopt_pool, top * leaves + leaf); 1068185029Spjd if (ztest_random(2) == 0) 1069185029Spjd newpath[strlen(newpath) - 1] = 'b'; 1070185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 1071185029Spjd } 1072168404Spjd 1073185029Spjd if (newvd) { 1074185029Spjd newsize = vdev_get_rsize(newvd); 1075185029Spjd } else { 1076185029Spjd /* 1077185029Spjd * Make newsize a little bigger or smaller than oldsize. 1078185029Spjd * If it's smaller, the attach should fail. 1079185029Spjd * If it's larger, and we're doing a replace, 1080185029Spjd * we should get dynamic LUN growth when we're done. 1081185029Spjd */ 1082185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 1083185029Spjd } 1084185029Spjd 1085168404Spjd /* 1086168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 1087168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 1088168404Spjd * 1089168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 1090168404Spjd * 1091168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 1092168404Spjd */ 1093185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 1094185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 1095185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 1096185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 1097185029Spjd expected_error = ENOTSUP; 1098185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 1099185029Spjd expected_error = ENOTSUP; 1100185029Spjd else if (newvd == oldvd) 1101185029Spjd expected_error = replacing ? 0 : EBUSY; 1102185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 1103168404Spjd expected_error = EBUSY; 1104168404Spjd else if (newsize < oldsize) 1105168404Spjd expected_error = EOVERFLOW; 1106168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 1107168404Spjd expected_error = EDOM; 1108168404Spjd else 1109168404Spjd expected_error = 0; 1110168404Spjd 1111185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 1112168404Spjd 1113168404Spjd /* 1114168404Spjd * Build the nvlist describing newpath. 1115168404Spjd */ 1116185029Spjd root = make_vdev_root(newpath, NULL, newvd == NULL ? newsize : 0, 1117185029Spjd ashift, 0, 0, 0, 1); 1118168404Spjd 1119185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 1120168404Spjd 1121168404Spjd nvlist_free(root); 1122168404Spjd 1123168404Spjd /* 1124168404Spjd * If our parent was the replacing vdev, but the replace completed, 1125168404Spjd * then instead of failing with ENOTSUP we may either succeed, 1126168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 1127168404Spjd */ 1128168404Spjd if (expected_error == ENOTSUP && 1129168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 1130168404Spjd expected_error = error; 1131168404Spjd 1132168404Spjd /* 1133168404Spjd * If someone grew the LUN, the replacement may be too small. 1134168404Spjd */ 1135185029Spjd if (error == EOVERFLOW || error == EBUSY) 1136168404Spjd expected_error = error; 1137168404Spjd 1138185029Spjd /* XXX workaround 6690467 */ 1139185029Spjd if (error != expected_error && expected_error != EBUSY) { 1140185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 1141185029Spjd "returned %d, expected %d", 1142185029Spjd oldpath, (longlong_t)oldsize, newpath, 1143185029Spjd (longlong_t)newsize, replacing, error, expected_error); 1144168404Spjd } 1145168404Spjd 1146168404Spjd (void) mutex_unlock(&ztest_shared->zs_vdev_lock); 1147168404Spjd} 1148168404Spjd 1149168404Spjd/* 1150168404Spjd * Verify that dynamic LUN growth works as expected. 1151168404Spjd */ 1152168404Spjd/* ARGSUSED */ 1153168404Spjdvoid 1154168404Spjdztest_vdev_LUN_growth(ztest_args_t *za) 1155168404Spjd{ 1156185029Spjd spa_t *spa = za->za_spa; 1157168404Spjd char dev_name[MAXPATHLEN]; 1158168404Spjd uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; 1159168404Spjd uint64_t vdev; 1160168404Spjd size_t fsize; 1161168404Spjd int fd; 1162168404Spjd 1163168404Spjd (void) mutex_lock(&ztest_shared->zs_vdev_lock); 1164168404Spjd 1165168404Spjd /* 1166168404Spjd * Pick a random leaf vdev. 1167168404Spjd */ 1168185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 1169168404Spjd vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves); 1170185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 1171168404Spjd 1172168404Spjd (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); 1173168404Spjd 1174168404Spjd if ((fd = open(dev_name, O_RDWR)) != -1) { 1175168404Spjd /* 1176168404Spjd * Determine the size. 1177168404Spjd */ 1178168404Spjd fsize = lseek(fd, 0, SEEK_END); 1179168404Spjd 1180168404Spjd /* 1181168404Spjd * If it's less than 2x the original size, grow by around 3%. 1182168404Spjd */ 1183168404Spjd if (fsize < 2 * zopt_vdev_size) { 1184168404Spjd size_t newsize = fsize + ztest_random(fsize / 32); 1185168404Spjd (void) ftruncate(fd, newsize); 1186168404Spjd if (zopt_verbose >= 6) { 1187168404Spjd (void) printf("%s grew from %lu to %lu bytes\n", 1188168404Spjd dev_name, (ulong_t)fsize, (ulong_t)newsize); 1189168404Spjd } 1190168404Spjd } 1191168404Spjd (void) close(fd); 1192168404Spjd } 1193168404Spjd 1194168404Spjd (void) mutex_unlock(&ztest_shared->zs_vdev_lock); 1195168404Spjd} 1196168404Spjd 1197168404Spjd/* ARGSUSED */ 1198168404Spjdstatic void 1199185029Spjdztest_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 1200168404Spjd{ 1201168404Spjd /* 1202168404Spjd * Create the directory object. 1203168404Spjd */ 1204168404Spjd VERIFY(dmu_object_claim(os, ZTEST_DIROBJ, 1205168404Spjd DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE, 1206185029Spjd DMU_OT_UINT64_OTHER, 5 * sizeof (ztest_block_tag_t), tx) == 0); 1207168404Spjd 1208168404Spjd VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ, 1209168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 1210168404Spjd 1211168404Spjd VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ, 1212168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 1213168404Spjd} 1214168404Spjd 1215168404Spjdstatic int 1216168404Spjdztest_destroy_cb(char *name, void *arg) 1217168404Spjd{ 1218185029Spjd ztest_args_t *za = arg; 1219168404Spjd objset_t *os; 1220185029Spjd dmu_object_info_t *doi = &za->za_doi; 1221168404Spjd int error; 1222168404Spjd 1223168404Spjd /* 1224168404Spjd * Verify that the dataset contains a directory object. 1225168404Spjd */ 1226168404Spjd error = dmu_objset_open(name, DMU_OST_OTHER, 1227185029Spjd DS_MODE_USER | DS_MODE_READONLY, &os); 1228168404Spjd ASSERT3U(error, ==, 0); 1229185029Spjd error = dmu_object_info(os, ZTEST_DIROBJ, doi); 1230168404Spjd if (error != ENOENT) { 1231168404Spjd /* We could have crashed in the middle of destroying it */ 1232168404Spjd ASSERT3U(error, ==, 0); 1233185029Spjd ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER); 1234185029Spjd ASSERT3S(doi->doi_physical_blks, >=, 0); 1235168404Spjd } 1236168404Spjd dmu_objset_close(os); 1237168404Spjd 1238168404Spjd /* 1239168404Spjd * Destroy the dataset. 1240168404Spjd */ 1241168404Spjd error = dmu_objset_destroy(name); 1242185029Spjd if (error) { 1243185029Spjd (void) dmu_objset_open(name, DMU_OST_OTHER, 1244185029Spjd DS_MODE_USER | DS_MODE_READONLY, &os); 1245185029Spjd fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error); 1246185029Spjd } 1247168404Spjd return (0); 1248168404Spjd} 1249168404Spjd 1250168404Spjd/* 1251168404Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 1252168404Spjd */ 1253168404Spjdstatic uint64_t 1254168404Spjdztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode) 1255168404Spjd{ 1256168404Spjd itx_t *itx; 1257168404Spjd lr_create_t *lr; 1258168404Spjd size_t namesize; 1259168404Spjd char name[24]; 1260168404Spjd 1261168404Spjd (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object); 1262168404Spjd namesize = strlen(name) + 1; 1263168404Spjd 1264168404Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize + 1265168404Spjd ztest_random(ZIL_MAX_BLKSZ)); 1266168404Spjd lr = (lr_create_t *)&itx->itx_lr; 1267168404Spjd bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr)); 1268168404Spjd lr->lr_doid = object; 1269168404Spjd lr->lr_foid = 0; 1270168404Spjd lr->lr_mode = mode; 1271168404Spjd lr->lr_uid = 0; 1272168404Spjd lr->lr_gid = 0; 1273168404Spjd lr->lr_gen = dmu_tx_get_txg(tx); 1274168404Spjd lr->lr_crtime[0] = time(NULL); 1275168404Spjd lr->lr_crtime[1] = 0; 1276168404Spjd lr->lr_rdev = 0; 1277168404Spjd bcopy(name, (char *)(lr + 1), namesize); 1278168404Spjd 1279168404Spjd return (zil_itx_assign(zilog, itx, tx)); 1280168404Spjd} 1281168404Spjd 1282168404Spjdvoid 1283168404Spjdztest_dmu_objset_create_destroy(ztest_args_t *za) 1284168404Spjd{ 1285168404Spjd int error; 1286185029Spjd objset_t *os, *os2; 1287168404Spjd char name[100]; 1288185029Spjd int basemode, expected_error; 1289168404Spjd zilog_t *zilog; 1290168404Spjd uint64_t seq; 1291168404Spjd uint64_t objects; 1292168404Spjd ztest_replay_t zr; 1293168404Spjd 1294168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 1295168404Spjd (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool, 1296168404Spjd (u_longlong_t)za->za_instance); 1297168404Spjd 1298185029Spjd basemode = DS_MODE_TYPE(za->za_instance); 1299185029Spjd if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER) 1300185029Spjd basemode = DS_MODE_USER; 1301168404Spjd 1302168404Spjd /* 1303168404Spjd * If this dataset exists from a previous run, process its replay log 1304168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 1305168404Spjd * (invoked from ztest_destroy_cb() below) should just throw it away. 1306168404Spjd */ 1307168404Spjd if (ztest_random(2) == 0 && 1308185029Spjd dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) { 1309168404Spjd zr.zr_os = os; 1310185029Spjd zil_replay(os, &zr, &zr.zr_assign, ztest_replay_vector, NULL); 1311168404Spjd dmu_objset_close(os); 1312168404Spjd } 1313168404Spjd 1314168404Spjd /* 1315168404Spjd * There may be an old instance of the dataset we're about to 1316168404Spjd * create lying around from a previous run. If so, destroy it 1317168404Spjd * and all of its snapshots. 1318168404Spjd */ 1319185029Spjd (void) dmu_objset_find(name, ztest_destroy_cb, za, 1320168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 1321168404Spjd 1322168404Spjd /* 1323168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 1324168404Spjd */ 1325168404Spjd error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); 1326168404Spjd if (error != ENOENT) 1327168404Spjd fatal(1, "dmu_objset_open(%s) found destroyed dataset %p", 1328168404Spjd name, os); 1329168404Spjd 1330168404Spjd /* 1331168404Spjd * Verify that we can create a new dataset. 1332168404Spjd */ 1333185029Spjd error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, 1334185029Spjd ztest_create_cb, NULL); 1335168404Spjd if (error) { 1336168404Spjd if (error == ENOSPC) { 1337168404Spjd ztest_record_enospc("dmu_objset_create"); 1338168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 1339168404Spjd return; 1340168404Spjd } 1341168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 1342168404Spjd } 1343168404Spjd 1344168404Spjd error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); 1345168404Spjd if (error) { 1346168404Spjd fatal(0, "dmu_objset_open(%s) = %d", name, error); 1347168404Spjd } 1348168404Spjd 1349168404Spjd /* 1350168404Spjd * Open the intent log for it. 1351168404Spjd */ 1352168404Spjd zilog = zil_open(os, NULL); 1353168404Spjd 1354168404Spjd /* 1355168404Spjd * Put a random number of objects in there. 1356168404Spjd */ 1357168404Spjd objects = ztest_random(20); 1358168404Spjd seq = 0; 1359168404Spjd while (objects-- != 0) { 1360168404Spjd uint64_t object; 1361168404Spjd dmu_tx_t *tx = dmu_tx_create(os); 1362168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name)); 1363168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1364168404Spjd if (error) { 1365168404Spjd dmu_tx_abort(tx); 1366168404Spjd } else { 1367168404Spjd object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1368168404Spjd DMU_OT_NONE, 0, tx); 1369168404Spjd ztest_set_random_blocksize(os, object, tx); 1370168404Spjd seq = ztest_log_create(zilog, tx, object, 1371168404Spjd DMU_OT_UINT64_OTHER); 1372168404Spjd dmu_write(os, object, 0, sizeof (name), name, tx); 1373168404Spjd dmu_tx_commit(tx); 1374168404Spjd } 1375168404Spjd if (ztest_random(5) == 0) { 1376168404Spjd zil_commit(zilog, seq, object); 1377168404Spjd } 1378168404Spjd if (ztest_random(100) == 0) { 1379168404Spjd error = zil_suspend(zilog); 1380168404Spjd if (error == 0) { 1381168404Spjd zil_resume(zilog); 1382168404Spjd } 1383168404Spjd } 1384168404Spjd } 1385168404Spjd 1386168404Spjd /* 1387168404Spjd * Verify that we cannot create an existing dataset. 1388168404Spjd */ 1389185029Spjd error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL); 1390168404Spjd if (error != EEXIST) 1391168404Spjd fatal(0, "created existing dataset, error = %d", error); 1392168404Spjd 1393168404Spjd /* 1394185029Spjd * Verify that multiple dataset holds are allowed, but only when 1395168404Spjd * the new access mode is compatible with the base mode. 1396168404Spjd */ 1397185029Spjd if (basemode == DS_MODE_OWNER) { 1398185029Spjd error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER, 1399185029Spjd &os2); 1400185029Spjd if (error) 1401185029Spjd fatal(0, "dmu_objset_open('%s') = %d", name, error); 1402185029Spjd else 1403168404Spjd dmu_objset_close(os2); 1404168404Spjd } 1405185029Spjd error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2); 1406185029Spjd expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0; 1407185029Spjd if (error != expected_error) 1408185029Spjd fatal(0, "dmu_objset_open('%s') = %d, expected %d", 1409185029Spjd name, error, expected_error); 1410185029Spjd if (error == 0) 1411185029Spjd dmu_objset_close(os2); 1412168404Spjd 1413168404Spjd zil_close(zilog); 1414168404Spjd dmu_objset_close(os); 1415168404Spjd 1416168404Spjd error = dmu_objset_destroy(name); 1417168404Spjd if (error) 1418168404Spjd fatal(0, "dmu_objset_destroy(%s) = %d", name, error); 1419168404Spjd 1420168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 1421168404Spjd} 1422168404Spjd 1423168404Spjd/* 1424168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 1425168404Spjd */ 1426168404Spjdvoid 1427168404Spjdztest_dmu_snapshot_create_destroy(ztest_args_t *za) 1428168404Spjd{ 1429168404Spjd int error; 1430168404Spjd objset_t *os = za->za_os; 1431168404Spjd char snapname[100]; 1432168404Spjd char osname[MAXNAMELEN]; 1433168404Spjd 1434168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 1435168404Spjd dmu_objset_name(os, osname); 1436168404Spjd (void) snprintf(snapname, 100, "%s@%llu", osname, 1437168404Spjd (u_longlong_t)za->za_instance); 1438168404Spjd 1439168404Spjd error = dmu_objset_destroy(snapname); 1440168404Spjd if (error != 0 && error != ENOENT) 1441168404Spjd fatal(0, "dmu_objset_destroy() = %d", error); 1442168404Spjd error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE); 1443168404Spjd if (error == ENOSPC) 1444168404Spjd ztest_record_enospc("dmu_take_snapshot"); 1445168404Spjd else if (error != 0 && error != EEXIST) 1446168404Spjd fatal(0, "dmu_take_snapshot() = %d", error); 1447168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 1448168404Spjd} 1449168404Spjd 1450168404Spjd#define ZTEST_TRAVERSE_BLOCKS 1000 1451168404Spjd 1452168404Spjdstatic int 1453168404Spjdztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 1454168404Spjd{ 1455168404Spjd ztest_args_t *za = arg; 1456168404Spjd zbookmark_t *zb = &bc->bc_bookmark; 1457168404Spjd blkptr_t *bp = &bc->bc_blkptr; 1458168404Spjd dnode_phys_t *dnp = bc->bc_dnode; 1459168404Spjd traverse_handle_t *th = za->za_th; 1460168404Spjd uint64_t size = BP_GET_LSIZE(bp); 1461168404Spjd 1462168404Spjd /* 1463168404Spjd * Level -1 indicates the objset_phys_t or something in its intent log. 1464168404Spjd */ 1465168404Spjd if (zb->zb_level == -1) { 1466168404Spjd if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { 1467168404Spjd ASSERT3U(zb->zb_object, ==, 0); 1468168404Spjd ASSERT3U(zb->zb_blkid, ==, 0); 1469168404Spjd ASSERT3U(size, ==, sizeof (objset_phys_t)); 1470168404Spjd za->za_zil_seq = 0; 1471168404Spjd } else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { 1472168404Spjd ASSERT3U(zb->zb_object, ==, 0); 1473168404Spjd ASSERT3U(zb->zb_blkid, >, za->za_zil_seq); 1474168404Spjd za->za_zil_seq = zb->zb_blkid; 1475168404Spjd } else { 1476168404Spjd ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */ 1477168404Spjd } 1478168404Spjd 1479168404Spjd return (0); 1480168404Spjd } 1481168404Spjd 1482168404Spjd ASSERT(dnp != NULL); 1483168404Spjd 1484168404Spjd if (bc->bc_errno) 1485168404Spjd return (ERESTART); 1486168404Spjd 1487168404Spjd /* 1488168404Spjd * Once in a while, abort the traverse. We only do this to odd 1489168404Spjd * instance numbers to ensure that even ones can run to completion. 1490168404Spjd */ 1491168404Spjd if ((za->za_instance & 1) && ztest_random(10000) == 0) 1492168404Spjd return (EINTR); 1493168404Spjd 1494168404Spjd if (bp->blk_birth == 0) { 1495168404Spjd ASSERT(th->th_advance & ADVANCE_HOLES); 1496168404Spjd return (0); 1497168404Spjd } 1498168404Spjd 1499168404Spjd if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) && 1500168404Spjd bc == &th->th_cache[ZB_DN_CACHE][0]) { 1501168404Spjd ASSERT(bc->bc_data == NULL); 1502168404Spjd return (0); 1503168404Spjd } 1504168404Spjd 1505168404Spjd ASSERT(bc->bc_data != NULL); 1506168404Spjd 1507168404Spjd /* 1508168404Spjd * This is an expensive question, so don't ask it too often. 1509168404Spjd */ 1510168404Spjd if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) { 1511168404Spjd void *xbuf = umem_alloc(size, UMEM_NOFAIL); 1512168404Spjd if (arc_tryread(spa, bp, xbuf) == 0) { 1513168404Spjd ASSERT(bcmp(bc->bc_data, xbuf, size) == 0); 1514168404Spjd } 1515168404Spjd umem_free(xbuf, size); 1516168404Spjd } 1517168404Spjd 1518168404Spjd if (zb->zb_level > 0) { 1519168404Spjd ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift); 1520168404Spjd return (0); 1521168404Spjd } 1522168404Spjd 1523168404Spjd ASSERT(zb->zb_level == 0); 1524168404Spjd ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT); 1525168404Spjd 1526168404Spjd return (0); 1527168404Spjd} 1528168404Spjd 1529168404Spjd/* 1530168404Spjd * Verify that live pool traversal works. 1531168404Spjd */ 1532168404Spjdvoid 1533168404Spjdztest_traverse(ztest_args_t *za) 1534168404Spjd{ 1535185029Spjd spa_t *spa = za->za_spa; 1536168404Spjd traverse_handle_t *th = za->za_th; 1537168404Spjd int rc, advance; 1538168404Spjd uint64_t cbstart, cblimit; 1539168404Spjd 1540168404Spjd if (th == NULL) { 1541168404Spjd advance = 0; 1542168404Spjd 1543168404Spjd if (ztest_random(2) == 0) 1544168404Spjd advance |= ADVANCE_PRE; 1545168404Spjd 1546168404Spjd if (ztest_random(2) == 0) 1547168404Spjd advance |= ADVANCE_PRUNE; 1548168404Spjd 1549168404Spjd if (ztest_random(2) == 0) 1550168404Spjd advance |= ADVANCE_DATA; 1551168404Spjd 1552168404Spjd if (ztest_random(2) == 0) 1553168404Spjd advance |= ADVANCE_HOLES; 1554168404Spjd 1555168404Spjd if (ztest_random(2) == 0) 1556168404Spjd advance |= ADVANCE_ZIL; 1557168404Spjd 1558168404Spjd th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance, 1559168404Spjd ZIO_FLAG_CANFAIL); 1560168404Spjd 1561168404Spjd traverse_add_pool(th, 0, -1ULL); 1562168404Spjd } 1563168404Spjd 1564168404Spjd advance = th->th_advance; 1565168404Spjd cbstart = th->th_callbacks; 1566168404Spjd cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000); 1567168404Spjd 1568168404Spjd while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit) 1569168404Spjd continue; 1570168404Spjd 1571168404Spjd if (zopt_verbose >= 5) 1572168404Spjd (void) printf("traverse %s%s%s%s %llu blocks to " 1573168404Spjd "<%llu, %llu, %lld, %llx>%s\n", 1574168404Spjd (advance & ADVANCE_PRE) ? "pre" : "post", 1575168404Spjd (advance & ADVANCE_PRUNE) ? "|prune" : "", 1576168404Spjd (advance & ADVANCE_DATA) ? "|data" : "", 1577168404Spjd (advance & ADVANCE_HOLES) ? "|holes" : "", 1578168404Spjd (u_longlong_t)(th->th_callbacks - cbstart), 1579168404Spjd (u_longlong_t)th->th_lastcb.zb_objset, 1580168404Spjd (u_longlong_t)th->th_lastcb.zb_object, 1581168404Spjd (u_longlong_t)th->th_lastcb.zb_level, 1582168404Spjd (u_longlong_t)th->th_lastcb.zb_blkid, 1583168404Spjd rc == 0 ? " [done]" : 1584168404Spjd rc == EINTR ? " [aborted]" : 1585168404Spjd rc == EAGAIN ? "" : 1586168404Spjd strerror(rc)); 1587168404Spjd 1588168404Spjd if (rc != EAGAIN) { 1589168404Spjd if (rc != 0 && rc != EINTR) 1590168404Spjd fatal(0, "traverse_more(%p) = %d", th, rc); 1591168404Spjd traverse_fini(th); 1592168404Spjd za->za_th = NULL; 1593168404Spjd } 1594168404Spjd} 1595168404Spjd 1596168404Spjd/* 1597207910Smm * Verify dsl_dataset_promote handles EBUSY 1598207910Smm */ 1599207910Smmvoid 1600207910Smmztest_dsl_dataset_promote_busy(ztest_args_t *za) 1601207910Smm{ 1602207910Smm int error; 1603207910Smm objset_t *os = za->za_os; 1604207910Smm objset_t *clone; 1605207910Smm dsl_dataset_t *ds; 1606207910Smm char snap1name[100]; 1607207910Smm char clone1name[100]; 1608207910Smm char snap2name[100]; 1609207910Smm char clone2name[100]; 1610207910Smm char snap3name[100]; 1611207910Smm char osname[MAXNAMELEN]; 1612207910Smm static uint64_t uniq = 0; 1613207910Smm uint64_t curval; 1614207910Smm 1615207910Smm curval = atomic_add_64_nv(&uniq, 5) - 5; 1616207910Smm 1617207910Smm (void) rw_rdlock(&ztest_shared->zs_name_lock); 1618207910Smm 1619207910Smm dmu_objset_name(os, osname); 1620207910Smm (void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval++); 1621207910Smm (void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval++); 1622207910Smm (void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval++); 1623207910Smm (void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval++); 1624207910Smm (void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval++); 1625207910Smm 1626207910Smm error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, FALSE); 1627207910Smm if (error == ENOSPC) 1628207910Smm ztest_record_enospc("dmu_take_snapshot"); 1629207910Smm else if (error != 0 && error != EEXIST) 1630207910Smm fatal(0, "dmu_take_snapshot = %d", error); 1631207910Smm 1632207910Smm error = dmu_objset_open(snap1name, DMU_OST_OTHER, 1633207910Smm DS_MODE_USER | DS_MODE_READONLY, &clone); 1634207910Smm if (error) 1635207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); 1636207910Smm 1637207910Smm error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0, 1638207910Smm NULL, NULL); 1639207910Smm if (error) 1640207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 1641207910Smm dmu_objset_close(clone); 1642207910Smm 1643207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, 1644207910Smm FALSE); 1645207910Smm if (error == ENOSPC) 1646207910Smm ztest_record_enospc("dmu_take_snapshot"); 1647207910Smm else if (error != 0 && error != EEXIST) 1648207910Smm fatal(0, "dmu_take_snapshot = %d", error); 1649207910Smm 1650207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, 1651207910Smm FALSE); 1652207910Smm if (error == ENOSPC) 1653207910Smm ztest_record_enospc("dmu_take_snapshot"); 1654207910Smm else if (error != 0 && error != EEXIST) 1655207910Smm fatal(0, "dmu_take_snapshot = %d", error); 1656207910Smm 1657207910Smm error = dmu_objset_open(snap3name, DMU_OST_OTHER, 1658207910Smm DS_MODE_USER | DS_MODE_READONLY, &clone); 1659207910Smm if (error) 1660207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 1661207910Smm 1662207910Smm error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0, 1663207910Smm NULL, NULL); 1664207910Smm if (error) 1665207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 1666207910Smm dmu_objset_close(clone); 1667207910Smm 1668207910Smm error = dsl_dataset_own(snap1name, 0, FTAG, &ds); 1669207910Smm if (error) 1670207910Smm fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error); 1671207910Smm error = dsl_dataset_promote(clone2name); 1672207910Smm if (error != EBUSY) 1673207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 1674207910Smm error); 1675207910Smm dsl_dataset_disown(ds, FTAG); 1676207910Smm 1677207910Smm error = dmu_objset_destroy(clone2name); 1678207910Smm if (error) 1679207910Smm fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); 1680207910Smm 1681207910Smm error = dmu_objset_destroy(snap3name); 1682207910Smm if (error) 1683207910Smm fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); 1684207910Smm 1685207910Smm error = dmu_objset_destroy(snap2name); 1686207910Smm if (error) 1687207910Smm fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); 1688207910Smm 1689207910Smm error = dmu_objset_destroy(clone1name); 1690207910Smm if (error) 1691207910Smm fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error); 1692207910Smm error = dmu_objset_destroy(snap1name); 1693207910Smm if (error) 1694207910Smm fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); 1695207910Smm 1696207910Smm (void) rw_unlock(&ztest_shared->zs_name_lock); 1697207910Smm} 1698207910Smm 1699207910Smm/* 1700168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 1701168404Spjd */ 1702168404Spjdvoid 1703168404Spjdztest_dmu_object_alloc_free(ztest_args_t *za) 1704168404Spjd{ 1705168404Spjd objset_t *os = za->za_os; 1706168404Spjd dmu_buf_t *db; 1707168404Spjd dmu_tx_t *tx; 1708168404Spjd uint64_t batchobj, object, batchsize, endoff, temp; 1709168404Spjd int b, c, error, bonuslen; 1710185029Spjd dmu_object_info_t *doi = &za->za_doi; 1711168404Spjd char osname[MAXNAMELEN]; 1712168404Spjd 1713168404Spjd dmu_objset_name(os, osname); 1714168404Spjd 1715168404Spjd endoff = -8ULL; 1716168404Spjd batchsize = 2; 1717168404Spjd 1718168404Spjd /* 1719168404Spjd * Create a batch object if necessary, and record it in the directory. 1720168404Spjd */ 1721185029Spjd VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff, 1722168404Spjd sizeof (uint64_t), &batchobj)); 1723168404Spjd if (batchobj == 0) { 1724168404Spjd tx = dmu_tx_create(os); 1725168404Spjd dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, 1726168404Spjd sizeof (uint64_t)); 1727168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1728168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1729168404Spjd if (error) { 1730168404Spjd ztest_record_enospc("create a batch object"); 1731168404Spjd dmu_tx_abort(tx); 1732168404Spjd return; 1733168404Spjd } 1734168404Spjd batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1735168404Spjd DMU_OT_NONE, 0, tx); 1736168404Spjd ztest_set_random_blocksize(os, batchobj, tx); 1737168404Spjd dmu_write(os, ZTEST_DIROBJ, za->za_diroff, 1738168404Spjd sizeof (uint64_t), &batchobj, tx); 1739168404Spjd dmu_tx_commit(tx); 1740168404Spjd } 1741168404Spjd 1742168404Spjd /* 1743168404Spjd * Destroy the previous batch of objects. 1744168404Spjd */ 1745168404Spjd for (b = 0; b < batchsize; b++) { 1746185029Spjd VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t), 1747168404Spjd sizeof (uint64_t), &object)); 1748168404Spjd if (object == 0) 1749168404Spjd continue; 1750168404Spjd /* 1751168404Spjd * Read and validate contents. 1752168404Spjd * We expect the nth byte of the bonus buffer to be n. 1753168404Spjd */ 1754168404Spjd VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db)); 1755185029Spjd za->za_dbuf = db; 1756168404Spjd 1757185029Spjd dmu_object_info_from_db(db, doi); 1758185029Spjd ASSERT(doi->doi_type == DMU_OT_UINT64_OTHER); 1759185029Spjd ASSERT(doi->doi_bonus_type == DMU_OT_PLAIN_OTHER); 1760185029Spjd ASSERT3S(doi->doi_physical_blks, >=, 0); 1761168404Spjd 1762185029Spjd bonuslen = doi->doi_bonus_size; 1763168404Spjd 1764168404Spjd for (c = 0; c < bonuslen; c++) { 1765168404Spjd if (((uint8_t *)db->db_data)[c] != 1766168404Spjd (uint8_t)(c + bonuslen)) { 1767168404Spjd fatal(0, 1768168404Spjd "bad bonus: %s, obj %llu, off %d: %u != %u", 1769168404Spjd osname, object, c, 1770168404Spjd ((uint8_t *)db->db_data)[c], 1771168404Spjd (uint8_t)(c + bonuslen)); 1772168404Spjd } 1773168404Spjd } 1774168404Spjd 1775168404Spjd dmu_buf_rele(db, FTAG); 1776185029Spjd za->za_dbuf = NULL; 1777168404Spjd 1778168404Spjd /* 1779168404Spjd * We expect the word at endoff to be our object number. 1780168404Spjd */ 1781168404Spjd VERIFY(0 == dmu_read(os, object, endoff, 1782168404Spjd sizeof (uint64_t), &temp)); 1783168404Spjd 1784168404Spjd if (temp != object) { 1785168404Spjd fatal(0, "bad data in %s, got %llu, expected %llu", 1786168404Spjd osname, temp, object); 1787168404Spjd } 1788168404Spjd 1789168404Spjd /* 1790168404Spjd * Destroy old object and clear batch entry. 1791168404Spjd */ 1792168404Spjd tx = dmu_tx_create(os); 1793168404Spjd dmu_tx_hold_write(tx, batchobj, 1794168404Spjd b * sizeof (uint64_t), sizeof (uint64_t)); 1795168404Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1796168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1797168404Spjd if (error) { 1798168404Spjd ztest_record_enospc("free object"); 1799168404Spjd dmu_tx_abort(tx); 1800168404Spjd return; 1801168404Spjd } 1802168404Spjd error = dmu_object_free(os, object, tx); 1803168404Spjd if (error) { 1804168404Spjd fatal(0, "dmu_object_free('%s', %llu) = %d", 1805168404Spjd osname, object, error); 1806168404Spjd } 1807168404Spjd object = 0; 1808168404Spjd 1809168404Spjd dmu_object_set_checksum(os, batchobj, 1810168404Spjd ztest_random_checksum(), tx); 1811168404Spjd dmu_object_set_compress(os, batchobj, 1812168404Spjd ztest_random_compress(), tx); 1813168404Spjd 1814168404Spjd dmu_write(os, batchobj, b * sizeof (uint64_t), 1815168404Spjd sizeof (uint64_t), &object, tx); 1816168404Spjd 1817168404Spjd dmu_tx_commit(tx); 1818168404Spjd } 1819168404Spjd 1820168404Spjd /* 1821168404Spjd * Before creating the new batch of objects, generate a bunch of churn. 1822168404Spjd */ 1823168404Spjd for (b = ztest_random(100); b > 0; b--) { 1824168404Spjd tx = dmu_tx_create(os); 1825168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1826168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1827168404Spjd if (error) { 1828168404Spjd ztest_record_enospc("churn objects"); 1829168404Spjd dmu_tx_abort(tx); 1830168404Spjd return; 1831168404Spjd } 1832168404Spjd object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1833168404Spjd DMU_OT_NONE, 0, tx); 1834168404Spjd ztest_set_random_blocksize(os, object, tx); 1835168404Spjd error = dmu_object_free(os, object, tx); 1836168404Spjd if (error) { 1837168404Spjd fatal(0, "dmu_object_free('%s', %llu) = %d", 1838168404Spjd osname, object, error); 1839168404Spjd } 1840168404Spjd dmu_tx_commit(tx); 1841168404Spjd } 1842168404Spjd 1843168404Spjd /* 1844168404Spjd * Create a new batch of objects with randomly chosen 1845168404Spjd * blocksizes and record them in the batch directory. 1846168404Spjd */ 1847168404Spjd for (b = 0; b < batchsize; b++) { 1848168404Spjd uint32_t va_blksize; 1849168404Spjd u_longlong_t va_nblocks; 1850168404Spjd 1851168404Spjd tx = dmu_tx_create(os); 1852168404Spjd dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t), 1853168404Spjd sizeof (uint64_t)); 1854168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1855168404Spjd dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff, 1856168404Spjd sizeof (uint64_t)); 1857168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1858168404Spjd if (error) { 1859168404Spjd ztest_record_enospc("create batchobj"); 1860168404Spjd dmu_tx_abort(tx); 1861168404Spjd return; 1862168404Spjd } 1863168404Spjd bonuslen = (int)ztest_random(dmu_bonus_max()) + 1; 1864168404Spjd 1865168404Spjd object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1866168404Spjd DMU_OT_PLAIN_OTHER, bonuslen, tx); 1867168404Spjd 1868168404Spjd ztest_set_random_blocksize(os, object, tx); 1869168404Spjd 1870168404Spjd dmu_object_set_checksum(os, object, 1871168404Spjd ztest_random_checksum(), tx); 1872168404Spjd dmu_object_set_compress(os, object, 1873168404Spjd ztest_random_compress(), tx); 1874168404Spjd 1875168404Spjd dmu_write(os, batchobj, b * sizeof (uint64_t), 1876168404Spjd sizeof (uint64_t), &object, tx); 1877168404Spjd 1878168404Spjd /* 1879168404Spjd * Write to both the bonus buffer and the regular data. 1880168404Spjd */ 1881185029Spjd VERIFY(dmu_bonus_hold(os, object, FTAG, &db) == 0); 1882185029Spjd za->za_dbuf = db; 1883185029Spjd ASSERT3U(bonuslen, <=, db->db_size); 1884168404Spjd 1885168404Spjd dmu_object_size_from_db(db, &va_blksize, &va_nblocks); 1886168404Spjd ASSERT3S(va_nblocks, >=, 0); 1887168404Spjd 1888168404Spjd dmu_buf_will_dirty(db, tx); 1889168404Spjd 1890168404Spjd /* 1891168404Spjd * See comments above regarding the contents of 1892168404Spjd * the bonus buffer and the word at endoff. 1893168404Spjd */ 1894185029Spjd for (c = 0; c < bonuslen; c++) 1895168404Spjd ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen); 1896168404Spjd 1897168404Spjd dmu_buf_rele(db, FTAG); 1898185029Spjd za->za_dbuf = NULL; 1899168404Spjd 1900168404Spjd /* 1901168404Spjd * Write to a large offset to increase indirection. 1902168404Spjd */ 1903168404Spjd dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx); 1904168404Spjd 1905168404Spjd dmu_tx_commit(tx); 1906168404Spjd } 1907168404Spjd} 1908168404Spjd 1909168404Spjd/* 1910168404Spjd * Verify that dmu_{read,write} work as expected. 1911168404Spjd */ 1912168404Spjdtypedef struct bufwad { 1913168404Spjd uint64_t bw_index; 1914168404Spjd uint64_t bw_txg; 1915168404Spjd uint64_t bw_data; 1916168404Spjd} bufwad_t; 1917168404Spjd 1918168404Spjdtypedef struct dmu_read_write_dir { 1919168404Spjd uint64_t dd_packobj; 1920168404Spjd uint64_t dd_bigobj; 1921168404Spjd uint64_t dd_chunk; 1922168404Spjd} dmu_read_write_dir_t; 1923168404Spjd 1924168404Spjdvoid 1925168404Spjdztest_dmu_read_write(ztest_args_t *za) 1926168404Spjd{ 1927168404Spjd objset_t *os = za->za_os; 1928168404Spjd dmu_read_write_dir_t dd; 1929168404Spjd dmu_tx_t *tx; 1930168404Spjd int i, freeit, error; 1931168404Spjd uint64_t n, s, txg; 1932168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 1933168404Spjd uint64_t packoff, packsize, bigoff, bigsize; 1934168404Spjd uint64_t regions = 997; 1935168404Spjd uint64_t stride = 123456789ULL; 1936168404Spjd uint64_t width = 40; 1937168404Spjd int free_percent = 5; 1938168404Spjd 1939168404Spjd /* 1940168404Spjd * This test uses two objects, packobj and bigobj, that are always 1941168404Spjd * updated together (i.e. in the same tx) so that their contents are 1942168404Spjd * in sync and can be compared. Their contents relate to each other 1943168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 1944168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 1945168404Spjd * for any index n, there are three bufwads that should be identical: 1946168404Spjd * 1947168404Spjd * packobj, at offset n * sizeof (bufwad_t) 1948168404Spjd * bigobj, at the head of the nth chunk 1949168404Spjd * bigobj, at the tail of the nth chunk 1950168404Spjd * 1951168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 1952168404Spjd * and it doesn't have any relation to the object blocksize. 1953168404Spjd * The only requirement is that it can hold at least two bufwads. 1954168404Spjd * 1955168404Spjd * Normally, we write the bufwad to each of these locations. 1956168404Spjd * However, free_percent of the time we instead write zeroes to 1957168404Spjd * packobj and perform a dmu_free_range() on bigobj. By comparing 1958168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 1959168404Spjd * tracking which parts of an object are allocated and free, 1960168404Spjd * and that the contents of the allocated blocks are correct. 1961168404Spjd */ 1962168404Spjd 1963168404Spjd /* 1964168404Spjd * Read the directory info. If it's the first time, set things up. 1965168404Spjd */ 1966168404Spjd VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff, 1967168404Spjd sizeof (dd), &dd)); 1968168404Spjd if (dd.dd_chunk == 0) { 1969168404Spjd ASSERT(dd.dd_packobj == 0); 1970168404Spjd ASSERT(dd.dd_bigobj == 0); 1971168404Spjd tx = dmu_tx_create(os); 1972168404Spjd dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd)); 1973168404Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1974168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1975168404Spjd if (error) { 1976168404Spjd ztest_record_enospc("create r/w directory"); 1977168404Spjd dmu_tx_abort(tx); 1978168404Spjd return; 1979168404Spjd } 1980168404Spjd 1981168404Spjd dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1982168404Spjd DMU_OT_NONE, 0, tx); 1983168404Spjd dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, 1984168404Spjd DMU_OT_NONE, 0, tx); 1985168404Spjd dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t); 1986168404Spjd 1987168404Spjd ztest_set_random_blocksize(os, dd.dd_packobj, tx); 1988168404Spjd ztest_set_random_blocksize(os, dd.dd_bigobj, tx); 1989168404Spjd 1990168404Spjd dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd, 1991168404Spjd tx); 1992168404Spjd dmu_tx_commit(tx); 1993168404Spjd } 1994168404Spjd 1995168404Spjd /* 1996168404Spjd * Prefetch a random chunk of the big object. 1997168404Spjd * Our aim here is to get some async reads in flight 1998168404Spjd * for blocks that we may free below; the DMU should 1999168404Spjd * handle this race correctly. 2000168404Spjd */ 2001168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 2002168404Spjd s = 1 + ztest_random(2 * width - 1); 2003168404Spjd dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk); 2004168404Spjd 2005168404Spjd /* 2006168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 2007168404Spjd */ 2008168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 2009168404Spjd s = 1 + ztest_random(width - 1); 2010168404Spjd 2011168404Spjd packoff = n * sizeof (bufwad_t); 2012168404Spjd packsize = s * sizeof (bufwad_t); 2013168404Spjd 2014168404Spjd bigoff = n * dd.dd_chunk; 2015168404Spjd bigsize = s * dd.dd_chunk; 2016168404Spjd 2017168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 2018168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 2019168404Spjd 2020168404Spjd /* 2021168404Spjd * free_percent of the time, free a range of bigobj rather than 2022168404Spjd * overwriting it. 2023168404Spjd */ 2024168404Spjd freeit = (ztest_random(100) < free_percent); 2025168404Spjd 2026168404Spjd /* 2027168404Spjd * Read the current contents of our objects. 2028168404Spjd */ 2029168404Spjd error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf); 2030168404Spjd ASSERT3U(error, ==, 0); 2031168404Spjd error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf); 2032168404Spjd ASSERT3U(error, ==, 0); 2033168404Spjd 2034168404Spjd /* 2035168404Spjd * Get a tx for the mods to both packobj and bigobj. 2036168404Spjd */ 2037168404Spjd tx = dmu_tx_create(os); 2038168404Spjd 2039168404Spjd dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize); 2040168404Spjd 2041168404Spjd if (freeit) 2042168404Spjd dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize); 2043168404Spjd else 2044168404Spjd dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize); 2045168404Spjd 2046168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2047168404Spjd 2048168404Spjd if (error) { 2049168404Spjd ztest_record_enospc("dmu r/w range"); 2050168404Spjd dmu_tx_abort(tx); 2051168404Spjd umem_free(packbuf, packsize); 2052168404Spjd umem_free(bigbuf, bigsize); 2053168404Spjd return; 2054168404Spjd } 2055168404Spjd 2056168404Spjd txg = dmu_tx_get_txg(tx); 2057168404Spjd 2058168404Spjd /* 2059168404Spjd * For each index from n to n + s, verify that the existing bufwad 2060168404Spjd * in packobj matches the bufwads at the head and tail of the 2061168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 2062168404Spjd * with the new values we want to write out. 2063168404Spjd */ 2064168404Spjd for (i = 0; i < s; i++) { 2065168404Spjd /* LINTED */ 2066168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 2067168404Spjd /* LINTED */ 2068168404Spjd bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk); 2069168404Spjd /* LINTED */ 2070168404Spjd bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1; 2071168404Spjd 2072168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 2073168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 2074168404Spjd 2075168404Spjd if (pack->bw_txg > txg) 2076168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 2077168404Spjd pack->bw_txg, txg); 2078168404Spjd 2079168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 2080168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 2081168404Spjd pack->bw_index, n, i); 2082168404Spjd 2083168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 2084168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 2085168404Spjd 2086168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 2087168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 2088168404Spjd 2089168404Spjd if (freeit) { 2090168404Spjd bzero(pack, sizeof (bufwad_t)); 2091168404Spjd } else { 2092168404Spjd pack->bw_index = n + i; 2093168404Spjd pack->bw_txg = txg; 2094168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 2095168404Spjd } 2096168404Spjd *bigH = *pack; 2097168404Spjd *bigT = *pack; 2098168404Spjd } 2099168404Spjd 2100168404Spjd /* 2101168404Spjd * We've verified all the old bufwads, and made new ones. 2102168404Spjd * Now write them out. 2103168404Spjd */ 2104168404Spjd dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx); 2105168404Spjd 2106168404Spjd if (freeit) { 2107168404Spjd if (zopt_verbose >= 6) { 2108168404Spjd (void) printf("freeing offset %llx size %llx" 2109168404Spjd " txg %llx\n", 2110168404Spjd (u_longlong_t)bigoff, 2111168404Spjd (u_longlong_t)bigsize, 2112168404Spjd (u_longlong_t)txg); 2113168404Spjd } 2114168404Spjd VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff, 2115168404Spjd bigsize, tx)); 2116168404Spjd } else { 2117168404Spjd if (zopt_verbose >= 6) { 2118168404Spjd (void) printf("writing offset %llx size %llx" 2119168404Spjd " txg %llx\n", 2120168404Spjd (u_longlong_t)bigoff, 2121168404Spjd (u_longlong_t)bigsize, 2122168404Spjd (u_longlong_t)txg); 2123168404Spjd } 2124168404Spjd dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx); 2125168404Spjd } 2126168404Spjd 2127168404Spjd dmu_tx_commit(tx); 2128168404Spjd 2129168404Spjd /* 2130168404Spjd * Sanity check the stuff we just wrote. 2131168404Spjd */ 2132168404Spjd { 2133168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 2134168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 2135168404Spjd 2136168404Spjd VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff, 2137168404Spjd packsize, packcheck)); 2138168404Spjd VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff, 2139168404Spjd bigsize, bigcheck)); 2140168404Spjd 2141168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 2142168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 2143168404Spjd 2144168404Spjd umem_free(packcheck, packsize); 2145168404Spjd umem_free(bigcheck, bigsize); 2146168404Spjd } 2147168404Spjd 2148168404Spjd umem_free(packbuf, packsize); 2149168404Spjd umem_free(bigbuf, bigsize); 2150168404Spjd} 2151168404Spjd 2152168404Spjdvoid 2153185029Spjdztest_dmu_check_future_leak(ztest_args_t *za) 2154168404Spjd{ 2155185029Spjd objset_t *os = za->za_os; 2156168404Spjd dmu_buf_t *db; 2157185029Spjd ztest_block_tag_t *bt; 2158185029Spjd dmu_object_info_t *doi = &za->za_doi; 2159168404Spjd 2160168404Spjd /* 2161168404Spjd * Make sure that, if there is a write record in the bonus buffer 2162168404Spjd * of the ZTEST_DIROBJ, that the txg for this record is <= the 2163168404Spjd * last synced txg of the pool. 2164168404Spjd */ 2165185029Spjd VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0); 2166185029Spjd za->za_dbuf = db; 2167185029Spjd VERIFY(dmu_object_info(os, ZTEST_DIROBJ, doi) == 0); 2168185029Spjd ASSERT3U(doi->doi_bonus_size, >=, sizeof (*bt)); 2169185029Spjd ASSERT3U(doi->doi_bonus_size, <=, db->db_size); 2170185029Spjd ASSERT3U(doi->doi_bonus_size % sizeof (*bt), ==, 0); 2171185029Spjd bt = (void *)((char *)db->db_data + doi->doi_bonus_size - sizeof (*bt)); 2172185029Spjd if (bt->bt_objset != 0) { 2173185029Spjd ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); 2174185029Spjd ASSERT3U(bt->bt_object, ==, ZTEST_DIROBJ); 2175185029Spjd ASSERT3U(bt->bt_offset, ==, -1ULL); 2176185029Spjd ASSERT3U(bt->bt_txg, <, spa_first_txg(za->za_spa)); 2177168404Spjd } 2178168404Spjd dmu_buf_rele(db, FTAG); 2179185029Spjd za->za_dbuf = NULL; 2180168404Spjd} 2181168404Spjd 2182168404Spjdvoid 2183168404Spjdztest_dmu_write_parallel(ztest_args_t *za) 2184168404Spjd{ 2185168404Spjd objset_t *os = za->za_os; 2186185029Spjd ztest_block_tag_t *rbt = &za->za_rbt; 2187185029Spjd ztest_block_tag_t *wbt = &za->za_wbt; 2188185029Spjd const size_t btsize = sizeof (ztest_block_tag_t); 2189168404Spjd dmu_buf_t *db; 2190185029Spjd int b, error; 2191185029Spjd int bs = ZTEST_DIROBJ_BLOCKSIZE; 2192185029Spjd int do_free = 0; 2193185029Spjd uint64_t off, txg, txg_how; 2194168404Spjd mutex_t *lp; 2195168404Spjd char osname[MAXNAMELEN]; 2196168404Spjd char iobuf[SPA_MAXBLOCKSIZE]; 2197185029Spjd blkptr_t blk = { 0 }; 2198185029Spjd uint64_t blkoff; 2199185029Spjd zbookmark_t zb; 2200185029Spjd dmu_tx_t *tx = dmu_tx_create(os); 2201168404Spjd 2202168404Spjd dmu_objset_name(os, osname); 2203168404Spjd 2204168404Spjd /* 2205168404Spjd * Have multiple threads write to large offsets in ZTEST_DIROBJ 2206168404Spjd * to verify that having multiple threads writing to the same object 2207168404Spjd * in parallel doesn't cause any trouble. 2208168404Spjd */ 2209185029Spjd if (ztest_random(4) == 0) { 2210185029Spjd /* 2211185029Spjd * Do the bonus buffer instead of a regular block. 2212185029Spjd * We need a lock to serialize resize vs. others, 2213185029Spjd * so we hash on the objset ID. 2214185029Spjd */ 2215185029Spjd b = dmu_objset_id(os) % ZTEST_SYNC_LOCKS; 2216185029Spjd off = -1ULL; 2217185029Spjd dmu_tx_hold_bonus(tx, ZTEST_DIROBJ); 2218185029Spjd } else { 2219168404Spjd b = ztest_random(ZTEST_SYNC_LOCKS); 2220185029Spjd off = za->za_diroff_shared + (b << SPA_MAXBLOCKSHIFT); 2221185029Spjd if (ztest_random(4) == 0) { 2222185029Spjd do_free = 1; 2223185029Spjd dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs); 2224185029Spjd } else { 2225185029Spjd dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs); 2226185029Spjd } 2227185029Spjd } 2228168404Spjd 2229185029Spjd txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT; 2230185029Spjd error = dmu_tx_assign(tx, txg_how); 2231185029Spjd if (error) { 2232185029Spjd if (error == ERESTART) { 2233185029Spjd ASSERT(txg_how == TXG_NOWAIT); 2234185029Spjd dmu_tx_wait(tx); 2235185029Spjd } else { 2236185029Spjd ztest_record_enospc("dmu write parallel"); 2237185029Spjd } 2238185029Spjd dmu_tx_abort(tx); 2239185029Spjd return; 2240185029Spjd } 2241185029Spjd txg = dmu_tx_get_txg(tx); 2242168404Spjd 2243185029Spjd lp = &ztest_shared->zs_sync_lock[b]; 2244185029Spjd (void) mutex_lock(lp); 2245168404Spjd 2246185029Spjd wbt->bt_objset = dmu_objset_id(os); 2247185029Spjd wbt->bt_object = ZTEST_DIROBJ; 2248185029Spjd wbt->bt_offset = off; 2249185029Spjd wbt->bt_txg = txg; 2250185029Spjd wbt->bt_thread = za->za_instance; 2251185029Spjd wbt->bt_seq = ztest_shared->zs_seq[b]++; /* protected by lp */ 2252168404Spjd 2253185029Spjd /* 2254185029Spjd * Occasionally, write an all-zero block to test the behavior 2255185029Spjd * of blocks that compress into holes. 2256185029Spjd */ 2257185029Spjd if (off != -1ULL && ztest_random(8) == 0) 2258185029Spjd bzero(wbt, btsize); 2259168404Spjd 2260185029Spjd if (off == -1ULL) { 2261185029Spjd dmu_object_info_t *doi = &za->za_doi; 2262185029Spjd char *dboff; 2263168404Spjd 2264185029Spjd VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0); 2265185029Spjd za->za_dbuf = db; 2266185029Spjd dmu_object_info_from_db(db, doi); 2267185029Spjd ASSERT3U(doi->doi_bonus_size, <=, db->db_size); 2268185029Spjd ASSERT3U(doi->doi_bonus_size, >=, btsize); 2269185029Spjd ASSERT3U(doi->doi_bonus_size % btsize, ==, 0); 2270185029Spjd dboff = (char *)db->db_data + doi->doi_bonus_size - btsize; 2271185029Spjd bcopy(dboff, rbt, btsize); 2272185029Spjd if (rbt->bt_objset != 0) { 2273185029Spjd ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset); 2274185029Spjd ASSERT3U(rbt->bt_object, ==, wbt->bt_object); 2275185029Spjd ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset); 2276185029Spjd ASSERT3U(rbt->bt_txg, <=, wbt->bt_txg); 2277168404Spjd } 2278185029Spjd if (ztest_random(10) == 0) { 2279185029Spjd int newsize = (ztest_random(db->db_size / 2280185029Spjd btsize) + 1) * btsize; 2281168404Spjd 2282185029Spjd ASSERT3U(newsize, >=, btsize); 2283185029Spjd ASSERT3U(newsize, <=, db->db_size); 2284185029Spjd VERIFY3U(dmu_set_bonus(db, newsize, tx), ==, 0); 2285185029Spjd dboff = (char *)db->db_data + newsize - btsize; 2286168404Spjd } 2287185029Spjd dmu_buf_will_dirty(db, tx); 2288185029Spjd bcopy(wbt, dboff, btsize); 2289185029Spjd dmu_buf_rele(db, FTAG); 2290185029Spjd za->za_dbuf = NULL; 2291185029Spjd } else if (do_free) { 2292185029Spjd VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0); 2293185029Spjd } else { 2294185029Spjd dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx); 2295185029Spjd } 2296168404Spjd 2297185029Spjd (void) mutex_unlock(lp); 2298168404Spjd 2299185029Spjd if (ztest_random(1000) == 0) 2300185029Spjd (void) poll(NULL, 0, 1); /* open dn_notxholds window */ 2301168404Spjd 2302185029Spjd dmu_tx_commit(tx); 2303168404Spjd 2304185029Spjd if (ztest_random(10000) == 0) 2305185029Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2306168404Spjd 2307185029Spjd if (off == -1ULL || do_free) 2308185029Spjd return; 2309168404Spjd 2310185029Spjd if (ztest_random(2) != 0) 2311185029Spjd return; 2312185029Spjd 2313185029Spjd /* 2314185029Spjd * dmu_sync() the block we just wrote. 2315185029Spjd */ 2316185029Spjd (void) mutex_lock(lp); 2317185029Spjd 2318185029Spjd blkoff = P2ALIGN_TYPED(off, bs, uint64_t); 2319185029Spjd error = dmu_buf_hold(os, ZTEST_DIROBJ, blkoff, FTAG, &db); 2320185029Spjd za->za_dbuf = db; 2321185029Spjd if (error) { 2322185029Spjd dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n", 2323185029Spjd osname, ZTEST_DIROBJ, blkoff, error); 2324168404Spjd (void) mutex_unlock(lp); 2325185029Spjd return; 2326185029Spjd } 2327185029Spjd blkoff = off - blkoff; 2328185029Spjd error = dmu_sync(NULL, db, &blk, txg, NULL, NULL); 2329185029Spjd dmu_buf_rele(db, FTAG); 2330185029Spjd za->za_dbuf = NULL; 2331168404Spjd 2332185029Spjd (void) mutex_unlock(lp); 2333168404Spjd 2334185029Spjd if (error) { 2335185029Spjd dprintf("dmu_sync(%s, %d, %llx) = %d\n", 2336185029Spjd osname, ZTEST_DIROBJ, off, error); 2337185029Spjd return; 2338185029Spjd } 2339168404Spjd 2340185029Spjd if (blk.blk_birth == 0) /* concurrent free */ 2341185029Spjd return; 2342168404Spjd 2343185029Spjd txg_suspend(dmu_objset_pool(os)); 2344168404Spjd 2345185029Spjd ASSERT(blk.blk_fill == 1); 2346185029Spjd ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER); 2347185029Spjd ASSERT3U(BP_GET_LEVEL(&blk), ==, 0); 2348185029Spjd ASSERT3U(BP_GET_LSIZE(&blk), ==, bs); 2349168404Spjd 2350185029Spjd /* 2351185029Spjd * Read the block that dmu_sync() returned to make sure its contents 2352185029Spjd * match what we wrote. We do this while still txg_suspend()ed 2353185029Spjd * to ensure that the block can't be reused before we read it. 2354185029Spjd */ 2355185029Spjd zb.zb_objset = dmu_objset_id(os); 2356185029Spjd zb.zb_object = ZTEST_DIROBJ; 2357185029Spjd zb.zb_level = 0; 2358185029Spjd zb.zb_blkid = off / bs; 2359185029Spjd error = zio_wait(zio_read(NULL, za->za_spa, &blk, iobuf, bs, 2360185029Spjd NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb)); 2361185029Spjd ASSERT3U(error, ==, 0); 2362168404Spjd 2363185029Spjd txg_resume(dmu_objset_pool(os)); 2364168404Spjd 2365185029Spjd bcopy(&iobuf[blkoff], rbt, btsize); 2366168404Spjd 2367185029Spjd if (rbt->bt_objset == 0) /* concurrent free */ 2368185029Spjd return; 2369168404Spjd 2370185029Spjd if (wbt->bt_objset == 0) /* all-zero overwrite */ 2371185029Spjd return; 2372168404Spjd 2373185029Spjd ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset); 2374185029Spjd ASSERT3U(rbt->bt_object, ==, wbt->bt_object); 2375185029Spjd ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset); 2376168404Spjd 2377185029Spjd /* 2378185029Spjd * The semantic of dmu_sync() is that we always push the most recent 2379185029Spjd * version of the data, so in the face of concurrent updates we may 2380185029Spjd * see a newer version of the block. That's OK. 2381185029Spjd */ 2382185029Spjd ASSERT3U(rbt->bt_txg, >=, wbt->bt_txg); 2383185029Spjd if (rbt->bt_thread == wbt->bt_thread) 2384185029Spjd ASSERT3U(rbt->bt_seq, ==, wbt->bt_seq); 2385185029Spjd else 2386185029Spjd ASSERT3U(rbt->bt_seq, >, wbt->bt_seq); 2387168404Spjd} 2388168404Spjd 2389168404Spjd/* 2390168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 2391168404Spjd */ 2392168404Spjd#define ZTEST_ZAP_MIN_INTS 1 2393168404Spjd#define ZTEST_ZAP_MAX_INTS 4 2394168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 2395168404Spjd 2396168404Spjdvoid 2397168404Spjdztest_zap(ztest_args_t *za) 2398168404Spjd{ 2399168404Spjd objset_t *os = za->za_os; 2400168404Spjd uint64_t object; 2401168404Spjd uint64_t txg, last_txg; 2402168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 2403168404Spjd uint64_t zl_ints, zl_intsize, prop; 2404168404Spjd int i, ints; 2405168404Spjd dmu_tx_t *tx; 2406168404Spjd char propname[100], txgname[100]; 2407168404Spjd int error; 2408168404Spjd char osname[MAXNAMELEN]; 2409168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 2410168404Spjd 2411168404Spjd dmu_objset_name(os, osname); 2412168404Spjd 2413168404Spjd /* 2414168404Spjd * Create a new object if necessary, and record it in the directory. 2415168404Spjd */ 2416168404Spjd VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff, 2417168404Spjd sizeof (uint64_t), &object)); 2418168404Spjd 2419168404Spjd if (object == 0) { 2420168404Spjd tx = dmu_tx_create(os); 2421168404Spjd dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, 2422168404Spjd sizeof (uint64_t)); 2423168404Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); 2424168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2425168404Spjd if (error) { 2426168404Spjd ztest_record_enospc("create zap test obj"); 2427168404Spjd dmu_tx_abort(tx); 2428168404Spjd return; 2429168404Spjd } 2430168404Spjd object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx); 2431168404Spjd if (error) { 2432168404Spjd fatal(0, "zap_create('%s', %llu) = %d", 2433168404Spjd osname, object, error); 2434168404Spjd } 2435168404Spjd ASSERT(object != 0); 2436168404Spjd dmu_write(os, ZTEST_DIROBJ, za->za_diroff, 2437168404Spjd sizeof (uint64_t), &object, tx); 2438168404Spjd /* 2439168404Spjd * Generate a known hash collision, and verify that 2440168404Spjd * we can lookup and remove both entries. 2441168404Spjd */ 2442168404Spjd for (i = 0; i < 2; i++) { 2443168404Spjd value[i] = i; 2444168404Spjd error = zap_add(os, object, hc[i], sizeof (uint64_t), 2445168404Spjd 1, &value[i], tx); 2446168404Spjd ASSERT3U(error, ==, 0); 2447168404Spjd } 2448168404Spjd for (i = 0; i < 2; i++) { 2449168404Spjd error = zap_add(os, object, hc[i], sizeof (uint64_t), 2450168404Spjd 1, &value[i], tx); 2451168404Spjd ASSERT3U(error, ==, EEXIST); 2452168404Spjd error = zap_length(os, object, hc[i], 2453168404Spjd &zl_intsize, &zl_ints); 2454168404Spjd ASSERT3U(error, ==, 0); 2455168404Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 2456168404Spjd ASSERT3U(zl_ints, ==, 1); 2457168404Spjd } 2458168404Spjd for (i = 0; i < 2; i++) { 2459168404Spjd error = zap_remove(os, object, hc[i], tx); 2460168404Spjd ASSERT3U(error, ==, 0); 2461168404Spjd } 2462168404Spjd 2463168404Spjd dmu_tx_commit(tx); 2464168404Spjd } 2465168404Spjd 2466168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 2467168404Spjd 2468185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 2469185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 2470185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 2471185029Spjd bzero(value, sizeof (value)); 2472185029Spjd last_txg = 0; 2473168404Spjd 2474185029Spjd /* 2475185029Spjd * If these zap entries already exist, validate their contents. 2476185029Spjd */ 2477185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 2478185029Spjd if (error == 0) { 2479185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 2480185029Spjd ASSERT3U(zl_ints, ==, 1); 2481168404Spjd 2482185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 2483185029Spjd zl_ints, &last_txg) == 0); 2484168404Spjd 2485185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 2486185029Spjd &zl_ints) == 0); 2487168404Spjd 2488185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 2489185029Spjd ASSERT3U(zl_ints, ==, ints); 2490168404Spjd 2491185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 2492185029Spjd zl_ints, value) == 0); 2493168404Spjd 2494185029Spjd for (i = 0; i < ints; i++) { 2495185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 2496168404Spjd } 2497185029Spjd } else { 2498185029Spjd ASSERT3U(error, ==, ENOENT); 2499185029Spjd } 2500168404Spjd 2501185029Spjd /* 2502185029Spjd * Atomically update two entries in our zap object. 2503185029Spjd * The first is named txg_%llu, and contains the txg 2504185029Spjd * in which the property was last updated. The second 2505185029Spjd * is named prop_%llu, and the nth element of its value 2506185029Spjd * should be txg + object + n. 2507185029Spjd */ 2508185029Spjd tx = dmu_tx_create(os); 2509185029Spjd dmu_tx_hold_zap(tx, object, TRUE, NULL); 2510185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2511185029Spjd if (error) { 2512185029Spjd ztest_record_enospc("create zap entry"); 2513185029Spjd dmu_tx_abort(tx); 2514185029Spjd return; 2515185029Spjd } 2516185029Spjd txg = dmu_tx_get_txg(tx); 2517168404Spjd 2518185029Spjd if (last_txg > txg) 2519185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 2520168404Spjd 2521185029Spjd for (i = 0; i < ints; i++) 2522185029Spjd value[i] = txg + object + i; 2523168404Spjd 2524185029Spjd error = zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx); 2525185029Spjd if (error) 2526185029Spjd fatal(0, "zap_update('%s', %llu, '%s') = %d", 2527185029Spjd osname, object, txgname, error); 2528168404Spjd 2529185029Spjd error = zap_update(os, object, propname, sizeof (uint64_t), 2530185029Spjd ints, value, tx); 2531185029Spjd if (error) 2532185029Spjd fatal(0, "zap_update('%s', %llu, '%s') = %d", 2533185029Spjd osname, object, propname, error); 2534168404Spjd 2535185029Spjd dmu_tx_commit(tx); 2536168404Spjd 2537185029Spjd /* 2538185029Spjd * Remove a random pair of entries. 2539185029Spjd */ 2540185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 2541185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 2542185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 2543168404Spjd 2544185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 2545168404Spjd 2546185029Spjd if (error == ENOENT) 2547185029Spjd return; 2548168404Spjd 2549185029Spjd ASSERT3U(error, ==, 0); 2550168404Spjd 2551185029Spjd tx = dmu_tx_create(os); 2552185029Spjd dmu_tx_hold_zap(tx, object, TRUE, NULL); 2553185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2554185029Spjd if (error) { 2555185029Spjd ztest_record_enospc("remove zap entry"); 2556185029Spjd dmu_tx_abort(tx); 2557185029Spjd return; 2558185029Spjd } 2559185029Spjd error = zap_remove(os, object, txgname, tx); 2560185029Spjd if (error) 2561185029Spjd fatal(0, "zap_remove('%s', %llu, '%s') = %d", 2562185029Spjd osname, object, txgname, error); 2563168404Spjd 2564185029Spjd error = zap_remove(os, object, propname, tx); 2565185029Spjd if (error) 2566185029Spjd fatal(0, "zap_remove('%s', %llu, '%s') = %d", 2567185029Spjd osname, object, propname, error); 2568168404Spjd 2569185029Spjd dmu_tx_commit(tx); 2570168404Spjd 2571168404Spjd /* 2572168404Spjd * Once in a while, destroy the object. 2573168404Spjd */ 2574185029Spjd if (ztest_random(1000) != 0) 2575168404Spjd return; 2576168404Spjd 2577168404Spjd tx = dmu_tx_create(os); 2578168404Spjd dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t)); 2579168404Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 2580168404Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2581168404Spjd if (error) { 2582168404Spjd ztest_record_enospc("destroy zap object"); 2583168404Spjd dmu_tx_abort(tx); 2584168404Spjd return; 2585168404Spjd } 2586168404Spjd error = zap_destroy(os, object, tx); 2587168404Spjd if (error) 2588168404Spjd fatal(0, "zap_destroy('%s', %llu) = %d", 2589168404Spjd osname, object, error); 2590168404Spjd object = 0; 2591168404Spjd dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t), 2592168404Spjd &object, tx); 2593168404Spjd dmu_tx_commit(tx); 2594168404Spjd} 2595168404Spjd 2596168404Spjdvoid 2597168404Spjdztest_zap_parallel(ztest_args_t *za) 2598168404Spjd{ 2599168404Spjd objset_t *os = za->za_os; 2600168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 2601168404Spjd dmu_tx_t *tx; 2602168404Spjd int i, namelen, error; 2603168404Spjd char name[20], string_value[20]; 2604168404Spjd void *data; 2605168404Spjd 2606185029Spjd /* 2607185029Spjd * Generate a random name of the form 'xxx.....' where each 2608185029Spjd * x is a random printable character and the dots are dots. 2609185029Spjd * There are 94 such characters, and the name length goes from 2610185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 2611185029Spjd */ 2612185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 2613168404Spjd 2614185029Spjd for (i = 0; i < 3; i++) 2615185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 2616185029Spjd for (; i < namelen - 1; i++) 2617185029Spjd name[i] = '.'; 2618185029Spjd name[i] = '\0'; 2619168404Spjd 2620185029Spjd if (ztest_random(2) == 0) 2621185029Spjd object = ZTEST_MICROZAP_OBJ; 2622185029Spjd else 2623185029Spjd object = ZTEST_FATZAP_OBJ; 2624168404Spjd 2625185029Spjd if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) { 2626185029Spjd wsize = sizeof (txg); 2627185029Spjd wc = 1; 2628185029Spjd data = &txg; 2629185029Spjd } else { 2630185029Spjd wsize = 1; 2631185029Spjd wc = namelen; 2632185029Spjd data = string_value; 2633185029Spjd } 2634168404Spjd 2635185029Spjd count = -1ULL; 2636185029Spjd VERIFY(zap_count(os, object, &count) == 0); 2637185029Spjd ASSERT(count != -1ULL); 2638168404Spjd 2639185029Spjd /* 2640185029Spjd * Select an operation: length, lookup, add, update, remove. 2641185029Spjd */ 2642185029Spjd i = ztest_random(5); 2643168404Spjd 2644185029Spjd if (i >= 2) { 2645185029Spjd tx = dmu_tx_create(os); 2646185029Spjd dmu_tx_hold_zap(tx, object, TRUE, NULL); 2647185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 2648185029Spjd if (error) { 2649185029Spjd ztest_record_enospc("zap parallel"); 2650185029Spjd dmu_tx_abort(tx); 2651185029Spjd return; 2652168404Spjd } 2653185029Spjd txg = dmu_tx_get_txg(tx); 2654185029Spjd bcopy(name, string_value, namelen); 2655185029Spjd } else { 2656185029Spjd tx = NULL; 2657185029Spjd txg = 0; 2658185029Spjd bzero(string_value, namelen); 2659185029Spjd } 2660168404Spjd 2661185029Spjd switch (i) { 2662168404Spjd 2663185029Spjd case 0: 2664185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 2665185029Spjd if (error == 0) { 2666185029Spjd ASSERT3U(wsize, ==, zl_wsize); 2667185029Spjd ASSERT3U(wc, ==, zl_wc); 2668185029Spjd } else { 2669185029Spjd ASSERT3U(error, ==, ENOENT); 2670185029Spjd } 2671185029Spjd break; 2672168404Spjd 2673185029Spjd case 1: 2674185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 2675185029Spjd if (error == 0) { 2676185029Spjd if (data == string_value && 2677185029Spjd bcmp(name, data, namelen) != 0) 2678185029Spjd fatal(0, "name '%s' != val '%s' len %d", 2679185029Spjd name, data, namelen); 2680185029Spjd } else { 2681185029Spjd ASSERT3U(error, ==, ENOENT); 2682185029Spjd } 2683185029Spjd break; 2684168404Spjd 2685185029Spjd case 2: 2686185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 2687185029Spjd ASSERT(error == 0 || error == EEXIST); 2688185029Spjd break; 2689168404Spjd 2690185029Spjd case 3: 2691185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 2692185029Spjd break; 2693168404Spjd 2694185029Spjd case 4: 2695185029Spjd error = zap_remove(os, object, name, tx); 2696185029Spjd ASSERT(error == 0 || error == ENOENT); 2697185029Spjd break; 2698185029Spjd } 2699168404Spjd 2700185029Spjd if (tx != NULL) 2701185029Spjd dmu_tx_commit(tx); 2702168404Spjd} 2703168404Spjd 2704168404Spjdvoid 2705168404Spjdztest_dsl_prop_get_set(ztest_args_t *za) 2706168404Spjd{ 2707168404Spjd objset_t *os = za->za_os; 2708168404Spjd int i, inherit; 2709168404Spjd uint64_t value; 2710168404Spjd const char *prop, *valname; 2711168404Spjd char setpoint[MAXPATHLEN]; 2712168404Spjd char osname[MAXNAMELEN]; 2713168404Spjd int error; 2714168404Spjd 2715168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 2716168404Spjd 2717168404Spjd dmu_objset_name(os, osname); 2718168404Spjd 2719168404Spjd for (i = 0; i < 2; i++) { 2720168404Spjd if (i == 0) { 2721168404Spjd prop = "checksum"; 2722168404Spjd value = ztest_random_checksum(); 2723168404Spjd inherit = (value == ZIO_CHECKSUM_INHERIT); 2724168404Spjd } else { 2725168404Spjd prop = "compression"; 2726168404Spjd value = ztest_random_compress(); 2727168404Spjd inherit = (value == ZIO_COMPRESS_INHERIT); 2728168404Spjd } 2729168404Spjd 2730168404Spjd error = dsl_prop_set(osname, prop, sizeof (value), 2731168404Spjd !inherit, &value); 2732168404Spjd 2733168404Spjd if (error == ENOSPC) { 2734168404Spjd ztest_record_enospc("dsl_prop_set"); 2735168404Spjd break; 2736168404Spjd } 2737168404Spjd 2738168404Spjd ASSERT3U(error, ==, 0); 2739168404Spjd 2740168404Spjd VERIFY3U(dsl_prop_get(osname, prop, sizeof (value), 2741168404Spjd 1, &value, setpoint), ==, 0); 2742168404Spjd 2743168404Spjd if (i == 0) 2744168404Spjd valname = zio_checksum_table[value].ci_name; 2745168404Spjd else 2746168404Spjd valname = zio_compress_table[value].ci_name; 2747168404Spjd 2748168404Spjd if (zopt_verbose >= 6) { 2749168404Spjd (void) printf("%s %s = %s for '%s'\n", 2750168404Spjd osname, prop, valname, setpoint); 2751168404Spjd } 2752168404Spjd } 2753168404Spjd 2754168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 2755168404Spjd} 2756168404Spjd 2757168404Spjd/* 2758168404Spjd * Inject random faults into the on-disk data. 2759168404Spjd */ 2760168404Spjdvoid 2761168404Spjdztest_fault_inject(ztest_args_t *za) 2762168404Spjd{ 2763168404Spjd int fd; 2764168404Spjd uint64_t offset; 2765168404Spjd uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; 2766168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 2767168404Spjd uint64_t top, leaf; 2768168404Spjd char path0[MAXPATHLEN]; 2769168404Spjd char pathrand[MAXPATHLEN]; 2770168404Spjd size_t fsize; 2771185029Spjd spa_t *spa = za->za_spa; 2772168404Spjd int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 2773168404Spjd int iters = 1000; 2774185029Spjd int maxfaults = zopt_maxfaults; 2775185029Spjd vdev_t *vd0 = NULL; 2776168404Spjd uint64_t guid0 = 0; 2777168404Spjd 2778185029Spjd ASSERT(leaves >= 1); 2779168404Spjd 2780168404Spjd /* 2781185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 2782168404Spjd */ 2783185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 2784168404Spjd 2785185029Spjd if (ztest_random(2) == 0) { 2786185029Spjd /* 2787185029Spjd * Inject errors on a normal data device. 2788185029Spjd */ 2789185029Spjd top = ztest_random(spa->spa_root_vdev->vdev_children); 2790185029Spjd leaf = ztest_random(leaves); 2791168404Spjd 2792185029Spjd /* 2793185029Spjd * Generate paths to the first leaf in this top-level vdev, 2794185029Spjd * and to the random leaf we selected. We'll induce transient 2795185029Spjd * write failures and random online/offline activity on leaf 0, 2796185029Spjd * and we'll write random garbage to the randomly chosen leaf. 2797185029Spjd */ 2798185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 2799185029Spjd zopt_dir, zopt_pool, top * leaves + 0); 2800185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 2801185029Spjd zopt_dir, zopt_pool, top * leaves + leaf); 2802168404Spjd 2803185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 2804185029Spjd if (vd0 != NULL && maxfaults != 1) { 2805185029Spjd /* 2806185029Spjd * Make vd0 explicitly claim to be unreadable, 2807185029Spjd * or unwriteable, or reach behind its back 2808185029Spjd * and close the underlying fd. We can do this if 2809185029Spjd * maxfaults == 0 because we'll fail and reexecute, 2810185029Spjd * and we can do it if maxfaults >= 2 because we'll 2811185029Spjd * have enough redundancy. If maxfaults == 1, the 2812185029Spjd * combination of this with injection of random data 2813185029Spjd * corruption below exceeds the pool's fault tolerance. 2814185029Spjd */ 2815185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 2816168404Spjd 2817185029Spjd if (vf != NULL && ztest_random(3) == 0) { 2818185029Spjd (void) close(vf->vf_vnode->v_fd); 2819185029Spjd vf->vf_vnode->v_fd = -1; 2820185029Spjd } else if (ztest_random(2) == 0) { 2821185029Spjd vd0->vdev_cant_read = B_TRUE; 2822185029Spjd } else { 2823185029Spjd vd0->vdev_cant_write = B_TRUE; 2824185029Spjd } 2825185029Spjd guid0 = vd0->vdev_guid; 2826185029Spjd } 2827185029Spjd } else { 2828185029Spjd /* 2829185029Spjd * Inject errors on an l2cache device. 2830185029Spjd */ 2831185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 2832168404Spjd 2833185029Spjd if (sav->sav_count == 0) { 2834185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 2835185029Spjd return; 2836185029Spjd } 2837185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2838168404Spjd guid0 = vd0->vdev_guid; 2839185029Spjd (void) strcpy(path0, vd0->vdev_path); 2840185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 2841185029Spjd 2842185029Spjd leaf = 0; 2843185029Spjd leaves = 1; 2844185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 2845168404Spjd } 2846168404Spjd 2847185029Spjd dprintf("damaging %s and %s\n", path0, pathrand); 2848168404Spjd 2849185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 2850185029Spjd 2851185029Spjd if (maxfaults == 0) 2852185029Spjd return; 2853185029Spjd 2854168404Spjd /* 2855168404Spjd * If we can tolerate two or more faults, randomly online/offline vd0. 2856168404Spjd */ 2857185029Spjd if (maxfaults >= 2 && guid0 != 0) { 2858168404Spjd if (ztest_random(10) < 6) 2859168404Spjd (void) vdev_offline(spa, guid0, B_TRUE); 2860168404Spjd else 2861185029Spjd (void) vdev_online(spa, guid0, B_FALSE, NULL); 2862168404Spjd } 2863168404Spjd 2864168404Spjd /* 2865168404Spjd * We have at least single-fault tolerance, so inject data corruption. 2866168404Spjd */ 2867168404Spjd fd = open(pathrand, O_RDWR); 2868168404Spjd 2869168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 2870168404Spjd return; 2871168404Spjd 2872168404Spjd fsize = lseek(fd, 0, SEEK_END); 2873168404Spjd 2874168404Spjd while (--iters != 0) { 2875168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 2876168404Spjd (leaves << bshift) + (leaf << bshift) + 2877168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 2878168404Spjd 2879168404Spjd if (offset >= fsize) 2880168404Spjd continue; 2881168404Spjd 2882168404Spjd if (zopt_verbose >= 6) 2883168404Spjd (void) printf("injecting bad word into %s," 2884168404Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 2885168404Spjd 2886168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 2887168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 2888168404Spjd offset, pathrand); 2889168404Spjd } 2890168404Spjd 2891168404Spjd (void) close(fd); 2892168404Spjd} 2893168404Spjd 2894168404Spjd/* 2895168404Spjd * Scrub the pool. 2896168404Spjd */ 2897168404Spjdvoid 2898168404Spjdztest_scrub(ztest_args_t *za) 2899168404Spjd{ 2900185029Spjd spa_t *spa = za->za_spa; 2901168404Spjd 2902185029Spjd (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING); 2903168404Spjd (void) poll(NULL, 0, 1000); /* wait a second, then force a restart */ 2904185029Spjd (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING); 2905168404Spjd} 2906168404Spjd 2907168404Spjd/* 2908168404Spjd * Rename the pool to a different name and then rename it back. 2909168404Spjd */ 2910168404Spjdvoid 2911168404Spjdztest_spa_rename(ztest_args_t *za) 2912168404Spjd{ 2913168404Spjd char *oldname, *newname; 2914168404Spjd int error; 2915168404Spjd spa_t *spa; 2916168404Spjd 2917168404Spjd (void) rw_wrlock(&ztest_shared->zs_name_lock); 2918168404Spjd 2919168404Spjd oldname = za->za_pool; 2920168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 2921168404Spjd (void) strcpy(newname, oldname); 2922168404Spjd (void) strcat(newname, "_tmp"); 2923168404Spjd 2924168404Spjd /* 2925168404Spjd * Do the rename 2926168404Spjd */ 2927168404Spjd error = spa_rename(oldname, newname); 2928168404Spjd if (error) 2929168404Spjd fatal(0, "spa_rename('%s', '%s') = %d", oldname, 2930168404Spjd newname, error); 2931168404Spjd 2932168404Spjd /* 2933168404Spjd * Try to open it under the old name, which shouldn't exist 2934168404Spjd */ 2935168404Spjd error = spa_open(oldname, &spa, FTAG); 2936168404Spjd if (error != ENOENT) 2937168404Spjd fatal(0, "spa_open('%s') = %d", oldname, error); 2938168404Spjd 2939168404Spjd /* 2940168404Spjd * Open it under the new name and make sure it's still the same spa_t. 2941168404Spjd */ 2942168404Spjd error = spa_open(newname, &spa, FTAG); 2943168404Spjd if (error != 0) 2944168404Spjd fatal(0, "spa_open('%s') = %d", newname, error); 2945168404Spjd 2946185029Spjd ASSERT(spa == za->za_spa); 2947168404Spjd spa_close(spa, FTAG); 2948168404Spjd 2949168404Spjd /* 2950168404Spjd * Rename it back to the original 2951168404Spjd */ 2952168404Spjd error = spa_rename(newname, oldname); 2953168404Spjd if (error) 2954168404Spjd fatal(0, "spa_rename('%s', '%s') = %d", newname, 2955168404Spjd oldname, error); 2956168404Spjd 2957168404Spjd /* 2958168404Spjd * Make sure it can still be opened 2959168404Spjd */ 2960168404Spjd error = spa_open(oldname, &spa, FTAG); 2961168404Spjd if (error != 0) 2962168404Spjd fatal(0, "spa_open('%s') = %d", oldname, error); 2963168404Spjd 2964185029Spjd ASSERT(spa == za->za_spa); 2965168404Spjd spa_close(spa, FTAG); 2966168404Spjd 2967168404Spjd umem_free(newname, strlen(newname) + 1); 2968168404Spjd 2969168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 2970168404Spjd} 2971168404Spjd 2972168404Spjd 2973168404Spjd/* 2974168404Spjd * Completely obliterate one disk. 2975168404Spjd */ 2976168404Spjdstatic void 2977168404Spjdztest_obliterate_one_disk(uint64_t vdev) 2978168404Spjd{ 2979168404Spjd int fd; 2980168404Spjd char dev_name[MAXPATHLEN], copy_name[MAXPATHLEN]; 2981168404Spjd size_t fsize; 2982168404Spjd 2983168404Spjd if (zopt_maxfaults < 2) 2984168404Spjd return; 2985168404Spjd 2986168404Spjd (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); 2987168404Spjd (void) snprintf(copy_name, MAXPATHLEN, "%s.old", dev_name); 2988168404Spjd 2989168404Spjd fd = open(dev_name, O_RDWR); 2990168404Spjd 2991168404Spjd if (fd == -1) 2992168404Spjd fatal(1, "can't open %s", dev_name); 2993168404Spjd 2994168404Spjd /* 2995168404Spjd * Determine the size. 2996168404Spjd */ 2997168404Spjd fsize = lseek(fd, 0, SEEK_END); 2998168404Spjd 2999168404Spjd (void) close(fd); 3000168404Spjd 3001168404Spjd /* 3002168404Spjd * Rename the old device to dev_name.old (useful for debugging). 3003168404Spjd */ 3004168404Spjd VERIFY(rename(dev_name, copy_name) == 0); 3005168404Spjd 3006168404Spjd /* 3007168404Spjd * Create a new one. 3008168404Spjd */ 3009168404Spjd VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0); 3010168404Spjd VERIFY(ftruncate(fd, fsize) == 0); 3011168404Spjd (void) close(fd); 3012168404Spjd} 3013168404Spjd 3014168404Spjdstatic void 3015168404Spjdztest_replace_one_disk(spa_t *spa, uint64_t vdev) 3016168404Spjd{ 3017168404Spjd char dev_name[MAXPATHLEN]; 3018185029Spjd nvlist_t *root; 3019168404Spjd int error; 3020168404Spjd uint64_t guid; 3021168404Spjd vdev_t *vd; 3022168404Spjd 3023168404Spjd (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); 3024168404Spjd 3025168404Spjd /* 3026168404Spjd * Build the nvlist describing dev_name. 3027168404Spjd */ 3028185029Spjd root = make_vdev_root(dev_name, NULL, 0, 0, 0, 0, 0, 1); 3029168404Spjd 3030185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 3031168404Spjd if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL) 3032168404Spjd guid = 0; 3033168404Spjd else 3034168404Spjd guid = vd->vdev_guid; 3035185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 3036168404Spjd error = spa_vdev_attach(spa, guid, root, B_TRUE); 3037168404Spjd if (error != 0 && 3038168404Spjd error != EBUSY && 3039168404Spjd error != ENOTSUP && 3040168404Spjd error != ENODEV && 3041168404Spjd error != EDOM) 3042168404Spjd fatal(0, "spa_vdev_attach(in-place) = %d", error); 3043168404Spjd 3044168404Spjd nvlist_free(root); 3045168404Spjd} 3046168404Spjd 3047168404Spjdstatic void 3048168404Spjdztest_verify_blocks(char *pool) 3049168404Spjd{ 3050168404Spjd int status; 3051168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 3052168404Spjd char zbuf[1024]; 3053168404Spjd char *bin; 3054185029Spjd char *ztest; 3055185029Spjd char *isa; 3056185029Spjd int isalen; 3057168404Spjd FILE *fp; 3058168404Spjd 3059168404Spjd if (realpath(progname, zdb) == NULL) 3060168404Spjd assert(!"realpath() failed"); 3061168404Spjd 3062168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 3063168404Spjd bin = strstr(zdb, "/usr/bin/"); 3064185029Spjd ztest = strstr(bin, "/ztest"); 3065185029Spjd isa = bin + 8; 3066185029Spjd isalen = ztest - isa; 3067185029Spjd isa = strdup(isa); 3068168404Spjd /* LINTED */ 3069185029Spjd (void) sprintf(bin, 3070185029Spjd "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s", 3071185029Spjd isalen, 3072185029Spjd isa, 3073168404Spjd zopt_verbose >= 3 ? "s" : "", 3074168404Spjd zopt_verbose >= 4 ? "v" : "", 3075168404Spjd ztest_random(2) == 0 ? "pre" : "post", pool); 3076185029Spjd free(isa); 3077168404Spjd 3078168404Spjd if (zopt_verbose >= 5) 3079168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 3080168404Spjd 3081168404Spjd fp = popen(zdb, "r"); 3082168404Spjd assert(fp != NULL); 3083168404Spjd 3084168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 3085168404Spjd if (zopt_verbose >= 3) 3086168404Spjd (void) printf("%s", zbuf); 3087168404Spjd 3088168404Spjd status = pclose(fp); 3089168404Spjd 3090168404Spjd if (status == 0) 3091168404Spjd return; 3092168404Spjd 3093168404Spjd ztest_dump_core = 0; 3094168404Spjd if (WIFEXITED(status)) 3095168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 3096168404Spjd else 3097168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 3098168404Spjd} 3099168404Spjd 3100168404Spjdstatic void 3101168404Spjdztest_walk_pool_directory(char *header) 3102168404Spjd{ 3103168404Spjd spa_t *spa = NULL; 3104168404Spjd 3105168404Spjd if (zopt_verbose >= 6) 3106168404Spjd (void) printf("%s\n", header); 3107168404Spjd 3108168404Spjd mutex_enter(&spa_namespace_lock); 3109168404Spjd while ((spa = spa_next(spa)) != NULL) 3110168404Spjd if (zopt_verbose >= 6) 3111168404Spjd (void) printf("\t%s\n", spa_name(spa)); 3112168404Spjd mutex_exit(&spa_namespace_lock); 3113168404Spjd} 3114168404Spjd 3115168404Spjdstatic void 3116168404Spjdztest_spa_import_export(char *oldname, char *newname) 3117168404Spjd{ 3118168404Spjd nvlist_t *config; 3119168404Spjd uint64_t pool_guid; 3120168404Spjd spa_t *spa; 3121168404Spjd int error; 3122168404Spjd 3123168404Spjd if (zopt_verbose >= 4) { 3124168404Spjd (void) printf("import/export: old = %s, new = %s\n", 3125168404Spjd oldname, newname); 3126168404Spjd } 3127168404Spjd 3128168404Spjd /* 3129168404Spjd * Clean up from previous runs. 3130168404Spjd */ 3131168404Spjd (void) spa_destroy(newname); 3132168404Spjd 3133168404Spjd /* 3134168404Spjd * Get the pool's configuration and guid. 3135168404Spjd */ 3136168404Spjd error = spa_open(oldname, &spa, FTAG); 3137168404Spjd if (error) 3138168404Spjd fatal(0, "spa_open('%s') = %d", oldname, error); 3139168404Spjd 3140168404Spjd pool_guid = spa_guid(spa); 3141168404Spjd spa_close(spa, FTAG); 3142168404Spjd 3143168404Spjd ztest_walk_pool_directory("pools before export"); 3144168404Spjd 3145168404Spjd /* 3146168404Spjd * Export it. 3147168404Spjd */ 3148207670Smm error = spa_export(oldname, &config, B_FALSE, B_FALSE); 3149168404Spjd if (error) 3150168404Spjd fatal(0, "spa_export('%s') = %d", oldname, error); 3151168404Spjd 3152168404Spjd ztest_walk_pool_directory("pools after export"); 3153168404Spjd 3154168404Spjd /* 3155168404Spjd * Import it under the new name. 3156168404Spjd */ 3157168404Spjd error = spa_import(newname, config, NULL); 3158168404Spjd if (error) 3159168404Spjd fatal(0, "spa_import('%s') = %d", newname, error); 3160168404Spjd 3161168404Spjd ztest_walk_pool_directory("pools after import"); 3162168404Spjd 3163168404Spjd /* 3164168404Spjd * Try to import it again -- should fail with EEXIST. 3165168404Spjd */ 3166168404Spjd error = spa_import(newname, config, NULL); 3167168404Spjd if (error != EEXIST) 3168168404Spjd fatal(0, "spa_import('%s') twice", newname); 3169168404Spjd 3170168404Spjd /* 3171168404Spjd * Try to import it under a different name -- should fail with EEXIST. 3172168404Spjd */ 3173168404Spjd error = spa_import(oldname, config, NULL); 3174168404Spjd if (error != EEXIST) 3175168404Spjd fatal(0, "spa_import('%s') under multiple names", newname); 3176168404Spjd 3177168404Spjd /* 3178168404Spjd * Verify that the pool is no longer visible under the old name. 3179168404Spjd */ 3180168404Spjd error = spa_open(oldname, &spa, FTAG); 3181168404Spjd if (error != ENOENT) 3182168404Spjd fatal(0, "spa_open('%s') = %d", newname, error); 3183168404Spjd 3184168404Spjd /* 3185168404Spjd * Verify that we can open and close the pool using the new name. 3186168404Spjd */ 3187168404Spjd error = spa_open(newname, &spa, FTAG); 3188168404Spjd if (error) 3189168404Spjd fatal(0, "spa_open('%s') = %d", newname, error); 3190168404Spjd ASSERT(pool_guid == spa_guid(spa)); 3191168404Spjd spa_close(spa, FTAG); 3192168404Spjd 3193168404Spjd nvlist_free(config); 3194168404Spjd} 3195168404Spjd 3196168404Spjdstatic void * 3197185029Spjdztest_resume(void *arg) 3198185029Spjd{ 3199185029Spjd spa_t *spa = arg; 3200185029Spjd 3201185029Spjd while (!ztest_exiting) { 3202185029Spjd (void) poll(NULL, 0, 1000); 3203185029Spjd 3204185029Spjd if (!spa_suspended(spa)) 3205185029Spjd continue; 3206185029Spjd 3207185029Spjd spa_vdev_state_enter(spa); 3208185029Spjd vdev_clear(spa, NULL); 3209185029Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 3210185029Spjd 3211185029Spjd zio_resume(spa); 3212185029Spjd } 3213185029Spjd return (NULL); 3214185029Spjd} 3215185029Spjd 3216185029Spjdstatic void * 3217168404Spjdztest_thread(void *arg) 3218168404Spjd{ 3219168404Spjd ztest_args_t *za = arg; 3220168404Spjd ztest_shared_t *zs = ztest_shared; 3221168404Spjd hrtime_t now, functime; 3222168404Spjd ztest_info_t *zi; 3223185029Spjd int f, i; 3224168404Spjd 3225168404Spjd while ((now = gethrtime()) < za->za_stop) { 3226168404Spjd /* 3227168404Spjd * See if it's time to force a crash. 3228168404Spjd */ 3229168404Spjd if (now > za->za_kill) { 3230185029Spjd zs->zs_alloc = spa_get_alloc(za->za_spa); 3231185029Spjd zs->zs_space = spa_get_space(za->za_spa); 3232168404Spjd (void) kill(getpid(), SIGKILL); 3233168404Spjd } 3234168404Spjd 3235168404Spjd /* 3236168404Spjd * Pick a random function. 3237168404Spjd */ 3238168404Spjd f = ztest_random(ZTEST_FUNCS); 3239168404Spjd zi = &zs->zs_info[f]; 3240168404Spjd 3241168404Spjd /* 3242168404Spjd * Decide whether to call it, based on the requested frequency. 3243168404Spjd */ 3244168404Spjd if (zi->zi_call_target == 0 || 3245168404Spjd (double)zi->zi_call_total / zi->zi_call_target > 3246168404Spjd (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC)) 3247168404Spjd continue; 3248168404Spjd 3249168404Spjd atomic_add_64(&zi->zi_calls, 1); 3250168404Spjd atomic_add_64(&zi->zi_call_total, 1); 3251168404Spjd 3252168404Spjd za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) * 3253168404Spjd ZTEST_DIRSIZE; 3254168404Spjd za->za_diroff_shared = (1ULL << 63); 3255168404Spjd 3256185029Spjd for (i = 0; i < zi->zi_iters; i++) 3257185029Spjd zi->zi_func(za); 3258168404Spjd 3259168404Spjd functime = gethrtime() - now; 3260168404Spjd 3261168404Spjd atomic_add_64(&zi->zi_call_time, functime); 3262168404Spjd 3263168404Spjd if (zopt_verbose >= 4) { 3264168404Spjd Dl_info dli; 3265168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 3266168404Spjd (void) printf("%6.2f sec in %s\n", 3267168404Spjd (double)functime / NANOSEC, dli.dli_sname); 3268168404Spjd } 3269168404Spjd 3270168404Spjd /* 3271168404Spjd * If we're getting ENOSPC with some regularity, stop. 3272168404Spjd */ 3273168404Spjd if (zs->zs_enospc_count > 10) 3274168404Spjd break; 3275168404Spjd } 3276168404Spjd 3277168404Spjd return (NULL); 3278168404Spjd} 3279168404Spjd 3280168404Spjd/* 3281168404Spjd * Kick off threads to run tests on all datasets in parallel. 3282168404Spjd */ 3283168404Spjdstatic void 3284168404Spjdztest_run(char *pool) 3285168404Spjd{ 3286168404Spjd int t, d, error; 3287168404Spjd ztest_shared_t *zs = ztest_shared; 3288168404Spjd ztest_args_t *za; 3289168404Spjd spa_t *spa; 3290168404Spjd char name[100]; 3291185029Spjd thread_t resume_tid; 3292168404Spjd 3293185029Spjd ztest_exiting = B_FALSE; 3294185029Spjd 3295168404Spjd (void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL); 3296168404Spjd (void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL); 3297168404Spjd 3298168404Spjd for (t = 0; t < ZTEST_SYNC_LOCKS; t++) 3299168404Spjd (void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL); 3300168404Spjd 3301168404Spjd /* 3302168404Spjd * Destroy one disk before we even start. 3303168404Spjd * It's mirrored, so everything should work just fine. 3304168404Spjd * This makes us exercise fault handling very early in spa_load(). 3305168404Spjd */ 3306168404Spjd ztest_obliterate_one_disk(0); 3307168404Spjd 3308168404Spjd /* 3309168404Spjd * Verify that the sum of the sizes of all blocks in the pool 3310168404Spjd * equals the SPA's allocated space total. 3311168404Spjd */ 3312168404Spjd ztest_verify_blocks(pool); 3313168404Spjd 3314168404Spjd /* 3315168404Spjd * Kick off a replacement of the disk we just obliterated. 3316168404Spjd */ 3317168404Spjd kernel_init(FREAD | FWRITE); 3318185029Spjd VERIFY(spa_open(pool, &spa, FTAG) == 0); 3319168404Spjd ztest_replace_one_disk(spa, 0); 3320168404Spjd if (zopt_verbose >= 5) 3321168404Spjd show_pool_stats(spa); 3322168404Spjd spa_close(spa, FTAG); 3323168404Spjd kernel_fini(); 3324168404Spjd 3325168404Spjd kernel_init(FREAD | FWRITE); 3326168404Spjd 3327168404Spjd /* 3328168404Spjd * Verify that we can export the pool and reimport it under a 3329168404Spjd * different name. 3330168404Spjd */ 3331168404Spjd if (ztest_random(2) == 0) { 3332168404Spjd (void) snprintf(name, 100, "%s_import", pool); 3333168404Spjd ztest_spa_import_export(pool, name); 3334168404Spjd ztest_spa_import_export(name, pool); 3335168404Spjd } 3336168404Spjd 3337168404Spjd /* 3338168404Spjd * Verify that we can loop over all pools. 3339168404Spjd */ 3340168404Spjd mutex_enter(&spa_namespace_lock); 3341168404Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) { 3342168404Spjd if (zopt_verbose > 3) { 3343168404Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 3344168404Spjd } 3345168404Spjd } 3346168404Spjd mutex_exit(&spa_namespace_lock); 3347168404Spjd 3348168404Spjd /* 3349168404Spjd * Open our pool. 3350168404Spjd */ 3351185029Spjd VERIFY(spa_open(pool, &spa, FTAG) == 0); 3352168404Spjd 3353168404Spjd /* 3354185029Spjd * Create a thread to periodically resume suspended I/O. 3355185029Spjd */ 3356185029Spjd VERIFY(thr_create(0, 0, ztest_resume, spa, THR_BOUND, 3357185029Spjd &resume_tid) == 0); 3358185029Spjd 3359185029Spjd /* 3360168404Spjd * Verify that we can safely inquire about about any object, 3361168404Spjd * whether it's allocated or not. To make it interesting, 3362168404Spjd * we probe a 5-wide window around each power of two. 3363168404Spjd * This hits all edge cases, including zero and the max. 3364168404Spjd */ 3365168404Spjd for (t = 0; t < 64; t++) { 3366168404Spjd for (d = -5; d <= 5; d++) { 3367168404Spjd error = dmu_object_info(spa->spa_meta_objset, 3368168404Spjd (1ULL << t) + d, NULL); 3369168404Spjd ASSERT(error == 0 || error == ENOENT || 3370168404Spjd error == EINVAL); 3371168404Spjd } 3372168404Spjd } 3373168404Spjd 3374168404Spjd /* 3375168404Spjd * Now kick off all the tests that run in parallel. 3376168404Spjd */ 3377168404Spjd zs->zs_enospc_count = 0; 3378168404Spjd 3379168404Spjd za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL); 3380168404Spjd 3381168404Spjd if (zopt_verbose >= 4) 3382168404Spjd (void) printf("starting main threads...\n"); 3383168404Spjd 3384168404Spjd za[0].za_start = gethrtime(); 3385168404Spjd za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC; 3386168404Spjd za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time); 3387168404Spjd za[0].za_kill = za[0].za_stop; 3388168404Spjd if (ztest_random(100) < zopt_killrate) 3389168404Spjd za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC); 3390168404Spjd 3391168404Spjd for (t = 0; t < zopt_threads; t++) { 3392168404Spjd d = t % zopt_datasets; 3393185029Spjd 3394185029Spjd (void) strcpy(za[t].za_pool, pool); 3395185029Spjd za[t].za_os = za[d].za_os; 3396185029Spjd za[t].za_spa = spa; 3397185029Spjd za[t].za_zilog = za[d].za_zilog; 3398185029Spjd za[t].za_instance = t; 3399185029Spjd za[t].za_random = ztest_random(-1ULL); 3400185029Spjd za[t].za_start = za[0].za_start; 3401185029Spjd za[t].za_stop = za[0].za_stop; 3402185029Spjd za[t].za_kill = za[0].za_kill; 3403185029Spjd 3404168404Spjd if (t < zopt_datasets) { 3405168404Spjd ztest_replay_t zr; 3406168404Spjd int test_future = FALSE; 3407168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 3408168404Spjd (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d); 3409185029Spjd error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, 3410168404Spjd ztest_create_cb, NULL); 3411168404Spjd if (error == EEXIST) { 3412168404Spjd test_future = TRUE; 3413185029Spjd } else if (error == ENOSPC) { 3414185029Spjd zs->zs_enospc_count++; 3415185029Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 3416185029Spjd break; 3417168404Spjd } else if (error != 0) { 3418168404Spjd fatal(0, "dmu_objset_create(%s) = %d", 3419168404Spjd name, error); 3420168404Spjd } 3421168404Spjd error = dmu_objset_open(name, DMU_OST_OTHER, 3422185029Spjd DS_MODE_USER, &za[d].za_os); 3423168404Spjd if (error) 3424168404Spjd fatal(0, "dmu_objset_open('%s') = %d", 3425168404Spjd name, error); 3426168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 3427185029Spjd if (test_future) 3428185029Spjd ztest_dmu_check_future_leak(&za[t]); 3429168404Spjd zr.zr_os = za[d].za_os; 3430168404Spjd zil_replay(zr.zr_os, &zr, &zr.zr_assign, 3431185029Spjd ztest_replay_vector, NULL); 3432168404Spjd za[d].za_zilog = zil_open(za[d].za_os, NULL); 3433168404Spjd } 3434168404Spjd 3435185029Spjd VERIFY(thr_create(0, 0, ztest_thread, &za[t], THR_BOUND, 3436185029Spjd &za[t].za_thread) == 0); 3437168404Spjd } 3438168404Spjd 3439168404Spjd while (--t >= 0) { 3440185029Spjd VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0); 3441168404Spjd if (za[t].za_th) 3442168404Spjd traverse_fini(za[t].za_th); 3443168404Spjd if (t < zopt_datasets) { 3444168404Spjd zil_close(za[t].za_zilog); 3445168404Spjd dmu_objset_close(za[t].za_os); 3446168404Spjd } 3447168404Spjd } 3448168404Spjd 3449168404Spjd if (zopt_verbose >= 3) 3450168404Spjd show_pool_stats(spa); 3451168404Spjd 3452168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3453168404Spjd 3454168404Spjd zs->zs_alloc = spa_get_alloc(spa); 3455168404Spjd zs->zs_space = spa_get_space(spa); 3456168404Spjd 3457168404Spjd /* 3458185029Spjd * If we had out-of-space errors, destroy a random objset. 3459168404Spjd */ 3460168404Spjd if (zs->zs_enospc_count != 0) { 3461168404Spjd (void) rw_rdlock(&ztest_shared->zs_name_lock); 3462185029Spjd d = (int)ztest_random(zopt_datasets); 3463185029Spjd (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d); 3464168404Spjd if (zopt_verbose >= 3) 3465168404Spjd (void) printf("Destroying %s to free up space\n", name); 3466185029Spjd (void) dmu_objset_find(name, ztest_destroy_cb, &za[d], 3467168404Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 3468168404Spjd (void) rw_unlock(&ztest_shared->zs_name_lock); 3469168404Spjd } 3470168404Spjd 3471168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3472168404Spjd 3473185029Spjd umem_free(za, zopt_threads * sizeof (ztest_args_t)); 3474185029Spjd 3475185029Spjd /* Kill the resume thread */ 3476185029Spjd ztest_exiting = B_TRUE; 3477185029Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 3478185029Spjd 3479168404Spjd /* 3480168404Spjd * Right before closing the pool, kick off a bunch of async I/O; 3481168404Spjd * spa_close() should wait for it to complete. 3482168404Spjd */ 3483168404Spjd for (t = 1; t < 50; t++) 3484168404Spjd dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15); 3485168404Spjd 3486168404Spjd spa_close(spa, FTAG); 3487168404Spjd 3488168404Spjd kernel_fini(); 3489168404Spjd} 3490168404Spjd 3491168404Spjdvoid 3492168404Spjdprint_time(hrtime_t t, char *timebuf) 3493168404Spjd{ 3494168404Spjd hrtime_t s = t / NANOSEC; 3495168404Spjd hrtime_t m = s / 60; 3496168404Spjd hrtime_t h = m / 60; 3497168404Spjd hrtime_t d = h / 24; 3498168404Spjd 3499168404Spjd s -= m * 60; 3500168404Spjd m -= h * 60; 3501168404Spjd h -= d * 24; 3502168404Spjd 3503168404Spjd timebuf[0] = '\0'; 3504168404Spjd 3505168404Spjd if (d) 3506168404Spjd (void) sprintf(timebuf, 3507168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 3508168404Spjd else if (h) 3509168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 3510168404Spjd else if (m) 3511168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 3512168404Spjd else 3513168404Spjd (void) sprintf(timebuf, "%llus", s); 3514168404Spjd} 3515168404Spjd 3516168404Spjd/* 3517168404Spjd * Create a storage pool with the given name and initial vdev size. 3518168404Spjd * Then create the specified number of datasets in the pool. 3519168404Spjd */ 3520168404Spjdstatic void 3521168404Spjdztest_init(char *pool) 3522168404Spjd{ 3523168404Spjd spa_t *spa; 3524168404Spjd int error; 3525168404Spjd nvlist_t *nvroot; 3526168404Spjd 3527168404Spjd kernel_init(FREAD | FWRITE); 3528168404Spjd 3529168404Spjd /* 3530168404Spjd * Create the storage pool. 3531168404Spjd */ 3532168404Spjd (void) spa_destroy(pool); 3533168404Spjd ztest_shared->zs_vdev_primaries = 0; 3534185029Spjd nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0, 3535185029Spjd 0, zopt_raidz, zopt_mirrors, 1); 3536185029Spjd error = spa_create(pool, nvroot, NULL, NULL, NULL); 3537168404Spjd nvlist_free(nvroot); 3538168404Spjd 3539168404Spjd if (error) 3540168404Spjd fatal(0, "spa_create() = %d", error); 3541168404Spjd error = spa_open(pool, &spa, FTAG); 3542168404Spjd if (error) 3543168404Spjd fatal(0, "spa_open() = %d", error); 3544168404Spjd 3545168404Spjd if (zopt_verbose >= 3) 3546168404Spjd show_pool_stats(spa); 3547168404Spjd 3548168404Spjd spa_close(spa, FTAG); 3549168404Spjd 3550168404Spjd kernel_fini(); 3551168404Spjd} 3552168404Spjd 3553168404Spjdint 3554168404Spjdmain(int argc, char **argv) 3555168404Spjd{ 3556168404Spjd int kills = 0; 3557168404Spjd int iters = 0; 3558168404Spjd int i, f; 3559168404Spjd ztest_shared_t *zs; 3560168404Spjd ztest_info_t *zi; 3561168404Spjd char timebuf[100]; 3562168404Spjd char numbuf[6]; 3563168404Spjd 3564168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 3565168404Spjd 3566168404Spjd /* Override location of zpool.cache */ 3567185029Spjd spa_config_path = "/tmp/zpool.cache"; 3568168404Spjd 3569168404Spjd ztest_random_fd = open("/dev/urandom", O_RDONLY); 3570168404Spjd 3571168404Spjd process_options(argc, argv); 3572168404Spjd 3573168404Spjd argc -= optind; 3574168404Spjd argv += optind; 3575168404Spjd 3576168404Spjd dprintf_setup(&argc, argv); 3577168404Spjd 3578168404Spjd /* 3579168404Spjd * Blow away any existing copy of zpool.cache 3580168404Spjd */ 3581168404Spjd if (zopt_init != 0) 3582168404Spjd (void) remove("/tmp/zpool.cache"); 3583168404Spjd 3584168404Spjd zs = ztest_shared = (void *)mmap(0, 3585168404Spjd P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()), 3586168404Spjd PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); 3587168404Spjd 3588168404Spjd if (zopt_verbose >= 1) { 3589168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 3590168404Spjd " %llu seconds...\n", 3591168404Spjd (u_longlong_t)zopt_vdevs, zopt_datasets, zopt_threads, 3592168404Spjd (u_longlong_t)zopt_time); 3593168404Spjd } 3594168404Spjd 3595168404Spjd /* 3596168404Spjd * Create and initialize our storage pool. 3597168404Spjd */ 3598168404Spjd for (i = 1; i <= zopt_init; i++) { 3599168404Spjd bzero(zs, sizeof (ztest_shared_t)); 3600168404Spjd if (zopt_verbose >= 3 && zopt_init != 1) 3601168404Spjd (void) printf("ztest_init(), pass %d\n", i); 3602168404Spjd ztest_init(zopt_pool); 3603168404Spjd } 3604168404Spjd 3605168404Spjd /* 3606168404Spjd * Initialize the call targets for each function. 3607168404Spjd */ 3608168404Spjd for (f = 0; f < ZTEST_FUNCS; f++) { 3609168404Spjd zi = &zs->zs_info[f]; 3610168404Spjd 3611168404Spjd *zi = ztest_info[f]; 3612168404Spjd 3613168404Spjd if (*zi->zi_interval == 0) 3614168404Spjd zi->zi_call_target = UINT64_MAX; 3615168404Spjd else 3616168404Spjd zi->zi_call_target = zopt_time / *zi->zi_interval; 3617168404Spjd } 3618168404Spjd 3619168404Spjd zs->zs_start_time = gethrtime(); 3620168404Spjd zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC; 3621168404Spjd 3622168404Spjd /* 3623168404Spjd * Run the tests in a loop. These tests include fault injection 3624168404Spjd * to verify that self-healing data works, and forced crashes 3625168404Spjd * to verify that we never lose on-disk consistency. 3626168404Spjd */ 3627168404Spjd while (gethrtime() < zs->zs_stop_time) { 3628168404Spjd int status; 3629168404Spjd pid_t pid; 3630168404Spjd char *tmp; 3631168404Spjd 3632168404Spjd /* 3633168404Spjd * Initialize the workload counters for each function. 3634168404Spjd */ 3635168404Spjd for (f = 0; f < ZTEST_FUNCS; f++) { 3636168404Spjd zi = &zs->zs_info[f]; 3637168404Spjd zi->zi_calls = 0; 3638168404Spjd zi->zi_call_time = 0; 3639168404Spjd } 3640168404Spjd 3641168404Spjd pid = fork(); 3642168404Spjd 3643168404Spjd if (pid == -1) 3644168404Spjd fatal(1, "fork failed"); 3645168404Spjd 3646168404Spjd if (pid == 0) { /* child */ 3647168404Spjd struct rlimit rl = { 1024, 1024 }; 3648168404Spjd (void) setrlimit(RLIMIT_NOFILE, &rl); 3649168404Spjd (void) enable_extended_FILE_stdio(-1, -1); 3650168404Spjd ztest_run(zopt_pool); 3651168404Spjd exit(0); 3652168404Spjd } 3653168404Spjd 3654168404Spjd while (waitpid(pid, &status, 0) != pid) 3655168404Spjd continue; 3656168404Spjd 3657168404Spjd if (WIFEXITED(status)) { 3658168404Spjd if (WEXITSTATUS(status) != 0) { 3659168404Spjd (void) fprintf(stderr, 3660168404Spjd "child exited with code %d\n", 3661168404Spjd WEXITSTATUS(status)); 3662168404Spjd exit(2); 3663168404Spjd } 3664168404Spjd } else if (WIFSIGNALED(status)) { 3665168404Spjd if (WTERMSIG(status) != SIGKILL) { 3666168404Spjd (void) fprintf(stderr, 3667168404Spjd "child died with signal %d\n", 3668168404Spjd WTERMSIG(status)); 3669168404Spjd exit(3); 3670168404Spjd } 3671168404Spjd kills++; 3672168404Spjd } else { 3673168404Spjd (void) fprintf(stderr, "something strange happened " 3674168404Spjd "to child\n"); 3675168404Spjd exit(4); 3676168404Spjd } 3677168404Spjd 3678168404Spjd iters++; 3679168404Spjd 3680168404Spjd if (zopt_verbose >= 1) { 3681168404Spjd hrtime_t now = gethrtime(); 3682168404Spjd 3683168404Spjd now = MIN(now, zs->zs_stop_time); 3684168404Spjd print_time(zs->zs_stop_time - now, timebuf); 3685168404Spjd nicenum(zs->zs_space, numbuf); 3686168404Spjd 3687168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 3688168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 3689168404Spjd iters, 3690168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 3691168404Spjd (u_longlong_t)zs->zs_enospc_count, 3692168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 3693168404Spjd numbuf, 3694168404Spjd 100.0 * (now - zs->zs_start_time) / 3695168404Spjd (zopt_time * NANOSEC), timebuf); 3696168404Spjd } 3697168404Spjd 3698168404Spjd if (zopt_verbose >= 2) { 3699168404Spjd (void) printf("\nWorkload summary:\n\n"); 3700168404Spjd (void) printf("%7s %9s %s\n", 3701168404Spjd "Calls", "Time", "Function"); 3702168404Spjd (void) printf("%7s %9s %s\n", 3703168404Spjd "-----", "----", "--------"); 3704168404Spjd for (f = 0; f < ZTEST_FUNCS; f++) { 3705168404Spjd Dl_info dli; 3706168404Spjd 3707168404Spjd zi = &zs->zs_info[f]; 3708168404Spjd print_time(zi->zi_call_time, timebuf); 3709168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 3710168404Spjd (void) printf("%7llu %9s %s\n", 3711168404Spjd (u_longlong_t)zi->zi_calls, timebuf, 3712168404Spjd dli.dli_sname); 3713168404Spjd } 3714168404Spjd (void) printf("\n"); 3715168404Spjd } 3716168404Spjd 3717168404Spjd /* 3718168404Spjd * It's possible that we killed a child during a rename test, in 3719168404Spjd * which case we'll have a 'ztest_tmp' pool lying around instead 3720168404Spjd * of 'ztest'. Do a blind rename in case this happened. 3721168404Spjd */ 3722168404Spjd tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL); 3723168404Spjd (void) strcpy(tmp, zopt_pool); 3724168404Spjd (void) strcat(tmp, "_tmp"); 3725168404Spjd kernel_init(FREAD | FWRITE); 3726168404Spjd (void) spa_rename(tmp, zopt_pool); 3727168404Spjd kernel_fini(); 3728168404Spjd umem_free(tmp, strlen(tmp) + 1); 3729168404Spjd } 3730168404Spjd 3731168404Spjd ztest_verify_blocks(zopt_pool); 3732168404Spjd 3733168404Spjd if (zopt_verbose >= 1) { 3734168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 3735168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 3736168404Spjd } 3737168404Spjd 3738168404Spjd return (0); 3739168404Spjd} 3740