1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23339108Smav * Copyright (c) 2011, 2017 by Delphix. All rights reserved. 24228103Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 25236143Smm * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved. 27296519Smav * Copyright (c) 2014 Integros [integros.com] 28325914Savg * Copyright 2017 Joyent, Inc. 29332545Smav * Copyright 2017 RackTop Systems. 30168404Spjd */ 31168404Spjd 32168404Spjd/* 33168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 34168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 35168404Spjd * 36168404Spjd * The overall design of the ztest program is as follows: 37168404Spjd * 38168404Spjd * (1) For each major functional area (e.g. 
adding vdevs to a pool, 39168404Spjd * creating and destroying datasets, reading and writing objects, etc) 40168404Spjd * we have a simple routine to test that functionality. These 41168404Spjd * individual routines do not have to do anything "stressful". 42168404Spjd * 43168404Spjd * (2) We turn these simple functionality tests into a stress test by 44168404Spjd * running them all in parallel, with as many threads as desired, 45168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 46168404Spjd * 47168404Spjd * (3) While all this is happening, we inject faults into the pool to 48168404Spjd * verify that self-healing data really works. 49168404Spjd * 50168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 51168404Spjd * functions. Thus even individual objects vary from block to block 52168404Spjd * in which checksum they use and whether they're compressed. 53168404Spjd * 54168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 55168404Spjd * we run the entire test in a child of the main process. 56168404Spjd * At random times, the child self-immolates with a SIGKILL. 57168404Spjd * This is the software equivalent of pulling the power cord. 58168404Spjd * The parent then runs the test again, using the existing 59268075Sdelphij * storage pool, as many times as desired. If backwards compatibility 60236143Smm * testing is enabled ztest will sometimes run the "older" version 61236143Smm * of ztest after a SIGKILL. 62168404Spjd * 63168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 64168404Spjd * many of the functional tests record the transaction group number 65168404Spjd * as part of their data. When reading old data, they verify that 66168404Spjd * the transaction group number is less than the current, open txg. 67168404Spjd * If you add a new test, please do this if applicable. 
68168404Spjd * 69168404Spjd * When run with no arguments, ztest runs for about five minutes and 70168404Spjd * produces no output if successful. To get a little bit of information, 71168404Spjd * specify -V. To get more information, specify -VV, and so on. 72168404Spjd * 73168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 74168404Spjd * 75168404Spjd * You can ask more more vdevs [-v], datasets [-d], or threads [-t] 76168404Spjd * to increase the pool capacity, fanout, and overall stress level. 77168404Spjd * 78236143Smm * Use the -k option to set the desired frequency of kills. 79236143Smm * 80236143Smm * When ztest invokes itself it passes all relevant information through a 81236143Smm * temporary file which is mmap-ed in the child process. This allows shared 82236143Smm * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always 83236143Smm * stored at offset 0 of this file and contains information on the size and 84236143Smm * number of shared structures in the file. The information stored in this file 85236143Smm * must remain backwards compatible with older versions of ztest so that 86236143Smm * ztest can invoke them during backwards compatibility testing (-B). 
87168404Spjd */ 88168404Spjd 89168404Spjd#include <sys/zfs_context.h> 90168404Spjd#include <sys/spa.h> 91168404Spjd#include <sys/dmu.h> 92168404Spjd#include <sys/txg.h> 93209962Smm#include <sys/dbuf.h> 94168404Spjd#include <sys/zap.h> 95168404Spjd#include <sys/dmu_objset.h> 96168404Spjd#include <sys/poll.h> 97168404Spjd#include <sys/stat.h> 98168404Spjd#include <sys/time.h> 99168404Spjd#include <sys/wait.h> 100168404Spjd#include <sys/mman.h> 101168404Spjd#include <sys/resource.h> 102168404Spjd#include <sys/zio.h> 103168404Spjd#include <sys/zil.h> 104219089Spjd#include <sys/zil_impl.h> 105168404Spjd#include <sys/vdev_impl.h> 106185029Spjd#include <sys/vdev_file.h> 107339111Smav#include <sys/vdev_initialize.h> 108168404Spjd#include <sys/spa_impl.h> 109219089Spjd#include <sys/metaslab_impl.h> 110168404Spjd#include <sys/dsl_prop.h> 111207910Smm#include <sys/dsl_dataset.h> 112248571Smm#include <sys/dsl_destroy.h> 113219089Spjd#include <sys/dsl_scan.h> 114219089Spjd#include <sys/zio_checksum.h> 115168404Spjd#include <sys/refcount.h> 116236884Smm#include <sys/zfeature.h> 117248571Smm#include <sys/dsl_userhold.h> 118321610Smav#include <sys/abd.h> 119168404Spjd#include <stdio.h> 120168404Spjd#include <stdio_ext.h> 121168404Spjd#include <stdlib.h> 122168404Spjd#include <unistd.h> 123168404Spjd#include <signal.h> 124168404Spjd#include <umem.h> 125168404Spjd#include <dlfcn.h> 126168404Spjd#include <ctype.h> 127168404Spjd#include <math.h> 128168404Spjd#include <errno.h> 129168404Spjd#include <sys/fs/zfs.h> 130219089Spjd#include <libnvpair.h> 131325914Savg#include <libcmdutils.h> 132168404Spjd 133242845Sdelphijstatic int ztest_fd_data = -1; 134242845Sdelphijstatic int ztest_fd_rand = -1; 135168404Spjd 136236143Smmtypedef struct ztest_shared_hdr { 137236143Smm uint64_t zh_hdr_size; 138236143Smm uint64_t zh_opts_size; 139236143Smm uint64_t zh_size; 140236143Smm uint64_t zh_stats_size; 141236143Smm uint64_t zh_stats_count; 142236143Smm uint64_t zh_ds_size; 143236143Smm uint64_t 
zh_ds_count; 144236143Smm} ztest_shared_hdr_t; 145168404Spjd 146236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 147236143Smm 148236143Smmtypedef struct ztest_shared_opts { 149307108Smav char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; 150307108Smav char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; 151236143Smm char zo_alt_ztest[MAXNAMELEN]; 152236143Smm char zo_alt_libpath[MAXNAMELEN]; 153236143Smm uint64_t zo_vdevs; 154236143Smm uint64_t zo_vdevtime; 155236143Smm size_t zo_vdev_size; 156236143Smm int zo_ashift; 157236143Smm int zo_mirrors; 158236143Smm int zo_raidz; 159236143Smm int zo_raidz_parity; 160236143Smm int zo_datasets; 161236143Smm int zo_threads; 162236143Smm uint64_t zo_passtime; 163236143Smm uint64_t zo_killrate; 164236143Smm int zo_verbose; 165236143Smm int zo_init; 166236143Smm uint64_t zo_time; 167236143Smm uint64_t zo_maxloops; 168332553Smav uint64_t zo_metaslab_force_ganging; 169236143Smm} ztest_shared_opts_t; 170236143Smm 171236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 172236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 173236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 174236143Smm .zo_alt_ztest = { '\0' }, 175236143Smm .zo_alt_libpath = { '\0' }, 176236143Smm .zo_vdevs = 5, 177236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 178236143Smm .zo_mirrors = 2, 179236143Smm .zo_raidz = 4, 180236143Smm .zo_raidz_parity = 1, 181321529Smav .zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */ 182236143Smm .zo_datasets = 7, 183236143Smm .zo_threads = 23, 184236143Smm .zo_passtime = 60, /* 60 seconds */ 185236143Smm .zo_killrate = 70, /* 70% kill rate */ 186236143Smm .zo_verbose = 0, 187236143Smm .zo_init = 1, 188236143Smm .zo_time = 300, /* 5 minutes */ 189236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 190332553Smav .zo_metaslab_force_ganging = 32 << 10 191236143Smm}; 192236143Smm 193332553Smavextern uint64_t metaslab_force_ganging; 194236143Smmextern uint64_t metaslab_df_alloc_threshold; 195258632Savgextern uint64_t 
zfs_deadman_synctime_ms; 196268086Sdelphijextern int metaslab_preload_limit; 197307265Smavextern boolean_t zfs_compressed_arc_enabled; 198321610Smavextern boolean_t zfs_abd_scatter_enabled; 199339104Smavextern boolean_t zfs_force_some_double_word_sm_entries; 200236143Smm 201236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 202236143Smmstatic ztest_shared_opts_t ztest_opts; 203236143Smm 204236143Smmtypedef struct ztest_shared_ds { 205236143Smm uint64_t zd_seq; 206236143Smm} ztest_shared_ds_t; 207236143Smm 208236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 209236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 210236143Smm 211219089Spjd#define BT_MAGIC 0x123456789abcdefULL 212236143Smm#define MAXFAULTS() \ 213236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 214219089Spjd 215219089Spjdenum ztest_io_type { 216219089Spjd ZTEST_IO_WRITE_TAG, 217219089Spjd ZTEST_IO_WRITE_PATTERN, 218219089Spjd ZTEST_IO_WRITE_ZEROES, 219219089Spjd ZTEST_IO_TRUNCATE, 220219089Spjd ZTEST_IO_SETATTR, 221243524Smm ZTEST_IO_REWRITE, 222219089Spjd ZTEST_IO_TYPES 223219089Spjd}; 224219089Spjd 225185029Spjdtypedef struct ztest_block_tag { 226219089Spjd uint64_t bt_magic; 227185029Spjd uint64_t bt_objset; 228185029Spjd uint64_t bt_object; 229185029Spjd uint64_t bt_offset; 230219089Spjd uint64_t bt_gen; 231185029Spjd uint64_t bt_txg; 232219089Spjd uint64_t bt_crtxg; 233185029Spjd} ztest_block_tag_t; 234185029Spjd 235219089Spjdtypedef struct bufwad { 236219089Spjd uint64_t bw_index; 237219089Spjd uint64_t bw_txg; 238219089Spjd uint64_t bw_data; 239219089Spjd} bufwad_t; 240168404Spjd 241219089Spjd/* 242219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 
243219089Spjd */ 244219089Spjdtypedef enum { 245219089Spjd RL_READER, 246219089Spjd RL_WRITER, 247219089Spjd RL_APPEND 248219089Spjd} rl_type_t; 249168404Spjd 250219089Spjdtypedef struct rll { 251219089Spjd void *rll_writer; 252219089Spjd int rll_readers; 253332545Smav kmutex_t rll_lock; 254332545Smav kcondvar_t rll_cv; 255219089Spjd} rll_t; 256219089Spjd 257219089Spjdtypedef struct rl { 258219089Spjd uint64_t rl_object; 259219089Spjd uint64_t rl_offset; 260219089Spjd uint64_t rl_size; 261219089Spjd rll_t *rl_lock; 262219089Spjd} rl_t; 263219089Spjd 264219089Spjd#define ZTEST_RANGE_LOCKS 64 265219089Spjd#define ZTEST_OBJECT_LOCKS 64 266219089Spjd 267168404Spjd/* 268219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 269219089Spjd */ 270219089Spjdtypedef struct ztest_od { 271219089Spjd uint64_t od_dir; 272219089Spjd uint64_t od_object; 273219089Spjd dmu_object_type_t od_type; 274219089Spjd dmu_object_type_t od_crtype; 275219089Spjd uint64_t od_blocksize; 276219089Spjd uint64_t od_crblocksize; 277219089Spjd uint64_t od_gen; 278219089Spjd uint64_t od_crgen; 279307108Smav char od_name[ZFS_MAX_DATASET_NAME_LEN]; 280219089Spjd} ztest_od_t; 281219089Spjd 282219089Spjd/* 283219089Spjd * Per-dataset state. 284219089Spjd */ 285219089Spjdtypedef struct ztest_ds { 286236143Smm ztest_shared_ds_t *zd_shared; 287219089Spjd objset_t *zd_os; 288332545Smav krwlock_t zd_zilog_lock; 289219089Spjd zilog_t *zd_zilog; 290219089Spjd ztest_od_t *zd_od; /* debugging aid */ 291307108Smav char zd_name[ZFS_MAX_DATASET_NAME_LEN]; 292332545Smav kmutex_t zd_dirobj_lock; 293219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 294219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 295219089Spjd} ztest_ds_t; 296219089Spjd 297219089Spjd/* 298219089Spjd * Per-iteration state. 
299219089Spjd */ 300219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 301219089Spjd 302219089Spjdtypedef struct ztest_info { 303219089Spjd ztest_func_t *zi_func; /* test function */ 304219089Spjd uint64_t zi_iters; /* iterations per execution */ 305219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 306219089Spjd} ztest_info_t; 307219089Spjd 308236143Smmtypedef struct ztest_shared_callstate { 309236143Smm uint64_t zc_count; /* per-pass count */ 310236143Smm uint64_t zc_time; /* per-pass time */ 311236143Smm uint64_t zc_next; /* next time to call this function */ 312236143Smm} ztest_shared_callstate_t; 313236143Smm 314236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 315236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 316236143Smm 317219089Spjd/* 318168404Spjd * Note: these aren't static because we want dladdr() to work. 319168404Spjd */ 320168404Spjdztest_func_t ztest_dmu_read_write; 321168404Spjdztest_func_t ztest_dmu_write_parallel; 322168404Spjdztest_func_t ztest_dmu_object_alloc_free; 323219089Spjdztest_func_t ztest_dmu_commit_callbacks; 324168404Spjdztest_func_t ztest_zap; 325168404Spjdztest_func_t ztest_zap_parallel; 326219089Spjdztest_func_t ztest_zil_commit; 327224526Smmztest_func_t ztest_zil_remount; 328219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 329168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 330219089Spjdztest_func_t ztest_dmu_prealloc; 331219089Spjdztest_func_t ztest_fzap; 332168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 333219089Spjdztest_func_t ztest_dsl_prop_get_set; 334219089Spjdztest_func_t ztest_spa_prop_get_set; 335168404Spjdztest_func_t ztest_spa_create_destroy; 336168404Spjdztest_func_t ztest_fault_inject; 337219089Spjdztest_func_t ztest_ddt_repair; 338219089Spjdztest_func_t ztest_dmu_snapshot_hold; 339185029Spjdztest_func_t ztest_spa_rename; 340219089Spjdztest_func_t ztest_scrub; 341219089Spjdztest_func_t 
ztest_dsl_dataset_promote_busy; 342168404Spjdztest_func_t ztest_vdev_attach_detach; 343168404Spjdztest_func_t ztest_vdev_LUN_growth; 344168404Spjdztest_func_t ztest_vdev_add_remove; 345185029Spjdztest_func_t ztest_vdev_aux_add_remove; 346219089Spjdztest_func_t ztest_split_pool; 347228103Smmztest_func_t ztest_reguid; 348243505Smmztest_func_t ztest_spa_upgrade; 349332525Smavztest_func_t ztest_device_removal; 350332525Smavztest_func_t ztest_remap_blocks; 351332547Smavztest_func_t ztest_spa_checkpoint_create_discard; 352339111Smavztest_func_t ztest_initialize; 353168404Spjd 354219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 355219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 356219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 357219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 358219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 359168404Spjd 360168404Spjdztest_info_t ztest_info[] = { 361185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 362219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 363185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 364219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 365185029Spjd { ztest_zap, 30, &zopt_always }, 366185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 367219089Spjd { ztest_split_pool, 1, &zopt_always }, 368219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 369224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 370219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 371219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 372219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 373219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 374219089Spjd#if 0 375219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 376219089Spjd#endif 377219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 378219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 
379219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 380339034Ssef { ztest_fault_inject, 1, &zopt_incessant }, 381219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 382219089Spjd { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 383254074Sdelphij { ztest_reguid, 1, &zopt_rarely }, 384185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 385339034Ssef { ztest_scrub, 1, &zopt_often }, 386243505Smm { ztest_spa_upgrade, 1, &zopt_rarely }, 387219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 388339034Ssef { ztest_vdev_attach_detach, 1, &zopt_incessant }, 389185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 390236143Smm { ztest_vdev_add_remove, 1, 391236143Smm &ztest_opts.zo_vdevtime }, 392236143Smm { ztest_vdev_aux_add_remove, 1, 393236143Smm &ztest_opts.zo_vdevtime }, 394332525Smav { ztest_device_removal, 1, &zopt_sometimes }, 395332547Smav { ztest_remap_blocks, 1, &zopt_sometimes }, 396339111Smav { ztest_spa_checkpoint_create_discard, 1, &zopt_rarely }, 397339111Smav { ztest_initialize, 1, &zopt_sometimes } 398168404Spjd}; 399168404Spjd 400168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 401168404Spjd 402219089Spjd/* 403219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 404219089Spjd * The callbacks are ordered by txg number. 405219089Spjd */ 406219089Spjdtypedef struct ztest_cb_list { 407332545Smav kmutex_t zcl_callbacks_lock; 408219089Spjd list_t zcl_callbacks; 409219089Spjd} ztest_cb_list_t; 410168404Spjd 411168404Spjd/* 412168404Spjd * Stuff we need to share writably between parent and child. 
413168404Spjd */ 414168404Spjdtypedef struct ztest_shared { 415236143Smm boolean_t zs_do_init; 416219089Spjd hrtime_t zs_proc_start; 417219089Spjd hrtime_t zs_proc_stop; 418219089Spjd hrtime_t zs_thread_start; 419219089Spjd hrtime_t zs_thread_stop; 420219089Spjd hrtime_t zs_thread_kill; 421219089Spjd uint64_t zs_enospc_count; 422219089Spjd uint64_t zs_vdev_next_leaf; 423185029Spjd uint64_t zs_vdev_aux; 424168404Spjd uint64_t zs_alloc; 425168404Spjd uint64_t zs_space; 426219089Spjd uint64_t zs_splits; 427219089Spjd uint64_t zs_mirrors; 428236143Smm uint64_t zs_metaslab_sz; 429236143Smm uint64_t zs_metaslab_df_alloc_threshold; 430236143Smm uint64_t zs_guid; 431168404Spjd} ztest_shared_t; 432168404Spjd 433219089Spjd#define ID_PARALLEL -1ULL 434219089Spjd 435168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 436185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 437219089Spjdztest_shared_t *ztest_shared; 438168404Spjd 439236143Smmstatic spa_t *ztest_spa = NULL; 440236143Smmstatic ztest_ds_t *ztest_ds; 441168404Spjd 442332545Smavstatic kmutex_t ztest_vdev_lock; 443332547Smavstatic kmutex_t ztest_checkpoint_lock; 444339106Smavstatic boolean_t ztest_device_removal_active = B_FALSE; 445239620Smm 446239620Smm/* 447239620Smm * The ztest_name_lock protects the pool and dataset namespace used by 448239620Smm * the individual tests. To modify the namespace, consumers must grab 449239620Smm * this lock as writer. Grabbing the lock as reader will ensure that the 450239620Smm * namespace does not change while the lock is held. 
451239620Smm */ 452332545Smavstatic krwlock_t ztest_name_lock; 453236143Smm 454236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 455185029Spjdstatic boolean_t ztest_exiting; 456168404Spjd 457219089Spjd/* Global commit callback list */ 458219089Spjdstatic ztest_cb_list_t zcl; 459219089Spjd 460219089Spjdenum ztest_object { 461219089Spjd ZTEST_META_DNODE = 0, 462219089Spjd ZTEST_DIROBJ, 463219089Spjd ZTEST_OBJECTS 464219089Spjd}; 465168404Spjd 466168676Spjdstatic void usage(boolean_t) __NORETURN; 467168498Spjd 468168404Spjd/* 469168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 470168404Spjd * debugging facilities. 471168404Spjd */ 472168404Spjdconst char * 473168404Spjd_umem_debug_init() 474168404Spjd{ 475168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 476168404Spjd} 477168404Spjd 478168404Spjdconst char * 479168404Spjd_umem_logging_init(void) 480168404Spjd{ 481168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 482168404Spjd} 483168404Spjd 484168404Spjd#define FATAL_MSG_SZ 1024 485168404Spjd 486168404Spjdchar *fatal_msg; 487168404Spjd 488168404Spjdstatic void 489168404Spjdfatal(int do_perror, char *message, ...) 
490168404Spjd{ 491168404Spjd va_list args; 492168404Spjd int save_errno = errno; 493168404Spjd char buf[FATAL_MSG_SZ]; 494168404Spjd 495168404Spjd (void) fflush(stdout); 496168404Spjd 497168404Spjd va_start(args, message); 498168404Spjd (void) sprintf(buf, "ztest: "); 499168404Spjd /* LINTED */ 500168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 501168404Spjd va_end(args); 502168404Spjd if (do_perror) { 503168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 504168404Spjd ": %s", strerror(save_errno)); 505168404Spjd } 506168404Spjd (void) fprintf(stderr, "%s\n", buf); 507168404Spjd fatal_msg = buf; /* to ease debugging */ 508168404Spjd if (ztest_dump_core) 509168404Spjd abort(); 510168404Spjd exit(3); 511168404Spjd} 512168404Spjd 513168404Spjdstatic int 514168404Spjdstr2shift(const char *buf) 515168404Spjd{ 516168404Spjd const char *ends = "BKMGTPEZ"; 517168404Spjd int i; 518168404Spjd 519168404Spjd if (buf[0] == '\0') 520168404Spjd return (0); 521168404Spjd for (i = 0; i < strlen(ends); i++) { 522168404Spjd if (toupper(buf[0]) == ends[i]) 523168404Spjd break; 524168404Spjd } 525168498Spjd if (i == strlen(ends)) { 526168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 527168498Spjd buf); 528168498Spjd usage(B_FALSE); 529168498Spjd } 530168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 531168404Spjd return (10*i); 532168404Spjd } 533168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 534168498Spjd usage(B_FALSE); 535168498Spjd /* NOTREACHED */ 536168404Spjd} 537168404Spjd 538168404Spjdstatic uint64_t 539168404Spjdnicenumtoull(const char *buf) 540168404Spjd{ 541168404Spjd char *end; 542168404Spjd uint64_t val; 543168404Spjd 544168404Spjd val = strtoull(buf, &end, 0); 545168404Spjd if (end == buf) { 546168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 547168498Spjd usage(B_FALSE); 548168404Spjd } else if (end[0] == '.') { 549168404Spjd 
double fval = strtod(buf, &end); 550168404Spjd fval *= pow(2, str2shift(end)); 551168498Spjd if (fval > UINT64_MAX) { 552168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 553168498Spjd buf); 554168498Spjd usage(B_FALSE); 555168498Spjd } 556168404Spjd val = (uint64_t)fval; 557168404Spjd } else { 558168404Spjd int shift = str2shift(end); 559168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 560168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 561168498Spjd buf); 562168498Spjd usage(B_FALSE); 563168498Spjd } 564168404Spjd val <<= shift; 565168404Spjd } 566168404Spjd return (val); 567168404Spjd} 568168404Spjd 569168404Spjdstatic void 570168498Spjdusage(boolean_t requested) 571168404Spjd{ 572236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 573236143Smm 574325914Savg char nice_vdev_size[NN_NUMBUF_SZ]; 575332553Smav char nice_force_ganging[NN_NUMBUF_SZ]; 576168498Spjd FILE *fp = requested ? stdout : stderr; 577168404Spjd 578325914Savg nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size)); 579332553Smav nicenum(zo->zo_metaslab_force_ganging, nice_force_ganging, 580332553Smav sizeof (nice_force_ganging)); 581168404Spjd 582168498Spjd (void) fprintf(fp, "Usage: %s\n" 583168404Spjd "\t[-v vdevs (default: %llu)]\n" 584168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 585219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 586168404Spjd "\t[-m mirror_copies (default: %d)]\n" 587168404Spjd "\t[-r raidz_disks (default: %d)]\n" 588168404Spjd "\t[-R raidz_parity (default: %d)]\n" 589168404Spjd "\t[-d datasets (default: %d)]\n" 590168404Spjd "\t[-t threads (default: %d)]\n" 591168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 592219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 593219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 594168404Spjd "\t[-p pool_name (default: %s)]\n" 595219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 
596219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 597219089Spjd "\t[-E] use existing pool instead of creating new one\n" 598219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 599219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 600219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 601236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 602324973Sasomers "\t[-o variable=value] ... set global variable to an unsigned\n" 603324973Sasomers "\t 32-bit integer value\n" 604168498Spjd "\t[-h] (print help)\n" 605168404Spjd "", 606236143Smm zo->zo_pool, 607236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 608185029Spjd nice_vdev_size, /* -s */ 609236143Smm zo->zo_ashift, /* -a */ 610236143Smm zo->zo_mirrors, /* -m */ 611236143Smm zo->zo_raidz, /* -r */ 612236143Smm zo->zo_raidz_parity, /* -R */ 613236143Smm zo->zo_datasets, /* -d */ 614236143Smm zo->zo_threads, /* -t */ 615332553Smav nice_force_ganging, /* -g */ 616236143Smm zo->zo_init, /* -i */ 617236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 618236143Smm zo->zo_pool, /* -p */ 619236143Smm zo->zo_dir, /* -f */ 620236143Smm (u_longlong_t)zo->zo_time, /* -T */ 621236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 622236143Smm (u_longlong_t)zo->zo_passtime); 623168498Spjd exit(requested ? 
0 : 1); 624168404Spjd} 625168404Spjd 626168404Spjdstatic void 627168404Spjdprocess_options(int argc, char **argv) 628168404Spjd{ 629236143Smm char *path; 630236143Smm ztest_shared_opts_t *zo = &ztest_opts; 631236143Smm 632168404Spjd int opt; 633168404Spjd uint64_t value; 634236143Smm char altdir[MAXNAMELEN] = { 0 }; 635168404Spjd 636236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 637168404Spjd 638168404Spjd while ((opt = getopt(argc, argv, 639324973Sasomers "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) { 640168404Spjd value = 0; 641168404Spjd switch (opt) { 642185029Spjd case 'v': 643185029Spjd case 's': 644185029Spjd case 'a': 645185029Spjd case 'm': 646185029Spjd case 'r': 647185029Spjd case 'R': 648185029Spjd case 'd': 649185029Spjd case 't': 650185029Spjd case 'g': 651185029Spjd case 'i': 652185029Spjd case 'k': 653185029Spjd case 'T': 654185029Spjd case 'P': 655219089Spjd case 'F': 656168404Spjd value = nicenumtoull(optarg); 657168404Spjd } 658168404Spjd switch (opt) { 659185029Spjd case 'v': 660236143Smm zo->zo_vdevs = value; 661168404Spjd break; 662185029Spjd case 's': 663236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 664168404Spjd break; 665185029Spjd case 'a': 666236143Smm zo->zo_ashift = value; 667168404Spjd break; 668185029Spjd case 'm': 669236143Smm zo->zo_mirrors = value; 670168404Spjd break; 671185029Spjd case 'r': 672236143Smm zo->zo_raidz = MAX(1, value); 673168404Spjd break; 674185029Spjd case 'R': 675236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 676168404Spjd break; 677185029Spjd case 'd': 678236143Smm zo->zo_datasets = MAX(1, value); 679168404Spjd break; 680185029Spjd case 't': 681236143Smm zo->zo_threads = MAX(1, value); 682168404Spjd break; 683185029Spjd case 'g': 684332553Smav zo->zo_metaslab_force_ganging = 685332553Smav MAX(SPA_MINBLOCKSIZE << 1, value); 686168404Spjd break; 687185029Spjd case 'i': 688236143Smm zo->zo_init = value; 689168404Spjd break; 690185029Spjd case 'k': 691236143Smm zo->zo_killrate = 
value; 692168404Spjd break; 693185029Spjd case 'p': 694236143Smm (void) strlcpy(zo->zo_pool, optarg, 695236143Smm sizeof (zo->zo_pool)); 696168404Spjd break; 697185029Spjd case 'f': 698236143Smm path = realpath(optarg, NULL); 699236143Smm if (path == NULL) { 700236143Smm (void) fprintf(stderr, "error: %s: %s\n", 701236143Smm optarg, strerror(errno)); 702236143Smm usage(B_FALSE); 703236143Smm } else { 704236143Smm (void) strlcpy(zo->zo_dir, path, 705236143Smm sizeof (zo->zo_dir)); 706236143Smm } 707168404Spjd break; 708185029Spjd case 'V': 709236143Smm zo->zo_verbose++; 710168404Spjd break; 711185029Spjd case 'E': 712236143Smm zo->zo_init = 0; 713168404Spjd break; 714185029Spjd case 'T': 715236143Smm zo->zo_time = value; 716168404Spjd break; 717185029Spjd case 'P': 718236143Smm zo->zo_passtime = MAX(1, value); 719168404Spjd break; 720219089Spjd case 'F': 721236143Smm zo->zo_maxloops = MAX(1, value); 722219089Spjd break; 723236143Smm case 'B': 724236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 725236143Smm break; 726324973Sasomers case 'o': 727324973Sasomers if (set_global_var(optarg) != 0) 728324973Sasomers usage(B_FALSE); 729324973Sasomers break; 730185029Spjd case 'h': 731168498Spjd usage(B_TRUE); 732168498Spjd break; 733185029Spjd case '?': 734185029Spjd default: 735168498Spjd usage(B_FALSE); 736168404Spjd break; 737168404Spjd } 738168404Spjd } 739168404Spjd 740236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 741168404Spjd 742236143Smm zo->zo_vdevtime = 743236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 744219089Spjd UINT64_MAX >> 2); 745236143Smm 746236143Smm if (strlen(altdir) > 0) { 747242845Sdelphij char *cmd; 748242845Sdelphij char *realaltdir; 749236143Smm char *bin; 750236143Smm char *ztest; 751236143Smm char *isa; 752236143Smm int isalen; 753236143Smm 754242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 755242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 756242845Sdelphij 757242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 758236143Smm if (0 != access(altdir, F_OK)) { 759236143Smm ztest_dump_core = B_FALSE; 760236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 761236143Smm altdir); 762236143Smm } 763236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 764236143Smm 765236143Smm /* 766236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 767236143Smm * We want to extract <isa> to determine if we should use 768236143Smm * 32 or 64 bit binaries. 769236143Smm */ 770236143Smm bin = strstr(cmd, "/usr/bin/"); 771236143Smm ztest = strstr(bin, "/ztest"); 772236143Smm isa = bin + 9; 773236143Smm isalen = ztest - isa; 774236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 775236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 776236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 777236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 778236143Smm 779236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 780236143Smm ztest_dump_core = B_FALSE; 781236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 782236143Smm zo->zo_alt_ztest); 783236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 784236143Smm ztest_dump_core = B_FALSE; 785236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 786236143Smm zo->zo_alt_libpath); 787236143Smm } 788242845Sdelphij 789242845Sdelphij umem_free(cmd, MAXPATHLEN); 790242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 791236143Smm } 792168404Spjd} 793168404Spjd 
/*
 * Simulate a power failure: record current allocation stats, force the
 * cached pool config out to disk so the pool is importable afterwards,
 * then SIGKILL ourselves.  The parent process reopens the pool to verify
 * on-disk consistency.
 */
static void
ztest_kill(ztest_shared_t *zs)
{
	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));

	/*
	 * Before we kill off ztest, make sure that the config is updated.
	 * See comment above spa_write_cachefile().
	 */
	mutex_enter(&spa_namespace_lock);
	spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
	mutex_exit(&spa_namespace_lock);

	zfs_dbgmsg_print(FTAG);
	(void) kill(getpid(), SIGKILL);
}

/*
 * Return a random value in [0, range).  Randomness comes from the
 * already-open /dev/urandom descriptor ztest_fd_rand.
 */
static uint64_t
ztest_random(uint64_t range)
{
	uint64_t r;

	ASSERT3S(ztest_fd_rand, >=, 0);

	if (range == 0)
		return (0);

	if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
		fatal(1, "short read from /dev/urandom");

	return (r % range);
}

/* Bump the shared out-of-space counter; 's' is unused. */
/* ARGSUSED */
static void
ztest_record_enospc(const char *s)
{
	ztest_shared->zs_enospc_count++;
}

/* Return the configured ashift, or a random one if none was given. */
static uint64_t
ztest_get_ashift(void)
{
	if (ztest_opts.zo_ashift == 0)
		return (SPA_MINBLOCKSHIFT + ztest_random(5));
	return (ztest_opts.zo_ashift);
}

/*
 * Build an nvlist describing a single file vdev.  If 'path' is NULL a
 * name is generated from the dev/aux templates; if 'size' is nonzero the
 * backing file is created/truncated to that size.
 */
static nvlist_t *
make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
{
	char pathbuf[MAXPATHLEN];
	uint64_t vdev;
	nvlist_t *file;

	if (ashift == 0)
		ashift = ztest_get_ashift();

	if (path == NULL) {
		path = pathbuf;

		if (aux != NULL) {
			vdev = ztest_shared->zs_vdev_aux;
			(void) snprintf(path, sizeof (pathbuf),
			    ztest_aux_template, ztest_opts.zo_dir,
			    pool == NULL ? ztest_opts.zo_pool : pool,
			    aux, vdev);
		} else {
			vdev = ztest_shared->zs_vdev_next_leaf++;
			(void) snprintf(path, sizeof (pathbuf),
			    ztest_dev_template, ztest_opts.zo_dir,
			    pool == NULL ? ztest_opts.zo_pool : pool, vdev);
		}
	}

	if (size != 0) {
		int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
		if (fd == -1)
			fatal(1, "can't open %s", path);
		if (ftruncate(fd, size) != 0)
			fatal(1, "can't ftruncate %s", path);
		(void) close(fd);
	}

	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);

	return (file);
}

/*
 * Build a raidz vdev of 'r' file vdevs.  With r < 2 this degenerates to
 * a single file vdev.
 */
static nvlist_t *
make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
    uint64_t ashift, int r)
{
	nvlist_t *raidz, **child;
	int c;

	if (r < 2)
		return (make_vdev_file(path, aux, pool, size, ashift));
	child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);

	for (c = 0; c < r; c++)
		child[c] = make_vdev_file(path, aux, pool, size, ashift);

	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_RAIDZ) == 0);
	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
	    ztest_opts.zo_raidz_parity) == 0);
	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
	    child, r) == 0);

	for (c = 0; c < r; c++)
		nvlist_free(child[c]);

	umem_free(child, r * sizeof (nvlist_t *));

	return (raidz);
}

/*
 * Build an m-way mirror of raidz vdevs.  With m < 1 this degenerates to
 * a single raidz vdev.
 */
static nvlist_t *
make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
    uint64_t ashift, int r, int m)
{
	nvlist_t *mirror, **child;
	int c;

	if (m < 1)
		return (make_vdev_raidz(path, aux, pool, size, ashift, r));

	child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);

	for (c = 0; c < m; c++)
		child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r);

	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_MIRROR) == 0);
	VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
	    child, m) == 0);

	for (c = 0; c < m; c++)
		nvlist_free(child[c]);

	umem_free(child, m * sizeof (nvlist_t *));

	return (mirror);
}

/*
 * Build a root vdev with 't' top-level mirror children.  When 'aux' is
 * non-NULL the children are placed under that aux name (e.g. spares)
 * rather than under ZPOOL_CONFIG_CHILDREN.
 */
static nvlist_t *
make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
    int log, int r, int m, int t)
{
	nvlist_t *root, **child;
	int c;

	ASSERT(t > 0);

	child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);

	for (c = 0; c < t; c++) {
		child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
		    r, m);
		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    log) == 0);
	}

	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
	    child, t) == 0);

	for (c = 0; c < t; c++)
		nvlist_free(child[c]);

	umem_free(child, t * sizeof (nvlist_t *));

	return (root);
}

/*
 * Find a random spa version. Returns back a random spa version in the
 * range [initial_version, SPA_VERSION_FEATURES].
 */
static uint64_t
ztest_random_spa_version(uint64_t initial_version)
{
	uint64_t version = initial_version;

	if (version <= SPA_VERSION_BEFORE_FEATURES) {
		version = version +
		    ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1);
	}

	if (version > SPA_VERSION_BEFORE_FEATURES)
		version = SPA_VERSION_FEATURES;

	ASSERT(SPA_VERSION_IS_SUPPORTED(version));
	return (version);
}

/* Pick a random power-of-two block size no smaller than the pool ashift. */
static int
ztest_random_blocksize(void)
{
	uint64_t block_shift;
	/*
	 * Choose a block size >= the ashift.
	 * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
	 */
	int maxbs = SPA_OLD_MAXBLOCKSHIFT;
	if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
		maxbs = 20;
	block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1);
	return (1 << (SPA_MINBLOCKSHIFT + block_shift));
}

/* Pick a random indirect-block shift within the supported DN range. */
static int
ztest_random_ibshift(void)
{
	return (DN_MIN_INDBLKSHIFT +
	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
}

/*
 * Pick a random usable top-level vdev index.  Skips non-concrete vdevs,
 * vdevs with no metaslab group/class, and (unless log_ok) log vdevs.
 * Caller must hold the spa config lock as reader.
 */
static uint64_t
ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
{
	uint64_t top;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);

	do {
		top = ztest_random(rvd->vdev_children);
		tvd = rvd->vdev_child[top];
	} while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) ||
	    tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);

	return (top);
}

/*
 * Pick a random valid value for the given DSL property, never returning
 * ZIO_CHECKSUM_OFF for the checksum property.
 */
static uint64_t
ztest_random_dsl_prop(zfs_prop_t prop)
{
	uint64_t value;

	do {
		value = zfs_prop_random_value(prop, ztest_random(-1ULL));
	} while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);

	return (value);
}

/*
 * Set (or inherit) a DSL property on 'osname' and verify the result.
 * ENOSPC is recorded and returned; any other failure asserts.
 */
static int
ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
    boolean_t inherit)
{
	const char *propname = zfs_prop_to_name(prop);
	const char *valname;
	char setpoint[MAXPATHLEN];
	uint64_t curval;
	int error;

	error = dsl_prop_set_int(osname, propname,
	    (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);

	if (error == ENOSPC) {
		ztest_record_enospc(FTAG);
		return (error);
	}
	ASSERT0(error);

	VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));

	if (ztest_opts.zo_verbose >= 6) {
		VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
		(void) printf("%s %s = %s at '%s'\n",
		    osname, propname, valname, setpoint);
	}

	return (error);
}

/*
 * Set a pool property on the test spa.  ENOSPC is recorded and returned;
 * any other failure asserts.
 */
static int
ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
{
	spa_t *spa = ztest_spa;
	nvlist_t *props = NULL;
	int error;

	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
	VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);

	error = spa_prop_set(spa, props);

	nvlist_free(props);

	if (error == ENOSPC) {
		ztest_record_enospc(FTAG);
		return (error);
	}
	ASSERT0(error);

	return (error);
}

/*
 * Range-lock list (rll) helpers: a simple single-writer/multi-reader
 * lock built from a mutex and condition variable.
 */
static void
ztest_rll_init(rll_t *rll)
{
	rll->rll_writer = NULL;
	rll->rll_readers = 0;
	mutex_init(&rll->rll_lock, NULL, USYNC_THREAD, NULL);
	cv_init(&rll->rll_cv, NULL, USYNC_THREAD, NULL);
}

static void
ztest_rll_destroy(rll_t *rll)
{
	ASSERT(rll->rll_writer == NULL);
	ASSERT(rll->rll_readers == 0);
	mutex_destroy(&rll->rll_lock);
	cv_destroy(&rll->rll_cv);
}

/* Acquire the rll as reader or writer, blocking until available. */
static void
ztest_rll_lock(rll_t *rll, rl_type_t type)
{
	mutex_enter(&rll->rll_lock);

	if (type == RL_READER) {
		/* Readers wait only for an active writer. */
		while (rll->rll_writer != NULL)
			cv_wait(&rll->rll_cv, &rll->rll_lock);
		rll->rll_readers++;
	} else {
		/* Writers wait for exclusive access. */
		while (rll->rll_writer != NULL || rll->rll_readers)
			cv_wait(&rll->rll_cv, &rll->rll_lock);
		rll->rll_writer = curthread;
	}

	mutex_exit(&rll->rll_lock);
}

/* Release the rll (reader or writer) and wake waiters when it goes idle. */
static void
ztest_rll_unlock(rll_t *rll)
{
	mutex_enter(&rll->rll_lock);

	if (rll->rll_writer) {
		ASSERT(rll->rll_readers == 0);
		rll->rll_writer = NULL;
	} else {
		ASSERT(rll->rll_readers != 0);
		ASSERT(rll->rll_writer == NULL);
		rll->rll_readers--;
	}

	if (rll->rll_writer == NULL && rll->rll_readers == 0)
		cv_broadcast(&rll->rll_cv);

	mutex_exit(&rll->rll_lock);
}

/* Lock an object by hashing its number into the object-lock array. */
static void
ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
{
	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];

	ztest_rll_lock(rll, type);
}

static void
ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
{
	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];

	ztest_rll_unlock(rll);
}

/*
 * Lock a (object, offset) range by hashing into the range-lock array and
 * return a handle for ztest_range_unlock().
 */
static rl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
    uint64_t size, rl_type_t type)
{
	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
	rl_t *rl;

	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
	rl->rl_object = object;
	rl->rl_offset = offset;
	rl->rl_size = size;
	rl->rl_lock = rll;

	ztest_rll_lock(rll, type);

	return (rl);
}

static void
ztest_range_unlock(rl_t *rl)
{
	rll_t *rll = rl->rl_lock;

	ztest_rll_unlock(rll);

	umem_free(rl, sizeof (*rl));
}

/* Initialize a per-dataset ztest_ds_t: ZIL handle, locks, shared state. */
static void
ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
{
	zd->zd_os = os;
	zd->zd_zilog = dmu_objset_zil(os);
	zd->zd_shared = szd;
	dmu_objset_name(os, zd->zd_name);

	if (zd->zd_shared != NULL)
		zd->zd_shared->zd_seq = 0;

	rw_init(&zd->zd_zilog_lock, NULL, USYNC_THREAD, NULL);
	mutex_init(&zd->zd_dirobj_lock, NULL, USYNC_THREAD, NULL);

	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
		ztest_rll_init(&zd->zd_object_lock[l]);

	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
		ztest_rll_init(&zd->zd_range_lock[l]);
}

static void
ztest_zd_fini(ztest_ds_t *zd)
{
	mutex_destroy(&zd->zd_dirobj_lock);

	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
		ztest_rll_destroy(&zd->zd_object_lock[l]);

	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
		ztest_rll_destroy(&zd->zd_range_lock[l]);
}

/* Randomly exercise both TXG_NOWAIT and TXG_WAIT assignment paths. */
#define	TXG_MIGHTWAIT	(ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)

/*
 * Assign 'tx' to a transaction group.  Returns the txg on success, or 0
 * after aborting the tx on ERESTART/ENOSPC (ENOSPC is also recorded).
 */
static uint64_t
ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
{
	uint64_t txg;
	int error;

	/*
	 * Attempt to assign tx to some transaction group.
	 */
	error = dmu_tx_assign(tx, txg_how);
	if (error) {
		if (error == ERESTART) {
			ASSERT(txg_how == TXG_NOWAIT);
			dmu_tx_wait(tx);
		} else {
			ASSERT3U(error, ==, ENOSPC);
			ztest_record_enospc(tag);
		}
		dmu_tx_abort(tx);
		return (0);
	}
	txg = dmu_tx_get_txg(tx);
	ASSERT(txg != 0);
	return (txg);
}

/* Fill 'buf' (size bytes, assumed 8-byte granular) with 'value'. */
static void
ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
{
	uint64_t *ip = buf;
	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);

	while (ip < ip_end)
		*ip++ = value;
}

/* Return B_TRUE iff every 64-bit word of 'buf' equals 'value'. */
static boolean_t
ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
{
	uint64_t *ip = buf;
	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
	uint64_t diff = 0;

	while (ip < ip_end)
		diff |= (value - *ip++);

	return (diff == 0);
}

1288219089Spjdstatic void 1289219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1290219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1291168404Spjd{ 1292219089Spjd bt->bt_magic = BT_MAGIC; 1293219089Spjd bt->bt_objset = dmu_objset_id(os); 1294219089Spjd bt->bt_object = object; 1295219089Spjd bt->bt_offset = offset; 1296219089Spjd bt->bt_gen = gen; 1297219089Spjd bt->bt_txg = txg; 1298219089Spjd bt->bt_crtxg = crtxg; 1299168404Spjd} 1300168404Spjd 1301219089Spjdstatic void 1302219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1303219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1304219089Spjd{ 1305268075Sdelphij ASSERT3U(bt->bt_magic, ==, BT_MAGIC); 1306268075Sdelphij ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); 1307268075Sdelphij ASSERT3U(bt->bt_object, ==, object); 1308268075Sdelphij ASSERT3U(bt->bt_offset, ==, offset); 1309268075Sdelphij ASSERT3U(bt->bt_gen, <=, gen); 1310268075Sdelphij ASSERT3U(bt->bt_txg, <=, txg); 1311268075Sdelphij ASSERT3U(bt->bt_crtxg, ==, crtxg); 1312219089Spjd} 1313219089Spjd 1314219089Spjdstatic ztest_block_tag_t * 1315219089Spjdztest_bt_bonus(dmu_buf_t *db) 1316219089Spjd{ 1317219089Spjd dmu_object_info_t doi; 1318219089Spjd ztest_block_tag_t *bt; 1319219089Spjd 1320219089Spjd dmu_object_info_from_db(db, &doi); 1321219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1322219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1323219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1324219089Spjd 1325219089Spjd return (bt); 1326219089Spjd} 1327219089Spjd 1328219089Spjd/* 1329219089Spjd * ZIL logging ops 1330219089Spjd */ 1331219089Spjd 1332219089Spjd#define lrz_type lr_mode 1333219089Spjd#define lrz_blocksize lr_uid 1334219089Spjd#define lrz_ibshift lr_gid 1335219089Spjd#define lrz_bonustype lr_rdev 1336219089Spjd#define lrz_bonuslen lr_crtime[1] 1337219089Spjd 1338219089Spjdstatic void 
1339219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1340219089Spjd{ 1341219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1342219089Spjd size_t namesize = strlen(name) + 1; 1343219089Spjd itx_t *itx; 1344219089Spjd 1345219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1346219089Spjd return; 1347219089Spjd 1348219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1349219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1350219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1351219089Spjd 1352219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1353219089Spjd} 1354219089Spjd 1355219089Spjdstatic void 1356219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1357219089Spjd{ 1358219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1359219089Spjd size_t namesize = strlen(name) + 1; 1360219089Spjd itx_t *itx; 1361219089Spjd 1362219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1363219089Spjd return; 1364219089Spjd 1365219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1366219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1367219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1368219089Spjd 1369219089Spjd itx->itx_oid = object; 1370219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1371219089Spjd} 1372219089Spjd 1373219089Spjdstatic void 1374219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1375219089Spjd{ 1376219089Spjd itx_t *itx; 1377219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1378219089Spjd 1379219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1380219089Spjd return; 1381219089Spjd 1382359554Smav if (lr->lr_length > zil_max_log_data(zd->zd_zilog)) 1383219089Spjd write_state = WR_INDIRECT; 1384219089Spjd 1385219089Spjd itx = zil_itx_create(TX_WRITE, 1386219089Spjd sizeof (*lr) + (write_state == WR_COPIED ? 
lr->lr_length : 0)); 1387219089Spjd 1388219089Spjd if (write_state == WR_COPIED && 1389219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1390219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1391219089Spjd zil_itx_destroy(itx); 1392219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1393219089Spjd write_state = WR_NEED_COPY; 1394219089Spjd } 1395219089Spjd itx->itx_private = zd; 1396219089Spjd itx->itx_wr_state = write_state; 1397219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1398219089Spjd 1399219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1400219089Spjd sizeof (*lr) - sizeof (lr_t)); 1401219089Spjd 1402219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1403219089Spjd} 1404219089Spjd 1405219089Spjdstatic void 1406219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1407219089Spjd{ 1408219089Spjd itx_t *itx; 1409219089Spjd 1410219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1411219089Spjd return; 1412219089Spjd 1413219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1414219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1415219089Spjd sizeof (*lr) - sizeof (lr_t)); 1416219089Spjd 1417219089Spjd itx->itx_sync = B_FALSE; 1418219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1419219089Spjd} 1420219089Spjd 1421219089Spjdstatic void 1422219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1423219089Spjd{ 1424219089Spjd itx_t *itx; 1425219089Spjd 1426219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1427219089Spjd return; 1428219089Spjd 1429219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1430219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1431219089Spjd sizeof (*lr) - sizeof (lr_t)); 1432219089Spjd 1433219089Spjd itx->itx_sync = B_FALSE; 1434219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1435219089Spjd} 1436219089Spjd 1437219089Spjd/* 1438219089Spjd * ZIL replay ops 1439219089Spjd */ 1440168404Spjdstatic int 
1441331382Smavztest_replay_create(void *arg1, void *arg2, boolean_t byteswap) 1442168404Spjd{ 1443331382Smav ztest_ds_t *zd = arg1; 1444331382Smav lr_create_t *lr = arg2; 1445219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1446219089Spjd objset_t *os = zd->zd_os; 1447219089Spjd ztest_block_tag_t *bbt; 1448219089Spjd dmu_buf_t *db; 1449168404Spjd dmu_tx_t *tx; 1450219089Spjd uint64_t txg; 1451219089Spjd int error = 0; 1452168404Spjd 1453168404Spjd if (byteswap) 1454168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1455168404Spjd 1456219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1457219089Spjd ASSERT(name[0] != '\0'); 1458219089Spjd 1459168404Spjd tx = dmu_tx_create(os); 1460219089Spjd 1461219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1462219089Spjd 1463219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1464219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1465219089Spjd } else { 1466219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1467219089Spjd } 1468219089Spjd 1469219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1470219089Spjd if (txg == 0) 1471219089Spjd return (ENOSPC); 1472219089Spjd 1473219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1474219089Spjd 1475219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1476219089Spjd if (lr->lr_foid == 0) { 1477219089Spjd lr->lr_foid = zap_create(os, 1478219089Spjd lr->lrz_type, lr->lrz_bonustype, 1479219089Spjd lr->lrz_bonuslen, tx); 1480219089Spjd } else { 1481219089Spjd error = zap_create_claim(os, lr->lr_foid, 1482219089Spjd lr->lrz_type, lr->lrz_bonustype, 1483219089Spjd lr->lrz_bonuslen, tx); 1484219089Spjd } 1485219089Spjd } else { 1486219089Spjd if (lr->lr_foid == 0) { 1487219089Spjd lr->lr_foid = dmu_object_alloc(os, 1488219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1489219089Spjd lr->lrz_bonuslen, tx); 1490219089Spjd } else { 1491219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1492219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1493219089Spjd 
lr->lrz_bonuslen, tx); 1494219089Spjd } 1495219089Spjd } 1496219089Spjd 1497168404Spjd if (error) { 1498219089Spjd ASSERT3U(error, ==, EEXIST); 1499219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1500219089Spjd dmu_tx_commit(tx); 1501168404Spjd return (error); 1502168404Spjd } 1503168404Spjd 1504219089Spjd ASSERT(lr->lr_foid != 0); 1505219089Spjd 1506219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1507219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1508219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1509219089Spjd 1510219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1511219089Spjd bbt = ztest_bt_bonus(db); 1512219089Spjd dmu_buf_will_dirty(db, tx); 1513219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1514219089Spjd dmu_buf_rele(db, FTAG); 1515219089Spjd 1516219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1517219089Spjd &lr->lr_foid, tx)); 1518219089Spjd 1519219089Spjd (void) ztest_log_create(zd, tx, lr); 1520219089Spjd 1521168404Spjd dmu_tx_commit(tx); 1522168404Spjd 1523219089Spjd return (0); 1524219089Spjd} 1525219089Spjd 1526219089Spjdstatic int 1527331382Smavztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap) 1528219089Spjd{ 1529331382Smav ztest_ds_t *zd = arg1; 1530331382Smav lr_remove_t *lr = arg2; 1531219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1532219089Spjd objset_t *os = zd->zd_os; 1533219089Spjd dmu_object_info_t doi; 1534219089Spjd dmu_tx_t *tx; 1535219089Spjd uint64_t object, txg; 1536219089Spjd 1537219089Spjd if (byteswap) 1538219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1539219089Spjd 1540219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1541219089Spjd ASSERT(name[0] != '\0'); 1542219089Spjd 1543219089Spjd VERIFY3U(0, ==, 1544219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1545219089Spjd ASSERT(object != 0); 1546219089Spjd 1547219089Spjd ztest_object_lock(zd, object, RL_WRITER); 
1548219089Spjd 1549219089Spjd VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); 1550219089Spjd 1551219089Spjd tx = dmu_tx_create(os); 1552219089Spjd 1553219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1554219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1555219089Spjd 1556219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1557219089Spjd if (txg == 0) { 1558219089Spjd ztest_object_unlock(zd, object); 1559219089Spjd return (ENOSPC); 1560168404Spjd } 1561168404Spjd 1562219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1563219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1564219089Spjd } else { 1565219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1566219089Spjd } 1567219089Spjd 1568219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1569219089Spjd 1570219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1571219089Spjd 1572219089Spjd dmu_tx_commit(tx); 1573219089Spjd 1574219089Spjd ztest_object_unlock(zd, object); 1575219089Spjd 1576219089Spjd return (0); 1577168404Spjd} 1578168404Spjd 1579168404Spjdstatic int 1580331382Smavztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) 1581168404Spjd{ 1582331382Smav ztest_ds_t *zd = arg1; 1583331382Smav lr_write_t *lr = arg2; 1584219089Spjd objset_t *os = zd->zd_os; 1585219089Spjd void *data = lr + 1; /* data follows lr */ 1586219089Spjd uint64_t offset, length; 1587219089Spjd ztest_block_tag_t *bt = data; 1588219089Spjd ztest_block_tag_t *bbt; 1589219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1590219089Spjd dmu_object_info_t doi; 1591168404Spjd dmu_tx_t *tx; 1592219089Spjd dmu_buf_t *db; 1593219089Spjd arc_buf_t *abuf = NULL; 1594219089Spjd rl_t *rl; 1595168404Spjd 1596168404Spjd if (byteswap) 1597168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1598168404Spjd 1599219089Spjd offset = lr->lr_offset; 1600219089Spjd length = lr->lr_length; 1601219089Spjd 1602219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1603219089Spjd if 
(lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1604219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 1605219089Spjd if (length < blocksize) { 1606219089Spjd offset -= offset % blocksize; 1607219089Spjd length = blocksize; 1608219089Spjd } 1609219089Spjd } 1610219089Spjd 1611219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1612219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1613219089Spjd 1614219089Spjd if (bt->bt_magic != BT_MAGIC) 1615219089Spjd bt = NULL; 1616219089Spjd 1617219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1618219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1619219089Spjd 1620219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1621219089Spjd 1622219089Spjd dmu_object_info_from_db(db, &doi); 1623219089Spjd 1624219089Spjd bbt = ztest_bt_bonus(db); 1625219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1626219089Spjd gen = bbt->bt_gen; 1627219089Spjd crtxg = bbt->bt_crtxg; 1628219089Spjd lrtxg = lr->lr_common.lrc_txg; 1629219089Spjd 1630168404Spjd tx = dmu_tx_create(os); 1631219089Spjd 1632219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1633219089Spjd 1634219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1635219089Spjd P2PHASE(offset, length) == 0) 1636219089Spjd abuf = dmu_request_arcbuf(db, length); 1637219089Spjd 1638219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1639219089Spjd if (txg == 0) { 1640219089Spjd if (abuf != NULL) 1641219089Spjd dmu_return_arcbuf(abuf); 1642219089Spjd dmu_buf_rele(db, FTAG); 1643219089Spjd ztest_range_unlock(rl); 1644219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1645219089Spjd return (ENOSPC); 1646168404Spjd } 1647168404Spjd 1648219089Spjd if (bt != NULL) { 1649219089Spjd /* 1650219089Spjd * Usually, verify the old data before writing new data -- 1651219089Spjd * but not always, because we also want to verify correct 1652219089Spjd * behavior when the data was not recently read into cache. 
1653219089Spjd */ 1654219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1655219089Spjd if (ztest_random(4) != 0) { 1656219089Spjd int prefetch = ztest_random(2) ? 1657219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1658219089Spjd ztest_block_tag_t rbt; 1659219089Spjd 1660219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1661219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1662219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1663219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1664219089Spjd offset, gen, txg, crtxg); 1665219089Spjd } 1666219089Spjd } 1667219089Spjd 1668219089Spjd /* 1669219089Spjd * Writes can appear to be newer than the bonus buffer because 1670219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1671219089Spjd * open-context data, which may be different than the data 1672219089Spjd * as it was when the write was generated. 1673219089Spjd */ 1674219089Spjd if (zd->zd_zilog->zl_replay) { 1675219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1676219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1677219089Spjd bt->bt_crtxg); 1678219089Spjd } 1679219089Spjd 1680219089Spjd /* 1681219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1682219089Spjd * so that all of the usual ASSERTs will work. 
		 */
		ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
	}

	if (abuf == NULL) {
		dmu_write(os, lr->lr_foid, offset, length, data, tx);
	} else {
		bcopy(data, abuf->b_data, length);
		dmu_assign_arcbuf(db, offset, abuf, tx);
	}

	(void) ztest_log_write(zd, tx, lr);

	dmu_buf_rele(db, FTAG);

	dmu_tx_commit(tx);

	ztest_range_unlock(rl);
	ztest_object_unlock(zd, lr->lr_foid);

	return (0);
}

/*
 * Replay a TX_TRUNCATE record: free the byte range described by the
 * log record under an object lock (reader) plus a range lock (writer).
 * Returns 0 on success or ENOSPC if the tx could not be assigned.
 */
static int
ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
{
	ztest_ds_t *zd = arg1;
	lr_truncate_t *lr = arg2;
	objset_t *os = zd->zd_os;
	dmu_tx_t *tx;
	uint64_t txg;
	rl_t *rl;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	ztest_object_lock(zd, lr->lr_foid, RL_READER);
	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
	    RL_WRITER);

	tx = dmu_tx_create(os);

	dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);

	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
	if (txg == 0) {
		/* tx assignment failed; drop locks and report no space */
		ztest_range_unlock(rl);
		ztest_object_unlock(zd, lr->lr_foid);
		return (ENOSPC);
	}

	VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
	    lr->lr_length, tx) == 0);

	(void) ztest_log_truncate(zd, tx, lr);

	dmu_tx_commit(tx);

	ztest_range_unlock(rl);
	ztest_object_unlock(zd, lr->lr_foid);

	return (0);
}

/*
 * Replay a TX_SETATTR record: update the bonus buffer's block tag.
 * When not in ZIL replay, the size/mode in the record are generated
 * randomly here and logged, so that replay can reproduce them.
 */
static int
ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
{
	ztest_ds_t *zd = arg1;
	lr_setattr_t *lr = arg2;
	objset_t *os = zd->zd_os;
	dmu_tx_t *tx;
	dmu_buf_t *db;
	ztest_block_tag_t *bbt;
	uint64_t txg, lrtxg, crtxg;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	ztest_object_lock(zd, lr->lr_foid, RL_WRITER);

	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));

	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, lr->lr_foid);

	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
	if (txg == 0) {
		dmu_buf_rele(db, FTAG);
		ztest_object_unlock(zd, lr->lr_foid);
		return (ENOSPC);
	}

	bbt = ztest_bt_bonus(db);
	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
	crtxg = bbt->bt_crtxg;
	lrtxg = lr->lr_common.lrc_txg;

	if (zd->zd_zilog->zl_replay) {
		/* Replaying: the record must carry the logged values. */
		ASSERT(lr->lr_size != 0);
		ASSERT(lr->lr_mode != 0);
		ASSERT(lrtxg != 0);
	} else {
		/*
		 * Randomly change the size and increment the generation.
		 */
		lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
		    sizeof (*bbt);
		lr->lr_mode = bbt->bt_gen + 1;
		ASSERT(lrtxg == 0);
	}

	/*
	 * Verify that the current bonus buffer is not newer than our txg.
	 */
	ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
	    MAX(txg, lrtxg), crtxg);

	dmu_buf_will_dirty(db, tx);

	ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
	ASSERT3U(lr->lr_size, <=, db->db_size);
	VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
	bbt = ztest_bt_bonus(db);

	ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);

	dmu_buf_rele(db, FTAG);

	(void) ztest_log_setattr(zd, tx, lr);

	dmu_tx_commit(tx);

	ztest_object_unlock(zd, lr->lr_foid);

	return (0);
}

/*
 * ZIL replay dispatch table, indexed by transaction type.  Only the
 * transaction types ztest generates have replay handlers.
 */
zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
	NULL,			/* 0 no such transaction type */
	ztest_replay_create,	/* TX_CREATE */
	NULL,			/* TX_MKDIR */
	NULL,			/* TX_MKXATTR */
	NULL,			/* TX_SYMLINK */
	ztest_replay_remove,	/* TX_REMOVE */
	NULL,			/* TX_RMDIR */
	NULL,			/* TX_LINK */
	NULL,			/* TX_RENAME */
	ztest_replay_write,	/* TX_WRITE */
	ztest_replay_truncate,	/* TX_TRUNCATE */
	ztest_replay_setattr,	/* TX_SETATTR */
	NULL,			/* TX_ACL */
	NULL,			/* TX_CREATE_ACL */
	NULL,			/* TX_CREATE_ATTR */
	NULL,			/* TX_CREATE_ACL_ATTR */
	NULL,			/* TX_MKDIR_ACL */
	NULL,			/* TX_MKDIR_ATTR */
	NULL,			/* TX_MKDIR_ACL_ATTR */
	NULL,			/* TX_WRITE2 */
};

/*
 * ZIL get_data callbacks
 */

/*
 * Done callback for ztest_get_data(): release the dbuf, drop the
 * range and object locks, record the lwb block on success, and free
 * the zgd.
 */
static void
ztest_get_done(zgd_t *zgd, int error)
{
	ztest_ds_t
	    *zd = zgd->zgd_private;
	uint64_t object = zgd->zgd_rl->rl_object;

	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	ztest_range_unlock(zgd->zgd_rl);
	ztest_object_unlock(zd, object);

	if (error == 0 && zgd->zgd_bp)
		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);

	umem_free(zgd, sizeof (*zgd));
}

/*
 * ZIL get_data callback: supply the data for a TX_WRITE log record.
 * If 'buf' is non-NULL this is an immediate write and the data is
 * dmu_read() into it; otherwise the whole block is dmu_sync()ed and
 * the resulting block pointer stored in the record.  Fails with
 * ENOENT if the object was created at or after the record's txg.
 */
static int
ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
    zio_t *zio)
{
	ztest_ds_t *zd = arg;
	objset_t *os = zd->zd_os;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	uint64_t txg = lr->lr_common.lrc_txg;
	uint64_t crtxg;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	ASSERT3P(lwb, !=, NULL);
	ASSERT3P(zio, !=, NULL);
	ASSERT3U(size, !=, 0);

	ztest_object_lock(zd, object, RL_READER);
	error = dmu_bonus_hold(os, object, FTAG, &db);
	if (error) {
		ztest_object_unlock(zd, object);
		return (error);
	}

	crtxg = ztest_bt_bonus(db)->bt_crtxg;

	if (crtxg == 0 || crtxg > txg) {
		/* Object does not predate this record; nothing to copy. */
		dmu_buf_rele(db, FTAG);
		ztest_object_unlock(zd, object);
		return (ENOENT);
	}

	dmu_object_info_from_db(db, &doi);
	dmu_buf_rele(db, FTAG);
	db = NULL;

	/* Locks and dbuf are released by ztest_get_done(). */
	zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
	zgd->zgd_lwb = lwb;
	zgd->zgd_private = zd;

	if (buf != NULL) {	/* immediate write */
		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
		    RL_READER);

		error = dmu_read(os, object, offset, size, buf,
		    DMU_READ_NO_PREFETCH);
		ASSERT(error == 0);
	} else {
		/* Indirect write: sync out the containing block. */
		size = doi.doi_data_block_size;
		if (ISP2(size)) {
			offset = P2ALIGN(offset, size);
		} else {
			ASSERT(offset < size);
			offset = 0;
		}

		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
		    RL_READER);

		error = dmu_buf_hold(os, object, offset, zgd, &db,
		    DMU_READ_NO_PREFETCH);

		if (error == 0) {
			blkptr_t *bp = &lr->lr_blkptr;

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    ztest_get_done, zgd);

			/* On success, dmu_sync() calls ztest_get_done(). */
			if (error == 0)
				return (0);
		}
	}

	ztest_get_done(zgd, error);

	return (error);
}

/*
 * Allocate a log record of 'lrsize' bytes with an optional name
 * (NUL-terminated) appended after it.  Freed with ztest_lr_free().
 */
static void *
ztest_lr_alloc(size_t lrsize, char *name)
{
	char *lr;
	size_t namesize = name ?
	    strlen(name) + 1 : 0;

	lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);

	if (name)
		bcopy(name, lr + lrsize, namesize);

	return (lr);
}

/*
 * Free a log record allocated by ztest_lr_alloc(); 'lrsize' and 'name'
 * must match the allocation so the total size is computed identically.
 */
void
ztest_lr_free(void *lr, size_t lrsize, char *name)
{
	size_t namesize = name ? strlen(name) + 1 : 0;

	umem_free(lr, lrsize + namesize);
}

/*
 * Lookup a bunch of objects.  Returns the number of objects not found.
 */
static int
ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
{
	int missing = 0;
	int error;

	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));

	for (int i = 0; i < count; i++, od++) {
		od->od_object = 0;
		error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
		    sizeof (uint64_t), 1, &od->od_object);
		if (error) {
			ASSERT(error == ENOENT);
			ASSERT(od->od_object == 0);
			missing++;
		} else {
			dmu_buf_t *db;
			ztest_block_tag_t *bbt;
			dmu_object_info_t doi;

			ASSERT(od->od_object != 0);
			ASSERT(missing == 0);	/* there should be no gaps */

			/* Refresh the template from the on-disk object. */
			ztest_object_lock(zd, od->od_object, RL_READER);
			VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
			    od->od_object, FTAG, &db));
			dmu_object_info_from_db(db, &doi);
			bbt = ztest_bt_bonus(db);
			ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
			od->od_type = doi.doi_type;
			od->od_blocksize = doi.doi_data_block_size;
			od->od_gen = bbt->bt_gen;
			dmu_buf_rele(db, FTAG);
			ztest_object_unlock(zd, od->od_object);
		}
	}

	return (missing);
}

/*
 * Create objects from the od templates via TX_CREATE replay records.
 * Once one creation fails, the rest are counted as missing without
 * being attempted.  Returns the number of objects not created.
 */
static int
ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
{
	int missing = 0;

	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));

	for (int i = 0; i < count; i++, od++) {
		if (missing) {
			od->od_object = 0;
			missing++;
			continue;
		}

		lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);

		lr->lr_doid = od->od_dir;
		lr->lr_foid = 0;	/* 0 to allocate, > 0 to claim */
		lr->lrz_type = od->od_crtype;
		lr->lrz_blocksize = od->od_crblocksize;
		lr->lrz_ibshift = ztest_random_ibshift();
		lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
		lr->lrz_bonuslen = dmu_bonus_max();
		lr->lr_gen = od->od_crgen;
		lr->lr_crtime[0] = time(NULL);

		if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
			ASSERT(missing == 0);
			od->od_object = 0;
			missing++;
		} else {
			od->od_object = lr->lr_foid;
			od->od_type = od->od_crtype;
			od->od_blocksize = od->od_crblocksize;
			od->od_gen = od->od_crgen;
			ASSERT(od->od_object != 0);
		}

		ztest_lr_free(lr, sizeof (*lr), od->od_name);
	}

	return (missing);
}

/*
 * Remove the objects described by the od templates, in reverse order,
 * via TX_REMOVE replay records.  Returns the number not removed.
 */
static int
ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
{
	int missing = 0;
	int error;

	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));

	od += count - 1;

	for (int i = count - 1; i >= 0; i--, od--) {
		if (missing) {
			missing++;
			continue;
		}

		/*
		 * No object was found.
		 */
		if (od->od_object == 0)
			continue;

		lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);

		lr->lr_doid = od->od_dir;

		if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
			ASSERT3U(error, ==, ENOSPC);
			missing++;
		} else {
			od->od_object = 0;
		}
		ztest_lr_free(lr, sizeof (*lr), od->od_name);
	}

	return (missing);
}

/*
 * Write 'size' bytes of 'data' to an object by building a TX_WRITE
 * record (data appended after the record) and replaying it directly.
 */
static int
ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
    void *data)
{
	lr_write_t *lr;
	int error;

	lr = ztest_lr_alloc(sizeof (*lr) + size, NULL);

	lr->lr_foid = object;
	lr->lr_offset = offset;
	lr->lr_length = size;
	lr->lr_blkoff = 0;
	BP_ZERO(&lr->lr_blkptr);

	bcopy(data, lr + 1, size);

	error = ztest_replay_write(zd, lr, B_FALSE);

	ztest_lr_free(lr, sizeof (*lr) + size, NULL);

	return (error);
}

/*
 * Free a byte range of an object via a TX_TRUNCATE replay record.
 */
static int
ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
{
	lr_truncate_t *lr;
	int error;

	lr = ztest_lr_alloc(sizeof (*lr), NULL);

	lr->lr_foid = object;
	lr->lr_offset = offset;
	lr->lr_length = size;

	error = ztest_replay_truncate(zd, lr, B_FALSE);

	ztest_lr_free(lr, sizeof (*lr), NULL);

	return (error);
}

/*
 * Change an object's attributes via a TX_SETATTR replay record.
 * Size/mode are left 0 so the replay handler randomizes them.
 */
static int
ztest_setattr(ztest_ds_t *zd, uint64_t object)
{
	lr_setattr_t *lr;
	int error;

	lr = ztest_lr_alloc(sizeof (*lr), NULL);

	lr->lr_foid = object;
	lr->lr_size = 0;
	lr->lr_mode = 0;

	error = ztest_replay_setattr(zd, lr, B_FALSE);

	ztest_lr_free(lr, sizeof (*lr), NULL);

	return (error);
}

/*
 * Preallocate a range of an object with dmu_prealloc(), waiting for
 * the txg to sync; on tx-assign failure, free the range instead.
 */
static void
ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
{
	objset_t *os = zd->zd_os;
	dmu_tx_t *tx;
	uint64_t txg;
	rl_t *rl;

	txg_wait_synced(dmu_objset_pool(os), 0);

	ztest_object_lock(zd, object, RL_READER);
	rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);

	tx = dmu_tx_create(os);

	dmu_tx_hold_write(tx, object, offset, size);

	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);

	if (txg != 0) {
		dmu_prealloc(os, object, offset, size, tx);
		dmu_tx_commit(tx);
		txg_wait_synced(dmu_objset_pool(os), txg);
	} else {
		(void) dmu_free_long_range(os, object, offset, size);
	}

	ztest_range_unlock(rl);
	ztest_object_unlock(zd, object);
}

/*
 * Perform one randomly chosen i/o operation (tag write, pattern write,
 * zero write, truncate, setattr, or property-churning rewrite) on the
 * given object/offset, under the zilog lock as reader.
 */
static void
ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
{
	int err;
	ztest_block_tag_t wbt;
	dmu_object_info_t doi;
	enum ztest_io_type io_type;
	uint64_t blocksize;
	void *data;

	VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
	blocksize = doi.doi_data_block_size;
	data = umem_alloc(blocksize, UMEM_NOFAIL);

	/*
	 * Pick an i/o type at random, biased toward writing block tags.
	 */
	io_type = ztest_random(ZTEST_IO_TYPES);
	if (ztest_random(2) == 0)
		io_type = ZTEST_IO_WRITE_TAG;

	rw_enter(&zd->zd_zilog_lock, RW_READER);

	switch (io_type) {

	case ZTEST_IO_WRITE_TAG:
		ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
		(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
		break;

	case ZTEST_IO_WRITE_PATTERN:
		(void) memset(data, 'a' + (object + offset) % 5, blocksize);
		if (ztest_random(2) == 0) {
			/*
			 * Induce fletcher2 collisions to ensure that
			 * zio_ddt_collision() detects and resolves them
			 * when using fletcher2-verify for deduplication.
			 */
			((uint64_t *)data)[0] ^= 1ULL << 63;
			((uint64_t *)data)[4] ^= 1ULL << 63;
		}
		(void) ztest_write(zd, object, offset, blocksize, data);
		break;

	case ZTEST_IO_WRITE_ZEROES:
		bzero(data, blocksize);
		(void) ztest_write(zd, object, offset, blocksize, data);
		break;

	case ZTEST_IO_TRUNCATE:
		(void) ztest_truncate(zd, object, offset, blocksize);
		break;

	case ZTEST_IO_SETATTR:
		(void) ztest_setattr(zd, object);
		break;

	case ZTEST_IO_REWRITE:
		/* Change checksum/compression, then rewrite the block. */
		rw_enter(&ztest_name_lock, RW_READER);
		err = ztest_dsl_prop_set_uint64(zd->zd_name,
		    ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
		    B_FALSE);
		VERIFY(err == 0 || err == ENOSPC);
		err = ztest_dsl_prop_set_uint64(zd->zd_name,
		    ZFS_PROP_COMPRESSION,
		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
		    B_FALSE);
		VERIFY(err == 0 || err == ENOSPC);
		rw_exit(&ztest_name_lock);

		VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
		    DMU_READ_NO_PREFETCH));

		(void) ztest_write(zd, object, offset, blocksize, data);
		break;
	}

	rw_exit(&zd->zd_zilog_lock);

	umem_free(data, blocksize);
}

/*
 * Initialize an object description template.
 */
static void
ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
    dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
{
	od->od_dir = ZTEST_DIROBJ;
	od->od_object = 0;	/* filled in by lookup/create */

	od->od_crtype = type;
	od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
	od->od_crgen = gen;

	od->od_type = DMU_OT_NONE;
	od->od_blocksize = 0;
	od->od_gen = 0;

	(void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
	    tag, (int64_t)id, index);
}

/*
 * Lookup or create the objects for a test using the od template.
 * If the objects do not all exist, or if 'remove' is specified,
 * remove any existing objects and create new ones.  Otherwise,
 * use the existing objects.
 */
static int
ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
{
	int count = size / sizeof (*od);
	int rv = 0;

	mutex_enter(&zd->zd_dirobj_lock);
	if ((ztest_lookup(zd, od, count) != 0 || remove) &&
	    (ztest_remove(zd, od, count) != 0 ||
	    ztest_create(zd, od, count) != 0))
		rv = -1;
	zd->zd_od = od;
	mutex_exit(&zd->zd_dirobj_lock);

	return (rv);
}

/*
 * Commit the ZIL to a random object and remember the committed
 * sequence number for post-crash verification.
 */
/* ARGSUSED */
void
ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
{
	zilog_t *zilog = zd->zd_zilog;

	rw_enter(&zd->zd_zilog_lock, RW_READER);

	zil_commit(zilog, ztest_random(ZTEST_OBJECTS));

	/*
	 * Remember the committed values in zd, which is in parent/child
	 * shared memory.  If we die, the next iteration of ztest_run()
	 * will verify that the log really does contain this record.
	 */
	mutex_enter(&zilog->zl_lock);
	ASSERT(zd->zd_shared != NULL);
	ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
	zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
	mutex_exit(&zilog->zl_lock);

	rw_exit(&zd->zd_zilog_lock);
}

/*
 * This function is designed to simulate the operations that occur during a
 * mount/unmount operation.  We hold the dataset across these operations in an
 * attempt to expose any implicit assumptions about ZIL management.
 */
/* ARGSUSED */
void
ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
{
	objset_t *os = zd->zd_os;

	/*
	 * We grab the zd_dirobj_lock to ensure that no other thread is
	 * updating the zil (i.e. adding in-memory log records) and the
	 * zd_zilog_lock to block any I/O.
	 */
	mutex_enter(&zd->zd_dirobj_lock);
	rw_enter(&zd->zd_zilog_lock, RW_WRITER);

	/* zfsvfs_teardown() */
	zil_close(zd->zd_zilog);

	/* zfsvfs_setup() */
	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
	zil_replay(os, zd, ztest_replay_vector);

	rw_exit(&zd->zd_zilog_lock);
	mutex_exit(&zd->zd_dirobj_lock);
}

/*
 * Verify that we can't destroy an active pool, create an existing pool,
 * or create a pool with a bad vdev spec.
 */
/* ARGSUSED */
void
ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
{
	ztest_shared_opts_t *zo = &ztest_opts;
	spa_t *spa;
	nvlist_t *nvroot;

	/*
	 * Attempt to create using a bad file.
	 */
	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
	VERIFY3U(ENOENT, ==,
	    spa_create("ztest_bad_file", nvroot, NULL, NULL));
	nvlist_free(nvroot);

	/*
	 * Attempt to create using a bad mirror.
	 */
	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
	VERIFY3U(ENOENT, ==,
	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
	nvlist_free(nvroot);

	/*
	 * Attempt to create an existing pool.  It shouldn't matter
	 * what's in the nvroot; we should fail with EEXIST.
	 */
	rw_enter(&ztest_name_lock, RW_READER);
	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
	nvlist_free(nvroot);
	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
	VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
	spa_close(spa, FTAG);

	rw_exit(&ztest_name_lock);
}

/*
 * Create a scratch pool at a random pre-features SPA version, upgrade
 * it to a random newer version, and verify the version change took.
 */
/* ARGSUSED */
void
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
{
	spa_t *spa;
	uint64_t initial_version = SPA_VERSION_INITIAL;
	uint64_t version, newversion;
	nvlist_t *nvroot, *props;
	char *name;

	mutex_enter(&ztest_vdev_lock);
	name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);

	/*
	 * Clean up from previous runs.
	 */
	(void) spa_destroy(name);

	nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
	    0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);

	/*
	 * If we're configuring a RAIDZ device then make sure that the
	 * initial version is capable of supporting that feature.
	 */
	switch (ztest_opts.zo_raidz_parity) {
	case 0:
	case 1:
		initial_version = SPA_VERSION_INITIAL;
		break;
	case 2:
		initial_version = SPA_VERSION_RAIDZ2;
		break;
	case 3:
		initial_version = SPA_VERSION_RAIDZ3;
		break;
	}

	/*
	 * Create a pool with a spa version that can be upgraded. Pick
	 * a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
	 */
	do {
		version = ztest_random_spa_version(initial_version);
	} while (version > SPA_VERSION_BEFORE_FEATURES);

	props = fnvlist_alloc();
	fnvlist_add_uint64(props,
	    zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
	VERIFY0(spa_create(name, nvroot, props, NULL));
	fnvlist_free(nvroot);
	fnvlist_free(props);

	VERIFY0(spa_open(name, &spa, FTAG));
	VERIFY3U(spa_version(spa), ==, version);
	newversion = ztest_random_spa_version(version + 1);

	if (ztest_opts.zo_verbose >= 4) {
		(void) printf("upgrading spa version from %llu to %llu\n",
		    (u_longlong_t)version, (u_longlong_t)newversion);
	}

	spa_upgrade(spa, newversion);
	VERIFY3U(spa_version(spa), >, version);
	VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
	    zpool_prop_to_name(ZPOOL_PROP_VERSION)));
	spa_close(spa, FTAG);

	strfree(name);
	mutex_exit(&ztest_vdev_lock);
}

/*
 * Take a pool checkpoint, tolerating the benign errors that can occur
 * while device removal or another checkpoint operation is in flight.
 */
static void
ztest_spa_checkpoint(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));

	int error = spa_checkpoint(spa->spa_name);

	switch (error) {
	case 0:
	case ZFS_ERR_DEVRM_IN_PROGRESS:
	case ZFS_ERR_DISCARDING_CHECKPOINT:
	case ZFS_ERR_CHECKPOINT_EXISTS:
		break;
	case ENOSPC:
		ztest_record_enospc(FTAG);
		break;
	default:
		fatal(0, "spa_checkpoint(%s) = %d", spa->spa_name, error);
	}
}

/*
 * Discard the pool checkpoint; only "already discarding" and "no
 * checkpoint" are acceptable failures.
 */
static void
ztest_spa_discard_checkpoint(spa_t *spa)
{
	ASSERT(MUTEX_HELD(&ztest_checkpoint_lock));

	int error = spa_checkpoint_discard(spa->spa_name);

	switch (error) {
	case 0:
	case ZFS_ERR_DISCARDING_CHECKPOINT:
	case ZFS_ERR_NO_CHECKPOINT:
		break;
	default:
		fatal(0, "spa_discard_checkpoint(%s) = %d",
		    spa->spa_name, error);
	}
}

/*
 * Randomly create or discard a pool checkpoint, serialized by
 * ztest_checkpoint_lock.
 */
/* ARGSUSED */
void
ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id)
{
	spa_t *spa = ztest_spa;

	mutex_enter(&ztest_checkpoint_lock);
	if (ztest_random(2) == 0) {
		ztest_spa_checkpoint(spa);
	} else {
		ztest_spa_discard_checkpoint(spa);
	}
	mutex_exit(&ztest_checkpoint_lock);
}

/*
 * Recursively search the vdev tree for the vdev with the given path.
 * Returns NULL if no vdev matches.
 */
static vdev_t *
vdev_lookup_by_path(vdev_t *vd, const char *path)
{
	vdev_t *mvd;

	if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
		return (vd);

	for (int c = 0; c <
	    vd->vdev_children; c++)
		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
		    NULL)
			return (mvd);

	return (NULL);
}

/*
 * Find the first available hole which can be used as a top-level.
 */
int
find_vdev_hole(spa_t *spa)
{
	vdev_t *rvd = spa->spa_root_vdev;
	int c;

	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);

	for (c = 0; c < rvd->vdev_children; c++) {
		vdev_t *cvd = rvd->vdev_child[c];

		if (cvd->vdev_ishole)
			break;
	}
	return (c);
}

/*
 * Verify that vdev_add() works as expected.
 */
/* ARGSUSED */
void
ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
{
	ztest_shared_t *zs = ztest_shared;
	spa_t *spa = ztest_spa;
	uint64_t leaves;
	uint64_t guid;
	nvlist_t *nvroot;
	int error;

	mutex_enter(&ztest_vdev_lock);
	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;

	/*
	 * If we have slogs then remove them 1/4 of the time.
	 */
	if (spa_has_slogs(spa) && ztest_random(4) == 0) {
		/*
		 * Grab the guid from the head of the log class rotor.
		 */
		guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;

		spa_config_exit(spa, SCL_VDEV, FTAG);

		/*
		 * We have to grab the zs_name_lock as writer to
		 * prevent a race between removing a slog (dmu_objset_find)
		 * and destroying a dataset. Removing the slog will
		 * grab a reference on the dataset which may cause
		 * dmu_objset_destroy() to fail with EBUSY thus
		 * leaving the dataset in an inconsistent state.
		 */
		rw_enter(&ztest_name_lock, RW_WRITER);
		error = spa_vdev_remove(spa, guid, B_FALSE);
		rw_exit(&ztest_name_lock);

		switch (error) {
		case 0:
		case EEXIST:
		case ZFS_ERR_CHECKPOINT_EXISTS:
		case ZFS_ERR_DISCARDING_CHECKPOINT:
			break;
		default:
			fatal(0, "spa_vdev_remove() = %d", error);
		}
	} else {
		spa_config_exit(spa, SCL_VDEV, FTAG);

		/*
		 * Make 1/4 of the devices be log devices.
		 */
		nvroot = make_vdev_root(NULL, NULL, NULL,
		    ztest_opts.zo_vdev_size, 0,
		    ztest_random(4) == 0, ztest_opts.zo_raidz,
		    zs->zs_mirrors, 1);

		error = spa_vdev_add(spa, nvroot);
		nvlist_free(nvroot);

		switch (error) {
		case 0:
			break;
		case ENOSPC:
			ztest_record_enospc("spa_vdev_add");
			break;
		default:
			fatal(0, "spa_vdev_add() = %d", error);
		}
	}

	mutex_exit(&ztest_vdev_lock);
}

/*
 * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
 */
/* ARGSUSED */
void
ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
{
	ztest_shared_t *zs = ztest_shared;
	spa_t *spa = ztest_spa;
	vdev_t *rvd = spa->spa_root_vdev;
	spa_aux_vdev_t *sav;
	char *aux;
	uint64_t guid = 0;
	int error;

	/* Randomly exercise either the spares or the l2arc aux list. */
	if (ztest_random(2) == 0) {
		sav = &spa->spa_spares;
		aux = ZPOOL_CONFIG_SPARES;
	} else {
		sav = &spa->spa_l2cache;
		aux = ZPOOL_CONFIG_L2CACHE;
	}

	mutex_enter(&ztest_vdev_lock);

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	if (sav->sav_count != 0 && ztest_random(4) == 0) {
		/*
		 * Pick a random device to remove.
		 */
		guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
	} else {
		/*
		 * Find an unused device we can add.
2697185029Spjd */ 2698219089Spjd zs->zs_vdev_aux = 0; 2699185029Spjd for (;;) { 2700185029Spjd char path[MAXPATHLEN]; 2701185029Spjd int c; 2702236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2703236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2704236143Smm zs->zs_vdev_aux); 2705185029Spjd for (c = 0; c < sav->sav_count; c++) 2706185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2707185029Spjd path) == 0) 2708185029Spjd break; 2709185029Spjd if (c == sav->sav_count && 2710185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2711185029Spjd break; 2712219089Spjd zs->zs_vdev_aux++; 2713168404Spjd } 2714168404Spjd } 2715168404Spjd 2716185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2717168404Spjd 2718185029Spjd if (guid == 0) { 2719185029Spjd /* 2720185029Spjd * Add a new device. 2721185029Spjd */ 2722243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2723236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2724185029Spjd error = spa_vdev_add(spa, nvroot); 2725332547Smav 2726332547Smav switch (error) { 2727332547Smav case 0: 2728332547Smav break; 2729332547Smav default: 2730185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2731332547Smav } 2732185029Spjd nvlist_free(nvroot); 2733185029Spjd } else { 2734185029Spjd /* 2735185029Spjd * Remove an existing device. Sometimes, dirty its 2736185029Spjd * vdev state first to make sure we handle removal 2737185029Spjd * of devices that have pending state changes. 
2738185029Spjd */ 2739185029Spjd if (ztest_random(2) == 0) 2740219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2741185029Spjd 2742185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2743332547Smav 2744332547Smav switch (error) { 2745332547Smav case 0: 2746332547Smav case EBUSY: 2747332547Smav case ZFS_ERR_CHECKPOINT_EXISTS: 2748332547Smav case ZFS_ERR_DISCARDING_CHECKPOINT: 2749332547Smav break; 2750332547Smav default: 2751185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2752332547Smav } 2753185029Spjd } 2754185029Spjd 2755332545Smav mutex_exit(&ztest_vdev_lock); 2756168404Spjd} 2757168404Spjd 2758168404Spjd/* 2759219089Spjd * split a pool if it has mirror tlvdevs 2760219089Spjd */ 2761219089Spjd/* ARGSUSED */ 2762219089Spjdvoid 2763219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2764219089Spjd{ 2765219089Spjd ztest_shared_t *zs = ztest_shared; 2766236143Smm spa_t *spa = ztest_spa; 2767219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2768219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2769219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2770219089Spjd int error = 0; 2771219089Spjd 2772332545Smav mutex_enter(&ztest_vdev_lock); 2773219089Spjd 2774219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2775236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2776332545Smav mutex_exit(&ztest_vdev_lock); 2777219089Spjd return; 2778219089Spjd } 2779219089Spjd 2780219089Spjd /* clean up the old pool, if any */ 2781219089Spjd (void) spa_destroy("splitp"); 2782219089Spjd 2783219089Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2784219089Spjd 2785219089Spjd /* generate a config from the existing config */ 2786219089Spjd mutex_enter(&spa->spa_props_lock); 2787219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2788219089Spjd &tree) == 0); 2789219089Spjd mutex_exit(&spa->spa_props_lock); 2790219089Spjd 2791219089Spjd 
VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2792219089Spjd &children) == 0); 2793219089Spjd 2794219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2795219089Spjd for (c = 0; c < children; c++) { 2796219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2797219089Spjd nvlist_t **mchild; 2798219089Spjd uint_t mchildren; 2799219089Spjd 2800219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2801219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2802219089Spjd 0) == 0); 2803219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2804219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2805219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2806219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2807219089Spjd if (lastlogid == 0) 2808219089Spjd lastlogid = schildren; 2809219089Spjd ++schildren; 2810219089Spjd continue; 2811219089Spjd } 2812219089Spjd lastlogid = 0; 2813219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2814219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2815219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2816219089Spjd } 2817219089Spjd 2818219089Spjd /* OK, create a config that can be used to split */ 2819219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2820219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2821219089Spjd VDEV_TYPE_ROOT) == 0); 2822219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2823219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2824219089Spjd 2825219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2826219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2827219089Spjd 2828219089Spjd for (c = 0; c < schildren; c++) 2829219089Spjd nvlist_free(schild[c]); 2830219089Spjd free(schild); 2831219089Spjd nvlist_free(split); 2832219089Spjd 2833219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2834219089Spjd 2835332545Smav rw_enter(&ztest_name_lock, RW_WRITER); 2836219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2837332545Smav rw_exit(&ztest_name_lock); 2838219089Spjd 2839219089Spjd nvlist_free(config); 2840219089Spjd 2841219089Spjd if (error == 0) { 2842219089Spjd (void) printf("successful split - results:\n"); 2843219089Spjd mutex_enter(&spa_namespace_lock); 2844219089Spjd show_pool_stats(spa); 2845219089Spjd show_pool_stats(spa_lookup("splitp")); 2846219089Spjd mutex_exit(&spa_namespace_lock); 2847219089Spjd ++zs->zs_splits; 2848219089Spjd --zs->zs_mirrors; 2849219089Spjd } 2850332545Smav mutex_exit(&ztest_vdev_lock); 2851219089Spjd} 2852219089Spjd 2853219089Spjd/* 2854168404Spjd * Verify that we can attach and detach devices. 
2855168404Spjd */ 2856219089Spjd/* ARGSUSED */ 2857168404Spjdvoid 2858219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2859168404Spjd{ 2860219089Spjd ztest_shared_t *zs = ztest_shared; 2861236143Smm spa_t *spa = ztest_spa; 2862185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2863168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2864168404Spjd vdev_t *oldvd, *newvd, *pvd; 2865185029Spjd nvlist_t *root; 2866219089Spjd uint64_t leaves; 2867168404Spjd uint64_t leaf, top; 2868168404Spjd uint64_t ashift = ztest_get_ashift(); 2869209962Smm uint64_t oldguid, pguid; 2870254112Sdelphij uint64_t oldsize, newsize; 2871168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2872168404Spjd int replacing; 2873185029Spjd int oldvd_has_siblings = B_FALSE; 2874185029Spjd int newvd_is_spare = B_FALSE; 2875185029Spjd int oldvd_is_log; 2876168404Spjd int error, expected_error; 2877168404Spjd 2878332545Smav mutex_enter(&ztest_vdev_lock); 2879236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2880168404Spjd 2881332525Smav spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2882168404Spjd 2883168404Spjd /* 2884332525Smav * If a vdev is in the process of being removed, its removal may 2885332525Smav * finish while we are in progress, leading to an unexpected error 2886332525Smav * value. Don't bother trying to attach while we are in the middle 2887332525Smav * of removal. 2888332525Smav */ 2889339106Smav if (ztest_device_removal_active) { 2890332525Smav spa_config_exit(spa, SCL_ALL, FTAG); 2891332545Smav mutex_exit(&ztest_vdev_lock); 2892332525Smav return; 2893332525Smav } 2894332525Smav 2895332525Smav /* 2896168404Spjd * Decide whether to do an attach or a replace. 2897168404Spjd */ 2898168404Spjd replacing = ztest_random(2); 2899168404Spjd 2900168404Spjd /* 2901168404Spjd * Pick a random top-level vdev. 2902168404Spjd */ 2903219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2904168404Spjd 2905168404Spjd /* 2906168404Spjd * Pick a random leaf within it. 
2907168404Spjd */ 2908168404Spjd leaf = ztest_random(leaves); 2909168404Spjd 2910168404Spjd /* 2911185029Spjd * Locate this vdev. 2912168404Spjd */ 2913185029Spjd oldvd = rvd->vdev_child[top]; 2914219089Spjd if (zs->zs_mirrors >= 1) { 2915209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2916219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2917236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2918209962Smm } 2919236143Smm if (ztest_opts.zo_raidz > 1) { 2920209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2921236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2922236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2923209962Smm } 2924168404Spjd 2925168404Spjd /* 2926185029Spjd * If we're already doing an attach or replace, oldvd may be a 2927185029Spjd * mirror vdev -- in which case, pick a random child. 2928168404Spjd */ 2929185029Spjd while (oldvd->vdev_children != 0) { 2930185029Spjd oldvd_has_siblings = B_TRUE; 2931209962Smm ASSERT(oldvd->vdev_children >= 2); 2932209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2933185029Spjd } 2934168404Spjd 2935185029Spjd oldguid = oldvd->vdev_guid; 2936219089Spjd oldsize = vdev_get_min_asize(oldvd); 2937185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2938185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2939185029Spjd pvd = oldvd->vdev_parent; 2940209962Smm pguid = pvd->vdev_guid; 2941185029Spjd 2942168404Spjd /* 2943185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2944168404Spjd */ 2945185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2946332525Smav spa_config_exit(spa, SCL_ALL, FTAG); 2947209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2948209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2949332547Smav error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS && 2950332547Smav error != ZFS_ERR_DISCARDING_CHECKPOINT) 2951209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2952332545Smav mutex_exit(&ztest_vdev_lock); 2953185029Spjd return; 2954185029Spjd } 2955168404Spjd 2956168404Spjd /* 2957185029Spjd * For the new vdev, choose with equal probability between the two 2958185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2959168404Spjd */ 2960185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2961185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2962185029Spjd newvd_is_spare = B_TRUE; 2963185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2964185029Spjd } else { 2965185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2966236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2967236143Smm top * leaves + leaf); 2968185029Spjd if (ztest_random(2) == 0) 2969185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2970185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2971185029Spjd } 2972168404Spjd 2973185029Spjd if (newvd) { 2974332525Smav /* 2975332525Smav * Reopen to ensure the vdev's asize field isn't stale. 2976332525Smav */ 2977332525Smav vdev_reopen(newvd); 2978219089Spjd newsize = vdev_get_min_asize(newvd); 2979185029Spjd } else { 2980185029Spjd /* 2981185029Spjd * Make newsize a little bigger or smaller than oldsize. 2982185029Spjd * If it's smaller, the attach should fail. 2983185029Spjd * If it's larger, and we're doing a replace, 2984185029Spjd * we should get dynamic LUN growth when we're done. 
2985185029Spjd */ 2986185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2987185029Spjd } 2988185029Spjd 2989168404Spjd /* 2990168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2991168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 2992168404Spjd * 2993168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2994168404Spjd * 2995168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2996168404Spjd */ 2997185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2998185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2999185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 3000185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 3001185029Spjd expected_error = ENOTSUP; 3002185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 3003185029Spjd expected_error = ENOTSUP; 3004185029Spjd else if (newvd == oldvd) 3005185029Spjd expected_error = replacing ? 0 : EBUSY; 3006185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 3007168404Spjd expected_error = EBUSY; 3008168404Spjd else if (newsize < oldsize) 3009168404Spjd expected_error = EOVERFLOW; 3010168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 3011168404Spjd expected_error = EDOM; 3012168404Spjd else 3013168404Spjd expected_error = 0; 3014168404Spjd 3015332525Smav spa_config_exit(spa, SCL_ALL, FTAG); 3016168404Spjd 3017168404Spjd /* 3018168404Spjd * Build the nvlist describing newpath. 3019168404Spjd */ 3020243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? 
newsize : 0, 3021185029Spjd ashift, 0, 0, 0, 1); 3022168404Spjd 3023185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 3024168404Spjd 3025168404Spjd nvlist_free(root); 3026168404Spjd 3027168404Spjd /* 3028168404Spjd * If our parent was the replacing vdev, but the replace completed, 3029168404Spjd * then instead of failing with ENOTSUP we may either succeed, 3030168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 3031168404Spjd */ 3032168404Spjd if (expected_error == ENOTSUP && 3033168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 3034168404Spjd expected_error = error; 3035168404Spjd 3036168404Spjd /* 3037168404Spjd * If someone grew the LUN, the replacement may be too small. 3038168404Spjd */ 3039185029Spjd if (error == EOVERFLOW || error == EBUSY) 3040168404Spjd expected_error = error; 3041168404Spjd 3042332547Smav if (error == ZFS_ERR_CHECKPOINT_EXISTS || 3043332547Smav error == ZFS_ERR_DISCARDING_CHECKPOINT) 3044332547Smav expected_error = error; 3045332547Smav 3046185029Spjd /* XXX workaround 6690467 */ 3047185029Spjd if (error != expected_error && expected_error != EBUSY) { 3048185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 3049185029Spjd "returned %d, expected %d", 3050254112Sdelphij oldpath, oldsize, newpath, 3051254112Sdelphij newsize, replacing, error, expected_error); 3052168404Spjd } 3053168404Spjd 3054332545Smav mutex_exit(&ztest_vdev_lock); 3055168404Spjd} 3056168404Spjd 3057332525Smav/* ARGSUSED */ 3058332525Smavvoid 3059332525Smavztest_device_removal(ztest_ds_t *zd, uint64_t id) 3060332525Smav{ 3061332525Smav spa_t *spa = ztest_spa; 3062332525Smav vdev_t *vd; 3063332525Smav uint64_t guid; 3064339106Smav int error; 3065332525Smav 3066332545Smav mutex_enter(&ztest_vdev_lock); 3067332525Smav 3068339106Smav if (ztest_device_removal_active) { 3069339106Smav mutex_exit(&ztest_vdev_lock); 3070339106Smav return; 3071339106Smav } 3072339106Smav 3073339106Smav /* 3074339106Smav * Remove a random top-level vdev and wait 
for removal to finish. 3075339106Smav */ 3076332525Smav spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 3077332525Smav vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE)); 3078332525Smav guid = vd->vdev_guid; 3079332525Smav spa_config_exit(spa, SCL_VDEV, FTAG); 3080332525Smav 3081339106Smav error = spa_vdev_remove(spa, guid, B_FALSE); 3082339106Smav if (error == 0) { 3083339106Smav ztest_device_removal_active = B_TRUE; 3084339106Smav mutex_exit(&ztest_vdev_lock); 3085332525Smav 3086339106Smav while (spa->spa_vdev_removal != NULL) 3087339106Smav txg_wait_synced(spa_get_dsl(spa), 0); 3088339106Smav } else { 3089339106Smav mutex_exit(&ztest_vdev_lock); 3090339106Smav return; 3091339106Smav } 3092339106Smav 3093339106Smav /* 3094339106Smav * The pool needs to be scrubbed after completing device removal. 3095339106Smav * Failure to do so may result in checksum errors due to the 3096339106Smav * strategy employed by ztest_fault_inject() when selecting which 3097339106Smav * offset are redundant and can be damaged. 3098339106Smav */ 3099339106Smav error = spa_scan(spa, POOL_SCAN_SCRUB); 3100339106Smav if (error == 0) { 3101339106Smav while (dsl_scan_scrubbing(spa_get_dsl(spa))) 3102339106Smav txg_wait_synced(spa_get_dsl(spa), 0); 3103339106Smav } 3104339106Smav 3105339106Smav mutex_enter(&ztest_vdev_lock); 3106339106Smav ztest_device_removal_active = B_FALSE; 3107332545Smav mutex_exit(&ztest_vdev_lock); 3108332525Smav} 3109332525Smav 3110168404Spjd/* 3111219089Spjd * Callback function which expands the physical size of the vdev. 
3112168404Spjd */ 3113219089Spjdvdev_t * 3114219089Spjdgrow_vdev(vdev_t *vd, void *arg) 3115168404Spjd{ 3116219089Spjd spa_t *spa = vd->vdev_spa; 3117219089Spjd size_t *newsize = arg; 3118168404Spjd size_t fsize; 3119168404Spjd int fd; 3120168404Spjd 3121219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 3122219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3123168404Spjd 3124219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 3125219089Spjd return (vd); 3126219089Spjd 3127219089Spjd fsize = lseek(fd, 0, SEEK_END); 3128219089Spjd (void) ftruncate(fd, *newsize); 3129219089Spjd 3130236143Smm if (ztest_opts.zo_verbose >= 6) { 3131219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 3132219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 3133219089Spjd } 3134219089Spjd (void) close(fd); 3135219089Spjd return (NULL); 3136219089Spjd} 3137219089Spjd 3138219089Spjd/* 3139219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
3140219089Spjd */ 3141219089Spjd/* ARGSUSED */ 3142219089Spjdvdev_t * 3143219089Spjdonline_vdev(vdev_t *vd, void *arg) 3144219089Spjd{ 3145219089Spjd spa_t *spa = vd->vdev_spa; 3146219089Spjd vdev_t *tvd = vd->vdev_top; 3147219089Spjd uint64_t guid = vd->vdev_guid; 3148219089Spjd uint64_t generation = spa->spa_config_generation + 1; 3149219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 3150219089Spjd int error; 3151219089Spjd 3152219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 3153219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3154219089Spjd 3155219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 3156219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3157219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 3158219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3159219089Spjd 3160168404Spjd /* 3161219089Spjd * If vdev_online returned an error or the underlying vdev_open 3162219089Spjd * failed then we abort the expand. The only way to know that 3163219089Spjd * vdev_open fails is by checking the returned newstate. 3164168404Spjd */ 3165219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 3166236143Smm if (ztest_opts.zo_verbose >= 5) { 3167219089Spjd (void) printf("Unable to expand vdev, state %llu, " 3168219089Spjd "error %d\n", (u_longlong_t)newstate, error); 3169219089Spjd } 3170219089Spjd return (vd); 3171219089Spjd } 3172219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 3173168404Spjd 3174219089Spjd /* 3175219089Spjd * Since we dropped the lock we need to ensure that we're 3176219089Spjd * still talking to the original vdev. It's possible this 3177219089Spjd * vdev may have been detached/replaced while we were 3178219089Spjd * trying to online it. 
3179219089Spjd */ 3180219089Spjd if (generation != spa->spa_config_generation) { 3181236143Smm if (ztest_opts.zo_verbose >= 5) { 3182219089Spjd (void) printf("vdev configuration has changed, " 3183219089Spjd "guid %llu, state %llu, expected gen %llu, " 3184219089Spjd "got gen %llu\n", 3185219089Spjd (u_longlong_t)guid, 3186219089Spjd (u_longlong_t)tvd->vdev_state, 3187219089Spjd (u_longlong_t)generation, 3188219089Spjd (u_longlong_t)spa->spa_config_generation); 3189219089Spjd } 3190219089Spjd return (vd); 3191219089Spjd } 3192219089Spjd return (NULL); 3193219089Spjd} 3194168404Spjd 3195219089Spjd/* 3196219089Spjd * Traverse the vdev tree calling the supplied function. 3197219089Spjd * We continue to walk the tree until we either have walked all 3198219089Spjd * children or we receive a non-NULL return from the callback. 3199219089Spjd * If a NULL callback is passed, then we just return back the first 3200219089Spjd * leaf vdev we encounter. 3201219089Spjd */ 3202219089Spjdvdev_t * 3203219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 3204219089Spjd{ 3205219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 3206219089Spjd if (func == NULL) 3207219089Spjd return (vd); 3208219089Spjd else 3209219089Spjd return (func(vd, arg)); 3210219089Spjd } 3211168404Spjd 3212219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3213219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3214219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3215219089Spjd return (cvd); 3216219089Spjd } 3217219089Spjd return (NULL); 3218219089Spjd} 3219219089Spjd 3220219089Spjd/* 3221219089Spjd * Verify that dynamic LUN growth works as expected. 
3222219089Spjd */ 3223219089Spjd/* ARGSUSED */ 3224219089Spjdvoid 3225219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3226219089Spjd{ 3227236143Smm spa_t *spa = ztest_spa; 3228219089Spjd vdev_t *vd, *tvd; 3229219089Spjd metaslab_class_t *mc; 3230219089Spjd metaslab_group_t *mg; 3231219089Spjd size_t psize, newsize; 3232219089Spjd uint64_t top; 3233219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3234219089Spjd 3235332547Smav mutex_enter(&ztest_checkpoint_lock); 3236332545Smav mutex_enter(&ztest_vdev_lock); 3237219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3238219089Spjd 3239332525Smav /* 3240332525Smav * If there is a vdev removal in progress, it could complete while 3241332525Smav * we are running, in which case we would not be able to verify 3242332525Smav * that the metaslab_class space increased (because it decreases 3243332525Smav * when the device removal completes). 3244332525Smav */ 3245339106Smav if (ztest_device_removal_active) { 3246332547Smav spa_config_exit(spa, SCL_STATE, spa); 3247332545Smav mutex_exit(&ztest_vdev_lock); 3248332547Smav mutex_exit(&ztest_checkpoint_lock); 3249332525Smav return; 3250332525Smav } 3251332525Smav 3252219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3253219089Spjd 3254219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3255219089Spjd mg = tvd->vdev_mg; 3256219089Spjd mc = mg->mg_class; 3257219089Spjd old_ms_count = tvd->vdev_ms_count; 3258219089Spjd old_class_space = metaslab_class_get_space(mc); 3259219089Spjd 3260219089Spjd /* 3261219089Spjd * Determine the size of the first leaf vdev associated with 3262219089Spjd * our top-level device. 
3263219089Spjd */ 3264219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3265219089Spjd ASSERT3P(vd, !=, NULL); 3266219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3267219089Spjd 3268219089Spjd psize = vd->vdev_psize; 3269219089Spjd 3270219089Spjd /* 3271219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3272219089Spjd * original size, and it has a valid psize. 3273219089Spjd */ 3274219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3275236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3276219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3277332545Smav mutex_exit(&ztest_vdev_lock); 3278332547Smav mutex_exit(&ztest_checkpoint_lock); 3279219089Spjd return; 3280219089Spjd } 3281219089Spjd ASSERT(psize > 0); 3282219089Spjd newsize = psize + psize / 8; 3283219089Spjd ASSERT3U(newsize, >, psize); 3284219089Spjd 3285236143Smm if (ztest_opts.zo_verbose >= 6) { 3286219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3287219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3288219089Spjd } 3289219089Spjd 3290219089Spjd /* 3291219089Spjd * Growing the vdev is a two step process: 3292219089Spjd * 1). expand the physical size (i.e. relabel) 3293219089Spjd * 2). 
online the vdev to create the new metaslabs 3294219089Spjd */ 3295219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3296219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3297219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3298236143Smm if (ztest_opts.zo_verbose >= 5) { 3299219089Spjd (void) printf("Could not expand LUN because " 3300219089Spjd "the vdev configuration changed.\n"); 3301168404Spjd } 3302219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3303332545Smav mutex_exit(&ztest_vdev_lock); 3304332547Smav mutex_exit(&ztest_checkpoint_lock); 3305219089Spjd return; 3306168404Spjd } 3307168404Spjd 3308219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3309219089Spjd 3310219089Spjd /* 3311219089Spjd * Expanding the LUN will update the config asynchronously, 3312219089Spjd * thus we must wait for the async thread to complete any 3313219089Spjd * pending tasks before proceeding. 3314219089Spjd */ 3315219089Spjd for (;;) { 3316219089Spjd boolean_t done; 3317219089Spjd mutex_enter(&spa->spa_async_lock); 3318219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3319219089Spjd mutex_exit(&spa->spa_async_lock); 3320219089Spjd if (done) 3321219089Spjd break; 3322219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3323219089Spjd (void) poll(NULL, 0, 100); 3324219089Spjd } 3325219089Spjd 3326219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3327219089Spjd 3328219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3329219089Spjd new_ms_count = tvd->vdev_ms_count; 3330219089Spjd new_class_space = metaslab_class_get_space(mc); 3331219089Spjd 3332219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3333236143Smm if (ztest_opts.zo_verbose >= 5) { 3334219089Spjd (void) printf("Could not verify LUN expansion due to " 3335219089Spjd "intervening vdev offline or remove.\n"); 3336219089Spjd } 3337219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3338332545Smav mutex_exit(&ztest_vdev_lock); 3339332547Smav 
mutex_exit(&ztest_checkpoint_lock); 3340219089Spjd return; 3341219089Spjd } 3342219089Spjd 3343219089Spjd /* 3344219089Spjd * Make sure we were able to grow the vdev. 3345219089Spjd */ 3346332525Smav if (new_ms_count <= old_ms_count) { 3347332525Smav fatal(0, "LUN expansion failed: ms_count %llu < %llu\n", 3348219089Spjd old_ms_count, new_ms_count); 3349332525Smav } 3350219089Spjd 3351219089Spjd /* 3352219089Spjd * Make sure we were able to grow the pool. 3353219089Spjd */ 3354332525Smav if (new_class_space <= old_class_space) { 3355332525Smav fatal(0, "LUN expansion failed: class_space %llu < %llu\n", 3356219089Spjd old_class_space, new_class_space); 3357332525Smav } 3358219089Spjd 3359236143Smm if (ztest_opts.zo_verbose >= 5) { 3360325914Savg char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ]; 3361219089Spjd 3362325914Savg nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf)); 3363325914Savg nicenum(new_class_space, newnumbuf, sizeof (newnumbuf)); 3364219089Spjd (void) printf("%s grew from %s to %s\n", 3365219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3366219089Spjd } 3367219089Spjd 3368219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3369332545Smav mutex_exit(&ztest_vdev_lock); 3370332547Smav mutex_exit(&ztest_checkpoint_lock); 3371168404Spjd} 3372168404Spjd 3373219089Spjd/* 3374219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3375219089Spjd */ 3376168404Spjd/* ARGSUSED */ 3377168404Spjdstatic void 3378219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3379168404Spjd{ 3380168404Spjd /* 3381219089Spjd * Create the objects common to all ztest datasets. 
3382168404Spjd */ 3383219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3384168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3385219089Spjd} 3386168404Spjd 3387219089Spjdstatic int 3388219089Spjdztest_dataset_create(char *dsname) 3389219089Spjd{ 3390219089Spjd uint64_t zilset = ztest_random(100); 3391219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3392219089Spjd ztest_objset_create_cb, NULL); 3393219089Spjd 3394219089Spjd if (err || zilset < 80) 3395219089Spjd return (err); 3396219089Spjd 3397236143Smm if (ztest_opts.zo_verbose >= 6) 3398236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3399219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3400219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3401168404Spjd} 3402168404Spjd 3403219089Spjd/* ARGSUSED */ 3404168404Spjdstatic int 3405219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3406168404Spjd{ 3407168404Spjd objset_t *os; 3408219089Spjd dmu_object_info_t doi; 3409168404Spjd int error; 3410168404Spjd 3411168404Spjd /* 3412168404Spjd * Verify that the dataset contains a directory object. 3413168404Spjd */ 3414248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); 3415219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3416168404Spjd if (error != ENOENT) { 3417168404Spjd /* We could have crashed in the middle of destroying it */ 3418240415Smm ASSERT0(error); 3419219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3420219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3421168404Spjd } 3422248571Smm dmu_objset_disown(os, FTAG); 3423168404Spjd 3424168404Spjd /* 3425168404Spjd * Destroy the dataset. 
3426168404Spjd */ 3427248571Smm if (strchr(name, '@') != NULL) { 3428248571Smm VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); 3429248571Smm } else { 3430248571Smm VERIFY0(dsl_destroy_head(name)); 3431248571Smm } 3432168404Spjd return (0); 3433168404Spjd} 3434168404Spjd 3435219089Spjdstatic boolean_t 3436219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3437168404Spjd{ 3438307108Smav char snapname[ZFS_MAX_DATASET_NAME_LEN]; 3439219089Spjd int error; 3440168404Spjd 3441248571Smm (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); 3442168404Spjd 3443248571Smm error = dmu_objset_snapshot_one(osname, snapname); 3444219089Spjd if (error == ENOSPC) { 3445219089Spjd ztest_record_enospc(FTAG); 3446219089Spjd return (B_FALSE); 3447219089Spjd } 3448248571Smm if (error != 0 && error != EEXIST) { 3449248571Smm fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, 3450248571Smm snapname, error); 3451248571Smm } 3452219089Spjd return (B_TRUE); 3453219089Spjd} 3454168404Spjd 3455219089Spjdstatic boolean_t 3456219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3457219089Spjd{ 3458307108Smav char snapname[ZFS_MAX_DATASET_NAME_LEN]; 3459219089Spjd int error; 3460219089Spjd 3461307108Smav (void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname, 3462219089Spjd (u_longlong_t)id); 3463219089Spjd 3464248571Smm error = dsl_destroy_snapshot(snapname, B_FALSE); 3465219089Spjd if (error != 0 && error != ENOENT) 3466219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3467219089Spjd return (B_TRUE); 3468168404Spjd} 3469168404Spjd 3470219089Spjd/* ARGSUSED */ 3471168404Spjdvoid 3472219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3473168404Spjd{ 3474219089Spjd ztest_ds_t zdtmp; 3475219089Spjd int iters; 3476168404Spjd int error; 3477185029Spjd objset_t *os, *os2; 3478307108Smav char name[ZFS_MAX_DATASET_NAME_LEN]; 3479168404Spjd zilog_t *zilog; 3480168404Spjd 3481332545Smav rw_enter(&ztest_name_lock, 
RW_READER); 3482168404Spjd 3483307108Smav (void) snprintf(name, sizeof (name), "%s/temp_%llu", 3484236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 3485168404Spjd 3486168404Spjd /* 3487168404Spjd * If this dataset exists from a previous run, process its replay log 3488168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3489219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3490168404Spjd */ 3491168404Spjd if (ztest_random(2) == 0 && 3492219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3493236143Smm ztest_zd_init(&zdtmp, NULL, os); 3494219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3495219089Spjd ztest_zd_fini(&zdtmp); 3496219089Spjd dmu_objset_disown(os, FTAG); 3497168404Spjd } 3498168404Spjd 3499168404Spjd /* 3500168404Spjd * There may be an old instance of the dataset we're about to 3501168404Spjd * create lying around from a previous run. If so, destroy it 3502168404Spjd * and all of its snapshots. 3503168404Spjd */ 3504219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3505168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3506168404Spjd 3507168404Spjd /* 3508168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3509168404Spjd */ 3510248571Smm VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, 3511248571Smm FTAG, &os)); 3512168404Spjd 3513168404Spjd /* 3514168404Spjd * Verify that we can create a new dataset. 
3515168404Spjd */ 3516219089Spjd error = ztest_dataset_create(name); 3517168404Spjd if (error) { 3518168404Spjd if (error == ENOSPC) { 3519219089Spjd ztest_record_enospc(FTAG); 3520332545Smav rw_exit(&ztest_name_lock); 3521168404Spjd return; 3522168404Spjd } 3523168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3524168404Spjd } 3525168404Spjd 3526248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3527168404Spjd 3528236143Smm ztest_zd_init(&zdtmp, NULL, os); 3529219089Spjd 3530168404Spjd /* 3531168404Spjd * Open the intent log for it. 3532168404Spjd */ 3533219089Spjd zilog = zil_open(os, ztest_get_data); 3534168404Spjd 3535168404Spjd /* 3536219089Spjd * Put some objects in there, do a little I/O to them, 3537219089Spjd * and randomly take a couple of snapshots along the way. 3538168404Spjd */ 3539219089Spjd iters = ztest_random(5); 3540219089Spjd for (int i = 0; i < iters; i++) { 3541219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3542219089Spjd if (ztest_random(iters) == 0) 3543219089Spjd (void) ztest_snapshot_create(name, i); 3544168404Spjd } 3545168404Spjd 3546168404Spjd /* 3547168404Spjd * Verify that we cannot create an existing dataset. 3548168404Spjd */ 3549219089Spjd VERIFY3U(EEXIST, ==, 3550219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3551168404Spjd 3552168404Spjd /* 3553219089Spjd * Verify that we can hold an objset that is also owned. 3554168404Spjd */ 3555219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3556219089Spjd dmu_objset_rele(os2, FTAG); 3557168404Spjd 3558219089Spjd /* 3559219089Spjd * Verify that we cannot own an objset that is already owned. 
3560219089Spjd */ 3561219089Spjd VERIFY3U(EBUSY, ==, 3562219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3563219089Spjd 3564168404Spjd zil_close(zilog); 3565219089Spjd dmu_objset_disown(os, FTAG); 3566219089Spjd ztest_zd_fini(&zdtmp); 3567168404Spjd 3568332545Smav rw_exit(&ztest_name_lock); 3569168404Spjd} 3570168404Spjd 3571168404Spjd/* 3572168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 3573168404Spjd */ 3574168404Spjdvoid 3575219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3576168404Spjd{ 3577332545Smav rw_enter(&ztest_name_lock, RW_READER); 3578219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3579219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3580332545Smav rw_exit(&ztest_name_lock); 3581219089Spjd} 3582219089Spjd 3583219089Spjd/* 3584219089Spjd * Cleanup non-standard snapshots and clones. 3585219089Spjd */ 3586219089Spjdvoid 3587219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3588219089Spjd{ 3589307108Smav char snap1name[ZFS_MAX_DATASET_NAME_LEN]; 3590307108Smav char clone1name[ZFS_MAX_DATASET_NAME_LEN]; 3591307108Smav char snap2name[ZFS_MAX_DATASET_NAME_LEN]; 3592307108Smav char clone2name[ZFS_MAX_DATASET_NAME_LEN]; 3593307108Smav char snap3name[ZFS_MAX_DATASET_NAME_LEN]; 3594168404Spjd int error; 3595168404Spjd 3596307108Smav (void) snprintf(snap1name, sizeof (snap1name), 3597307108Smav "%s@s1_%llu", osname, id); 3598307108Smav (void) snprintf(clone1name, sizeof (clone1name), 3599307108Smav "%s/c1_%llu", osname, id); 3600307108Smav (void) snprintf(snap2name, sizeof (snap2name), 3601307108Smav "%s@s2_%llu", clone1name, id); 3602307108Smav (void) snprintf(clone2name, sizeof (clone2name), 3603307108Smav "%s/c2_%llu", osname, id); 3604307108Smav (void) snprintf(snap3name, sizeof (snap3name), 3605307108Smav "%s@s3_%llu", clone1name, id); 3606168404Spjd 3607248571Smm error = dsl_destroy_head(clone2name); 3608219089Spjd if (error && error != 
ENOENT) 3609248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); 3610248571Smm error = dsl_destroy_snapshot(snap3name, B_FALSE); 3611219089Spjd if (error && error != ENOENT) 3612248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); 3613248571Smm error = dsl_destroy_snapshot(snap2name, B_FALSE); 3614219089Spjd if (error && error != ENOENT) 3615248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); 3616248571Smm error = dsl_destroy_head(clone1name); 3617219089Spjd if (error && error != ENOENT) 3618248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); 3619248571Smm error = dsl_destroy_snapshot(snap1name, B_FALSE); 3620219089Spjd if (error && error != ENOENT) 3621248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); 3622168404Spjd} 3623168404Spjd 3624168404Spjd/* 3625207910Smm * Verify dsl_dataset_promote handles EBUSY 3626207910Smm */ 3627207910Smmvoid 3628219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3629207910Smm{ 3630248571Smm objset_t *os; 3631307108Smav char snap1name[ZFS_MAX_DATASET_NAME_LEN]; 3632307108Smav char clone1name[ZFS_MAX_DATASET_NAME_LEN]; 3633307108Smav char snap2name[ZFS_MAX_DATASET_NAME_LEN]; 3634307108Smav char clone2name[ZFS_MAX_DATASET_NAME_LEN]; 3635307108Smav char snap3name[ZFS_MAX_DATASET_NAME_LEN]; 3636219089Spjd char *osname = zd->zd_name; 3637219089Spjd int error; 3638207910Smm 3639332545Smav rw_enter(&ztest_name_lock, RW_READER); 3640207910Smm 3641219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3642207910Smm 3643307108Smav (void) snprintf(snap1name, sizeof (snap1name), 3644307108Smav "%s@s1_%llu", osname, id); 3645307108Smav (void) snprintf(clone1name, sizeof (clone1name), 3646307108Smav "%s/c1_%llu", osname, id); 3647307108Smav (void) snprintf(snap2name, sizeof (snap2name), 3648307108Smav "%s@s2_%llu", clone1name, id); 3649307108Smav (void) snprintf(clone2name, sizeof (clone2name), 3650307108Smav "%s/c2_%llu", osname, id); 
3651307108Smav (void) snprintf(snap3name, sizeof (snap3name), 3652307108Smav "%s@s3_%llu", clone1name, id); 3653207910Smm 3654248571Smm error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); 3655209962Smm if (error && error != EEXIST) { 3656209962Smm if (error == ENOSPC) { 3657209962Smm ztest_record_enospc(FTAG); 3658209962Smm goto out; 3659209962Smm } 3660209962Smm fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3661209962Smm } 3662207910Smm 3663248571Smm error = dmu_objset_clone(clone1name, snap1name); 3664209962Smm if (error) { 3665209962Smm if (error == ENOSPC) { 3666209962Smm ztest_record_enospc(FTAG); 3667209962Smm goto out; 3668209962Smm } 3669207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3670209962Smm } 3671207910Smm 3672248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); 3673209962Smm if (error && error != EEXIST) { 3674209962Smm if (error == ENOSPC) { 3675209962Smm ztest_record_enospc(FTAG); 3676209962Smm goto out; 3677209962Smm } 3678209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3679209962Smm } 3680207910Smm 3681248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); 3682209962Smm if (error && error != EEXIST) { 3683209962Smm if (error == ENOSPC) { 3684209962Smm ztest_record_enospc(FTAG); 3685209962Smm goto out; 3686209962Smm } 3687209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3688209962Smm } 3689207910Smm 3690248571Smm error = dmu_objset_clone(clone2name, snap3name); 3691209962Smm if (error) { 3692209962Smm if (error == ENOSPC) { 3693219089Spjd ztest_record_enospc(FTAG); 3694209962Smm goto out; 3695209962Smm } 3696207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3697209962Smm } 3698207910Smm 3699248571Smm error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); 3700207910Smm if (error) 3701248571Smm fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); 3702219089Spjd error = 
dsl_dataset_promote(clone2name, NULL); 3703268075Sdelphij if (error == ENOSPC) { 3704268075Sdelphij dmu_objset_disown(os, FTAG); 3705268075Sdelphij ztest_record_enospc(FTAG); 3706268075Sdelphij goto out; 3707268075Sdelphij } 3708207910Smm if (error != EBUSY) 3709207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3710207910Smm error); 3711248571Smm dmu_objset_disown(os, FTAG); 3712207910Smm 3713209962Smmout: 3714219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3715207910Smm 3716332545Smav rw_exit(&ztest_name_lock); 3717207910Smm} 3718207910Smm 3719207910Smm/* 3720168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3721168404Spjd */ 3722168404Spjdvoid 3723219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3724168404Spjd{ 3725219089Spjd ztest_od_t od[4]; 3726219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3727168404Spjd 3728219089Spjd for (int b = 0; b < batchsize; b++) 3729219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3730168404Spjd 3731168404Spjd /* 3732219089Spjd * Destroy the previous batch of objects, create a new batch, 3733219089Spjd * and do some I/O on the new objects. 3734168404Spjd */ 3735219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3736219089Spjd return; 3737168404Spjd 3738219089Spjd while (ztest_random(4 * batchsize) != 0) 3739219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3740219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3741168404Spjd} 3742168404Spjd 3743168404Spjd/* 3744168404Spjd * Verify that dmu_{read,write} work as expected. 
 */
void
ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
{
	objset_t *os = zd->zd_os;
	ztest_od_t od[2];
	dmu_tx_t *tx;
	int i, freeit, error;
	uint64_t n, s, txg;
	bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
	uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t);
	uint64_t regions = 997;
	uint64_t stride = 123456789ULL;
	uint64_t width = 40;
	int free_percent = 5;

	/*
	 * This test uses two objects, packobj and bigobj, that are always
	 * updated together (i.e. in the same tx) so that their contents are
	 * in sync and can be compared.  Their contents relate to each other
	 * in a simple way: packobj is a dense array of 'bufwad' structures,
	 * while bigobj is a sparse array of the same bufwads.  Specifically,
	 * for any index n, there are three bufwads that should be identical:
	 *
	 *	packobj, at offset n * sizeof (bufwad_t)
	 *	bigobj, at the head of the nth chunk
	 *	bigobj, at the tail of the nth chunk
	 *
	 * The chunk size is arbitrary.  It doesn't have to be a power of two,
	 * and it doesn't have any relation to the object blocksize.
	 * The only requirement is that it can hold at least two bufwads.
	 *
	 * Normally, we write the bufwad to each of these locations.
	 * However, free_percent of the time we instead write zeroes to
	 * packobj and perform a dmu_free_range() on bigobj.  By comparing
	 * bigobj to packobj, we can verify that the DMU is correctly
	 * tracking which parts of an object are allocated and free,
	 * and that the contents of the allocated blocks are correct.
	 */

	/*
	 * Read the directory info.  If it's the first time, set things up.
	 */
	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
	ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);

	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
		return;

	/* od_gen carries the chunk size chosen when the objects were created. */
	bigobj = od[0].od_object;
	packobj = od[1].od_object;
	chunksize = od[0].od_gen;
	ASSERT(chunksize == od[1].od_gen);

	/*
	 * Prefetch a random chunk of the big object.
	 * Our aim here is to get some async reads in flight
	 * for blocks that we may free below; the DMU should
	 * handle this race correctly.
	 */
	n = ztest_random(regions) * stride + ztest_random(width);
	s = 1 + ztest_random(2 * width - 1);
	dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
	    ZIO_PRIORITY_SYNC_READ);

	/*
	 * Pick a random index and compute the offsets into packobj and bigobj.
	 */
	n = ztest_random(regions) * stride + ztest_random(width);
	s = 1 + ztest_random(width - 1);

	packoff = n * sizeof (bufwad_t);
	packsize = s * sizeof (bufwad_t);

	bigoff = n * chunksize;
	bigsize = s * chunksize;

	packbuf = umem_alloc(packsize, UMEM_NOFAIL);
	bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);

	/*
	 * free_percent of the time, free a range of bigobj rather than
	 * overwriting it.
	 */
	freeit = (ztest_random(100) < free_percent);

	/*
	 * Read the current contents of our objects.
	 */
	error = dmu_read(os, packobj, packoff, packsize, packbuf,
	    DMU_READ_PREFETCH);
	ASSERT0(error);
	error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
	    DMU_READ_PREFETCH);
	ASSERT0(error);

	/*
	 * Get a tx for the mods to both packobj and bigobj.
	 */
	tx = dmu_tx_create(os);

	dmu_tx_hold_write(tx, packobj, packoff, packsize);

	if (freeit)
		dmu_tx_hold_free(tx, bigobj, bigoff, bigsize);
	else
		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);

	/* This accounts for setting the checksum/compression. */
	dmu_tx_hold_bonus(tx, bigobj);

	/* txg == 0 means the assignment failed; free buffers and bail. */
	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
	if (txg == 0) {
		umem_free(packbuf, packsize);
		umem_free(bigbuf, bigsize);
		return;
	}

	/* Pick a random non-legacy checksum/compression for bigobj. */
	enum zio_checksum cksum;
	do {
		cksum = (enum zio_checksum)
		    ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
	} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
	dmu_object_set_checksum(os, bigobj, cksum, tx);

	enum zio_compress comp;
	do {
		comp = (enum zio_compress)
		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
	} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
	dmu_object_set_compress(os, bigobj, comp, tx);

	/*
	 * For each index from n to n + s, verify that the existing bufwad
	 * in packobj matches the bufwads at the head and tail of the
	 * corresponding chunk in bigobj.  Then update all three bufwads
	 * with the new values we want to write out.
	 */
	for (i = 0; i < s; i++) {
		/* LINTED */
		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
		/* LINTED */
		bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
		/* LINTED */
		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;

		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);

		if (pack->bw_txg > txg)
			fatal(0, "future leak: got %llx, open txg is %llx",
			    pack->bw_txg, txg);

		if (pack->bw_data != 0 && pack->bw_index != n + i)
			fatal(0, "wrong index: got %llx, wanted %llx+%llx",
			    pack->bw_index, n, i);

		if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
			fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);

		if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
			fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);

		if (freeit) {
			bzero(pack, sizeof (bufwad_t));
		} else {
			pack->bw_index = n + i;
			pack->bw_txg = txg;
			pack->bw_data = 1 + ztest_random(-2ULL);
		}
		*bigH = *pack;
		*bigT = *pack;
	}

	/*
	 * We've verified all the old bufwads, and made new ones.
	 * Now write them out.
	 */
	dmu_write(os, packobj, packoff, packsize, packbuf, tx);

	if (freeit) {
		if (ztest_opts.zo_verbose >= 7) {
			(void) printf("freeing offset %llx size %llx"
			    " txg %llx\n",
			    (u_longlong_t)bigoff,
			    (u_longlong_t)bigsize,
			    (u_longlong_t)txg);
		}
		VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx));
	} else {
		if (ztest_opts.zo_verbose >= 7) {
			(void) printf("writing offset %llx size %llx"
			    " txg %llx\n",
			    (u_longlong_t)bigoff,
			    (u_longlong_t)bigsize,
			    (u_longlong_t)txg);
		}
		dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx);
	}

	dmu_tx_commit(tx);

	/*
	 * Sanity check the stuff we just wrote.
	 */
	{
		void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
		void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);

		VERIFY(0 == dmu_read(os, packobj, packoff,
		    packsize, packcheck, DMU_READ_PREFETCH));
		VERIFY(0 == dmu_read(os, bigobj, bigoff,
		    bigsize, bigcheck, DMU_READ_PREFETCH));

		ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
		ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);

		umem_free(packcheck, packsize);
		umem_free(bigcheck, bigsize);
	}

	umem_free(packbuf, packsize);
	umem_free(bigbuf, bigsize);
}

/*
 * Verify/update one stretch of the packobj/bigobj bufwad mirror
 * (shared by ztest_dmu_read_write_zcopy()).  Unlike the loop in
 * ztest_dmu_read_write(), this always rewrites (never zeroes) the bufwads.
 */
void
compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
    uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
{
	uint64_t i;
	bufwad_t *pack;
	bufwad_t *bigH;
	bufwad_t *bigT;

	/*
	 * For each index from n to n + s, verify that the existing bufwad
	 * in packobj matches the bufwads at the head and tail of the
	 * corresponding chunk in bigobj.  Then update all three bufwads
	 * with the new values we want to write out.
	 */
	for (i = 0; i < s; i++) {
		/* LINTED */
		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
		/* LINTED */
		bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
		/* LINTED */
		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;

		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);

		if (pack->bw_txg > txg)
			fatal(0, "future leak: got %llx, open txg is %llx",
			    pack->bw_txg, txg);

		if (pack->bw_data != 0 && pack->bw_index != n + i)
			fatal(0, "wrong index: got %llx, wanted %llx+%llx",
			    pack->bw_index, n, i);

		if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
			fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);

		if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
			fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);

		pack->bw_index = n + i;
		pack->bw_txg = txg;
		pack->bw_data = 1 + ztest_random(-2ULL);

		*bigH = *pack;
		*bigT = *pack;
	}
}

/*
 * Exercise dmu_assign_arcbuf() zero-copy writes against the same
 * packobj/bigobj mirror used by ztest_dmu_read_write().
 */
void
ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
{
	objset_t *os = zd->zd_os;
	ztest_od_t od[2];
	dmu_tx_t *tx;
	uint64_t i;
	int error;
	uint64_t n, s, txg;
	bufwad_t *packbuf, *bigbuf;
	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
	uint64_t blocksize = ztest_random_blocksize();
	uint64_t chunksize = blocksize;
	uint64_t regions = 997;
	uint64_t stride = 123456789ULL;
	uint64_t width = 9;
	dmu_buf_t *bonus_db;
	arc_buf_t **bigbuf_arcbufs;
	dmu_object_info_t doi;

	/*
	 * This test uses two objects, packobj and bigobj, that are always
	 * updated together (i.e. in the same tx) so that their contents are
	 * in sync and can be compared.  Their contents relate to each other
	 * in a simple way: packobj is a dense array of 'bufwad' structures,
	 * while bigobj is a sparse array of the same bufwads.  Specifically,
	 * for any index n, there are three bufwads that should be identical:
	 *
	 *	packobj, at offset n * sizeof (bufwad_t)
	 *	bigobj, at the head of the nth chunk
	 *	bigobj, at the tail of the nth chunk
	 *
	 * The chunk size is set equal to bigobj block size so that
	 * dmu_assign_arcbuf() can be tested for object updates.
	 */

	/*
	 * Read the directory info.  If it's the first time, set things up.
	 */
	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
	ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);

	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
		return;

	bigobj = od[0].od_object;
	packobj = od[1].od_object;
	blocksize = od[0].od_blocksize;
	chunksize = blocksize;
	ASSERT(chunksize == od[1].od_gen);

	VERIFY(dmu_object_info(os, bigobj, &doi) == 0);
	VERIFY(ISP2(doi.doi_data_block_size));
	VERIFY(chunksize == doi.doi_data_block_size);
	VERIFY(chunksize >= 2 * sizeof (bufwad_t));

	/*
	 * Pick a random index and compute the offsets into packobj and bigobj.
	 */
	n = ztest_random(regions) * stride + ztest_random(width);
	s = 1 + ztest_random(width - 1);

	packoff = n * sizeof (bufwad_t);
	packsize = s * sizeof (bufwad_t);

	bigoff = n * chunksize;
	bigsize = s * chunksize;

	packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
	bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);

	VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));

	/* 2 * s slots: iteration 5 uses two half-size arcbufs per chunk. */
	bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);

	/*
	 * Iteration 0 test zcopy for DB_UNCACHED dbufs.
	 * Iteration 1 test zcopy to already referenced dbufs.
	 * Iteration 2 test zcopy to dirty dbuf in the same txg.
	 * Iteration 3 test zcopy to dbuf dirty in previous txg.
	 * Iteration 4 test zcopy when dbuf is no longer dirty.
	 * Iteration 5 test zcopy when it can't be done.
	 * Iteration 6 one more zcopy write.
	 */
	for (i = 0; i < 7; i++) {
		uint64_t j;
		uint64_t off;

		/*
		 * In iteration 5 (i == 5) use arcbufs
		 * that don't match bigobj blksz to test
		 * dmu_assign_arcbuf() when it can't directly
		 * assign an arcbuf to a dbuf.
		 */
		for (j = 0; j < s; j++) {
			if (i != 5) {
				bigbuf_arcbufs[j] =
				    dmu_request_arcbuf(bonus_db, chunksize);
			} else {
				bigbuf_arcbufs[2 * j] =
				    dmu_request_arcbuf(bonus_db, chunksize / 2);
				bigbuf_arcbufs[2 * j + 1] =
				    dmu_request_arcbuf(bonus_db, chunksize / 2);
			}
		}

		/*
		 * Get a tx for the mods to both packobj and bigobj.
		 */
		tx = dmu_tx_create(os);

		dmu_tx_hold_write(tx, packobj, packoff, packsize);
		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);

		/* On tx-assign failure, return the arcbufs before bailing. */
		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
		if (txg == 0) {
			umem_free(packbuf, packsize);
			umem_free(bigbuf, bigsize);
			for (j = 0; j < s; j++) {
				if (i != 5) {
					dmu_return_arcbuf(bigbuf_arcbufs[j]);
				} else {
					dmu_return_arcbuf(
					    bigbuf_arcbufs[2 * j]);
					dmu_return_arcbuf(
					    bigbuf_arcbufs[2 * j + 1]);
				}
			}
			umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
			dmu_buf_rele(bonus_db, FTAG);
			return;
		}

		/*
		 * 50% of the time don't read objects in the 1st iteration to
		 * test dmu_assign_arcbuf() for the case when there're no
		 * existing dbufs for the specified offsets.
		 */
		if (i != 0 || ztest_random(2) != 0) {
			error = dmu_read(os, packobj, packoff,
			    packsize, packbuf, DMU_READ_PREFETCH);
			ASSERT0(error);
			error = dmu_read(os, bigobj, bigoff, bigsize,
			    bigbuf, DMU_READ_PREFETCH);
			ASSERT0(error);
		}
		compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
		    n, chunksize, txg);

		/*
		 * We've verified all the old bufwads, and made new ones.
		 * Now write them out.
		 */
		dmu_write(os, packobj, packoff, packsize, packbuf, tx);
		if (ztest_opts.zo_verbose >= 7) {
			(void) printf("writing offset %llx size %llx"
			    " txg %llx\n",
			    (u_longlong_t)bigoff,
			    (u_longlong_t)bigsize,
			    (u_longlong_t)txg);
		}
		for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
			dmu_buf_t *dbt;
			if (i != 5) {
				bcopy((caddr_t)bigbuf + (off - bigoff),
				    bigbuf_arcbufs[j]->b_data, chunksize);
			} else {
				bcopy((caddr_t)bigbuf + (off - bigoff),
				    bigbuf_arcbufs[2 * j]->b_data,
				    chunksize / 2);
				bcopy((caddr_t)bigbuf + (off - bigoff) +
				    chunksize / 2,
				    bigbuf_arcbufs[2 * j + 1]->b_data,
				    chunksize / 2);
			}

			/* Iteration 1: hold the dbuf so it's already referenced. */
			if (i == 1) {
				VERIFY(dmu_buf_hold(os, bigobj, off,
				    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
			}
			if (i != 5) {
				dmu_assign_arcbuf(bonus_db, off,
				    bigbuf_arcbufs[j], tx);
			} else {
				dmu_assign_arcbuf(bonus_db, off,
				    bigbuf_arcbufs[2 * j], tx);
				dmu_assign_arcbuf(bonus_db,
				    off + chunksize / 2,
				    bigbuf_arcbufs[2 * j + 1], tx);
			}
			if (i == 1) {
				dmu_buf_rele(dbt, FTAG);
			}
		}
		dmu_tx_commit(tx);

		/*
		 * Sanity check the stuff we just wrote.
		 */
		{
			void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
			void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);

			VERIFY(0 == dmu_read(os, packobj, packoff,
			    packsize, packcheck, DMU_READ_PREFETCH));
			VERIFY(0 == dmu_read(os, bigobj, bigoff,
			    bigsize, bigcheck, DMU_READ_PREFETCH));

			ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
			ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);

			umem_free(packcheck, packsize);
			umem_free(bigcheck, bigsize);
		}
		/* Set up txg state for the "dirty in previous txg" iterations. */
		if (i == 2) {
			txg_wait_open(dmu_objset_pool(os), 0);
		} else if (i == 3) {
			txg_wait_synced(dmu_objset_pool(os), 0);
		}
	}

	dmu_buf_rele(bonus_db, FTAG);
	umem_free(packbuf, packsize);
	umem_free(bigbuf, bigsize);
	umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
}

/* ARGSUSED */
void
ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
{
	ztest_od_t od[1];
	uint64_t offset = (1ULL << (ztest_random(20) + 43)) +
	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);

	/*
	 * Have multiple threads write to large offsets in an object
	 * to verify that parallel writes to an object -- even to the
	 * same blocks within the object -- doesn't cause any trouble.
	 */
	ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);

	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
		return;

	while (ztest_random(10) != 0)
		ztest_io(zd, od[0].od_object, offset);
}

/*
 * Truncate, preallocate, then scribble zero-filled writes over a region
 * of an object to exercise the prealloc path.
 */
void
ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
{
	ztest_od_t od[1];
	uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) +
	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
	uint64_t count = ztest_random(20) + 1;
	uint64_t blocksize = ztest_random_blocksize();
	void *data;

	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);

	if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
		return;

	if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0)
		return;

	ztest_prealloc(zd, od[0].od_object, offset, count * blocksize);

	data = umem_zalloc(blocksize, UMEM_NOFAIL);

	while (ztest_random(count) != 0) {
		uint64_t randoff = offset + (ztest_random(count) * blocksize);
		if (ztest_write(zd, od[0].od_object, randoff, blocksize,
		    data) != 0)
			break;
		while (ztest_random(4) != 0)
			ztest_io(zd, od[0].od_object, randoff);
	}

	umem_free(data, blocksize);
}

/*
 * Verify that zap_{create,destroy,add,remove,update} work as expected.
 */
#define	ZTEST_ZAP_MIN_INTS	1
#define	ZTEST_ZAP_MAX_INTS	4
#define	ZTEST_ZAP_MAX_PROPS	1000

void
ztest_zap(ztest_ds_t *zd, uint64_t id)
{
	objset_t *os = zd->zd_os;
	ztest_od_t od[1];
	uint64_t object;
	uint64_t txg, last_txg;
	uint64_t value[ZTEST_ZAP_MAX_INTS];
	uint64_t zl_ints, zl_intsize, prop;
	int i, ints;
	dmu_tx_t *tx;
	char propname[100], txgname[100];
	int error;
	/* Two names known to collide in the ZAP hash. */
	char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };

	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);

	if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
		return;

	object = od[0].od_object;

	/*
	 * Generate a known hash collision, and verify that
	 * we can lookup and remove both entries.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
	if (txg == 0)
		return;
	for (i = 0; i < 2; i++) {
		value[i] = i;
		VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t),
		    1, &value[i], tx));
	}
	/* Re-adding the same names must fail with EEXIST. */
	for (i = 0; i < 2; i++) {
		VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i],
		    sizeof (uint64_t), 1, &value[i], tx));
		VERIFY3U(0, ==,
		    zap_length(os, object, hc[i], &zl_intsize, &zl_ints));
		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
		ASSERT3U(zl_ints, ==, 1);
	}
	for (i = 0; i < 2; i++) {
		VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx));
	}
	dmu_tx_commit(tx);

	/*
	 * Generate a bunch of random entries.
	 */
	ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);

	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
	(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
	(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
	bzero(value, sizeof (value));
	last_txg = 0;

	/*
	 * If these zap entries already exist, validate their contents.
	 */
	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
	if (error == 0) {
		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
		ASSERT3U(zl_ints, ==, 1);

		VERIFY(zap_lookup(os, object, txgname, zl_intsize,
		    zl_ints, &last_txg) == 0);

		VERIFY(zap_length(os, object, propname, &zl_intsize,
		    &zl_ints) == 0);

		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
		ASSERT3U(zl_ints, ==, ints);

		VERIFY(zap_lookup(os, object, propname, zl_intsize,
		    zl_ints, value) == 0);

		/* Invariant established by the update below. */
		for (i = 0; i < ints; i++) {
			ASSERT3U(value[i], ==, last_txg + object + i);
		}
	} else {
		ASSERT3U(error, ==, ENOENT);
	}

	/*
	 * Atomically update two entries in our zap object.
	 * The first is named txg_%llu, and contains the txg
	 * in which the property was last updated.  The second
	 * is named prop_%llu, and the nth element of its value
	 * should be txg + object + n.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
	if (txg == 0)
		return;

	/* A stored txg newer than the open txg means on-disk time travel. */
	if (last_txg > txg)
		fatal(0, "zap future leak: old %llu new %llu", last_txg, txg);

	for (i = 0; i < ints; i++)
		value[i] = txg + object + i;

	VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t),
	    1, &txg, tx));
	VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t),
	    ints, value, tx));

	dmu_tx_commit(tx);

	/*
	 * Remove a random pair of entries.
	 */
	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
	(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
	(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);

	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);

	if (error == ENOENT)
		return;

	ASSERT0(error);

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
	if (txg == 0)
		return;
	VERIFY3U(0, ==, zap_remove(os, object, txgname, tx));
	VERIFY3U(0, ==, zap_remove(os, object, propname, tx));
	dmu_tx_commit(tx);
}

/*
 * Testcase to test the upgrading of a microzap to fatzap.
4449209962Smm */ 4450168404Spjdvoid 4451219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4452209962Smm{ 4453219089Spjd objset_t *os = zd->zd_os; 4454219089Spjd ztest_od_t od[1]; 4455219089Spjd uint64_t object, txg; 4456209962Smm 4457219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4458209962Smm 4459219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4460219089Spjd return; 4461209962Smm 4462219089Spjd object = od[0].od_object; 4463209962Smm 4464209962Smm /* 4465219089Spjd * Add entries to this ZAP and make sure it spills over 4466209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4467219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4468209962Smm */ 4469219089Spjd for (int i = 0; i < 2050; i++) { 4470307108Smav char name[ZFS_MAX_DATASET_NAME_LEN]; 4471219089Spjd uint64_t value = i; 4472219089Spjd dmu_tx_t *tx; 4473219089Spjd int error; 4474209962Smm 4475219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4476219089Spjd id, value); 4477219089Spjd 4478209962Smm tx = dmu_tx_create(os); 4479219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4480219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4481219089Spjd if (txg == 0) 4482209962Smm return; 4483219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4484219089Spjd &value, tx); 4485209962Smm ASSERT(error == 0 || error == EEXIST); 4486209962Smm dmu_tx_commit(tx); 4487209962Smm } 4488209962Smm} 4489209962Smm 4490219089Spjd/* ARGSUSED */ 4491209962Smmvoid 4492219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4493168404Spjd{ 4494219089Spjd objset_t *os = zd->zd_os; 4495219089Spjd ztest_od_t od[1]; 4496168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4497168404Spjd dmu_tx_t *tx; 4498168404Spjd int i, namelen, error; 4499219089Spjd int micro = ztest_random(2); 4500168404Spjd char name[20], string_value[20]; 4501168404Spjd void *data; 4502168404Spjd 4503219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4504219089Spjd 4505219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4506219089Spjd return; 4507219089Spjd 4508219089Spjd object = od[0].od_object; 4509219089Spjd 4510185029Spjd /* 4511185029Spjd * Generate a random name of the form 'xxx.....' where each 4512185029Spjd * x is a random printable character and the dots are dots. 4513185029Spjd * There are 94 such characters, and the name length goes from 4514185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4515185029Spjd */ 4516185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4517168404Spjd 4518185029Spjd for (i = 0; i < 3; i++) 4519185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4520185029Spjd for (; i < namelen - 1; i++) 4521185029Spjd name[i] = '.'; 4522185029Spjd name[i] = '\0'; 4523168404Spjd 4524219089Spjd if ((namelen & 1) || micro) { 4525185029Spjd wsize = sizeof (txg); 4526185029Spjd wc = 1; 4527185029Spjd data = &txg; 4528185029Spjd } else { 4529185029Spjd wsize = 1; 4530185029Spjd wc = namelen; 4531185029Spjd data = string_value; 4532185029Spjd } 4533168404Spjd 4534185029Spjd count = -1ULL; 4535248571Smm VERIFY0(zap_count(os, object, &count)); 4536185029Spjd ASSERT(count != -1ULL); 4537168404Spjd 4538185029Spjd /* 4539185029Spjd * Select an operation: length, lookup, add, update, remove. 
4540185029Spjd */ 4541185029Spjd i = ztest_random(5); 4542168404Spjd 4543185029Spjd if (i >= 2) { 4544185029Spjd tx = dmu_tx_create(os); 4545219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4546219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4547219089Spjd if (txg == 0) 4548185029Spjd return; 4549185029Spjd bcopy(name, string_value, namelen); 4550185029Spjd } else { 4551185029Spjd tx = NULL; 4552185029Spjd txg = 0; 4553185029Spjd bzero(string_value, namelen); 4554185029Spjd } 4555168404Spjd 4556185029Spjd switch (i) { 4557168404Spjd 4558185029Spjd case 0: 4559185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4560185029Spjd if (error == 0) { 4561185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4562185029Spjd ASSERT3U(wc, ==, zl_wc); 4563185029Spjd } else { 4564185029Spjd ASSERT3U(error, ==, ENOENT); 4565185029Spjd } 4566185029Spjd break; 4567168404Spjd 4568185029Spjd case 1: 4569185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4570185029Spjd if (error == 0) { 4571185029Spjd if (data == string_value && 4572185029Spjd bcmp(name, data, namelen) != 0) 4573185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4574185029Spjd name, data, namelen); 4575185029Spjd } else { 4576185029Spjd ASSERT3U(error, ==, ENOENT); 4577185029Spjd } 4578185029Spjd break; 4579168404Spjd 4580185029Spjd case 2: 4581185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4582185029Spjd ASSERT(error == 0 || error == EEXIST); 4583185029Spjd break; 4584168404Spjd 4585185029Spjd case 3: 4586185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4587185029Spjd break; 4588168404Spjd 4589185029Spjd case 4: 4590185029Spjd error = zap_remove(os, object, name, tx); 4591185029Spjd ASSERT(error == 0 || error == ENOENT); 4592185029Spjd break; 4593185029Spjd } 4594168404Spjd 4595185029Spjd if (tx != NULL) 4596185029Spjd dmu_tx_commit(tx); 4597168404Spjd} 4598168404Spjd 4599219089Spjd/* 4600219089Spjd * Commit callback data. 
 */
typedef struct ztest_cb_data {
	list_node_t	zcd_node;	/* linkage on zcl.zcl_callbacks */
	uint64_t	zcd_txg;	/* txg the callback belongs to (0 = aborted) */
	int		zcd_expected_err;	/* error the callback must receive */
	boolean_t	zcd_added;	/* on the global callback list? */
	boolean_t	zcd_called;	/* callback has fired */
	spa_t		*zcd_spa;	/* pool, for spa_last_synced_txg() */
} ztest_cb_data_t;

/* This is the actual commit callback function */
static void
ztest_commit_callback(void *arg, int error)
{
	ztest_cb_data_t *data = arg;
	uint64_t synced_txg;

	VERIFY(data != NULL);
	VERIFY3S(data->zcd_expected_err, ==, error);
	/* Each callback must fire exactly once. */
	VERIFY(!data->zcd_called);

	/* A commit callback may not run before its txg has synced. */
	synced_txg = spa_last_synced_txg(data->zcd_spa);
	if (data->zcd_txg > synced_txg)
		fatal(0, "commit callback of txg %" PRIu64 " called prematurely"
		    ", last synced txg = %" PRIu64 "\n", data->zcd_txg,
		    synced_txg);

	data->zcd_called = B_TRUE;

	if (error == ECANCELED) {
		/* Aborted tx: the callback was never assigned a txg. */
		ASSERT0(data->zcd_txg);
		ASSERT(!data->zcd_added);

		/*
		 * The private callback data should be destroyed here, but
		 * since we are going to check the zcd_called field after
		 * dmu_tx_abort(), we will destroy it there.
		 */
		return;
	}

	/* Was this callback added to the global callback list? */
	if (!data->zcd_added)
		goto out;

	ASSERT3U(data->zcd_txg, !=, 0);

	/* Remove our callback from the list */
	mutex_enter(&zcl.zcl_callbacks_lock);
	list_remove(&zcl.zcl_callbacks, data);
	mutex_exit(&zcl.zcl_callbacks_lock);

out:
	umem_free(data, sizeof (ztest_cb_data_t));
}

/* Allocate and initialize callback data structure */
static ztest_cb_data_t *
ztest_create_cb_data(objset_t *os, uint64_t txg)
{
	ztest_cb_data_t *cb_data;

	/* umem_zalloc zeroes zcd_added/zcd_called/zcd_expected_err. */
	cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL);

	cb_data->zcd_txg = txg;
	cb_data->zcd_spa = dmu_objset_spa(os);

	return (cb_data);
}

/*
 * If a number of txgs equal to this threshold have been created after a commit
 * callback has been registered but not called, then we assume there is an
 * implementation bug.
 */
#define	ZTEST_COMMIT_CALLBACK_THRESH	(TXG_CONCURRENT_STATES + 2)

/*
 * Commit callback test.
4680219089Spjd */ 4681168404Spjdvoid 4682219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4683168404Spjd{ 4684219089Spjd objset_t *os = zd->zd_os; 4685219089Spjd ztest_od_t od[1]; 4686219089Spjd dmu_tx_t *tx; 4687219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4688219089Spjd uint64_t old_txg, txg; 4689219089Spjd int i, error; 4690219089Spjd 4691219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4692219089Spjd 4693219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4694219089Spjd return; 4695219089Spjd 4696219089Spjd tx = dmu_tx_create(os); 4697219089Spjd 4698219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4699219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4700219089Spjd 4701219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4702219089Spjd 4703219089Spjd /* Every once in a while, abort the transaction on purpose */ 4704219089Spjd if (ztest_random(100) == 0) 4705219089Spjd error = -1; 4706219089Spjd 4707219089Spjd if (!error) 4708219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4709219089Spjd 4710219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4711219089Spjd 4712219089Spjd cb_data[0]->zcd_txg = txg; 4713219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4714219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4715219089Spjd 4716219089Spjd if (error) { 4717219089Spjd /* 4718219089Spjd * It's not a strict requirement to call the registered 4719219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4720219089Spjd * it's supposed to happen in the current implementation 4721219089Spjd * so we will check for that. 
4722219089Spjd */ 4723219089Spjd for (i = 0; i < 2; i++) { 4724219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4725219089Spjd VERIFY(!cb_data[i]->zcd_called); 4726219089Spjd } 4727219089Spjd 4728219089Spjd dmu_tx_abort(tx); 4729219089Spjd 4730219089Spjd for (i = 0; i < 2; i++) { 4731219089Spjd VERIFY(cb_data[i]->zcd_called); 4732219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4733219089Spjd } 4734219089Spjd 4735219089Spjd return; 4736219089Spjd } 4737219089Spjd 4738219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4739219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4740219089Spjd 4741219089Spjd /* 4742219089Spjd * Read existing data to make sure there isn't a future leak. 4743219089Spjd */ 4744219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4745219089Spjd &old_txg, DMU_READ_PREFETCH)); 4746219089Spjd 4747219089Spjd if (old_txg > txg) 4748219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4749219089Spjd old_txg, txg); 4750219089Spjd 4751219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4752219089Spjd 4753332545Smav mutex_enter(&zcl.zcl_callbacks_lock); 4754219089Spjd 4755219089Spjd /* 4756219089Spjd * Since commit callbacks don't have any ordering requirement and since 4757219089Spjd * it is theoretically possible for a commit callback to be called 4758219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4759219089Spjd * synced, it is difficult to reliably determine whether a commit 4760219089Spjd * callback hasn't been called due to high load or due to a flawed 4761219089Spjd * implementation. 4762219089Spjd * 4763219089Spjd * In practice, we will assume that if after a certain number of txgs a 4764219089Spjd * commit callback hasn't been called, then most likely there's an 4765219089Spjd * implementation bug.. 
4766219089Spjd */ 4767219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4768219089Spjd if (tmp_cb != NULL && 4769251635Sdelphij (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { 4770219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4771219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4772219089Spjd } 4773219089Spjd 4774219089Spjd /* 4775219089Spjd * Let's find the place to insert our callbacks. 4776219089Spjd * 4777219089Spjd * Even though the list is ordered by txg, it is possible for the 4778219089Spjd * insertion point to not be the end because our txg may already be 4779219089Spjd * quiescing at this point and other callbacks in the open txg 4780219089Spjd * (from other objsets) may have sneaked in. 4781219089Spjd */ 4782219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4783219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4784219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4785219089Spjd 4786219089Spjd /* Add the 3 callbacks to the list */ 4787219089Spjd for (i = 0; i < 3; i++) { 4788219089Spjd if (tmp_cb == NULL) 4789219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4790219089Spjd else 4791219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4792219089Spjd cb_data[i]); 4793219089Spjd 4794219089Spjd cb_data[i]->zcd_added = B_TRUE; 4795219089Spjd VERIFY(!cb_data[i]->zcd_called); 4796219089Spjd 4797219089Spjd tmp_cb = cb_data[i]; 4798219089Spjd } 4799219089Spjd 4800332545Smav mutex_exit(&zcl.zcl_callbacks_lock); 4801219089Spjd 4802219089Spjd dmu_tx_commit(tx); 4803219089Spjd} 4804219089Spjd 4805219089Spjd/* ARGSUSED */ 4806219089Spjdvoid 4807219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4808219089Spjd{ 4809219089Spjd zfs_prop_t proplist[] = { 4810219089Spjd ZFS_PROP_CHECKSUM, 4811219089Spjd ZFS_PROP_COMPRESSION, 4812219089Spjd ZFS_PROP_COPIES, 4813219089Spjd ZFS_PROP_DEDUP 4814219089Spjd }; 4815219089Spjd 4816332545Smav rw_enter(&ztest_name_lock, RW_READER); 
4817219089Spjd 4818219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4819219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4820219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4821219089Spjd 4822332545Smav rw_exit(&ztest_name_lock); 4823219089Spjd} 4824219089Spjd 4825219089Spjd/* ARGSUSED */ 4826219089Spjdvoid 4827332525Smavztest_remap_blocks(ztest_ds_t *zd, uint64_t id) 4828332525Smav{ 4829332545Smav rw_enter(&ztest_name_lock, RW_READER); 4830332525Smav 4831332525Smav int error = dmu_objset_remap_indirects(zd->zd_name); 4832332525Smav if (error == ENOSPC) 4833332525Smav error = 0; 4834332525Smav ASSERT0(error); 4835332525Smav 4836332545Smav rw_exit(&ztest_name_lock); 4837332525Smav} 4838332525Smav 4839332525Smav/* ARGSUSED */ 4840332525Smavvoid 4841219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4842219089Spjd{ 4843219089Spjd nvlist_t *props = NULL; 4844219089Spjd 4845332545Smav rw_enter(&ztest_name_lock, RW_READER); 4846219089Spjd 4847236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4848219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4849219089Spjd 4850240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4851219089Spjd 4852236143Smm if (ztest_opts.zo_verbose >= 6) 4853219089Spjd dump_nvlist(props, 4); 4854219089Spjd 4855219089Spjd nvlist_free(props); 4856219089Spjd 4857332545Smav rw_exit(&ztest_name_lock); 4858219089Spjd} 4859219089Spjd 4860248571Smmstatic int 4861248571Smmuser_release_one(const char *snapname, const char *holdname) 4862248571Smm{ 4863248571Smm nvlist_t *snaps, *holds; 4864248571Smm int error; 4865248571Smm 4866248571Smm snaps = fnvlist_alloc(); 4867248571Smm holds = fnvlist_alloc(); 4868248571Smm fnvlist_add_boolean(holds, holdname); 4869248571Smm fnvlist_add_nvlist(snaps, snapname, holds); 4870248571Smm fnvlist_free(holds); 4871248571Smm error = dsl_dataset_user_release(snaps, NULL); 4872248571Smm fnvlist_free(snaps); 
4873248571Smm return (error); 4874248571Smm} 4875248571Smm 4876219089Spjd/* 4877219089Spjd * Test snapshot hold/release and deferred destroy. 4878219089Spjd */ 4879219089Spjdvoid 4880219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4881219089Spjd{ 4882219089Spjd int error; 4883219089Spjd objset_t *os = zd->zd_os; 4884219089Spjd objset_t *origin; 4885219089Spjd char snapname[100]; 4886219089Spjd char fullname[100]; 4887219089Spjd char clonename[100]; 4888219089Spjd char tag[100]; 4889307108Smav char osname[ZFS_MAX_DATASET_NAME_LEN]; 4890248571Smm nvlist_t *holds; 4891168404Spjd 4892332545Smav rw_enter(&ztest_name_lock, RW_READER); 4893168404Spjd 4894168404Spjd dmu_objset_name(os, osname); 4895168404Spjd 4896248571Smm (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); 4897248571Smm (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); 4898248571Smm (void) snprintf(clonename, sizeof (clonename), 4899248571Smm "%s/ch1_%llu", osname, id); 4900248571Smm (void) snprintf(tag, sizeof (tag), "tag_%llu", id); 4901219089Spjd 4902219089Spjd /* 4903219089Spjd * Clean up from any previous run. 4904219089Spjd */ 4905248571Smm error = dsl_destroy_head(clonename); 4906248571Smm if (error != ENOENT) 4907248571Smm ASSERT0(error); 4908248571Smm error = user_release_one(fullname, tag); 4909248571Smm if (error != ESRCH && error != ENOENT) 4910248571Smm ASSERT0(error); 4911248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4912248571Smm if (error != ENOENT) 4913248571Smm ASSERT0(error); 4914219089Spjd 4915219089Spjd /* 4916219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4917219089Spjd * destroy clone, verify snap was also destroyed. 
4918219089Spjd */ 4919248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4920219089Spjd if (error) { 4921219089Spjd if (error == ENOSPC) { 4922219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4923219089Spjd goto out; 4924168404Spjd } 4925219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4926219089Spjd } 4927168404Spjd 4928248571Smm error = dmu_objset_clone(clonename, fullname); 4929219089Spjd if (error) { 4930168404Spjd if (error == ENOSPC) { 4931219089Spjd ztest_record_enospc("dmu_objset_clone"); 4932219089Spjd goto out; 4933168404Spjd } 4934219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4935219089Spjd } 4936168404Spjd 4937248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4938219089Spjd if (error) { 4939248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4940219089Spjd fullname, error); 4941219089Spjd } 4942168404Spjd 4943248571Smm error = dsl_destroy_head(clonename); 4944219089Spjd if (error) 4945248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); 4946168404Spjd 4947219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4948219089Spjd if (error != ENOENT) 4949219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4950168404Spjd 4951219089Spjd /* 4952219089Spjd * Create snapshot, add temporary hold, verify that we can't 4953219089Spjd * destroy a held snapshot, mark for deferred destroy, 4954219089Spjd * release hold, verify snapshot was destroyed. 
4955219089Spjd */ 4956248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4957219089Spjd if (error) { 4958219089Spjd if (error == ENOSPC) { 4959219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4960219089Spjd goto out; 4961168404Spjd } 4962219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4963168404Spjd } 4964168404Spjd 4965248571Smm holds = fnvlist_alloc(); 4966248571Smm fnvlist_add_string(holds, fullname, tag); 4967248571Smm error = dsl_dataset_user_hold(holds, 0, NULL); 4968248571Smm fnvlist_free(holds); 4969248571Smm 4970268075Sdelphij if (error == ENOSPC) { 4971268075Sdelphij ztest_record_enospc("dsl_dataset_user_hold"); 4972268075Sdelphij goto out; 4973268075Sdelphij } else if (error) { 4974268075Sdelphij fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", 4975268075Sdelphij fullname, tag, error); 4976268075Sdelphij } 4977219089Spjd 4978248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4979219089Spjd if (error != EBUSY) { 4980248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", 4981219089Spjd fullname, error); 4982219089Spjd } 4983219089Spjd 4984248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4985219089Spjd if (error) { 4986248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4987219089Spjd fullname, error); 4988219089Spjd } 4989219089Spjd 4990248571Smm error = user_release_one(fullname, tag); 4991219089Spjd if (error) 4992251646Sdelphij fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); 4993219089Spjd 4994248571Smm VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); 4995219089Spjd 4996219089Spjdout: 4997332545Smav rw_exit(&ztest_name_lock); 4998168404Spjd} 4999168404Spjd 5000168404Spjd/* 5001168404Spjd * Inject random faults into the on-disk data. 
5002168404Spjd */ 5003219089Spjd/* ARGSUSED */ 5004168404Spjdvoid 5005219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 5006168404Spjd{ 5007219089Spjd ztest_shared_t *zs = ztest_shared; 5008236143Smm spa_t *spa = ztest_spa; 5009168404Spjd int fd; 5010168404Spjd uint64_t offset; 5011219089Spjd uint64_t leaves; 5012168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 5013168404Spjd uint64_t top, leaf; 5014168404Spjd char path0[MAXPATHLEN]; 5015168404Spjd char pathrand[MAXPATHLEN]; 5016168404Spjd size_t fsize; 5017307275Smav int bshift = SPA_MAXBLOCKSHIFT + 2; 5018168404Spjd int iters = 1000; 5019219089Spjd int maxfaults; 5020219089Spjd int mirror_save; 5021185029Spjd vdev_t *vd0 = NULL; 5022168404Spjd uint64_t guid0 = 0; 5023219089Spjd boolean_t islog = B_FALSE; 5024168404Spjd 5025332545Smav mutex_enter(&ztest_vdev_lock); 5026339106Smav 5027339106Smav /* 5028339106Smav * Device removal is in progress, fault injection must be disabled 5029339106Smav * until it completes and the pool is scrubbed. The fault injection 5030339106Smav * strategy for damaging blocks does not take in to account evacuated 5031339106Smav * blocks which may have already been damaged. 5032339106Smav */ 5033339106Smav if (ztest_device_removal_active) { 5034339106Smav mutex_exit(&ztest_vdev_lock); 5035339106Smav return; 5036339106Smav } 5037339106Smav 5038219089Spjd maxfaults = MAXFAULTS(); 5039236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 5040219089Spjd mirror_save = zs->zs_mirrors; 5041332545Smav mutex_exit(&ztest_vdev_lock); 5042219089Spjd 5043185029Spjd ASSERT(leaves >= 1); 5044168404Spjd 5045168404Spjd /* 5046254074Sdelphij * Grab the name lock as reader. There are some operations 5047254074Sdelphij * which don't like to have their vdevs changed while 5048254074Sdelphij * they are in progress (i.e. spa_change_guid). Those 5049254074Sdelphij * operations will have grabbed the name lock as writer. 
5050254074Sdelphij */ 5051332545Smav rw_enter(&ztest_name_lock, RW_READER); 5052254074Sdelphij 5053254074Sdelphij /* 5054185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 5055168404Spjd */ 5056185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 5057168404Spjd 5058185029Spjd if (ztest_random(2) == 0) { 5059185029Spjd /* 5060219089Spjd * Inject errors on a normal data device or slog device. 5061185029Spjd */ 5062219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 5063219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 5064168404Spjd 5065185029Spjd /* 5066185029Spjd * Generate paths to the first leaf in this top-level vdev, 5067185029Spjd * and to the random leaf we selected. We'll induce transient 5068185029Spjd * write failures and random online/offline activity on leaf 0, 5069185029Spjd * and we'll write random garbage to the randomly chosen leaf. 5070185029Spjd */ 5071185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 5072236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 5073236143Smm top * leaves + zs->zs_splits); 5074185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 5075236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 5076236143Smm top * leaves + leaf); 5077168404Spjd 5078185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 5079219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 5080219089Spjd islog = B_TRUE; 5081219089Spjd 5082254074Sdelphij /* 5083254074Sdelphij * If the top-level vdev needs to be resilvered 5084254074Sdelphij * then we only allow faults on the device that is 5085254074Sdelphij * resilvering. 
5086254074Sdelphij */ 5087254074Sdelphij if (vd0 != NULL && maxfaults != 1 && 5088254074Sdelphij (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || 5089254112Sdelphij vd0->vdev_resilver_txg != 0)) { 5090185029Spjd /* 5091185029Spjd * Make vd0 explicitly claim to be unreadable, 5092185029Spjd * or unwriteable, or reach behind its back 5093185029Spjd * and close the underlying fd. We can do this if 5094185029Spjd * maxfaults == 0 because we'll fail and reexecute, 5095185029Spjd * and we can do it if maxfaults >= 2 because we'll 5096185029Spjd * have enough redundancy. If maxfaults == 1, the 5097185029Spjd * combination of this with injection of random data 5098185029Spjd * corruption below exceeds the pool's fault tolerance. 5099185029Spjd */ 5100185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 5101168404Spjd 5102332525Smav zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d", 5103332525Smav (long long)vd0->vdev_id, (int)maxfaults); 5104332525Smav 5105185029Spjd if (vf != NULL && ztest_random(3) == 0) { 5106185029Spjd (void) close(vf->vf_vnode->v_fd); 5107185029Spjd vf->vf_vnode->v_fd = -1; 5108185029Spjd } else if (ztest_random(2) == 0) { 5109185029Spjd vd0->vdev_cant_read = B_TRUE; 5110185029Spjd } else { 5111185029Spjd vd0->vdev_cant_write = B_TRUE; 5112185029Spjd } 5113185029Spjd guid0 = vd0->vdev_guid; 5114185029Spjd } 5115185029Spjd } else { 5116185029Spjd /* 5117185029Spjd * Inject errors on an l2cache device. 
5118185029Spjd */ 5119185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 5120168404Spjd 5121185029Spjd if (sav->sav_count == 0) { 5122185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 5123332545Smav rw_exit(&ztest_name_lock); 5124185029Spjd return; 5125185029Spjd } 5126185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 5127168404Spjd guid0 = vd0->vdev_guid; 5128185029Spjd (void) strcpy(path0, vd0->vdev_path); 5129185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 5130185029Spjd 5131185029Spjd leaf = 0; 5132185029Spjd leaves = 1; 5133185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 5134168404Spjd } 5135168404Spjd 5136185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 5137332545Smav rw_exit(&ztest_name_lock); 5138185029Spjd 5139168404Spjd /* 5140219089Spjd * If we can tolerate two or more faults, or we're dealing 5141219089Spjd * with a slog, randomly online/offline vd0. 5142168404Spjd */ 5143219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 5144209962Smm if (ztest_random(10) < 6) { 5145209962Smm int flags = (ztest_random(2) == 0 ? 5146209962Smm ZFS_OFFLINE_TEMPORARY : 0); 5147219089Spjd 5148219089Spjd /* 5149219089Spjd * We have to grab the zs_name_lock as writer to 5150219089Spjd * prevent a race between offlining a slog and 5151219089Spjd * destroying a dataset. Offlining the slog will 5152219089Spjd * grab a reference on the dataset which may cause 5153219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 5154219089Spjd * leaving the dataset in an inconsistent state. 
5155219089Spjd */ 5156219089Spjd if (islog) 5157332545Smav rw_enter(&ztest_name_lock, RW_WRITER); 5158219089Spjd 5159209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 5160219089Spjd 5161219089Spjd if (islog) 5162332545Smav rw_exit(&ztest_name_lock); 5163209962Smm } else { 5164242845Sdelphij /* 5165242845Sdelphij * Ideally we would like to be able to randomly 5166242845Sdelphij * call vdev_[on|off]line without holding locks 5167242845Sdelphij * to force unpredictable failures but the side 5168242845Sdelphij * effects of vdev_[on|off]line prevent us from 5169242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 5170242845Sdelphij * prevent a race between injection testing and 5171242845Sdelphij * aux_vdev removal. 5172242845Sdelphij */ 5173332545Smav mutex_enter(&ztest_vdev_lock); 5174209962Smm (void) vdev_online(spa, guid0, 0, NULL); 5175332545Smav mutex_exit(&ztest_vdev_lock); 5176209962Smm } 5177168404Spjd } 5178168404Spjd 5179219089Spjd if (maxfaults == 0) 5180219089Spjd return; 5181219089Spjd 5182168404Spjd /* 5183168404Spjd * We have at least single-fault tolerance, so inject data corruption. 5184168404Spjd */ 5185168404Spjd fd = open(pathrand, O_RDWR); 5186168404Spjd 5187332547Smav if (fd == -1) /* we hit a gap in the device namespace */ 5188168404Spjd return; 5189168404Spjd 5190168404Spjd fsize = lseek(fd, 0, SEEK_END); 5191168404Spjd 5192168404Spjd while (--iters != 0) { 5193307273Smav /* 5194307273Smav * The offset must be chosen carefully to ensure that 5195307273Smav * we do not inject a given logical block with errors 5196307273Smav * on two different leaf devices, because ZFS can not 5197307273Smav * tolerate that (if maxfaults==1). 5198307273Smav * 5199307273Smav * We divide each leaf into chunks of size 5200307273Smav * (# leaves * SPA_MAXBLOCKSIZE * 4). Within each chunk 5201307273Smav * there is a series of ranges to which we can inject errors. 5202307273Smav * Each range can accept errors on only a single leaf vdev. 
5203307273Smav * The error injection ranges are separated by ranges 5204307273Smav * which we will not inject errors on any device (DMZs). 5205307273Smav * Each DMZ must be large enough such that a single block 5206307273Smav * can not straddle it, so that a single block can not be 5207307273Smav * a target in two different injection ranges (on different 5208307273Smav * leaf vdevs). 5209307273Smav * 5210307273Smav * For example, with 3 leaves, each chunk looks like: 5211307273Smav * 0 to 32M: injection range for leaf 0 5212307273Smav * 32M to 64M: DMZ - no injection allowed 5213307273Smav * 64M to 96M: injection range for leaf 1 5214307273Smav * 96M to 128M: DMZ - no injection allowed 5215307273Smav * 128M to 160M: injection range for leaf 2 5216307273Smav * 160M to 192M: DMZ - no injection allowed 5217307273Smav */ 5218168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 5219168404Spjd (leaves << bshift) + (leaf << bshift) + 5220168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 5221168404Spjd 5222307275Smav /* 5223307275Smav * Only allow damage to the labels at one end of the vdev. 5224307275Smav * 5225307275Smav * If all labels are damaged, the device will be totally 5226307275Smav * inaccessible, which will result in loss of data, 5227307275Smav * because we also damage (parts of) the other side of 5228307275Smav * the mirror/raidz. 5229307275Smav * 5230307275Smav * Additionally, we will always have both an even and an 5231307275Smav * odd label, so that we can handle crashes in the 5232307275Smav * middle of vdev_config_sync(). 5233307275Smav */ 5234307275Smav if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE) 5235168404Spjd continue; 5236168404Spjd 5237307275Smav /* 5238307275Smav * The two end labels are stored at the "end" of the disk, but 5239307275Smav * the end of the disk (vdev_psize) is aligned to 5240307275Smav * sizeof (vdev_label_t). 
5241307275Smav */ 5242307275Smav uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t)); 5243307275Smav if ((leaf & 1) == 1 && 5244307275Smav offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE) 5245307275Smav continue; 5246307275Smav 5247332545Smav mutex_enter(&ztest_vdev_lock); 5248219089Spjd if (mirror_save != zs->zs_mirrors) { 5249332545Smav mutex_exit(&ztest_vdev_lock); 5250219089Spjd (void) close(fd); 5251219089Spjd return; 5252219089Spjd } 5253168404Spjd 5254168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 5255168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 5256168404Spjd offset, pathrand); 5257219089Spjd 5258332545Smav mutex_exit(&ztest_vdev_lock); 5259219089Spjd 5260236143Smm if (ztest_opts.zo_verbose >= 7) 5261219089Spjd (void) printf("injected bad word into %s," 5262219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 5263168404Spjd } 5264168404Spjd 5265168404Spjd (void) close(fd); 5266168404Spjd} 5267168404Spjd 5268168404Spjd/* 5269219089Spjd * Verify that DDT repair works as expected. 
5270219089Spjd */ 5271219089Spjdvoid 5272219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 5273219089Spjd{ 5274219089Spjd ztest_shared_t *zs = ztest_shared; 5275236143Smm spa_t *spa = ztest_spa; 5276219089Spjd objset_t *os = zd->zd_os; 5277219089Spjd ztest_od_t od[1]; 5278219089Spjd uint64_t object, blocksize, txg, pattern, psize; 5279219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 5280219089Spjd dmu_buf_t *db; 5281219089Spjd dmu_tx_t *tx; 5282321610Smav abd_t *abd; 5283219089Spjd blkptr_t blk; 5284219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 5285219089Spjd 5286219089Spjd blocksize = ztest_random_blocksize(); 5287219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 5288219089Spjd 5289219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 5290219089Spjd 5291219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 5292219089Spjd return; 5293219089Spjd 5294219089Spjd /* 5295219089Spjd * Take the name lock as writer to prevent anyone else from changing 5296219089Spjd * the pool and dataset properies we need to maintain during this test. 
5297219089Spjd */ 5298332545Smav rw_enter(&ztest_name_lock, RW_WRITER); 5299219089Spjd 5300219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 5301219089Spjd B_FALSE) != 0 || 5302219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 5303219089Spjd B_FALSE) != 0) { 5304332545Smav rw_exit(&ztest_name_lock); 5305219089Spjd return; 5306219089Spjd } 5307219089Spjd 5308307271Smav dmu_objset_stats_t dds; 5309307271Smav dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 5310307271Smav dmu_objset_fast_stat(os, &dds); 5311307271Smav dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 5312307271Smav 5313219089Spjd object = od[0].od_object; 5314219089Spjd blocksize = od[0].od_blocksize; 5315307271Smav pattern = zs->zs_guid ^ dds.dds_guid; 5316219089Spjd 5317219089Spjd ASSERT(object != 0); 5318219089Spjd 5319219089Spjd tx = dmu_tx_create(os); 5320219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 5321219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 5322219089Spjd if (txg == 0) { 5323332545Smav rw_exit(&ztest_name_lock); 5324219089Spjd return; 5325219089Spjd } 5326219089Spjd 5327219089Spjd /* 5328219089Spjd * Write all the copies of our block. 
5329219089Spjd */ 5330219089Spjd for (int i = 0; i < copies; i++) { 5331219089Spjd uint64_t offset = i * blocksize; 5332248571Smm int error = dmu_buf_hold(os, object, offset, FTAG, &db, 5333248571Smm DMU_READ_NO_PREFETCH); 5334248571Smm if (error != 0) { 5335248571Smm fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", 5336248571Smm os, (long long)object, (long long) offset, error); 5337248571Smm } 5338219089Spjd ASSERT(db->db_offset == offset); 5339219089Spjd ASSERT(db->db_size == blocksize); 5340219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 5341219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 5342219089Spjd dmu_buf_will_fill(db, tx); 5343219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 5344219089Spjd dmu_buf_rele(db, FTAG); 5345219089Spjd } 5346219089Spjd 5347219089Spjd dmu_tx_commit(tx); 5348219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 5349219089Spjd 5350219089Spjd /* 5351219089Spjd * Find out what block we got. 5352219089Spjd */ 5353243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 5354243524Smm DMU_READ_NO_PREFETCH)); 5355219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 5356219089Spjd dmu_buf_rele(db, FTAG); 5357219089Spjd 5358219089Spjd /* 5359219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 5360219089Spjd */ 5361219089Spjd psize = BP_GET_PSIZE(&blk); 5362321610Smav abd = abd_alloc_linear(psize, B_TRUE); 5363321610Smav ztest_pattern_set(abd_to_buf(abd), psize, ~pattern); 5364219089Spjd 5365219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 5366321610Smav abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 5367219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 5368219089Spjd 5369321610Smav abd_free(abd); 5370219089Spjd 5371332545Smav rw_exit(&ztest_name_lock); 5372219089Spjd} 5373219089Spjd 5374219089Spjd/* 5375168404Spjd * Scrub the pool. 
5376168404Spjd */ 5377219089Spjd/* ARGSUSED */ 5378168404Spjdvoid 5379219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5380168404Spjd{ 5381236143Smm spa_t *spa = ztest_spa; 5382168404Spjd 5383339106Smav /* 5384339106Smav * Scrub in progress by device removal. 5385339106Smav */ 5386339106Smav if (ztest_device_removal_active) 5387339106Smav return; 5388339106Smav 5389219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5390219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5391219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5392168404Spjd} 5393168404Spjd 5394168404Spjd/* 5395228103Smm * Change the guid for the pool. 5396228103Smm */ 5397228103Smm/* ARGSUSED */ 5398228103Smmvoid 5399228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5400228103Smm{ 5401236143Smm spa_t *spa = ztest_spa; 5402228103Smm uint64_t orig, load; 5403239620Smm int error; 5404228103Smm 5405228103Smm orig = spa_guid(spa); 5406228103Smm load = spa_load_guid(spa); 5407239620Smm 5408332545Smav rw_enter(&ztest_name_lock, RW_WRITER); 5409239620Smm error = spa_change_guid(spa); 5410332545Smav rw_exit(&ztest_name_lock); 5411239620Smm 5412239620Smm if (error != 0) 5413228103Smm return; 5414228103Smm 5415243505Smm if (ztest_opts.zo_verbose >= 4) { 5416228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5417228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5418228103Smm } 5419228103Smm 5420228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5421228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5422228103Smm} 5423228103Smm 5424228103Smm/* 5425168404Spjd * Rename the pool to a different name and then rename it back. 
5426168404Spjd */ 5427219089Spjd/* ARGSUSED */ 5428168404Spjdvoid 5429219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5430168404Spjd{ 5431168404Spjd char *oldname, *newname; 5432168404Spjd spa_t *spa; 5433168404Spjd 5434332545Smav rw_enter(&ztest_name_lock, RW_WRITER); 5435168404Spjd 5436236143Smm oldname = ztest_opts.zo_pool; 5437168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5438168404Spjd (void) strcpy(newname, oldname); 5439168404Spjd (void) strcat(newname, "_tmp"); 5440168404Spjd 5441168404Spjd /* 5442168404Spjd * Do the rename 5443168404Spjd */ 5444219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5445168404Spjd 5446168404Spjd /* 5447168404Spjd * Try to open it under the old name, which shouldn't exist 5448168404Spjd */ 5449219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5450168404Spjd 5451168404Spjd /* 5452168404Spjd * Open it under the new name and make sure it's still the same spa_t. 5453168404Spjd */ 5454219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5455168404Spjd 5456236143Smm ASSERT(spa == ztest_spa); 5457168404Spjd spa_close(spa, FTAG); 5458168404Spjd 5459168404Spjd /* 5460168404Spjd * Rename it back to the original 5461168404Spjd */ 5462219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5463168404Spjd 5464168404Spjd /* 5465168404Spjd * Make sure it can still be opened 5466168404Spjd */ 5467219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5468168404Spjd 5469236143Smm ASSERT(spa == ztest_spa); 5470168404Spjd spa_close(spa, FTAG); 5471168404Spjd 5472168404Spjd umem_free(newname, strlen(newname) + 1); 5473168404Spjd 5474332545Smav rw_exit(&ztest_name_lock); 5475168404Spjd} 5476168404Spjd 5477339111Smavstatic vdev_t * 5478339111Smavztest_random_concrete_vdev_leaf(vdev_t *vd) 5479339111Smav{ 5480339111Smav if (vd == NULL) 5481339111Smav return (NULL); 5482339111Smav 5483339111Smav if (vd->vdev_children == 0) 5484339111Smav return (vd); 5485339111Smav 5486339111Smav vdev_t 
*eligible[vd->vdev_children]; 5487339111Smav int eligible_idx = 0, i; 5488339111Smav for (i = 0; i < vd->vdev_children; i++) { 5489339111Smav vdev_t *cvd = vd->vdev_child[i]; 5490339111Smav if (cvd->vdev_top->vdev_removing) 5491339111Smav continue; 5492339111Smav if (cvd->vdev_children > 0 || 5493339111Smav (vdev_is_concrete(cvd) && !cvd->vdev_detached)) { 5494339111Smav eligible[eligible_idx++] = cvd; 5495339111Smav } 5496339111Smav } 5497339111Smav VERIFY(eligible_idx > 0); 5498339111Smav 5499339111Smav uint64_t child_no = ztest_random(eligible_idx); 5500339111Smav return (ztest_random_concrete_vdev_leaf(eligible[child_no])); 5501339111Smav} 5502339111Smav 5503339111Smav/* ARGSUSED */ 5504339111Smavvoid 5505339111Smavztest_initialize(ztest_ds_t *zd, uint64_t id) 5506339111Smav{ 5507339111Smav spa_t *spa = ztest_spa; 5508339111Smav int error = 0; 5509339111Smav 5510339111Smav mutex_enter(&ztest_vdev_lock); 5511339111Smav 5512339111Smav spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 5513339111Smav 5514339111Smav /* Random leaf vdev */ 5515339111Smav vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev); 5516339111Smav if (rand_vd == NULL) { 5517339111Smav spa_config_exit(spa, SCL_VDEV, FTAG); 5518339111Smav mutex_exit(&ztest_vdev_lock); 5519339111Smav return; 5520339111Smav } 5521339111Smav 5522339111Smav /* 5523339111Smav * The random vdev we've selected may change as soon as we 5524339111Smav * drop the spa_config_lock. We create local copies of things 5525339111Smav * we're interested in. 
5526339111Smav */ 5527339111Smav uint64_t guid = rand_vd->vdev_guid; 5528339111Smav char *path = strdup(rand_vd->vdev_path); 5529339111Smav boolean_t active = rand_vd->vdev_initialize_thread != NULL; 5530339111Smav 5531339111Smav zfs_dbgmsg("vd %p, guid %llu", rand_vd, guid); 5532339111Smav spa_config_exit(spa, SCL_VDEV, FTAG); 5533339111Smav 5534339111Smav uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS); 5535339111Smav error = spa_vdev_initialize(spa, guid, cmd); 5536339111Smav switch (cmd) { 5537339111Smav case POOL_INITIALIZE_CANCEL: 5538339111Smav if (ztest_opts.zo_verbose >= 4) { 5539339111Smav (void) printf("Cancel initialize %s", path); 5540339111Smav if (!active) 5541339111Smav (void) printf(" failed (no initialize active)"); 5542339111Smav (void) printf("\n"); 5543339111Smav } 5544339111Smav break; 5545339111Smav case POOL_INITIALIZE_DO: 5546339111Smav if (ztest_opts.zo_verbose >= 4) { 5547339111Smav (void) printf("Start initialize %s", path); 5548339111Smav if (active && error == 0) 5549339111Smav (void) printf(" failed (already active)"); 5550339111Smav else if (error != 0) 5551339111Smav (void) printf(" failed (error %d)", error); 5552339111Smav (void) printf("\n"); 5553339111Smav } 5554339111Smav break; 5555339111Smav case POOL_INITIALIZE_SUSPEND: 5556339111Smav if (ztest_opts.zo_verbose >= 4) { 5557339111Smav (void) printf("Suspend initialize %s", path); 5558339111Smav if (!active) 5559339111Smav (void) printf(" failed (no initialize active)"); 5560339111Smav (void) printf("\n"); 5561339111Smav } 5562339111Smav break; 5563339111Smav } 5564339111Smav free(path); 5565339111Smav mutex_exit(&ztest_vdev_lock); 5566339111Smav} 5567339111Smav 5568168404Spjd/* 5569219089Spjd * Verify pool integrity by running zdb. 
5570168404Spjd */ 5571168404Spjdstatic void 5572219089Spjdztest_run_zdb(char *pool) 5573168404Spjd{ 5574168404Spjd int status; 5575168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5576168404Spjd char zbuf[1024]; 5577168404Spjd char *bin; 5578185029Spjd char *ztest; 5579185029Spjd char *isa; 5580185029Spjd int isalen; 5581168404Spjd FILE *fp; 5582168404Spjd 5583214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5584168404Spjd 5585168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5586168404Spjd bin = strstr(zdb, "/usr/bin/"); 5587185029Spjd ztest = strstr(bin, "/ztest"); 5588185029Spjd isa = bin + 8; 5589185029Spjd isalen = ztest - isa; 5590185029Spjd isa = strdup(isa); 5591168404Spjd /* LINTED */ 5592185029Spjd (void) sprintf(bin, 5593331387Smav "/usr/sbin%.*s/zdb -bcc%s%s -G -d -U %s %s", 5594185029Spjd isalen, 5595185029Spjd isa, 5596236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5597236143Smm ztest_opts.zo_verbose >= 4 ? "v" : "", 5598219089Spjd spa_config_path, 5599208047Smm pool); 5600185029Spjd free(isa); 5601168404Spjd 5602236143Smm if (ztest_opts.zo_verbose >= 5) 5603168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5604168404Spjd 5605168404Spjd fp = popen(zdb, "r"); 5606168404Spjd assert(fp != NULL); 5607168404Spjd 5608168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5609236143Smm if (ztest_opts.zo_verbose >= 3) 5610168404Spjd (void) printf("%s", zbuf); 5611168404Spjd 5612168404Spjd status = pclose(fp); 5613168404Spjd 5614168404Spjd if (status == 0) 5615168404Spjd return; 5616168404Spjd 5617168404Spjd ztest_dump_core = 0; 5618168404Spjd if (WIFEXITED(status)) 5619168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5620168404Spjd else 5621168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5622168404Spjd} 5623168404Spjd 5624168404Spjdstatic void 5625168404Spjdztest_walk_pool_directory(char *header) 5626168404Spjd{ 5627168404Spjd spa_t *spa = NULL; 5628168404Spjd 
5629236143Smm if (ztest_opts.zo_verbose >= 6) 5630168404Spjd (void) printf("%s\n", header); 5631168404Spjd 5632168404Spjd mutex_enter(&spa_namespace_lock); 5633168404Spjd while ((spa = spa_next(spa)) != NULL) 5634236143Smm if (ztest_opts.zo_verbose >= 6) 5635168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5636168404Spjd mutex_exit(&spa_namespace_lock); 5637168404Spjd} 5638168404Spjd 5639168404Spjdstatic void 5640168404Spjdztest_spa_import_export(char *oldname, char *newname) 5641168404Spjd{ 5642209962Smm nvlist_t *config, *newconfig; 5643168404Spjd uint64_t pool_guid; 5644168404Spjd spa_t *spa; 5645248571Smm int error; 5646168404Spjd 5647236143Smm if (ztest_opts.zo_verbose >= 4) { 5648168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5649168404Spjd oldname, newname); 5650168404Spjd } 5651168404Spjd 5652168404Spjd /* 5653168404Spjd * Clean up from previous runs. 5654168404Spjd */ 5655168404Spjd (void) spa_destroy(newname); 5656168404Spjd 5657168404Spjd /* 5658168404Spjd * Get the pool's configuration and guid. 5659168404Spjd */ 5660219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5661168404Spjd 5662209962Smm /* 5663209962Smm * Kick off a scrub to tickle scrub/export races. 5664209962Smm */ 5665209962Smm if (ztest_random(2) == 0) 5666219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5667209962Smm 5668168404Spjd pool_guid = spa_guid(spa); 5669168404Spjd spa_close(spa, FTAG); 5670168404Spjd 5671168404Spjd ztest_walk_pool_directory("pools before export"); 5672168404Spjd 5673168404Spjd /* 5674168404Spjd * Export it. 5675168404Spjd */ 5676219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5677168404Spjd 5678168404Spjd ztest_walk_pool_directory("pools after export"); 5679168404Spjd 5680168404Spjd /* 5681209962Smm * Try to import it. 
5682209962Smm */ 5683209962Smm newconfig = spa_tryimport(config); 5684209962Smm ASSERT(newconfig != NULL); 5685209962Smm nvlist_free(newconfig); 5686209962Smm 5687209962Smm /* 5688168404Spjd * Import it under the new name. 5689168404Spjd */ 5690248571Smm error = spa_import(newname, config, NULL, 0); 5691248571Smm if (error != 0) { 5692248571Smm dump_nvlist(config, 0); 5693248571Smm fatal(B_FALSE, "couldn't import pool %s as %s: error %u", 5694248571Smm oldname, newname, error); 5695248571Smm } 5696168404Spjd 5697168404Spjd ztest_walk_pool_directory("pools after import"); 5698168404Spjd 5699168404Spjd /* 5700168404Spjd * Try to import it again -- should fail with EEXIST. 5701168404Spjd */ 5702219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5703168404Spjd 5704168404Spjd /* 5705168404Spjd * Try to import it under a different name -- should fail with EEXIST. 5706168404Spjd */ 5707219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5708168404Spjd 5709168404Spjd /* 5710168404Spjd * Verify that the pool is no longer visible under the old name. 5711168404Spjd */ 5712219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5713168404Spjd 5714168404Spjd /* 5715168404Spjd * Verify that we can open and close the pool using the new name. 
5716168404Spjd */ 5717219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5718168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5719168404Spjd spa_close(spa, FTAG); 5720168404Spjd 5721168404Spjd nvlist_free(config); 5722168404Spjd} 5723168404Spjd 5724209962Smmstatic void 5725209962Smmztest_resume(spa_t *spa) 5726209962Smm{ 5727236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5728219089Spjd (void) printf("resuming from suspended state\n"); 5729219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5730219089Spjd vdev_clear(spa, NULL); 5731219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5732219089Spjd (void) zio_resume(spa); 5733209962Smm} 5734209962Smm 5735168404Spjdstatic void * 5736209962Smmztest_resume_thread(void *arg) 5737185029Spjd{ 5738185029Spjd spa_t *spa = arg; 5739185029Spjd 5740185029Spjd while (!ztest_exiting) { 5741219089Spjd if (spa_suspended(spa)) 5742219089Spjd ztest_resume(spa); 5743219089Spjd (void) poll(NULL, 0, 100); 5744307265Smav 5745307265Smav /* 5746307265Smav * Periodically change the zfs_compressed_arc_enabled setting. 5747307265Smav */ 5748307265Smav if (ztest_random(10) == 0) 5749307265Smav zfs_compressed_arc_enabled = ztest_random(2); 5750321610Smav 5751321610Smav /* 5752321610Smav * Periodically change the zfs_abd_scatter_enabled setting. 
5753321610Smav */ 5754321610Smav if (ztest_random(10) == 0) 5755321610Smav zfs_abd_scatter_enabled = ztest_random(2); 5756185029Spjd } 5757185029Spjd return (NULL); 5758185029Spjd} 5759185029Spjd 5760185029Spjdstatic void * 5761219089Spjdztest_deadman_thread(void *arg) 5762219089Spjd{ 5763219089Spjd ztest_shared_t *zs = arg; 5764254074Sdelphij spa_t *spa = ztest_spa; 5765254074Sdelphij hrtime_t delta, total = 0; 5766219089Spjd 5767254074Sdelphij for (;;) { 5768258632Savg delta = zs->zs_thread_stop - zs->zs_thread_start + 5769258632Savg MSEC2NSEC(zfs_deadman_synctime_ms); 5770219089Spjd 5771258632Savg (void) poll(NULL, 0, (int)NSEC2MSEC(delta)); 5772219089Spjd 5773254074Sdelphij /* 5774254074Sdelphij * If the pool is suspended then fail immediately. Otherwise, 5775254074Sdelphij * check to see if the pool is making any progress. If 5776254074Sdelphij * vdev_deadman() discovers that there hasn't been any recent 5777254074Sdelphij * I/Os then it will end up aborting the tests. 5778254074Sdelphij */ 5779258717Savg if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { 5780254074Sdelphij fatal(0, "aborting test after %llu seconds because " 5781254074Sdelphij "pool has transitioned to a suspended state.", 5782258632Savg zfs_deadman_synctime_ms / 1000); 5783254074Sdelphij return (NULL); 5784254074Sdelphij } 5785254074Sdelphij vdev_deadman(spa->spa_root_vdev); 5786219089Spjd 5787258632Savg total += zfs_deadman_synctime_ms/1000; 5788254074Sdelphij (void) printf("ztest has been running for %lld seconds\n", 5789254074Sdelphij total); 5790254074Sdelphij } 5791219089Spjd} 5792219089Spjd 5793219089Spjdstatic void 5794236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5795219089Spjd{ 5796236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5797236143Smm ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5798219089Spjd hrtime_t functime = gethrtime(); 5799219089Spjd 5800219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5801219089Spjd 
zi->zi_func(zd, id); 5802219089Spjd 5803219089Spjd functime = gethrtime() - functime; 5804219089Spjd 5805236143Smm atomic_add_64(&zc->zc_count, 1); 5806236143Smm atomic_add_64(&zc->zc_time, functime); 5807219089Spjd 5808236143Smm if (ztest_opts.zo_verbose >= 4) { 5809219089Spjd Dl_info dli; 5810219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5811219089Spjd (void) printf("%6.2f sec in %s\n", 5812219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5813219089Spjd } 5814219089Spjd} 5815219089Spjd 5816219089Spjdstatic void * 5817168404Spjdztest_thread(void *arg) 5818168404Spjd{ 5819236143Smm int rand; 5820219089Spjd uint64_t id = (uintptr_t)arg; 5821168404Spjd ztest_shared_t *zs = ztest_shared; 5822219089Spjd uint64_t call_next; 5823219089Spjd hrtime_t now; 5824168404Spjd ztest_info_t *zi; 5825236143Smm ztest_shared_callstate_t *zc; 5826168404Spjd 5827219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5828168404Spjd /* 5829168404Spjd * See if it's time to force a crash. 5830168404Spjd */ 5831219089Spjd if (now > zs->zs_thread_kill) 5832219089Spjd ztest_kill(zs); 5833168404Spjd 5834168404Spjd /* 5835219089Spjd * If we're getting ENOSPC with some regularity, stop. 5836168404Spjd */ 5837219089Spjd if (zs->zs_enospc_count > 10) 5838219089Spjd break; 5839168404Spjd 5840168404Spjd /* 5841219089Spjd * Pick a random function to execute. 
5842168404Spjd */ 5843236143Smm rand = ztest_random(ZTEST_FUNCS); 5844236143Smm zi = &ztest_info[rand]; 5845236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5846236143Smm call_next = zc->zc_next; 5847168404Spjd 5848219089Spjd if (now >= call_next && 5849236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5850236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5851236143Smm ztest_execute(rand, zi, id); 5852236143Smm } 5853219089Spjd } 5854168404Spjd 5855219089Spjd return (NULL); 5856219089Spjd} 5857168404Spjd 5858219089Spjdstatic void 5859219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5860219089Spjd{ 5861307108Smav (void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d); 5862219089Spjd} 5863168404Spjd 5864219089Spjdstatic void 5865236143Smmztest_dataset_destroy(int d) 5866219089Spjd{ 5867307108Smav char name[ZFS_MAX_DATASET_NAME_LEN]; 5868168404Spjd 5869236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5870168404Spjd 5871236143Smm if (ztest_opts.zo_verbose >= 3) 5872219089Spjd (void) printf("Destroying %s to free up space\n", name); 5873168404Spjd 5874219089Spjd /* 5875219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5876219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5877219089Spjd * so there may be more than one thing to clean up. 5878219089Spjd */ 5879236143Smm for (int t = d; t < ztest_opts.zo_threads; 5880236143Smm t += ztest_opts.zo_datasets) { 5881219089Spjd ztest_dsl_dataset_cleanup(name, t); 5882236143Smm } 5883219089Spjd 5884219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5885219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5886219089Spjd} 5887219089Spjd 5888219089Spjdstatic void 5889219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5890219089Spjd{ 5891219089Spjd uint64_t usedobjs, dirobjs, scratch; 5892219089Spjd 5893219089Spjd /* 5894219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 
/*
 * Open (creating if necessary) dataset number 'd' of the pool and wire it
 * into the per-process ztest_ds[] slot: replay its ZIL, verify the dataset's
 * directory-object accounting, and re-open the log for new records.
 *
 * Returns 0 on success, or ENOSPC if the dataset could not be created for
 * lack of space (recorded via ztest_record_enospc()).
 */
static int
ztest_dataset_open(int d)
{
	ztest_ds_t *zd = &ztest_ds[d];
	/*
	 * Snapshot the last ZIL sequence number the previous incarnation of
	 * this process committed; used below to detect lost log records.
	 */
	uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
	objset_t *os;
	zilog_t *zilog;
	char name[ZFS_MAX_DATASET_NAME_LEN];
	int error;

	ztest_dataset_name(name, ztest_opts.zo_pool, d);

	/*
	 * Hold the name lock as reader across create+own so a concurrent
	 * rename/destroy test cannot pull the dataset out from under us.
	 */
	rw_enter(&ztest_name_lock, RW_READER);

	error = ztest_dataset_create(name);
	if (error == ENOSPC) {
		rw_exit(&ztest_name_lock);
		ztest_record_enospc(FTAG);
		return (error);
	}
	/* EEXIST is fine: the dataset survived a previous run/kill. */
	ASSERT(error == 0 || error == EEXIST);

	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
	rw_exit(&ztest_name_lock);

	ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);

	zilog = zd->zd_zilog;

	/*
	 * If the on-disk log claims fewer records than we know we committed
	 * before the last kill, log records were lost -- fatal.
	 */
	if (zilog->zl_header->zh_claim_lr_seq != 0 &&
	    zilog->zl_header->zh_claim_lr_seq < committed_seq)
		fatal(0, "missing log records: claimed %llu < committed %llu",
		    zilog->zl_header->zh_claim_lr_seq, committed_seq);

	/* Object accounting must hold both before and after replay. */
	ztest_dataset_dirobj_verify(zd);

	zil_replay(os, zd, ztest_replay_vector);

	ztest_dataset_dirobj_verify(zd);

	if (ztest_opts.zo_verbose >= 6)
		(void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
		    zd->zd_name,
		    (u_longlong_t)zilog->zl_parse_blk_count,
		    (u_longlong_t)zilog->zl_parse_lr_count,
		    (u_longlong_t)zilog->zl_replaying_seq);

	/* Re-open the ZIL for new log records now that replay is done. */
	zilog = zil_open(os, ztest_get_data);

	/* Same lost-record check against the replayed sequence number. */
	if (zilog->zl_replaying_seq != 0 &&
	    zilog->zl_replaying_seq < committed_seq)
		fatal(0, "missing log records: replayed %llu < committed %llu",
		    zilog->zl_replaying_seq, committed_seq);

	return (0);
}

/*
 * Undo ztest_dataset_open(): close the ZIL, release the objset
 * ownership taken with 'zd' as the tag, and tear down zd's state.
 */
static void
ztest_dataset_close(int d)
{
	ztest_ds_t *zd = &ztest_ds[d];

	zil_close(zd->zd_zilog);
	dmu_objset_disown(zd->zd_os, zd);

	ztest_zd_fini(zd);
}
/*
 * Kick off threads to run tests on all datasets in parallel.
 *
 * This is the main body of one child-process "pass": open the pool, spawn
 * one worker thread per zo_threads (the first zo_datasets of which each own
 * a dataset), plus a resume thread and a deadman thread, then join everyone
 * and tear the pool down again.  The shared-state times computed up front
 * determine when this pass stops and whether it self-immolates (SIGKILL).
 */
static void
ztest_run(ztest_shared_t *zs)
{
	thread_t *tid;
	spa_t *spa;
	objset_t *os;
	thread_t resume_tid;
	int error;

	ztest_exiting = B_FALSE;

	/*
	 * Initialize parent/child shared state.
	 */
	mutex_init(&ztest_checkpoint_lock, NULL, USYNC_THREAD, NULL);
	mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
	rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);

	/*
	 * Compute this pass's stop time, capped by the overall process stop
	 * time, and -- with probability zo_killrate -- an earlier random
	 * kill time at which the deadman thread will SIGKILL us.
	 */
	zs->zs_thread_start = gethrtime();
	zs->zs_thread_stop =
	    zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
	zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
	zs->zs_thread_kill = zs->zs_thread_stop;
	if (ztest_random(100) < ztest_opts.zo_killrate) {
		zs->zs_thread_kill -=
		    ztest_random(ztest_opts.zo_passtime * NANOSEC);
	}

	mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL);

	list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
	    offsetof(ztest_cb_data_t, zcd_node));

	/*
	 * Open our pool.
	 */
	kernel_init(FREAD | FWRITE);
	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
	metaslab_preload_limit = ztest_random(20) + 1;
	ztest_spa = spa;

	/*
	 * Record the pool's root-objset guid in shared state (read under the
	 * DSL pool config lock) so later passes can verify pool identity.
	 */
	dmu_objset_stats_t dds;
	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
	    DMU_OST_ANY, B_TRUE, FTAG, &os));
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
	dmu_objset_fast_stat(os, &dds);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
	zs->zs_guid = dds.dds_guid;
	dmu_objset_disown(os, FTAG);

	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;

	/*
	 * We don't expect the pool to suspend unless maxfaults == 0,
	 * in which case ztest_fault_inject() temporarily takes away
	 * the only valid replica.
	 */
	if (MAXFAULTS() == 0)
		spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
	else
		spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;

	/*
	 * Create a thread to periodically resume suspended I/O.
	 */
	VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
	    &resume_tid) == 0);

	/*
	 * Create a deadman thread to abort() if we hang.
	 */
	VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND,
	    NULL) == 0);

	/*
	 * Verify that we can safely inquire about any object,
	 * whether it's allocated or not.  To make it interesting,
	 * we probe a 5-wide window around each power of two.
	 * This hits all edge cases, including zero and the max.
	 */
	for (int t = 0; t < 64; t++) {
		for (int d = -5; d <= 5; d++) {
			error = dmu_object_info(spa->spa_meta_objset,
			    (1ULL << t) + d, NULL);
			ASSERT(error == 0 || error == ENOENT ||
			    error == EINVAL);
		}
	}

	/*
	 * If we got any ENOSPC errors on the previous run, destroy something.
	 */
	if (zs->zs_enospc_count != 0) {
		int d = ztest_random(ztest_opts.zo_datasets);
		ztest_dataset_destroy(d);
	}
	zs->zs_enospc_count = 0;

	tid = umem_alloc(ztest_opts.zo_threads * sizeof (thread_t),
	    UMEM_NOFAIL);

	if (ztest_opts.zo_verbose >= 4)
		(void) printf("starting main threads...\n");

	/*
	 * Kick off all the tests that run in parallel.
	 */
	for (int t = 0; t < ztest_opts.zo_threads; t++) {
		/* The first zo_datasets threads each own a dataset. */
		if (t < ztest_opts.zo_datasets &&
		    ztest_dataset_open(t) != 0)
			return;
		VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
		    THR_BOUND, &tid[t]) == 0);
	}

	/*
	 * Wait for all of the tests to complete.  We go in reverse order
	 * so we don't close datasets while threads are still using them.
	 */
	for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
		VERIFY(thr_join(tid[t], NULL, NULL) == 0);
		if (t < ztest_opts.zo_datasets)
			ztest_dataset_close(t);
	}

	txg_wait_synced(spa_get_dsl(spa), 0);

	/* Publish usage stats for the parent's end-of-pass report. */
	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
	zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
	zfs_dbgmsg_print(FTAG);

	umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));

	/* Kill the resume thread */
	ztest_exiting = B_TRUE;
	VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
	ztest_resume(spa);

	/*
	 * Right before closing the pool, kick off a bunch of async I/O;
	 * spa_close() should wait for it to complete.
	 */
	for (uint64_t object = 1; object < 50; object++) {
		dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
		    ZIO_PRIORITY_SYNC_READ);
	}

	spa_close(spa, FTAG);

	/*
	 * Verify that we can loop over all pools.
	 */
	mutex_enter(&spa_namespace_lock);
	for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
		if (ztest_opts.zo_verbose > 3)
			(void) printf("spa_next: found %s\n", spa_name(spa));
	mutex_exit(&spa_namespace_lock);

	/*
	 * Verify that we can export the pool and reimport it under a
	 * different name.
	 */
	if (ztest_random(2) == 0) {
		char name[ZFS_MAX_DATASET_NAME_LEN];
		(void) snprintf(name, sizeof (name), "%s_import",
		    ztest_opts.zo_pool);
		ztest_spa_import_export(ztest_opts.zo_pool, name);
		ztest_spa_import_export(name, ztest_opts.zo_pool);
	}

	kernel_fini();

	list_destroy(&zcl.zcl_callbacks);

	mutex_destroy(&zcl.zcl_callbacks_lock);

	rw_destroy(&ztest_name_lock);
	mutex_destroy(&ztest_vdev_lock);
	mutex_destroy(&ztest_checkpoint_lock);
}
6180168404Spjd */ 6181219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 6182219089Spjd ztest_dmu_object_alloc_free(zd, 0); 6183219089Spjd zil_commit(zd->zd_zilog, 0); 6184168404Spjd } 6185168404Spjd 6186168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 6187168404Spjd 6188219089Spjd /* 6189219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 6190219089Spjd * so that the only way to record changes from now on is the ZIL. 6191219089Spjd */ 6192219089Spjd spa_freeze(spa); 6193185029Spjd 6194219089Spjd /* 6195268855Sdelphij * Because it is hard to predict how much space a write will actually 6196268855Sdelphij * require beforehand, we leave ourselves some fudge space to write over 6197268855Sdelphij * capacity. 6198268855Sdelphij */ 6199268855Sdelphij uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2; 6200268855Sdelphij 6201268855Sdelphij /* 6202219089Spjd * Run tests that generate log records but don't alter the pool config 6203219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 6204219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 6205219089Spjd * to increase well beyond the last synced value in the uberblock. 6206219089Spjd * The ZIL should be OK with that. 6207268855Sdelphij * 6208268855Sdelphij * Run a random number of times less than zo_maxloops and ensure we do 6209268855Sdelphij * not run out of space on the pool. 
6210219089Spjd */ 6211236143Smm while (ztest_random(10) != 0 && 6212268855Sdelphij numloops++ < ztest_opts.zo_maxloops && 6213268855Sdelphij metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) { 6214268855Sdelphij ztest_od_t od; 6215268855Sdelphij ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 6216268855Sdelphij VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE)); 6217268855Sdelphij ztest_io(zd, od.od_object, 6218268855Sdelphij ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 6219219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 6220219089Spjd } 6221185029Spjd 6222168404Spjd /* 6223219089Spjd * Commit all of the changes we just generated. 6224168404Spjd */ 6225219089Spjd zil_commit(zd->zd_zilog, 0); 6226219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 6227168404Spjd 6228219089Spjd /* 6229219089Spjd * Close our dataset and close the pool. 6230219089Spjd */ 6231236143Smm ztest_dataset_close(0); 6232168404Spjd spa_close(spa, FTAG); 6233219089Spjd kernel_fini(); 6234168404Spjd 6235219089Spjd /* 6236219089Spjd * Open and close the pool and dataset to induce log replay. 
6237219089Spjd */ 6238219089Spjd kernel_init(FREAD | FWRITE); 6239236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 6240239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 6241236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 6242236143Smm ztest_dataset_close(0); 6243239620Smm 6244239620Smm ztest_spa = spa; 6245239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 6246239620Smm ztest_reguid(NULL, 0); 6247239620Smm 6248219089Spjd spa_close(spa, FTAG); 6249168404Spjd kernel_fini(); 6250168404Spjd} 6251168404Spjd 6252168404Spjdvoid 6253168404Spjdprint_time(hrtime_t t, char *timebuf) 6254168404Spjd{ 6255168404Spjd hrtime_t s = t / NANOSEC; 6256168404Spjd hrtime_t m = s / 60; 6257168404Spjd hrtime_t h = m / 60; 6258168404Spjd hrtime_t d = h / 24; 6259168404Spjd 6260168404Spjd s -= m * 60; 6261168404Spjd m -= h * 60; 6262168404Spjd h -= d * 24; 6263168404Spjd 6264168404Spjd timebuf[0] = '\0'; 6265168404Spjd 6266168404Spjd if (d) 6267168404Spjd (void) sprintf(timebuf, 6268168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 6269168404Spjd else if (h) 6270168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 6271168404Spjd else if (m) 6272168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 6273168404Spjd else 6274168404Spjd (void) sprintf(timebuf, "%llus", s); 6275168404Spjd} 6276168404Spjd 6277219089Spjdstatic nvlist_t * 6278219089Spjdmake_random_props() 6279219089Spjd{ 6280219089Spjd nvlist_t *props; 6281219089Spjd 6282236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 6283219089Spjd if (ztest_random(2) == 0) 6284236884Smm return (props); 6285219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 6286219089Spjd 6287219089Spjd return (props); 6288219089Spjd} 6289219089Spjd 6290168404Spjd/* 6291168404Spjd * Create a storage pool with the given name and initial vdev size. 6292219089Spjd * Then test spa_freeze() functionality. 
6293168404Spjd */ 6294168404Spjdstatic void 6295219089Spjdztest_init(ztest_shared_t *zs) 6296168404Spjd{ 6297168404Spjd spa_t *spa; 6298219089Spjd nvlist_t *nvroot, *props; 6299168404Spjd 6300332545Smav mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL); 6301332547Smav mutex_init(&ztest_checkpoint_lock, NULL, USYNC_THREAD, NULL); 6302332545Smav rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); 6303219089Spjd 6304168404Spjd kernel_init(FREAD | FWRITE); 6305168404Spjd 6306168404Spjd /* 6307168404Spjd * Create the storage pool. 6308168404Spjd */ 6309236143Smm (void) spa_destroy(ztest_opts.zo_pool); 6310219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 6311219089Spjd zs->zs_splits = 0; 6312236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 6313243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 6314236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 6315219089Spjd props = make_random_props(); 6316236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 6317236884Smm char buf[1024]; 6318236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 6319236884Smm spa_feature_table[i].fi_uname); 6320236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 6321236884Smm } 6322248571Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); 6323168404Spjd nvlist_free(nvroot); 6324286737Sdelphij nvlist_free(props); 6325168404Spjd 6326236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 6327236143Smm zs->zs_metaslab_sz = 6328236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 6329236884Smm 6330219089Spjd spa_close(spa, FTAG); 6331209962Smm 6332219089Spjd kernel_fini(); 6333168404Spjd 6334236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6335168404Spjd 6336236143Smm ztest_freeze(); 6337219089Spjd 6338236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6339219089Spjd 6340332545Smav rw_destroy(&ztest_name_lock); 6341332545Smav mutex_destroy(&ztest_vdev_lock); 6342332547Smav mutex_destroy(&ztest_checkpoint_lock); 
6343168404Spjd} 6344168404Spjd 6345236143Smmstatic void 6346242845Sdelphijsetup_data_fd(void) 6347236143Smm{ 6348242845Sdelphij static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 6349236143Smm 6350242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 6351242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 6352242845Sdelphij (void) unlink(ztest_name_data); 6353242845Sdelphij} 6354236143Smm 6355236143Smm 6356236884Smmstatic int 6357236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 6358236884Smm{ 6359236884Smm int size; 6360236884Smm 6361236884Smm size = hdr->zh_hdr_size; 6362236884Smm size += hdr->zh_opts_size; 6363236884Smm size += hdr->zh_size; 6364236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 6365236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 6366236884Smm 6367236884Smm return (size); 6368236884Smm} 6369236884Smm 6370236143Smmstatic void 6371236143Smmsetup_hdr(void) 6372236143Smm{ 6373236884Smm int size; 6374236143Smm ztest_shared_hdr_t *hdr; 6375236143Smm 6376236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 6377242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 6378236143Smm ASSERT(hdr != MAP_FAILED); 6379236143Smm 6380242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 6381236884Smm 6382236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 6383236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 6384236143Smm hdr->zh_size = sizeof (ztest_shared_t); 6385236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 6386236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 6387236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 6388236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 6389236143Smm 6390236884Smm size = shared_data_size(hdr); 6391242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 6392236884Smm 6393236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 6394236143Smm} 6395236143Smm 6396236143Smmstatic 
void 6397236143Smmsetup_data(void) 6398236143Smm{ 6399236143Smm int size, offset; 6400236143Smm ztest_shared_hdr_t *hdr; 6401236143Smm uint8_t *buf; 6402236143Smm 6403236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 6404242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 6405236143Smm ASSERT(hdr != MAP_FAILED); 6406236143Smm 6407236884Smm size = shared_data_size(hdr); 6408236143Smm 6409236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 6410236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 6411242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 6412236143Smm ASSERT(hdr != MAP_FAILED); 6413236143Smm buf = (uint8_t *)hdr; 6414236143Smm 6415236143Smm offset = hdr->zh_hdr_size; 6416236143Smm ztest_shared_opts = (void *)&buf[offset]; 6417236143Smm offset += hdr->zh_opts_size; 6418236143Smm ztest_shared = (void *)&buf[offset]; 6419236143Smm offset += hdr->zh_size; 6420236143Smm ztest_shared_callstate = (void *)&buf[offset]; 6421236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 6422236143Smm ztest_shared_ds = (void *)&buf[offset]; 6423236143Smm} 6424236143Smm 6425236143Smmstatic boolean_t 6426236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 6427236143Smm{ 6428236143Smm pid_t pid; 6429236143Smm int status; 6430242845Sdelphij char *cmdbuf = NULL; 6431236143Smm 6432236143Smm pid = fork(); 6433236143Smm 6434236143Smm if (cmd == NULL) { 6435242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 6436242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 6437236143Smm cmd = cmdbuf; 6438236143Smm } 6439236143Smm 6440236143Smm if (pid == -1) 6441236143Smm fatal(1, "fork failed"); 6442236143Smm 6443236143Smm if (pid == 0) { /* child */ 6444236143Smm char *emptyargv[2] = { cmd, NULL }; 6445242845Sdelphij char fd_data_str[12]; 6446236143Smm 6447236143Smm struct rlimit rl = { 1024, 1024 }; 6448236143Smm (void) 
setrlimit(RLIMIT_NOFILE, &rl); 6449242845Sdelphij 6450242845Sdelphij (void) close(ztest_fd_rand); 6451242845Sdelphij VERIFY3U(11, >=, 6452242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 6453242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 6454242845Sdelphij 6455236143Smm (void) enable_extended_FILE_stdio(-1, -1); 6456236143Smm if (libpath != NULL) 6457236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 6458236143Smm#ifdef illumos 6459236143Smm (void) execv(cmd, emptyargv); 6460236143Smm#else 6461236143Smm (void) execvp(cmd, emptyargv); 6462236143Smm#endif 6463236143Smm ztest_dump_core = B_FALSE; 6464236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 6465236143Smm } 6466236143Smm 6467242845Sdelphij if (cmdbuf != NULL) { 6468242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 6469242845Sdelphij cmd = NULL; 6470242845Sdelphij } 6471242845Sdelphij 6472236143Smm while (waitpid(pid, &status, 0) != pid) 6473236143Smm continue; 6474236143Smm if (statusp != NULL) 6475236143Smm *statusp = status; 6476236143Smm 6477236143Smm if (WIFEXITED(status)) { 6478236143Smm if (WEXITSTATUS(status) != 0) { 6479236143Smm (void) fprintf(stderr, "child exited with code %d\n", 6480236143Smm WEXITSTATUS(status)); 6481236143Smm exit(2); 6482236143Smm } 6483236143Smm return (B_FALSE); 6484236143Smm } else if (WIFSIGNALED(status)) { 6485236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 6486236143Smm (void) fprintf(stderr, "child died with signal %d\n", 6487236143Smm WTERMSIG(status)); 6488236143Smm exit(3); 6489236143Smm } 6490236143Smm return (B_TRUE); 6491236143Smm } else { 6492236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 6493236143Smm exit(4); 6494236143Smm /* NOTREACHED */ 6495236143Smm } 6496236143Smm} 6497236143Smm 6498236143Smmstatic void 6499236143Smmztest_run_init(void) 6500236143Smm{ 6501236143Smm ztest_shared_t *zs = ztest_shared; 6502236143Smm 6503236143Smm ASSERT(ztest_opts.zo_init != 0); 6504236143Smm 
/*
 * Child-process entry for the "do_init" phase: run ztest_init() zo_init
 * times to create (and spa_freeze()-test) a fresh pool, starting from a
 * clean zpool.cache and zeroed shared state each pass.
 */
static void
ztest_run_init(void)
{
	ztest_shared_t *zs = ztest_shared;

	ASSERT(ztest_opts.zo_init != 0);

	/*
	 * Blow away any existing copy of zpool.cache
	 */
	(void) remove(spa_config_path);

	/*
	 * Create and initialize our storage pool.
	 */
	for (int i = 1; i <= ztest_opts.zo_init; i++) {
		bzero(zs, sizeof (ztest_shared_t));
		if (ztest_opts.zo_verbose >= 3 &&
		    ztest_opts.zo_init != 1) {
			(void) printf("ztest_init(), pass %d\n", i);
		}
		ztest_init(zs);
	}
}

/*
 * ztest driver.  The same binary runs in two roles, distinguished by the
 * ZTEST_FD_DATA environment variable:
 *
 *   - parent (no ZTEST_FD_DATA): parses options, creates the mmap'd
 *     shared-state file, exec's a child to initialize the pool, then
 *     repeatedly exec's child passes (possibly an older ztest binary)
 *     until zo_time expires, reporting progress between passes;
 *
 *   - child (ZTEST_FD_DATA set): attaches to the inherited shared-state
 *     fd and runs either ztest_run_init() or ztest_run().
 */
int
main(int argc, char **argv)
{
	int kills = 0;
	int iters = 0;
	int older = 0;
	int newer = 0;
	ztest_shared_t *zs;
	ztest_info_t *zi;
	ztest_shared_callstate_t *zc;
	char timebuf[100];
	char numbuf[NN_NUMBUF_SZ];
	spa_t *spa;
	char *cmd;
	boolean_t hasalt;
	/* Presence of this variable marks us as a child process. */
	char *fd_data_str = getenv("ZTEST_FD_DATA");

	/* Line-buffer stdout so parent/child output interleaves sanely. */
	(void) setvbuf(stdout, NULL, _IOLBF, 0);

	dprintf_setup(&argc, argv);
	zfs_deadman_synctime_ms = 300000;
	/*
	 * As two-word space map entries may not come up often (especially
	 * if pool and vdev sizes are small) we want to force at least some
	 * of them so the feature get tested.
	 */
	zfs_force_some_double_word_sm_entries = B_TRUE;

	ztest_fd_rand = open("/dev/urandom", O_RDONLY);
	ASSERT3S(ztest_fd_rand, >=, 0);

	if (!fd_data_str) {
		/* Parent: create shared state and publish the options. */
		process_options(argc, argv);

		setup_data_fd();
		setup_hdr();
		setup_data();
		bcopy(&ztest_opts, ztest_shared_opts,
		    sizeof (*ztest_shared_opts));
	} else {
		/* Child: attach to the inherited shared-state fd. */
		ztest_fd_data = atoi(fd_data_str);
		setup_data();
		bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts));
	}
	ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count);

	/* Override location of zpool.cache */
	VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache",
	    ztest_opts.zo_dir), !=, -1);

	ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
	    UMEM_NOFAIL);
	zs = ztest_shared;

	if (fd_data_str) {
		/*
		 * Child: pick up the parent's tunables from shared state
		 * and run the requested phase, then exit -- the code below
		 * this point is parent-only.
		 */
		metaslab_force_ganging = ztest_opts.zo_metaslab_force_ganging;
		metaslab_df_alloc_threshold =
		    zs->zs_metaslab_df_alloc_threshold;

		if (zs->zs_do_init)
			ztest_run_init();
		else
			ztest_run(zs);
		exit(0);
	}

	/* Backwards-compat testing: is an alternate (older) ztest given? */
	hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0);

	if (ztest_opts.zo_verbose >= 1) {
		(void) printf("%llu vdevs, %d datasets, %d threads,"
		    " %llu seconds...\n",
		    (u_longlong_t)ztest_opts.zo_vdevs,
		    ztest_opts.zo_datasets,
		    ztest_opts.zo_threads,
		    (u_longlong_t)ztest_opts.zo_time);
	}

	cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
	(void) strlcpy(cmd, getexecname(), MAXNAMELEN);

	/* Run the initialization phase in a child (possibly the old ztest). */
	zs->zs_do_init = B_TRUE;
	if (strlen(ztest_opts.zo_alt_ztest) != 0) {
		if (ztest_opts.zo_verbose >= 1) {
			(void) printf("Executing older ztest for "
			    "initialization: %s\n", ztest_opts.zo_alt_ztest);
		}
		VERIFY(!exec_child(ztest_opts.zo_alt_ztest,
		    ztest_opts.zo_alt_libpath, B_FALSE, NULL));
	} else {
		VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL));
	}
	zs->zs_do_init = B_FALSE;

	zs->zs_proc_start = gethrtime();
	zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC;

	/* Schedule each test function's first firing time. */
	for (int f = 0; f < ZTEST_FUNCS; f++) {
		zi = &ztest_info[f];
		zc = ZTEST_GET_SHARED_CALLSTATE(f);
		if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop)
			zc->zc_next = UINT64_MAX;
		else
			zc->zc_next = zs->zs_proc_start +
			    ztest_random(2 * zi->zi_interval[0] + 1);
	}

	/*
	 * Run the tests in a loop.  These tests include fault injection
	 * to verify that self-healing data works, and forced crashes
	 * to verify that we never lose on-disk consistency.
	 */
	while (gethrtime() < zs->zs_proc_stop) {
		int status;
		boolean_t killed;

		/*
		 * Initialize the workload counters for each function.
		 */
		for (int f = 0; f < ZTEST_FUNCS; f++) {
			zc = ZTEST_GET_SHARED_CALLSTATE(f);
			zc->zc_count = 0;
			zc->zc_time = 0;
		}

		/* Set the allocation switch size */
		zs->zs_metaslab_df_alloc_threshold =
		    ztest_random(zs->zs_metaslab_sz / 4) + 1;

		/* Alternate randomly between the new and old binaries. */
		if (!hasalt || ztest_random(2) == 0) {
			if (hasalt && ztest_opts.zo_verbose >= 1) {
				(void) printf("Executing newer ztest: %s\n",
				    cmd);
			}
			newer++;
			killed = exec_child(cmd, NULL, B_TRUE, &status);
		} else {
			if (hasalt && ztest_opts.zo_verbose >= 1) {
				(void) printf("Executing older ztest: %s\n",
				    ztest_opts.zo_alt_ztest);
			}
			older++;
			killed = exec_child(ztest_opts.zo_alt_ztest,
			    ztest_opts.zo_alt_libpath, B_TRUE, &status);
		}

		if (killed)
			kills++;
		iters++;

		if (ztest_opts.zo_verbose >= 1) {
			hrtime_t now = gethrtime();

			now = MIN(now, zs->zs_proc_stop);
			print_time(zs->zs_proc_stop - now, timebuf);
			nicenum(zs->zs_space, numbuf, sizeof (numbuf));

			(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
			    "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
			    iters,
			    WIFEXITED(status) ? "Complete" : "SIGKILL",
			    (u_longlong_t)zs->zs_enospc_count,
			    100.0 * zs->zs_alloc / zs->zs_space,
			    numbuf,
			    100.0 * (now - zs->zs_proc_start) /
			    (ztest_opts.zo_time * NANOSEC), timebuf);
		}

		if (ztest_opts.zo_verbose >= 2) {
			(void) printf("\nWorkload summary:\n\n");
			(void) printf("%7s %9s %s\n",
			    "Calls", "Time", "Function");
			(void) printf("%7s %9s %s\n",
			    "-----", "----", "--------");
			for (int f = 0; f < ZTEST_FUNCS; f++) {
				Dl_info dli;

				zi = &ztest_info[f];
				zc = ZTEST_GET_SHARED_CALLSTATE(f);
				print_time(zc->zc_time, timebuf);
				/* Resolve the function pointer to a name. */
				(void) dladdr((void *)zi->zi_func, &dli);
				(void) printf("%7llu %9s %s\n",
				    (u_longlong_t)zc->zc_count, timebuf,
				    dli.dli_sname);
			}
			(void) printf("\n");
		}

		/*
		 * It's possible that we killed a child during a rename test,
		 * in which case we'll have a 'ztest_tmp' pool lying around
		 * instead of 'ztest'.  Do a blind rename in case this
		 * happened.
		 */
		kernel_init(FREAD);
		if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) {
			spa_close(spa, FTAG);
		} else {
			char tmpname[ZFS_MAX_DATASET_NAME_LEN];
			kernel_fini();
			kernel_init(FREAD | FWRITE);
			(void) snprintf(tmpname, sizeof (tmpname), "%s_tmp",
			    ztest_opts.zo_pool);
			(void) spa_rename(tmpname, ztest_opts.zo_pool);
		}
		kernel_fini();

		/* Verify on-disk consistency after every pass. */
		ztest_run_zdb(ztest_opts.zo_pool);
	}

	if (ztest_opts.zo_verbose >= 1) {
		if (hasalt) {
			(void) printf("%d runs of older ztest: %s\n", older,
			    ztest_opts.zo_alt_ztest);
			(void) printf("%d runs of newer ztest: %s\n", newer,
			    cmd);
		}
		(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
		    kills, iters - kills, (100.0 * kills) / MAX(1, iters));
	}

	umem_free(cmd, MAXNAMELEN);

	return (0);
}