ztest.c revision 236884
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23236143Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24228103Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 25236143Smm * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26168404Spjd */ 27168404Spjd 28168404Spjd/* 29168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 30168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 31168404Spjd * 32168404Spjd * The overall design of the ztest program is as follows: 33168404Spjd * 34168404Spjd * (1) For each major functional area (e.g. adding vdevs to a pool, 35168404Spjd * creating and destroying datasets, reading and writing objects, etc) 36168404Spjd * we have a simple routine to test that functionality. 
These 37168404Spjd * individual routines do not have to do anything "stressful". 38168404Spjd * 39168404Spjd * (2) We turn these simple functionality tests into a stress test by 40168404Spjd * running them all in parallel, with as many threads as desired, 41168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 42168404Spjd * 43168404Spjd * (3) While all this is happening, we inject faults into the pool to 44168404Spjd * verify that self-healing data really works. 45168404Spjd * 46168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 47168404Spjd * functions. Thus even individual objects vary from block to block 48168404Spjd * in which checksum they use and whether they're compressed. 49168404Spjd * 50168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 51168404Spjd * we run the entire test in a child of the main process. 52168404Spjd * At random times, the child self-immolates with a SIGKILL. 53168404Spjd * This is the software equivalent of pulling the power cord. 54168404Spjd * The parent then runs the test again, using the existing 55236143Smm * storage pool, as many times as desired. If backwards compatibility 56236143Smm * testing is enabled ztest will sometimes run the "older" version 57236143Smm * of ztest after a SIGKILL. 58168404Spjd * 59168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 60168404Spjd * many of the functional tests record the transaction group number 61168404Spjd * as part of their data. When reading old data, they verify that 62168404Spjd * the transaction group number is less than the current, open txg. 63168404Spjd * If you add a new test, please do this if applicable. 64168404Spjd * 65168404Spjd * When run with no arguments, ztest runs for about five minutes and 66168404Spjd * produces no output if successful. To get a little bit of information, 67168404Spjd * specify -V. To get more information, specify -VV, and so on. 
68168404Spjd * 69168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 70168404Spjd * 71168404Spjd * You can ask for more vdevs [-v], datasets [-d], or threads [-t] 72168404Spjd * to increase the pool capacity, fanout, and overall stress level. 73168404Spjd * 74236143Smm * Use the -k option to set the desired frequency of kills. 75236143Smm * 76236143Smm * When ztest invokes itself it passes all relevant information through a 77236143Smm * temporary file which is mmap-ed in the child process. This allows shared 78236143Smm * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always 79236143Smm * stored at offset 0 of this file and contains information on the size and 80236143Smm * number of shared structures in the file. The information stored in this file 81236143Smm * must remain backwards compatible with older versions of ztest so that 82236143Smm * ztest can invoke them during backwards compatibility testing (-B). 83168404Spjd */ 84168404Spjd 85168404Spjd#include <sys/zfs_context.h> 86168404Spjd#include <sys/spa.h> 87168404Spjd#include <sys/dmu.h> 88168404Spjd#include <sys/txg.h> 89209962Smm#include <sys/dbuf.h> 90168404Spjd#include <sys/zap.h> 91168404Spjd#include <sys/dmu_objset.h> 92168404Spjd#include <sys/poll.h> 93168404Spjd#include <sys/stat.h> 94168404Spjd#include <sys/time.h> 95168404Spjd#include <sys/wait.h> 96168404Spjd#include <sys/mman.h> 97168404Spjd#include <sys/resource.h> 98168404Spjd#include <sys/zio.h> 99168404Spjd#include <sys/zil.h> 100219089Spjd#include <sys/zil_impl.h> 101168404Spjd#include <sys/vdev_impl.h> 102185029Spjd#include <sys/vdev_file.h> 103168404Spjd#include <sys/spa_impl.h> 104219089Spjd#include <sys/metaslab_impl.h> 105168404Spjd#include <sys/dsl_prop.h> 106207910Smm#include <sys/dsl_dataset.h> 107219089Spjd#include <sys/dsl_scan.h> 108219089Spjd#include <sys/zio_checksum.h> 109168404Spjd#include <sys/refcount.h> 110236884Smm#include <sys/zfeature.h> 111168404Spjd#include 
<stdio.h> 112168404Spjd#include <stdio_ext.h> 113168404Spjd#include <stdlib.h> 114168404Spjd#include <unistd.h> 115168404Spjd#include <signal.h> 116168404Spjd#include <umem.h> 117168404Spjd#include <dlfcn.h> 118168404Spjd#include <ctype.h> 119168404Spjd#include <math.h> 120168404Spjd#include <errno.h> 121168404Spjd#include <sys/fs/zfs.h> 122219089Spjd#include <libnvpair.h> 123168404Spjd 124236143Smm#define ZTEST_FD_DATA 3 125236143Smm#define ZTEST_FD_RAND 4 126168404Spjd 127236143Smmtypedef struct ztest_shared_hdr { 128236143Smm uint64_t zh_hdr_size; 129236143Smm uint64_t zh_opts_size; 130236143Smm uint64_t zh_size; 131236143Smm uint64_t zh_stats_size; 132236143Smm uint64_t zh_stats_count; 133236143Smm uint64_t zh_ds_size; 134236143Smm uint64_t zh_ds_count; 135236143Smm} ztest_shared_hdr_t; 136168404Spjd 137236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 138236143Smm 139236143Smmtypedef struct ztest_shared_opts { 140236143Smm char zo_pool[MAXNAMELEN]; 141236143Smm char zo_dir[MAXNAMELEN]; 142236143Smm char zo_alt_ztest[MAXNAMELEN]; 143236143Smm char zo_alt_libpath[MAXNAMELEN]; 144236143Smm uint64_t zo_vdevs; 145236143Smm uint64_t zo_vdevtime; 146236143Smm size_t zo_vdev_size; 147236143Smm int zo_ashift; 148236143Smm int zo_mirrors; 149236143Smm int zo_raidz; 150236143Smm int zo_raidz_parity; 151236143Smm int zo_datasets; 152236143Smm int zo_threads; 153236143Smm uint64_t zo_passtime; 154236143Smm uint64_t zo_killrate; 155236143Smm int zo_verbose; 156236143Smm int zo_init; 157236143Smm uint64_t zo_time; 158236143Smm uint64_t zo_maxloops; 159236143Smm uint64_t zo_metaslab_gang_bang; 160236143Smm} ztest_shared_opts_t; 161236143Smm 162236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 163236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 164236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 165236143Smm .zo_alt_ztest = { '\0' }, 166236143Smm .zo_alt_libpath = { '\0' }, 167236143Smm .zo_vdevs = 5, 168236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 
169236143Smm .zo_mirrors = 2, 170236143Smm .zo_raidz = 4, 171236143Smm .zo_raidz_parity = 1, 172236143Smm .zo_vdev_size = SPA_MINDEVSIZE, 173236143Smm .zo_datasets = 7, 174236143Smm .zo_threads = 23, 175236143Smm .zo_passtime = 60, /* 60 seconds */ 176236143Smm .zo_killrate = 70, /* 70% kill rate */ 177236143Smm .zo_verbose = 0, 178236143Smm .zo_init = 1, 179236143Smm .zo_time = 300, /* 5 minutes */ 180236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 181236143Smm .zo_metaslab_gang_bang = 32 << 10 182236143Smm}; 183236143Smm 184236143Smmextern uint64_t metaslab_gang_bang; 185236143Smmextern uint64_t metaslab_df_alloc_threshold; 186236143Smm 187236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 188236143Smmstatic ztest_shared_opts_t ztest_opts; 189236143Smm 190236143Smmtypedef struct ztest_shared_ds { 191236143Smm uint64_t zd_seq; 192236143Smm} ztest_shared_ds_t; 193236143Smm 194236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 195236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 196236143Smm 197219089Spjd#define BT_MAGIC 0x123456789abcdefULL 198236143Smm#define MAXFAULTS() \ 199236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 200219089Spjd 201219089Spjdenum ztest_io_type { 202219089Spjd ZTEST_IO_WRITE_TAG, 203219089Spjd ZTEST_IO_WRITE_PATTERN, 204219089Spjd ZTEST_IO_WRITE_ZEROES, 205219089Spjd ZTEST_IO_TRUNCATE, 206219089Spjd ZTEST_IO_SETATTR, 207219089Spjd ZTEST_IO_TYPES 208219089Spjd}; 209219089Spjd 210185029Spjdtypedef struct ztest_block_tag { 211219089Spjd uint64_t bt_magic; 212185029Spjd uint64_t bt_objset; 213185029Spjd uint64_t bt_object; 214185029Spjd uint64_t bt_offset; 215219089Spjd uint64_t bt_gen; 216185029Spjd uint64_t bt_txg; 217219089Spjd uint64_t bt_crtxg; 218185029Spjd} ztest_block_tag_t; 219185029Spjd 220219089Spjdtypedef struct bufwad { 221219089Spjd uint64_t bw_index; 222219089Spjd uint64_t bw_txg; 223219089Spjd uint64_t bw_data; 224219089Spjd} bufwad_t; 225168404Spjd 226219089Spjd/* 
227219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 228219089Spjd */ 229219089Spjdtypedef enum { 230219089Spjd RL_READER, 231219089Spjd RL_WRITER, 232219089Spjd RL_APPEND 233219089Spjd} rl_type_t; 234168404Spjd 235219089Spjdtypedef struct rll { 236219089Spjd void *rll_writer; 237219089Spjd int rll_readers; 238219089Spjd mutex_t rll_lock; 239219089Spjd cond_t rll_cv; 240219089Spjd} rll_t; 241219089Spjd 242219089Spjdtypedef struct rl { 243219089Spjd uint64_t rl_object; 244219089Spjd uint64_t rl_offset; 245219089Spjd uint64_t rl_size; 246219089Spjd rll_t *rl_lock; 247219089Spjd} rl_t; 248219089Spjd 249219089Spjd#define ZTEST_RANGE_LOCKS 64 250219089Spjd#define ZTEST_OBJECT_LOCKS 64 251219089Spjd 252168404Spjd/* 253219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 254219089Spjd */ 255219089Spjdtypedef struct ztest_od { 256219089Spjd uint64_t od_dir; 257219089Spjd uint64_t od_object; 258219089Spjd dmu_object_type_t od_type; 259219089Spjd dmu_object_type_t od_crtype; 260219089Spjd uint64_t od_blocksize; 261219089Spjd uint64_t od_crblocksize; 262219089Spjd uint64_t od_gen; 263219089Spjd uint64_t od_crgen; 264219089Spjd char od_name[MAXNAMELEN]; 265219089Spjd} ztest_od_t; 266219089Spjd 267219089Spjd/* 268219089Spjd * Per-dataset state. 269219089Spjd */ 270219089Spjdtypedef struct ztest_ds { 271236143Smm ztest_shared_ds_t *zd_shared; 272219089Spjd objset_t *zd_os; 273224526Smm rwlock_t zd_zilog_lock; 274219089Spjd zilog_t *zd_zilog; 275219089Spjd ztest_od_t *zd_od; /* debugging aid */ 276219089Spjd char zd_name[MAXNAMELEN]; 277219089Spjd mutex_t zd_dirobj_lock; 278219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 279219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 280219089Spjd} ztest_ds_t; 281219089Spjd 282219089Spjd/* 283219089Spjd * Per-iteration state. 
284219089Spjd */ 285219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 286219089Spjd 287219089Spjdtypedef struct ztest_info { 288219089Spjd ztest_func_t *zi_func; /* test function */ 289219089Spjd uint64_t zi_iters; /* iterations per execution */ 290219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 291219089Spjd} ztest_info_t; 292219089Spjd 293236143Smmtypedef struct ztest_shared_callstate { 294236143Smm uint64_t zc_count; /* per-pass count */ 295236143Smm uint64_t zc_time; /* per-pass time */ 296236143Smm uint64_t zc_next; /* next time to call this function */ 297236143Smm} ztest_shared_callstate_t; 298236143Smm 299236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 300236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 301236143Smm 302219089Spjd/* 303168404Spjd * Note: these aren't static because we want dladdr() to work. 304168404Spjd */ 305168404Spjdztest_func_t ztest_dmu_read_write; 306168404Spjdztest_func_t ztest_dmu_write_parallel; 307168404Spjdztest_func_t ztest_dmu_object_alloc_free; 308219089Spjdztest_func_t ztest_dmu_commit_callbacks; 309168404Spjdztest_func_t ztest_zap; 310168404Spjdztest_func_t ztest_zap_parallel; 311219089Spjdztest_func_t ztest_zil_commit; 312224526Smmztest_func_t ztest_zil_remount; 313219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 314168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 315219089Spjdztest_func_t ztest_dmu_prealloc; 316219089Spjdztest_func_t ztest_fzap; 317168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 318219089Spjdztest_func_t ztest_dsl_prop_get_set; 319219089Spjdztest_func_t ztest_spa_prop_get_set; 320168404Spjdztest_func_t ztest_spa_create_destroy; 321168404Spjdztest_func_t ztest_fault_inject; 322219089Spjdztest_func_t ztest_ddt_repair; 323219089Spjdztest_func_t ztest_dmu_snapshot_hold; 324185029Spjdztest_func_t ztest_spa_rename; 325219089Spjdztest_func_t ztest_scrub; 326219089Spjdztest_func_t 
ztest_dsl_dataset_promote_busy; 327168404Spjdztest_func_t ztest_vdev_attach_detach; 328168404Spjdztest_func_t ztest_vdev_LUN_growth; 329168404Spjdztest_func_t ztest_vdev_add_remove; 330185029Spjdztest_func_t ztest_vdev_aux_add_remove; 331219089Spjdztest_func_t ztest_split_pool; 332228103Smmztest_func_t ztest_reguid; 333168404Spjd 334219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 335219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 336219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 337219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 338219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 339168404Spjd 340168404Spjdztest_info_t ztest_info[] = { 341185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 342219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 343185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 344219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 345185029Spjd { ztest_zap, 30, &zopt_always }, 346185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 347219089Spjd { ztest_split_pool, 1, &zopt_always }, 348219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 349224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 350219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 351219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 352219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 353219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 354219089Spjd#if 0 355219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 356219089Spjd#endif 357219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 358219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 359219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 360185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 361219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 362219089Spjd { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 
363228103Smm { ztest_reguid, 1, &zopt_sometimes }, 364185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 365219089Spjd { ztest_scrub, 1, &zopt_rarely }, 366219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 367219089Spjd { ztest_vdev_attach_detach, 1, &zopt_rarely }, 368185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 369236143Smm { ztest_vdev_add_remove, 1, 370236143Smm &ztest_opts.zo_vdevtime }, 371236143Smm { ztest_vdev_aux_add_remove, 1, 372236143Smm &ztest_opts.zo_vdevtime }, 373168404Spjd}; 374168404Spjd 375168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 376168404Spjd 377219089Spjd/* 378219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 379219089Spjd * The callbacks are ordered by txg number. 380219089Spjd */ 381219089Spjdtypedef struct ztest_cb_list { 382219089Spjd mutex_t zcl_callbacks_lock; 383219089Spjd list_t zcl_callbacks; 384219089Spjd} ztest_cb_list_t; 385168404Spjd 386168404Spjd/* 387168404Spjd * Stuff we need to share writably between parent and child. 
388168404Spjd */ 389168404Spjdtypedef struct ztest_shared { 390236143Smm boolean_t zs_do_init; 391219089Spjd hrtime_t zs_proc_start; 392219089Spjd hrtime_t zs_proc_stop; 393219089Spjd hrtime_t zs_thread_start; 394219089Spjd hrtime_t zs_thread_stop; 395219089Spjd hrtime_t zs_thread_kill; 396219089Spjd uint64_t zs_enospc_count; 397219089Spjd uint64_t zs_vdev_next_leaf; 398185029Spjd uint64_t zs_vdev_aux; 399168404Spjd uint64_t zs_alloc; 400168404Spjd uint64_t zs_space; 401219089Spjd uint64_t zs_splits; 402219089Spjd uint64_t zs_mirrors; 403236143Smm uint64_t zs_metaslab_sz; 404236143Smm uint64_t zs_metaslab_df_alloc_threshold; 405236143Smm uint64_t zs_guid; 406168404Spjd} ztest_shared_t; 407168404Spjd 408219089Spjd#define ID_PARALLEL -1ULL 409219089Spjd 410168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 411185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 412219089Spjdztest_shared_t *ztest_shared; 413168404Spjd 414236143Smmstatic spa_t *ztest_spa = NULL; 415236143Smmstatic ztest_ds_t *ztest_ds; 416168404Spjd 417236143Smmstatic mutex_t ztest_vdev_lock; 418236143Smmstatic rwlock_t ztest_name_lock; 419236143Smm 420236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 421185029Spjdstatic boolean_t ztest_exiting; 422168404Spjd 423219089Spjd/* Global commit callback list */ 424219089Spjdstatic ztest_cb_list_t zcl; 425219089Spjd 426219089Spjdenum ztest_object { 427219089Spjd ZTEST_META_DNODE = 0, 428219089Spjd ZTEST_DIROBJ, 429219089Spjd ZTEST_OBJECTS 430219089Spjd}; 431168404Spjd 432168676Spjdstatic void usage(boolean_t) __NORETURN; 433168498Spjd 434168404Spjd/* 435168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 436168404Spjd * debugging facilities. 
437168404Spjd */ 438168404Spjdconst char * 439168404Spjd_umem_debug_init() 440168404Spjd{ 441168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 442168404Spjd} 443168404Spjd 444168404Spjdconst char * 445168404Spjd_umem_logging_init(void) 446168404Spjd{ 447168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 448168404Spjd} 449168404Spjd 450168404Spjd#define FATAL_MSG_SZ 1024 451168404Spjd 452168404Spjdchar *fatal_msg; 453168404Spjd /* Format a "ztest: "-prefixed message into a FATAL_MSG_SZ-byte stack buffer, append strerror() of the errno seen on entry when do_perror is nonzero, print the result to stderr, then abort() if ztest_dump_core is set or exit(3) otherwise. Does not return. Every write into buf is bounded by the space remaining, so an over-long expansion is truncated instead of overrunning the stack. */ 454168404Spjdstatic void 455168404Spjdfatal(int do_perror, char *message, ...) 456168404Spjd{ 457168404Spjd va_list args; 458168404Spjd int save_errno = errno; 459168404Spjd char buf[FATAL_MSG_SZ]; 460168404Spjd 461168404Spjd (void) fflush(stdout); 462168404Spjd 463168404Spjd va_start(args, message); 464168404Spjd (void) snprintf(buf, sizeof (buf), "ztest: "); 465168404Spjd /* LINTED */ 466168404Spjd (void) vsnprintf(buf + strlen(buf), sizeof (buf) - strlen(buf), message, args); 467168404Spjd va_end(args); 468168404Spjd if (do_perror) { 469168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 470168404Spjd ": %s", strerror(save_errno)); 471168404Spjd } 472168404Spjd (void) fprintf(stderr, "%s\n", buf); 473168404Spjd fatal_msg = buf; /* to ease debugging */ 474168404Spjd if (ztest_dump_core) 475168404Spjd abort(); 476168404Spjd exit(3); 477168404Spjd} 478168404Spjd 479168404Spjdstatic int 480168404Spjdstr2shift(const char *buf) 481168404Spjd{ 482168404Spjd const char *ends = "BKMGTPEZ"; 483168404Spjd int i; 484168404Spjd 485168404Spjd if (buf[0] == '\0') 486168404Spjd return (0); 487168404Spjd for (i = 0; i < strlen(ends); i++) { 488168404Spjd if (toupper(buf[0]) == ends[i]) 489168404Spjd break; 490168404Spjd } 491168498Spjd if (i == strlen(ends)) { 492168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 493168498Spjd buf); 494168498Spjd usage(B_FALSE); 495168498Spjd } 496168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 497168404Spjd return (10*i); 498168404Spjd } 499168498Spjd (void) 
fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 500168498Spjd usage(B_FALSE); 501168498Spjd /* NOTREACHED */ 502168404Spjd} 503168404Spjd 504168404Spjdstatic uint64_t 505168404Spjdnicenumtoull(const char *buf) 506168404Spjd{ 507168404Spjd char *end; 508168404Spjd uint64_t val; 509168404Spjd 510168404Spjd val = strtoull(buf, &end, 0); 511168404Spjd if (end == buf) { 512168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 513168498Spjd usage(B_FALSE); 514168404Spjd } else if (end[0] == '.') { 515168404Spjd double fval = strtod(buf, &end); 516168404Spjd fval *= pow(2, str2shift(end)); 517168498Spjd if (fval > UINT64_MAX) { 518168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 519168498Spjd buf); 520168498Spjd usage(B_FALSE); 521168498Spjd } 522168404Spjd val = (uint64_t)fval; 523168404Spjd } else { 524168404Spjd int shift = str2shift(end); 525168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 526168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 527168498Spjd buf); 528168498Spjd usage(B_FALSE); 529168498Spjd } 530168404Spjd val <<= shift; 531168404Spjd } 532168404Spjd return (val); 533168404Spjd} 534168404Spjd 535168404Spjdstatic void 536168498Spjdusage(boolean_t requested) 537168404Spjd{ 538236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 539236143Smm 540168404Spjd char nice_vdev_size[10]; 541168404Spjd char nice_gang_bang[10]; 542168498Spjd FILE *fp = requested ? 
stdout : stderr; 543168404Spjd 544236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 545236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 546168404Spjd 547168498Spjd (void) fprintf(fp, "Usage: %s\n" 548168404Spjd "\t[-v vdevs (default: %llu)]\n" 549168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 550219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 551168404Spjd "\t[-m mirror_copies (default: %d)]\n" 552168404Spjd "\t[-r raidz_disks (default: %d)]\n" 553168404Spjd "\t[-R raidz_parity (default: %d)]\n" 554168404Spjd "\t[-d datasets (default: %d)]\n" 555168404Spjd "\t[-t threads (default: %d)]\n" 556168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 557219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 558219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 559168404Spjd "\t[-p pool_name (default: %s)]\n" 560219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 561219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 562219089Spjd "\t[-E] use existing pool instead of creating new one\n" 563219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 564219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 565219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 566236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 567168498Spjd "\t[-h] (print help)\n" 568168404Spjd "", 569236143Smm zo->zo_pool, 570236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 571185029Spjd nice_vdev_size, /* -s */ 572236143Smm zo->zo_ashift, /* -a */ 573236143Smm zo->zo_mirrors, /* -m */ 574236143Smm zo->zo_raidz, /* -r */ 575236143Smm zo->zo_raidz_parity, /* -R */ 576236143Smm zo->zo_datasets, /* -d */ 577236143Smm zo->zo_threads, /* -t */ 578185029Spjd nice_gang_bang, /* -g */ 579236143Smm zo->zo_init, /* -i */ 580236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 581236143Smm zo->zo_pool, /* -p */ 582236143Smm zo->zo_dir, /* -f */ 583236143Smm 
(u_longlong_t)zo->zo_time, /* -T */ 584236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 585236143Smm (u_longlong_t)zo->zo_passtime); 586168498Spjd exit(requested ? 0 : 1); 587168404Spjd} 588168404Spjd /* Parse the command line into ztest_opts, starting from a copy of ztest_opts_defaults. Options that take a number are converted by nicenumtoull(). An unknown option or an unresolvable -f directory prints the usage text and exits via usage(). After parsing, raidz parity is capped at zo_raidz - 1 and zo_vdevtime is derived from zo_time and zo_vdevs. If -B was given, zo_alt_ztest and zo_alt_libpath are built from the alternate root and the <isa> component of this executable's own path; a missing or unusable alternate is reported through fatal() with core dumps disabled. */ 589168404Spjdstatic void 590168404Spjdprocess_options(int argc, char **argv) 591168404Spjd{ 592236143Smm char *path; 593236143Smm ztest_shared_opts_t *zo = &ztest_opts; 594236143Smm 595168404Spjd int opt; 596168404Spjd uint64_t value; 597236143Smm char altdir[MAXNAMELEN] = { 0 }; 598168404Spjd 599236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 600168404Spjd 601168404Spjd while ((opt = getopt(argc, argv, 602236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 603168404Spjd value = 0; 604168404Spjd switch (opt) { 605185029Spjd case 'v': 606185029Spjd case 's': 607185029Spjd case 'a': 608185029Spjd case 'm': 609185029Spjd case 'r': 610185029Spjd case 'R': 611185029Spjd case 'd': 612185029Spjd case 't': 613185029Spjd case 'g': 614185029Spjd case 'i': 615185029Spjd case 'k': 616185029Spjd case 'T': 617185029Spjd case 'P': 618219089Spjd case 'F': 619168404Spjd value = nicenumtoull(optarg); 620168404Spjd } 621168404Spjd switch (opt) { 622185029Spjd case 'v': 623236143Smm zo->zo_vdevs = value; 624168404Spjd break; 625185029Spjd case 's': 626236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 627168404Spjd break; 628185029Spjd case 'a': 629236143Smm zo->zo_ashift = value; 630168404Spjd break; 631185029Spjd case 'm': 632236143Smm zo->zo_mirrors = value; 633168404Spjd break; 634185029Spjd case 'r': 635236143Smm zo->zo_raidz = MAX(1, value); 636168404Spjd break; 637185029Spjd case 'R': 638236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 639168404Spjd break; 640185029Spjd case 'd': 641236143Smm zo->zo_datasets = MAX(1, value); 642168404Spjd break; 643185029Spjd case 't': 644236143Smm zo->zo_threads = MAX(1, value); 645168404Spjd break; 646185029Spjd case 'g': 647236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 648236143Smm value); 
649168404Spjd break; 650185029Spjd case 'i': 651236143Smm zo->zo_init = value; 652168404Spjd break; 653185029Spjd case 'k': 654236143Smm zo->zo_killrate = value; 655168404Spjd break; 656185029Spjd case 'p': 657236143Smm (void) strlcpy(zo->zo_pool, optarg, 658236143Smm sizeof (zo->zo_pool)); 659168404Spjd break; 660185029Spjd case 'f': 661236143Smm path = realpath(optarg, NULL); 662236143Smm if (path == NULL) { 663236143Smm (void) fprintf(stderr, "error: %s: %s\n", 664236143Smm optarg, strerror(errno)); 665236143Smm usage(B_FALSE); 666236143Smm } else { 667236143Smm (void) strlcpy(zo->zo_dir, path, 668236143Smm sizeof (zo->zo_dir)); /* realpath(..., NULL) hands back malloc()ed storage; release it once copied */ free(path); 669236143Smm } 670168404Spjd break; 671185029Spjd case 'V': 672236143Smm zo->zo_verbose++; 673168404Spjd break; 674185029Spjd case 'E': 675236143Smm zo->zo_init = 0; 676168404Spjd break; 677185029Spjd case 'T': 678236143Smm zo->zo_time = value; 679168404Spjd break; 680185029Spjd case 'P': 681236143Smm zo->zo_passtime = MAX(1, value); 682168404Spjd break; 683219089Spjd case 'F': 684236143Smm zo->zo_maxloops = MAX(1, value); 685219089Spjd break; 686236143Smm case 'B': 687236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 688236143Smm break; 689185029Spjd case 'h': 690168498Spjd usage(B_TRUE); 691168498Spjd break; 692185029Spjd case '?': 693185029Spjd default: 694168498Spjd usage(B_FALSE); 695168404Spjd break; 696168404Spjd } 697168404Spjd } 698168404Spjd 699236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 700168404Spjd 701236143Smm zo->zo_vdevtime = 702236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 703219089Spjd UINT64_MAX >> 2); 704236143Smm 705236143Smm if (strlen(altdir) > 0) { /* realpath() may store up to PATH_MAX bytes, so these must be MAXPATHLEN-sized */ 706236143Smm char cmd[MAXPATHLEN]; 707236143Smm char realaltdir[MAXPATHLEN]; 708236143Smm char *bin; 709236143Smm char *ztest; 710236143Smm char *isa; 711236143Smm int isalen; 712236143Smm /* cmd is parsed below; its contents are undefined if realpath() fails */ 713236143Smm VERIFY(NULL != realpath(getexecname(), cmd)); 714236143Smm if (0 != access(altdir, F_OK)) { 715236143Smm ztest_dump_core = B_FALSE; 716236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 717236143Smm altdir); 718236143Smm } 719236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 720236143Smm 721236143Smm /* 722236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 723236143Smm * We want to extract <isa> to determine if we should use 724236143Smm * 32 or 64 bit binaries. 725236143Smm */ 726236143Smm bin = strstr(cmd, "/usr/bin/"); 727236143Smm ztest = (bin != NULL) ? strstr(bin, "/ztest") : NULL; if (ztest == NULL) { ztest_dump_core = B_FALSE; fatal(B_FALSE, "unexpected ztest path: %s", cmd); } 728236143Smm isa = bin + 9; /* no <isa> component ("/usr/bin/ztest") gives ztest < isa; a negative "%.*s" precision would print the whole remainder, so use an empty <isa> */ 729236143Smm isalen = (ztest > isa) ? (int)(ztest - isa) : 0; 730236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 731236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 732236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 733236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 734236143Smm 735236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 736236143Smm ztest_dump_core = B_FALSE; 737236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 738236143Smm zo->zo_alt_ztest); 739236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 740236143Smm ztest_dump_core = B_FALSE; 741236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 742236143Smm zo->zo_alt_libpath); 743236143Smm } 744236143Smm } 745168404Spjd} 746168404Spjd 747219089Spjdstatic void 748219089Spjdztest_kill(ztest_shared_t *zs) 749219089Spjd{ 750236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 751236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 752219089Spjd (void) 
kill(getpid(), SIGKILL); 753219089Spjd} 754219089Spjd 755168404Spjdstatic uint64_t 756219089Spjdztest_random(uint64_t range) 757219089Spjd{ 758219089Spjd uint64_t r; 759219089Spjd 760219089Spjd if (range == 0) 761219089Spjd return (0); 762219089Spjd 763236143Smm if (read(ZTEST_FD_RAND, &r, sizeof (r)) != sizeof (r)) 764219089Spjd fatal(1, "short read from /dev/urandom"); 765219089Spjd 766219089Spjd return (r % range); 767219089Spjd} 768219089Spjd 769219089Spjd/* ARGSUSED */ 770219089Spjdstatic void 771219089Spjdztest_record_enospc(const char *s) 772219089Spjd{ 773219089Spjd ztest_shared->zs_enospc_count++; 774219089Spjd} 775219089Spjd 776219089Spjdstatic uint64_t 777168404Spjdztest_get_ashift(void) 778168404Spjd{ 779236143Smm if (ztest_opts.zo_ashift == 0) 780168404Spjd return (SPA_MINBLOCKSHIFT + ztest_random(3)); 781236143Smm return (ztest_opts.zo_ashift); 782168404Spjd} 783168404Spjd 784168404Spjdstatic nvlist_t * 785185029Spjdmake_vdev_file(char *path, char *aux, size_t size, uint64_t ashift) 786168404Spjd{ 787185029Spjd char pathbuf[MAXPATHLEN]; 788168404Spjd uint64_t vdev; 789168404Spjd nvlist_t *file; 790168404Spjd 791185029Spjd if (ashift == 0) 792185029Spjd ashift = ztest_get_ashift(); 793168404Spjd 794185029Spjd if (path == NULL) { 795185029Spjd path = pathbuf; 796185029Spjd 797185029Spjd if (aux != NULL) { 798185029Spjd vdev = ztest_shared->zs_vdev_aux; 799236143Smm (void) snprintf(path, sizeof (pathbuf), 800236143Smm ztest_aux_template, ztest_opts.zo_dir, 801236143Smm ztest_opts.zo_pool, aux, vdev); 802185029Spjd } else { 803219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 804236143Smm (void) snprintf(path, sizeof (pathbuf), 805236143Smm ztest_dev_template, ztest_opts.zo_dir, 806236143Smm ztest_opts.zo_pool, vdev); 807185029Spjd } 808185029Spjd } 809185029Spjd 810185029Spjd if (size != 0) { 811185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 812168404Spjd if (fd == -1) 813185029Spjd fatal(1, "can't open %s", path); 814168404Spjd if 
(ftruncate(fd, size) != 0) 815185029Spjd fatal(1, "can't ftruncate %s", path); 816168404Spjd (void) close(fd); 817168404Spjd } 818168404Spjd 819168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 820168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 821185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 822168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 823168404Spjd 824168404Spjd return (file); 825168404Spjd} 826168404Spjd 827168404Spjdstatic nvlist_t * 828185029Spjdmake_vdev_raidz(char *path, char *aux, size_t size, uint64_t ashift, int r) 829168404Spjd{ 830168404Spjd nvlist_t *raidz, **child; 831168404Spjd int c; 832168404Spjd 833168404Spjd if (r < 2) 834185029Spjd return (make_vdev_file(path, aux, size, ashift)); 835168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 836168404Spjd 837168404Spjd for (c = 0; c < r; c++) 838185029Spjd child[c] = make_vdev_file(path, aux, size, ashift); 839168404Spjd 840168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 841168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 842168404Spjd VDEV_TYPE_RAIDZ) == 0); 843168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 844236143Smm ztest_opts.zo_raidz_parity) == 0); 845168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 846168404Spjd child, r) == 0); 847168404Spjd 848168404Spjd for (c = 0; c < r; c++) 849168404Spjd nvlist_free(child[c]); 850168404Spjd 851168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 852168404Spjd 853168404Spjd return (raidz); 854168404Spjd} 855168404Spjd 856168404Spjdstatic nvlist_t * 857185029Spjdmake_vdev_mirror(char *path, char *aux, size_t size, uint64_t ashift, 858185029Spjd int r, int m) 859168404Spjd{ 860168404Spjd nvlist_t *mirror, **child; 861168404Spjd int c; 862168404Spjd 863168404Spjd if (m < 1) 864185029Spjd return (make_vdev_raidz(path, aux, size, ashift, r)); 865168404Spjd 
866168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 867168404Spjd 868168404Spjd for (c = 0; c < m; c++) 869185029Spjd child[c] = make_vdev_raidz(path, aux, size, ashift, r); 870168404Spjd 871168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 872168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 873168404Spjd VDEV_TYPE_MIRROR) == 0); 874168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 875168404Spjd child, m) == 0); 876168404Spjd 877168404Spjd for (c = 0; c < m; c++) 878168404Spjd nvlist_free(child[c]); 879168404Spjd 880168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 881168404Spjd 882168404Spjd return (mirror); 883168404Spjd} 884168404Spjd /* make_vdev_root: builds t top-level children via make_vdev_mirror(), tags each with ZPOOL_CONFIG_IS_LOG = log, and attaches them to a root nvlist under the name aux when aux is non-NULL, otherwise under ZPOOL_CONFIG_CHILDREN. Asserts t > 0. */ 885168404Spjdstatic nvlist_t * 886185029Spjdmake_vdev_root(char *path, char *aux, size_t size, uint64_t ashift, 887185029Spjd int log, int r, int m, int t) 888168404Spjd{ 889168404Spjd nvlist_t *root, **child; 890168404Spjd int c; 891168404Spjd 892168404Spjd ASSERT(t > 0); 893168404Spjd 894168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); 895168404Spjd 896185029Spjd for (c = 0; c < t; c++) { 897185029Spjd child[c] = make_vdev_mirror(path, aux, size, ashift, r, m); 898185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 899185029Spjd log) == 0); 900185029Spjd } 901168404Spjd 902168404Spjd VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); 903168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 904185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ?
aux : ZPOOL_CONFIG_CHILDREN, 905168404Spjd child, t) == 0); 906168404Spjd 907168404Spjd for (c = 0; c < t; c++) 908168404Spjd nvlist_free(child[c]); 909168404Spjd 910168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 911168404Spjd 912168404Spjd return (root); 913168404Spjd} 914168404Spjd /* ztest_random_blocksize: returns a random power of two from 1 << SPA_MINBLOCKSHIFT through 1 << SPA_MAXBLOCKSHIFT inclusive. */ 915219089Spjdstatic int 916219089Spjdztest_random_blocksize(void) 917219089Spjd{ 918219089Spjd return (1 << (SPA_MINBLOCKSHIFT + 919219089Spjd ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); 920219089Spjd} 921219089Spjd /* ztest_random_ibshift: returns a random shift from DN_MIN_INDBLKSHIFT through DN_MAX_INDBLKSHIFT inclusive. */ 922219089Spjdstatic int 923219089Spjdztest_random_ibshift(void) 924219089Spjd{ 925219089Spjd return (DN_MIN_INDBLKSHIFT + 926219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 927219089Spjd} 928219089Spjd /* ztest_random_vdev_top: picks a random child index of the root vdev, retrying while the pick is a hole, is a log device and log_ok is false, or has no metaslab group or class. Asserts that spa_config_held() reports a held config lock. */ 929219089Spjdstatic uint64_t 930219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 931219089Spjd{ 932219089Spjd uint64_t top; 933219089Spjd vdev_t *rvd = spa->spa_root_vdev; 934219089Spjd vdev_t *tvd; 935219089Spjd 936219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 937219089Spjd 938219089Spjd do { 939219089Spjd top = ztest_random(rvd->vdev_children); 940219089Spjd tvd = rvd->vdev_child[top]; 941219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 942219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 943219089Spjd 944219089Spjd return (top); 945219089Spjd} 946219089Spjd /* ztest_random_dsl_prop: draws random values for prop via zfs_prop_random_value(), redrawing while prop is ZFS_PROP_CHECKSUM and the value is ZIO_CHECKSUM_OFF. */ 947219089Spjdstatic uint64_t 948219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 949219089Spjd{ 950219089Spjd uint64_t value; 951219089Spjd 952219089Spjd do { 953219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 954219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 955219089Spjd 956219089Spjd return (value); 957219089Spjd} 958219089Spjd /* ztest_dsl_prop_set_uint64: calls dsl_prop_set() on osname with source ZPROP_SRC_NONE when inherit is true, else ZPROP_SRC_LOCAL. ENOSPC is recorded and returned; any other error trips the ASSERT. The value is then read back and, at verbosity 6 or higher, printed with its setpoint. */ 959219089Spjdstatic int 960219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 961219089Spjd boolean_t inherit) 962219089Spjd{ 963219089Spjd const char *propname = zfs_prop_to_name(prop);
964219089Spjd const char *valname; 965219089Spjd char setpoint[MAXPATHLEN]; 966219089Spjd uint64_t curval; 967219089Spjd int error; 968219089Spjd 969219089Spjd error = dsl_prop_set(osname, propname, 970219089Spjd (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), 971219089Spjd sizeof (value), 1, &value); 972219089Spjd 973219089Spjd if (error == ENOSPC) { 974219089Spjd ztest_record_enospc(FTAG); 975219089Spjd return (error); 976219089Spjd } 977219089Spjd ASSERT3U(error, ==, 0); 978219089Spjd 979219089Spjd VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval), 980219089Spjd 1, &curval, setpoint), ==, 0); 981219089Spjd 982236143Smm if (ztest_opts.zo_verbose >= 6) { 983219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 984219089Spjd (void) printf("%s %s = %s at '%s'\n", 985219089Spjd osname, propname, valname, setpoint); 986219089Spjd } 987219089Spjd 988219089Spjd return (error); 989219089Spjd} 990219089Spjd /* ztest_spa_prop_set_uint64: sets one uint64 pool property on ztest_spa through a temporary nvlist, which is freed before the error is examined. ENOSPC is recorded and returned; any other nonzero error trips the ASSERT. */ 991219089Spjdstatic int 992236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 993219089Spjd{ 994236143Smm spa_t *spa = ztest_spa; 995219089Spjd nvlist_t *props = NULL; 996219089Spjd int error; 997219089Spjd 998219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 999219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1000219089Spjd 1001219089Spjd error = spa_prop_set(spa, props); 1002219089Spjd 1003219089Spjd nvlist_free(props); 1004219089Spjd 1005219089Spjd if (error == ENOSPC) { 1006219089Spjd ztest_record_enospc(FTAG); 1007219089Spjd return (error); 1008219089Spjd } 1009219089Spjd ASSERT3U(error, ==, 0); 1010219089Spjd 1011219089Spjd return (error); 1012219089Spjd} 1013219089Spjd /* ztest_rll_init: puts an rll_t into the idle state (no writer, zero readers) and initializes its mutex and condition variable. */ 1014168404Spjdstatic void 1015219089Spjdztest_rll_init(rll_t *rll) 1016168404Spjd{ 1017219089Spjd rll->rll_writer = NULL; 1018219089Spjd rll->rll_readers = 0; 1019219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1020219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
1021219089Spjd} 1022219089Spjd /* ztest_rll_destroy: asserts the lock is idle (no writer, no readers) and destroys its mutex and condition variable. */ 1023219089Spjdstatic void 1024219089Spjdztest_rll_destroy(rll_t *rll) 1025219089Spjd{ 1026219089Spjd ASSERT(rll->rll_writer == NULL); 1027219089Spjd ASSERT(rll->rll_readers == 0); 1028219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 1029219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1030219089Spjd} 1031219089Spjd /* ztest_rll_lock: acquires the lock in the requested mode. A reader waits only while a writer holds the lock; a writer waits until there is neither a writer nor any reader, then records curthread as the writer. A waiting writer does not hold off new readers. */ 1032219089Spjdstatic void 1033219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1034219089Spjd{ 1035219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1036219089Spjd 1037219089Spjd if (type == RL_READER) { 1038219089Spjd while (rll->rll_writer != NULL) 1039219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1040219089Spjd rll->rll_readers++; 1041219089Spjd } else { 1042219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1043219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1044219089Spjd rll->rll_writer = curthread; 1045219089Spjd } 1046219089Spjd 1047219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1048219089Spjd} 1049219089Spjd /* ztest_rll_unlock: clears the writer if one is recorded, otherwise drops one reader; broadcasts on the condition variable once the lock is completely idle. */ 1050219089Spjdstatic void 1051219089Spjdztest_rll_unlock(rll_t *rll) 1052219089Spjd{ 1053219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1054219089Spjd 1055219089Spjd if (rll->rll_writer) { 1056219089Spjd ASSERT(rll->rll_readers == 0); 1057219089Spjd rll->rll_writer = NULL; 1058219089Spjd } else { 1059219089Spjd ASSERT(rll->rll_readers != 0); 1060219089Spjd ASSERT(rll->rll_writer == NULL); 1061219089Spjd rll->rll_readers--; 1062219089Spjd } 1063219089Spjd 1064219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1065219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1066219089Spjd 1067219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1068219089Spjd} 1069219089Spjd /* ztest_object_lock: takes, in the requested mode, the zd_object_lock slot selected by object & (ZTEST_OBJECT_LOCKS - 1). */ 1070219089Spjdstatic void 1071219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1072219089Spjd{ 1073219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1074219089Spjd 1075219089Spjd ztest_rll_lock(rll, type);
1076219089Spjd} 1077219089Spjd /* ztest_object_unlock: releases the zd_object_lock slot selected by object & (ZTEST_OBJECT_LOCKS - 1). */ 1078219089Spjdstatic void 1079219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1080219089Spjd{ 1081219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1082219089Spjd 1083219089Spjd ztest_rll_unlock(rll); 1084219089Spjd} 1085219089Spjd /* ztest_range_lock: allocates an rl_t recording (object, offset, size), selects a zd_range_lock slot from a hash of object and offset, takes that slot in the requested mode and returns the rl_t. The slot is shared by every range that hashes to it. */ 1086219089Spjdstatic rl_t * 1087219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1088219089Spjd uint64_t size, rl_type_t type) 1089219089Spjd{ 1090219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1091219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1092219089Spjd rl_t *rl; 1093219089Spjd 1094219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1095219089Spjd rl->rl_object = object; 1096219089Spjd rl->rl_offset = offset; 1097219089Spjd rl->rl_size = size; 1098219089Spjd rl->rl_lock = rll; 1099219089Spjd 1100219089Spjd ztest_rll_lock(rll, type); 1101219089Spjd 1102219089Spjd return (rl); 1103219089Spjd} 1104219089Spjd /* ztest_range_unlock: releases the slot recorded in rl and frees the rl_t. */ 1105219089Spjdstatic void 1106219089Spjdztest_range_unlock(rl_t *rl) 1107219089Spjd{ 1108219089Spjd rll_t *rll = rl->rl_lock; 1109219089Spjd 1110219089Spjd ztest_rll_unlock(rll); 1111219089Spjd 1112219089Spjd umem_free(rl, sizeof (*rl)); 1113219089Spjd} 1114219089Spjd /* ztest_zd_init: binds zd to objset os and to the shared state szd (which may be NULL), resets zd_seq when szd is present, and initializes the zilog rwlock, the dirobj mutex and every object and range lock slot. */ 1115219089Spjdstatic void 1116236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1117219089Spjd{ 1118219089Spjd zd->zd_os = os; 1119219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1120236143Smm zd->zd_shared = szd; 1121219089Spjd dmu_objset_name(os, zd->zd_name); 1122219089Spjd 1123236143Smm if (zd->zd_shared != NULL) 1124236143Smm zd->zd_shared->zd_seq = 0; 1125236143Smm 1126224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1127219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1128219089Spjd 1129219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1130219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1131219089Spjd
1132219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1133219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1134219089Spjd} 1135219089Spjd /* ztest_zd_fini: destroys the dirobj mutex and every object and range lock slot. NOTE(review): zd_zilog_lock, which ztest_zd_init() sets up with rwlock_init(), is not destroyed here; confirm whether a matching rwlock_destroy() is missing. */ 1136219089Spjdstatic void 1137219089Spjdztest_zd_fini(ztest_ds_t *zd) 1138219089Spjd{ 1139219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); 1140219089Spjd 1141219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1142219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1143219089Spjd 1144219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1145219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1146219089Spjd} 1147219089Spjd /* TXG_MIGHTWAIT: evaluates to TXG_NOWAIT when ztest_random(10) returns 0, and to TXG_WAIT otherwise. */ 1148219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1149219089Spjd /* ztest_tx_assign: tries dmu_tx_assign(). On success returns the nonzero txg. On failure the tx is aborted and 0 is returned: ERESTART (asserted to happen only with TXG_NOWAIT) first waits via dmu_tx_wait(); any other error is asserted to be ENOSPC and is recorded against tag. Callers must not touch tx after a 0 return. */ 1150219089Spjdstatic uint64_t 1151219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1152219089Spjd{ 1153219089Spjd uint64_t txg; 1154168404Spjd int error; 1155168404Spjd 1156219089Spjd /* 1157219089Spjd * Attempt to assign tx to some transaction group. 1158219089Spjd */ 1159219089Spjd error = dmu_tx_assign(tx, txg_how); 1160168404Spjd if (error) { 1161219089Spjd if (error == ERESTART) { 1162219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1163219089Spjd dmu_tx_wait(tx); 1164219089Spjd } else { 1165219089Spjd ASSERT3U(error, ==, ENOSPC); 1166219089Spjd ztest_record_enospc(tag); 1167219089Spjd } 1168219089Spjd dmu_tx_abort(tx); 1169219089Spjd return (0); 1170168404Spjd } 1171219089Spjd txg = dmu_tx_get_txg(tx); 1172219089Spjd ASSERT(txg != 0); 1173219089Spjd return (txg); 1174168404Spjd} 1175168404Spjd /* ztest_pattern_set: stores value into buf one uint64_t at a time until size bytes are covered. Presumably size is a multiple of sizeof (uint64_t) - TODO confirm against callers. */ 1176219089Spjdstatic void 1177219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1178168404Spjd{ 1179219089Spjd uint64_t *ip = buf; 1180219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1181168404Spjd 1182219089Spjd while (ip < ip_end) 1183219089Spjd *ip++ = value; 1184219089Spjd} 1185168404Spjd /* ztest_pattern_match: reports whether every uint64_t in the first size bytes of buf equals value. All words are always scanned; there is no early exit. */ 1186219089Spjdstatic boolean_t 1187219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1188219089Spjd{
1189219089Spjd uint64_t *ip = buf; 1190219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1191219089Spjd uint64_t diff = 0; 1192168404Spjd /* accumulate any nonzero difference so that a single mismatch makes diff nonzero */ 1193219089Spjd while (ip < ip_end) 1194219089Spjd diff |= (value - *ip++); 1195219089Spjd 1196219089Spjd return (diff == 0); 1197168404Spjd} 1198168404Spjd /* ztest_bt_generate: fills in every field of the block tag bt; bt_objset is taken from dmu_objset_id(os). */ 1199219089Spjdstatic void 1200219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1201219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1202168404Spjd{ 1203219089Spjd bt->bt_magic = BT_MAGIC; 1204219089Spjd bt->bt_objset = dmu_objset_id(os); 1205219089Spjd bt->bt_object = object; 1206219089Spjd bt->bt_offset = offset; 1207219089Spjd bt->bt_gen = gen; 1208219089Spjd bt->bt_txg = txg; 1209219089Spjd bt->bt_crtxg = crtxg; 1210168404Spjd} 1211168404Spjd /* ztest_bt_verify: asserts that the tag magic, objset, object, offset and crtxg match exactly and that the recorded gen and txg do not exceed the supplied ones. All checks are ASSERTs. */ 1212219089Spjdstatic void 1213219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1214219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1215219089Spjd{ 1216219089Spjd ASSERT(bt->bt_magic == BT_MAGIC); 1217219089Spjd ASSERT(bt->bt_objset == dmu_objset_id(os)); 1218219089Spjd ASSERT(bt->bt_object == object); 1219219089Spjd ASSERT(bt->bt_offset == offset); 1220219089Spjd ASSERT(bt->bt_gen <= gen); 1221219089Spjd ASSERT(bt->bt_txg <= txg); 1222219089Spjd ASSERT(bt->bt_crtxg == crtxg); 1223219089Spjd} 1224219089Spjd /* ztest_bt_bonus: returns a pointer to the block tag occupying the last sizeof (ztest_block_tag_t) bytes of the bonus data of db, after asserting the bonus size fits in db and can hold a tag. */ 1225219089Spjdstatic ztest_block_tag_t * 1226219089Spjdztest_bt_bonus(dmu_buf_t *db) 1227219089Spjd{ 1228219089Spjd dmu_object_info_t doi; 1229219089Spjd ztest_block_tag_t *bt; 1230219089Spjd 1231219089Spjd dmu_object_info_from_db(db, &doi); 1232219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1233219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1234219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1235219089Spjd 1236219089Spjd return (bt); 1237219089Spjd} 1238219089Spjd 1239219089Spjd/* 1240219089Spjd * ZIL logging ops 1241219089Spjd */
1242219089Spjd /* The lrz_* names alias existing log-record fields; the replay code below reads them from lr_create_t to carry the object type, block size, indirect block shift, bonus type and bonus length. */ 1243219089Spjd#define lrz_type lr_mode 1244219089Spjd#define lrz_blocksize lr_uid 1245219089Spjd#define lrz_ibshift lr_gid 1246219089Spjd#define lrz_bonustype lr_rdev 1247219089Spjd#define lrz_bonuslen lr_crtime[1] 1248219089Spjd /* ztest_log_create: unless the zilog is replaying, builds a TX_CREATE itx sized for the record plus its trailing NUL-terminated name, copies everything after the common lr_t header into it and assigns it to tx. */ 1249219089Spjdstatic void 1250219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1251219089Spjd{ 1252219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1253219089Spjd size_t namesize = strlen(name) + 1; 1254219089Spjd itx_t *itx; 1255219089Spjd 1256219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1257219089Spjd return; 1258219089Spjd 1259219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1260219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1261219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1262219089Spjd 1263219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1264219089Spjd} 1265219089Spjd /* ztest_log_remove: same shape as ztest_log_create() but for TX_REMOVE; additionally records the removed object number in itx_oid. */ 1266219089Spjdstatic void 1267219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1268219089Spjd{ 1269219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1270219089Spjd size_t namesize = strlen(name) + 1; 1271219089Spjd itx_t *itx; 1272219089Spjd 1273219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1274219089Spjd return; 1275219089Spjd 1276219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1277219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1278219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1279219089Spjd 1280219089Spjd itx->itx_oid = object; 1281219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1282219089Spjd} 1283219089Spjd /* ztest_log_write: logs a TX_WRITE record with a randomly chosen write state. Lengths above ZIL_MAX_LOG_DATA force WR_INDIRECT; a WR_COPIED record whose dmu_read() fails is rebuilt as WR_NEED_COPY. One record in eight is marked synchronous. */ 1284219089Spjdstatic void 1285219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1286219089Spjd{ 1287219089Spjd itx_t *itx; 1288219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1289219089Spjd 1290219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1291219089Spjd return; 1292219089Spjd 1293219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA)
1294219089Spjd write_state = WR_INDIRECT; 1295219089Spjd /* only a WR_COPIED record carries the data inline, so only then is room for lr_length bytes reserved */ 1296219089Spjd itx = zil_itx_create(TX_WRITE, 1297219089Spjd sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0)); 1298219089Spjd /* if the data cannot be read back for an inline copy, rebuild the itx without a payload and fall back to WR_NEED_COPY */ 1299219089Spjd if (write_state == WR_COPIED && 1300219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1301219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1302219089Spjd zil_itx_destroy(itx); 1303219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1304219089Spjd write_state = WR_NEED_COPY; 1305219089Spjd } 1306219089Spjd itx->itx_private = zd; 1307219089Spjd itx->itx_wr_state = write_state; 1308219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1309219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1310219089Spjd 1311219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1312219089Spjd sizeof (*lr) - sizeof (lr_t)); 1313219089Spjd 1314219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1315219089Spjd} 1316219089Spjd /* ztest_log_truncate: unless the zilog is replaying, logs a TX_TRUNCATE record copied from lr (minus the common header), marked not synchronous. */ 1317219089Spjdstatic void 1318219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1319219089Spjd{ 1320219089Spjd itx_t *itx; 1321219089Spjd 1322219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1323219089Spjd return; 1324219089Spjd 1325219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1326219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1327219089Spjd sizeof (*lr) - sizeof (lr_t)); 1328219089Spjd 1329219089Spjd itx->itx_sync = B_FALSE; 1330219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1331219089Spjd} 1332219089Spjd /* ztest_log_setattr: unless the zilog is replaying, logs a TX_SETATTR record copied from lr (minus the common header), marked not synchronous. */ 1333219089Spjdstatic void 1334219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1335219089Spjd{ 1336219089Spjd itx_t *itx; 1337219089Spjd 1338219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1339219089Spjd return; 1340219089Spjd 1341219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1342219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1343219089Spjd sizeof (*lr) - sizeof (lr_t));
1344219089Spjd 1345219089Spjd itx->itx_sync = B_FALSE; 1346219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1347219089Spjd} 1348219089Spjd 1349219089Spjd/* 1350219089Spjd * ZIL replay ops 1351219089Spjd */ /* ztest_replay_create: creates (lr_foid == 0) or claims (lr_foid != 0) the object described by lr, stamps a block tag into its bonus buffer, adds the name to the directory ZAP lr_doid and logs the create. Returns ENOSPC when no txg can be assigned, the claim error (asserted to be EEXIST during replay) when claiming fails, and 0 otherwise. On the allocate path lr_foid is updated with the new object number. */ 1352168404Spjdstatic int 1353219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1354168404Spjd{ 1355219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1356219089Spjd objset_t *os = zd->zd_os; 1357219089Spjd ztest_block_tag_t *bbt; 1358219089Spjd dmu_buf_t *db; 1359168404Spjd dmu_tx_t *tx; 1360219089Spjd uint64_t txg; 1361219089Spjd int error = 0; 1362168404Spjd 1363168404Spjd if (byteswap) 1364168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1365168404Spjd 1366219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1367219089Spjd ASSERT(name[0] != '\0'); 1368219089Spjd 1369168404Spjd tx = dmu_tx_create(os); 1370219089Spjd 1371219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1372219089Spjd 1373219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1374219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1375219089Spjd } else { 1376219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1377219089Spjd } 1378219089Spjd 1379219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1380219089Spjd if (txg == 0) 1381219089Spjd return (ENOSPC); 1382219089Spjd /* a preset lr_foid is expected exactly when the zilog is replaying */ 1383219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1384219089Spjd 1385219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1386219089Spjd if (lr->lr_foid == 0) { 1387219089Spjd lr->lr_foid = zap_create(os, 1388219089Spjd lr->lrz_type, lr->lrz_bonustype, 1389219089Spjd lr->lrz_bonuslen, tx); 1390219089Spjd } else { 1391219089Spjd error = zap_create_claim(os, lr->lr_foid, 1392219089Spjd lr->lrz_type, lr->lrz_bonustype, 1393219089Spjd lr->lrz_bonuslen, tx); 1394219089Spjd } 1395219089Spjd } else { 1396219089Spjd if (lr->lr_foid == 0) { 1397219089Spjd lr->lr_foid = dmu_object_alloc(os, 1398219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1399219089Spjd
lr->lrz_bonuslen, tx); 1400219089Spjd } else { 1401219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1402219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1403219089Spjd lr->lrz_bonuslen, tx); 1404219089Spjd } 1405219089Spjd } 1406219089Spjd /* a failed claim still commits the (empty) tx before returning the error */ 1407168404Spjd if (error) { 1408219089Spjd ASSERT3U(error, ==, EEXIST); 1409219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1410219089Spjd dmu_tx_commit(tx); 1411168404Spjd return (error); 1412168404Spjd } 1413168404Spjd 1414219089Spjd ASSERT(lr->lr_foid != 0); 1415219089Spjd 1416219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1417219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1418219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1419219089Spjd /* stamp the bonus block tag: offset -1ULL, generation lr_gen, txg and crtxg both set to the assigned txg */ 1420219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1421219089Spjd bbt = ztest_bt_bonus(db); 1422219089Spjd dmu_buf_will_dirty(db, tx); 1423219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1424219089Spjd dmu_buf_rele(db, FTAG); 1425219089Spjd 1426219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1427219089Spjd &lr->lr_foid, tx)); 1428219089Spjd 1429219089Spjd (void) ztest_log_create(zd, tx, lr); 1430219089Spjd 1431168404Spjd dmu_tx_commit(tx); 1432168404Spjd 1433219089Spjd return (0); 1434219089Spjd} 1435219089Spjd /* ztest_replay_remove: looks the name up in the directory ZAP lr_doid, then, holding the object lock as writer, destroys the object (zap_destroy for DMU_OT_ZAP_OTHER, dmu_object_free otherwise), removes the directory entry and logs the removal. Returns ENOSPC when no txg can be assigned, 0 otherwise. */ 1436219089Spjdstatic int 1437219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1438219089Spjd{ 1439219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1440219089Spjd objset_t *os = zd->zd_os; 1441219089Spjd dmu_object_info_t doi; 1442219089Spjd dmu_tx_t *tx; 1443219089Spjd uint64_t object, txg; 1444219089Spjd 1445219089Spjd if (byteswap) 1446219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1447219089Spjd 1448219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1449219089Spjd ASSERT(name[0] != '\0'); 1450219089Spjd 1451219089Spjd VERIFY3U(0, ==, 1452219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
1453219089Spjd ASSERT(object != 0); 1454219089Spjd 1455219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1456219089Spjd 1457219089Spjd VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); 1458219089Spjd 1459219089Spjd tx = dmu_tx_create(os); 1460219089Spjd 1461219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1462219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1463219089Spjd 1464219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1465219089Spjd if (txg == 0) { 1466219089Spjd ztest_object_unlock(zd, object); 1467219089Spjd return (ENOSPC); 1468168404Spjd } 1469168404Spjd /* the object type decides which destructor applies */ 1470219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1471219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1472219089Spjd } else { 1473219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1474219089Spjd } 1475219089Spjd 1476219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1477219089Spjd 1478219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1479219089Spjd 1480219089Spjd dmu_tx_commit(tx); 1481219089Spjd 1482219089Spjd ztest_object_unlock(zd, object); 1483219089Spjd 1484219089Spjd return (0); 1485168404Spjd} 1486168404Spjd /* ztest_replay_write: writes the data that follows lr into object lr_foid under an object read lock and a range write lock, optionally verifying and regenerating the block tag at the start of the data, then logs the write. Returns ENOSPC when no txg can be assigned, 0 otherwise. */ 1487168404Spjdstatic int 1488219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1489168404Spjd{ 1490219089Spjd objset_t *os = zd->zd_os; 1491219089Spjd void *data = lr + 1; /* data follows lr */ 1492219089Spjd uint64_t offset, length; 1493219089Spjd ztest_block_tag_t *bt = data; 1494219089Spjd ztest_block_tag_t *bbt; 1495219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1496219089Spjd dmu_object_info_t doi; 1497168404Spjd dmu_tx_t *tx; 1498219089Spjd dmu_buf_t *db; 1499219089Spjd arc_buf_t *abuf = NULL; 1500219089Spjd rl_t *rl; 1501168404Spjd 1502168404Spjd if (byteswap) 1503168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1504168404Spjd 1505219089Spjd offset = lr->lr_offset; 1506219089Spjd length = lr->lr_length; 1507219089Spjd 1508219089Spjd /* If it's a dmu_sync() block, write the whole
block */ 1509219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1510219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 1511219089Spjd if (length < blocksize) { 1512219089Spjd offset -= offset % blocksize; 1513219089Spjd length = blocksize; 1514219089Spjd } 1515219089Spjd } 1516219089Spjd /* a tag written with the opposite byte order is swapped in place; data that does not start with a tag is treated as untagged (bt = NULL) */ 1517219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1518219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1519219089Spjd 1520219089Spjd if (bt->bt_magic != BT_MAGIC) 1521219089Spjd bt = NULL; 1522219089Spjd /* lock order: object lock as reader first, then the range lock as writer */ 1523219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1524219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1525219089Spjd 1526219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1527219089Spjd 1528219089Spjd dmu_object_info_from_db(db, &doi); 1529219089Spjd 1530219089Spjd bbt = ztest_bt_bonus(db); 1531219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1532219089Spjd gen = bbt->bt_gen; 1533219089Spjd crtxg = bbt->bt_crtxg; 1534219089Spjd lrtxg = lr->lr_common.lrc_txg; 1535219089Spjd 1536168404Spjd tx = dmu_tx_create(os); 1537219089Spjd 1538219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1539219089Spjd /* one time in eight, for a write that is exactly one aligned data block, borrow an ARC buffer so the write goes through dmu_assign_arcbuf() instead of dmu_write() */ 1540219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1541219089Spjd P2PHASE(offset, length) == 0) 1542219089Spjd abuf = dmu_request_arcbuf(db, length); 1543219089Spjd 1544219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1545219089Spjd if (txg == 0) { 1546219089Spjd if (abuf != NULL) 1547219089Spjd dmu_return_arcbuf(abuf); 1548219089Spjd dmu_buf_rele(db, FTAG); 1549219089Spjd ztest_range_unlock(rl); 1550219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1551219089Spjd return (ENOSPC); 1552168404Spjd } 1553168404Spjd 1554219089Spjd if (bt != NULL) { 1555219089Spjd /* 1556219089Spjd * Usually, verify the old data before writing new data -- 1557219089Spjd * but not always, because we also want to verify correct 1558219089Spjd * behavior when the data was
not recently read into cache. 1559219089Spjd */ 1560219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1561219089Spjd if (ztest_random(4) != 0) { 1562219089Spjd int prefetch = ztest_random(2) ? 1563219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1564219089Spjd ztest_block_tag_t rbt; 1565219089Spjd 1566219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1567219089Spjd sizeof (rbt), &rbt, prefetch) == 0); /* the existing block is only checked when it actually begins with a block tag */ 1568219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1569219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1570219089Spjd offset, gen, txg, crtxg); 1571219089Spjd } 1572219089Spjd } 1573219089Spjd 1574219089Spjd /* 1575219089Spjd * Writes can appear to be newer than the bonus buffer because 1576219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1577219089Spjd * open-context data, which may be different than the data 1578219089Spjd * as it was when the write was generated. 1579219089Spjd */ 1580219089Spjd if (zd->zd_zilog->zl_replay) { 1581219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1582219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1583219089Spjd bt->bt_crtxg); 1584219089Spjd } 1585219089Spjd 1586219089Spjd /* 1587219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1588219089Spjd * so that all of the usual ASSERTs will work.
1589219089Spjd */ 1590219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1591219089Spjd } 1592219089Spjd /* plain dmu_write() unless an ARC buffer was borrowed earlier, in which case the data is copied into it and the buffer is handed to the DMU */ 1593219089Spjd if (abuf == NULL) { 1594219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1595219089Spjd } else { 1596219089Spjd bcopy(data, abuf->b_data, length); 1597219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1598219089Spjd } 1599219089Spjd 1600219089Spjd (void) ztest_log_write(zd, tx, lr); 1601219089Spjd 1602219089Spjd dmu_buf_rele(db, FTAG); 1603219089Spjd 1604168404Spjd dmu_tx_commit(tx); 1605168404Spjd 1606219089Spjd ztest_range_unlock(rl); 1607219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1608219089Spjd 1609219089Spjd return (0); 1610168404Spjd} 1611168404Spjd /* ztest_replay_truncate: frees the byte range lr_offset .. lr_offset + lr_length of object lr_foid under an object read lock and a range write lock, then logs the truncate. Returns ENOSPC when no txg can be assigned, 0 otherwise. */ 1612219089Spjdstatic int 1613219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1614219089Spjd{ 1615219089Spjd objset_t *os = zd->zd_os; 1616219089Spjd dmu_tx_t *tx; 1617219089Spjd uint64_t txg; 1618219089Spjd rl_t *rl; 1619219089Spjd 1620219089Spjd if (byteswap) 1621219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1622219089Spjd 1623219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1624219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1625219089Spjd RL_WRITER); 1626219089Spjd 1627219089Spjd tx = dmu_tx_create(os); 1628219089Spjd 1629219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1630219089Spjd 1631219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1632219089Spjd if (txg == 0) { 1633219089Spjd ztest_range_unlock(rl); 1634219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1635219089Spjd return (ENOSPC); 1636219089Spjd } 1637219089Spjd 1638219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1639219089Spjd lr->lr_length, tx) == 0); 1640219089Spjd 1641219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1642219089Spjd 1643219089Spjd dmu_tx_commit(tx); 1644219089Spjd 1645219089Spjd ztest_range_unlock(rl); 1646219089Spjd
ztest_object_unlock(zd, lr->lr_foid); 1647219089Spjd 1648219089Spjd return (0); 1649219089Spjd} 1650219089Spjd /* ztest_replay_setattr: under the object write lock, resizes the bonus buffer of lr_foid to lr_size and rewrites its block tag with generation lr_mode, then logs the setattr. Outside replay, lr_size is chosen at random (a nonzero multiple of the tag size) and lr_mode becomes the current generation plus one. Returns ENOSPC when no txg can be assigned, 0 otherwise. */ 1651219089Spjdstatic int 1652219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1653219089Spjd{ 1654219089Spjd objset_t *os = zd->zd_os; 1655219089Spjd dmu_tx_t *tx; 1656219089Spjd dmu_buf_t *db; 1657219089Spjd ztest_block_tag_t *bbt; 1658219089Spjd uint64_t txg, lrtxg, crtxg; 1659219089Spjd 1660219089Spjd if (byteswap) 1661219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1662219089Spjd 1663219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1664219089Spjd 1665219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1666219089Spjd 1667219089Spjd tx = dmu_tx_create(os); 1668219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1669219089Spjd 1670219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1671219089Spjd if (txg == 0) { 1672219089Spjd dmu_buf_rele(db, FTAG); 1673219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1674219089Spjd return (ENOSPC); 1675219089Spjd } 1676219089Spjd 1677219089Spjd bbt = ztest_bt_bonus(db); 1678219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1679219089Spjd crtxg = bbt->bt_crtxg; 1680219089Spjd lrtxg = lr->lr_common.lrc_txg; 1681219089Spjd /* during replay the record already carries size, generation and txg; otherwise they are filled in here */ 1682219089Spjd if (zd->zd_zilog->zl_replay) { 1683219089Spjd ASSERT(lr->lr_size != 0); 1684219089Spjd ASSERT(lr->lr_mode != 0); 1685219089Spjd ASSERT(lrtxg != 0); 1686219089Spjd } else { 1687219089Spjd /* 1688219089Spjd * Randomly change the size and increment the generation. 1689219089Spjd */ 1690219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1691219089Spjd sizeof (*bbt); 1692219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1693219089Spjd ASSERT(lrtxg == 0); 1694219089Spjd } 1695219089Spjd 1696219089Spjd /* 1697219089Spjd * Verify that the current bonus buffer is not newer than our txg.
1698219089Spjd */ 1699219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1700219089Spjd MAX(txg, lrtxg), crtxg); 1701219089Spjd 1702219089Spjd dmu_buf_will_dirty(db, tx); 1703219089Spjd 1704219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1705219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1706219089Spjd VERIFY3U(dmu_set_bonus(db, lr->lr_size, tx), ==, 0); /* the tag sits at the end of the bonus data, so it must be located again after the resize */ 1707219089Spjd bbt = ztest_bt_bonus(db); 1708219089Spjd 1709219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1710219089Spjd 1711219089Spjd dmu_buf_rele(db, FTAG); 1712219089Spjd 1713219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1714219089Spjd 1715219089Spjd dmu_tx_commit(tx); 1716219089Spjd 1717219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1718219089Spjd 1719219089Spjd return (0); 1720219089Spjd} 1721219089Spjd /* ztest_replay_vector: replay handlers in TX_* record-type order, as labelled on each entry; NULL marks a record type for which ztest has no handler. */ 1722168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1723168404Spjd NULL, /* 0 no such transaction type */ 1724168404Spjd ztest_replay_create, /* TX_CREATE */ 1725168404Spjd NULL, /* TX_MKDIR */ 1726168404Spjd NULL, /* TX_MKXATTR */ 1727168404Spjd NULL, /* TX_SYMLINK */ 1728168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1729168404Spjd NULL, /* TX_RMDIR */ 1730168404Spjd NULL, /* TX_LINK */ 1731168404Spjd NULL, /* TX_RENAME */ 1732219089Spjd ztest_replay_write, /* TX_WRITE */ 1733219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1734219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1735168404Spjd NULL, /* TX_ACL */ 1736209962Smm NULL, /* TX_CREATE_ACL */ 1737209962Smm NULL, /* TX_CREATE_ATTR */ 1738209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1739209962Smm NULL, /* TX_MKDIR_ACL */ 1740209962Smm NULL, /* TX_MKDIR_ATTR */ 1741209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1742209962Smm NULL, /* TX_WRITE2 */ 1743168404Spjd}; 1744168404Spjd 1745168404Spjd/* 1746219089Spjd * ZIL get_data callbacks 1747219089Spjd */ 1748219089Spjd /* ztest_get_done: completion side of ztest_get_data(); releases the dbuf hold (if any), the range lock and the object lock taken for zgd, adds the block pointer to the zilog on success, and frees zgd. */ 1749219089Spjdstatic void 1750219089Spjdztest_get_done(zgd_t *zgd, int error) 1751219089Spjd{ 1752219089Spjd
ztest_ds_t *zd = zgd->zgd_private; 1753219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1754219089Spjd 1755219089Spjd if (zgd->zgd_db) 1756219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1757219089Spjd 1758219089Spjd ztest_range_unlock(zgd->zgd_rl); 1759219089Spjd ztest_object_unlock(zd, object); 1760219089Spjd 1761219089Spjd if (error == 0 && zgd->zgd_bp) 1762219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1763219089Spjd 1764219089Spjd umem_free(zgd, sizeof (*zgd)); 1765219089Spjd} 1766219089Spjd 1767219089Spjdstatic int 1768219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1769219089Spjd{ 1770219089Spjd ztest_ds_t *zd = arg; 1771219089Spjd objset_t *os = zd->zd_os; 1772219089Spjd uint64_t object = lr->lr_foid; 1773219089Spjd uint64_t offset = lr->lr_offset; 1774219089Spjd uint64_t size = lr->lr_length; 1775219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1776219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1777219089Spjd uint64_t crtxg; 1778219089Spjd dmu_object_info_t doi; 1779219089Spjd dmu_buf_t *db; 1780219089Spjd zgd_t *zgd; 1781219089Spjd int error; 1782219089Spjd 1783219089Spjd ztest_object_lock(zd, object, RL_READER); 1784219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1785219089Spjd if (error) { 1786219089Spjd ztest_object_unlock(zd, object); 1787219089Spjd return (error); 1788219089Spjd } 1789219089Spjd 1790219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1791219089Spjd 1792219089Spjd if (crtxg == 0 || crtxg > txg) { 1793219089Spjd dmu_buf_rele(db, FTAG); 1794219089Spjd ztest_object_unlock(zd, object); 1795219089Spjd return (ENOENT); 1796219089Spjd } 1797219089Spjd 1798219089Spjd dmu_object_info_from_db(db, &doi); 1799219089Spjd dmu_buf_rele(db, FTAG); 1800219089Spjd db = NULL; 1801219089Spjd 1802219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1803219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1804219089Spjd zgd->zgd_private = zd; 1805219089Spjd 1806219089Spjd if (buf != NULL) { /* immediate write */ 1807219089Spjd 
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1808219089Spjd RL_READER); 1809219089Spjd 1810219089Spjd error = dmu_read(os, object, offset, size, buf, 1811219089Spjd DMU_READ_NO_PREFETCH); 1812219089Spjd ASSERT(error == 0); 1813219089Spjd } else { 1814219089Spjd size = doi.doi_data_block_size; 1815219089Spjd if (ISP2(size)) { 1816219089Spjd offset = P2ALIGN(offset, size); 1817219089Spjd } else { 1818219089Spjd ASSERT(offset < size); 1819219089Spjd offset = 0; 1820219089Spjd } 1821219089Spjd 1822219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1823219089Spjd RL_READER); 1824219089Spjd 1825219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1826219089Spjd DMU_READ_NO_PREFETCH); 1827219089Spjd 1828219089Spjd if (error == 0) { 1829219089Spjd zgd->zgd_db = db; 1830219089Spjd zgd->zgd_bp = bp; 1831219089Spjd 1832219089Spjd ASSERT(db->db_offset == offset); 1833219089Spjd ASSERT(db->db_size == size); 1834219089Spjd 1835219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1836219089Spjd ztest_get_done, zgd); 1837219089Spjd 1838219089Spjd if (error == 0) 1839219089Spjd return (0); 1840219089Spjd } 1841219089Spjd } 1842219089Spjd 1843219089Spjd ztest_get_done(zgd, error); 1844219089Spjd 1845219089Spjd return (error); 1846219089Spjd} 1847219089Spjd 1848219089Spjdstatic void * 1849219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1850219089Spjd{ 1851219089Spjd char *lr; 1852219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1853219089Spjd 1854219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1855219089Spjd 1856219089Spjd if (name) 1857219089Spjd bcopy(name, lr + lrsize, namesize); 1858219089Spjd 1859219089Spjd return (lr); 1860219089Spjd} 1861219089Spjd 1862219089Spjdvoid 1863219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1864219089Spjd{ 1865219089Spjd size_t namesize = name ? 
strlen(name) + 1 : 0; 1866219089Spjd 1867219089Spjd umem_free(lr, lrsize + namesize); 1868219089Spjd} 1869219089Spjd 1870219089Spjd/* 1871219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1872219089Spjd */ 1873219089Spjdstatic int 1874219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1875219089Spjd{ 1876219089Spjd int missing = 0; 1877219089Spjd int error; 1878219089Spjd 1879219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1880219089Spjd 1881219089Spjd for (int i = 0; i < count; i++, od++) { 1882219089Spjd od->od_object = 0; 1883219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1884219089Spjd sizeof (uint64_t), 1, &od->od_object); 1885219089Spjd if (error) { 1886219089Spjd ASSERT(error == ENOENT); 1887219089Spjd ASSERT(od->od_object == 0); 1888219089Spjd missing++; 1889219089Spjd } else { 1890219089Spjd dmu_buf_t *db; 1891219089Spjd ztest_block_tag_t *bbt; 1892219089Spjd dmu_object_info_t doi; 1893219089Spjd 1894219089Spjd ASSERT(od->od_object != 0); 1895219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1896219089Spjd 1897219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1898219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1899219089Spjd od->od_object, FTAG, &db)); 1900219089Spjd dmu_object_info_from_db(db, &doi); 1901219089Spjd bbt = ztest_bt_bonus(db); 1902219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1903219089Spjd od->od_type = doi.doi_type; 1904219089Spjd od->od_blocksize = doi.doi_data_block_size; 1905219089Spjd od->od_gen = bbt->bt_gen; 1906219089Spjd dmu_buf_rele(db, FTAG); 1907219089Spjd ztest_object_unlock(zd, od->od_object); 1908219089Spjd } 1909219089Spjd } 1910219089Spjd 1911219089Spjd return (missing); 1912219089Spjd} 1913219089Spjd 1914219089Spjdstatic int 1915219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1916219089Spjd{ 1917219089Spjd int missing = 0; 1918219089Spjd 1919219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 
1920219089Spjd 1921219089Spjd for (int i = 0; i < count; i++, od++) { 1922219089Spjd if (missing) { 1923219089Spjd od->od_object = 0; 1924219089Spjd missing++; 1925219089Spjd continue; 1926219089Spjd } 1927219089Spjd 1928219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1929219089Spjd 1930219089Spjd lr->lr_doid = od->od_dir; 1931219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 1932219089Spjd lr->lrz_type = od->od_crtype; 1933219089Spjd lr->lrz_blocksize = od->od_crblocksize; 1934219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 1935219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 1936219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 1937219089Spjd lr->lr_gen = od->od_crgen; 1938219089Spjd lr->lr_crtime[0] = time(NULL); 1939219089Spjd 1940219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 1941219089Spjd ASSERT(missing == 0); 1942219089Spjd od->od_object = 0; 1943219089Spjd missing++; 1944219089Spjd } else { 1945219089Spjd od->od_object = lr->lr_foid; 1946219089Spjd od->od_type = od->od_crtype; 1947219089Spjd od->od_blocksize = od->od_crblocksize; 1948219089Spjd od->od_gen = od->od_crgen; 1949219089Spjd ASSERT(od->od_object != 0); 1950219089Spjd } 1951219089Spjd 1952219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 1953219089Spjd } 1954219089Spjd 1955219089Spjd return (missing); 1956219089Spjd} 1957219089Spjd 1958219089Spjdstatic int 1959219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 1960219089Spjd{ 1961219089Spjd int missing = 0; 1962219089Spjd int error; 1963219089Spjd 1964219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1965219089Spjd 1966219089Spjd od += count - 1; 1967219089Spjd 1968219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 1969219089Spjd if (missing) { 1970219089Spjd missing++; 1971219089Spjd continue; 1972219089Spjd } 1973219089Spjd 1974219089Spjd if (od->od_object == 0) 1975219089Spjd continue; 1976219089Spjd 1977219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), 
od->od_name); 1978219089Spjd 1979219089Spjd lr->lr_doid = od->od_dir; 1980219089Spjd 1981219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 1982219089Spjd ASSERT3U(error, ==, ENOSPC); 1983219089Spjd missing++; 1984219089Spjd } else { 1985219089Spjd od->od_object = 0; 1986219089Spjd } 1987219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 1988219089Spjd } 1989219089Spjd 1990219089Spjd return (missing); 1991219089Spjd} 1992219089Spjd 1993219089Spjdstatic int 1994219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 1995219089Spjd void *data) 1996219089Spjd{ 1997219089Spjd lr_write_t *lr; 1998219089Spjd int error; 1999219089Spjd 2000219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2001219089Spjd 2002219089Spjd lr->lr_foid = object; 2003219089Spjd lr->lr_offset = offset; 2004219089Spjd lr->lr_length = size; 2005219089Spjd lr->lr_blkoff = 0; 2006219089Spjd BP_ZERO(&lr->lr_blkptr); 2007219089Spjd 2008219089Spjd bcopy(data, lr + 1, size); 2009219089Spjd 2010219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2011219089Spjd 2012219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2013219089Spjd 2014219089Spjd return (error); 2015219089Spjd} 2016219089Spjd 2017219089Spjdstatic int 2018219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2019219089Spjd{ 2020219089Spjd lr_truncate_t *lr; 2021219089Spjd int error; 2022219089Spjd 2023219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2024219089Spjd 2025219089Spjd lr->lr_foid = object; 2026219089Spjd lr->lr_offset = offset; 2027219089Spjd lr->lr_length = size; 2028219089Spjd 2029219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2030219089Spjd 2031219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2032219089Spjd 2033219089Spjd return (error); 2034219089Spjd} 2035219089Spjd 2036219089Spjdstatic int 2037219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2038219089Spjd{ 2039219089Spjd lr_setattr_t *lr; 
2040219089Spjd int error; 2041219089Spjd 2042219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2043219089Spjd 2044219089Spjd lr->lr_foid = object; 2045219089Spjd lr->lr_size = 0; 2046219089Spjd lr->lr_mode = 0; 2047219089Spjd 2048219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2049219089Spjd 2050219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2051219089Spjd 2052219089Spjd return (error); 2053219089Spjd} 2054219089Spjd 2055219089Spjdstatic void 2056219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2057219089Spjd{ 2058219089Spjd objset_t *os = zd->zd_os; 2059219089Spjd dmu_tx_t *tx; 2060219089Spjd uint64_t txg; 2061219089Spjd rl_t *rl; 2062219089Spjd 2063219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2064219089Spjd 2065219089Spjd ztest_object_lock(zd, object, RL_READER); 2066219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2067219089Spjd 2068219089Spjd tx = dmu_tx_create(os); 2069219089Spjd 2070219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2071219089Spjd 2072219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2073219089Spjd 2074219089Spjd if (txg != 0) { 2075219089Spjd dmu_prealloc(os, object, offset, size, tx); 2076219089Spjd dmu_tx_commit(tx); 2077219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2078219089Spjd } else { 2079219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2080219089Spjd } 2081219089Spjd 2082219089Spjd ztest_range_unlock(rl); 2083219089Spjd ztest_object_unlock(zd, object); 2084219089Spjd} 2085219089Spjd 2086219089Spjdstatic void 2087219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2088219089Spjd{ 2089219089Spjd ztest_block_tag_t wbt; 2090219089Spjd dmu_object_info_t doi; 2091219089Spjd enum ztest_io_type io_type; 2092219089Spjd uint64_t blocksize; 2093219089Spjd void *data; 2094219089Spjd 2095219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2096219089Spjd blocksize = doi.doi_data_block_size; 
2097219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2098219089Spjd 2099219089Spjd /* 2100219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2101219089Spjd */ 2102219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2103219089Spjd if (ztest_random(2) == 0) 2104219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2105219089Spjd 2106224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2107224526Smm 2108219089Spjd switch (io_type) { 2109219089Spjd 2110219089Spjd case ZTEST_IO_WRITE_TAG: 2111219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2112219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2113219089Spjd break; 2114219089Spjd 2115219089Spjd case ZTEST_IO_WRITE_PATTERN: 2116219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2117219089Spjd if (ztest_random(2) == 0) { 2118219089Spjd /* 2119219089Spjd * Induce fletcher2 collisions to ensure that 2120219089Spjd * zio_ddt_collision() detects and resolves them 2121219089Spjd * when using fletcher2-verify for deduplication. 2122219089Spjd */ 2123219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2124219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2125219089Spjd } 2126219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2127219089Spjd break; 2128219089Spjd 2129219089Spjd case ZTEST_IO_WRITE_ZEROES: 2130219089Spjd bzero(data, blocksize); 2131219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2132219089Spjd break; 2133219089Spjd 2134219089Spjd case ZTEST_IO_TRUNCATE: 2135219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2136219089Spjd break; 2137219089Spjd 2138219089Spjd case ZTEST_IO_SETATTR: 2139219089Spjd (void) ztest_setattr(zd, object); 2140219089Spjd break; 2141219089Spjd } 2142219089Spjd 2143224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2144224526Smm 2145219089Spjd umem_free(data, blocksize); 2146219089Spjd} 2147219089Spjd 2148219089Spjd/* 2149219089Spjd * Initialize an object description template. 
2150219089Spjd */ 2151219089Spjdstatic void 2152219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2153219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2154219089Spjd{ 2155219089Spjd od->od_dir = ZTEST_DIROBJ; 2156219089Spjd od->od_object = 0; 2157219089Spjd 2158219089Spjd od->od_crtype = type; 2159219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2160219089Spjd od->od_crgen = gen; 2161219089Spjd 2162219089Spjd od->od_type = DMU_OT_NONE; 2163219089Spjd od->od_blocksize = 0; 2164219089Spjd od->od_gen = 0; 2165219089Spjd 2166219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2167219089Spjd tag, (int64_t)id, index); 2168219089Spjd} 2169219089Spjd 2170219089Spjd/* 2171219089Spjd * Lookup or create the objects for a test using the od template. 2172219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2173219089Spjd * remove any existing objects and create new ones. Otherwise, 2174219089Spjd * use the existing objects. 
2175219089Spjd */ 2176219089Spjdstatic int 2177219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2178219089Spjd{ 2179219089Spjd int count = size / sizeof (*od); 2180219089Spjd int rv = 0; 2181219089Spjd 2182219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2183219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2184219089Spjd (ztest_remove(zd, od, count) != 0 || 2185219089Spjd ztest_create(zd, od, count) != 0)) 2186219089Spjd rv = -1; 2187219089Spjd zd->zd_od = od; 2188219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2189219089Spjd 2190219089Spjd return (rv); 2191219089Spjd} 2192219089Spjd 2193219089Spjd/* ARGSUSED */ 2194219089Spjdvoid 2195219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2196219089Spjd{ 2197219089Spjd zilog_t *zilog = zd->zd_zilog; 2198219089Spjd 2199224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2200224526Smm 2201219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2202219089Spjd 2203219089Spjd /* 2204219089Spjd * Remember the committed values in zd, which is in parent/child 2205219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2206219089Spjd * will verify that the log really does contain this record. 2207219089Spjd */ 2208219089Spjd mutex_enter(&zilog->zl_lock); 2209236143Smm ASSERT(zd->zd_shared != NULL); 2210236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2211236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2212219089Spjd mutex_exit(&zilog->zl_lock); 2213224526Smm 2214224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2215219089Spjd} 2216219089Spjd 2217219089Spjd/* 2218224526Smm * This function is designed to simulate the operations that occur during a 2219224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2220224526Smm * attempt to expose any implicit assumptions about ZIL management. 
2221224526Smm */ 2222224526Smm/* ARGSUSED */ 2223224526Smmvoid 2224224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2225224526Smm{ 2226224526Smm objset_t *os = zd->zd_os; 2227224526Smm 2228224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2229224526Smm 2230224526Smm /* zfsvfs_teardown() */ 2231224526Smm zil_close(zd->zd_zilog); 2232224526Smm 2233224526Smm /* zfsvfs_setup() */ 2234224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2235224526Smm zil_replay(os, zd, ztest_replay_vector); 2236224526Smm 2237224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2238224526Smm} 2239224526Smm 2240224526Smm/* 2241168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2242168404Spjd * or create a pool with a bad vdev spec. 2243168404Spjd */ 2244219089Spjd/* ARGSUSED */ 2245168404Spjdvoid 2246219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2247168404Spjd{ 2248236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2249168404Spjd spa_t *spa; 2250168404Spjd nvlist_t *nvroot; 2251168404Spjd 2252168404Spjd /* 2253168404Spjd * Attempt to create using a bad file. 2254168404Spjd */ 2255185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); 2256219089Spjd VERIFY3U(ENOENT, ==, 2257219089Spjd spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); 2258168404Spjd nvlist_free(nvroot); 2259168404Spjd 2260168404Spjd /* 2261168404Spjd * Attempt to create using a bad mirror. 2262168404Spjd */ 2263185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1); 2264219089Spjd VERIFY3U(ENOENT, ==, 2265219089Spjd spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); 2266168404Spjd nvlist_free(nvroot); 2267168404Spjd 2268168404Spjd /* 2269168404Spjd * Attempt to create an existing pool. It shouldn't matter 2270168404Spjd * what's in the nvroot; we should fail with EEXIST. 
2271168404Spjd */ 2272236143Smm (void) rw_rdlock(&ztest_name_lock); 2273185029Spjd nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1); 2274236143Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); 2275168404Spjd nvlist_free(nvroot); 2276236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2277236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2278219089Spjd spa_close(spa, FTAG); 2279168404Spjd 2280236143Smm (void) rw_unlock(&ztest_name_lock); 2281168404Spjd} 2282168404Spjd 2283185029Spjdstatic vdev_t * 2284185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2285185029Spjd{ 2286185029Spjd vdev_t *mvd; 2287185029Spjd 2288185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2289185029Spjd return (vd); 2290185029Spjd 2291185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2292185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2293185029Spjd NULL) 2294185029Spjd return (mvd); 2295185029Spjd 2296185029Spjd return (NULL); 2297185029Spjd} 2298185029Spjd 2299168404Spjd/* 2300219089Spjd * Find the first available hole which can be used as a top-level. 2301219089Spjd */ 2302219089Spjdint 2303219089Spjdfind_vdev_hole(spa_t *spa) 2304219089Spjd{ 2305219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2306219089Spjd int c; 2307219089Spjd 2308219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2309219089Spjd 2310219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2311219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2312219089Spjd 2313219089Spjd if (cvd->vdev_ishole) 2314219089Spjd break; 2315219089Spjd } 2316219089Spjd return (c); 2317219089Spjd} 2318219089Spjd 2319219089Spjd/* 2320168404Spjd * Verify that vdev_add() works as expected. 
2321168404Spjd */ 2322219089Spjd/* ARGSUSED */ 2323168404Spjdvoid 2324219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2325168404Spjd{ 2326219089Spjd ztest_shared_t *zs = ztest_shared; 2327236143Smm spa_t *spa = ztest_spa; 2328219089Spjd uint64_t leaves; 2329219089Spjd uint64_t guid; 2330168404Spjd nvlist_t *nvroot; 2331168404Spjd int error; 2332168404Spjd 2333236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2334236143Smm leaves = 2335236143Smm MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2336168404Spjd 2337185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2338168404Spjd 2339219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2340168404Spjd 2341185029Spjd /* 2342219089Spjd * If we have slogs then remove them 1/4 of the time. 2343185029Spjd */ 2344219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2345219089Spjd /* 2346219089Spjd * Grab the guid from the head of the log class rotor. 2347219089Spjd */ 2348219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2349185029Spjd 2350219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2351168404Spjd 2352219089Spjd /* 2353219089Spjd * We have to grab the zs_name_lock as writer to 2354219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2355219089Spjd * and destroying a dataset. Removing the slog will 2356219089Spjd * grab a reference on the dataset which may cause 2357219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2358219089Spjd * leaving the dataset in an inconsistent state. 
2359219089Spjd */ 2360236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2361219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2362236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2363168404Spjd 2364219089Spjd if (error && error != EEXIST) 2365219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2366219089Spjd } else { 2367219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2368219089Spjd 2369219089Spjd /* 2370219089Spjd * Make 1/4 of the devices be log devices. 2371219089Spjd */ 2372236143Smm nvroot = make_vdev_root(NULL, NULL, 2373236143Smm ztest_opts.zo_vdev_size, 0, 2374236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2375236143Smm zs->zs_mirrors, 1); 2376219089Spjd 2377219089Spjd error = spa_vdev_add(spa, nvroot); 2378219089Spjd nvlist_free(nvroot); 2379219089Spjd 2380219089Spjd if (error == ENOSPC) 2381219089Spjd ztest_record_enospc("spa_vdev_add"); 2382219089Spjd else if (error != 0) 2383219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2384219089Spjd } 2385219089Spjd 2386236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2387168404Spjd} 2388168404Spjd 2389185029Spjd/* 2390185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
2391185029Spjd */ 2392219089Spjd/* ARGSUSED */ 2393185029Spjdvoid 2394219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2395168404Spjd{ 2396219089Spjd ztest_shared_t *zs = ztest_shared; 2397236143Smm spa_t *spa = ztest_spa; 2398185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2399185029Spjd spa_aux_vdev_t *sav; 2400185029Spjd char *aux; 2401185029Spjd uint64_t guid = 0; 2402185029Spjd int error; 2403168404Spjd 2404185029Spjd if (ztest_random(2) == 0) { 2405185029Spjd sav = &spa->spa_spares; 2406185029Spjd aux = ZPOOL_CONFIG_SPARES; 2407185029Spjd } else { 2408185029Spjd sav = &spa->spa_l2cache; 2409185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2410185029Spjd } 2411185029Spjd 2412236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2413185029Spjd 2414185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2415185029Spjd 2416185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2417185029Spjd /* 2418185029Spjd * Pick a random device to remove. 2419185029Spjd */ 2420185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2421185029Spjd } else { 2422185029Spjd /* 2423185029Spjd * Find an unused device we can add. 2424185029Spjd */ 2425219089Spjd zs->zs_vdev_aux = 0; 2426185029Spjd for (;;) { 2427185029Spjd char path[MAXPATHLEN]; 2428185029Spjd int c; 2429236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2430236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2431236143Smm zs->zs_vdev_aux); 2432185029Spjd for (c = 0; c < sav->sav_count; c++) 2433185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2434185029Spjd path) == 0) 2435185029Spjd break; 2436185029Spjd if (c == sav->sav_count && 2437185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2438185029Spjd break; 2439219089Spjd zs->zs_vdev_aux++; 2440168404Spjd } 2441168404Spjd } 2442168404Spjd 2443185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2444168404Spjd 2445185029Spjd if (guid == 0) { 2446185029Spjd /* 2447185029Spjd * Add a new device. 
2448185029Spjd */ 2449185029Spjd nvlist_t *nvroot = make_vdev_root(NULL, aux, 2450236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2451185029Spjd error = spa_vdev_add(spa, nvroot); 2452185029Spjd if (error != 0) 2453185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2454185029Spjd nvlist_free(nvroot); 2455185029Spjd } else { 2456185029Spjd /* 2457185029Spjd * Remove an existing device. Sometimes, dirty its 2458185029Spjd * vdev state first to make sure we handle removal 2459185029Spjd * of devices that have pending state changes. 2460185029Spjd */ 2461185029Spjd if (ztest_random(2) == 0) 2462219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2463185029Spjd 2464185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2465185029Spjd if (error != 0 && error != EBUSY) 2466185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2467185029Spjd } 2468185029Spjd 2469236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2470168404Spjd} 2471168404Spjd 2472168404Spjd/* 2473219089Spjd * split a pool if it has mirror tlvdevs 2474219089Spjd */ 2475219089Spjd/* ARGSUSED */ 2476219089Spjdvoid 2477219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2478219089Spjd{ 2479219089Spjd ztest_shared_t *zs = ztest_shared; 2480236143Smm spa_t *spa = ztest_spa; 2481219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2482219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2483219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2484219089Spjd int error = 0; 2485219089Spjd 2486236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2487219089Spjd 2488219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2489236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2490236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2491219089Spjd return; 2492219089Spjd } 2493219089Spjd 2494219089Spjd /* clean up the old pool, if any */ 2495219089Spjd (void) spa_destroy("splitp"); 2496219089Spjd 2497219089Spjd 
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2498219089Spjd 2499219089Spjd /* generate a config from the existing config */ 2500219089Spjd mutex_enter(&spa->spa_props_lock); 2501219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2502219089Spjd &tree) == 0); 2503219089Spjd mutex_exit(&spa->spa_props_lock); 2504219089Spjd 2505219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2506219089Spjd &children) == 0); 2507219089Spjd 2508219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2509219089Spjd for (c = 0; c < children; c++) { 2510219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2511219089Spjd nvlist_t **mchild; 2512219089Spjd uint_t mchildren; 2513219089Spjd 2514219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2515219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2516219089Spjd 0) == 0); 2517219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2518219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2519219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2520219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2521219089Spjd if (lastlogid == 0) 2522219089Spjd lastlogid = schildren; 2523219089Spjd ++schildren; 2524219089Spjd continue; 2525219089Spjd } 2526219089Spjd lastlogid = 0; 2527219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2528219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2529219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2530219089Spjd } 2531219089Spjd 2532219089Spjd /* OK, create a config that can be used to split */ 2533219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2534219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2535219089Spjd VDEV_TYPE_ROOT) == 0); 2536219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2537219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2538219089Spjd 2539219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2540219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2541219089Spjd 2542219089Spjd for (c = 0; c < schildren; c++) 2543219089Spjd nvlist_free(schild[c]); 2544219089Spjd free(schild); 2545219089Spjd nvlist_free(split); 2546219089Spjd 2547219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2548219089Spjd 2549236143Smm (void) rw_wrlock(&ztest_name_lock); 2550219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2551236143Smm (void) rw_unlock(&ztest_name_lock); 2552219089Spjd 2553219089Spjd nvlist_free(config); 2554219089Spjd 2555219089Spjd if (error == 0) { 2556219089Spjd (void) printf("successful split - results:\n"); 2557219089Spjd mutex_enter(&spa_namespace_lock); 2558219089Spjd show_pool_stats(spa); 2559219089Spjd show_pool_stats(spa_lookup("splitp")); 2560219089Spjd mutex_exit(&spa_namespace_lock); 2561219089Spjd ++zs->zs_splits; 2562219089Spjd --zs->zs_mirrors; 2563219089Spjd } 2564236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2565219089Spjd 2566219089Spjd} 2567219089Spjd 2568219089Spjd/* 2569168404Spjd * Verify that we can attach and detach devices. 
2570168404Spjd */ 2571219089Spjd/* ARGSUSED */ 2572168404Spjdvoid 2573219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2574168404Spjd{ 2575219089Spjd ztest_shared_t *zs = ztest_shared; 2576236143Smm spa_t *spa = ztest_spa; 2577185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2578168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2579168404Spjd vdev_t *oldvd, *newvd, *pvd; 2580185029Spjd nvlist_t *root; 2581219089Spjd uint64_t leaves; 2582168404Spjd uint64_t leaf, top; 2583168404Spjd uint64_t ashift = ztest_get_ashift(); 2584209962Smm uint64_t oldguid, pguid; 2585168404Spjd size_t oldsize, newsize; 2586168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2587168404Spjd int replacing; 2588185029Spjd int oldvd_has_siblings = B_FALSE; 2589185029Spjd int newvd_is_spare = B_FALSE; 2590185029Spjd int oldvd_is_log; 2591168404Spjd int error, expected_error; 2592168404Spjd 2593236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2594236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2595168404Spjd 2596185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2597168404Spjd 2598168404Spjd /* 2599168404Spjd * Decide whether to do an attach or a replace. 2600168404Spjd */ 2601168404Spjd replacing = ztest_random(2); 2602168404Spjd 2603168404Spjd /* 2604168404Spjd * Pick a random top-level vdev. 2605168404Spjd */ 2606219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2607168404Spjd 2608168404Spjd /* 2609168404Spjd * Pick a random leaf within it. 2610168404Spjd */ 2611168404Spjd leaf = ztest_random(leaves); 2612168404Spjd 2613168404Spjd /* 2614185029Spjd * Locate this vdev. 
2615168404Spjd */ 2616185029Spjd oldvd = rvd->vdev_child[top]; 2617219089Spjd if (zs->zs_mirrors >= 1) { 2618209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2619219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2620236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2621209962Smm } 2622236143Smm if (ztest_opts.zo_raidz > 1) { 2623209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2624236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2625236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2626209962Smm } 2627168404Spjd 2628168404Spjd /* 2629185029Spjd * If we're already doing an attach or replace, oldvd may be a 2630185029Spjd * mirror vdev -- in which case, pick a random child. 2631168404Spjd */ 2632185029Spjd while (oldvd->vdev_children != 0) { 2633185029Spjd oldvd_has_siblings = B_TRUE; 2634209962Smm ASSERT(oldvd->vdev_children >= 2); 2635209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2636185029Spjd } 2637168404Spjd 2638185029Spjd oldguid = oldvd->vdev_guid; 2639219089Spjd oldsize = vdev_get_min_asize(oldvd); 2640185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2641185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2642185029Spjd pvd = oldvd->vdev_parent; 2643209962Smm pguid = pvd->vdev_guid; 2644185029Spjd 2645168404Spjd /* 2646185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2647168404Spjd */ 2648185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2649185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2650209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2651209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2652209962Smm error != ENOTSUP) 2653209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2654236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2655185029Spjd return; 2656185029Spjd } 2657168404Spjd 2658168404Spjd /* 2659185029Spjd * For the new vdev, choose with equal probability between the two 2660185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2661168404Spjd */ 2662185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2663185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2664185029Spjd newvd_is_spare = B_TRUE; 2665185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2666185029Spjd } else { 2667185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2668236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2669236143Smm top * leaves + leaf); 2670185029Spjd if (ztest_random(2) == 0) 2671185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2672185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2673185029Spjd } 2674168404Spjd 2675185029Spjd if (newvd) { 2676219089Spjd newsize = vdev_get_min_asize(newvd); 2677185029Spjd } else { 2678185029Spjd /* 2679185029Spjd * Make newsize a little bigger or smaller than oldsize. 2680185029Spjd * If it's smaller, the attach should fail. 2681185029Spjd * If it's larger, and we're doing a replace, 2682185029Spjd * we should get dynamic LUN growth when we're done. 2683185029Spjd */ 2684185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2685185029Spjd } 2686185029Spjd 2687168404Spjd /* 2688168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2689168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 
2690168404Spjd * 2691168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2692168404Spjd * 2693168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2694168404Spjd */ 2695185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2696185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2697185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2698185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2699185029Spjd expected_error = ENOTSUP; 2700185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2701185029Spjd expected_error = ENOTSUP; 2702185029Spjd else if (newvd == oldvd) 2703185029Spjd expected_error = replacing ? 0 : EBUSY; 2704185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2705168404Spjd expected_error = EBUSY; 2706168404Spjd else if (newsize < oldsize) 2707168404Spjd expected_error = EOVERFLOW; 2708168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2709168404Spjd expected_error = EDOM; 2710168404Spjd else 2711168404Spjd expected_error = 0; 2712168404Spjd 2713185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2714168404Spjd 2715168404Spjd /* 2716168404Spjd * Build the nvlist describing newpath. 2717168404Spjd */ 2718185029Spjd root = make_vdev_root(newpath, NULL, newvd == NULL ? newsize : 0, 2719185029Spjd ashift, 0, 0, 0, 1); 2720168404Spjd 2721185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2722168404Spjd 2723168404Spjd nvlist_free(root); 2724168404Spjd 2725168404Spjd /* 2726168404Spjd * If our parent was the replacing vdev, but the replace completed, 2727168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2728168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2729168404Spjd */ 2730168404Spjd if (expected_error == ENOTSUP && 2731168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2732168404Spjd expected_error = error; 2733168404Spjd 2734168404Spjd /* 2735168404Spjd * If someone grew the LUN, the replacement may be too small. 
2736168404Spjd */ 2737185029Spjd if (error == EOVERFLOW || error == EBUSY) 2738168404Spjd expected_error = error; 2739168404Spjd 2740185029Spjd /* XXX workaround 6690467 */ 2741185029Spjd if (error != expected_error && expected_error != EBUSY) { 2742185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2743185029Spjd "returned %d, expected %d", 2744185029Spjd oldpath, (longlong_t)oldsize, newpath, 2745185029Spjd (longlong_t)newsize, replacing, error, expected_error); 2746168404Spjd } 2747168404Spjd 2748236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2749168404Spjd} 2750168404Spjd 2751168404Spjd/* 2752219089Spjd * Callback function which expands the physical size of the vdev. 2753168404Spjd */ 2754219089Spjdvdev_t * 2755219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2756168404Spjd{ 2757219089Spjd spa_t *spa = vd->vdev_spa; 2758219089Spjd size_t *newsize = arg; 2759168404Spjd size_t fsize; 2760168404Spjd int fd; 2761168404Spjd 2762219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2763219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2764168404Spjd 2765219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2766219089Spjd return (vd); 2767219089Spjd 2768219089Spjd fsize = lseek(fd, 0, SEEK_END); 2769219089Spjd (void) ftruncate(fd, *newsize); 2770219089Spjd 2771236143Smm if (ztest_opts.zo_verbose >= 6) { 2772219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2773219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2774219089Spjd } 2775219089Spjd (void) close(fd); 2776219089Spjd return (NULL); 2777219089Spjd} 2778219089Spjd 2779219089Spjd/* 2780219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
2781219089Spjd */ 2782219089Spjd/* ARGSUSED */ 2783219089Spjdvdev_t * 2784219089Spjdonline_vdev(vdev_t *vd, void *arg) 2785219089Spjd{ 2786219089Spjd spa_t *spa = vd->vdev_spa; 2787219089Spjd vdev_t *tvd = vd->vdev_top; 2788219089Spjd uint64_t guid = vd->vdev_guid; 2789219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2790219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2791219089Spjd int error; 2792219089Spjd 2793219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2794219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2795219089Spjd 2796219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2797219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2798219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2799219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2800219089Spjd 2801168404Spjd /* 2802219089Spjd * If vdev_online returned an error or the underlying vdev_open 2803219089Spjd * failed then we abort the expand. The only way to know that 2804219089Spjd * vdev_open fails is by checking the returned newstate. 2805168404Spjd */ 2806219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2807236143Smm if (ztest_opts.zo_verbose >= 5) { 2808219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2809219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2810219089Spjd } 2811219089Spjd return (vd); 2812219089Spjd } 2813219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2814168404Spjd 2815219089Spjd /* 2816219089Spjd * Since we dropped the lock we need to ensure that we're 2817219089Spjd * still talking to the original vdev. It's possible this 2818219089Spjd * vdev may have been detached/replaced while we were 2819219089Spjd * trying to online it. 
2820219089Spjd */ 2821219089Spjd if (generation != spa->spa_config_generation) { 2822236143Smm if (ztest_opts.zo_verbose >= 5) { 2823219089Spjd (void) printf("vdev configuration has changed, " 2824219089Spjd "guid %llu, state %llu, expected gen %llu, " 2825219089Spjd "got gen %llu\n", 2826219089Spjd (u_longlong_t)guid, 2827219089Spjd (u_longlong_t)tvd->vdev_state, 2828219089Spjd (u_longlong_t)generation, 2829219089Spjd (u_longlong_t)spa->spa_config_generation); 2830219089Spjd } 2831219089Spjd return (vd); 2832219089Spjd } 2833219089Spjd return (NULL); 2834219089Spjd} 2835168404Spjd 2836219089Spjd/* 2837219089Spjd * Traverse the vdev tree calling the supplied function. 2838219089Spjd * We continue to walk the tree until we either have walked all 2839219089Spjd * children or we receive a non-NULL return from the callback. 2840219089Spjd * If a NULL callback is passed, then we just return back the first 2841219089Spjd * leaf vdev we encounter. 2842219089Spjd */ 2843219089Spjdvdev_t * 2844219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 2845219089Spjd{ 2846219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 2847219089Spjd if (func == NULL) 2848219089Spjd return (vd); 2849219089Spjd else 2850219089Spjd return (func(vd, arg)); 2851219089Spjd } 2852168404Spjd 2853219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 2854219089Spjd vdev_t *cvd = vd->vdev_child[c]; 2855219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 2856219089Spjd return (cvd); 2857219089Spjd } 2858219089Spjd return (NULL); 2859219089Spjd} 2860219089Spjd 2861219089Spjd/* 2862219089Spjd * Verify that dynamic LUN growth works as expected. 
2863219089Spjd */ 2864219089Spjd/* ARGSUSED */ 2865219089Spjdvoid 2866219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 2867219089Spjd{ 2868236143Smm spa_t *spa = ztest_spa; 2869219089Spjd vdev_t *vd, *tvd; 2870219089Spjd metaslab_class_t *mc; 2871219089Spjd metaslab_group_t *mg; 2872219089Spjd size_t psize, newsize; 2873219089Spjd uint64_t top; 2874219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 2875219089Spjd 2876236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2877219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2878219089Spjd 2879219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2880219089Spjd 2881219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 2882219089Spjd mg = tvd->vdev_mg; 2883219089Spjd mc = mg->mg_class; 2884219089Spjd old_ms_count = tvd->vdev_ms_count; 2885219089Spjd old_class_space = metaslab_class_get_space(mc); 2886219089Spjd 2887219089Spjd /* 2888219089Spjd * Determine the size of the first leaf vdev associated with 2889219089Spjd * our top-level device. 2890219089Spjd */ 2891219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 2892219089Spjd ASSERT3P(vd, !=, NULL); 2893219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2894219089Spjd 2895219089Spjd psize = vd->vdev_psize; 2896219089Spjd 2897219089Spjd /* 2898219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 2899219089Spjd * original size, and it has a valid psize. 
2900219089Spjd */ 2901219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 2902236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 2903219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2904236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2905219089Spjd return; 2906219089Spjd } 2907219089Spjd ASSERT(psize > 0); 2908219089Spjd newsize = psize + psize / 8; 2909219089Spjd ASSERT3U(newsize, >, psize); 2910219089Spjd 2911236143Smm if (ztest_opts.zo_verbose >= 6) { 2912219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 2913219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 2914219089Spjd } 2915219089Spjd 2916219089Spjd /* 2917219089Spjd * Growing the vdev is a two step process: 2918219089Spjd * 1). expand the physical size (i.e. relabel) 2919219089Spjd * 2). online the vdev to create the new metaslabs 2920219089Spjd */ 2921219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 2922219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 2923219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 2924236143Smm if (ztest_opts.zo_verbose >= 5) { 2925219089Spjd (void) printf("Could not expand LUN because " 2926219089Spjd "the vdev configuration changed.\n"); 2927168404Spjd } 2928219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2929236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2930219089Spjd return; 2931168404Spjd } 2932168404Spjd 2933219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2934219089Spjd 2935219089Spjd /* 2936219089Spjd * Expanding the LUN will update the config asynchronously, 2937219089Spjd * thus we must wait for the async thread to complete any 2938219089Spjd * pending tasks before proceeding. 
2939219089Spjd */ 2940219089Spjd for (;;) { 2941219089Spjd boolean_t done; 2942219089Spjd mutex_enter(&spa->spa_async_lock); 2943219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 2944219089Spjd mutex_exit(&spa->spa_async_lock); 2945219089Spjd if (done) 2946219089Spjd break; 2947219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 2948219089Spjd (void) poll(NULL, 0, 100); 2949219089Spjd } 2950219089Spjd 2951219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2952219089Spjd 2953219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 2954219089Spjd new_ms_count = tvd->vdev_ms_count; 2955219089Spjd new_class_space = metaslab_class_get_space(mc); 2956219089Spjd 2957219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 2958236143Smm if (ztest_opts.zo_verbose >= 5) { 2959219089Spjd (void) printf("Could not verify LUN expansion due to " 2960219089Spjd "intervening vdev offline or remove.\n"); 2961219089Spjd } 2962219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2963236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2964219089Spjd return; 2965219089Spjd } 2966219089Spjd 2967219089Spjd /* 2968219089Spjd * Make sure we were able to grow the vdev. 2969219089Spjd */ 2970219089Spjd if (new_ms_count <= old_ms_count) 2971219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 2972219089Spjd old_ms_count, new_ms_count); 2973219089Spjd 2974219089Spjd /* 2975219089Spjd * Make sure we were able to grow the pool. 
2976219089Spjd */ 2977219089Spjd if (new_class_space <= old_class_space) 2978219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 2979219089Spjd old_class_space, new_class_space); 2980219089Spjd 2981236143Smm if (ztest_opts.zo_verbose >= 5) { 2982219089Spjd char oldnumbuf[6], newnumbuf[6]; 2983219089Spjd 2984219089Spjd nicenum(old_class_space, oldnumbuf); 2985219089Spjd nicenum(new_class_space, newnumbuf); 2986219089Spjd (void) printf("%s grew from %s to %s\n", 2987219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 2988219089Spjd } 2989219089Spjd 2990219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2991236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2992168404Spjd} 2993168404Spjd 2994219089Spjd/* 2995219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 2996219089Spjd */ 2997168404Spjd/* ARGSUSED */ 2998168404Spjdstatic void 2999219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3000168404Spjd{ 3001168404Spjd /* 3002219089Spjd * Create the objects common to all ztest datasets. 
3003168404Spjd */ 3004219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3005168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3006219089Spjd} 3007168404Spjd 3008219089Spjdstatic int 3009219089Spjdztest_dataset_create(char *dsname) 3010219089Spjd{ 3011219089Spjd uint64_t zilset = ztest_random(100); 3012219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3013219089Spjd ztest_objset_create_cb, NULL); 3014219089Spjd 3015219089Spjd if (err || zilset < 80) 3016219089Spjd return (err); 3017219089Spjd 3018236143Smm if (ztest_opts.zo_verbose >= 6) 3019236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3020219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3021219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3022168404Spjd} 3023168404Spjd 3024219089Spjd/* ARGSUSED */ 3025168404Spjdstatic int 3026219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3027168404Spjd{ 3028168404Spjd objset_t *os; 3029219089Spjd dmu_object_info_t doi; 3030168404Spjd int error; 3031168404Spjd 3032168404Spjd /* 3033168404Spjd * Verify that the dataset contains a directory object. 3034168404Spjd */ 3035219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os)); 3036219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3037168404Spjd if (error != ENOENT) { 3038168404Spjd /* We could have crashed in the middle of destroying it */ 3039168404Spjd ASSERT3U(error, ==, 0); 3040219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3041219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3042168404Spjd } 3043219089Spjd dmu_objset_rele(os, FTAG); 3044168404Spjd 3045168404Spjd /* 3046168404Spjd * Destroy the dataset. 
3047168404Spjd */ 3048219089Spjd VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE)); 3049168404Spjd return (0); 3050168404Spjd} 3051168404Spjd 3052219089Spjdstatic boolean_t 3053219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3054168404Spjd{ 3055219089Spjd char snapname[MAXNAMELEN]; 3056219089Spjd int error; 3057168404Spjd 3058219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3059219089Spjd (u_longlong_t)id); 3060168404Spjd 3061219089Spjd error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, 3062219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3063219089Spjd if (error == ENOSPC) { 3064219089Spjd ztest_record_enospc(FTAG); 3065219089Spjd return (B_FALSE); 3066219089Spjd } 3067219089Spjd if (error != 0 && error != EEXIST) 3068219089Spjd fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error); 3069219089Spjd return (B_TRUE); 3070219089Spjd} 3071168404Spjd 3072219089Spjdstatic boolean_t 3073219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3074219089Spjd{ 3075219089Spjd char snapname[MAXNAMELEN]; 3076219089Spjd int error; 3077219089Spjd 3078219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3079219089Spjd (u_longlong_t)id); 3080219089Spjd 3081219089Spjd error = dmu_objset_destroy(snapname, B_FALSE); 3082219089Spjd if (error != 0 && error != ENOENT) 3083219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3084219089Spjd return (B_TRUE); 3085168404Spjd} 3086168404Spjd 3087219089Spjd/* ARGSUSED */ 3088168404Spjdvoid 3089219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3090168404Spjd{ 3091219089Spjd ztest_ds_t zdtmp; 3092219089Spjd int iters; 3093168404Spjd int error; 3094185029Spjd objset_t *os, *os2; 3095219089Spjd char name[MAXNAMELEN]; 3096168404Spjd zilog_t *zilog; 3097168404Spjd 3098236143Smm (void) rw_rdlock(&ztest_name_lock); 3099168404Spjd 3100219089Spjd (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3101236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 
3102168404Spjd 3103168404Spjd /* 3104168404Spjd * If this dataset exists from a previous run, process its replay log 3105168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3106219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3107168404Spjd */ 3108168404Spjd if (ztest_random(2) == 0 && 3109219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3110236143Smm ztest_zd_init(&zdtmp, NULL, os); 3111219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3112219089Spjd ztest_zd_fini(&zdtmp); 3113219089Spjd dmu_objset_disown(os, FTAG); 3114168404Spjd } 3115168404Spjd 3116168404Spjd /* 3117168404Spjd * There may be an old instance of the dataset we're about to 3118168404Spjd * create lying around from a previous run. If so, destroy it 3119168404Spjd * and all of its snapshots. 3120168404Spjd */ 3121219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3122168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3123168404Spjd 3124168404Spjd /* 3125168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3126168404Spjd */ 3127219089Spjd VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os)); 3128168404Spjd 3129168404Spjd /* 3130168404Spjd * Verify that we can create a new dataset. 3131168404Spjd */ 3132219089Spjd error = ztest_dataset_create(name); 3133168404Spjd if (error) { 3134168404Spjd if (error == ENOSPC) { 3135219089Spjd ztest_record_enospc(FTAG); 3136236143Smm (void) rw_unlock(&ztest_name_lock); 3137168404Spjd return; 3138168404Spjd } 3139168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3140168404Spjd } 3141168404Spjd 3142219089Spjd VERIFY3U(0, ==, 3143219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3144168404Spjd 3145236143Smm ztest_zd_init(&zdtmp, NULL, os); 3146219089Spjd 3147168404Spjd /* 3148168404Spjd * Open the intent log for it. 
3149168404Spjd */ 3150219089Spjd zilog = zil_open(os, ztest_get_data); 3151168404Spjd 3152168404Spjd /* 3153219089Spjd * Put some objects in there, do a little I/O to them, 3154219089Spjd * and randomly take a couple of snapshots along the way. 3155168404Spjd */ 3156219089Spjd iters = ztest_random(5); 3157219089Spjd for (int i = 0; i < iters; i++) { 3158219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3159219089Spjd if (ztest_random(iters) == 0) 3160219089Spjd (void) ztest_snapshot_create(name, i); 3161168404Spjd } 3162168404Spjd 3163168404Spjd /* 3164168404Spjd * Verify that we cannot create an existing dataset. 3165168404Spjd */ 3166219089Spjd VERIFY3U(EEXIST, ==, 3167219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3168168404Spjd 3169168404Spjd /* 3170219089Spjd * Verify that we can hold an objset that is also owned. 3171168404Spjd */ 3172219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3173219089Spjd dmu_objset_rele(os2, FTAG); 3174168404Spjd 3175219089Spjd /* 3176219089Spjd * Verify that we cannot own an objset that is already owned. 3177219089Spjd */ 3178219089Spjd VERIFY3U(EBUSY, ==, 3179219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3180219089Spjd 3181168404Spjd zil_close(zilog); 3182219089Spjd dmu_objset_disown(os, FTAG); 3183219089Spjd ztest_zd_fini(&zdtmp); 3184168404Spjd 3185236143Smm (void) rw_unlock(&ztest_name_lock); 3186168404Spjd} 3187168404Spjd 3188168404Spjd/* 3189168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 
3190168404Spjd */ 3191168404Spjdvoid 3192219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3193168404Spjd{ 3194236143Smm (void) rw_rdlock(&ztest_name_lock); 3195219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3196219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3197236143Smm (void) rw_unlock(&ztest_name_lock); 3198219089Spjd} 3199219089Spjd 3200219089Spjd/* 3201219089Spjd * Cleanup non-standard snapshots and clones. 3202219089Spjd */ 3203219089Spjdvoid 3204219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3205219089Spjd{ 3206219089Spjd char snap1name[MAXNAMELEN]; 3207219089Spjd char clone1name[MAXNAMELEN]; 3208219089Spjd char snap2name[MAXNAMELEN]; 3209219089Spjd char clone2name[MAXNAMELEN]; 3210219089Spjd char snap3name[MAXNAMELEN]; 3211168404Spjd int error; 3212168404Spjd 3213219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3214219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3215219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3216219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3217219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3218168404Spjd 3219219089Spjd error = dmu_objset_destroy(clone2name, B_FALSE); 3220219089Spjd if (error && error != ENOENT) 3221219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); 3222219089Spjd error = dmu_objset_destroy(snap3name, B_FALSE); 3223219089Spjd if (error && error != ENOENT) 3224219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); 3225219089Spjd error = dmu_objset_destroy(snap2name, B_FALSE); 3226219089Spjd if (error && error != ENOENT) 3227219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); 3228219089Spjd error = dmu_objset_destroy(clone1name, B_FALSE); 3229219089Spjd if (error && error != ENOENT) 3230219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, 
error); 3231219089Spjd error = dmu_objset_destroy(snap1name, B_FALSE); 3232219089Spjd if (error && error != ENOENT) 3233219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); 3234168404Spjd} 3235168404Spjd 3236168404Spjd/* 3237207910Smm * Verify dsl_dataset_promote handles EBUSY 3238207910Smm */ 3239207910Smmvoid 3240219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3241207910Smm{ 3242207910Smm objset_t *clone; 3243207910Smm dsl_dataset_t *ds; 3244219089Spjd char snap1name[MAXNAMELEN]; 3245219089Spjd char clone1name[MAXNAMELEN]; 3246219089Spjd char snap2name[MAXNAMELEN]; 3247219089Spjd char clone2name[MAXNAMELEN]; 3248219089Spjd char snap3name[MAXNAMELEN]; 3249219089Spjd char *osname = zd->zd_name; 3250219089Spjd int error; 3251207910Smm 3252236143Smm (void) rw_rdlock(&ztest_name_lock); 3253207910Smm 3254219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3255207910Smm 3256219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3257219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3258219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3259219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3260219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3261207910Smm 3262209962Smm error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, 3263219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3264209962Smm if (error && error != EEXIST) { 3265209962Smm if (error == ENOSPC) { 3266209962Smm ztest_record_enospc(FTAG); 3267209962Smm goto out; 3268209962Smm } 3269209962Smm fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3270209962Smm } 3271207910Smm 3272219089Spjd error = dmu_objset_hold(snap1name, FTAG, &clone); 3273207910Smm if (error) 3274207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); 3275207910Smm 3276219089Spjd error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); 
3277219089Spjd dmu_objset_rele(clone, FTAG); 3278209962Smm if (error) { 3279209962Smm if (error == ENOSPC) { 3280209962Smm ztest_record_enospc(FTAG); 3281209962Smm goto out; 3282209962Smm } 3283207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3284209962Smm } 3285207910Smm 3286207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, 3287219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3288209962Smm if (error && error != EEXIST) { 3289209962Smm if (error == ENOSPC) { 3290209962Smm ztest_record_enospc(FTAG); 3291209962Smm goto out; 3292209962Smm } 3293209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3294209962Smm } 3295207910Smm 3296207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, 3297219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3298209962Smm if (error && error != EEXIST) { 3299209962Smm if (error == ENOSPC) { 3300209962Smm ztest_record_enospc(FTAG); 3301209962Smm goto out; 3302209962Smm } 3303209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3304209962Smm } 3305207910Smm 3306219089Spjd error = dmu_objset_hold(snap3name, FTAG, &clone); 3307207910Smm if (error) 3308207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3309207910Smm 3310219089Spjd error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); 3311219089Spjd dmu_objset_rele(clone, FTAG); 3312209962Smm if (error) { 3313209962Smm if (error == ENOSPC) { 3314219089Spjd ztest_record_enospc(FTAG); 3315209962Smm goto out; 3316209962Smm } 3317207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3318209962Smm } 3319207910Smm 3320219089Spjd error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds); 3321207910Smm if (error) 3322219089Spjd fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error); 3323219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3324207910Smm if (error != EBUSY) 3325207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3326207910Smm error); 
3327207910Smm dsl_dataset_disown(ds, FTAG); 3328207910Smm 3329209962Smmout: 3330219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3331207910Smm 3332236143Smm (void) rw_unlock(&ztest_name_lock); 3333207910Smm} 3334207910Smm 3335207910Smm/* 3336168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3337168404Spjd */ 3338168404Spjdvoid 3339219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3340168404Spjd{ 3341219089Spjd ztest_od_t od[4]; 3342219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3343168404Spjd 3344219089Spjd for (int b = 0; b < batchsize; b++) 3345219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3346168404Spjd 3347168404Spjd /* 3348219089Spjd * Destroy the previous batch of objects, create a new batch, 3349219089Spjd * and do some I/O on the new objects. 3350168404Spjd */ 3351219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3352219089Spjd return; 3353168404Spjd 3354219089Spjd while (ztest_random(4 * batchsize) != 0) 3355219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3356219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3357168404Spjd} 3358168404Spjd 3359168404Spjd/* 3360168404Spjd * Verify that dmu_{read,write} work as expected. 
3361168404Spjd */ 3362168404Spjdvoid 3363219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3364168404Spjd{ 3365219089Spjd objset_t *os = zd->zd_os; 3366219089Spjd ztest_od_t od[2]; 3367168404Spjd dmu_tx_t *tx; 3368168404Spjd int i, freeit, error; 3369168404Spjd uint64_t n, s, txg; 3370168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3371219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3372219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3373168404Spjd uint64_t regions = 997; 3374168404Spjd uint64_t stride = 123456789ULL; 3375168404Spjd uint64_t width = 40; 3376168404Spjd int free_percent = 5; 3377168404Spjd 3378168404Spjd /* 3379168404Spjd * This test uses two objects, packobj and bigobj, that are always 3380168404Spjd * updated together (i.e. in the same tx) so that their contents are 3381168404Spjd * in sync and can be compared. Their contents relate to each other 3382168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3383168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3384168404Spjd * for any index n, there are three bufwads that should be identical: 3385168404Spjd * 3386168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3387168404Spjd * bigobj, at the head of the nth chunk 3388168404Spjd * bigobj, at the tail of the nth chunk 3389168404Spjd * 3390168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3391168404Spjd * and it doesn't have any relation to the object blocksize. 3392168404Spjd * The only requirement is that it can hold at least two bufwads. 3393168404Spjd * 3394168404Spjd * Normally, we write the bufwad to each of these locations. 3395168404Spjd * However, free_percent of the time we instead write zeroes to 3396168404Spjd * packobj and perform a dmu_free_range() on bigobj. 
By comparing 3397168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3398168404Spjd * tracking which parts of an object are allocated and free, 3399168404Spjd * and that the contents of the allocated blocks are correct. 3400168404Spjd */ 3401168404Spjd 3402168404Spjd /* 3403168404Spjd * Read the directory info. If it's the first time, set things up. 3404168404Spjd */ 3405219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3406219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3407168404Spjd 3408219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3409219089Spjd return; 3410168404Spjd 3411219089Spjd bigobj = od[0].od_object; 3412219089Spjd packobj = od[1].od_object; 3413219089Spjd chunksize = od[0].od_gen; 3414219089Spjd ASSERT(chunksize == od[1].od_gen); 3415168404Spjd 3416168404Spjd /* 3417168404Spjd * Prefetch a random chunk of the big object. 3418168404Spjd * Our aim here is to get some async reads in flight 3419168404Spjd * for blocks that we may free below; the DMU should 3420168404Spjd * handle this race correctly. 3421168404Spjd */ 3422168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3423168404Spjd s = 1 + ztest_random(2 * width - 1); 3424219089Spjd dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); 3425168404Spjd 3426168404Spjd /* 3427168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 
3428168404Spjd */ 3429168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3430168404Spjd s = 1 + ztest_random(width - 1); 3431168404Spjd 3432168404Spjd packoff = n * sizeof (bufwad_t); 3433168404Spjd packsize = s * sizeof (bufwad_t); 3434168404Spjd 3435219089Spjd bigoff = n * chunksize; 3436219089Spjd bigsize = s * chunksize; 3437168404Spjd 3438168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3439168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3440168404Spjd 3441168404Spjd /* 3442168404Spjd * free_percent of the time, free a range of bigobj rather than 3443168404Spjd * overwriting it. 3444168404Spjd */ 3445168404Spjd freeit = (ztest_random(100) < free_percent); 3446168404Spjd 3447168404Spjd /* 3448168404Spjd * Read the current contents of our objects. 3449168404Spjd */ 3450219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3451209962Smm DMU_READ_PREFETCH); 3452168404Spjd ASSERT3U(error, ==, 0); 3453219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3454209962Smm DMU_READ_PREFETCH); 3455168404Spjd ASSERT3U(error, ==, 0); 3456168404Spjd 3457168404Spjd /* 3458168404Spjd * Get a tx for the mods to both packobj and bigobj. 
3459168404Spjd */ 3460168404Spjd tx = dmu_tx_create(os); 3461168404Spjd 3462219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3463168404Spjd 3464168404Spjd if (freeit) 3465219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3466168404Spjd else 3467219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3468168404Spjd 3469219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3470219089Spjd if (txg == 0) { 3471168404Spjd umem_free(packbuf, packsize); 3472168404Spjd umem_free(bigbuf, bigsize); 3473168404Spjd return; 3474168404Spjd } 3475168404Spjd 3476219089Spjd dmu_object_set_checksum(os, bigobj, 3477219089Spjd (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); 3478168404Spjd 3479219089Spjd dmu_object_set_compress(os, bigobj, 3480219089Spjd (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); 3481219089Spjd 3482168404Spjd /* 3483168404Spjd * For each index from n to n + s, verify that the existing bufwad 3484168404Spjd * in packobj matches the bufwads at the head and tail of the 3485168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3486168404Spjd * with the new values we want to write out. 
3487168404Spjd */ 3488168404Spjd for (i = 0; i < s; i++) { 3489168404Spjd /* LINTED */ 3490168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3491168404Spjd /* LINTED */ 3492219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3493168404Spjd /* LINTED */ 3494219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3495168404Spjd 3496168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3497168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3498168404Spjd 3499168404Spjd if (pack->bw_txg > txg) 3500168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3501168404Spjd pack->bw_txg, txg); 3502168404Spjd 3503168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3504168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3505168404Spjd pack->bw_index, n, i); 3506168404Spjd 3507168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3508168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3509168404Spjd 3510168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3511168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3512168404Spjd 3513168404Spjd if (freeit) { 3514168404Spjd bzero(pack, sizeof (bufwad_t)); 3515168404Spjd } else { 3516168404Spjd pack->bw_index = n + i; 3517168404Spjd pack->bw_txg = txg; 3518168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3519168404Spjd } 3520168404Spjd *bigH = *pack; 3521168404Spjd *bigT = *pack; 3522168404Spjd } 3523168404Spjd 3524168404Spjd /* 3525168404Spjd * We've verified all the old bufwads, and made new ones. 3526168404Spjd * Now write them out. 
3527168404Spjd */ 3528219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3529168404Spjd 3530168404Spjd if (freeit) { 3531236143Smm if (ztest_opts.zo_verbose >= 7) { 3532168404Spjd (void) printf("freeing offset %llx size %llx" 3533168404Spjd " txg %llx\n", 3534168404Spjd (u_longlong_t)bigoff, 3535168404Spjd (u_longlong_t)bigsize, 3536168404Spjd (u_longlong_t)txg); 3537168404Spjd } 3538219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3539168404Spjd } else { 3540236143Smm if (ztest_opts.zo_verbose >= 7) { 3541168404Spjd (void) printf("writing offset %llx size %llx" 3542168404Spjd " txg %llx\n", 3543168404Spjd (u_longlong_t)bigoff, 3544168404Spjd (u_longlong_t)bigsize, 3545168404Spjd (u_longlong_t)txg); 3546168404Spjd } 3547219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3548168404Spjd } 3549168404Spjd 3550168404Spjd dmu_tx_commit(tx); 3551168404Spjd 3552168404Spjd /* 3553168404Spjd * Sanity check the stuff we just wrote. 3554168404Spjd */ 3555168404Spjd { 3556168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3557168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3558168404Spjd 3559219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3560209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3561219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3562209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3563168404Spjd 3564168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3565168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3566168404Spjd 3567168404Spjd umem_free(packcheck, packsize); 3568168404Spjd umem_free(bigcheck, bigsize); 3569168404Spjd } 3570168404Spjd 3571168404Spjd umem_free(packbuf, packsize); 3572168404Spjd umem_free(bigbuf, bigsize); 3573168404Spjd} 3574168404Spjd 3575168404Spjdvoid 3576209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3577219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3578209962Smm{ 
3579209962Smm uint64_t i; 3580209962Smm bufwad_t *pack; 3581209962Smm bufwad_t *bigH; 3582209962Smm bufwad_t *bigT; 3583209962Smm 3584209962Smm /* 3585209962Smm * For each index from n to n + s, verify that the existing bufwad 3586209962Smm * in packobj matches the bufwads at the head and tail of the 3587209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3588209962Smm * with the new values we want to write out. 3589209962Smm */ 3590209962Smm for (i = 0; i < s; i++) { 3591209962Smm /* LINTED */ 3592209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3593209962Smm /* LINTED */ 3594219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3595209962Smm /* LINTED */ 3596219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3597209962Smm 3598209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3599209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3600209962Smm 3601209962Smm if (pack->bw_txg > txg) 3602209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3603209962Smm pack->bw_txg, txg); 3604209962Smm 3605209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3606209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3607209962Smm pack->bw_index, n, i); 3608209962Smm 3609209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3610209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3611209962Smm 3612209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3613209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3614209962Smm 3615209962Smm pack->bw_index = n + i; 3616209962Smm pack->bw_txg = txg; 3617209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3618209962Smm 3619209962Smm *bigH = *pack; 3620209962Smm *bigT = *pack; 3621209962Smm } 3622209962Smm} 3623209962Smm 3624209962Smmvoid 3625219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3626209962Smm{ 3627219089Spjd objset_t *os = zd->zd_os; 3628219089Spjd ztest_od_t od[2]; 
3629209962Smm dmu_tx_t *tx; 3630209962Smm uint64_t i; 3631209962Smm int error; 3632209962Smm uint64_t n, s, txg; 3633209962Smm bufwad_t *packbuf, *bigbuf; 3634219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3635219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3636219089Spjd uint64_t chunksize = blocksize; 3637209962Smm uint64_t regions = 997; 3638209962Smm uint64_t stride = 123456789ULL; 3639209962Smm uint64_t width = 9; 3640209962Smm dmu_buf_t *bonus_db; 3641209962Smm arc_buf_t **bigbuf_arcbufs; 3642219089Spjd dmu_object_info_t doi; 3643209962Smm 3644209962Smm /* 3645209962Smm * This test uses two objects, packobj and bigobj, that are always 3646209962Smm * updated together (i.e. in the same tx) so that their contents are 3647209962Smm * in sync and can be compared. Their contents relate to each other 3648209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3649209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3650209962Smm * for any index n, there are three bufwads that should be identical: 3651209962Smm * 3652209962Smm * packobj, at offset n * sizeof (bufwad_t) 3653209962Smm * bigobj, at the head of the nth chunk 3654209962Smm * bigobj, at the tail of the nth chunk 3655209962Smm * 3656209962Smm * The chunk size is set equal to bigobj block size so that 3657209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3658209962Smm */ 3659209962Smm 3660209962Smm /* 3661209962Smm * Read the directory info. If it's the first time, set things up. 
3662209962Smm */ 3663219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3664219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3665209962Smm 3666219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3667219089Spjd return; 3668209962Smm 3669219089Spjd bigobj = od[0].od_object; 3670219089Spjd packobj = od[1].od_object; 3671219089Spjd blocksize = od[0].od_blocksize; 3672219089Spjd chunksize = blocksize; 3673219089Spjd ASSERT(chunksize == od[1].od_gen); 3674209962Smm 3675219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3676219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3677219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3678219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3679209962Smm 3680209962Smm /* 3681209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3682209962Smm */ 3683209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3684209962Smm s = 1 + ztest_random(width - 1); 3685209962Smm 3686209962Smm packoff = n * sizeof (bufwad_t); 3687209962Smm packsize = s * sizeof (bufwad_t); 3688209962Smm 3689219089Spjd bigoff = n * chunksize; 3690219089Spjd bigsize = s * chunksize; 3691209962Smm 3692209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3693209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3694209962Smm 3695219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3696209962Smm 3697209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3698209962Smm 3699209962Smm /* 3700209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3701209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3702209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3703209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3704209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3705209962Smm * Iteration 5 test zcopy when it can't be done. 
3706209962Smm * Iteration 6 one more zcopy write. 3707209962Smm */ 3708209962Smm for (i = 0; i < 7; i++) { 3709209962Smm uint64_t j; 3710209962Smm uint64_t off; 3711209962Smm 3712209962Smm /* 3713209962Smm * In iteration 5 (i == 5) use arcbufs 3714209962Smm * that don't match bigobj blksz to test 3715209962Smm * dmu_assign_arcbuf() when it can't directly 3716209962Smm * assign an arcbuf to a dbuf. 3717209962Smm */ 3718209962Smm for (j = 0; j < s; j++) { 3719209962Smm if (i != 5) { 3720209962Smm bigbuf_arcbufs[j] = 3721219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3722209962Smm } else { 3723209962Smm bigbuf_arcbufs[2 * j] = 3724219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3725209962Smm bigbuf_arcbufs[2 * j + 1] = 3726219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3727209962Smm } 3728209962Smm } 3729209962Smm 3730209962Smm /* 3731209962Smm * Get a tx for the mods to both packobj and bigobj. 3732209962Smm */ 3733209962Smm tx = dmu_tx_create(os); 3734209962Smm 3735219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3736219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3737209962Smm 3738219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3739219089Spjd if (txg == 0) { 3740209962Smm umem_free(packbuf, packsize); 3741209962Smm umem_free(bigbuf, bigsize); 3742209962Smm for (j = 0; j < s; j++) { 3743209962Smm if (i != 5) { 3744209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3745209962Smm } else { 3746209962Smm dmu_return_arcbuf( 3747209962Smm bigbuf_arcbufs[2 * j]); 3748209962Smm dmu_return_arcbuf( 3749209962Smm bigbuf_arcbufs[2 * j + 1]); 3750209962Smm } 3751209962Smm } 3752209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3753209962Smm dmu_buf_rele(bonus_db, FTAG); 3754209962Smm return; 3755209962Smm } 3756209962Smm 3757209962Smm /* 3758209962Smm * 50% of the time don't read objects in the 1st iteration to 3759209962Smm * test dmu_assign_arcbuf() for the case when there're no 3760209962Smm * existing 
dbufs for the specified offsets. 3761209962Smm */ 3762209962Smm if (i != 0 || ztest_random(2) != 0) { 3763219089Spjd error = dmu_read(os, packobj, packoff, 3764209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3765209962Smm ASSERT3U(error, ==, 0); 3766219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3767209962Smm bigbuf, DMU_READ_PREFETCH); 3768209962Smm ASSERT3U(error, ==, 0); 3769209962Smm } 3770209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3771219089Spjd n, chunksize, txg); 3772209962Smm 3773209962Smm /* 3774209962Smm * We've verified all the old bufwads, and made new ones. 3775209962Smm * Now write them out. 3776209962Smm */ 3777219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3778236143Smm if (ztest_opts.zo_verbose >= 7) { 3779209962Smm (void) printf("writing offset %llx size %llx" 3780209962Smm " txg %llx\n", 3781209962Smm (u_longlong_t)bigoff, 3782209962Smm (u_longlong_t)bigsize, 3783209962Smm (u_longlong_t)txg); 3784209962Smm } 3785219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3786209962Smm dmu_buf_t *dbt; 3787209962Smm if (i != 5) { 3788209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3789219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3790209962Smm } else { 3791209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3792209962Smm bigbuf_arcbufs[2 * j]->b_data, 3793219089Spjd chunksize / 2); 3794209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3795219089Spjd chunksize / 2, 3796209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3797219089Spjd chunksize / 2); 3798209962Smm } 3799209962Smm 3800209962Smm if (i == 1) { 3801219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3802219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3803209962Smm } 3804209962Smm if (i != 5) { 3805209962Smm dmu_assign_arcbuf(bonus_db, off, 3806209962Smm bigbuf_arcbufs[j], tx); 3807209962Smm } else { 3808209962Smm dmu_assign_arcbuf(bonus_db, off, 3809209962Smm bigbuf_arcbufs[2 * j], tx); 3810209962Smm dmu_assign_arcbuf(bonus_db, 
3811219089Spjd off + chunksize / 2, 3812209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3813209962Smm } 3814209962Smm if (i == 1) { 3815209962Smm dmu_buf_rele(dbt, FTAG); 3816209962Smm } 3817209962Smm } 3818209962Smm dmu_tx_commit(tx); 3819209962Smm 3820209962Smm /* 3821209962Smm * Sanity check the stuff we just wrote. 3822209962Smm */ 3823209962Smm { 3824209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3825209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3826209962Smm 3827219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3828209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3829219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3830209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3831209962Smm 3832209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3833209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3834209962Smm 3835209962Smm umem_free(packcheck, packsize); 3836209962Smm umem_free(bigcheck, bigsize); 3837209962Smm } 3838209962Smm if (i == 2) { 3839209962Smm txg_wait_open(dmu_objset_pool(os), 0); 3840209962Smm } else if (i == 3) { 3841209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 3842209962Smm } 3843209962Smm } 3844209962Smm 3845209962Smm dmu_buf_rele(bonus_db, FTAG); 3846209962Smm umem_free(packbuf, packsize); 3847209962Smm umem_free(bigbuf, bigsize); 3848209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3849209962Smm} 3850209962Smm 3851219089Spjd/* ARGSUSED */ 3852209962Smmvoid 3853219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 3854168404Spjd{ 3855219089Spjd ztest_od_t od[1]; 3856219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 3857219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3858168404Spjd 3859168404Spjd /* 3860219089Spjd * Have multiple threads write to large offsets in an object 3861219089Spjd * to verify that parallel writes to an object -- even to the 3862219089Spjd * same blocks within the object -- doesn't cause any trouble. 
3863168404Spjd */ 3864219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 3865219089Spjd 3866219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3867219089Spjd return; 3868219089Spjd 3869219089Spjd while (ztest_random(10) != 0) 3870219089Spjd ztest_io(zd, od[0].od_object, offset); 3871168404Spjd} 3872168404Spjd 3873168404Spjdvoid 3874219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 3875168404Spjd{ 3876219089Spjd ztest_od_t od[1]; 3877219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 3878219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3879219089Spjd uint64_t count = ztest_random(20) + 1; 3880219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3881219089Spjd void *data; 3882168404Spjd 3883219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3884168404Spjd 3885219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 3886185029Spjd return; 3887168404Spjd 3888219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 3889185029Spjd return; 3890168404Spjd 3891219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 3892185029Spjd 3893219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 3894185029Spjd 3895219089Spjd while (ztest_random(count) != 0) { 3896219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 3897219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 3898219089Spjd data) != 0) 3899219089Spjd break; 3900219089Spjd while (ztest_random(4) != 0) 3901219089Spjd ztest_io(zd, od[0].od_object, randoff); 3902185029Spjd } 3903168404Spjd 3904219089Spjd umem_free(data, blocksize); 3905168404Spjd} 3906168404Spjd 3907168404Spjd/* 3908168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 
3909168404Spjd */ 3910168404Spjd#define ZTEST_ZAP_MIN_INTS 1 3911168404Spjd#define ZTEST_ZAP_MAX_INTS 4 3912168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 3913168404Spjd 3914168404Spjdvoid 3915219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 3916168404Spjd{ 3917219089Spjd objset_t *os = zd->zd_os; 3918219089Spjd ztest_od_t od[1]; 3919168404Spjd uint64_t object; 3920168404Spjd uint64_t txg, last_txg; 3921168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 3922168404Spjd uint64_t zl_ints, zl_intsize, prop; 3923168404Spjd int i, ints; 3924168404Spjd dmu_tx_t *tx; 3925168404Spjd char propname[100], txgname[100]; 3926168404Spjd int error; 3927168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 3928168404Spjd 3929219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 3930168404Spjd 3931219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 3932219089Spjd return; 3933219089Spjd 3934219089Spjd object = od[0].od_object; 3935219089Spjd 3936168404Spjd /* 3937219089Spjd * Generate a known hash collision, and verify that 3938219089Spjd * we can lookup and remove both entries. 
3939168404Spjd */ 3940219089Spjd tx = dmu_tx_create(os); 3941219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 3942219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3943219089Spjd if (txg == 0) 3944219089Spjd return; 3945219089Spjd for (i = 0; i < 2; i++) { 3946219089Spjd value[i] = i; 3947219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 3948219089Spjd 1, &value[i], tx)); 3949168404Spjd } 3950219089Spjd for (i = 0; i < 2; i++) { 3951219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 3952219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 3953219089Spjd VERIFY3U(0, ==, 3954219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 3955219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 3956219089Spjd ASSERT3U(zl_ints, ==, 1); 3957219089Spjd } 3958219089Spjd for (i = 0; i < 2; i++) { 3959219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 3960219089Spjd } 3961219089Spjd dmu_tx_commit(tx); 3962168404Spjd 3963219089Spjd /* 3964219089Spjd * Generate a buch of random entries. 3965219089Spjd */ 3966168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 3967168404Spjd 3968185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 3969185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 3970185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 3971185029Spjd bzero(value, sizeof (value)); 3972185029Spjd last_txg = 0; 3973168404Spjd 3974185029Spjd /* 3975185029Spjd * If these zap entries already exist, validate their contents. 
3976185029Spjd */ 3977185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 3978185029Spjd if (error == 0) { 3979185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 3980185029Spjd ASSERT3U(zl_ints, ==, 1); 3981168404Spjd 3982185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 3983185029Spjd zl_ints, &last_txg) == 0); 3984168404Spjd 3985185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 3986185029Spjd &zl_ints) == 0); 3987168404Spjd 3988185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 3989185029Spjd ASSERT3U(zl_ints, ==, ints); 3990168404Spjd 3991185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 3992185029Spjd zl_ints, value) == 0); 3993168404Spjd 3994185029Spjd for (i = 0; i < ints; i++) { 3995185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 3996168404Spjd } 3997185029Spjd } else { 3998185029Spjd ASSERT3U(error, ==, ENOENT); 3999185029Spjd } 4000168404Spjd 4001185029Spjd /* 4002185029Spjd * Atomically update two entries in our zap object. 4003185029Spjd * The first is named txg_%llu, and contains the txg 4004185029Spjd * in which the property was last updated. The second 4005185029Spjd * is named prop_%llu, and the nth element of its value 4006185029Spjd * should be txg + object + n. 
4007185029Spjd */ 4008185029Spjd tx = dmu_tx_create(os); 4009219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4010219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4011219089Spjd if (txg == 0) 4012185029Spjd return; 4013168404Spjd 4014185029Spjd if (last_txg > txg) 4015185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4016168404Spjd 4017185029Spjd for (i = 0; i < ints; i++) 4018185029Spjd value[i] = txg + object + i; 4019168404Spjd 4020219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4021219089Spjd 1, &txg, tx)); 4022219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4023219089Spjd ints, value, tx)); 4024168404Spjd 4025185029Spjd dmu_tx_commit(tx); 4026168404Spjd 4027185029Spjd /* 4028185029Spjd * Remove a random pair of entries. 4029185029Spjd */ 4030185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4031185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4032185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4033168404Spjd 4034185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4035168404Spjd 4036185029Spjd if (error == ENOENT) 4037185029Spjd return; 4038168404Spjd 4039185029Spjd ASSERT3U(error, ==, 0); 4040168404Spjd 4041185029Spjd tx = dmu_tx_create(os); 4042219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4043219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4044219089Spjd if (txg == 0) 4045185029Spjd return; 4046219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4047219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4048185029Spjd dmu_tx_commit(tx); 4049168404Spjd} 4050168404Spjd 4051209962Smm/* 4052209962Smm * Testcase to test the upgrading of a microzap to fatzap. 
4053209962Smm */ 4054168404Spjdvoid 4055219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4056209962Smm{ 4057219089Spjd objset_t *os = zd->zd_os; 4058219089Spjd ztest_od_t od[1]; 4059219089Spjd uint64_t object, txg; 4060209962Smm 4061219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4062209962Smm 4063219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4064219089Spjd return; 4065209962Smm 4066219089Spjd object = od[0].od_object; 4067209962Smm 4068209962Smm /* 4069219089Spjd * Add entries to this ZAP and make sure it spills over 4070209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4071219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4072209962Smm */ 4073219089Spjd for (int i = 0; i < 2050; i++) { 4074219089Spjd char name[MAXNAMELEN]; 4075219089Spjd uint64_t value = i; 4076219089Spjd dmu_tx_t *tx; 4077219089Spjd int error; 4078209962Smm 4079219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4080219089Spjd id, value); 4081219089Spjd 4082209962Smm tx = dmu_tx_create(os); 4083219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4084219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4085219089Spjd if (txg == 0) 4086209962Smm return; 4087219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4088219089Spjd &value, tx); 4089209962Smm ASSERT(error == 0 || error == EEXIST); 4090209962Smm dmu_tx_commit(tx); 4091209962Smm } 4092209962Smm} 4093209962Smm 4094219089Spjd/* ARGSUSED */ 4095209962Smmvoid 4096219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4097168404Spjd{ 4098219089Spjd objset_t *os = zd->zd_os; 4099219089Spjd ztest_od_t od[1]; 4100168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4101168404Spjd dmu_tx_t *tx; 4102168404Spjd int i, namelen, error; 4103219089Spjd int micro = ztest_random(2); 4104168404Spjd char name[20], string_value[20]; 4105168404Spjd void *data; 4106168404Spjd 4107219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4108219089Spjd 4109219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4110219089Spjd return; 4111219089Spjd 4112219089Spjd object = od[0].od_object; 4113219089Spjd 4114185029Spjd /* 4115185029Spjd * Generate a random name of the form 'xxx.....' where each 4116185029Spjd * x is a random printable character and the dots are dots. 4117185029Spjd * There are 94 such characters, and the name length goes from 4118185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4119185029Spjd */ 4120185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4121168404Spjd 4122185029Spjd for (i = 0; i < 3; i++) 4123185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4124185029Spjd for (; i < namelen - 1; i++) 4125185029Spjd name[i] = '.'; 4126185029Spjd name[i] = '\0'; 4127168404Spjd 4128219089Spjd if ((namelen & 1) || micro) { 4129185029Spjd wsize = sizeof (txg); 4130185029Spjd wc = 1; 4131185029Spjd data = &txg; 4132185029Spjd } else { 4133185029Spjd wsize = 1; 4134185029Spjd wc = namelen; 4135185029Spjd data = string_value; 4136185029Spjd } 4137168404Spjd 4138185029Spjd count = -1ULL; 4139185029Spjd VERIFY(zap_count(os, object, &count) == 0); 4140185029Spjd ASSERT(count != -1ULL); 4141168404Spjd 4142185029Spjd /* 4143185029Spjd * Select an operation: length, lookup, add, update, remove. 
4144185029Spjd */ 4145185029Spjd i = ztest_random(5); 4146168404Spjd 4147185029Spjd if (i >= 2) { 4148185029Spjd tx = dmu_tx_create(os); 4149219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4150219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4151219089Spjd if (txg == 0) 4152185029Spjd return; 4153185029Spjd bcopy(name, string_value, namelen); 4154185029Spjd } else { 4155185029Spjd tx = NULL; 4156185029Spjd txg = 0; 4157185029Spjd bzero(string_value, namelen); 4158185029Spjd } 4159168404Spjd 4160185029Spjd switch (i) { 4161168404Spjd 4162185029Spjd case 0: 4163185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4164185029Spjd if (error == 0) { 4165185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4166185029Spjd ASSERT3U(wc, ==, zl_wc); 4167185029Spjd } else { 4168185029Spjd ASSERT3U(error, ==, ENOENT); 4169185029Spjd } 4170185029Spjd break; 4171168404Spjd 4172185029Spjd case 1: 4173185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4174185029Spjd if (error == 0) { 4175185029Spjd if (data == string_value && 4176185029Spjd bcmp(name, data, namelen) != 0) 4177185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4178185029Spjd name, data, namelen); 4179185029Spjd } else { 4180185029Spjd ASSERT3U(error, ==, ENOENT); 4181185029Spjd } 4182185029Spjd break; 4183168404Spjd 4184185029Spjd case 2: 4185185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4186185029Spjd ASSERT(error == 0 || error == EEXIST); 4187185029Spjd break; 4188168404Spjd 4189185029Spjd case 3: 4190185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4191185029Spjd break; 4192168404Spjd 4193185029Spjd case 4: 4194185029Spjd error = zap_remove(os, object, name, tx); 4195185029Spjd ASSERT(error == 0 || error == ENOENT); 4196185029Spjd break; 4197185029Spjd } 4198168404Spjd 4199185029Spjd if (tx != NULL) 4200185029Spjd dmu_tx_commit(tx); 4201168404Spjd} 4202168404Spjd 4203219089Spjd/* 4204219089Spjd * Commit callback data. 
4205219089Spjd */ 4206219089Spjdtypedef struct ztest_cb_data { 4207219089Spjd list_node_t zcd_node; 4208219089Spjd uint64_t zcd_txg; 4209219089Spjd int zcd_expected_err; 4210219089Spjd boolean_t zcd_added; 4211219089Spjd boolean_t zcd_called; 4212219089Spjd spa_t *zcd_spa; 4213219089Spjd} ztest_cb_data_t; 4214219089Spjd 4215219089Spjd/* This is the actual commit callback function */ 4216219089Spjdstatic void 4217219089Spjdztest_commit_callback(void *arg, int error) 4218219089Spjd{ 4219219089Spjd ztest_cb_data_t *data = arg; 4220219089Spjd uint64_t synced_txg; 4221219089Spjd 4222219089Spjd VERIFY(data != NULL); 4223219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4224219089Spjd VERIFY(!data->zcd_called); 4225219089Spjd 4226219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4227219089Spjd if (data->zcd_txg > synced_txg) 4228219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4229219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4230219089Spjd synced_txg); 4231219089Spjd 4232219089Spjd data->zcd_called = B_TRUE; 4233219089Spjd 4234219089Spjd if (error == ECANCELED) { 4235219089Spjd ASSERT3U(data->zcd_txg, ==, 0); 4236219089Spjd ASSERT(!data->zcd_added); 4237219089Spjd 4238219089Spjd /* 4239219089Spjd * The private callback data should be destroyed here, but 4240219089Spjd * since we are going to check the zcd_called field after 4241219089Spjd * dmu_tx_abort(), we will destroy it there. 4242219089Spjd */ 4243219089Spjd return; 4244219089Spjd } 4245219089Spjd 4246219089Spjd /* Was this callback added to the global callback list? 
*/ 4247219089Spjd if (!data->zcd_added) 4248219089Spjd goto out; 4249219089Spjd 4250219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4251219089Spjd 4252219089Spjd /* Remove our callback from the list */ 4253219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4254219089Spjd list_remove(&zcl.zcl_callbacks, data); 4255219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4256219089Spjd 4257219089Spjdout: 4258219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4259219089Spjd} 4260219089Spjd 4261219089Spjd/* Allocate and initialize callback data structure */ 4262219089Spjdstatic ztest_cb_data_t * 4263219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4264219089Spjd{ 4265219089Spjd ztest_cb_data_t *cb_data; 4266219089Spjd 4267219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4268219089Spjd 4269219089Spjd cb_data->zcd_txg = txg; 4270219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4271219089Spjd 4272219089Spjd return (cb_data); 4273219089Spjd} 4274219089Spjd 4275219089Spjd/* 4276219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4277219089Spjd * callback has been registered but not called, then we assume there is an 4278219089Spjd * implementation bug. 4279219089Spjd */ 4280219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4281219089Spjd 4282219089Spjd/* 4283219089Spjd * Commit callback test. 
4284219089Spjd */ 4285168404Spjdvoid 4286219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4287168404Spjd{ 4288219089Spjd objset_t *os = zd->zd_os; 4289219089Spjd ztest_od_t od[1]; 4290219089Spjd dmu_tx_t *tx; 4291219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4292219089Spjd uint64_t old_txg, txg; 4293219089Spjd int i, error; 4294219089Spjd 4295219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4296219089Spjd 4297219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4298219089Spjd return; 4299219089Spjd 4300219089Spjd tx = dmu_tx_create(os); 4301219089Spjd 4302219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4303219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4304219089Spjd 4305219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4306219089Spjd 4307219089Spjd /* Every once in a while, abort the transaction on purpose */ 4308219089Spjd if (ztest_random(100) == 0) 4309219089Spjd error = -1; 4310219089Spjd 4311219089Spjd if (!error) 4312219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4313219089Spjd 4314219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4315219089Spjd 4316219089Spjd cb_data[0]->zcd_txg = txg; 4317219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4318219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4319219089Spjd 4320219089Spjd if (error) { 4321219089Spjd /* 4322219089Spjd * It's not a strict requirement to call the registered 4323219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4324219089Spjd * it's supposed to happen in the current implementation 4325219089Spjd * so we will check for that. 
4326219089Spjd */ 4327219089Spjd for (i = 0; i < 2; i++) { 4328219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4329219089Spjd VERIFY(!cb_data[i]->zcd_called); 4330219089Spjd } 4331219089Spjd 4332219089Spjd dmu_tx_abort(tx); 4333219089Spjd 4334219089Spjd for (i = 0; i < 2; i++) { 4335219089Spjd VERIFY(cb_data[i]->zcd_called); 4336219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4337219089Spjd } 4338219089Spjd 4339219089Spjd return; 4340219089Spjd } 4341219089Spjd 4342219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4343219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4344219089Spjd 4345219089Spjd /* 4346219089Spjd * Read existing data to make sure there isn't a future leak. 4347219089Spjd */ 4348219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4349219089Spjd &old_txg, DMU_READ_PREFETCH)); 4350219089Spjd 4351219089Spjd if (old_txg > txg) 4352219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4353219089Spjd old_txg, txg); 4354219089Spjd 4355219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4356219089Spjd 4357219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4358219089Spjd 4359219089Spjd /* 4360219089Spjd * Since commit callbacks don't have any ordering requirement and since 4361219089Spjd * it is theoretically possible for a commit callback to be called 4362219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4363219089Spjd * synced, it is difficult to reliably determine whether a commit 4364219089Spjd * callback hasn't been called due to high load or due to a flawed 4365219089Spjd * implementation. 4366219089Spjd * 4367219089Spjd * In practice, we will assume that if after a certain number of txgs a 4368219089Spjd * commit callback hasn't been called, then most likely there's an 4369219089Spjd * implementation bug.. 
4370219089Spjd */ 4371219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4372219089Spjd if (tmp_cb != NULL && 4373219089Spjd tmp_cb->zcd_txg > txg - ZTEST_COMMIT_CALLBACK_THRESH) { 4374219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4375219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4376219089Spjd } 4377219089Spjd 4378219089Spjd /* 4379219089Spjd * Let's find the place to insert our callbacks. 4380219089Spjd * 4381219089Spjd * Even though the list is ordered by txg, it is possible for the 4382219089Spjd * insertion point to not be the end because our txg may already be 4383219089Spjd * quiescing at this point and other callbacks in the open txg 4384219089Spjd * (from other objsets) may have sneaked in. 4385219089Spjd */ 4386219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4387219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4388219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4389219089Spjd 4390219089Spjd /* Add the 3 callbacks to the list */ 4391219089Spjd for (i = 0; i < 3; i++) { 4392219089Spjd if (tmp_cb == NULL) 4393219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4394219089Spjd else 4395219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4396219089Spjd cb_data[i]); 4397219089Spjd 4398219089Spjd cb_data[i]->zcd_added = B_TRUE; 4399219089Spjd VERIFY(!cb_data[i]->zcd_called); 4400219089Spjd 4401219089Spjd tmp_cb = cb_data[i]; 4402219089Spjd } 4403219089Spjd 4404219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4405219089Spjd 4406219089Spjd dmu_tx_commit(tx); 4407219089Spjd} 4408219089Spjd 4409219089Spjd/* ARGSUSED */ 4410219089Spjdvoid 4411219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4412219089Spjd{ 4413219089Spjd zfs_prop_t proplist[] = { 4414219089Spjd ZFS_PROP_CHECKSUM, 4415219089Spjd ZFS_PROP_COMPRESSION, 4416219089Spjd ZFS_PROP_COPIES, 4417219089Spjd ZFS_PROP_DEDUP 4418219089Spjd }; 4419219089Spjd 4420236143Smm (void) rw_rdlock(&ztest_name_lock); 
4421219089Spjd 4422219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4423219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4424219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4425219089Spjd 4426236143Smm (void) rw_unlock(&ztest_name_lock); 4427219089Spjd} 4428219089Spjd 4429219089Spjd/* ARGSUSED */ 4430219089Spjdvoid 4431219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4432219089Spjd{ 4433219089Spjd nvlist_t *props = NULL; 4434219089Spjd 4435236143Smm (void) rw_rdlock(&ztest_name_lock); 4436219089Spjd 4437236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4438219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4439219089Spjd 4440236143Smm VERIFY3U(spa_prop_get(ztest_spa, &props), ==, 0); 4441219089Spjd 4442236143Smm if (ztest_opts.zo_verbose >= 6) 4443219089Spjd dump_nvlist(props, 4); 4444219089Spjd 4445219089Spjd nvlist_free(props); 4446219089Spjd 4447236143Smm (void) rw_unlock(&ztest_name_lock); 4448219089Spjd} 4449219089Spjd 4450219089Spjd/* 4451219089Spjd * Test snapshot hold/release and deferred destroy. 
4452219089Spjd */ 4453219089Spjdvoid 4454219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4455219089Spjd{ 4456219089Spjd int error; 4457219089Spjd objset_t *os = zd->zd_os; 4458219089Spjd objset_t *origin; 4459219089Spjd char snapname[100]; 4460219089Spjd char fullname[100]; 4461219089Spjd char clonename[100]; 4462219089Spjd char tag[100]; 4463168404Spjd char osname[MAXNAMELEN]; 4464168404Spjd 4465236143Smm (void) rw_rdlock(&ztest_name_lock); 4466168404Spjd 4467168404Spjd dmu_objset_name(os, osname); 4468168404Spjd 4469219089Spjd (void) snprintf(snapname, 100, "sh1_%llu", id); 4470219089Spjd (void) snprintf(fullname, 100, "%s@%s", osname, snapname); 4471219089Spjd (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); 4472219089Spjd (void) snprintf(tag, 100, "%tag_%llu", id); 4473219089Spjd 4474219089Spjd /* 4475219089Spjd * Clean up from any previous run. 4476219089Spjd */ 4477219089Spjd (void) dmu_objset_destroy(clonename, B_FALSE); 4478219089Spjd (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE); 4479219089Spjd (void) dmu_objset_destroy(fullname, B_FALSE); 4480219089Spjd 4481219089Spjd /* 4482219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4483219089Spjd * destroy clone, verify snap was also destroyed. 
4484219089Spjd */ 4485219089Spjd error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, 4486219089Spjd FALSE, -1); 4487219089Spjd if (error) { 4488219089Spjd if (error == ENOSPC) { 4489219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4490219089Spjd goto out; 4491168404Spjd } 4492219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4493219089Spjd } 4494168404Spjd 4495219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4496219089Spjd if (error) 4497219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4498168404Spjd 4499219089Spjd error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0); 4500219089Spjd dmu_objset_rele(origin, FTAG); 4501219089Spjd if (error) { 4502168404Spjd if (error == ENOSPC) { 4503219089Spjd ztest_record_enospc("dmu_objset_clone"); 4504219089Spjd goto out; 4505168404Spjd } 4506219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4507219089Spjd } 4508168404Spjd 4509219089Spjd error = dmu_objset_destroy(fullname, B_TRUE); 4510219089Spjd if (error) { 4511219089Spjd fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", 4512219089Spjd fullname, error); 4513219089Spjd } 4514168404Spjd 4515219089Spjd error = dmu_objset_destroy(clonename, B_FALSE); 4516219089Spjd if (error) 4517219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error); 4518168404Spjd 4519219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4520219089Spjd if (error != ENOENT) 4521219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4522168404Spjd 4523219089Spjd /* 4524219089Spjd * Create snapshot, add temporary hold, verify that we can't 4525219089Spjd * destroy a held snapshot, mark for deferred destroy, 4526219089Spjd * release hold, verify snapshot was destroyed. 
4527219089Spjd */ 4528219089Spjd error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, 4529219089Spjd FALSE, -1); 4530219089Spjd if (error) { 4531219089Spjd if (error == ENOSPC) { 4532219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4533219089Spjd goto out; 4534168404Spjd } 4535219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4536168404Spjd } 4537168404Spjd 4538219089Spjd error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, 4539219089Spjd B_TRUE, -1); 4540219089Spjd if (error) 4541219089Spjd fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); 4542219089Spjd 4543219089Spjd error = dmu_objset_destroy(fullname, B_FALSE); 4544219089Spjd if (error != EBUSY) { 4545219089Spjd fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d", 4546219089Spjd fullname, error); 4547219089Spjd } 4548219089Spjd 4549219089Spjd error = dmu_objset_destroy(fullname, B_TRUE); 4550219089Spjd if (error) { 4551219089Spjd fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", 4552219089Spjd fullname, error); 4553219089Spjd } 4554219089Spjd 4555219089Spjd error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE); 4556219089Spjd if (error) 4557219089Spjd fatal(0, "dsl_dataset_user_release(%s)", fullname, tag); 4558219089Spjd 4559219089Spjd VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); 4560219089Spjd 4561219089Spjdout: 4562236143Smm (void) rw_unlock(&ztest_name_lock); 4563168404Spjd} 4564168404Spjd 4565168404Spjd/* 4566168404Spjd * Inject random faults into the on-disk data. 
4567168404Spjd */ 4568219089Spjd/* ARGSUSED */ 4569168404Spjdvoid 4570219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4571168404Spjd{ 4572219089Spjd ztest_shared_t *zs = ztest_shared; 4573236143Smm spa_t *spa = ztest_spa; 4574168404Spjd int fd; 4575168404Spjd uint64_t offset; 4576219089Spjd uint64_t leaves; 4577168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4578168404Spjd uint64_t top, leaf; 4579168404Spjd char path0[MAXPATHLEN]; 4580168404Spjd char pathrand[MAXPATHLEN]; 4581168404Spjd size_t fsize; 4582168404Spjd int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4583168404Spjd int iters = 1000; 4584219089Spjd int maxfaults; 4585219089Spjd int mirror_save; 4586185029Spjd vdev_t *vd0 = NULL; 4587168404Spjd uint64_t guid0 = 0; 4588219089Spjd boolean_t islog = B_FALSE; 4589168404Spjd 4590236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4591219089Spjd maxfaults = MAXFAULTS(); 4592236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4593219089Spjd mirror_save = zs->zs_mirrors; 4594236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4595219089Spjd 4596185029Spjd ASSERT(leaves >= 1); 4597168404Spjd 4598168404Spjd /* 4599185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4600168404Spjd */ 4601185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4602168404Spjd 4603185029Spjd if (ztest_random(2) == 0) { 4604185029Spjd /* 4605219089Spjd * Inject errors on a normal data device or slog device. 4606185029Spjd */ 4607219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4608219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4609168404Spjd 4610185029Spjd /* 4611185029Spjd * Generate paths to the first leaf in this top-level vdev, 4612185029Spjd * and to the random leaf we selected. We'll induce transient 4613185029Spjd * write failures and random online/offline activity on leaf 0, 4614185029Spjd * and we'll write random garbage to the randomly chosen leaf. 
4615185029Spjd */ 4616185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4617236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4618236143Smm top * leaves + zs->zs_splits); 4619185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4620236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4621236143Smm top * leaves + leaf); 4622168404Spjd 4623185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4624219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4625219089Spjd islog = B_TRUE; 4626219089Spjd 4627185029Spjd if (vd0 != NULL && maxfaults != 1) { 4628185029Spjd /* 4629185029Spjd * Make vd0 explicitly claim to be unreadable, 4630185029Spjd * or unwriteable, or reach behind its back 4631185029Spjd * and close the underlying fd. We can do this if 4632185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4633185029Spjd * and we can do it if maxfaults >= 2 because we'll 4634185029Spjd * have enough redundancy. If maxfaults == 1, the 4635185029Spjd * combination of this with injection of random data 4636185029Spjd * corruption below exceeds the pool's fault tolerance. 4637185029Spjd */ 4638185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4639168404Spjd 4640185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4641185029Spjd (void) close(vf->vf_vnode->v_fd); 4642185029Spjd vf->vf_vnode->v_fd = -1; 4643185029Spjd } else if (ztest_random(2) == 0) { 4644185029Spjd vd0->vdev_cant_read = B_TRUE; 4645185029Spjd } else { 4646185029Spjd vd0->vdev_cant_write = B_TRUE; 4647185029Spjd } 4648185029Spjd guid0 = vd0->vdev_guid; 4649185029Spjd } 4650185029Spjd } else { 4651185029Spjd /* 4652185029Spjd * Inject errors on an l2cache device. 
4653185029Spjd */ 4654185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4655168404Spjd 4656185029Spjd if (sav->sav_count == 0) { 4657185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4658185029Spjd return; 4659185029Spjd } 4660185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4661168404Spjd guid0 = vd0->vdev_guid; 4662185029Spjd (void) strcpy(path0, vd0->vdev_path); 4663185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4664185029Spjd 4665185029Spjd leaf = 0; 4666185029Spjd leaves = 1; 4667185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4668168404Spjd } 4669168404Spjd 4670185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4671185029Spjd 4672168404Spjd /* 4673219089Spjd * If we can tolerate two or more faults, or we're dealing 4674219089Spjd * with a slog, randomly online/offline vd0. 4675168404Spjd */ 4676219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4677209962Smm if (ztest_random(10) < 6) { 4678209962Smm int flags = (ztest_random(2) == 0 ? 4679209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4680219089Spjd 4681219089Spjd /* 4682219089Spjd * We have to grab the zs_name_lock as writer to 4683219089Spjd * prevent a race between offlining a slog and 4684219089Spjd * destroying a dataset. Offlining the slog will 4685219089Spjd * grab a reference on the dataset which may cause 4686219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4687219089Spjd * leaving the dataset in an inconsistent state. 
4688219089Spjd */ 4689219089Spjd if (islog) 4690236143Smm (void) rw_wrlock(&ztest_name_lock); 4691219089Spjd 4692209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4693219089Spjd 4694219089Spjd if (islog) 4695236143Smm (void) rw_unlock(&ztest_name_lock); 4696209962Smm } else { 4697209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4698209962Smm } 4699168404Spjd } 4700168404Spjd 4701219089Spjd if (maxfaults == 0) 4702219089Spjd return; 4703219089Spjd 4704168404Spjd /* 4705168404Spjd * We have at least single-fault tolerance, so inject data corruption. 4706168404Spjd */ 4707168404Spjd fd = open(pathrand, O_RDWR); 4708168404Spjd 4709168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4710168404Spjd return; 4711168404Spjd 4712168404Spjd fsize = lseek(fd, 0, SEEK_END); 4713168404Spjd 4714168404Spjd while (--iters != 0) { 4715168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4716168404Spjd (leaves << bshift) + (leaf << bshift) + 4717168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4718168404Spjd 4719168404Spjd if (offset >= fsize) 4720168404Spjd continue; 4721168404Spjd 4722236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4723219089Spjd if (mirror_save != zs->zs_mirrors) { 4724236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4725219089Spjd (void) close(fd); 4726219089Spjd return; 4727219089Spjd } 4728168404Spjd 4729168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4730168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4731168404Spjd offset, pathrand); 4732219089Spjd 4733236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4734219089Spjd 4735236143Smm if (ztest_opts.zo_verbose >= 7) 4736219089Spjd (void) printf("injected bad word into %s," 4737219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4738168404Spjd } 4739168404Spjd 4740168404Spjd (void) close(fd); 4741168404Spjd} 4742168404Spjd 4743168404Spjd/* 4744219089Spjd * Verify that DDT repair works as expected. 
4745219089Spjd */ 4746219089Spjdvoid 4747219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4748219089Spjd{ 4749219089Spjd ztest_shared_t *zs = ztest_shared; 4750236143Smm spa_t *spa = ztest_spa; 4751219089Spjd objset_t *os = zd->zd_os; 4752219089Spjd ztest_od_t od[1]; 4753219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4754219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4755219089Spjd dmu_buf_t *db; 4756219089Spjd dmu_tx_t *tx; 4757219089Spjd void *buf; 4758219089Spjd blkptr_t blk; 4759219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4760219089Spjd 4761219089Spjd blocksize = ztest_random_blocksize(); 4762219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4763219089Spjd 4764219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4765219089Spjd 4766219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4767219089Spjd return; 4768219089Spjd 4769219089Spjd /* 4770219089Spjd * Take the name lock as writer to prevent anyone else from changing 4771219089Spjd * the pool and dataset properies we need to maintain during this test. 
4772219089Spjd */ 4773236143Smm (void) rw_wrlock(&ztest_name_lock); 4774219089Spjd 4775219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 4776219089Spjd B_FALSE) != 0 || 4777219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 4778219089Spjd B_FALSE) != 0) { 4779236143Smm (void) rw_unlock(&ztest_name_lock); 4780219089Spjd return; 4781219089Spjd } 4782219089Spjd 4783219089Spjd object = od[0].od_object; 4784219089Spjd blocksize = od[0].od_blocksize; 4785228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 4786219089Spjd 4787219089Spjd ASSERT(object != 0); 4788219089Spjd 4789219089Spjd tx = dmu_tx_create(os); 4790219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 4791219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 4792219089Spjd if (txg == 0) { 4793236143Smm (void) rw_unlock(&ztest_name_lock); 4794219089Spjd return; 4795219089Spjd } 4796219089Spjd 4797219089Spjd /* 4798219089Spjd * Write all the copies of our block. 4799219089Spjd */ 4800219089Spjd for (int i = 0; i < copies; i++) { 4801219089Spjd uint64_t offset = i * blocksize; 4802219089Spjd VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db, 4803219089Spjd DMU_READ_NO_PREFETCH) == 0); 4804219089Spjd ASSERT(db->db_offset == offset); 4805219089Spjd ASSERT(db->db_size == blocksize); 4806219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 4807219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 4808219089Spjd dmu_buf_will_fill(db, tx); 4809219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 4810219089Spjd dmu_buf_rele(db, FTAG); 4811219089Spjd } 4812219089Spjd 4813219089Spjd dmu_tx_commit(tx); 4814219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 4815219089Spjd 4816219089Spjd /* 4817219089Spjd * Find out what block we got. 
4818219089Spjd */ 4819219089Spjd VERIFY(dmu_buf_hold(os, object, 0, FTAG, &db, 4820219089Spjd DMU_READ_NO_PREFETCH) == 0); 4821219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 4822219089Spjd dmu_buf_rele(db, FTAG); 4823219089Spjd 4824219089Spjd /* 4825219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 4826219089Spjd */ 4827219089Spjd psize = BP_GET_PSIZE(&blk); 4828219089Spjd buf = zio_buf_alloc(psize); 4829219089Spjd ztest_pattern_set(buf, psize, ~pattern); 4830219089Spjd 4831219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 4832219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 4833219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 4834219089Spjd 4835219089Spjd zio_buf_free(buf, psize); 4836219089Spjd 4837236143Smm (void) rw_unlock(&ztest_name_lock); 4838219089Spjd} 4839219089Spjd 4840219089Spjd/* 4841168404Spjd * Scrub the pool. 4842168404Spjd */ 4843219089Spjd/* ARGSUSED */ 4844168404Spjdvoid 4845219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 4846168404Spjd{ 4847236143Smm spa_t *spa = ztest_spa; 4848168404Spjd 4849219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 4850219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 4851219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 4852168404Spjd} 4853168404Spjd 4854168404Spjd/* 4855228103Smm * Change the guid for the pool. 
4856228103Smm */ 4857228103Smm/* ARGSUSED */ 4858228103Smmvoid 4859228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 4860228103Smm{ 4861236143Smm spa_t *spa = ztest_spa; 4862228103Smm uint64_t orig, load; 4863228103Smm 4864228103Smm orig = spa_guid(spa); 4865228103Smm load = spa_load_guid(spa); 4866228103Smm if (spa_change_guid(spa) != 0) 4867228103Smm return; 4868228103Smm 4869236143Smm if (ztest_opts.zo_verbose >= 3) { 4870228103Smm (void) printf("Changed guid old %llu -> %llu\n", 4871228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 4872228103Smm } 4873228103Smm 4874228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 4875228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 4876228103Smm} 4877228103Smm 4878228103Smm/* 4879168404Spjd * Rename the pool to a different name and then rename it back. 4880168404Spjd */ 4881219089Spjd/* ARGSUSED */ 4882168404Spjdvoid 4883219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 4884168404Spjd{ 4885168404Spjd char *oldname, *newname; 4886168404Spjd spa_t *spa; 4887168404Spjd 4888236143Smm (void) rw_wrlock(&ztest_name_lock); 4889168404Spjd 4890236143Smm oldname = ztest_opts.zo_pool; 4891168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 4892168404Spjd (void) strcpy(newname, oldname); 4893168404Spjd (void) strcat(newname, "_tmp"); 4894168404Spjd 4895168404Spjd /* 4896168404Spjd * Do the rename 4897168404Spjd */ 4898219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 4899168404Spjd 4900168404Spjd /* 4901168404Spjd * Try to open it under the old name, which shouldn't exist 4902168404Spjd */ 4903219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 4904168404Spjd 4905168404Spjd /* 4906168404Spjd * Open it under the new name and make sure it's still the same spa_t. 
4907168404Spjd */ 4908219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 4909168404Spjd 4910236143Smm ASSERT(spa == ztest_spa); 4911168404Spjd spa_close(spa, FTAG); 4912168404Spjd 4913168404Spjd /* 4914168404Spjd * Rename it back to the original 4915168404Spjd */ 4916219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 4917168404Spjd 4918168404Spjd /* 4919168404Spjd * Make sure it can still be opened 4920168404Spjd */ 4921219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 4922168404Spjd 4923236143Smm ASSERT(spa == ztest_spa); 4924168404Spjd spa_close(spa, FTAG); 4925168404Spjd 4926168404Spjd umem_free(newname, strlen(newname) + 1); 4927168404Spjd 4928236143Smm (void) rw_unlock(&ztest_name_lock); 4929168404Spjd} 4930168404Spjd 4931168404Spjd/* 4932219089Spjd * Verify pool integrity by running zdb. 4933168404Spjd */ 4934168404Spjdstatic void 4935219089Spjdztest_run_zdb(char *pool) 4936168404Spjd{ 4937168404Spjd int status; 4938168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 4939168404Spjd char zbuf[1024]; 4940168404Spjd char *bin; 4941185029Spjd char *ztest; 4942185029Spjd char *isa; 4943185029Spjd int isalen; 4944168404Spjd FILE *fp; 4945168404Spjd 4946214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 4947168404Spjd 4948168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 4949168404Spjd bin = strstr(zdb, "/usr/bin/"); 4950185029Spjd ztest = strstr(bin, "/ztest"); 4951185029Spjd isa = bin + 8; 4952185029Spjd isalen = ztest - isa; 4953185029Spjd isa = strdup(isa); 4954168404Spjd /* LINTED */ 4955185029Spjd (void) sprintf(bin, 4956219089Spjd "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s", 4957185029Spjd isalen, 4958185029Spjd isa, 4959236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 4960236143Smm ztest_opts.zo_verbose >= 4 ? 
"v" : "", 4961219089Spjd spa_config_path, 4962208047Smm pool); 4963185029Spjd free(isa); 4964168404Spjd 4965236143Smm if (ztest_opts.zo_verbose >= 5) 4966168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 4967168404Spjd 4968168404Spjd fp = popen(zdb, "r"); 4969168404Spjd assert(fp != NULL); 4970168404Spjd 4971168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 4972236143Smm if (ztest_opts.zo_verbose >= 3) 4973168404Spjd (void) printf("%s", zbuf); 4974168404Spjd 4975168404Spjd status = pclose(fp); 4976168404Spjd 4977168404Spjd if (status == 0) 4978168404Spjd return; 4979168404Spjd 4980168404Spjd ztest_dump_core = 0; 4981168404Spjd if (WIFEXITED(status)) 4982168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 4983168404Spjd else 4984168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 4985168404Spjd} 4986168404Spjd 4987168404Spjdstatic void 4988168404Spjdztest_walk_pool_directory(char *header) 4989168404Spjd{ 4990168404Spjd spa_t *spa = NULL; 4991168404Spjd 4992236143Smm if (ztest_opts.zo_verbose >= 6) 4993168404Spjd (void) printf("%s\n", header); 4994168404Spjd 4995168404Spjd mutex_enter(&spa_namespace_lock); 4996168404Spjd while ((spa = spa_next(spa)) != NULL) 4997236143Smm if (ztest_opts.zo_verbose >= 6) 4998168404Spjd (void) printf("\t%s\n", spa_name(spa)); 4999168404Spjd mutex_exit(&spa_namespace_lock); 5000168404Spjd} 5001168404Spjd 5002168404Spjdstatic void 5003168404Spjdztest_spa_import_export(char *oldname, char *newname) 5004168404Spjd{ 5005209962Smm nvlist_t *config, *newconfig; 5006168404Spjd uint64_t pool_guid; 5007168404Spjd spa_t *spa; 5008168404Spjd 5009236143Smm if (ztest_opts.zo_verbose >= 4) { 5010168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5011168404Spjd oldname, newname); 5012168404Spjd } 5013168404Spjd 5014168404Spjd /* 5015168404Spjd * Clean up from previous runs. 
5016168404Spjd */ 5017168404Spjd (void) spa_destroy(newname); 5018168404Spjd 5019168404Spjd /* 5020168404Spjd * Get the pool's configuration and guid. 5021168404Spjd */ 5022219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5023168404Spjd 5024209962Smm /* 5025209962Smm * Kick off a scrub to tickle scrub/export races. 5026209962Smm */ 5027209962Smm if (ztest_random(2) == 0) 5028219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5029209962Smm 5030168404Spjd pool_guid = spa_guid(spa); 5031168404Spjd spa_close(spa, FTAG); 5032168404Spjd 5033168404Spjd ztest_walk_pool_directory("pools before export"); 5034168404Spjd 5035168404Spjd /* 5036168404Spjd * Export it. 5037168404Spjd */ 5038219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5039168404Spjd 5040168404Spjd ztest_walk_pool_directory("pools after export"); 5041168404Spjd 5042168404Spjd /* 5043209962Smm * Try to import it. 5044209962Smm */ 5045209962Smm newconfig = spa_tryimport(config); 5046209962Smm ASSERT(newconfig != NULL); 5047209962Smm nvlist_free(newconfig); 5048209962Smm 5049209962Smm /* 5050168404Spjd * Import it under the new name. 5051168404Spjd */ 5052219089Spjd VERIFY3U(0, ==, spa_import(newname, config, NULL, 0)); 5053168404Spjd 5054168404Spjd ztest_walk_pool_directory("pools after import"); 5055168404Spjd 5056168404Spjd /* 5057168404Spjd * Try to import it again -- should fail with EEXIST. 5058168404Spjd */ 5059219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5060168404Spjd 5061168404Spjd /* 5062168404Spjd * Try to import it under a different name -- should fail with EEXIST. 5063168404Spjd */ 5064219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5065168404Spjd 5066168404Spjd /* 5067168404Spjd * Verify that the pool is no longer visible under the old name. 
5068168404Spjd */ 5069219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5070168404Spjd 5071168404Spjd /* 5072168404Spjd * Verify that we can open and close the pool using the new name. 5073168404Spjd */ 5074219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5075168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5076168404Spjd spa_close(spa, FTAG); 5077168404Spjd 5078168404Spjd nvlist_free(config); 5079168404Spjd} 5080168404Spjd 5081209962Smmstatic void 5082209962Smmztest_resume(spa_t *spa) 5083209962Smm{ 5084236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5085219089Spjd (void) printf("resuming from suspended state\n"); 5086219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5087219089Spjd vdev_clear(spa, NULL); 5088219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5089219089Spjd (void) zio_resume(spa); 5090209962Smm} 5091209962Smm 5092168404Spjdstatic void * 5093209962Smmztest_resume_thread(void *arg) 5094185029Spjd{ 5095185029Spjd spa_t *spa = arg; 5096185029Spjd 5097185029Spjd while (!ztest_exiting) { 5098219089Spjd if (spa_suspended(spa)) 5099219089Spjd ztest_resume(spa); 5100219089Spjd (void) poll(NULL, 0, 100); 5101185029Spjd } 5102185029Spjd return (NULL); 5103185029Spjd} 5104185029Spjd 5105185029Spjdstatic void * 5106219089Spjdztest_deadman_thread(void *arg) 5107219089Spjd{ 5108219089Spjd ztest_shared_t *zs = arg; 5109219089Spjd int grace = 300; 5110219089Spjd hrtime_t delta; 5111219089Spjd 5112219089Spjd delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; 5113219089Spjd 5114219089Spjd (void) poll(NULL, 0, (int)(1000 * delta)); 5115219089Spjd 5116219089Spjd fatal(0, "failed to complete within %d seconds of deadline", grace); 5117219089Spjd 5118219089Spjd return (NULL); 5119219089Spjd} 5120219089Spjd 5121219089Spjdstatic void 5122236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5123219089Spjd{ 5124236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5125236143Smm 
ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5126219089Spjd hrtime_t functime = gethrtime(); 5127219089Spjd 5128219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5129219089Spjd zi->zi_func(zd, id); 5130219089Spjd 5131219089Spjd functime = gethrtime() - functime; 5132219089Spjd 5133236143Smm atomic_add_64(&zc->zc_count, 1); 5134236143Smm atomic_add_64(&zc->zc_time, functime); 5135219089Spjd 5136236143Smm if (ztest_opts.zo_verbose >= 4) { 5137219089Spjd Dl_info dli; 5138219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5139219089Spjd (void) printf("%6.2f sec in %s\n", 5140219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5141219089Spjd } 5142219089Spjd} 5143219089Spjd 5144219089Spjdstatic void * 5145168404Spjdztest_thread(void *arg) 5146168404Spjd{ 5147236143Smm int rand; 5148219089Spjd uint64_t id = (uintptr_t)arg; 5149168404Spjd ztest_shared_t *zs = ztest_shared; 5150219089Spjd uint64_t call_next; 5151219089Spjd hrtime_t now; 5152168404Spjd ztest_info_t *zi; 5153236143Smm ztest_shared_callstate_t *zc; 5154168404Spjd 5155219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5156168404Spjd /* 5157168404Spjd * See if it's time to force a crash. 5158168404Spjd */ 5159219089Spjd if (now > zs->zs_thread_kill) 5160219089Spjd ztest_kill(zs); 5161168404Spjd 5162168404Spjd /* 5163219089Spjd * If we're getting ENOSPC with some regularity, stop. 5164168404Spjd */ 5165219089Spjd if (zs->zs_enospc_count > 10) 5166219089Spjd break; 5167168404Spjd 5168168404Spjd /* 5169219089Spjd * Pick a random function to execute. 
5170168404Spjd */ 5171236143Smm rand = ztest_random(ZTEST_FUNCS); 5172236143Smm zi = &ztest_info[rand]; 5173236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5174236143Smm call_next = zc->zc_next; 5175168404Spjd 5176219089Spjd if (now >= call_next && 5177236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5178236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5179236143Smm ztest_execute(rand, zi, id); 5180236143Smm } 5181219089Spjd } 5182168404Spjd 5183219089Spjd return (NULL); 5184219089Spjd} 5185168404Spjd 5186219089Spjdstatic void 5187219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5188219089Spjd{ 5189219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5190219089Spjd} 5191168404Spjd 5192219089Spjdstatic void 5193236143Smmztest_dataset_destroy(int d) 5194219089Spjd{ 5195219089Spjd char name[MAXNAMELEN]; 5196168404Spjd 5197236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5198168404Spjd 5199236143Smm if (ztest_opts.zo_verbose >= 3) 5200219089Spjd (void) printf("Destroying %s to free up space\n", name); 5201168404Spjd 5202219089Spjd /* 5203219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5204219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5205219089Spjd * so there may be more than one thing to clean up. 5206219089Spjd */ 5207236143Smm for (int t = d; t < ztest_opts.zo_threads; 5208236143Smm t += ztest_opts.zo_datasets) { 5209219089Spjd ztest_dsl_dataset_cleanup(name, t); 5210236143Smm } 5211219089Spjd 5212219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5213219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5214219089Spjd} 5215219089Spjd 5216219089Spjdstatic void 5217219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5218219089Spjd{ 5219219089Spjd uint64_t usedobjs, dirobjs, scratch; 5220219089Spjd 5221219089Spjd /* 5222219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 
5223219089Spjd * Therefore, the number of objects in use should equal the 5224219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5225219089Spjd * If not, we have an object leak. 5226219089Spjd * 5227219089Spjd * Note that we can only check this in ztest_dataset_open(), 5228219089Spjd * when the open-context and syncing-context values agree. 5229219089Spjd * That's because zap_count() returns the open-context value, 5230219089Spjd * while dmu_objset_space() returns the rootbp fill count. 5231219089Spjd */ 5232219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5233219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5234219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5235219089Spjd} 5236219089Spjd 5237219089Spjdstatic int 5238236143Smmztest_dataset_open(int d) 5239219089Spjd{ 5240236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5241236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5242219089Spjd objset_t *os; 5243219089Spjd zilog_t *zilog; 5244219089Spjd char name[MAXNAMELEN]; 5245219089Spjd int error; 5246219089Spjd 5247236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5248219089Spjd 5249236143Smm (void) rw_rdlock(&ztest_name_lock); 5250219089Spjd 5251219089Spjd error = ztest_dataset_create(name); 5252219089Spjd if (error == ENOSPC) { 5253236143Smm (void) rw_unlock(&ztest_name_lock); 5254219089Spjd ztest_record_enospc(FTAG); 5255219089Spjd return (error); 5256168404Spjd } 5257219089Spjd ASSERT(error == 0 || error == EEXIST); 5258168404Spjd 5259219089Spjd VERIFY3U(dmu_objset_hold(name, zd, &os), ==, 0); 5260236143Smm (void) rw_unlock(&ztest_name_lock); 5261219089Spjd 5262236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5263219089Spjd 5264219089Spjd zilog = zd->zd_zilog; 5265219089Spjd 5266219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5267219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5268219089Spjd fatal(0, "missing log records: claimed %llu < 
committed %llu", 5269219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5270219089Spjd 5271219089Spjd ztest_dataset_dirobj_verify(zd); 5272219089Spjd 5273219089Spjd zil_replay(os, zd, ztest_replay_vector); 5274219089Spjd 5275219089Spjd ztest_dataset_dirobj_verify(zd); 5276219089Spjd 5277236143Smm if (ztest_opts.zo_verbose >= 6) 5278219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5279219089Spjd zd->zd_name, 5280219089Spjd (u_longlong_t)zilog->zl_parse_blk_count, 5281219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5282219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5283219089Spjd 5284219089Spjd zilog = zil_open(os, ztest_get_data); 5285219089Spjd 5286219089Spjd if (zilog->zl_replaying_seq != 0 && 5287219089Spjd zilog->zl_replaying_seq < committed_seq) 5288219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5289219089Spjd zilog->zl_replaying_seq, committed_seq); 5290219089Spjd 5291219089Spjd return (0); 5292168404Spjd} 5293168404Spjd 5294219089Spjdstatic void 5295236143Smmztest_dataset_close(int d) 5296219089Spjd{ 5297236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5298219089Spjd 5299219089Spjd zil_close(zd->zd_zilog); 5300219089Spjd dmu_objset_rele(zd->zd_os, zd); 5301219089Spjd 5302219089Spjd ztest_zd_fini(zd); 5303219089Spjd} 5304219089Spjd 5305168404Spjd/* 5306168404Spjd * Kick off threads to run tests on all datasets in parallel. 5307168404Spjd */ 5308168404Spjdstatic void 5309219089Spjdztest_run(ztest_shared_t *zs) 5310168404Spjd{ 5311219089Spjd thread_t *tid; 5312168404Spjd spa_t *spa; 5313228103Smm objset_t *os; 5314185029Spjd thread_t resume_tid; 5315219089Spjd int error; 5316168404Spjd 5317185029Spjd ztest_exiting = B_FALSE; 5318185029Spjd 5319168404Spjd /* 5320219089Spjd * Initialize parent/child shared state. 
5321168404Spjd */ 5322236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5323236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5324168404Spjd 5325219089Spjd zs->zs_thread_start = gethrtime(); 5326236143Smm zs->zs_thread_stop = 5327236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5328219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5329219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5330236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5331236143Smm zs->zs_thread_kill -= 5332236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5333236143Smm } 5334168404Spjd 5335219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5336168404Spjd 5337219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5338219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5339168404Spjd 5340168404Spjd /* 5341219089Spjd * Open our pool. 5342168404Spjd */ 5343219089Spjd kernel_init(FREAD | FWRITE); 5344236143Smm VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0); 5345224177Smm spa->spa_debug = B_TRUE; 5346236143Smm ztest_spa = spa; 5347168404Spjd 5348236143Smm VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os)); 5349228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5350228103Smm dmu_objset_rele(os, FTAG); 5351228103Smm 5352219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5353168404Spjd 5354168404Spjd /* 5355209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5356209962Smm * in which case ztest_fault_inject() temporarily takes away 5357209962Smm * the only valid replica. 5358209962Smm */ 5359219089Spjd if (MAXFAULTS() == 0) 5360209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5361209962Smm else 5362209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5363209962Smm 5364209962Smm /* 5365185029Spjd * Create a thread to periodically resume suspended I/O. 
5366185029Spjd */ 5367209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5368185029Spjd &resume_tid) == 0); 5369185029Spjd 5370185029Spjd /* 5371219089Spjd * Create a deadman thread to abort() if we hang. 5372219089Spjd */ 5373219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5374219089Spjd NULL) == 0); 5375219089Spjd 5376219089Spjd /* 5377168404Spjd * Verify that we can safely inquire about about any object, 5378168404Spjd * whether it's allocated or not. To make it interesting, 5379168404Spjd * we probe a 5-wide window around each power of two. 5380168404Spjd * This hits all edge cases, including zero and the max. 5381168404Spjd */ 5382219089Spjd for (int t = 0; t < 64; t++) { 5383219089Spjd for (int d = -5; d <= 5; d++) { 5384168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5385168404Spjd (1ULL << t) + d, NULL); 5386168404Spjd ASSERT(error == 0 || error == ENOENT || 5387168404Spjd error == EINVAL); 5388168404Spjd } 5389168404Spjd } 5390168404Spjd 5391168404Spjd /* 5392219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5393168404Spjd */ 5394219089Spjd if (zs->zs_enospc_count != 0) { 5395236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5396236143Smm ztest_dataset_destroy(d); 5397219089Spjd } 5398168404Spjd zs->zs_enospc_count = 0; 5399168404Spjd 5400236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5401236143Smm UMEM_NOFAIL); 5402168404Spjd 5403236143Smm if (ztest_opts.zo_verbose >= 4) 5404168404Spjd (void) printf("starting main threads...\n"); 5405168404Spjd 5406219089Spjd /* 5407219089Spjd * Kick off all the tests that run in parallel. 
5408219089Spjd */ 5409236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5410236143Smm if (t < ztest_opts.zo_datasets && 5411236143Smm ztest_dataset_open(t) != 0) 5412219089Spjd return; 5413219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5414219089Spjd THR_BOUND, &tid[t]) == 0); 5415219089Spjd } 5416168404Spjd 5417219089Spjd /* 5418219089Spjd * Wait for all of the tests to complete. We go in reverse order 5419219089Spjd * so we don't close datasets while threads are still using them. 5420219089Spjd */ 5421236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5422219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5423236143Smm if (t < ztest_opts.zo_datasets) 5424236143Smm ztest_dataset_close(t); 5425219089Spjd } 5426185029Spjd 5427219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5428185029Spjd 5429219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5430219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5431168404Spjd 5432236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5433168404Spjd 5434219089Spjd /* Kill the resume thread */ 5435219089Spjd ztest_exiting = B_TRUE; 5436219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5437219089Spjd ztest_resume(spa); 5438219089Spjd 5439219089Spjd /* 5440219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5441219089Spjd * spa_close() should wait for it to complete. 5442219089Spjd */ 5443219089Spjd for (uint64_t object = 1; object < 50; object++) 5444219089Spjd dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); 5445219089Spjd 5446219089Spjd spa_close(spa, FTAG); 5447219089Spjd 5448219089Spjd /* 5449219089Spjd * Verify that we can loop over all pools. 
5450219089Spjd */ 5451219089Spjd mutex_enter(&spa_namespace_lock); 5452219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5453236143Smm if (ztest_opts.zo_verbose > 3) 5454219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5455219089Spjd mutex_exit(&spa_namespace_lock); 5456219089Spjd 5457219089Spjd /* 5458219089Spjd * Verify that we can export the pool and reimport it under a 5459219089Spjd * different name. 5460219089Spjd */ 5461219089Spjd if (ztest_random(2) == 0) { 5462219089Spjd char name[MAXNAMELEN]; 5463236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5464236143Smm ztest_opts.zo_pool); 5465236143Smm ztest_spa_import_export(ztest_opts.zo_pool, name); 5466236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5467168404Spjd } 5468168404Spjd 5469219089Spjd kernel_fini(); 5470219089Spjd 5471219089Spjd list_destroy(&zcl.zcl_callbacks); 5472219089Spjd 5473219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5474219089Spjd 5475236143Smm (void) rwlock_destroy(&ztest_name_lock); 5476236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5477219089Spjd} 5478219089Spjd 5479219089Spjdstatic void 5480236143Smmztest_freeze(void) 5481219089Spjd{ 5482236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5483219089Spjd spa_t *spa; 5484219089Spjd int numloops = 0; 5485219089Spjd 5486236143Smm if (ztest_opts.zo_verbose >= 3) 5487219089Spjd (void) printf("testing spa_freeze()...\n"); 5488168404Spjd 5489219089Spjd kernel_init(FREAD | FWRITE); 5490236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5491236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5492168404Spjd 5493168404Spjd /* 5494219089Spjd * Force the first log block to be transactionally allocated. 5495219089Spjd * We have to do this before we freeze the pool -- otherwise 5496219089Spjd * the log chain won't be anchored. 
5497168404Spjd */ 5498219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5499219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5500219089Spjd zil_commit(zd->zd_zilog, 0); 5501168404Spjd } 5502168404Spjd 5503168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5504168404Spjd 5505219089Spjd /* 5506219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5507219089Spjd * so that the only way to record changes from now on is the ZIL. 5508219089Spjd */ 5509219089Spjd spa_freeze(spa); 5510185029Spjd 5511219089Spjd /* 5512219089Spjd * Run tests that generate log records but don't alter the pool config 5513219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5514219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5515219089Spjd * to increase well beyond the last synced value in the uberblock. 5516219089Spjd * The ZIL should be OK with that. 5517219089Spjd */ 5518236143Smm while (ztest_random(10) != 0 && 5519236143Smm numloops++ < ztest_opts.zo_maxloops) { 5520219089Spjd ztest_dmu_write_parallel(zd, 0); 5521219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5522219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5523219089Spjd } 5524185029Spjd 5525168404Spjd /* 5526219089Spjd * Commit all of the changes we just generated. 5527168404Spjd */ 5528219089Spjd zil_commit(zd->zd_zilog, 0); 5529219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5530168404Spjd 5531219089Spjd /* 5532219089Spjd * Close our dataset and close the pool. 5533219089Spjd */ 5534236143Smm ztest_dataset_close(0); 5535168404Spjd spa_close(spa, FTAG); 5536219089Spjd kernel_fini(); 5537168404Spjd 5538219089Spjd /* 5539219089Spjd * Open and close the pool and dataset to induce log replay. 
5540219089Spjd */ 5541219089Spjd kernel_init(FREAD | FWRITE); 5542236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5543236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5544236143Smm ztest_dataset_close(0); 5545219089Spjd spa_close(spa, FTAG); 5546168404Spjd kernel_fini(); 5547168404Spjd} 5548168404Spjd 5549168404Spjdvoid 5550168404Spjdprint_time(hrtime_t t, char *timebuf) 5551168404Spjd{ 5552168404Spjd hrtime_t s = t / NANOSEC; 5553168404Spjd hrtime_t m = s / 60; 5554168404Spjd hrtime_t h = m / 60; 5555168404Spjd hrtime_t d = h / 24; 5556168404Spjd 5557168404Spjd s -= m * 60; 5558168404Spjd m -= h * 60; 5559168404Spjd h -= d * 24; 5560168404Spjd 5561168404Spjd timebuf[0] = '\0'; 5562168404Spjd 5563168404Spjd if (d) 5564168404Spjd (void) sprintf(timebuf, 5565168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5566168404Spjd else if (h) 5567168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5568168404Spjd else if (m) 5569168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5570168404Spjd else 5571168404Spjd (void) sprintf(timebuf, "%llus", s); 5572168404Spjd} 5573168404Spjd 5574219089Spjdstatic nvlist_t * 5575219089Spjdmake_random_props() 5576219089Spjd{ 5577219089Spjd nvlist_t *props; 5578219089Spjd 5579236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5580219089Spjd if (ztest_random(2) == 0) 5581236884Smm return (props); 5582219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5583219089Spjd 5584219089Spjd return (props); 5585219089Spjd} 5586219089Spjd 5587168404Spjd/* 5588168404Spjd * Create a storage pool with the given name and initial vdev size. 5589219089Spjd * Then test spa_freeze() functionality. 
5590168404Spjd */ 5591168404Spjdstatic void 5592219089Spjdztest_init(ztest_shared_t *zs) 5593168404Spjd{ 5594168404Spjd spa_t *spa; 5595219089Spjd nvlist_t *nvroot, *props; 5596168404Spjd 5597236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5598236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5599219089Spjd 5600168404Spjd kernel_init(FREAD | FWRITE); 5601168404Spjd 5602168404Spjd /* 5603168404Spjd * Create the storage pool. 5604168404Spjd */ 5605236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5606219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5607219089Spjd zs->zs_splits = 0; 5608236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5609236143Smm nvroot = make_vdev_root(NULL, NULL, ztest_opts.zo_vdev_size, 0, 5610236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5611219089Spjd props = make_random_props(); 5612236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 5613236884Smm char buf[1024]; 5614236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5615236884Smm spa_feature_table[i].fi_uname); 5616236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5617236884Smm } 5618236143Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, 5619236143Smm NULL, NULL)); 5620168404Spjd nvlist_free(nvroot); 5621168404Spjd 5622236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5623236143Smm zs->zs_metaslab_sz = 5624236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5625236884Smm 5626219089Spjd spa_close(spa, FTAG); 5627209962Smm 5628219089Spjd kernel_fini(); 5629168404Spjd 5630236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5631168404Spjd 5632236143Smm ztest_freeze(); 5633219089Spjd 5634236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5635219089Spjd 5636236143Smm (void) rwlock_destroy(&ztest_name_lock); 5637236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5638168404Spjd} 5639168404Spjd 5640236143Smmstatic void 5641236143Smmsetup_fds(void) 5642236143Smm{ 5643236143Smm int fd; 
5644236143Smm#ifdef illumos 5645236143Smm 5646236143Smm char *tmp = tempnam(NULL, NULL); 5647236143Smm fd = open(tmp, O_RDWR | O_CREAT, 0700); 5648236143Smm ASSERT3U(fd, ==, ZTEST_FD_DATA); 5649236143Smm (void) unlink(tmp); 5650236143Smm free(tmp); 5651236143Smm#else 5652236143Smm char tmp[MAXPATHLEN]; 5653236143Smm 5654236143Smm strlcpy(tmp, ztest_opts.zo_dir, MAXPATHLEN); 5655236143Smm strlcat(tmp, "/ztest.XXXXXX", MAXPATHLEN); 5656236143Smm fd = mkstemp(tmp); 5657236143Smm ASSERT3U(fd, ==, ZTEST_FD_DATA); 5658236143Smm#endif 5659236143Smm 5660236143Smm fd = open("/dev/urandom", O_RDONLY); 5661236143Smm ASSERT3U(fd, ==, ZTEST_FD_RAND); 5662236143Smm} 5663236143Smm 5664236884Smmstatic int 5665236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5666236884Smm{ 5667236884Smm int size; 5668236884Smm 5669236884Smm size = hdr->zh_hdr_size; 5670236884Smm size += hdr->zh_opts_size; 5671236884Smm size += hdr->zh_size; 5672236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5673236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5674236884Smm 5675236884Smm return (size); 5676236884Smm} 5677236884Smm 5678236143Smmstatic void 5679236143Smmsetup_hdr(void) 5680236143Smm{ 5681236884Smm int size; 5682236143Smm ztest_shared_hdr_t *hdr; 5683236143Smm 5684236143Smm#ifndef illumos 5685236143Smm pwrite(ZTEST_FD_DATA, "", 1, 0); 5686236143Smm#endif 5687236143Smm 5688236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5689236143Smm PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0); 5690236143Smm ASSERT(hdr != MAP_FAILED); 5691236143Smm 5692236884Smm VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, sizeof (ztest_shared_hdr_t))); 5693236884Smm 5694236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5695236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5696236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5697236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5698236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5699236143Smm 
hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5700236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5701236143Smm 5702236884Smm size = shared_data_size(hdr); 5703236884Smm VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, size)); 5704236884Smm 5705236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5706236143Smm} 5707236143Smm 5708236143Smmstatic void 5709236143Smmsetup_data(void) 5710236143Smm{ 5711236143Smm int size, offset; 5712236143Smm ztest_shared_hdr_t *hdr; 5713236143Smm uint8_t *buf; 5714236143Smm 5715236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5716236143Smm PROT_READ, MAP_SHARED, ZTEST_FD_DATA, 0); 5717236143Smm ASSERT(hdr != MAP_FAILED); 5718236143Smm 5719236884Smm size = shared_data_size(hdr); 5720236143Smm 5721236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5722236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5723236143Smm PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0); 5724236143Smm ASSERT(hdr != MAP_FAILED); 5725236143Smm buf = (uint8_t *)hdr; 5726236143Smm 5727236143Smm offset = hdr->zh_hdr_size; 5728236143Smm ztest_shared_opts = (void *)&buf[offset]; 5729236143Smm offset += hdr->zh_opts_size; 5730236143Smm ztest_shared = (void *)&buf[offset]; 5731236143Smm offset += hdr->zh_size; 5732236143Smm ztest_shared_callstate = (void *)&buf[offset]; 5733236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 5734236143Smm ztest_shared_ds = (void *)&buf[offset]; 5735236143Smm} 5736236143Smm 5737236143Smmstatic boolean_t 5738236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 5739236143Smm{ 5740236143Smm pid_t pid; 5741236143Smm int status; 5742236143Smm char cmdbuf[MAXPATHLEN]; 5743236143Smm 5744236143Smm pid = fork(); 5745236143Smm 5746236143Smm if (cmd == NULL) { 5747236143Smm (void) strlcpy(cmdbuf, getexecname(), sizeof (cmdbuf)); 5748236143Smm cmd = cmdbuf; 5749236143Smm } 5750236143Smm 
5751236143Smm if (pid == -1) 5752236143Smm fatal(1, "fork failed"); 5753236143Smm 5754236143Smm if (pid == 0) { /* child */ 5755236143Smm char *emptyargv[2] = { cmd, NULL }; 5756236143Smm 5757236143Smm struct rlimit rl = { 1024, 1024 }; 5758236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 5759236143Smm (void) enable_extended_FILE_stdio(-1, -1); 5760236143Smm if (libpath != NULL) 5761236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 5762236143Smm#ifdef illumos 5763236143Smm (void) execv(cmd, emptyargv); 5764236143Smm#else 5765236143Smm (void) execvp(cmd, emptyargv); 5766236143Smm#endif 5767236143Smm ztest_dump_core = B_FALSE; 5768236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 5769236143Smm } 5770236143Smm 5771236143Smm while (waitpid(pid, &status, 0) != pid) 5772236143Smm continue; 5773236143Smm if (statusp != NULL) 5774236143Smm *statusp = status; 5775236143Smm 5776236143Smm if (WIFEXITED(status)) { 5777236143Smm if (WEXITSTATUS(status) != 0) { 5778236143Smm (void) fprintf(stderr, "child exited with code %d\n", 5779236143Smm WEXITSTATUS(status)); 5780236143Smm exit(2); 5781236143Smm } 5782236143Smm return (B_FALSE); 5783236143Smm } else if (WIFSIGNALED(status)) { 5784236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 5785236143Smm (void) fprintf(stderr, "child died with signal %d\n", 5786236143Smm WTERMSIG(status)); 5787236143Smm exit(3); 5788236143Smm } 5789236143Smm return (B_TRUE); 5790236143Smm } else { 5791236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 5792236143Smm exit(4); 5793236143Smm /* NOTREACHED */ 5794236143Smm } 5795236143Smm} 5796236143Smm 5797236143Smmstatic void 5798236143Smmztest_run_init(void) 5799236143Smm{ 5800236143Smm ztest_shared_t *zs = ztest_shared; 5801236143Smm 5802236143Smm ASSERT(ztest_opts.zo_init != 0); 5803236143Smm 5804236143Smm /* 5805236143Smm * Blow away any existing copy of zpool.cache 5806236143Smm */ 5807236143Smm (void) remove(spa_config_path); 5808236143Smm 5809236143Smm /* 
5810236143Smm * Create and initialize our storage pool. 5811236143Smm */ 5812236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 5813236143Smm bzero(zs, sizeof (ztest_shared_t)); 5814236143Smm if (ztest_opts.zo_verbose >= 3 && 5815236143Smm ztest_opts.zo_init != 1) { 5816236143Smm (void) printf("ztest_init(), pass %d\n", i); 5817236143Smm } 5818236143Smm ztest_init(zs); 5819236143Smm } 5820236143Smm} 5821236143Smm 5822168404Spjdint 5823168404Spjdmain(int argc, char **argv) 5824168404Spjd{ 5825168404Spjd int kills = 0; 5826168404Spjd int iters = 0; 5827236143Smm int older = 0; 5828236143Smm int newer = 0; 5829168404Spjd ztest_shared_t *zs; 5830168404Spjd ztest_info_t *zi; 5831236143Smm ztest_shared_callstate_t *zc; 5832168404Spjd char timebuf[100]; 5833168404Spjd char numbuf[6]; 5834219089Spjd spa_t *spa; 5835236143Smm char cmd[MAXNAMELEN]; 5836236143Smm boolean_t hasalt; 5837168404Spjd 5838236143Smm boolean_t ischild = (0 == lseek(ZTEST_FD_DATA, 0, SEEK_CUR)); 5839236143Smm ASSERT(ischild || errno == EBADF); 5840236143Smm 5841168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 5842168404Spjd 5843236143Smm if (!ischild) { 5844236143Smm process_options(argc, argv); 5845168404Spjd 5846236143Smm setup_fds(); 5847236143Smm setup_hdr(); 5848236143Smm setup_data(); 5849236143Smm bcopy(&ztest_opts, ztest_shared_opts, 5850236143Smm sizeof (*ztest_shared_opts)); 5851236143Smm } else { 5852236143Smm setup_data(); 5853236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 5854236143Smm } 5855236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 5856168404Spjd 5857219089Spjd /* Override location of zpool.cache */ 5858236143Smm (void) asprintf((char **)&spa_config_path, "%s/zpool.cache", 5859236143Smm ztest_opts.zo_dir); 5860219089Spjd 5861236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 5862236143Smm UMEM_NOFAIL); 5863236143Smm zs = ztest_shared; 5864168404Spjd 5865236143Smm if (ischild) { 5866236143Smm 
metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 5867236143Smm metaslab_df_alloc_threshold = 5868236143Smm zs->zs_metaslab_df_alloc_threshold; 5869219089Spjd 5870236143Smm if (zs->zs_do_init) 5871236143Smm ztest_run_init(); 5872236143Smm else 5873236143Smm ztest_run(zs); 5874236143Smm exit(0); 5875236143Smm } 5876168404Spjd 5877236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 5878236143Smm 5879236143Smm if (ztest_opts.zo_verbose >= 1) { 5880168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 5881168404Spjd " %llu seconds...\n", 5882236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 5883236143Smm ztest_opts.zo_datasets, 5884236143Smm ztest_opts.zo_threads, 5885236143Smm (u_longlong_t)ztest_opts.zo_time); 5886168404Spjd } 5887168404Spjd 5888236143Smm (void) strlcpy(cmd, getexecname(), sizeof (cmd)); 5889236143Smm 5890236143Smm zs->zs_do_init = B_TRUE; 5891236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 5892236143Smm if (ztest_opts.zo_verbose >= 1) { 5893236143Smm (void) printf("Executing older ztest for " 5894236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 5895236143Smm } 5896236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 5897236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 5898236143Smm } else { 5899236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 5900168404Spjd } 5901236143Smm zs->zs_do_init = B_FALSE; 5902168404Spjd 5903219089Spjd zs->zs_proc_start = gethrtime(); 5904236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 5905219089Spjd 5906219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 5907236143Smm zi = &ztest_info[f]; 5908236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 5909219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 5910236143Smm zc->zc_next = UINT64_MAX; 5911168404Spjd else 5912236143Smm zc->zc_next = zs->zs_proc_start + 5913219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 5914168404Spjd } 5915168404Spjd 5916168404Spjd /* 5917168404Spjd * Run 
the tests in a loop. These tests include fault injection 5918168404Spjd * to verify that self-healing data works, and forced crashes 5919168404Spjd * to verify that we never lose on-disk consistency. 5920168404Spjd */ 5921219089Spjd while (gethrtime() < zs->zs_proc_stop) { 5922168404Spjd int status; 5923236143Smm boolean_t killed; 5924168404Spjd 5925168404Spjd /* 5926168404Spjd * Initialize the workload counters for each function. 5927168404Spjd */ 5928219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 5929236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 5930236143Smm zc->zc_count = 0; 5931236143Smm zc->zc_time = 0; 5932168404Spjd } 5933168404Spjd 5934209962Smm /* Set the allocation switch size */ 5935236143Smm zs->zs_metaslab_df_alloc_threshold = 5936236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 5937209962Smm 5938236143Smm if (!hasalt || ztest_random(2) == 0) { 5939236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 5940236143Smm (void) printf("Executing newer ztest: %s\n", 5941236143Smm cmd); 5942168404Spjd } 5943236143Smm newer++; 5944236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 5945236143Smm } else { 5946236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 5947236143Smm (void) printf("Executing older ztest: %s\n", 5948236143Smm ztest_opts.zo_alt_ztest); 5949168404Spjd } 5950236143Smm older++; 5951236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 5952236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 5953168404Spjd } 5954168404Spjd 5955236143Smm if (killed) 5956236143Smm kills++; 5957168404Spjd iters++; 5958168404Spjd 5959236143Smm if (ztest_opts.zo_verbose >= 1) { 5960168404Spjd hrtime_t now = gethrtime(); 5961168404Spjd 5962219089Spjd now = MIN(now, zs->zs_proc_stop); 5963219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 5964168404Spjd nicenum(zs->zs_space, numbuf); 5965168404Spjd 5966168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 5967168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 5968168404Spjd iters, 
5969168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 5970168404Spjd (u_longlong_t)zs->zs_enospc_count, 5971168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 5972168404Spjd numbuf, 5973219089Spjd 100.0 * (now - zs->zs_proc_start) / 5974236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 5975168404Spjd } 5976168404Spjd 5977236143Smm if (ztest_opts.zo_verbose >= 2) { 5978168404Spjd (void) printf("\nWorkload summary:\n\n"); 5979168404Spjd (void) printf("%7s %9s %s\n", 5980168404Spjd "Calls", "Time", "Function"); 5981168404Spjd (void) printf("%7s %9s %s\n", 5982168404Spjd "-----", "----", "--------"); 5983219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 5984168404Spjd Dl_info dli; 5985168404Spjd 5986236143Smm zi = &ztest_info[f]; 5987236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 5988236143Smm print_time(zc->zc_time, timebuf); 5989168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 5990168404Spjd (void) printf("%7llu %9s %s\n", 5991236143Smm (u_longlong_t)zc->zc_count, timebuf, 5992168404Spjd dli.dli_sname); 5993168404Spjd } 5994168404Spjd (void) printf("\n"); 5995168404Spjd } 5996168404Spjd 5997168404Spjd /* 5998219089Spjd * It's possible that we killed a child during a rename test, 5999219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6000219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 
6001168404Spjd */ 6002219089Spjd kernel_init(FREAD); 6003236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6004219089Spjd spa_close(spa, FTAG); 6005219089Spjd } else { 6006219089Spjd char tmpname[MAXNAMELEN]; 6007219089Spjd kernel_fini(); 6008219089Spjd kernel_init(FREAD | FWRITE); 6009219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6010236143Smm ztest_opts.zo_pool); 6011236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6012219089Spjd } 6013168404Spjd kernel_fini(); 6014219089Spjd 6015236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6016168404Spjd } 6017168404Spjd 6018236143Smm if (ztest_opts.zo_verbose >= 1) { 6019236143Smm if (hasalt) { 6020236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6021236143Smm ztest_opts.zo_alt_ztest); 6022236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6023236143Smm cmd); 6024236143Smm } 6025168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6026168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6027168404Spjd } 6028168404Spjd 6029168404Spjd return (0); 6030168404Spjd} 6031