ztest.c revision 243524
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
 */

/*
 * The objective of this program is to provide a DMU/ZAP/SPA stress test
 * that runs entirely in userland, is easy to use, and easy to extend.
 *
 * The overall design of the ztest program is as follows:
 *
 * (1) For each major functional area (e.g. adding vdevs to a pool,
 *     creating and destroying datasets, reading and writing objects, etc)
 *     we have a simple routine to test that functionality. These
 *     individual routines do not have to do anything "stressful".
 *
 * (2) We turn these simple functionality tests into a stress test by
 *     running them all in parallel, with as many threads as desired,
 *     and spread across as many datasets, objects, and vdevs as desired.
 *
 * (3) While all this is happening, we inject faults into the pool to
 *     verify that self-healing data really works.
 *
 * (4) Every time we open a dataset, we change its checksum and compression
 *     functions. Thus even individual objects vary from block to block
 *     in which checksum they use and whether they're compressed.
 *
 * (5) To verify that we never lose on-disk consistency after a crash,
 *     we run the entire test in a child of the main process.
 *     At random times, the child self-immolates with a SIGKILL.
 *     This is the software equivalent of pulling the power cord.
 *     The parent then runs the test again, using the existing
 *     storage pool, as many times as desired. If backwards compatibility
 *     testing is enabled ztest will sometimes run the "older" version
 *     of ztest after a SIGKILL.
 *
 * (6) To verify that we don't have future leaks or temporal incursions,
 *     many of the functional tests record the transaction group number
 *     as part of their data. When reading old data, they verify that
 *     the transaction group number is less than the current, open txg.
 *     If you add a new test, please do this if applicable.
 *
 * When run with no arguments, ztest runs for about five minutes and
 * produces no output if successful. To get a little bit of information,
 * specify -V. To get more information, specify -VV, and so on.
 *
 * To turn this into an overnight stress test, use -T to specify run time.
 *
 * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
 * to increase the pool capacity, fanout, and overall stress level.
 *
 * Use the -k option to set the desired frequency of kills.
 *
 * When ztest invokes itself it passes all relevant information through a
 * temporary file which is mmap-ed in the child process. This allows shared
 * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always
 * stored at offset 0 of this file and contains information on the size and
 * number of shared structures in the file. The information stored in this file
 * must remain backwards compatible with older versions of ztest so that
 * ztest can invoke them during backwards compatibility testing (-B).
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/dmu_objset.h>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/zio.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <umem.h>
#include <dlfcn.h>
#include <ctype.h>
#include <math.h>
#include <errno.h>
#include <sys/fs/zfs.h>
#include <libnvpair.h>

/*
 * File descriptors used by this program; -1 means "not opened yet".
 * ztest_fd_rand is the descriptor ztest_random() reads its random bytes from.
 */
static int ztest_fd_data = -1;
static int ztest_fd_rand = -1;

/*
 * Header of the shared file described in the overview comment above: it
 * records the size of each shared structure and, for the arrays, how many
 * elements there are.  The layout is part of the compatibility contract
 * with older ztest binaries, so fields must not be reordered or resized.
 */
typedef struct ztest_shared_hdr {
	uint64_t	zh_hdr_size;
	uint64_t	zh_opts_size;
	uint64_t	zh_size;
	uint64_t	zh_stats_size;
	uint64_t	zh_stats_count;
	uint64_t	zh_ds_size;
	uint64_t	zh_ds_count;
} ztest_shared_hdr_t;

static ztest_shared_hdr_t *ztest_shared_hdr;

/*
 * Run-time options.  process_options() fills a private copy (ztest_opts)
 * starting from ztest_opts_defaults; each field below notes the command
 * line flag that sets it, where there is one.
 */
typedef struct ztest_shared_opts {
	char zo_pool[MAXNAMELEN];		/* -p */
	char zo_dir[MAXNAMELEN];		/* -f (resolved by realpath) */
	char zo_alt_ztest[MAXNAMELEN];		/* derived from -B */
	char zo_alt_libpath[MAXNAMELEN];	/* derived from -B */
	uint64_t zo_vdevs;			/* -v */
	uint64_t zo_vdevtime;			/* computed from zo_time/zo_vdevs */
	size_t zo_vdev_size;			/* -s */
	int zo_ashift;				/* -a, 0 means random */
	int zo_mirrors;				/* -m */
	int zo_raidz;				/* -r */
	int zo_raidz_parity;			/* -R */
	int zo_datasets;			/* -d */
	int zo_threads;				/* -t */
	uint64_t zo_passtime;			/* -P */
	uint64_t zo_killrate;			/* -k */
	int zo_verbose;				/* -V, counted */
	int zo_init;				/* -i, cleared by -E */
	uint64_t zo_time;			/* -T */
	uint64_t zo_maxloops;			/* -F */
	uint64_t zo_metaslab_gang_bang;		/* -g */
} ztest_shared_opts_t;

/* Built-in defaults; these are also the values printed by usage(). */
static const ztest_shared_opts_t ztest_opts_defaults = {
	.zo_pool = { 'z', 't', 'e', 's', 't', '\0' },
	.zo_dir = { '/', 't', 'm', 'p', '\0' },
	.zo_alt_ztest = { '\0' },
	.zo_alt_libpath = { '\0' },
	.zo_vdevs = 5,
	.zo_ashift = SPA_MINBLOCKSHIFT,
	.zo_mirrors = 2,
	.zo_raidz = 4,
	.zo_raidz_parity = 1,
	.zo_vdev_size = SPA_MINDEVSIZE,
	.zo_datasets = 7,
	.zo_threads = 23,
	.zo_passtime = 60,		/* 60 seconds */
	.zo_killrate = 70,		/* 70% kill rate */
	.zo_verbose = 0,
	.zo_init = 1,
	.zo_time = 300,			/* 5 minutes */
	.zo_maxloops = 50,		/* max loops during spa_freeze() */
	.zo_metaslab_gang_bang = 32 << 10
};

/* Tunables defined outside this file. */
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;

static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;

/* Per-dataset state kept in the shared file. */
typedef struct ztest_shared_ds {
	uint64_t	zd_seq;
} ztest_shared_ds_t;

static ztest_shared_ds_t *ztest_shared_ds;
/* Address of the shared per-dataset slot with index d. */
#define	ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d])

#define	BT_MAGIC	0x123456789abcdefULL
/*
 * NOTE: this macro expands to a reference to a variable named "zs", which
 * must be in scope wherever MAXFAULTS() is used.
 */
#define	MAXFAULTS()	\
	(MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)

/* Kinds of I/O operation; ZTEST_IO_TYPES is the number of kinds. */
enum ztest_io_type {
	ZTEST_IO_WRITE_TAG,
	ZTEST_IO_WRITE_PATTERN,
	ZTEST_IO_WRITE_ZEROES,
	ZTEST_IO_TRUNCATE,
	ZTEST_IO_SETATTR,
	ZTEST_IO_REWRITE,
	ZTEST_IO_TYPES
};

/* Tag record whose first field holds a magic number (see BT_MAGIC). */
typedef struct ztest_block_tag {
	uint64_t	bt_magic;
	uint64_t	bt_objset;
	uint64_t	bt_object;
	uint64_t	bt_offset;
	uint64_t	bt_gen;
	uint64_t	bt_txg;
	uint64_t	bt_crtxg;
} ztest_block_tag_t;

typedef struct bufwad {
	uint64_t	bw_index;
	uint64_t	bw_txg;
	uint64_t	bw_data;
} bufwad_t;

/*
 * XXX -- fix zfs range locks to be generic so we can use them here.
 */
typedef enum {
	RL_READER,
	RL_WRITER,
	RL_APPEND
} rl_type_t;

/* One reader/writer lock: writer owner, reader count, mutex and condvar. */
typedef struct rll {
	void		*rll_writer;
	int		rll_readers;
	mutex_t		rll_lock;
	cond_t		rll_cv;
} rll_t;

/* A held range: the object/offset/size it covers and the rll_t backing it. */
typedef struct rl {
	uint64_t	rl_object;
	uint64_t	rl_offset;
	uint64_t	rl_size;
	rll_t		*rl_lock;
} rl_t;

/* Sizes of the per-dataset zd_range_lock[] and zd_object_lock[] arrays. */
#define	ZTEST_RANGE_LOCKS	64
#define	ZTEST_OBJECT_LOCKS	64

/*
 * Object descriptor. Used as a template for object lookup/create/remove.
 */
typedef struct ztest_od {
	uint64_t	od_dir;
	uint64_t	od_object;
	dmu_object_type_t od_type;
	dmu_object_type_t od_crtype;
	uint64_t	od_blocksize;
	uint64_t	od_crblocksize;
	uint64_t	od_gen;
	uint64_t	od_crgen;
	char		od_name[MAXNAMELEN];
} ztest_od_t;

/*
 * Per-dataset state.
 */
typedef struct ztest_ds {
	ztest_shared_ds_t *zd_shared;
	objset_t	*zd_os;
	rwlock_t	zd_zilog_lock;
	zilog_t		*zd_zilog;
	ztest_od_t	*zd_od;		/* debugging aid */
	char		zd_name[MAXNAMELEN];
	mutex_t		zd_dirobj_lock;
	rll_t		zd_object_lock[ZTEST_OBJECT_LOCKS];
	rll_t		zd_range_lock[ZTEST_RANGE_LOCKS];
} ztest_ds_t;

/*
 * Per-iteration state.
 */
/* Signature shared by every test routine listed in ztest_info[]. */
typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);

typedef struct ztest_info {
	ztest_func_t	*zi_func;	/* test function */
	uint64_t	zi_iters;	/* iterations per execution */
	uint64_t	*zi_interval;	/* execute every <interval> seconds */
} ztest_info_t;

typedef struct ztest_shared_callstate {
	uint64_t	zc_count;	/* per-pass count */
	uint64_t	zc_time;	/* per-pass time */
	uint64_t	zc_next;	/* next time to call this function */
} ztest_shared_callstate_t;

static ztest_shared_callstate_t *ztest_shared_callstate;
/* Address of the shared call-state slot with index c. */
#define	ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])

/*
 * Note: these aren't static because we want dladdr() to work.
 */
ztest_func_t ztest_dmu_read_write;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
ztest_func_t ztest_zil_commit;
ztest_func_t ztest_zil_remount;
ztest_func_t ztest_dmu_read_write_zcopy;
ztest_func_t ztest_dmu_objset_create_destroy;
ztest_func_t ztest_dmu_prealloc;
ztest_func_t ztest_fzap;
ztest_func_t ztest_dmu_snapshot_create_destroy;
ztest_func_t ztest_dsl_prop_get_set;
ztest_func_t ztest_spa_prop_get_set;
ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
ztest_func_t ztest_ddt_repair;
ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_spa_rename;
ztest_func_t ztest_scrub;
ztest_func_t ztest_dsl_dataset_promote_busy;
ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
ztest_func_t ztest_vdev_aux_add_remove;
ztest_func_t ztest_split_pool;
ztest_func_t ztest_reguid;
ztest_func_t ztest_spa_upgrade;

/*
 * Scheduling intervals referenced by ztest_info[].  The initializers are
 * multiples of NANOSEC, i.e. the stored values are in nanoseconds.
 */
uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */

/*
 * Table of test routines: { function, iterations per execution, interval }.
 * The last two entries point at ztest_opts.zo_vdevtime, so their interval
 * is whatever process_options() computes from the run time and vdev count.
 * ztest_dmu_prealloc is compiled out with #if 0.
 */
ztest_info_t ztest_info[] = {
	{ ztest_dmu_read_write,			1,	&zopt_always	},
	{ ztest_dmu_write_parallel,		10,	&zopt_always	},
	{ ztest_dmu_object_alloc_free,		1,	&zopt_always	},
	{ ztest_dmu_commit_callbacks,		1,	&zopt_always	},
	{ ztest_zap,				30,	&zopt_always	},
	{ ztest_zap_parallel,			100,	&zopt_always	},
	{ ztest_split_pool,			1,	&zopt_always	},
	{ ztest_zil_commit,			1,	&zopt_incessant	},
	{ ztest_zil_remount,			1,	&zopt_sometimes	},
	{ ztest_dmu_read_write_zcopy,		1,	&zopt_often	},
	{ ztest_dmu_objset_create_destroy,	1,	&zopt_often	},
	{ ztest_dsl_prop_get_set,		1,	&zopt_often	},
	{ ztest_spa_prop_get_set,		1,	&zopt_sometimes	},
#if 0
	{ ztest_dmu_prealloc,			1,	&zopt_sometimes	},
#endif
	{ ztest_fzap,				1,	&zopt_sometimes	},
	{ ztest_dmu_snapshot_create_destroy,	1,	&zopt_sometimes	},
	{ ztest_spa_create_destroy,		1,	&zopt_sometimes	},
	{ ztest_fault_inject,			1,	&zopt_sometimes	},
	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
	{ ztest_reguid,				1,	&zopt_sometimes	},
	{ ztest_spa_rename,			1,	&zopt_rarely	},
	{ ztest_scrub,				1,	&zopt_rarely	},
	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
	{ ztest_vdev_attach_detach,		1,	&zopt_rarely	},
	{ ztest_vdev_LUN_growth,		1,	&zopt_rarely	},
	{ ztest_vdev_add_remove,		1,
	    &ztest_opts.zo_vdevtime				},
	{ ztest_vdev_aux_add_remove,		1,
	    &ztest_opts.zo_vdevtime				},
};

/* Number of entries in ztest_info[]. */
#define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))

/*
 * The following struct is used to hold a list of uncalled commit callbacks.
 * The callbacks are ordered by txg number.
 */
typedef struct ztest_cb_list {
	mutex_t	zcl_callbacks_lock;
	list_t	zcl_callbacks;
} ztest_cb_list_t;

/*
 * Stuff we need to share writably between parent and child.
 */
typedef struct ztest_shared {
	boolean_t	zs_do_init;
	hrtime_t	zs_proc_start;
	hrtime_t	zs_proc_stop;
	hrtime_t	zs_thread_start;
	hrtime_t	zs_thread_stop;
	hrtime_t	zs_thread_kill;
	uint64_t	zs_enospc_count;	/* bumped by ztest_record_enospc() */
	uint64_t	zs_vdev_next_leaf;	/* next leaf number for make_vdev_file() */
	uint64_t	zs_vdev_aux;		/* aux device number for make_vdev_file() */
	uint64_t	zs_alloc;		/* recorded by ztest_kill() */
	uint64_t	zs_space;		/* recorded by ztest_kill() */
	uint64_t	zs_splits;
	uint64_t	zs_mirrors;
	uint64_t	zs_metaslab_sz;
	uint64_t	zs_metaslab_df_alloc_threshold;
	uint64_t	zs_guid;
} ztest_shared_t;

#define	ID_PARALLEL	-1ULL

/*
 * snprintf() templates used by make_vdev_file() to name vdev backing files:
 * directory, pool name, then the device number (plus the aux name for aux
 * devices).  The trailing 'a' in the leaf template is part of the file name.
 */
static char ztest_dev_template[] = "%s/%s.%llua";
static char ztest_aux_template[] = "%s/%s.%s.%llu";
ztest_shared_t *ztest_shared;

static spa_t *ztest_spa = NULL;
static ztest_ds_t *ztest_ds;

static mutex_t ztest_vdev_lock;

/*
 * The ztest_name_lock protects the pool and dataset namespace used by
 * the individual tests. To modify the namespace, consumers must grab
 * this lock as writer. Grabbing the lock as reader will ensure that the
 * namespace does not change while the lock is held.
427239620Smm */ 428236143Smmstatic rwlock_t ztest_name_lock; 429236143Smm 430236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 431185029Spjdstatic boolean_t ztest_exiting; 432168404Spjd 433219089Spjd/* Global commit callback list */ 434219089Spjdstatic ztest_cb_list_t zcl; 435219089Spjd 436219089Spjdenum ztest_object { 437219089Spjd ZTEST_META_DNODE = 0, 438219089Spjd ZTEST_DIROBJ, 439219089Spjd ZTEST_OBJECTS 440219089Spjd}; 441168404Spjd 442168676Spjdstatic void usage(boolean_t) __NORETURN; 443168498Spjd 444168404Spjd/* 445168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 446168404Spjd * debugging facilities. 447168404Spjd */ 448168404Spjdconst char * 449168404Spjd_umem_debug_init() 450168404Spjd{ 451168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 452168404Spjd} 453168404Spjd 454168404Spjdconst char * 455168404Spjd_umem_logging_init(void) 456168404Spjd{ 457168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 458168404Spjd} 459168404Spjd 460168404Spjd#define FATAL_MSG_SZ 1024 461168404Spjd 462168404Spjdchar *fatal_msg; 463168404Spjd 464168404Spjdstatic void 465168404Spjdfatal(int do_perror, char *message, ...) 
466168404Spjd{ 467168404Spjd va_list args; 468168404Spjd int save_errno = errno; 469168404Spjd char buf[FATAL_MSG_SZ]; 470168404Spjd 471168404Spjd (void) fflush(stdout); 472168404Spjd 473168404Spjd va_start(args, message); 474168404Spjd (void) sprintf(buf, "ztest: "); 475168404Spjd /* LINTED */ 476168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 477168404Spjd va_end(args); 478168404Spjd if (do_perror) { 479168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 480168404Spjd ": %s", strerror(save_errno)); 481168404Spjd } 482168404Spjd (void) fprintf(stderr, "%s\n", buf); 483168404Spjd fatal_msg = buf; /* to ease debugging */ 484168404Spjd if (ztest_dump_core) 485168404Spjd abort(); 486168404Spjd exit(3); 487168404Spjd} 488168404Spjd 489168404Spjdstatic int 490168404Spjdstr2shift(const char *buf) 491168404Spjd{ 492168404Spjd const char *ends = "BKMGTPEZ"; 493168404Spjd int i; 494168404Spjd 495168404Spjd if (buf[0] == '\0') 496168404Spjd return (0); 497168404Spjd for (i = 0; i < strlen(ends); i++) { 498168404Spjd if (toupper(buf[0]) == ends[i]) 499168404Spjd break; 500168404Spjd } 501168498Spjd if (i == strlen(ends)) { 502168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 503168498Spjd buf); 504168498Spjd usage(B_FALSE); 505168498Spjd } 506168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 507168404Spjd return (10*i); 508168404Spjd } 509168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 510168498Spjd usage(B_FALSE); 511168498Spjd /* NOTREACHED */ 512168404Spjd} 513168404Spjd 514168404Spjdstatic uint64_t 515168404Spjdnicenumtoull(const char *buf) 516168404Spjd{ 517168404Spjd char *end; 518168404Spjd uint64_t val; 519168404Spjd 520168404Spjd val = strtoull(buf, &end, 0); 521168404Spjd if (end == buf) { 522168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 523168498Spjd usage(B_FALSE); 524168404Spjd } else if (end[0] == '.') { 525168404Spjd 
double fval = strtod(buf, &end); 526168404Spjd fval *= pow(2, str2shift(end)); 527168498Spjd if (fval > UINT64_MAX) { 528168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 529168498Spjd buf); 530168498Spjd usage(B_FALSE); 531168498Spjd } 532168404Spjd val = (uint64_t)fval; 533168404Spjd } else { 534168404Spjd int shift = str2shift(end); 535168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 536168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 537168498Spjd buf); 538168498Spjd usage(B_FALSE); 539168498Spjd } 540168404Spjd val <<= shift; 541168404Spjd } 542168404Spjd return (val); 543168404Spjd} 544168404Spjd 545168404Spjdstatic void 546168498Spjdusage(boolean_t requested) 547168404Spjd{ 548236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 549236143Smm 550168404Spjd char nice_vdev_size[10]; 551168404Spjd char nice_gang_bang[10]; 552168498Spjd FILE *fp = requested ? stdout : stderr; 553168404Spjd 554236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 555236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 556168404Spjd 557168498Spjd (void) fprintf(fp, "Usage: %s\n" 558168404Spjd "\t[-v vdevs (default: %llu)]\n" 559168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 560219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 561168404Spjd "\t[-m mirror_copies (default: %d)]\n" 562168404Spjd "\t[-r raidz_disks (default: %d)]\n" 563168404Spjd "\t[-R raidz_parity (default: %d)]\n" 564168404Spjd "\t[-d datasets (default: %d)]\n" 565168404Spjd "\t[-t threads (default: %d)]\n" 566168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 567219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 568219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 569168404Spjd "\t[-p pool_name (default: %s)]\n" 570219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 571219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 572219089Spjd "\t[-E] use existing 
pool instead of creating new one\n" 573219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 574219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 575219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 576236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 577168498Spjd "\t[-h] (print help)\n" 578168404Spjd "", 579236143Smm zo->zo_pool, 580236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 581185029Spjd nice_vdev_size, /* -s */ 582236143Smm zo->zo_ashift, /* -a */ 583236143Smm zo->zo_mirrors, /* -m */ 584236143Smm zo->zo_raidz, /* -r */ 585236143Smm zo->zo_raidz_parity, /* -R */ 586236143Smm zo->zo_datasets, /* -d */ 587236143Smm zo->zo_threads, /* -t */ 588185029Spjd nice_gang_bang, /* -g */ 589236143Smm zo->zo_init, /* -i */ 590236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 591236143Smm zo->zo_pool, /* -p */ 592236143Smm zo->zo_dir, /* -f */ 593236143Smm (u_longlong_t)zo->zo_time, /* -T */ 594236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 595236143Smm (u_longlong_t)zo->zo_passtime); 596168498Spjd exit(requested ? 
0 : 1); 597168404Spjd} 598168404Spjd 599168404Spjdstatic void 600168404Spjdprocess_options(int argc, char **argv) 601168404Spjd{ 602236143Smm char *path; 603236143Smm ztest_shared_opts_t *zo = &ztest_opts; 604236143Smm 605168404Spjd int opt; 606168404Spjd uint64_t value; 607236143Smm char altdir[MAXNAMELEN] = { 0 }; 608168404Spjd 609236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 610168404Spjd 611168404Spjd while ((opt = getopt(argc, argv, 612236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 613168404Spjd value = 0; 614168404Spjd switch (opt) { 615185029Spjd case 'v': 616185029Spjd case 's': 617185029Spjd case 'a': 618185029Spjd case 'm': 619185029Spjd case 'r': 620185029Spjd case 'R': 621185029Spjd case 'd': 622185029Spjd case 't': 623185029Spjd case 'g': 624185029Spjd case 'i': 625185029Spjd case 'k': 626185029Spjd case 'T': 627185029Spjd case 'P': 628219089Spjd case 'F': 629168404Spjd value = nicenumtoull(optarg); 630168404Spjd } 631168404Spjd switch (opt) { 632185029Spjd case 'v': 633236143Smm zo->zo_vdevs = value; 634168404Spjd break; 635185029Spjd case 's': 636236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 637168404Spjd break; 638185029Spjd case 'a': 639236143Smm zo->zo_ashift = value; 640168404Spjd break; 641185029Spjd case 'm': 642236143Smm zo->zo_mirrors = value; 643168404Spjd break; 644185029Spjd case 'r': 645236143Smm zo->zo_raidz = MAX(1, value); 646168404Spjd break; 647185029Spjd case 'R': 648236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 649168404Spjd break; 650185029Spjd case 'd': 651236143Smm zo->zo_datasets = MAX(1, value); 652168404Spjd break; 653185029Spjd case 't': 654236143Smm zo->zo_threads = MAX(1, value); 655168404Spjd break; 656185029Spjd case 'g': 657236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 658236143Smm value); 659168404Spjd break; 660185029Spjd case 'i': 661236143Smm zo->zo_init = value; 662168404Spjd break; 663185029Spjd case 'k': 664236143Smm zo->zo_killrate = value; 
665168404Spjd break; 666185029Spjd case 'p': 667236143Smm (void) strlcpy(zo->zo_pool, optarg, 668236143Smm sizeof (zo->zo_pool)); 669168404Spjd break; 670185029Spjd case 'f': 671236143Smm path = realpath(optarg, NULL); 672236143Smm if (path == NULL) { 673236143Smm (void) fprintf(stderr, "error: %s: %s\n", 674236143Smm optarg, strerror(errno)); 675236143Smm usage(B_FALSE); 676236143Smm } else { 677236143Smm (void) strlcpy(zo->zo_dir, path, 678236143Smm sizeof (zo->zo_dir)); 679236143Smm } 680168404Spjd break; 681185029Spjd case 'V': 682236143Smm zo->zo_verbose++; 683168404Spjd break; 684185029Spjd case 'E': 685236143Smm zo->zo_init = 0; 686168404Spjd break; 687185029Spjd case 'T': 688236143Smm zo->zo_time = value; 689168404Spjd break; 690185029Spjd case 'P': 691236143Smm zo->zo_passtime = MAX(1, value); 692168404Spjd break; 693219089Spjd case 'F': 694236143Smm zo->zo_maxloops = MAX(1, value); 695219089Spjd break; 696236143Smm case 'B': 697236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 698236143Smm break; 699185029Spjd case 'h': 700168498Spjd usage(B_TRUE); 701168498Spjd break; 702185029Spjd case '?': 703185029Spjd default: 704168498Spjd usage(B_FALSE); 705168404Spjd break; 706168404Spjd } 707168404Spjd } 708168404Spjd 709236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 710168404Spjd 711236143Smm zo->zo_vdevtime = 712236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 713219089Spjd UINT64_MAX >> 2); 714236143Smm 715236143Smm if (strlen(altdir) > 0) { 716242845Sdelphij char *cmd; 717242845Sdelphij char *realaltdir; 718236143Smm char *bin; 719236143Smm char *ztest; 720236143Smm char *isa; 721236143Smm int isalen; 722236143Smm 723242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 724242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 725242845Sdelphij 726242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 727236143Smm if (0 != access(altdir, F_OK)) { 728236143Smm ztest_dump_core = B_FALSE; 729236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 730236143Smm altdir); 731236143Smm } 732236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 733236143Smm 734236143Smm /* 735236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 736236143Smm * We want to extract <isa> to determine if we should use 737236143Smm * 32 or 64 bit binaries. 738236143Smm */ 739236143Smm bin = strstr(cmd, "/usr/bin/"); 740236143Smm ztest = strstr(bin, "/ztest"); 741236143Smm isa = bin + 9; 742236143Smm isalen = ztest - isa; 743236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 744236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 745236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 746236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 747236143Smm 748236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 749236143Smm ztest_dump_core = B_FALSE; 750236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 751236143Smm zo->zo_alt_ztest); 752236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 753236143Smm ztest_dump_core = B_FALSE; 754236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 755236143Smm zo->zo_alt_libpath); 756236143Smm } 757242845Sdelphij 758242845Sdelphij umem_free(cmd, MAXPATHLEN); 759242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 760236143Smm } 761168404Spjd} 762168404Spjd 
763219089Spjdstatic void 764219089Spjdztest_kill(ztest_shared_t *zs) 765219089Spjd{ 766236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 767236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 768219089Spjd (void) kill(getpid(), SIGKILL); 769219089Spjd} 770219089Spjd 771168404Spjdstatic uint64_t 772219089Spjdztest_random(uint64_t range) 773219089Spjd{ 774219089Spjd uint64_t r; 775219089Spjd 776242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 777242845Sdelphij 778219089Spjd if (range == 0) 779219089Spjd return (0); 780219089Spjd 781242845Sdelphij if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) 782219089Spjd fatal(1, "short read from /dev/urandom"); 783219089Spjd 784219089Spjd return (r % range); 785219089Spjd} 786219089Spjd 787219089Spjd/* ARGSUSED */ 788219089Spjdstatic void 789219089Spjdztest_record_enospc(const char *s) 790219089Spjd{ 791219089Spjd ztest_shared->zs_enospc_count++; 792219089Spjd} 793219089Spjd 794219089Spjdstatic uint64_t 795168404Spjdztest_get_ashift(void) 796168404Spjd{ 797236143Smm if (ztest_opts.zo_ashift == 0) 798168404Spjd return (SPA_MINBLOCKSHIFT + ztest_random(3)); 799236143Smm return (ztest_opts.zo_ashift); 800168404Spjd} 801168404Spjd 802168404Spjdstatic nvlist_t * 803243505Smmmake_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) 804168404Spjd{ 805185029Spjd char pathbuf[MAXPATHLEN]; 806168404Spjd uint64_t vdev; 807168404Spjd nvlist_t *file; 808168404Spjd 809185029Spjd if (ashift == 0) 810185029Spjd ashift = ztest_get_ashift(); 811168404Spjd 812185029Spjd if (path == NULL) { 813185029Spjd path = pathbuf; 814185029Spjd 815185029Spjd if (aux != NULL) { 816185029Spjd vdev = ztest_shared->zs_vdev_aux; 817236143Smm (void) snprintf(path, sizeof (pathbuf), 818236143Smm ztest_aux_template, ztest_opts.zo_dir, 819243505Smm pool == NULL ? 
ztest_opts.zo_pool : pool, 820243505Smm aux, vdev); 821185029Spjd } else { 822219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 823236143Smm (void) snprintf(path, sizeof (pathbuf), 824236143Smm ztest_dev_template, ztest_opts.zo_dir, 825243505Smm pool == NULL ? ztest_opts.zo_pool : pool, vdev); 826185029Spjd } 827185029Spjd } 828185029Spjd 829185029Spjd if (size != 0) { 830185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 831168404Spjd if (fd == -1) 832185029Spjd fatal(1, "can't open %s", path); 833168404Spjd if (ftruncate(fd, size) != 0) 834185029Spjd fatal(1, "can't ftruncate %s", path); 835168404Spjd (void) close(fd); 836168404Spjd } 837168404Spjd 838168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 839168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 840185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 841168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 842168404Spjd 843168404Spjd return (file); 844168404Spjd} 845168404Spjd 846168404Spjdstatic nvlist_t * 847243505Smmmake_vdev_raidz(char *path, char *aux, char *pool, size_t size, 848243505Smm uint64_t ashift, int r) 849168404Spjd{ 850168404Spjd nvlist_t *raidz, **child; 851168404Spjd int c; 852168404Spjd 853168404Spjd if (r < 2) 854243505Smm return (make_vdev_file(path, aux, pool, size, ashift)); 855168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 856168404Spjd 857168404Spjd for (c = 0; c < r; c++) 858243505Smm child[c] = make_vdev_file(path, aux, pool, size, ashift); 859168404Spjd 860168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 861168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 862168404Spjd VDEV_TYPE_RAIDZ) == 0); 863168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 864236143Smm ztest_opts.zo_raidz_parity) == 0); 865168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 866168404Spjd child, r) == 0); 
867168404Spjd 868168404Spjd for (c = 0; c < r; c++) 869168404Spjd nvlist_free(child[c]); 870168404Spjd 871168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 872168404Spjd 873168404Spjd return (raidz); 874168404Spjd} 875168404Spjd 876168404Spjdstatic nvlist_t * 877243505Smmmake_vdev_mirror(char *path, char *aux, char *pool, size_t size, 878243505Smm uint64_t ashift, int r, int m) 879168404Spjd{ 880168404Spjd nvlist_t *mirror, **child; 881168404Spjd int c; 882168404Spjd 883168404Spjd if (m < 1) 884243505Smm return (make_vdev_raidz(path, aux, pool, size, ashift, r)); 885168404Spjd 886168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 887168404Spjd 888168404Spjd for (c = 0; c < m; c++) 889243505Smm child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); 890168404Spjd 891168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 892168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 893168404Spjd VDEV_TYPE_MIRROR) == 0); 894168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 895168404Spjd child, m) == 0); 896168404Spjd 897168404Spjd for (c = 0; c < m; c++) 898168404Spjd nvlist_free(child[c]); 899168404Spjd 900168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 901168404Spjd 902168404Spjd return (mirror); 903168404Spjd} 904168404Spjd 905168404Spjdstatic nvlist_t * 906243505Smmmake_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, 907243505Smm int log, int r, int m, int t) 908168404Spjd{ 909168404Spjd nvlist_t *root, **child; 910168404Spjd int c; 911168404Spjd 912168404Spjd ASSERT(t > 0); 913168404Spjd 914168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); 915168404Spjd 916185029Spjd for (c = 0; c < t; c++) { 917243505Smm child[c] = make_vdev_mirror(path, aux, pool, size, ashift, 918243505Smm r, m); 919185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 920185029Spjd log) == 0); 921185029Spjd } 922168404Spjd 923168404Spjd VERIFY(nvlist_alloc(&root, 
NV_UNIQUE_NAME, 0) == 0); 924168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 925185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 926168404Spjd child, t) == 0); 927168404Spjd 928168404Spjd for (c = 0; c < t; c++) 929168404Spjd nvlist_free(child[c]); 930168404Spjd 931168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 932168404Spjd 933168404Spjd return (root); 934168404Spjd} 935168404Spjd 936243505Smm/* 937243505Smm * Find a random spa version. Returns back a random spa version in the 938243505Smm * range [initial_version, SPA_VERSION_FEATURES]. 939243505Smm */ 940243505Smmstatic uint64_t 941243505Smmztest_random_spa_version(uint64_t initial_version) 942243505Smm{ 943243505Smm uint64_t version = initial_version; 944243505Smm 945243505Smm if (version <= SPA_VERSION_BEFORE_FEATURES) { 946243505Smm version = version + 947243505Smm ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); 948243505Smm } 949243505Smm 950243505Smm if (version > SPA_VERSION_BEFORE_FEATURES) 951243505Smm version = SPA_VERSION_FEATURES; 952243505Smm 953243505Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 954243505Smm return (version); 955243505Smm} 956243505Smm 957219089Spjdstatic int 958219089Spjdztest_random_blocksize(void) 959219089Spjd{ 960219089Spjd return (1 << (SPA_MINBLOCKSHIFT + 961219089Spjd ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); 962219089Spjd} 963219089Spjd 964219089Spjdstatic int 965219089Spjdztest_random_ibshift(void) 966219089Spjd{ 967219089Spjd return (DN_MIN_INDBLKSHIFT + 968219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 969219089Spjd} 970219089Spjd 971219089Spjdstatic uint64_t 972219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 973219089Spjd{ 974219089Spjd uint64_t top; 975219089Spjd vdev_t *rvd = spa->spa_root_vdev; 976219089Spjd vdev_t *tvd; 977219089Spjd 978219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 979219089Spjd 
980219089Spjd do { 981219089Spjd top = ztest_random(rvd->vdev_children); 982219089Spjd tvd = rvd->vdev_child[top]; 983219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 984219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 985219089Spjd 986219089Spjd return (top); 987219089Spjd} 988219089Spjd 989219089Spjdstatic uint64_t 990219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 991219089Spjd{ 992219089Spjd uint64_t value; 993219089Spjd 994219089Spjd do { 995219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 996219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 997219089Spjd 998219089Spjd return (value); 999219089Spjd} 1000219089Spjd 1001219089Spjdstatic int 1002219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 1003219089Spjd boolean_t inherit) 1004219089Spjd{ 1005219089Spjd const char *propname = zfs_prop_to_name(prop); 1006219089Spjd const char *valname; 1007219089Spjd char setpoint[MAXPATHLEN]; 1008219089Spjd uint64_t curval; 1009219089Spjd int error; 1010219089Spjd 1011219089Spjd error = dsl_prop_set(osname, propname, 1012219089Spjd (inherit ? 
ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), 1013219089Spjd sizeof (value), 1, &value); 1014219089Spjd 1015219089Spjd if (error == ENOSPC) { 1016219089Spjd ztest_record_enospc(FTAG); 1017219089Spjd return (error); 1018219089Spjd } 1019240415Smm ASSERT0(error); 1020219089Spjd 1021219089Spjd VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval), 1022219089Spjd 1, &curval, setpoint), ==, 0); 1023219089Spjd 1024236143Smm if (ztest_opts.zo_verbose >= 6) { 1025219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 1026219089Spjd (void) printf("%s %s = %s at '%s'\n", 1027219089Spjd osname, propname, valname, setpoint); 1028219089Spjd } 1029219089Spjd 1030219089Spjd return (error); 1031219089Spjd} 1032219089Spjd 1033219089Spjdstatic int 1034236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 1035219089Spjd{ 1036236143Smm spa_t *spa = ztest_spa; 1037219089Spjd nvlist_t *props = NULL; 1038219089Spjd int error; 1039219089Spjd 1040219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 1041219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1042219089Spjd 1043219089Spjd error = spa_prop_set(spa, props); 1044219089Spjd 1045219089Spjd nvlist_free(props); 1046219089Spjd 1047219089Spjd if (error == ENOSPC) { 1048219089Spjd ztest_record_enospc(FTAG); 1049219089Spjd return (error); 1050219089Spjd } 1051240415Smm ASSERT0(error); 1052219089Spjd 1053219089Spjd return (error); 1054219089Spjd} 1055219089Spjd 1056168404Spjdstatic void 1057219089Spjdztest_rll_init(rll_t *rll) 1058168404Spjd{ 1059219089Spjd rll->rll_writer = NULL; 1060219089Spjd rll->rll_readers = 0; 1061219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1062219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); 1063219089Spjd} 1064219089Spjd 1065219089Spjdstatic void 1066219089Spjdztest_rll_destroy(rll_t *rll) 1067219089Spjd{ 1068219089Spjd ASSERT(rll->rll_writer == NULL); 1069219089Spjd ASSERT(rll->rll_readers == 0); 
1070219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 1071219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1072219089Spjd} 1073219089Spjd 1074219089Spjdstatic void 1075219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1076219089Spjd{ 1077219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1078219089Spjd 1079219089Spjd if (type == RL_READER) { 1080219089Spjd while (rll->rll_writer != NULL) 1081219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1082219089Spjd rll->rll_readers++; 1083219089Spjd } else { 1084219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1085219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1086219089Spjd rll->rll_writer = curthread; 1087219089Spjd } 1088219089Spjd 1089219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1090219089Spjd} 1091219089Spjd 1092219089Spjdstatic void 1093219089Spjdztest_rll_unlock(rll_t *rll) 1094219089Spjd{ 1095219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1096219089Spjd 1097219089Spjd if (rll->rll_writer) { 1098219089Spjd ASSERT(rll->rll_readers == 0); 1099219089Spjd rll->rll_writer = NULL; 1100219089Spjd } else { 1101219089Spjd ASSERT(rll->rll_readers != 0); 1102219089Spjd ASSERT(rll->rll_writer == NULL); 1103219089Spjd rll->rll_readers--; 1104219089Spjd } 1105219089Spjd 1106219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1107219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1108219089Spjd 1109219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1110219089Spjd} 1111219089Spjd 1112219089Spjdstatic void 1113219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1114219089Spjd{ 1115219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1116219089Spjd 1117219089Spjd ztest_rll_lock(rll, type); 1118219089Spjd} 1119219089Spjd 1120219089Spjdstatic void 1121219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1122219089Spjd{ 1123219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 
1)]; 1124219089Spjd 1125219089Spjd ztest_rll_unlock(rll); 1126219089Spjd} 1127219089Spjd 1128219089Spjdstatic rl_t * 1129219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1130219089Spjd uint64_t size, rl_type_t type) 1131219089Spjd{ 1132219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1133219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1134219089Spjd rl_t *rl; 1135219089Spjd 1136219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1137219089Spjd rl->rl_object = object; 1138219089Spjd rl->rl_offset = offset; 1139219089Spjd rl->rl_size = size; 1140219089Spjd rl->rl_lock = rll; 1141219089Spjd 1142219089Spjd ztest_rll_lock(rll, type); 1143219089Spjd 1144219089Spjd return (rl); 1145219089Spjd} 1146219089Spjd 1147219089Spjdstatic void 1148219089Spjdztest_range_unlock(rl_t *rl) 1149219089Spjd{ 1150219089Spjd rll_t *rll = rl->rl_lock; 1151219089Spjd 1152219089Spjd ztest_rll_unlock(rll); 1153219089Spjd 1154219089Spjd umem_free(rl, sizeof (*rl)); 1155219089Spjd} 1156219089Spjd 1157219089Spjdstatic void 1158236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1159219089Spjd{ 1160219089Spjd zd->zd_os = os; 1161219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1162236143Smm zd->zd_shared = szd; 1163219089Spjd dmu_objset_name(os, zd->zd_name); 1164219089Spjd 1165236143Smm if (zd->zd_shared != NULL) 1166236143Smm zd->zd_shared->zd_seq = 0; 1167236143Smm 1168224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1169219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1170219089Spjd 1171219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1172219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1173219089Spjd 1174219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1175219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1176219089Spjd} 1177219089Spjd 1178219089Spjdstatic void 1179219089Spjdztest_zd_fini(ztest_ds_t *zd) 1180219089Spjd{ 
1181219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); 1182219089Spjd 1183219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1184219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1185219089Spjd 1186219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1187219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1188219089Spjd} 1189219089Spjd 1190219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1191219089Spjd 1192219089Spjdstatic uint64_t 1193219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1194219089Spjd{ 1195219089Spjd uint64_t txg; 1196168404Spjd int error; 1197168404Spjd 1198219089Spjd /* 1199219089Spjd * Attempt to assign tx to some transaction group. 1200219089Spjd */ 1201219089Spjd error = dmu_tx_assign(tx, txg_how); 1202168404Spjd if (error) { 1203219089Spjd if (error == ERESTART) { 1204219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1205219089Spjd dmu_tx_wait(tx); 1206219089Spjd } else { 1207219089Spjd ASSERT3U(error, ==, ENOSPC); 1208219089Spjd ztest_record_enospc(tag); 1209219089Spjd } 1210219089Spjd dmu_tx_abort(tx); 1211219089Spjd return (0); 1212168404Spjd } 1213219089Spjd txg = dmu_tx_get_txg(tx); 1214219089Spjd ASSERT(txg != 0); 1215219089Spjd return (txg); 1216168404Spjd} 1217168404Spjd 1218219089Spjdstatic void 1219219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1220168404Spjd{ 1221219089Spjd uint64_t *ip = buf; 1222219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1223168404Spjd 1224219089Spjd while (ip < ip_end) 1225219089Spjd *ip++ = value; 1226219089Spjd} 1227168404Spjd 1228219089Spjdstatic boolean_t 1229219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1230219089Spjd{ 1231219089Spjd uint64_t *ip = buf; 1232219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1233219089Spjd uint64_t diff = 0; 1234168404Spjd 1235219089Spjd while (ip < ip_end) 1236219089Spjd diff |= (value - *ip++); 
1237219089Spjd 1238219089Spjd return (diff == 0); 1239168404Spjd} 1240168404Spjd 1241219089Spjdstatic void 1242219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1243219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1244168404Spjd{ 1245219089Spjd bt->bt_magic = BT_MAGIC; 1246219089Spjd bt->bt_objset = dmu_objset_id(os); 1247219089Spjd bt->bt_object = object; 1248219089Spjd bt->bt_offset = offset; 1249219089Spjd bt->bt_gen = gen; 1250219089Spjd bt->bt_txg = txg; 1251219089Spjd bt->bt_crtxg = crtxg; 1252168404Spjd} 1253168404Spjd 1254219089Spjdstatic void 1255219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1256219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1257219089Spjd{ 1258219089Spjd ASSERT(bt->bt_magic == BT_MAGIC); 1259219089Spjd ASSERT(bt->bt_objset == dmu_objset_id(os)); 1260219089Spjd ASSERT(bt->bt_object == object); 1261219089Spjd ASSERT(bt->bt_offset == offset); 1262219089Spjd ASSERT(bt->bt_gen <= gen); 1263219089Spjd ASSERT(bt->bt_txg <= txg); 1264219089Spjd ASSERT(bt->bt_crtxg == crtxg); 1265219089Spjd} 1266219089Spjd 1267219089Spjdstatic ztest_block_tag_t * 1268219089Spjdztest_bt_bonus(dmu_buf_t *db) 1269219089Spjd{ 1270219089Spjd dmu_object_info_t doi; 1271219089Spjd ztest_block_tag_t *bt; 1272219089Spjd 1273219089Spjd dmu_object_info_from_db(db, &doi); 1274219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1275219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1276219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1277219089Spjd 1278219089Spjd return (bt); 1279219089Spjd} 1280219089Spjd 1281219089Spjd/* 1282219089Spjd * ZIL logging ops 1283219089Spjd */ 1284219089Spjd 1285219089Spjd#define lrz_type lr_mode 1286219089Spjd#define lrz_blocksize lr_uid 1287219089Spjd#define lrz_ibshift lr_gid 1288219089Spjd#define lrz_bonustype lr_rdev 1289219089Spjd#define lrz_bonuslen lr_crtime[1] 1290219089Spjd 
1291219089Spjdstatic void 1292219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1293219089Spjd{ 1294219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1295219089Spjd size_t namesize = strlen(name) + 1; 1296219089Spjd itx_t *itx; 1297219089Spjd 1298219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1299219089Spjd return; 1300219089Spjd 1301219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1302219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1303219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1304219089Spjd 1305219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1306219089Spjd} 1307219089Spjd 1308219089Spjdstatic void 1309219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1310219089Spjd{ 1311219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1312219089Spjd size_t namesize = strlen(name) + 1; 1313219089Spjd itx_t *itx; 1314219089Spjd 1315219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1316219089Spjd return; 1317219089Spjd 1318219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1319219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1320219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1321219089Spjd 1322219089Spjd itx->itx_oid = object; 1323219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1324219089Spjd} 1325219089Spjd 1326219089Spjdstatic void 1327219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1328219089Spjd{ 1329219089Spjd itx_t *itx; 1330219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1331219089Spjd 1332219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1333219089Spjd return; 1334219089Spjd 1335219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA) 1336219089Spjd write_state = WR_INDIRECT; 1337219089Spjd 1338219089Spjd itx = zil_itx_create(TX_WRITE, 1339219089Spjd sizeof (*lr) + (write_state == WR_COPIED ? 
lr->lr_length : 0)); 1340219089Spjd 1341219089Spjd if (write_state == WR_COPIED && 1342219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1343219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1344219089Spjd zil_itx_destroy(itx); 1345219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1346219089Spjd write_state = WR_NEED_COPY; 1347219089Spjd } 1348219089Spjd itx->itx_private = zd; 1349219089Spjd itx->itx_wr_state = write_state; 1350219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1351219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1352219089Spjd 1353219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1354219089Spjd sizeof (*lr) - sizeof (lr_t)); 1355219089Spjd 1356219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1357219089Spjd} 1358219089Spjd 1359219089Spjdstatic void 1360219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1361219089Spjd{ 1362219089Spjd itx_t *itx; 1363219089Spjd 1364219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1365219089Spjd return; 1366219089Spjd 1367219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1368219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1369219089Spjd sizeof (*lr) - sizeof (lr_t)); 1370219089Spjd 1371219089Spjd itx->itx_sync = B_FALSE; 1372219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1373219089Spjd} 1374219089Spjd 1375219089Spjdstatic void 1376219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1377219089Spjd{ 1378219089Spjd itx_t *itx; 1379219089Spjd 1380219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1381219089Spjd return; 1382219089Spjd 1383219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1384219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1385219089Spjd sizeof (*lr) - sizeof (lr_t)); 1386219089Spjd 1387219089Spjd itx->itx_sync = B_FALSE; 1388219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1389219089Spjd} 1390219089Spjd 1391219089Spjd/* 1392219089Spjd 
* ZIL replay ops 1393219089Spjd */ 1394168404Spjdstatic int 1395219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1396168404Spjd{ 1397219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1398219089Spjd objset_t *os = zd->zd_os; 1399219089Spjd ztest_block_tag_t *bbt; 1400219089Spjd dmu_buf_t *db; 1401168404Spjd dmu_tx_t *tx; 1402219089Spjd uint64_t txg; 1403219089Spjd int error = 0; 1404168404Spjd 1405168404Spjd if (byteswap) 1406168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1407168404Spjd 1408219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1409219089Spjd ASSERT(name[0] != '\0'); 1410219089Spjd 1411168404Spjd tx = dmu_tx_create(os); 1412219089Spjd 1413219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1414219089Spjd 1415219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1416219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1417219089Spjd } else { 1418219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1419219089Spjd } 1420219089Spjd 1421219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1422219089Spjd if (txg == 0) 1423219089Spjd return (ENOSPC); 1424219089Spjd 1425219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1426219089Spjd 1427219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1428219089Spjd if (lr->lr_foid == 0) { 1429219089Spjd lr->lr_foid = zap_create(os, 1430219089Spjd lr->lrz_type, lr->lrz_bonustype, 1431219089Spjd lr->lrz_bonuslen, tx); 1432219089Spjd } else { 1433219089Spjd error = zap_create_claim(os, lr->lr_foid, 1434219089Spjd lr->lrz_type, lr->lrz_bonustype, 1435219089Spjd lr->lrz_bonuslen, tx); 1436219089Spjd } 1437219089Spjd } else { 1438219089Spjd if (lr->lr_foid == 0) { 1439219089Spjd lr->lr_foid = dmu_object_alloc(os, 1440219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1441219089Spjd lr->lrz_bonuslen, tx); 1442219089Spjd } else { 1443219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1444219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1445219089Spjd 
lr->lrz_bonuslen, tx); 1446219089Spjd } 1447219089Spjd } 1448219089Spjd 1449168404Spjd if (error) { 1450219089Spjd ASSERT3U(error, ==, EEXIST); 1451219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1452219089Spjd dmu_tx_commit(tx); 1453168404Spjd return (error); 1454168404Spjd } 1455168404Spjd 1456219089Spjd ASSERT(lr->lr_foid != 0); 1457219089Spjd 1458219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1459219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1460219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1461219089Spjd 1462219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1463219089Spjd bbt = ztest_bt_bonus(db); 1464219089Spjd dmu_buf_will_dirty(db, tx); 1465219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1466219089Spjd dmu_buf_rele(db, FTAG); 1467219089Spjd 1468219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1469219089Spjd &lr->lr_foid, tx)); 1470219089Spjd 1471219089Spjd (void) ztest_log_create(zd, tx, lr); 1472219089Spjd 1473168404Spjd dmu_tx_commit(tx); 1474168404Spjd 1475219089Spjd return (0); 1476219089Spjd} 1477219089Spjd 1478219089Spjdstatic int 1479219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1480219089Spjd{ 1481219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1482219089Spjd objset_t *os = zd->zd_os; 1483219089Spjd dmu_object_info_t doi; 1484219089Spjd dmu_tx_t *tx; 1485219089Spjd uint64_t object, txg; 1486219089Spjd 1487219089Spjd if (byteswap) 1488219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1489219089Spjd 1490219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1491219089Spjd ASSERT(name[0] != '\0'); 1492219089Spjd 1493219089Spjd VERIFY3U(0, ==, 1494219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1495219089Spjd ASSERT(object != 0); 1496219089Spjd 1497219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1498219089Spjd 1499219089Spjd VERIFY3U(0, ==, dmu_object_info(os, 
object, &doi)); 1500219089Spjd 1501219089Spjd tx = dmu_tx_create(os); 1502219089Spjd 1503219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1504219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1505219089Spjd 1506219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1507219089Spjd if (txg == 0) { 1508219089Spjd ztest_object_unlock(zd, object); 1509219089Spjd return (ENOSPC); 1510168404Spjd } 1511168404Spjd 1512219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1513219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1514219089Spjd } else { 1515219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1516219089Spjd } 1517219089Spjd 1518219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1519219089Spjd 1520219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1521219089Spjd 1522219089Spjd dmu_tx_commit(tx); 1523219089Spjd 1524219089Spjd ztest_object_unlock(zd, object); 1525219089Spjd 1526219089Spjd return (0); 1527168404Spjd} 1528168404Spjd 1529168404Spjdstatic int 1530219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1531168404Spjd{ 1532219089Spjd objset_t *os = zd->zd_os; 1533219089Spjd void *data = lr + 1; /* data follows lr */ 1534219089Spjd uint64_t offset, length; 1535219089Spjd ztest_block_tag_t *bt = data; 1536219089Spjd ztest_block_tag_t *bbt; 1537219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1538219089Spjd dmu_object_info_t doi; 1539168404Spjd dmu_tx_t *tx; 1540219089Spjd dmu_buf_t *db; 1541219089Spjd arc_buf_t *abuf = NULL; 1542219089Spjd rl_t *rl; 1543168404Spjd 1544168404Spjd if (byteswap) 1545168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1546168404Spjd 1547219089Spjd offset = lr->lr_offset; 1548219089Spjd length = lr->lr_length; 1549219089Spjd 1550219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1551219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1552219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 1553219089Spjd if (length < 
blocksize) { 1554219089Spjd offset -= offset % blocksize; 1555219089Spjd length = blocksize; 1556219089Spjd } 1557219089Spjd } 1558219089Spjd 1559219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1560219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1561219089Spjd 1562219089Spjd if (bt->bt_magic != BT_MAGIC) 1563219089Spjd bt = NULL; 1564219089Spjd 1565219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1566219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1567219089Spjd 1568219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1569219089Spjd 1570219089Spjd dmu_object_info_from_db(db, &doi); 1571219089Spjd 1572219089Spjd bbt = ztest_bt_bonus(db); 1573219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1574219089Spjd gen = bbt->bt_gen; 1575219089Spjd crtxg = bbt->bt_crtxg; 1576219089Spjd lrtxg = lr->lr_common.lrc_txg; 1577219089Spjd 1578168404Spjd tx = dmu_tx_create(os); 1579219089Spjd 1580219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1581219089Spjd 1582219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1583219089Spjd P2PHASE(offset, length) == 0) 1584219089Spjd abuf = dmu_request_arcbuf(db, length); 1585219089Spjd 1586219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1587219089Spjd if (txg == 0) { 1588219089Spjd if (abuf != NULL) 1589219089Spjd dmu_return_arcbuf(abuf); 1590219089Spjd dmu_buf_rele(db, FTAG); 1591219089Spjd ztest_range_unlock(rl); 1592219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1593219089Spjd return (ENOSPC); 1594168404Spjd } 1595168404Spjd 1596219089Spjd if (bt != NULL) { 1597219089Spjd /* 1598219089Spjd * Usually, verify the old data before writing new data -- 1599219089Spjd * but not always, because we also want to verify correct 1600219089Spjd * behavior when the data was not recently read into cache. 
1601219089Spjd */ 1602219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1603219089Spjd if (ztest_random(4) != 0) { 1604219089Spjd int prefetch = ztest_random(2) ? 1605219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1606219089Spjd ztest_block_tag_t rbt; 1607219089Spjd 1608219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1609219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1610219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1611219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1612219089Spjd offset, gen, txg, crtxg); 1613219089Spjd } 1614219089Spjd } 1615219089Spjd 1616219089Spjd /* 1617219089Spjd * Writes can appear to be newer than the bonus buffer because 1618219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1619219089Spjd * open-context data, which may be different than the data 1620219089Spjd * as it was when the write was generated. 1621219089Spjd */ 1622219089Spjd if (zd->zd_zilog->zl_replay) { 1623219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1624219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1625219089Spjd bt->bt_crtxg); 1626219089Spjd } 1627219089Spjd 1628219089Spjd /* 1629219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1630219089Spjd * so that all of the usual ASSERTs will work. 
1631219089Spjd */ 1632219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1633219089Spjd } 1634219089Spjd 1635219089Spjd if (abuf == NULL) { 1636219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1637219089Spjd } else { 1638219089Spjd bcopy(data, abuf->b_data, length); 1639219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1640219089Spjd } 1641219089Spjd 1642219089Spjd (void) ztest_log_write(zd, tx, lr); 1643219089Spjd 1644219089Spjd dmu_buf_rele(db, FTAG); 1645219089Spjd 1646168404Spjd dmu_tx_commit(tx); 1647168404Spjd 1648219089Spjd ztest_range_unlock(rl); 1649219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1650219089Spjd 1651219089Spjd return (0); 1652168404Spjd} 1653168404Spjd 1654219089Spjdstatic int 1655219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1656219089Spjd{ 1657219089Spjd objset_t *os = zd->zd_os; 1658219089Spjd dmu_tx_t *tx; 1659219089Spjd uint64_t txg; 1660219089Spjd rl_t *rl; 1661219089Spjd 1662219089Spjd if (byteswap) 1663219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1664219089Spjd 1665219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1666219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1667219089Spjd RL_WRITER); 1668219089Spjd 1669219089Spjd tx = dmu_tx_create(os); 1670219089Spjd 1671219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1672219089Spjd 1673219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1674219089Spjd if (txg == 0) { 1675219089Spjd ztest_range_unlock(rl); 1676219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1677219089Spjd return (ENOSPC); 1678219089Spjd } 1679219089Spjd 1680219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1681219089Spjd lr->lr_length, tx) == 0); 1682219089Spjd 1683219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1684219089Spjd 1685219089Spjd dmu_tx_commit(tx); 1686219089Spjd 1687219089Spjd ztest_range_unlock(rl); 1688219089Spjd 
ztest_object_unlock(zd, lr->lr_foid); 1689219089Spjd 1690219089Spjd return (0); 1691219089Spjd} 1692219089Spjd 1693219089Spjdstatic int 1694219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1695219089Spjd{ 1696219089Spjd objset_t *os = zd->zd_os; 1697219089Spjd dmu_tx_t *tx; 1698219089Spjd dmu_buf_t *db; 1699219089Spjd ztest_block_tag_t *bbt; 1700219089Spjd uint64_t txg, lrtxg, crtxg; 1701219089Spjd 1702219089Spjd if (byteswap) 1703219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1704219089Spjd 1705219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1706219089Spjd 1707219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1708219089Spjd 1709219089Spjd tx = dmu_tx_create(os); 1710219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1711219089Spjd 1712219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1713219089Spjd if (txg == 0) { 1714219089Spjd dmu_buf_rele(db, FTAG); 1715219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1716219089Spjd return (ENOSPC); 1717219089Spjd } 1718219089Spjd 1719219089Spjd bbt = ztest_bt_bonus(db); 1720219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1721219089Spjd crtxg = bbt->bt_crtxg; 1722219089Spjd lrtxg = lr->lr_common.lrc_txg; 1723219089Spjd 1724219089Spjd if (zd->zd_zilog->zl_replay) { 1725219089Spjd ASSERT(lr->lr_size != 0); 1726219089Spjd ASSERT(lr->lr_mode != 0); 1727219089Spjd ASSERT(lrtxg != 0); 1728219089Spjd } else { 1729219089Spjd /* 1730219089Spjd * Randomly change the size and increment the generation. 1731219089Spjd */ 1732219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1733219089Spjd sizeof (*bbt); 1734219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1735219089Spjd ASSERT(lrtxg == 0); 1736219089Spjd } 1737219089Spjd 1738219089Spjd /* 1739219089Spjd * Verify that the current bonus buffer is not newer than our txg. 
1740219089Spjd */ 1741219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1742219089Spjd MAX(txg, lrtxg), crtxg); 1743219089Spjd 1744219089Spjd dmu_buf_will_dirty(db, tx); 1745219089Spjd 1746219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1747219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1748240415Smm VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); 1749219089Spjd bbt = ztest_bt_bonus(db); 1750219089Spjd 1751219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1752219089Spjd 1753219089Spjd dmu_buf_rele(db, FTAG); 1754219089Spjd 1755219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1756219089Spjd 1757219089Spjd dmu_tx_commit(tx); 1758219089Spjd 1759219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1760219089Spjd 1761219089Spjd return (0); 1762219089Spjd} 1763219089Spjd 1764168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1765168404Spjd NULL, /* 0 no such transaction type */ 1766168404Spjd ztest_replay_create, /* TX_CREATE */ 1767168404Spjd NULL, /* TX_MKDIR */ 1768168404Spjd NULL, /* TX_MKXATTR */ 1769168404Spjd NULL, /* TX_SYMLINK */ 1770168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1771168404Spjd NULL, /* TX_RMDIR */ 1772168404Spjd NULL, /* TX_LINK */ 1773168404Spjd NULL, /* TX_RENAME */ 1774219089Spjd ztest_replay_write, /* TX_WRITE */ 1775219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1776219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1777168404Spjd NULL, /* TX_ACL */ 1778209962Smm NULL, /* TX_CREATE_ACL */ 1779209962Smm NULL, /* TX_CREATE_ATTR */ 1780209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1781209962Smm NULL, /* TX_MKDIR_ACL */ 1782209962Smm NULL, /* TX_MKDIR_ATTR */ 1783209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1784209962Smm NULL, /* TX_WRITE2 */ 1785168404Spjd}; 1786168404Spjd 1787168404Spjd/* 1788219089Spjd * ZIL get_data callbacks 1789219089Spjd */ 1790219089Spjd 1791219089Spjdstatic void 1792219089Spjdztest_get_done(zgd_t *zgd, int error) 1793219089Spjd{ 1794219089Spjd ztest_ds_t 
*zd = zgd->zgd_private; 1795219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1796219089Spjd 1797219089Spjd if (zgd->zgd_db) 1798219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1799219089Spjd 1800219089Spjd ztest_range_unlock(zgd->zgd_rl); 1801219089Spjd ztest_object_unlock(zd, object); 1802219089Spjd 1803219089Spjd if (error == 0 && zgd->zgd_bp) 1804219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1805219089Spjd 1806219089Spjd umem_free(zgd, sizeof (*zgd)); 1807219089Spjd} 1808219089Spjd 1809219089Spjdstatic int 1810219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1811219089Spjd{ 1812219089Spjd ztest_ds_t *zd = arg; 1813219089Spjd objset_t *os = zd->zd_os; 1814219089Spjd uint64_t object = lr->lr_foid; 1815219089Spjd uint64_t offset = lr->lr_offset; 1816219089Spjd uint64_t size = lr->lr_length; 1817219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1818219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1819219089Spjd uint64_t crtxg; 1820219089Spjd dmu_object_info_t doi; 1821219089Spjd dmu_buf_t *db; 1822219089Spjd zgd_t *zgd; 1823219089Spjd int error; 1824219089Spjd 1825219089Spjd ztest_object_lock(zd, object, RL_READER); 1826219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1827219089Spjd if (error) { 1828219089Spjd ztest_object_unlock(zd, object); 1829219089Spjd return (error); 1830219089Spjd } 1831219089Spjd 1832219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1833219089Spjd 1834219089Spjd if (crtxg == 0 || crtxg > txg) { 1835219089Spjd dmu_buf_rele(db, FTAG); 1836219089Spjd ztest_object_unlock(zd, object); 1837219089Spjd return (ENOENT); 1838219089Spjd } 1839219089Spjd 1840219089Spjd dmu_object_info_from_db(db, &doi); 1841219089Spjd dmu_buf_rele(db, FTAG); 1842219089Spjd db = NULL; 1843219089Spjd 1844219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1845219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1846219089Spjd zgd->zgd_private = zd; 1847219089Spjd 1848219089Spjd if (buf != NULL) { /* immediate write */ 1849219089Spjd 
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1850219089Spjd RL_READER); 1851219089Spjd 1852219089Spjd error = dmu_read(os, object, offset, size, buf, 1853219089Spjd DMU_READ_NO_PREFETCH); 1854219089Spjd ASSERT(error == 0); 1855219089Spjd } else { 1856219089Spjd size = doi.doi_data_block_size; 1857219089Spjd if (ISP2(size)) { 1858219089Spjd offset = P2ALIGN(offset, size); 1859219089Spjd } else { 1860219089Spjd ASSERT(offset < size); 1861219089Spjd offset = 0; 1862219089Spjd } 1863219089Spjd 1864219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1865219089Spjd RL_READER); 1866219089Spjd 1867219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1868219089Spjd DMU_READ_NO_PREFETCH); 1869219089Spjd 1870219089Spjd if (error == 0) { 1871243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1872243524Smm if (obp) { 1873243524Smm ASSERT(BP_IS_HOLE(bp)); 1874243524Smm *bp = *obp; 1875243524Smm } 1876243524Smm 1877219089Spjd zgd->zgd_db = db; 1878219089Spjd zgd->zgd_bp = bp; 1879219089Spjd 1880219089Spjd ASSERT(db->db_offset == offset); 1881219089Spjd ASSERT(db->db_size == size); 1882219089Spjd 1883219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1884219089Spjd ztest_get_done, zgd); 1885219089Spjd 1886219089Spjd if (error == 0) 1887219089Spjd return (0); 1888219089Spjd } 1889219089Spjd } 1890219089Spjd 1891219089Spjd ztest_get_done(zgd, error); 1892219089Spjd 1893219089Spjd return (error); 1894219089Spjd} 1895219089Spjd 1896219089Spjdstatic void * 1897219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1898219089Spjd{ 1899219089Spjd char *lr; 1900219089Spjd size_t namesize = name ? 
strlen(name) + 1 : 0; 1901219089Spjd 1902219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1903219089Spjd 1904219089Spjd if (name) 1905219089Spjd bcopy(name, lr + lrsize, namesize); 1906219089Spjd 1907219089Spjd return (lr); 1908219089Spjd} 1909219089Spjd 1910219089Spjdvoid 1911219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1912219089Spjd{ 1913219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1914219089Spjd 1915219089Spjd umem_free(lr, lrsize + namesize); 1916219089Spjd} 1917219089Spjd 1918219089Spjd/* 1919219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1920219089Spjd */ 1921219089Spjdstatic int 1922219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1923219089Spjd{ 1924219089Spjd int missing = 0; 1925219089Spjd int error; 1926219089Spjd 1927219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1928219089Spjd 1929219089Spjd for (int i = 0; i < count; i++, od++) { 1930219089Spjd od->od_object = 0; 1931219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1932219089Spjd sizeof (uint64_t), 1, &od->od_object); 1933219089Spjd if (error) { 1934219089Spjd ASSERT(error == ENOENT); 1935219089Spjd ASSERT(od->od_object == 0); 1936219089Spjd missing++; 1937219089Spjd } else { 1938219089Spjd dmu_buf_t *db; 1939219089Spjd ztest_block_tag_t *bbt; 1940219089Spjd dmu_object_info_t doi; 1941219089Spjd 1942219089Spjd ASSERT(od->od_object != 0); 1943219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1944219089Spjd 1945219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1946219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1947219089Spjd od->od_object, FTAG, &db)); 1948219089Spjd dmu_object_info_from_db(db, &doi); 1949219089Spjd bbt = ztest_bt_bonus(db); 1950219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1951219089Spjd od->od_type = doi.doi_type; 1952219089Spjd od->od_blocksize = doi.doi_data_block_size; 1953219089Spjd od->od_gen = bbt->bt_gen; 1954219089Spjd 
dmu_buf_rele(db, FTAG); 1955219089Spjd ztest_object_unlock(zd, od->od_object); 1956219089Spjd } 1957219089Spjd } 1958219089Spjd 1959219089Spjd return (missing); 1960219089Spjd} 1961219089Spjd 1962219089Spjdstatic int 1963219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1964219089Spjd{ 1965219089Spjd int missing = 0; 1966219089Spjd 1967219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1968219089Spjd 1969219089Spjd for (int i = 0; i < count; i++, od++) { 1970219089Spjd if (missing) { 1971219089Spjd od->od_object = 0; 1972219089Spjd missing++; 1973219089Spjd continue; 1974219089Spjd } 1975219089Spjd 1976219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1977219089Spjd 1978219089Spjd lr->lr_doid = od->od_dir; 1979219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 1980219089Spjd lr->lrz_type = od->od_crtype; 1981219089Spjd lr->lrz_blocksize = od->od_crblocksize; 1982219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 1983219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 1984219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 1985219089Spjd lr->lr_gen = od->od_crgen; 1986219089Spjd lr->lr_crtime[0] = time(NULL); 1987219089Spjd 1988219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 1989219089Spjd ASSERT(missing == 0); 1990219089Spjd od->od_object = 0; 1991219089Spjd missing++; 1992219089Spjd } else { 1993219089Spjd od->od_object = lr->lr_foid; 1994219089Spjd od->od_type = od->od_crtype; 1995219089Spjd od->od_blocksize = od->od_crblocksize; 1996219089Spjd od->od_gen = od->od_crgen; 1997219089Spjd ASSERT(od->od_object != 0); 1998219089Spjd } 1999219089Spjd 2000219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2001219089Spjd } 2002219089Spjd 2003219089Spjd return (missing); 2004219089Spjd} 2005219089Spjd 2006219089Spjdstatic int 2007219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 2008219089Spjd{ 2009219089Spjd int missing = 0; 2010219089Spjd int error; 2011219089Spjd 2012219089Spjd 
ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 2013219089Spjd 2014219089Spjd od += count - 1; 2015219089Spjd 2016219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 2017219089Spjd if (missing) { 2018219089Spjd missing++; 2019219089Spjd continue; 2020219089Spjd } 2021219089Spjd 2022243524Smm /* 2023243524Smm * No object was found. 2024243524Smm */ 2025219089Spjd if (od->od_object == 0) 2026219089Spjd continue; 2027219089Spjd 2028219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 2029219089Spjd 2030219089Spjd lr->lr_doid = od->od_dir; 2031219089Spjd 2032219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 2033219089Spjd ASSERT3U(error, ==, ENOSPC); 2034219089Spjd missing++; 2035219089Spjd } else { 2036219089Spjd od->od_object = 0; 2037219089Spjd } 2038219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2039219089Spjd } 2040219089Spjd 2041219089Spjd return (missing); 2042219089Spjd} 2043219089Spjd 2044219089Spjdstatic int 2045219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 2046219089Spjd void *data) 2047219089Spjd{ 2048219089Spjd lr_write_t *lr; 2049219089Spjd int error; 2050219089Spjd 2051219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2052219089Spjd 2053219089Spjd lr->lr_foid = object; 2054219089Spjd lr->lr_offset = offset; 2055219089Spjd lr->lr_length = size; 2056219089Spjd lr->lr_blkoff = 0; 2057219089Spjd BP_ZERO(&lr->lr_blkptr); 2058219089Spjd 2059219089Spjd bcopy(data, lr + 1, size); 2060219089Spjd 2061219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2062219089Spjd 2063219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2064219089Spjd 2065219089Spjd return (error); 2066219089Spjd} 2067219089Spjd 2068219089Spjdstatic int 2069219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2070219089Spjd{ 2071219089Spjd lr_truncate_t *lr; 2072219089Spjd int error; 2073219089Spjd 2074219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 
2075219089Spjd 2076219089Spjd lr->lr_foid = object; 2077219089Spjd lr->lr_offset = offset; 2078219089Spjd lr->lr_length = size; 2079219089Spjd 2080219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2081219089Spjd 2082219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2083219089Spjd 2084219089Spjd return (error); 2085219089Spjd} 2086219089Spjd 2087219089Spjdstatic int 2088219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2089219089Spjd{ 2090219089Spjd lr_setattr_t *lr; 2091219089Spjd int error; 2092219089Spjd 2093219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2094219089Spjd 2095219089Spjd lr->lr_foid = object; 2096219089Spjd lr->lr_size = 0; 2097219089Spjd lr->lr_mode = 0; 2098219089Spjd 2099219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2100219089Spjd 2101219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2102219089Spjd 2103219089Spjd return (error); 2104219089Spjd} 2105219089Spjd 2106219089Spjdstatic void 2107219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2108219089Spjd{ 2109219089Spjd objset_t *os = zd->zd_os; 2110219089Spjd dmu_tx_t *tx; 2111219089Spjd uint64_t txg; 2112219089Spjd rl_t *rl; 2113219089Spjd 2114219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2115219089Spjd 2116219089Spjd ztest_object_lock(zd, object, RL_READER); 2117219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2118219089Spjd 2119219089Spjd tx = dmu_tx_create(os); 2120219089Spjd 2121219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2122219089Spjd 2123219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2124219089Spjd 2125219089Spjd if (txg != 0) { 2126219089Spjd dmu_prealloc(os, object, offset, size, tx); 2127219089Spjd dmu_tx_commit(tx); 2128219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2129219089Spjd } else { 2130219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2131219089Spjd } 2132219089Spjd 2133219089Spjd ztest_range_unlock(rl); 2134219089Spjd 
ztest_object_unlock(zd, object); 2135219089Spjd} 2136219089Spjd 2137219089Spjdstatic void 2138219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2139219089Spjd{ 2140243524Smm int err; 2141219089Spjd ztest_block_tag_t wbt; 2142219089Spjd dmu_object_info_t doi; 2143219089Spjd enum ztest_io_type io_type; 2144219089Spjd uint64_t blocksize; 2145219089Spjd void *data; 2146219089Spjd 2147219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2148219089Spjd blocksize = doi.doi_data_block_size; 2149219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2150219089Spjd 2151219089Spjd /* 2152219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2153219089Spjd */ 2154219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2155219089Spjd if (ztest_random(2) == 0) 2156219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2157219089Spjd 2158224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2159224526Smm 2160219089Spjd switch (io_type) { 2161219089Spjd 2162219089Spjd case ZTEST_IO_WRITE_TAG: 2163219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2164219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2165219089Spjd break; 2166219089Spjd 2167219089Spjd case ZTEST_IO_WRITE_PATTERN: 2168219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2169219089Spjd if (ztest_random(2) == 0) { 2170219089Spjd /* 2171219089Spjd * Induce fletcher2 collisions to ensure that 2172219089Spjd * zio_ddt_collision() detects and resolves them 2173219089Spjd * when using fletcher2-verify for deduplication. 
2174219089Spjd */ 2175219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2176219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2177219089Spjd } 2178219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2179219089Spjd break; 2180219089Spjd 2181219089Spjd case ZTEST_IO_WRITE_ZEROES: 2182219089Spjd bzero(data, blocksize); 2183219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2184219089Spjd break; 2185219089Spjd 2186219089Spjd case ZTEST_IO_TRUNCATE: 2187219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2188219089Spjd break; 2189219089Spjd 2190219089Spjd case ZTEST_IO_SETATTR: 2191219089Spjd (void) ztest_setattr(zd, object); 2192219089Spjd break; 2193243524Smm 2194243524Smm case ZTEST_IO_REWRITE: 2195243524Smm (void) rw_rdlock(&ztest_name_lock); 2196243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2197243524Smm ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), 2198243524Smm B_FALSE); 2199243524Smm VERIFY(err == 0 || err == ENOSPC); 2200243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2201243524Smm ZFS_PROP_COMPRESSION, 2202243524Smm ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), 2203243524Smm B_FALSE); 2204243524Smm VERIFY(err == 0 || err == ENOSPC); 2205243524Smm (void) rw_unlock(&ztest_name_lock); 2206243524Smm 2207243524Smm VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, 2208243524Smm DMU_READ_NO_PREFETCH)); 2209243524Smm 2210243524Smm (void) ztest_write(zd, object, offset, blocksize, data); 2211243524Smm break; 2212219089Spjd } 2213219089Spjd 2214224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2215224526Smm 2216219089Spjd umem_free(data, blocksize); 2217219089Spjd} 2218219089Spjd 2219219089Spjd/* 2220219089Spjd * Initialize an object description template. 
2221219089Spjd */ 2222219089Spjdstatic void 2223219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2224219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2225219089Spjd{ 2226219089Spjd od->od_dir = ZTEST_DIROBJ; 2227219089Spjd od->od_object = 0; 2228219089Spjd 2229219089Spjd od->od_crtype = type; 2230219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2231219089Spjd od->od_crgen = gen; 2232219089Spjd 2233219089Spjd od->od_type = DMU_OT_NONE; 2234219089Spjd od->od_blocksize = 0; 2235219089Spjd od->od_gen = 0; 2236219089Spjd 2237219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2238219089Spjd tag, (int64_t)id, index); 2239219089Spjd} 2240219089Spjd 2241219089Spjd/* 2242219089Spjd * Lookup or create the objects for a test using the od template. 2243219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2244219089Spjd * remove any existing objects and create new ones. Otherwise, 2245219089Spjd * use the existing objects. 
2246219089Spjd */ 2247219089Spjdstatic int 2248219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2249219089Spjd{ 2250219089Spjd int count = size / sizeof (*od); 2251219089Spjd int rv = 0; 2252219089Spjd 2253219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2254219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2255219089Spjd (ztest_remove(zd, od, count) != 0 || 2256219089Spjd ztest_create(zd, od, count) != 0)) 2257219089Spjd rv = -1; 2258219089Spjd zd->zd_od = od; 2259219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2260219089Spjd 2261219089Spjd return (rv); 2262219089Spjd} 2263219089Spjd 2264219089Spjd/* ARGSUSED */ 2265219089Spjdvoid 2266219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2267219089Spjd{ 2268219089Spjd zilog_t *zilog = zd->zd_zilog; 2269219089Spjd 2270224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2271224526Smm 2272219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2273219089Spjd 2274219089Spjd /* 2275219089Spjd * Remember the committed values in zd, which is in parent/child 2276219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2277219089Spjd * will verify that the log really does contain this record. 2278219089Spjd */ 2279219089Spjd mutex_enter(&zilog->zl_lock); 2280236143Smm ASSERT(zd->zd_shared != NULL); 2281236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2282236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2283219089Spjd mutex_exit(&zilog->zl_lock); 2284224526Smm 2285224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2286219089Spjd} 2287219089Spjd 2288219089Spjd/* 2289224526Smm * This function is designed to simulate the operations that occur during a 2290224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2291224526Smm * attempt to expose any implicit assumptions about ZIL management. 
2292224526Smm */ 2293224526Smm/* ARGSUSED */ 2294224526Smmvoid 2295224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2296224526Smm{ 2297224526Smm objset_t *os = zd->zd_os; 2298224526Smm 2299243524Smm /* 2300243524Smm * We grab the zd_dirobj_lock to ensure that no other thread is 2301243524Smm * updating the zil (i.e. adding in-memory log records) and the 2302243524Smm * zd_zilog_lock to block any I/O. 2303243524Smm */ 2304243524Smm VERIFY0(mutex_lock(&zd->zd_dirobj_lock)); 2305224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2306224526Smm 2307224526Smm /* zfsvfs_teardown() */ 2308224526Smm zil_close(zd->zd_zilog); 2309224526Smm 2310224526Smm /* zfsvfs_setup() */ 2311224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2312224526Smm zil_replay(os, zd, ztest_replay_vector); 2313224526Smm 2314224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2315239620Smm VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2316224526Smm} 2317224526Smm 2318224526Smm/* 2319168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2320168404Spjd * or create a pool with a bad vdev spec. 2321168404Spjd */ 2322219089Spjd/* ARGSUSED */ 2323168404Spjdvoid 2324219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2325168404Spjd{ 2326236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2327168404Spjd spa_t *spa; 2328168404Spjd nvlist_t *nvroot; 2329168404Spjd 2330168404Spjd /* 2331168404Spjd * Attempt to create using a bad file. 2332168404Spjd */ 2333243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2334219089Spjd VERIFY3U(ENOENT, ==, 2335219089Spjd spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); 2336168404Spjd nvlist_free(nvroot); 2337168404Spjd 2338168404Spjd /* 2339168404Spjd * Attempt to create using a bad mirror. 
2340168404Spjd */ 2341243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); 2342219089Spjd VERIFY3U(ENOENT, ==, 2343219089Spjd spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); 2344168404Spjd nvlist_free(nvroot); 2345168404Spjd 2346168404Spjd /* 2347168404Spjd * Attempt to create an existing pool. It shouldn't matter 2348168404Spjd * what's in the nvroot; we should fail with EEXIST. 2349168404Spjd */ 2350236143Smm (void) rw_rdlock(&ztest_name_lock); 2351243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2352236143Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); 2353168404Spjd nvlist_free(nvroot); 2354236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2355236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2356219089Spjd spa_close(spa, FTAG); 2357168404Spjd 2358236143Smm (void) rw_unlock(&ztest_name_lock); 2359168404Spjd} 2360168404Spjd 2361243505Smm/* ARGSUSED */ 2362243505Smmvoid 2363243505Smmztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) 2364243505Smm{ 2365243505Smm spa_t *spa; 2366243505Smm uint64_t initial_version = SPA_VERSION_INITIAL; 2367243505Smm uint64_t version, newversion; 2368243505Smm nvlist_t *nvroot, *props; 2369243505Smm char *name; 2370243505Smm 2371243505Smm VERIFY0(mutex_lock(&ztest_vdev_lock)); 2372243505Smm name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); 2373243505Smm 2374243505Smm /* 2375243505Smm * Clean up from previous runs. 2376243505Smm */ 2377243505Smm (void) spa_destroy(name); 2378243505Smm 2379243505Smm nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 2380243505Smm 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); 2381243505Smm 2382243505Smm /* 2383243505Smm * If we're configuring a RAIDZ device then make sure that the 2384243505Smm * the initial version is capable of supporting that feature. 
2385243505Smm */ 2386243505Smm switch (ztest_opts.zo_raidz_parity) { 2387243505Smm case 0: 2388243505Smm case 1: 2389243505Smm initial_version = SPA_VERSION_INITIAL; 2390243505Smm break; 2391243505Smm case 2: 2392243505Smm initial_version = SPA_VERSION_RAIDZ2; 2393243505Smm break; 2394243505Smm case 3: 2395243505Smm initial_version = SPA_VERSION_RAIDZ3; 2396243505Smm break; 2397243505Smm } 2398243505Smm 2399243505Smm /* 2400243505Smm * Create a pool with a spa version that can be upgraded. Pick 2401243505Smm * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 2402243505Smm */ 2403243505Smm do { 2404243505Smm version = ztest_random_spa_version(initial_version); 2405243505Smm } while (version > SPA_VERSION_BEFORE_FEATURES); 2406243505Smm 2407243505Smm props = fnvlist_alloc(); 2408243505Smm fnvlist_add_uint64(props, 2409243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), version); 2410243505Smm VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); 2411243505Smm fnvlist_free(nvroot); 2412243505Smm fnvlist_free(props); 2413243505Smm 2414243505Smm VERIFY0(spa_open(name, &spa, FTAG)); 2415243505Smm VERIFY3U(spa_version(spa), ==, version); 2416243505Smm newversion = ztest_random_spa_version(version + 1); 2417243505Smm 2418243505Smm if (ztest_opts.zo_verbose >= 4) { 2419243505Smm (void) printf("upgrading spa version from %llu to %llu\n", 2420243505Smm (u_longlong_t)version, (u_longlong_t)newversion); 2421243505Smm } 2422243505Smm 2423243505Smm spa_upgrade(spa, newversion); 2424243505Smm VERIFY3U(spa_version(spa), >, version); 2425243505Smm VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, 2426243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION))); 2427243505Smm spa_close(spa, FTAG); 2428243505Smm 2429243505Smm strfree(name); 2430243505Smm VERIFY0(mutex_unlock(&ztest_vdev_lock)); 2431243505Smm} 2432243505Smm 2433185029Spjdstatic vdev_t * 2434185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2435185029Spjd{ 2436185029Spjd vdev_t *mvd; 
2437185029Spjd 2438185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2439185029Spjd return (vd); 2440185029Spjd 2441185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2442185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2443185029Spjd NULL) 2444185029Spjd return (mvd); 2445185029Spjd 2446185029Spjd return (NULL); 2447185029Spjd} 2448185029Spjd 2449168404Spjd/* 2450219089Spjd * Find the first available hole which can be used as a top-level. 2451219089Spjd */ 2452219089Spjdint 2453219089Spjdfind_vdev_hole(spa_t *spa) 2454219089Spjd{ 2455219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2456219089Spjd int c; 2457219089Spjd 2458219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2459219089Spjd 2460219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2461219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2462219089Spjd 2463219089Spjd if (cvd->vdev_ishole) 2464219089Spjd break; 2465219089Spjd } 2466219089Spjd return (c); 2467219089Spjd} 2468219089Spjd 2469219089Spjd/* 2470168404Spjd * Verify that vdev_add() works as expected. 2471168404Spjd */ 2472219089Spjd/* ARGSUSED */ 2473168404Spjdvoid 2474219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2475168404Spjd{ 2476219089Spjd ztest_shared_t *zs = ztest_shared; 2477236143Smm spa_t *spa = ztest_spa; 2478219089Spjd uint64_t leaves; 2479219089Spjd uint64_t guid; 2480168404Spjd nvlist_t *nvroot; 2481168404Spjd int error; 2482168404Spjd 2483236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2484236143Smm leaves = 2485236143Smm MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2486168404Spjd 2487185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2488168404Spjd 2489219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2490168404Spjd 2491185029Spjd /* 2492219089Spjd * If we have slogs then remove them 1/4 of the time. 
2493185029Spjd */ 2494219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2495219089Spjd /* 2496219089Spjd * Grab the guid from the head of the log class rotor. 2497219089Spjd */ 2498219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2499185029Spjd 2500219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2501168404Spjd 2502219089Spjd /* 2503219089Spjd * We have to grab the zs_name_lock as writer to 2504219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2505219089Spjd * and destroying a dataset. Removing the slog will 2506219089Spjd * grab a reference on the dataset which may cause 2507219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2508219089Spjd * leaving the dataset in an inconsistent state. 2509219089Spjd */ 2510236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2511219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2512236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2513168404Spjd 2514219089Spjd if (error && error != EEXIST) 2515219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2516219089Spjd } else { 2517219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2518219089Spjd 2519219089Spjd /* 2520219089Spjd * Make 1/4 of the devices be log devices. 2521219089Spjd */ 2522243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, 2523236143Smm ztest_opts.zo_vdev_size, 0, 2524236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2525236143Smm zs->zs_mirrors, 1); 2526219089Spjd 2527219089Spjd error = spa_vdev_add(spa, nvroot); 2528219089Spjd nvlist_free(nvroot); 2529219089Spjd 2530219089Spjd if (error == ENOSPC) 2531219089Spjd ztest_record_enospc("spa_vdev_add"); 2532219089Spjd else if (error != 0) 2533219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2534219089Spjd } 2535219089Spjd 2536236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2537168404Spjd} 2538168404Spjd 2539185029Spjd/* 2540185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
2541185029Spjd */ 2542219089Spjd/* ARGSUSED */ 2543185029Spjdvoid 2544219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2545168404Spjd{ 2546219089Spjd ztest_shared_t *zs = ztest_shared; 2547236143Smm spa_t *spa = ztest_spa; 2548185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2549185029Spjd spa_aux_vdev_t *sav; 2550185029Spjd char *aux; 2551185029Spjd uint64_t guid = 0; 2552185029Spjd int error; 2553168404Spjd 2554185029Spjd if (ztest_random(2) == 0) { 2555185029Spjd sav = &spa->spa_spares; 2556185029Spjd aux = ZPOOL_CONFIG_SPARES; 2557185029Spjd } else { 2558185029Spjd sav = &spa->spa_l2cache; 2559185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2560185029Spjd } 2561185029Spjd 2562236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2563185029Spjd 2564185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2565185029Spjd 2566185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2567185029Spjd /* 2568185029Spjd * Pick a random device to remove. 2569185029Spjd */ 2570185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2571185029Spjd } else { 2572185029Spjd /* 2573185029Spjd * Find an unused device we can add. 2574185029Spjd */ 2575219089Spjd zs->zs_vdev_aux = 0; 2576185029Spjd for (;;) { 2577185029Spjd char path[MAXPATHLEN]; 2578185029Spjd int c; 2579236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2580236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2581236143Smm zs->zs_vdev_aux); 2582185029Spjd for (c = 0; c < sav->sav_count; c++) 2583185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2584185029Spjd path) == 0) 2585185029Spjd break; 2586185029Spjd if (c == sav->sav_count && 2587185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2588185029Spjd break; 2589219089Spjd zs->zs_vdev_aux++; 2590168404Spjd } 2591168404Spjd } 2592168404Spjd 2593185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2594168404Spjd 2595185029Spjd if (guid == 0) { 2596185029Spjd /* 2597185029Spjd * Add a new device. 
2598185029Spjd */ 2599243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2600236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2601185029Spjd error = spa_vdev_add(spa, nvroot); 2602185029Spjd if (error != 0) 2603185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2604185029Spjd nvlist_free(nvroot); 2605185029Spjd } else { 2606185029Spjd /* 2607185029Spjd * Remove an existing device. Sometimes, dirty its 2608185029Spjd * vdev state first to make sure we handle removal 2609185029Spjd * of devices that have pending state changes. 2610185029Spjd */ 2611185029Spjd if (ztest_random(2) == 0) 2612219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2613185029Spjd 2614185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2615185029Spjd if (error != 0 && error != EBUSY) 2616185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2617185029Spjd } 2618185029Spjd 2619236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2620168404Spjd} 2621168404Spjd 2622168404Spjd/* 2623219089Spjd * split a pool if it has mirror tlvdevs 2624219089Spjd */ 2625219089Spjd/* ARGSUSED */ 2626219089Spjdvoid 2627219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2628219089Spjd{ 2629219089Spjd ztest_shared_t *zs = ztest_shared; 2630236143Smm spa_t *spa = ztest_spa; 2631219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2632219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2633219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2634219089Spjd int error = 0; 2635219089Spjd 2636236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2637219089Spjd 2638219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2639236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2640236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2641219089Spjd return; 2642219089Spjd } 2643219089Spjd 2644219089Spjd /* clean up the old pool, if any */ 2645219089Spjd (void) spa_destroy("splitp"); 2646219089Spjd 2647219089Spjd 
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2648219089Spjd 2649219089Spjd /* generate a config from the existing config */ 2650219089Spjd mutex_enter(&spa->spa_props_lock); 2651219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2652219089Spjd &tree) == 0); 2653219089Spjd mutex_exit(&spa->spa_props_lock); 2654219089Spjd 2655219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2656219089Spjd &children) == 0); 2657219089Spjd 2658219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2659219089Spjd for (c = 0; c < children; c++) { 2660219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2661219089Spjd nvlist_t **mchild; 2662219089Spjd uint_t mchildren; 2663219089Spjd 2664219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2665219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2666219089Spjd 0) == 0); 2667219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2668219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2669219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2670219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2671219089Spjd if (lastlogid == 0) 2672219089Spjd lastlogid = schildren; 2673219089Spjd ++schildren; 2674219089Spjd continue; 2675219089Spjd } 2676219089Spjd lastlogid = 0; 2677219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2678219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2679219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2680219089Spjd } 2681219089Spjd 2682219089Spjd /* OK, create a config that can be used to split */ 2683219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2684219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2685219089Spjd VDEV_TYPE_ROOT) == 0); 2686219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2687219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2688219089Spjd 2689219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2690219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2691219089Spjd 2692219089Spjd for (c = 0; c < schildren; c++) 2693219089Spjd nvlist_free(schild[c]); 2694219089Spjd free(schild); 2695219089Spjd nvlist_free(split); 2696219089Spjd 2697219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2698219089Spjd 2699236143Smm (void) rw_wrlock(&ztest_name_lock); 2700219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2701236143Smm (void) rw_unlock(&ztest_name_lock); 2702219089Spjd 2703219089Spjd nvlist_free(config); 2704219089Spjd 2705219089Spjd if (error == 0) { 2706219089Spjd (void) printf("successful split - results:\n"); 2707219089Spjd mutex_enter(&spa_namespace_lock); 2708219089Spjd show_pool_stats(spa); 2709219089Spjd show_pool_stats(spa_lookup("splitp")); 2710219089Spjd mutex_exit(&spa_namespace_lock); 2711219089Spjd ++zs->zs_splits; 2712219089Spjd --zs->zs_mirrors; 2713219089Spjd } 2714236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2715219089Spjd 2716219089Spjd} 2717219089Spjd 2718219089Spjd/* 2719168404Spjd * Verify that we can attach and detach devices. 
2720168404Spjd */ 2721219089Spjd/* ARGSUSED */ 2722168404Spjdvoid 2723219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2724168404Spjd{ 2725219089Spjd ztest_shared_t *zs = ztest_shared; 2726236143Smm spa_t *spa = ztest_spa; 2727185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2728168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2729168404Spjd vdev_t *oldvd, *newvd, *pvd; 2730185029Spjd nvlist_t *root; 2731219089Spjd uint64_t leaves; 2732168404Spjd uint64_t leaf, top; 2733168404Spjd uint64_t ashift = ztest_get_ashift(); 2734209962Smm uint64_t oldguid, pguid; 2735168404Spjd size_t oldsize, newsize; 2736168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2737168404Spjd int replacing; 2738185029Spjd int oldvd_has_siblings = B_FALSE; 2739185029Spjd int newvd_is_spare = B_FALSE; 2740185029Spjd int oldvd_is_log; 2741168404Spjd int error, expected_error; 2742168404Spjd 2743236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2744236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2745168404Spjd 2746185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2747168404Spjd 2748168404Spjd /* 2749168404Spjd * Decide whether to do an attach or a replace. 2750168404Spjd */ 2751168404Spjd replacing = ztest_random(2); 2752168404Spjd 2753168404Spjd /* 2754168404Spjd * Pick a random top-level vdev. 2755168404Spjd */ 2756219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2757168404Spjd 2758168404Spjd /* 2759168404Spjd * Pick a random leaf within it. 2760168404Spjd */ 2761168404Spjd leaf = ztest_random(leaves); 2762168404Spjd 2763168404Spjd /* 2764185029Spjd * Locate this vdev. 
2765168404Spjd */ 2766185029Spjd oldvd = rvd->vdev_child[top]; 2767219089Spjd if (zs->zs_mirrors >= 1) { 2768209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2769219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2770236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2771209962Smm } 2772236143Smm if (ztest_opts.zo_raidz > 1) { 2773209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2774236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2775236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2776209962Smm } 2777168404Spjd 2778168404Spjd /* 2779185029Spjd * If we're already doing an attach or replace, oldvd may be a 2780185029Spjd * mirror vdev -- in which case, pick a random child. 2781168404Spjd */ 2782185029Spjd while (oldvd->vdev_children != 0) { 2783185029Spjd oldvd_has_siblings = B_TRUE; 2784209962Smm ASSERT(oldvd->vdev_children >= 2); 2785209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2786185029Spjd } 2787168404Spjd 2788185029Spjd oldguid = oldvd->vdev_guid; 2789219089Spjd oldsize = vdev_get_min_asize(oldvd); 2790185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2791185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2792185029Spjd pvd = oldvd->vdev_parent; 2793209962Smm pguid = pvd->vdev_guid; 2794185029Spjd 2795168404Spjd /* 2796185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2797168404Spjd */ 2798185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2799185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2800209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2801209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2802209962Smm error != ENOTSUP) 2803209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2804236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2805185029Spjd return; 2806185029Spjd } 2807168404Spjd 2808168404Spjd /* 2809185029Spjd * For the new vdev, choose with equal probability between the two 2810185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2811168404Spjd */ 2812185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2813185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2814185029Spjd newvd_is_spare = B_TRUE; 2815185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2816185029Spjd } else { 2817185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2818236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2819236143Smm top * leaves + leaf); 2820185029Spjd if (ztest_random(2) == 0) 2821185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2822185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2823185029Spjd } 2824168404Spjd 2825185029Spjd if (newvd) { 2826219089Spjd newsize = vdev_get_min_asize(newvd); 2827185029Spjd } else { 2828185029Spjd /* 2829185029Spjd * Make newsize a little bigger or smaller than oldsize. 2830185029Spjd * If it's smaller, the attach should fail. 2831185029Spjd * If it's larger, and we're doing a replace, 2832185029Spjd * we should get dynamic LUN growth when we're done. 2833185029Spjd */ 2834185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2835185029Spjd } 2836185029Spjd 2837168404Spjd /* 2838168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2839168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 
2840168404Spjd * 2841168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2842168404Spjd * 2843168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2844168404Spjd */ 2845185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2846185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2847185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2848185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2849185029Spjd expected_error = ENOTSUP; 2850185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2851185029Spjd expected_error = ENOTSUP; 2852185029Spjd else if (newvd == oldvd) 2853185029Spjd expected_error = replacing ? 0 : EBUSY; 2854185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2855168404Spjd expected_error = EBUSY; 2856168404Spjd else if (newsize < oldsize) 2857168404Spjd expected_error = EOVERFLOW; 2858168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2859168404Spjd expected_error = EDOM; 2860168404Spjd else 2861168404Spjd expected_error = 0; 2862168404Spjd 2863185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2864168404Spjd 2865168404Spjd /* 2866168404Spjd * Build the nvlist describing newpath. 2867168404Spjd */ 2868243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, 2869185029Spjd ashift, 0, 0, 0, 1); 2870168404Spjd 2871185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2872168404Spjd 2873168404Spjd nvlist_free(root); 2874168404Spjd 2875168404Spjd /* 2876168404Spjd * If our parent was the replacing vdev, but the replace completed, 2877168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2878168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2879168404Spjd */ 2880168404Spjd if (expected_error == ENOTSUP && 2881168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2882168404Spjd expected_error = error; 2883168404Spjd 2884168404Spjd /* 2885168404Spjd * If someone grew the LUN, the replacement may be too small. 
2886168404Spjd */ 2887185029Spjd if (error == EOVERFLOW || error == EBUSY) 2888168404Spjd expected_error = error; 2889168404Spjd 2890185029Spjd /* XXX workaround 6690467 */ 2891185029Spjd if (error != expected_error && expected_error != EBUSY) { 2892185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2893185029Spjd "returned %d, expected %d", 2894185029Spjd oldpath, (longlong_t)oldsize, newpath, 2895185029Spjd (longlong_t)newsize, replacing, error, expected_error); 2896168404Spjd } 2897168404Spjd 2898236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2899168404Spjd} 2900168404Spjd 2901168404Spjd/* 2902219089Spjd * Callback function which expands the physical size of the vdev. 2903168404Spjd */ 2904219089Spjdvdev_t * 2905219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2906168404Spjd{ 2907219089Spjd spa_t *spa = vd->vdev_spa; 2908219089Spjd size_t *newsize = arg; 2909168404Spjd size_t fsize; 2910168404Spjd int fd; 2911168404Spjd 2912219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2913219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2914168404Spjd 2915219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2916219089Spjd return (vd); 2917219089Spjd 2918219089Spjd fsize = lseek(fd, 0, SEEK_END); 2919219089Spjd (void) ftruncate(fd, *newsize); 2920219089Spjd 2921236143Smm if (ztest_opts.zo_verbose >= 6) { 2922219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2923219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2924219089Spjd } 2925219089Spjd (void) close(fd); 2926219089Spjd return (NULL); 2927219089Spjd} 2928219089Spjd 2929219089Spjd/* 2930219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
2931219089Spjd */ 2932219089Spjd/* ARGSUSED */ 2933219089Spjdvdev_t * 2934219089Spjdonline_vdev(vdev_t *vd, void *arg) 2935219089Spjd{ 2936219089Spjd spa_t *spa = vd->vdev_spa; 2937219089Spjd vdev_t *tvd = vd->vdev_top; 2938219089Spjd uint64_t guid = vd->vdev_guid; 2939219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2940219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2941219089Spjd int error; 2942219089Spjd 2943219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2944219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2945219089Spjd 2946219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2947219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2948219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2949219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2950219089Spjd 2951168404Spjd /* 2952219089Spjd * If vdev_online returned an error or the underlying vdev_open 2953219089Spjd * failed then we abort the expand. The only way to know that 2954219089Spjd * vdev_open fails is by checking the returned newstate. 2955168404Spjd */ 2956219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2957236143Smm if (ztest_opts.zo_verbose >= 5) { 2958219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2959219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2960219089Spjd } 2961219089Spjd return (vd); 2962219089Spjd } 2963219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2964168404Spjd 2965219089Spjd /* 2966219089Spjd * Since we dropped the lock we need to ensure that we're 2967219089Spjd * still talking to the original vdev. It's possible this 2968219089Spjd * vdev may have been detached/replaced while we were 2969219089Spjd * trying to online it. 
2970219089Spjd */ 2971219089Spjd if (generation != spa->spa_config_generation) { 2972236143Smm if (ztest_opts.zo_verbose >= 5) { 2973219089Spjd (void) printf("vdev configuration has changed, " 2974219089Spjd "guid %llu, state %llu, expected gen %llu, " 2975219089Spjd "got gen %llu\n", 2976219089Spjd (u_longlong_t)guid, 2977219089Spjd (u_longlong_t)tvd->vdev_state, 2978219089Spjd (u_longlong_t)generation, 2979219089Spjd (u_longlong_t)spa->spa_config_generation); 2980219089Spjd } 2981219089Spjd return (vd); 2982219089Spjd } 2983219089Spjd return (NULL); 2984219089Spjd} 2985168404Spjd 2986219089Spjd/* 2987219089Spjd * Traverse the vdev tree calling the supplied function. 2988219089Spjd * We continue to walk the tree until we either have walked all 2989219089Spjd * children or we receive a non-NULL return from the callback. 2990219089Spjd * If a NULL callback is passed, then we just return back the first 2991219089Spjd * leaf vdev we encounter. 2992219089Spjd */ 2993219089Spjdvdev_t * 2994219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 2995219089Spjd{ 2996219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 2997219089Spjd if (func == NULL) 2998219089Spjd return (vd); 2999219089Spjd else 3000219089Spjd return (func(vd, arg)); 3001219089Spjd } 3002168404Spjd 3003219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3004219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3005219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3006219089Spjd return (cvd); 3007219089Spjd } 3008219089Spjd return (NULL); 3009219089Spjd} 3010219089Spjd 3011219089Spjd/* 3012219089Spjd * Verify that dynamic LUN growth works as expected. 
3013219089Spjd */ 3014219089Spjd/* ARGSUSED */ 3015219089Spjdvoid 3016219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3017219089Spjd{ 3018236143Smm spa_t *spa = ztest_spa; 3019219089Spjd vdev_t *vd, *tvd; 3020219089Spjd metaslab_class_t *mc; 3021219089Spjd metaslab_group_t *mg; 3022219089Spjd size_t psize, newsize; 3023219089Spjd uint64_t top; 3024219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3025219089Spjd 3026236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 3027219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3028219089Spjd 3029219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3030219089Spjd 3031219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3032219089Spjd mg = tvd->vdev_mg; 3033219089Spjd mc = mg->mg_class; 3034219089Spjd old_ms_count = tvd->vdev_ms_count; 3035219089Spjd old_class_space = metaslab_class_get_space(mc); 3036219089Spjd 3037219089Spjd /* 3038219089Spjd * Determine the size of the first leaf vdev associated with 3039219089Spjd * our top-level device. 3040219089Spjd */ 3041219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3042219089Spjd ASSERT3P(vd, !=, NULL); 3043219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3044219089Spjd 3045219089Spjd psize = vd->vdev_psize; 3046219089Spjd 3047219089Spjd /* 3048219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3049219089Spjd * original size, and it has a valid psize. 
3050219089Spjd */ 3051219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3052236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3053219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3054236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3055219089Spjd return; 3056219089Spjd } 3057219089Spjd ASSERT(psize > 0); 3058219089Spjd newsize = psize + psize / 8; 3059219089Spjd ASSERT3U(newsize, >, psize); 3060219089Spjd 3061236143Smm if (ztest_opts.zo_verbose >= 6) { 3062219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3063219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3064219089Spjd } 3065219089Spjd 3066219089Spjd /* 3067219089Spjd * Growing the vdev is a two step process: 3068219089Spjd * 1). expand the physical size (i.e. relabel) 3069219089Spjd * 2). online the vdev to create the new metaslabs 3070219089Spjd */ 3071219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3072219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3073219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3074236143Smm if (ztest_opts.zo_verbose >= 5) { 3075219089Spjd (void) printf("Could not expand LUN because " 3076219089Spjd "the vdev configuration changed.\n"); 3077168404Spjd } 3078219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3079236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3080219089Spjd return; 3081168404Spjd } 3082168404Spjd 3083219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3084219089Spjd 3085219089Spjd /* 3086219089Spjd * Expanding the LUN will update the config asynchronously, 3087219089Spjd * thus we must wait for the async thread to complete any 3088219089Spjd * pending tasks before proceeding. 
3089219089Spjd */ 3090219089Spjd for (;;) { 3091219089Spjd boolean_t done; 3092219089Spjd mutex_enter(&spa->spa_async_lock); 3093219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3094219089Spjd mutex_exit(&spa->spa_async_lock); 3095219089Spjd if (done) 3096219089Spjd break; 3097219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3098219089Spjd (void) poll(NULL, 0, 100); 3099219089Spjd } 3100219089Spjd 3101219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3102219089Spjd 3103219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3104219089Spjd new_ms_count = tvd->vdev_ms_count; 3105219089Spjd new_class_space = metaslab_class_get_space(mc); 3106219089Spjd 3107219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3108236143Smm if (ztest_opts.zo_verbose >= 5) { 3109219089Spjd (void) printf("Could not verify LUN expansion due to " 3110219089Spjd "intervening vdev offline or remove.\n"); 3111219089Spjd } 3112219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3113236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3114219089Spjd return; 3115219089Spjd } 3116219089Spjd 3117219089Spjd /* 3118219089Spjd * Make sure we were able to grow the vdev. 3119219089Spjd */ 3120219089Spjd if (new_ms_count <= old_ms_count) 3121219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 3122219089Spjd old_ms_count, new_ms_count); 3123219089Spjd 3124219089Spjd /* 3125219089Spjd * Make sure we were able to grow the pool. 
3126219089Spjd */ 3127219089Spjd if (new_class_space <= old_class_space) 3128219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 3129219089Spjd old_class_space, new_class_space); 3130219089Spjd 3131236143Smm if (ztest_opts.zo_verbose >= 5) { 3132219089Spjd char oldnumbuf[6], newnumbuf[6]; 3133219089Spjd 3134219089Spjd nicenum(old_class_space, oldnumbuf); 3135219089Spjd nicenum(new_class_space, newnumbuf); 3136219089Spjd (void) printf("%s grew from %s to %s\n", 3137219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3138219089Spjd } 3139219089Spjd 3140219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3141236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3142168404Spjd} 3143168404Spjd 3144219089Spjd/* 3145219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3146219089Spjd */ 3147168404Spjd/* ARGSUSED */ 3148168404Spjdstatic void 3149219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3150168404Spjd{ 3151168404Spjd /* 3152219089Spjd * Create the objects common to all ztest datasets. 
3153168404Spjd */ 3154219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3155168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3156219089Spjd} 3157168404Spjd 3158219089Spjdstatic int 3159219089Spjdztest_dataset_create(char *dsname) 3160219089Spjd{ 3161219089Spjd uint64_t zilset = ztest_random(100); 3162219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3163219089Spjd ztest_objset_create_cb, NULL); 3164219089Spjd 3165219089Spjd if (err || zilset < 80) 3166219089Spjd return (err); 3167219089Spjd 3168236143Smm if (ztest_opts.zo_verbose >= 6) 3169236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3170219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3171219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3172168404Spjd} 3173168404Spjd 3174219089Spjd/* ARGSUSED */ 3175168404Spjdstatic int 3176219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3177168404Spjd{ 3178168404Spjd objset_t *os; 3179219089Spjd dmu_object_info_t doi; 3180168404Spjd int error; 3181168404Spjd 3182168404Spjd /* 3183168404Spjd * Verify that the dataset contains a directory object. 3184168404Spjd */ 3185219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os)); 3186219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3187168404Spjd if (error != ENOENT) { 3188168404Spjd /* We could have crashed in the middle of destroying it */ 3189240415Smm ASSERT0(error); 3190219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3191219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3192168404Spjd } 3193219089Spjd dmu_objset_rele(os, FTAG); 3194168404Spjd 3195168404Spjd /* 3196168404Spjd * Destroy the dataset. 
3197168404Spjd */ 3198219089Spjd VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE)); 3199168404Spjd return (0); 3200168404Spjd} 3201168404Spjd 3202219089Spjdstatic boolean_t 3203219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3204168404Spjd{ 3205219089Spjd char snapname[MAXNAMELEN]; 3206219089Spjd int error; 3207168404Spjd 3208219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3209219089Spjd (u_longlong_t)id); 3210168404Spjd 3211219089Spjd error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, 3212219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3213219089Spjd if (error == ENOSPC) { 3214219089Spjd ztest_record_enospc(FTAG); 3215219089Spjd return (B_FALSE); 3216219089Spjd } 3217219089Spjd if (error != 0 && error != EEXIST) 3218219089Spjd fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error); 3219219089Spjd return (B_TRUE); 3220219089Spjd} 3221168404Spjd 3222219089Spjdstatic boolean_t 3223219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3224219089Spjd{ 3225219089Spjd char snapname[MAXNAMELEN]; 3226219089Spjd int error; 3227219089Spjd 3228219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3229219089Spjd (u_longlong_t)id); 3230219089Spjd 3231219089Spjd error = dmu_objset_destroy(snapname, B_FALSE); 3232219089Spjd if (error != 0 && error != ENOENT) 3233219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3234219089Spjd return (B_TRUE); 3235168404Spjd} 3236168404Spjd 3237219089Spjd/* ARGSUSED */ 3238168404Spjdvoid 3239219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3240168404Spjd{ 3241219089Spjd ztest_ds_t zdtmp; 3242219089Spjd int iters; 3243168404Spjd int error; 3244185029Spjd objset_t *os, *os2; 3245219089Spjd char name[MAXNAMELEN]; 3246168404Spjd zilog_t *zilog; 3247168404Spjd 3248236143Smm (void) rw_rdlock(&ztest_name_lock); 3249168404Spjd 3250219089Spjd (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3251236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 
3252168404Spjd 3253168404Spjd /* 3254168404Spjd * If this dataset exists from a previous run, process its replay log 3255168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3256219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3257168404Spjd */ 3258168404Spjd if (ztest_random(2) == 0 && 3259219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3260236143Smm ztest_zd_init(&zdtmp, NULL, os); 3261219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3262219089Spjd ztest_zd_fini(&zdtmp); 3263219089Spjd dmu_objset_disown(os, FTAG); 3264168404Spjd } 3265168404Spjd 3266168404Spjd /* 3267168404Spjd * There may be an old instance of the dataset we're about to 3268168404Spjd * create lying around from a previous run. If so, destroy it 3269168404Spjd * and all of its snapshots. 3270168404Spjd */ 3271219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3272168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3273168404Spjd 3274168404Spjd /* 3275168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3276168404Spjd */ 3277219089Spjd VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os)); 3278168404Spjd 3279168404Spjd /* 3280168404Spjd * Verify that we can create a new dataset. 3281168404Spjd */ 3282219089Spjd error = ztest_dataset_create(name); 3283168404Spjd if (error) { 3284168404Spjd if (error == ENOSPC) { 3285219089Spjd ztest_record_enospc(FTAG); 3286236143Smm (void) rw_unlock(&ztest_name_lock); 3287168404Spjd return; 3288168404Spjd } 3289168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3290168404Spjd } 3291168404Spjd 3292219089Spjd VERIFY3U(0, ==, 3293219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3294168404Spjd 3295236143Smm ztest_zd_init(&zdtmp, NULL, os); 3296219089Spjd 3297168404Spjd /* 3298168404Spjd * Open the intent log for it. 
3299168404Spjd */ 3300219089Spjd zilog = zil_open(os, ztest_get_data); 3301168404Spjd 3302168404Spjd /* 3303219089Spjd * Put some objects in there, do a little I/O to them, 3304219089Spjd * and randomly take a couple of snapshots along the way. 3305168404Spjd */ 3306219089Spjd iters = ztest_random(5); 3307219089Spjd for (int i = 0; i < iters; i++) { 3308219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3309219089Spjd if (ztest_random(iters) == 0) 3310219089Spjd (void) ztest_snapshot_create(name, i); 3311168404Spjd } 3312168404Spjd 3313168404Spjd /* 3314168404Spjd * Verify that we cannot create an existing dataset. 3315168404Spjd */ 3316219089Spjd VERIFY3U(EEXIST, ==, 3317219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3318168404Spjd 3319168404Spjd /* 3320219089Spjd * Verify that we can hold an objset that is also owned. 3321168404Spjd */ 3322219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3323219089Spjd dmu_objset_rele(os2, FTAG); 3324168404Spjd 3325219089Spjd /* 3326219089Spjd * Verify that we cannot own an objset that is already owned. 3327219089Spjd */ 3328219089Spjd VERIFY3U(EBUSY, ==, 3329219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3330219089Spjd 3331168404Spjd zil_close(zilog); 3332219089Spjd dmu_objset_disown(os, FTAG); 3333219089Spjd ztest_zd_fini(&zdtmp); 3334168404Spjd 3335236143Smm (void) rw_unlock(&ztest_name_lock); 3336168404Spjd} 3337168404Spjd 3338168404Spjd/* 3339168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 
3340168404Spjd */ 3341168404Spjdvoid 3342219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3343168404Spjd{ 3344236143Smm (void) rw_rdlock(&ztest_name_lock); 3345219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3346219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3347236143Smm (void) rw_unlock(&ztest_name_lock); 3348219089Spjd} 3349219089Spjd 3350219089Spjd/* 3351219089Spjd * Cleanup non-standard snapshots and clones. 3352219089Spjd */ 3353219089Spjdvoid 3354219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3355219089Spjd{ 3356219089Spjd char snap1name[MAXNAMELEN]; 3357219089Spjd char clone1name[MAXNAMELEN]; 3358219089Spjd char snap2name[MAXNAMELEN]; 3359219089Spjd char clone2name[MAXNAMELEN]; 3360219089Spjd char snap3name[MAXNAMELEN]; 3361168404Spjd int error; 3362168404Spjd 3363219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3364219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3365219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3366219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3367219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3368168404Spjd 3369219089Spjd error = dmu_objset_destroy(clone2name, B_FALSE); 3370219089Spjd if (error && error != ENOENT) 3371219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); 3372219089Spjd error = dmu_objset_destroy(snap3name, B_FALSE); 3373219089Spjd if (error && error != ENOENT) 3374219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); 3375219089Spjd error = dmu_objset_destroy(snap2name, B_FALSE); 3376219089Spjd if (error && error != ENOENT) 3377219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); 3378219089Spjd error = dmu_objset_destroy(clone1name, B_FALSE); 3379219089Spjd if (error && error != ENOENT) 3380219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, 
error); 3381219089Spjd error = dmu_objset_destroy(snap1name, B_FALSE); 3382219089Spjd if (error && error != ENOENT) 3383219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); 3384168404Spjd} 3385168404Spjd 3386168404Spjd/* 3387207910Smm * Verify dsl_dataset_promote handles EBUSY 3388207910Smm */ 3389207910Smmvoid 3390219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3391207910Smm{ 3392207910Smm objset_t *clone; 3393207910Smm dsl_dataset_t *ds; 3394219089Spjd char snap1name[MAXNAMELEN]; 3395219089Spjd char clone1name[MAXNAMELEN]; 3396219089Spjd char snap2name[MAXNAMELEN]; 3397219089Spjd char clone2name[MAXNAMELEN]; 3398219089Spjd char snap3name[MAXNAMELEN]; 3399219089Spjd char *osname = zd->zd_name; 3400219089Spjd int error; 3401207910Smm 3402236143Smm (void) rw_rdlock(&ztest_name_lock); 3403207910Smm 3404219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3405207910Smm 3406219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3407219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3408219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3409219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3410219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3411207910Smm 3412209962Smm error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, 3413219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3414209962Smm if (error && error != EEXIST) { 3415209962Smm if (error == ENOSPC) { 3416209962Smm ztest_record_enospc(FTAG); 3417209962Smm goto out; 3418209962Smm } 3419209962Smm fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3420209962Smm } 3421207910Smm 3422219089Spjd error = dmu_objset_hold(snap1name, FTAG, &clone); 3423207910Smm if (error) 3424207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); 3425207910Smm 3426219089Spjd error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); 
3427219089Spjd dmu_objset_rele(clone, FTAG); 3428209962Smm if (error) { 3429209962Smm if (error == ENOSPC) { 3430209962Smm ztest_record_enospc(FTAG); 3431209962Smm goto out; 3432209962Smm } 3433207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3434209962Smm } 3435207910Smm 3436207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, 3437219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3438209962Smm if (error && error != EEXIST) { 3439209962Smm if (error == ENOSPC) { 3440209962Smm ztest_record_enospc(FTAG); 3441209962Smm goto out; 3442209962Smm } 3443209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3444209962Smm } 3445207910Smm 3446207910Smm error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, 3447219089Spjd NULL, NULL, B_FALSE, B_FALSE, -1); 3448209962Smm if (error && error != EEXIST) { 3449209962Smm if (error == ENOSPC) { 3450209962Smm ztest_record_enospc(FTAG); 3451209962Smm goto out; 3452209962Smm } 3453209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3454209962Smm } 3455207910Smm 3456219089Spjd error = dmu_objset_hold(snap3name, FTAG, &clone); 3457207910Smm if (error) 3458207910Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3459207910Smm 3460219089Spjd error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); 3461219089Spjd dmu_objset_rele(clone, FTAG); 3462209962Smm if (error) { 3463209962Smm if (error == ENOSPC) { 3464219089Spjd ztest_record_enospc(FTAG); 3465209962Smm goto out; 3466209962Smm } 3467207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3468209962Smm } 3469207910Smm 3470219089Spjd error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds); 3471207910Smm if (error) 3472219089Spjd fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error); 3473219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3474207910Smm if (error != EBUSY) 3475207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3476207910Smm error); 
3477207910Smm dsl_dataset_disown(ds, FTAG); 3478207910Smm 3479209962Smmout: 3480219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3481207910Smm 3482236143Smm (void) rw_unlock(&ztest_name_lock); 3483207910Smm} 3484207910Smm 3485207910Smm/* 3486168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3487168404Spjd */ 3488168404Spjdvoid 3489219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3490168404Spjd{ 3491219089Spjd ztest_od_t od[4]; 3492219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3493168404Spjd 3494219089Spjd for (int b = 0; b < batchsize; b++) 3495219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3496168404Spjd 3497168404Spjd /* 3498219089Spjd * Destroy the previous batch of objects, create a new batch, 3499219089Spjd * and do some I/O on the new objects. 3500168404Spjd */ 3501219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3502219089Spjd return; 3503168404Spjd 3504219089Spjd while (ztest_random(4 * batchsize) != 0) 3505219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3506219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3507168404Spjd} 3508168404Spjd 3509168404Spjd/* 3510168404Spjd * Verify that dmu_{read,write} work as expected. 
3511168404Spjd */ 3512168404Spjdvoid 3513219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3514168404Spjd{ 3515219089Spjd objset_t *os = zd->zd_os; 3516219089Spjd ztest_od_t od[2]; 3517168404Spjd dmu_tx_t *tx; 3518168404Spjd int i, freeit, error; 3519168404Spjd uint64_t n, s, txg; 3520168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3521219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3522219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3523168404Spjd uint64_t regions = 997; 3524168404Spjd uint64_t stride = 123456789ULL; 3525168404Spjd uint64_t width = 40; 3526168404Spjd int free_percent = 5; 3527168404Spjd 3528168404Spjd /* 3529168404Spjd * This test uses two objects, packobj and bigobj, that are always 3530168404Spjd * updated together (i.e. in the same tx) so that their contents are 3531168404Spjd * in sync and can be compared. Their contents relate to each other 3532168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3533168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3534168404Spjd * for any index n, there are three bufwads that should be identical: 3535168404Spjd * 3536168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3537168404Spjd * bigobj, at the head of the nth chunk 3538168404Spjd * bigobj, at the tail of the nth chunk 3539168404Spjd * 3540168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3541168404Spjd * and it doesn't have any relation to the object blocksize. 3542168404Spjd * The only requirement is that it can hold at least two bufwads. 3543168404Spjd * 3544168404Spjd * Normally, we write the bufwad to each of these locations. 3545168404Spjd * However, free_percent of the time we instead write zeroes to 3546168404Spjd * packobj and perform a dmu_free_range() on bigobj. 
By comparing 3547168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3548168404Spjd * tracking which parts of an object are allocated and free, 3549168404Spjd * and that the contents of the allocated blocks are correct. 3550168404Spjd */ 3551168404Spjd 3552168404Spjd /* 3553168404Spjd * Read the directory info. If it's the first time, set things up. 3554168404Spjd */ 3555219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3556219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3557168404Spjd 3558219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3559219089Spjd return; 3560168404Spjd 3561219089Spjd bigobj = od[0].od_object; 3562219089Spjd packobj = od[1].od_object; 3563219089Spjd chunksize = od[0].od_gen; 3564219089Spjd ASSERT(chunksize == od[1].od_gen); 3565168404Spjd 3566168404Spjd /* 3567168404Spjd * Prefetch a random chunk of the big object. 3568168404Spjd * Our aim here is to get some async reads in flight 3569168404Spjd * for blocks that we may free below; the DMU should 3570168404Spjd * handle this race correctly. 3571168404Spjd */ 3572168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3573168404Spjd s = 1 + ztest_random(2 * width - 1); 3574219089Spjd dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); 3575168404Spjd 3576168404Spjd /* 3577168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 
3578168404Spjd */ 3579168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3580168404Spjd s = 1 + ztest_random(width - 1); 3581168404Spjd 3582168404Spjd packoff = n * sizeof (bufwad_t); 3583168404Spjd packsize = s * sizeof (bufwad_t); 3584168404Spjd 3585219089Spjd bigoff = n * chunksize; 3586219089Spjd bigsize = s * chunksize; 3587168404Spjd 3588168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3589168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3590168404Spjd 3591168404Spjd /* 3592168404Spjd * free_percent of the time, free a range of bigobj rather than 3593168404Spjd * overwriting it. 3594168404Spjd */ 3595168404Spjd freeit = (ztest_random(100) < free_percent); 3596168404Spjd 3597168404Spjd /* 3598168404Spjd * Read the current contents of our objects. 3599168404Spjd */ 3600219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3601209962Smm DMU_READ_PREFETCH); 3602240415Smm ASSERT0(error); 3603219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3604209962Smm DMU_READ_PREFETCH); 3605240415Smm ASSERT0(error); 3606168404Spjd 3607168404Spjd /* 3608168404Spjd * Get a tx for the mods to both packobj and bigobj. 
3609168404Spjd */ 3610168404Spjd tx = dmu_tx_create(os); 3611168404Spjd 3612219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3613168404Spjd 3614168404Spjd if (freeit) 3615219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3616168404Spjd else 3617219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3618168404Spjd 3619219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3620219089Spjd if (txg == 0) { 3621168404Spjd umem_free(packbuf, packsize); 3622168404Spjd umem_free(bigbuf, bigsize); 3623168404Spjd return; 3624168404Spjd } 3625168404Spjd 3626219089Spjd dmu_object_set_checksum(os, bigobj, 3627219089Spjd (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); 3628168404Spjd 3629219089Spjd dmu_object_set_compress(os, bigobj, 3630219089Spjd (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); 3631219089Spjd 3632168404Spjd /* 3633168404Spjd * For each index from n to n + s, verify that the existing bufwad 3634168404Spjd * in packobj matches the bufwads at the head and tail of the 3635168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3636168404Spjd * with the new values we want to write out. 
3637168404Spjd */ 3638168404Spjd for (i = 0; i < s; i++) { 3639168404Spjd /* LINTED */ 3640168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3641168404Spjd /* LINTED */ 3642219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3643168404Spjd /* LINTED */ 3644219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3645168404Spjd 3646168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3647168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3648168404Spjd 3649168404Spjd if (pack->bw_txg > txg) 3650168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3651168404Spjd pack->bw_txg, txg); 3652168404Spjd 3653168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3654168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3655168404Spjd pack->bw_index, n, i); 3656168404Spjd 3657168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3658168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3659168404Spjd 3660168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3661168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3662168404Spjd 3663168404Spjd if (freeit) { 3664168404Spjd bzero(pack, sizeof (bufwad_t)); 3665168404Spjd } else { 3666168404Spjd pack->bw_index = n + i; 3667168404Spjd pack->bw_txg = txg; 3668168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3669168404Spjd } 3670168404Spjd *bigH = *pack; 3671168404Spjd *bigT = *pack; 3672168404Spjd } 3673168404Spjd 3674168404Spjd /* 3675168404Spjd * We've verified all the old bufwads, and made new ones. 3676168404Spjd * Now write them out. 
3677168404Spjd */ 3678219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3679168404Spjd 3680168404Spjd if (freeit) { 3681236143Smm if (ztest_opts.zo_verbose >= 7) { 3682168404Spjd (void) printf("freeing offset %llx size %llx" 3683168404Spjd " txg %llx\n", 3684168404Spjd (u_longlong_t)bigoff, 3685168404Spjd (u_longlong_t)bigsize, 3686168404Spjd (u_longlong_t)txg); 3687168404Spjd } 3688219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3689168404Spjd } else { 3690236143Smm if (ztest_opts.zo_verbose >= 7) { 3691168404Spjd (void) printf("writing offset %llx size %llx" 3692168404Spjd " txg %llx\n", 3693168404Spjd (u_longlong_t)bigoff, 3694168404Spjd (u_longlong_t)bigsize, 3695168404Spjd (u_longlong_t)txg); 3696168404Spjd } 3697219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3698168404Spjd } 3699168404Spjd 3700168404Spjd dmu_tx_commit(tx); 3701168404Spjd 3702168404Spjd /* 3703168404Spjd * Sanity check the stuff we just wrote. 3704168404Spjd */ 3705168404Spjd { 3706168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3707168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3708168404Spjd 3709219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3710209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3711219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3712209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3713168404Spjd 3714168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3715168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3716168404Spjd 3717168404Spjd umem_free(packcheck, packsize); 3718168404Spjd umem_free(bigcheck, bigsize); 3719168404Spjd } 3720168404Spjd 3721168404Spjd umem_free(packbuf, packsize); 3722168404Spjd umem_free(bigbuf, bigsize); 3723168404Spjd} 3724168404Spjd 3725168404Spjdvoid 3726209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3727219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3728209962Smm{ 
3729209962Smm uint64_t i; 3730209962Smm bufwad_t *pack; 3731209962Smm bufwad_t *bigH; 3732209962Smm bufwad_t *bigT; 3733209962Smm 3734209962Smm /* 3735209962Smm * For each index from n to n + s, verify that the existing bufwad 3736209962Smm * in packobj matches the bufwads at the head and tail of the 3737209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3738209962Smm * with the new values we want to write out. 3739209962Smm */ 3740209962Smm for (i = 0; i < s; i++) { 3741209962Smm /* LINTED */ 3742209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3743209962Smm /* LINTED */ 3744219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3745209962Smm /* LINTED */ 3746219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3747209962Smm 3748209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3749209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3750209962Smm 3751209962Smm if (pack->bw_txg > txg) 3752209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3753209962Smm pack->bw_txg, txg); 3754209962Smm 3755209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3756209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3757209962Smm pack->bw_index, n, i); 3758209962Smm 3759209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3760209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3761209962Smm 3762209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3763209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3764209962Smm 3765209962Smm pack->bw_index = n + i; 3766209962Smm pack->bw_txg = txg; 3767209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3768209962Smm 3769209962Smm *bigH = *pack; 3770209962Smm *bigT = *pack; 3771209962Smm } 3772209962Smm} 3773209962Smm 3774209962Smmvoid 3775219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3776209962Smm{ 3777219089Spjd objset_t *os = zd->zd_os; 3778219089Spjd ztest_od_t od[2]; 
3779209962Smm dmu_tx_t *tx; 3780209962Smm uint64_t i; 3781209962Smm int error; 3782209962Smm uint64_t n, s, txg; 3783209962Smm bufwad_t *packbuf, *bigbuf; 3784219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3785219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3786219089Spjd uint64_t chunksize = blocksize; 3787209962Smm uint64_t regions = 997; 3788209962Smm uint64_t stride = 123456789ULL; 3789209962Smm uint64_t width = 9; 3790209962Smm dmu_buf_t *bonus_db; 3791209962Smm arc_buf_t **bigbuf_arcbufs; 3792219089Spjd dmu_object_info_t doi; 3793209962Smm 3794209962Smm /* 3795209962Smm * This test uses two objects, packobj and bigobj, that are always 3796209962Smm * updated together (i.e. in the same tx) so that their contents are 3797209962Smm * in sync and can be compared. Their contents relate to each other 3798209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3799209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3800209962Smm * for any index n, there are three bufwads that should be identical: 3801209962Smm * 3802209962Smm * packobj, at offset n * sizeof (bufwad_t) 3803209962Smm * bigobj, at the head of the nth chunk 3804209962Smm * bigobj, at the tail of the nth chunk 3805209962Smm * 3806209962Smm * The chunk size is set equal to bigobj block size so that 3807209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3808209962Smm */ 3809209962Smm 3810209962Smm /* 3811209962Smm * Read the directory info. If it's the first time, set things up. 
3812209962Smm */ 3813219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3814219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3815209962Smm 3816219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3817219089Spjd return; 3818209962Smm 3819219089Spjd bigobj = od[0].od_object; 3820219089Spjd packobj = od[1].od_object; 3821219089Spjd blocksize = od[0].od_blocksize; 3822219089Spjd chunksize = blocksize; 3823219089Spjd ASSERT(chunksize == od[1].od_gen); 3824209962Smm 3825219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3826219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3827219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3828219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3829209962Smm 3830209962Smm /* 3831209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3832209962Smm */ 3833209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3834209962Smm s = 1 + ztest_random(width - 1); 3835209962Smm 3836209962Smm packoff = n * sizeof (bufwad_t); 3837209962Smm packsize = s * sizeof (bufwad_t); 3838209962Smm 3839219089Spjd bigoff = n * chunksize; 3840219089Spjd bigsize = s * chunksize; 3841209962Smm 3842209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3843209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3844209962Smm 3845219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3846209962Smm 3847209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3848209962Smm 3849209962Smm /* 3850209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3851209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3852209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3853209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3854209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3855209962Smm * Iteration 5 test zcopy when it can't be done. 
3856209962Smm * Iteration 6 one more zcopy write. 3857209962Smm */ 3858209962Smm for (i = 0; i < 7; i++) { 3859209962Smm uint64_t j; 3860209962Smm uint64_t off; 3861209962Smm 3862209962Smm /* 3863209962Smm * In iteration 5 (i == 5) use arcbufs 3864209962Smm * that don't match bigobj blksz to test 3865209962Smm * dmu_assign_arcbuf() when it can't directly 3866209962Smm * assign an arcbuf to a dbuf. 3867209962Smm */ 3868209962Smm for (j = 0; j < s; j++) { 3869209962Smm if (i != 5) { 3870209962Smm bigbuf_arcbufs[j] = 3871219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3872209962Smm } else { 3873209962Smm bigbuf_arcbufs[2 * j] = 3874219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3875209962Smm bigbuf_arcbufs[2 * j + 1] = 3876219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3877209962Smm } 3878209962Smm } 3879209962Smm 3880209962Smm /* 3881209962Smm * Get a tx for the mods to both packobj and bigobj. 3882209962Smm */ 3883209962Smm tx = dmu_tx_create(os); 3884209962Smm 3885219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3886219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3887209962Smm 3888219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3889219089Spjd if (txg == 0) { 3890209962Smm umem_free(packbuf, packsize); 3891209962Smm umem_free(bigbuf, bigsize); 3892209962Smm for (j = 0; j < s; j++) { 3893209962Smm if (i != 5) { 3894209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3895209962Smm } else { 3896209962Smm dmu_return_arcbuf( 3897209962Smm bigbuf_arcbufs[2 * j]); 3898209962Smm dmu_return_arcbuf( 3899209962Smm bigbuf_arcbufs[2 * j + 1]); 3900209962Smm } 3901209962Smm } 3902209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3903209962Smm dmu_buf_rele(bonus_db, FTAG); 3904209962Smm return; 3905209962Smm } 3906209962Smm 3907209962Smm /* 3908209962Smm * 50% of the time don't read objects in the 1st iteration to 3909209962Smm * test dmu_assign_arcbuf() for the case when there're no 3910209962Smm * existing 
dbufs for the specified offsets. 3911209962Smm */ 3912209962Smm if (i != 0 || ztest_random(2) != 0) { 3913219089Spjd error = dmu_read(os, packobj, packoff, 3914209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3915240415Smm ASSERT0(error); 3916219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3917209962Smm bigbuf, DMU_READ_PREFETCH); 3918240415Smm ASSERT0(error); 3919209962Smm } 3920209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3921219089Spjd n, chunksize, txg); 3922209962Smm 3923209962Smm /* 3924209962Smm * We've verified all the old bufwads, and made new ones. 3925209962Smm * Now write them out. 3926209962Smm */ 3927219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3928236143Smm if (ztest_opts.zo_verbose >= 7) { 3929209962Smm (void) printf("writing offset %llx size %llx" 3930209962Smm " txg %llx\n", 3931209962Smm (u_longlong_t)bigoff, 3932209962Smm (u_longlong_t)bigsize, 3933209962Smm (u_longlong_t)txg); 3934209962Smm } 3935219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3936209962Smm dmu_buf_t *dbt; 3937209962Smm if (i != 5) { 3938209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3939219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3940209962Smm } else { 3941209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3942209962Smm bigbuf_arcbufs[2 * j]->b_data, 3943219089Spjd chunksize / 2); 3944209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3945219089Spjd chunksize / 2, 3946209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3947219089Spjd chunksize / 2); 3948209962Smm } 3949209962Smm 3950209962Smm if (i == 1) { 3951219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3952219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3953209962Smm } 3954209962Smm if (i != 5) { 3955209962Smm dmu_assign_arcbuf(bonus_db, off, 3956209962Smm bigbuf_arcbufs[j], tx); 3957209962Smm } else { 3958209962Smm dmu_assign_arcbuf(bonus_db, off, 3959209962Smm bigbuf_arcbufs[2 * j], tx); 3960209962Smm dmu_assign_arcbuf(bonus_db, 3961219089Spjd 
off + chunksize / 2, 3962209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3963209962Smm } 3964209962Smm if (i == 1) { 3965209962Smm dmu_buf_rele(dbt, FTAG); 3966209962Smm } 3967209962Smm } 3968209962Smm dmu_tx_commit(tx); 3969209962Smm 3970209962Smm /* 3971209962Smm * Sanity check the stuff we just wrote. 3972209962Smm */ 3973209962Smm { 3974209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3975209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3976209962Smm 3977219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3978209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3979219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3980209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3981209962Smm 3982209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3983209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3984209962Smm 3985209962Smm umem_free(packcheck, packsize); 3986209962Smm umem_free(bigcheck, bigsize); 3987209962Smm } 3988209962Smm if (i == 2) { 3989209962Smm txg_wait_open(dmu_objset_pool(os), 0); 3990209962Smm } else if (i == 3) { 3991209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 3992209962Smm } 3993209962Smm } 3994209962Smm 3995209962Smm dmu_buf_rele(bonus_db, FTAG); 3996209962Smm umem_free(packbuf, packsize); 3997209962Smm umem_free(bigbuf, bigsize); 3998209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3999209962Smm} 4000209962Smm 4001219089Spjd/* ARGSUSED */ 4002209962Smmvoid 4003219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 4004168404Spjd{ 4005219089Spjd ztest_od_t od[1]; 4006219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 4007219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4008168404Spjd 4009168404Spjd /* 4010219089Spjd * Have multiple threads write to large offsets in an object 4011219089Spjd * to verify that parallel writes to an object -- even to the 4012219089Spjd * same blocks within the object -- doesn't cause any trouble. 
4013168404Spjd */ 4014219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4015219089Spjd 4016219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4017219089Spjd return; 4018219089Spjd 4019219089Spjd while (ztest_random(10) != 0) 4020219089Spjd ztest_io(zd, od[0].od_object, offset); 4021168404Spjd} 4022168404Spjd 4023168404Spjdvoid 4024219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 4025168404Spjd{ 4026219089Spjd ztest_od_t od[1]; 4027219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 4028219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4029219089Spjd uint64_t count = ztest_random(20) + 1; 4030219089Spjd uint64_t blocksize = ztest_random_blocksize(); 4031219089Spjd void *data; 4032168404Spjd 4033219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4034168404Spjd 4035219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4036185029Spjd return; 4037168404Spjd 4038219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 4039185029Spjd return; 4040168404Spjd 4041219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 4042185029Spjd 4043219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 4044185029Spjd 4045219089Spjd while (ztest_random(count) != 0) { 4046219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 4047219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 4048219089Spjd data) != 0) 4049219089Spjd break; 4050219089Spjd while (ztest_random(4) != 0) 4051219089Spjd ztest_io(zd, od[0].od_object, randoff); 4052185029Spjd } 4053168404Spjd 4054219089Spjd umem_free(data, blocksize); 4055168404Spjd} 4056168404Spjd 4057168404Spjd/* 4058168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 
4059168404Spjd */ 4060168404Spjd#define ZTEST_ZAP_MIN_INTS 1 4061168404Spjd#define ZTEST_ZAP_MAX_INTS 4 4062168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 4063168404Spjd 4064168404Spjdvoid 4065219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 4066168404Spjd{ 4067219089Spjd objset_t *os = zd->zd_os; 4068219089Spjd ztest_od_t od[1]; 4069168404Spjd uint64_t object; 4070168404Spjd uint64_t txg, last_txg; 4071168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 4072168404Spjd uint64_t zl_ints, zl_intsize, prop; 4073168404Spjd int i, ints; 4074168404Spjd dmu_tx_t *tx; 4075168404Spjd char propname[100], txgname[100]; 4076168404Spjd int error; 4077168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 4078168404Spjd 4079219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4080168404Spjd 4081219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4082219089Spjd return; 4083219089Spjd 4084219089Spjd object = od[0].od_object; 4085219089Spjd 4086168404Spjd /* 4087219089Spjd * Generate a known hash collision, and verify that 4088219089Spjd * we can lookup and remove both entries. 
4089168404Spjd */ 4090219089Spjd tx = dmu_tx_create(os); 4091219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4092219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4093219089Spjd if (txg == 0) 4094219089Spjd return; 4095219089Spjd for (i = 0; i < 2; i++) { 4096219089Spjd value[i] = i; 4097219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 4098219089Spjd 1, &value[i], tx)); 4099168404Spjd } 4100219089Spjd for (i = 0; i < 2; i++) { 4101219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 4102219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 4103219089Spjd VERIFY3U(0, ==, 4104219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 4105219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4106219089Spjd ASSERT3U(zl_ints, ==, 1); 4107219089Spjd } 4108219089Spjd for (i = 0; i < 2; i++) { 4109219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 4110219089Spjd } 4111219089Spjd dmu_tx_commit(tx); 4112168404Spjd 4113219089Spjd /* 4114219089Spjd * Generate a buch of random entries. 4115219089Spjd */ 4116168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 4117168404Spjd 4118185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4119185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4120185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4121185029Spjd bzero(value, sizeof (value)); 4122185029Spjd last_txg = 0; 4123168404Spjd 4124185029Spjd /* 4125185029Spjd * If these zap entries already exist, validate their contents. 
4126185029Spjd */ 4127185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4128185029Spjd if (error == 0) { 4129185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4130185029Spjd ASSERT3U(zl_ints, ==, 1); 4131168404Spjd 4132185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 4133185029Spjd zl_ints, &last_txg) == 0); 4134168404Spjd 4135185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 4136185029Spjd &zl_ints) == 0); 4137168404Spjd 4138185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4139185029Spjd ASSERT3U(zl_ints, ==, ints); 4140168404Spjd 4141185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 4142185029Spjd zl_ints, value) == 0); 4143168404Spjd 4144185029Spjd for (i = 0; i < ints; i++) { 4145185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 4146168404Spjd } 4147185029Spjd } else { 4148185029Spjd ASSERT3U(error, ==, ENOENT); 4149185029Spjd } 4150168404Spjd 4151185029Spjd /* 4152185029Spjd * Atomically update two entries in our zap object. 4153185029Spjd * The first is named txg_%llu, and contains the txg 4154185029Spjd * in which the property was last updated. The second 4155185029Spjd * is named prop_%llu, and the nth element of its value 4156185029Spjd * should be txg + object + n. 
4157185029Spjd */ 4158185029Spjd tx = dmu_tx_create(os); 4159219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4160219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4161219089Spjd if (txg == 0) 4162185029Spjd return; 4163168404Spjd 4164185029Spjd if (last_txg > txg) 4165185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4166168404Spjd 4167185029Spjd for (i = 0; i < ints; i++) 4168185029Spjd value[i] = txg + object + i; 4169168404Spjd 4170219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4171219089Spjd 1, &txg, tx)); 4172219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4173219089Spjd ints, value, tx)); 4174168404Spjd 4175185029Spjd dmu_tx_commit(tx); 4176168404Spjd 4177185029Spjd /* 4178185029Spjd * Remove a random pair of entries. 4179185029Spjd */ 4180185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4181185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4182185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4183168404Spjd 4184185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4185168404Spjd 4186185029Spjd if (error == ENOENT) 4187185029Spjd return; 4188168404Spjd 4189240415Smm ASSERT0(error); 4190168404Spjd 4191185029Spjd tx = dmu_tx_create(os); 4192219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4193219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4194219089Spjd if (txg == 0) 4195185029Spjd return; 4196219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4197219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4198185029Spjd dmu_tx_commit(tx); 4199168404Spjd} 4200168404Spjd 4201209962Smm/* 4202209962Smm * Testcase to test the upgrading of a microzap to fatzap. 
4203209962Smm */ 4204168404Spjdvoid 4205219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4206209962Smm{ 4207219089Spjd objset_t *os = zd->zd_os; 4208219089Spjd ztest_od_t od[1]; 4209219089Spjd uint64_t object, txg; 4210209962Smm 4211219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4212209962Smm 4213219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4214219089Spjd return; 4215209962Smm 4216219089Spjd object = od[0].od_object; 4217209962Smm 4218209962Smm /* 4219219089Spjd * Add entries to this ZAP and make sure it spills over 4220209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4221219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4222209962Smm */ 4223219089Spjd for (int i = 0; i < 2050; i++) { 4224219089Spjd char name[MAXNAMELEN]; 4225219089Spjd uint64_t value = i; 4226219089Spjd dmu_tx_t *tx; 4227219089Spjd int error; 4228209962Smm 4229219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4230219089Spjd id, value); 4231219089Spjd 4232209962Smm tx = dmu_tx_create(os); 4233219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4234219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4235219089Spjd if (txg == 0) 4236209962Smm return; 4237219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4238219089Spjd &value, tx); 4239209962Smm ASSERT(error == 0 || error == EEXIST); 4240209962Smm dmu_tx_commit(tx); 4241209962Smm } 4242209962Smm} 4243209962Smm 4244219089Spjd/* ARGSUSED */ 4245209962Smmvoid 4246219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4247168404Spjd{ 4248219089Spjd objset_t *os = zd->zd_os; 4249219089Spjd ztest_od_t od[1]; 4250168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4251168404Spjd dmu_tx_t *tx; 4252168404Spjd int i, namelen, error; 4253219089Spjd int micro = ztest_random(2); 4254168404Spjd char name[20], string_value[20]; 4255168404Spjd void *data; 4256168404Spjd 4257219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4258219089Spjd 4259219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4260219089Spjd return; 4261219089Spjd 4262219089Spjd object = od[0].od_object; 4263219089Spjd 4264185029Spjd /* 4265185029Spjd * Generate a random name of the form 'xxx.....' where each 4266185029Spjd * x is a random printable character and the dots are dots. 4267185029Spjd * There are 94 such characters, and the name length goes from 4268185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4269185029Spjd */ 4270185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4271168404Spjd 4272185029Spjd for (i = 0; i < 3; i++) 4273185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4274185029Spjd for (; i < namelen - 1; i++) 4275185029Spjd name[i] = '.'; 4276185029Spjd name[i] = '\0'; 4277168404Spjd 4278219089Spjd if ((namelen & 1) || micro) { 4279185029Spjd wsize = sizeof (txg); 4280185029Spjd wc = 1; 4281185029Spjd data = &txg; 4282185029Spjd } else { 4283185029Spjd wsize = 1; 4284185029Spjd wc = namelen; 4285185029Spjd data = string_value; 4286185029Spjd } 4287168404Spjd 4288185029Spjd count = -1ULL; 4289185029Spjd VERIFY(zap_count(os, object, &count) == 0); 4290185029Spjd ASSERT(count != -1ULL); 4291168404Spjd 4292185029Spjd /* 4293185029Spjd * Select an operation: length, lookup, add, update, remove. 
4294185029Spjd */ 4295185029Spjd i = ztest_random(5); 4296168404Spjd 4297185029Spjd if (i >= 2) { 4298185029Spjd tx = dmu_tx_create(os); 4299219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4300219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4301219089Spjd if (txg == 0) 4302185029Spjd return; 4303185029Spjd bcopy(name, string_value, namelen); 4304185029Spjd } else { 4305185029Spjd tx = NULL; 4306185029Spjd txg = 0; 4307185029Spjd bzero(string_value, namelen); 4308185029Spjd } 4309168404Spjd 4310185029Spjd switch (i) { 4311168404Spjd 4312185029Spjd case 0: 4313185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4314185029Spjd if (error == 0) { 4315185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4316185029Spjd ASSERT3U(wc, ==, zl_wc); 4317185029Spjd } else { 4318185029Spjd ASSERT3U(error, ==, ENOENT); 4319185029Spjd } 4320185029Spjd break; 4321168404Spjd 4322185029Spjd case 1: 4323185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4324185029Spjd if (error == 0) { 4325185029Spjd if (data == string_value && 4326185029Spjd bcmp(name, data, namelen) != 0) 4327185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4328185029Spjd name, data, namelen); 4329185029Spjd } else { 4330185029Spjd ASSERT3U(error, ==, ENOENT); 4331185029Spjd } 4332185029Spjd break; 4333168404Spjd 4334185029Spjd case 2: 4335185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4336185029Spjd ASSERT(error == 0 || error == EEXIST); 4337185029Spjd break; 4338168404Spjd 4339185029Spjd case 3: 4340185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4341185029Spjd break; 4342168404Spjd 4343185029Spjd case 4: 4344185029Spjd error = zap_remove(os, object, name, tx); 4345185029Spjd ASSERT(error == 0 || error == ENOENT); 4346185029Spjd break; 4347185029Spjd } 4348168404Spjd 4349185029Spjd if (tx != NULL) 4350185029Spjd dmu_tx_commit(tx); 4351168404Spjd} 4352168404Spjd 4353219089Spjd/* 4354219089Spjd * Commit callback data. 
4355219089Spjd */ 4356219089Spjdtypedef struct ztest_cb_data { 4357219089Spjd list_node_t zcd_node; 4358219089Spjd uint64_t zcd_txg; 4359219089Spjd int zcd_expected_err; 4360219089Spjd boolean_t zcd_added; 4361219089Spjd boolean_t zcd_called; 4362219089Spjd spa_t *zcd_spa; 4363219089Spjd} ztest_cb_data_t; 4364219089Spjd 4365219089Spjd/* This is the actual commit callback function */ 4366219089Spjdstatic void 4367219089Spjdztest_commit_callback(void *arg, int error) 4368219089Spjd{ 4369219089Spjd ztest_cb_data_t *data = arg; 4370219089Spjd uint64_t synced_txg; 4371219089Spjd 4372219089Spjd VERIFY(data != NULL); 4373219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4374219089Spjd VERIFY(!data->zcd_called); 4375219089Spjd 4376219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4377219089Spjd if (data->zcd_txg > synced_txg) 4378219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4379219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4380219089Spjd synced_txg); 4381219089Spjd 4382219089Spjd data->zcd_called = B_TRUE; 4383219089Spjd 4384219089Spjd if (error == ECANCELED) { 4385240415Smm ASSERT0(data->zcd_txg); 4386219089Spjd ASSERT(!data->zcd_added); 4387219089Spjd 4388219089Spjd /* 4389219089Spjd * The private callback data should be destroyed here, but 4390219089Spjd * since we are going to check the zcd_called field after 4391219089Spjd * dmu_tx_abort(), we will destroy it there. 4392219089Spjd */ 4393219089Spjd return; 4394219089Spjd } 4395219089Spjd 4396219089Spjd /* Was this callback added to the global callback list? 
*/ 4397219089Spjd if (!data->zcd_added) 4398219089Spjd goto out; 4399219089Spjd 4400219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4401219089Spjd 4402219089Spjd /* Remove our callback from the list */ 4403219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4404219089Spjd list_remove(&zcl.zcl_callbacks, data); 4405219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4406219089Spjd 4407219089Spjdout: 4408219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4409219089Spjd} 4410219089Spjd 4411219089Spjd/* Allocate and initialize callback data structure */ 4412219089Spjdstatic ztest_cb_data_t * 4413219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4414219089Spjd{ 4415219089Spjd ztest_cb_data_t *cb_data; 4416219089Spjd 4417219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4418219089Spjd 4419219089Spjd cb_data->zcd_txg = txg; 4420219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4421219089Spjd 4422219089Spjd return (cb_data); 4423219089Spjd} 4424219089Spjd 4425219089Spjd/* 4426219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4427219089Spjd * callback has been registered but not called, then we assume there is an 4428219089Spjd * implementation bug. 4429219089Spjd */ 4430219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4431219089Spjd 4432219089Spjd/* 4433219089Spjd * Commit callback test. 
4434219089Spjd */ 4435168404Spjdvoid 4436219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4437168404Spjd{ 4438219089Spjd objset_t *os = zd->zd_os; 4439219089Spjd ztest_od_t od[1]; 4440219089Spjd dmu_tx_t *tx; 4441219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4442219089Spjd uint64_t old_txg, txg; 4443219089Spjd int i, error; 4444219089Spjd 4445219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4446219089Spjd 4447219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4448219089Spjd return; 4449219089Spjd 4450219089Spjd tx = dmu_tx_create(os); 4451219089Spjd 4452219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4453219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4454219089Spjd 4455219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4456219089Spjd 4457219089Spjd /* Every once in a while, abort the transaction on purpose */ 4458219089Spjd if (ztest_random(100) == 0) 4459219089Spjd error = -1; 4460219089Spjd 4461219089Spjd if (!error) 4462219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4463219089Spjd 4464219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4465219089Spjd 4466219089Spjd cb_data[0]->zcd_txg = txg; 4467219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4468219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4469219089Spjd 4470219089Spjd if (error) { 4471219089Spjd /* 4472219089Spjd * It's not a strict requirement to call the registered 4473219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4474219089Spjd * it's supposed to happen in the current implementation 4475219089Spjd * so we will check for that. 
4476219089Spjd */ 4477219089Spjd for (i = 0; i < 2; i++) { 4478219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4479219089Spjd VERIFY(!cb_data[i]->zcd_called); 4480219089Spjd } 4481219089Spjd 4482219089Spjd dmu_tx_abort(tx); 4483219089Spjd 4484219089Spjd for (i = 0; i < 2; i++) { 4485219089Spjd VERIFY(cb_data[i]->zcd_called); 4486219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4487219089Spjd } 4488219089Spjd 4489219089Spjd return; 4490219089Spjd } 4491219089Spjd 4492219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4493219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4494219089Spjd 4495219089Spjd /* 4496219089Spjd * Read existing data to make sure there isn't a future leak. 4497219089Spjd */ 4498219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4499219089Spjd &old_txg, DMU_READ_PREFETCH)); 4500219089Spjd 4501219089Spjd if (old_txg > txg) 4502219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4503219089Spjd old_txg, txg); 4504219089Spjd 4505219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4506219089Spjd 4507219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4508219089Spjd 4509219089Spjd /* 4510219089Spjd * Since commit callbacks don't have any ordering requirement and since 4511219089Spjd * it is theoretically possible for a commit callback to be called 4512219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4513219089Spjd * synced, it is difficult to reliably determine whether a commit 4514219089Spjd * callback hasn't been called due to high load or due to a flawed 4515219089Spjd * implementation. 4516219089Spjd * 4517219089Spjd * In practice, we will assume that if after a certain number of txgs a 4518219089Spjd * commit callback hasn't been called, then most likely there's an 4519219089Spjd * implementation bug.. 
4520219089Spjd */ 4521219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4522219089Spjd if (tmp_cb != NULL && 4523219089Spjd tmp_cb->zcd_txg > txg - ZTEST_COMMIT_CALLBACK_THRESH) { 4524219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4525219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4526219089Spjd } 4527219089Spjd 4528219089Spjd /* 4529219089Spjd * Let's find the place to insert our callbacks. 4530219089Spjd * 4531219089Spjd * Even though the list is ordered by txg, it is possible for the 4532219089Spjd * insertion point to not be the end because our txg may already be 4533219089Spjd * quiescing at this point and other callbacks in the open txg 4534219089Spjd * (from other objsets) may have sneaked in. 4535219089Spjd */ 4536219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4537219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4538219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4539219089Spjd 4540219089Spjd /* Add the 3 callbacks to the list */ 4541219089Spjd for (i = 0; i < 3; i++) { 4542219089Spjd if (tmp_cb == NULL) 4543219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4544219089Spjd else 4545219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4546219089Spjd cb_data[i]); 4547219089Spjd 4548219089Spjd cb_data[i]->zcd_added = B_TRUE; 4549219089Spjd VERIFY(!cb_data[i]->zcd_called); 4550219089Spjd 4551219089Spjd tmp_cb = cb_data[i]; 4552219089Spjd } 4553219089Spjd 4554219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4555219089Spjd 4556219089Spjd dmu_tx_commit(tx); 4557219089Spjd} 4558219089Spjd 4559219089Spjd/* ARGSUSED */ 4560219089Spjdvoid 4561219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4562219089Spjd{ 4563219089Spjd zfs_prop_t proplist[] = { 4564219089Spjd ZFS_PROP_CHECKSUM, 4565219089Spjd ZFS_PROP_COMPRESSION, 4566219089Spjd ZFS_PROP_COPIES, 4567219089Spjd ZFS_PROP_DEDUP 4568219089Spjd }; 4569219089Spjd 4570236143Smm (void) rw_rdlock(&ztest_name_lock); 
4571219089Spjd 4572219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4573219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4574219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4575219089Spjd 4576236143Smm (void) rw_unlock(&ztest_name_lock); 4577219089Spjd} 4578219089Spjd 4579219089Spjd/* ARGSUSED */ 4580219089Spjdvoid 4581219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4582219089Spjd{ 4583219089Spjd nvlist_t *props = NULL; 4584219089Spjd 4585236143Smm (void) rw_rdlock(&ztest_name_lock); 4586219089Spjd 4587236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4588219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4589219089Spjd 4590240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4591219089Spjd 4592236143Smm if (ztest_opts.zo_verbose >= 6) 4593219089Spjd dump_nvlist(props, 4); 4594219089Spjd 4595219089Spjd nvlist_free(props); 4596219089Spjd 4597236143Smm (void) rw_unlock(&ztest_name_lock); 4598219089Spjd} 4599219089Spjd 4600219089Spjd/* 4601219089Spjd * Test snapshot hold/release and deferred destroy. 
4602219089Spjd */ 4603219089Spjdvoid 4604219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4605219089Spjd{ 4606219089Spjd int error; 4607219089Spjd objset_t *os = zd->zd_os; 4608219089Spjd objset_t *origin; 4609219089Spjd char snapname[100]; 4610219089Spjd char fullname[100]; 4611219089Spjd char clonename[100]; 4612219089Spjd char tag[100]; 4613168404Spjd char osname[MAXNAMELEN]; 4614168404Spjd 4615236143Smm (void) rw_rdlock(&ztest_name_lock); 4616168404Spjd 4617168404Spjd dmu_objset_name(os, osname); 4618168404Spjd 4619219089Spjd (void) snprintf(snapname, 100, "sh1_%llu", id); 4620219089Spjd (void) snprintf(fullname, 100, "%s@%s", osname, snapname); 4621219089Spjd (void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id); 4622219089Spjd (void) snprintf(tag, 100, "%tag_%llu", id); 4623219089Spjd 4624219089Spjd /* 4625219089Spjd * Clean up from any previous run. 4626219089Spjd */ 4627219089Spjd (void) dmu_objset_destroy(clonename, B_FALSE); 4628219089Spjd (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE); 4629219089Spjd (void) dmu_objset_destroy(fullname, B_FALSE); 4630219089Spjd 4631219089Spjd /* 4632219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4633219089Spjd * destroy clone, verify snap was also destroyed. 
4634219089Spjd */ 4635219089Spjd error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, 4636219089Spjd FALSE, -1); 4637219089Spjd if (error) { 4638219089Spjd if (error == ENOSPC) { 4639219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4640219089Spjd goto out; 4641168404Spjd } 4642219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4643219089Spjd } 4644168404Spjd 4645219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4646219089Spjd if (error) 4647219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4648168404Spjd 4649219089Spjd error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0); 4650219089Spjd dmu_objset_rele(origin, FTAG); 4651219089Spjd if (error) { 4652168404Spjd if (error == ENOSPC) { 4653219089Spjd ztest_record_enospc("dmu_objset_clone"); 4654219089Spjd goto out; 4655168404Spjd } 4656219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4657219089Spjd } 4658168404Spjd 4659219089Spjd error = dmu_objset_destroy(fullname, B_TRUE); 4660219089Spjd if (error) { 4661219089Spjd fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", 4662219089Spjd fullname, error); 4663219089Spjd } 4664168404Spjd 4665219089Spjd error = dmu_objset_destroy(clonename, B_FALSE); 4666219089Spjd if (error) 4667219089Spjd fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error); 4668168404Spjd 4669219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4670219089Spjd if (error != ENOENT) 4671219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4672168404Spjd 4673219089Spjd /* 4674219089Spjd * Create snapshot, add temporary hold, verify that we can't 4675219089Spjd * destroy a held snapshot, mark for deferred destroy, 4676219089Spjd * release hold, verify snapshot was destroyed. 
4677219089Spjd */ 4678219089Spjd error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, 4679219089Spjd FALSE, -1); 4680219089Spjd if (error) { 4681219089Spjd if (error == ENOSPC) { 4682219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4683219089Spjd goto out; 4684168404Spjd } 4685219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4686168404Spjd } 4687168404Spjd 4688219089Spjd error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, 4689219089Spjd B_TRUE, -1); 4690219089Spjd if (error) 4691219089Spjd fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); 4692219089Spjd 4693219089Spjd error = dmu_objset_destroy(fullname, B_FALSE); 4694219089Spjd if (error != EBUSY) { 4695219089Spjd fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d", 4696219089Spjd fullname, error); 4697219089Spjd } 4698219089Spjd 4699219089Spjd error = dmu_objset_destroy(fullname, B_TRUE); 4700219089Spjd if (error) { 4701219089Spjd fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", 4702219089Spjd fullname, error); 4703219089Spjd } 4704219089Spjd 4705219089Spjd error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE); 4706219089Spjd if (error) 4707219089Spjd fatal(0, "dsl_dataset_user_release(%s)", fullname, tag); 4708219089Spjd 4709219089Spjd VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); 4710219089Spjd 4711219089Spjdout: 4712236143Smm (void) rw_unlock(&ztest_name_lock); 4713168404Spjd} 4714168404Spjd 4715168404Spjd/* 4716168404Spjd * Inject random faults into the on-disk data. 
4717168404Spjd */ 4718219089Spjd/* ARGSUSED */ 4719168404Spjdvoid 4720219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4721168404Spjd{ 4722219089Spjd ztest_shared_t *zs = ztest_shared; 4723236143Smm spa_t *spa = ztest_spa; 4724168404Spjd int fd; 4725168404Spjd uint64_t offset; 4726219089Spjd uint64_t leaves; 4727168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4728168404Spjd uint64_t top, leaf; 4729168404Spjd char path0[MAXPATHLEN]; 4730168404Spjd char pathrand[MAXPATHLEN]; 4731168404Spjd size_t fsize; 4732168404Spjd int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4733168404Spjd int iters = 1000; 4734219089Spjd int maxfaults; 4735219089Spjd int mirror_save; 4736185029Spjd vdev_t *vd0 = NULL; 4737168404Spjd uint64_t guid0 = 0; 4738219089Spjd boolean_t islog = B_FALSE; 4739168404Spjd 4740236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4741219089Spjd maxfaults = MAXFAULTS(); 4742236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4743219089Spjd mirror_save = zs->zs_mirrors; 4744236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4745219089Spjd 4746185029Spjd ASSERT(leaves >= 1); 4747168404Spjd 4748168404Spjd /* 4749185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4750168404Spjd */ 4751185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4752168404Spjd 4753185029Spjd if (ztest_random(2) == 0) { 4754185029Spjd /* 4755219089Spjd * Inject errors on a normal data device or slog device. 4756185029Spjd */ 4757219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4758219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4759168404Spjd 4760185029Spjd /* 4761185029Spjd * Generate paths to the first leaf in this top-level vdev, 4762185029Spjd * and to the random leaf we selected. We'll induce transient 4763185029Spjd * write failures and random online/offline activity on leaf 0, 4764185029Spjd * and we'll write random garbage to the randomly chosen leaf. 
4765185029Spjd */ 4766185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4767236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4768236143Smm top * leaves + zs->zs_splits); 4769185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4770236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4771236143Smm top * leaves + leaf); 4772168404Spjd 4773185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4774219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4775219089Spjd islog = B_TRUE; 4776219089Spjd 4777185029Spjd if (vd0 != NULL && maxfaults != 1) { 4778185029Spjd /* 4779185029Spjd * Make vd0 explicitly claim to be unreadable, 4780185029Spjd * or unwriteable, or reach behind its back 4781185029Spjd * and close the underlying fd. We can do this if 4782185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4783185029Spjd * and we can do it if maxfaults >= 2 because we'll 4784185029Spjd * have enough redundancy. If maxfaults == 1, the 4785185029Spjd * combination of this with injection of random data 4786185029Spjd * corruption below exceeds the pool's fault tolerance. 4787185029Spjd */ 4788185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4789168404Spjd 4790185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4791185029Spjd (void) close(vf->vf_vnode->v_fd); 4792185029Spjd vf->vf_vnode->v_fd = -1; 4793185029Spjd } else if (ztest_random(2) == 0) { 4794185029Spjd vd0->vdev_cant_read = B_TRUE; 4795185029Spjd } else { 4796185029Spjd vd0->vdev_cant_write = B_TRUE; 4797185029Spjd } 4798185029Spjd guid0 = vd0->vdev_guid; 4799185029Spjd } 4800185029Spjd } else { 4801185029Spjd /* 4802185029Spjd * Inject errors on an l2cache device. 
4803185029Spjd */ 4804185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4805168404Spjd 4806185029Spjd if (sav->sav_count == 0) { 4807185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4808185029Spjd return; 4809185029Spjd } 4810185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4811168404Spjd guid0 = vd0->vdev_guid; 4812185029Spjd (void) strcpy(path0, vd0->vdev_path); 4813185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4814185029Spjd 4815185029Spjd leaf = 0; 4816185029Spjd leaves = 1; 4817185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4818168404Spjd } 4819168404Spjd 4820185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4821185029Spjd 4822168404Spjd /* 4823219089Spjd * If we can tolerate two or more faults, or we're dealing 4824219089Spjd * with a slog, randomly online/offline vd0. 4825168404Spjd */ 4826219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4827209962Smm if (ztest_random(10) < 6) { 4828209962Smm int flags = (ztest_random(2) == 0 ? 4829209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4830219089Spjd 4831219089Spjd /* 4832219089Spjd * We have to grab the zs_name_lock as writer to 4833219089Spjd * prevent a race between offlining a slog and 4834219089Spjd * destroying a dataset. Offlining the slog will 4835219089Spjd * grab a reference on the dataset which may cause 4836219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4837219089Spjd * leaving the dataset in an inconsistent state. 
4838219089Spjd */ 4839219089Spjd if (islog) 4840236143Smm (void) rw_wrlock(&ztest_name_lock); 4841219089Spjd 4842209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4843219089Spjd 4844219089Spjd if (islog) 4845236143Smm (void) rw_unlock(&ztest_name_lock); 4846209962Smm } else { 4847242845Sdelphij /* 4848242845Sdelphij * Ideally we would like to be able to randomly 4849242845Sdelphij * call vdev_[on|off]line without holding locks 4850242845Sdelphij * to force unpredictable failures but the side 4851242845Sdelphij * effects of vdev_[on|off]line prevent us from 4852242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 4853242845Sdelphij * prevent a race between injection testing and 4854242845Sdelphij * aux_vdev removal. 4855242845Sdelphij */ 4856242845Sdelphij VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4857209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4858242845Sdelphij VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4859209962Smm } 4860168404Spjd } 4861168404Spjd 4862219089Spjd if (maxfaults == 0) 4863219089Spjd return; 4864219089Spjd 4865168404Spjd /* 4866168404Spjd * We have at least single-fault tolerance, so inject data corruption. 
4867168404Spjd */ 4868168404Spjd fd = open(pathrand, O_RDWR); 4869168404Spjd 4870168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4871168404Spjd return; 4872168404Spjd 4873168404Spjd fsize = lseek(fd, 0, SEEK_END); 4874168404Spjd 4875168404Spjd while (--iters != 0) { 4876168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4877168404Spjd (leaves << bshift) + (leaf << bshift) + 4878168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4879168404Spjd 4880168404Spjd if (offset >= fsize) 4881168404Spjd continue; 4882168404Spjd 4883236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4884219089Spjd if (mirror_save != zs->zs_mirrors) { 4885236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4886219089Spjd (void) close(fd); 4887219089Spjd return; 4888219089Spjd } 4889168404Spjd 4890168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4891168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4892168404Spjd offset, pathrand); 4893219089Spjd 4894236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4895219089Spjd 4896236143Smm if (ztest_opts.zo_verbose >= 7) 4897219089Spjd (void) printf("injected bad word into %s," 4898219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4899168404Spjd } 4900168404Spjd 4901168404Spjd (void) close(fd); 4902168404Spjd} 4903168404Spjd 4904168404Spjd/* 4905219089Spjd * Verify that DDT repair works as expected. 
4906219089Spjd */ 4907219089Spjdvoid 4908219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4909219089Spjd{ 4910219089Spjd ztest_shared_t *zs = ztest_shared; 4911236143Smm spa_t *spa = ztest_spa; 4912219089Spjd objset_t *os = zd->zd_os; 4913219089Spjd ztest_od_t od[1]; 4914219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4915219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4916219089Spjd dmu_buf_t *db; 4917219089Spjd dmu_tx_t *tx; 4918219089Spjd void *buf; 4919219089Spjd blkptr_t blk; 4920219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4921219089Spjd 4922219089Spjd blocksize = ztest_random_blocksize(); 4923219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4924219089Spjd 4925219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4926219089Spjd 4927219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4928219089Spjd return; 4929219089Spjd 4930219089Spjd /* 4931219089Spjd * Take the name lock as writer to prevent anyone else from changing 4932219089Spjd * the pool and dataset properies we need to maintain during this test. 
4933219089Spjd */ 4934236143Smm (void) rw_wrlock(&ztest_name_lock); 4935219089Spjd 4936219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 4937219089Spjd B_FALSE) != 0 || 4938219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 4939219089Spjd B_FALSE) != 0) { 4940236143Smm (void) rw_unlock(&ztest_name_lock); 4941219089Spjd return; 4942219089Spjd } 4943219089Spjd 4944219089Spjd object = od[0].od_object; 4945219089Spjd blocksize = od[0].od_blocksize; 4946228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 4947219089Spjd 4948219089Spjd ASSERT(object != 0); 4949219089Spjd 4950219089Spjd tx = dmu_tx_create(os); 4951219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 4952219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 4953219089Spjd if (txg == 0) { 4954236143Smm (void) rw_unlock(&ztest_name_lock); 4955219089Spjd return; 4956219089Spjd } 4957219089Spjd 4958219089Spjd /* 4959219089Spjd * Write all the copies of our block. 4960219089Spjd */ 4961219089Spjd for (int i = 0; i < copies; i++) { 4962219089Spjd uint64_t offset = i * blocksize; 4963243524Smm VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db, 4964243524Smm DMU_READ_NO_PREFETCH)); 4965219089Spjd ASSERT(db->db_offset == offset); 4966219089Spjd ASSERT(db->db_size == blocksize); 4967219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 4968219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 4969219089Spjd dmu_buf_will_fill(db, tx); 4970219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 4971219089Spjd dmu_buf_rele(db, FTAG); 4972219089Spjd } 4973219089Spjd 4974219089Spjd dmu_tx_commit(tx); 4975219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 4976219089Spjd 4977219089Spjd /* 4978219089Spjd * Find out what block we got. 
4979219089Spjd */ 4980243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 4981243524Smm DMU_READ_NO_PREFETCH)); 4982219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 4983219089Spjd dmu_buf_rele(db, FTAG); 4984219089Spjd 4985219089Spjd /* 4986219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 4987219089Spjd */ 4988219089Spjd psize = BP_GET_PSIZE(&blk); 4989219089Spjd buf = zio_buf_alloc(psize); 4990219089Spjd ztest_pattern_set(buf, psize, ~pattern); 4991219089Spjd 4992219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 4993219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 4994219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 4995219089Spjd 4996219089Spjd zio_buf_free(buf, psize); 4997219089Spjd 4998236143Smm (void) rw_unlock(&ztest_name_lock); 4999219089Spjd} 5000219089Spjd 5001219089Spjd/* 5002168404Spjd * Scrub the pool. 5003168404Spjd */ 5004219089Spjd/* ARGSUSED */ 5005168404Spjdvoid 5006219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5007168404Spjd{ 5008236143Smm spa_t *spa = ztest_spa; 5009168404Spjd 5010219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5011219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5012219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5013168404Spjd} 5014168404Spjd 5015168404Spjd/* 5016228103Smm * Change the guid for the pool. 
5017228103Smm */ 5018228103Smm/* ARGSUSED */ 5019228103Smmvoid 5020228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5021228103Smm{ 5022236143Smm spa_t *spa = ztest_spa; 5023228103Smm uint64_t orig, load; 5024239620Smm int error; 5025228103Smm 5026228103Smm orig = spa_guid(spa); 5027228103Smm load = spa_load_guid(spa); 5028239620Smm 5029239620Smm (void) rw_wrlock(&ztest_name_lock); 5030239620Smm error = spa_change_guid(spa); 5031239620Smm (void) rw_unlock(&ztest_name_lock); 5032239620Smm 5033239620Smm if (error != 0) 5034228103Smm return; 5035228103Smm 5036243505Smm if (ztest_opts.zo_verbose >= 4) { 5037228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5038228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5039228103Smm } 5040228103Smm 5041228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5042228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5043228103Smm} 5044228103Smm 5045228103Smm/* 5046168404Spjd * Rename the pool to a different name and then rename it back. 5047168404Spjd */ 5048219089Spjd/* ARGSUSED */ 5049168404Spjdvoid 5050219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5051168404Spjd{ 5052168404Spjd char *oldname, *newname; 5053168404Spjd spa_t *spa; 5054168404Spjd 5055236143Smm (void) rw_wrlock(&ztest_name_lock); 5056168404Spjd 5057236143Smm oldname = ztest_opts.zo_pool; 5058168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5059168404Spjd (void) strcpy(newname, oldname); 5060168404Spjd (void) strcat(newname, "_tmp"); 5061168404Spjd 5062168404Spjd /* 5063168404Spjd * Do the rename 5064168404Spjd */ 5065219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5066168404Spjd 5067168404Spjd /* 5068168404Spjd * Try to open it under the old name, which shouldn't exist 5069168404Spjd */ 5070219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5071168404Spjd 5072168404Spjd /* 5073168404Spjd * Open it under the new name and make sure it's still the same spa_t. 
5074168404Spjd */ 5075219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5076168404Spjd 5077236143Smm ASSERT(spa == ztest_spa); 5078168404Spjd spa_close(spa, FTAG); 5079168404Spjd 5080168404Spjd /* 5081168404Spjd * Rename it back to the original 5082168404Spjd */ 5083219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5084168404Spjd 5085168404Spjd /* 5086168404Spjd * Make sure it can still be opened 5087168404Spjd */ 5088219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5089168404Spjd 5090236143Smm ASSERT(spa == ztest_spa); 5091168404Spjd spa_close(spa, FTAG); 5092168404Spjd /* strlen(newname) + 1 is the same strlen(oldname) + 5 bytes that were allocated. */ 5093168404Spjd umem_free(newname, strlen(newname) + 1); 5094168404Spjd 5095236143Smm (void) rw_unlock(&ztest_name_lock); 5096168404Spjd} 5097168404Spjd 5098168404Spjd/* 5099219089Spjd * Verify pool integrity by running zdb. 5100168404Spjd */ 5101168404Spjdstatic void 5102219089Spjdztest_run_zdb(char *pool) 5103168404Spjd{ 5104168404Spjd int status; 5105168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5106168404Spjd char zbuf[1024]; 5107168404Spjd char *bin; 5108185029Spjd char *ztest; 5109185029Spjd char *isa; 5110185029Spjd int isalen; 5111168404Spjd FILE *fp; 5112168404Spjd /* Start from a fixed ztest path; the buffer is rewritten in place into the zdb command line below. */ 5113214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5114168404Spjd 5115168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5116168404Spjd bin = strstr(zdb, "/usr/bin/"); 5117185029Spjd ztest = strstr(bin, "/ztest"); 5118185029Spjd /* isa is the text between /usr/bin (8 characters) and /ztest; it is duplicated because sprintf() below overwrites the buffer it points into. */ isa = bin + 8; 5119185029Spjd isalen = ztest - isa; 5120185029Spjd isa = strdup(isa); 5121168404Spjd /* LINTED */ 5122185029Spjd (void) sprintf(bin, 5123219089Spjd "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s", 5124185029Spjd isalen, 5125185029Spjd isa, 5126236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5127236143Smm ztest_opts.zo_verbose >= 4 ?
"v" : "", 5128219089Spjd spa_config_path, 5129208047Smm pool); 5130185029Spjd free(isa); 5131168404Spjd 5132236143Smm if (ztest_opts.zo_verbose >= 5) 5133168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5134168404Spjd 5135168404Spjd fp = popen(zdb, "r"); 5136168404Spjd assert(fp != NULL); 5137168404Spjd /* Read all of the zdb output; echo it only at verbosity 3 or higher. */ 5138168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5139236143Smm if (ztest_opts.zo_verbose >= 3) 5140168404Spjd (void) printf("%s", zbuf); 5141168404Spjd 5142168404Spjd status = pclose(fp); 5143168404Spjd 5144168404Spjd if (status == 0) 5145168404Spjd return; 5146168404Spjd /* Non-zero status from pclose(): report how zdb terminated. */ 5147168404Spjd ztest_dump_core = 0; 5148168404Spjd if (WIFEXITED(status)) 5149168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5150168404Spjd else 5151168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5152168404Spjd} 5153168404Spjd /* Walk every pool returned by spa_next() with spa_namespace_lock held; at verbosity 6 or higher, print header and then each pool name. */ 5154168404Spjdstatic void 5155168404Spjdztest_walk_pool_directory(char *header) 5156168404Spjd{ 5157168404Spjd spa_t *spa = NULL; 5158168404Spjd 5159236143Smm if (ztest_opts.zo_verbose >= 6) 5160168404Spjd (void) printf("%s\n", header); 5161168404Spjd 5162168404Spjd mutex_enter(&spa_namespace_lock); 5163168404Spjd while ((spa = spa_next(spa)) != NULL) 5164236143Smm if (ztest_opts.zo_verbose >= 6) 5165168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5166168404Spjd mutex_exit(&spa_namespace_lock); 5167168404Spjd} 5168168404Spjd /* Export the pool named oldname and import it again as newname, verifying the expected result of each step. */ 5169168404Spjdstatic void 5170168404Spjdztest_spa_import_export(char *oldname, char *newname) 5171168404Spjd{ 5172209962Smm nvlist_t *config, *newconfig; 5173168404Spjd uint64_t pool_guid; 5174168404Spjd spa_t *spa; 5175168404Spjd 5176236143Smm if (ztest_opts.zo_verbose >= 4) { 5177168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5178168404Spjd oldname, newname); 5179168404Spjd } 5180168404Spjd 5181168404Spjd /* 5182168404Spjd * Clean up from previous runs.
5183168404Spjd */ 5184168404Spjd (void) spa_destroy(newname); 5185168404Spjd 5186168404Spjd /* 5187168404Spjd * Open the pool and record its guid; the configuration comes from spa_export() below. 5188168404Spjd */ 5189219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5190168404Spjd 5191209962Smm /* 5192209962Smm * Kick off a scrub to tickle scrub/export races. 5193209962Smm */ 5194209962Smm if (ztest_random(2) == 0) 5195219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5196209962Smm /* Saved for the guid comparison made after the pool is reopened as newname. */ 5197168404Spjd pool_guid = spa_guid(spa); 5198168404Spjd spa_close(spa, FTAG); 5199168404Spjd 5200168404Spjd ztest_walk_pool_directory("pools before export"); 5201168404Spjd 5202168404Spjd /* 5203168404Spjd * Export it. 5204168404Spjd */ 5205219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5206168404Spjd 5207168404Spjd ztest_walk_pool_directory("pools after export"); 5208168404Spjd 5209168404Spjd /* 5210209962Smm * Try to import it. 5211209962Smm */ 5212209962Smm newconfig = spa_tryimport(config); 5213209962Smm ASSERT(newconfig != NULL); 5214209962Smm nvlist_free(newconfig); 5215209962Smm 5216209962Smm /* 5217168404Spjd * Import it under the new name. 5218168404Spjd */ 5219219089Spjd VERIFY3U(0, ==, spa_import(newname, config, NULL, 0)); 5220168404Spjd 5221168404Spjd ztest_walk_pool_directory("pools after import"); 5222168404Spjd 5223168404Spjd /* 5224168404Spjd * Try to import it again -- should fail with EEXIST. 5225168404Spjd */ 5226219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5227168404Spjd 5228168404Spjd /* 5229168404Spjd * Try to import it under a different name -- should fail with EEXIST. 5230168404Spjd */ 5231219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5232168404Spjd 5233168404Spjd /* 5234168404Spjd * Verify that the pool is no longer visible under the old name.
5235168404Spjd */ 5236219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5237168404Spjd 5238168404Spjd /* 5239168404Spjd * Verify that we can open and close the pool using the new name. 5240168404Spjd */ 5241219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5242168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5243168404Spjd spa_close(spa, FTAG); 5244168404Spjd 5245168404Spjd nvlist_free(config); 5246168404Spjd} 5247168404Spjd /* Call vdev_clear(spa, NULL) inside a spa_vdev_state_enter()/spa_vdev_state_exit() pair, then zio_resume(); a message is printed first if the pool is suspended and verbosity is at least 6. */ 5248209962Smmstatic void 5249209962Smmztest_resume(spa_t *spa) 5250209962Smm{ 5251236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5252219089Spjd (void) printf("resuming from suspended state\n"); 5253219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5254219089Spjd vdev_clear(spa, NULL); 5255219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5256219089Spjd (void) zio_resume(spa); 5257209962Smm} 5258209962Smm /* Thread body: until ztest_exiting is set, check whether the pool (passed as arg) is suspended and call ztest_resume() if so, pausing 100 ms in poll() between checks. */ 5259168404Spjdstatic void * 5260209962Smmztest_resume_thread(void *arg) 5261185029Spjd{ 5262185029Spjd spa_t *spa = arg; 5263185029Spjd 5264185029Spjd while (!ztest_exiting) { 5265219089Spjd if (spa_suspended(spa)) 5266219089Spjd ztest_resume(spa); 5267219089Spjd (void) poll(NULL, 0, 100); 5268185029Spjd } 5269185029Spjd return (NULL); 5270185029Spjd} 5271185029Spjd /* Thread body: sleep for the planned thread run time (zs_thread_stop - zs_thread_start, converted to seconds) plus a 300-second grace period, then call fatal(). */ 5272185029Spjdstatic void * 5273219089Spjdztest_deadman_thread(void *arg) 5274219089Spjd{ 5275219089Spjd ztest_shared_t *zs = arg; 5276219089Spjd int grace = 300; 5277219089Spjd hrtime_t delta; 5278219089Spjd 5279219089Spjd delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; 5280219089Spjd /* poll() with no descriptors is used as a sleep; its timeout is in milliseconds. */ 5281219089Spjd (void) poll(NULL, 0, (int)(1000 * delta)); 5282219089Spjd 5283219089Spjd fatal(0, "failed to complete within %d seconds of deadline", grace); 5284219089Spjd 5285219089Spjd return (NULL); 5286219089Spjd} 5287219089Spjd /* Run zi->zi_func zi_iters times on the dataset selected by id modulo zo_datasets, then add the call and its elapsed time to the shared counters of the test. */ 5288219089Spjdstatic void 5289236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5290219089Spjd{ 5291236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5292236143Smm
ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5293219089Spjd hrtime_t functime = gethrtime(); 5294219089Spjd 5295219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5296219089Spjd zi->zi_func(zd, id); 5297219089Spjd 5298219089Spjd functime = gethrtime() - functime; 5299219089Spjd /* Add this call and the elapsed time of all iterations to the shared counters for the test. */ 5300236143Smm atomic_add_64(&zc->zc_count, 1); 5301236143Smm atomic_add_64(&zc->zc_time, functime); 5302219089Spjd /* At verbosity 4 or higher, print the elapsed seconds and the symbol name dladdr() reports for the test function. */ 5303236143Smm if (ztest_opts.zo_verbose >= 4) { 5304219089Spjd Dl_info dli; 5305219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5306219089Spjd (void) printf("%6.2f sec in %s\n", 5307219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5308219089Spjd } 5309219089Spjd} 5310219089Spjd /* Worker thread body: until zs_thread_stop is reached, repeatedly pick a random test and run it if its next scheduled time has arrived; arg carries the thread id. */ 5311219089Spjdstatic void * 5312168404Spjdztest_thread(void *arg) 5313168404Spjd{ 5314236143Smm int rand; 5315219089Spjd uint64_t id = (uintptr_t)arg; 5316168404Spjd ztest_shared_t *zs = ztest_shared; 5317219089Spjd uint64_t call_next; 5318219089Spjd hrtime_t now; 5319168404Spjd ztest_info_t *zi; 5320236143Smm ztest_shared_callstate_t *zc; 5321168404Spjd 5322219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5323168404Spjd /* 5324168404Spjd * See if it's time to force a crash. 5325168404Spjd */ 5326219089Spjd if (now > zs->zs_thread_kill) 5327219089Spjd ztest_kill(zs); 5328168404Spjd 5329168404Spjd /* 5330219089Spjd * If we're getting ENOSPC with some regularity, stop. 5331168404Spjd */ 5332219089Spjd if (zs->zs_enospc_count > 10) 5333219089Spjd break; 5334168404Spjd 5335168404Spjd /* 5336219089Spjd * Pick a random function to execute.
5337168404Spjd */ 5338236143Smm rand = ztest_random(ZTEST_FUNCS); 5339236143Smm zi = &ztest_info[rand]; 5340236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5341236143Smm call_next = zc->zc_next; 5342168404Spjd /* The compare-and-swap moves zc_next forward by a random amount; the test runs only in the thread whose swap succeeded. */ 5343219089Spjd if (now >= call_next && 5344236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5345236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5346236143Smm ztest_execute(rand, zi, id); 5347236143Smm } 5348219089Spjd } 5349168404Spjd 5350219089Spjd return (NULL); 5351219089Spjd} 5352168404Spjd /* Write the name of dataset number d of pool, in the form <pool>/ds_<d>, into dsname (at most MAXNAMELEN bytes). */ 5353219089Spjdstatic void 5354219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5355219089Spjd{ 5356219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5357219089Spjd} 5358168404Spjd /* Run ztest_dsl_dataset_cleanup() for each thread number that maps to dataset d, then apply ztest_objset_destroy_cb to the dataset through dmu_objset_find(). */ 5359219089Spjdstatic void 5360236143Smmztest_dataset_destroy(int d) 5361219089Spjd{ 5362219089Spjd char name[MAXNAMELEN]; 5363168404Spjd 5364236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5365168404Spjd 5366236143Smm if (ztest_opts.zo_verbose >= 3) 5367219089Spjd (void) printf("Destroying %s to free up space\n", name); 5368168404Spjd 5369219089Spjd /* 5370219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5371219089Spjd * ztest thread t operates on dataset (t % ztest_opts.zo_datasets), 5372219089Spjd * so there may be more than one thing to clean up. 5373219089Spjd */ 5374236143Smm for (int t = d; t < ztest_opts.zo_threads; 5375236143Smm t += ztest_opts.zo_datasets) { 5376219089Spjd ztest_dsl_dataset_cleanup(name, t); 5377236143Smm } 5378219089Spjd 5379219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5380219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5381219089Spjd} 5382219089Spjd /* Check the object count of the dataset against the entry count of its ZTEST_DIROBJ directory. */ 5383219089Spjdstatic void 5384219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5385219089Spjd{ 5386219089Spjd uint64_t usedobjs, dirobjs, scratch; 5387219089Spjd 5388219089Spjd /* 5389219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset.
5390219089Spjd * Therefore, the number of objects in use should equal the 5391219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5392219089Spjd * If not, we have an object leak. 5393219089Spjd * 5394219089Spjd * Note that we can only check this in ztest_dataset_open(), 5395219089Spjd * when the open-context and syncing-context values agree. 5396219089Spjd * That's because zap_count() returns the open-context value, 5397219089Spjd * while dmu_objset_space() returns the rootbp fill count. 5398219089Spjd */ 5399219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5400219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5401219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5402219089Spjd} 5403219089Spjd /* Create dataset number d if needed, hold it, replay its intent log and reopen the ZIL. Returns ENOSPC if ztest_dataset_create() reported ENOSPC, otherwise 0. */ 5404219089Spjdstatic int 5405236143Smmztest_dataset_open(int d) 5406219089Spjd{ 5407236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5408236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5409219089Spjd objset_t *os; 5410219089Spjd zilog_t *zilog; 5411219089Spjd char name[MAXNAMELEN]; 5412219089Spjd int error; 5413219089Spjd 5414236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5415219089Spjd /* ztest_name_lock is held as reader while the dataset is created and held. */ 5416236143Smm (void) rw_rdlock(&ztest_name_lock); 5417219089Spjd 5418219089Spjd error = ztest_dataset_create(name); 5419219089Spjd if (error == ENOSPC) { 5420236143Smm (void) rw_unlock(&ztest_name_lock); 5421219089Spjd ztest_record_enospc(FTAG); 5422219089Spjd return (error); 5423168404Spjd } 5424219089Spjd ASSERT(error == 0 || error == EEXIST); 5425168404Spjd 5426240415Smm VERIFY0(dmu_objset_hold(name, zd, &os)); 5427236143Smm (void) rw_unlock(&ztest_name_lock); 5428219089Spjd 5429236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5430219089Spjd 5431219089Spjd zilog = zd->zd_zilog; 5432219089Spjd /* A non-zero claimed sequence number below the committed one is reported as missing log records. */ 5433219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5434219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5435219089Spjd fatal(0, "missing log records: claimed %llu < committed
%llu", 5436219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5437219089Spjd 5438219089Spjd ztest_dataset_dirobj_verify(zd); 5439219089Spjd 5440219089Spjd zil_replay(os, zd, ztest_replay_vector); 5441219089Spjd 5442219089Spjd ztest_dataset_dirobj_verify(zd); 5443219089Spjd 5444236143Smm if (ztest_opts.zo_verbose >= 6) 5445219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5446219089Spjd zd->zd_name, 5447219089Spjd (u_longlong_t)zilog->zl_parse_blk_count, 5448219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5449219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5450219089Spjd 5451219089Spjd zilog = zil_open(os, ztest_get_data); 5452219089Spjd 5453219089Spjd if (zilog->zl_replaying_seq != 0 && 5454219089Spjd zilog->zl_replaying_seq < committed_seq) 5455219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5456219089Spjd zilog->zl_replaying_seq, committed_seq); 5457219089Spjd 5458219089Spjd return (0); 5459168404Spjd} 5460168404Spjd 5461219089Spjdstatic void 5462236143Smmztest_dataset_close(int d) 5463219089Spjd{ 5464236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5465219089Spjd 5466219089Spjd zil_close(zd->zd_zilog); 5467219089Spjd dmu_objset_rele(zd->zd_os, zd); 5468219089Spjd 5469219089Spjd ztest_zd_fini(zd); 5470219089Spjd} 5471219089Spjd 5472168404Spjd/* 5473168404Spjd * Kick off threads to run tests on all datasets in parallel. 5474168404Spjd */ 5475168404Spjdstatic void 5476219089Spjdztest_run(ztest_shared_t *zs) 5477168404Spjd{ 5478219089Spjd thread_t *tid; 5479168404Spjd spa_t *spa; 5480228103Smm objset_t *os; 5481185029Spjd thread_t resume_tid; 5482219089Spjd int error; 5483168404Spjd 5484185029Spjd ztest_exiting = B_FALSE; 5485185029Spjd 5486168404Spjd /* 5487219089Spjd * Initialize parent/child shared state.
5488168404Spjd */ 5489236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5490236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5491168404Spjd /* This pass ends zo_passtime seconds from now or at zs_proc_stop, whichever comes first. When ztest_random(100) is below zo_killrate, the kill time is pulled back from the stop time by a random amount. */ 5492219089Spjd zs->zs_thread_start = gethrtime(); 5493236143Smm zs->zs_thread_stop = 5494236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5495219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5496219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5497236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5498236143Smm zs->zs_thread_kill -= 5499236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5500236143Smm } 5501168404Spjd 5502219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5503168404Spjd 5504219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5505219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5506168404Spjd 5507168404Spjd /* 5508219089Spjd * Open our pool. 5509168404Spjd */ 5510219089Spjd kernel_init(FREAD | FWRITE); 5511236143Smm VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0); 5512224177Smm spa->spa_debug = B_TRUE; 5513236143Smm ztest_spa = spa; 5514168404Spjd 5515236143Smm VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os)); 5516228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5517228103Smm dmu_objset_rele(os, FTAG); 5518228103Smm 5519219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5520168404Spjd 5521168404Spjd /* 5522209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5523209962Smm * in which case ztest_fault_inject() temporarily takes away 5524209962Smm * the only valid replica. 5525209962Smm */ 5526219089Spjd if (MAXFAULTS() == 0) 5527209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5528209962Smm else 5529209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5530209962Smm 5531209962Smm /* 5532185029Spjd * Create a thread to periodically resume suspended I/O.
5533185029Spjd */ 5534209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5535185029Spjd &resume_tid) == 0); 5536185029Spjd 5537185029Spjd /* 5538219089Spjd * Create a deadman thread to abort() if we hang. 5539219089Spjd */ 5540219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5541219089Spjd NULL) == 0); 5542219089Spjd 5543219089Spjd /* 5544168404Spjd * Verify that we can safely inquire about any object, 5545168404Spjd * whether it's allocated or not. To make it interesting, 5546168404Spjd * we probe offsets -5 through +5 around each power of two. 5547168404Spjd * This hits all edge cases, including zero and the max. 5548168404Spjd */ 5549219089Spjd for (int t = 0; t < 64; t++) { 5550219089Spjd for (int d = -5; d <= 5; d++) { 5551168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5552168404Spjd (1ULL << t) + d, NULL); 5553168404Spjd ASSERT(error == 0 || error == ENOENT || 5554168404Spjd error == EINVAL); 5555168404Spjd } 5556168404Spjd } 5557168404Spjd 5558168404Spjd /* 5559219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5560168404Spjd */ 5561219089Spjd if (zs->zs_enospc_count != 0) { 5562236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5563236143Smm ztest_dataset_destroy(d); 5564219089Spjd } 5565168404Spjd zs->zs_enospc_count = 0; 5566168404Spjd 5567236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5568236143Smm UMEM_NOFAIL); 5569168404Spjd 5570236143Smm if (ztest_opts.zo_verbose >= 4) 5571168404Spjd (void) printf("starting main threads...\n"); 5572168404Spjd 5573219089Spjd /* 5574219089Spjd * Kick off all the tests that run in parallel.
5575219089Spjd */ 5576236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5577236143Smm /* Dataset t is opened just before thread t is created, for the first zo_datasets threads. NOTE(review): the early return below skips the joins, the free of tid and the teardown further down. */ if (t < ztest_opts.zo_datasets && 5578236143Smm ztest_dataset_open(t) != 0) 5579219089Spjd return; 5580219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5581219089Spjd THR_BOUND, &tid[t]) == 0); 5582219089Spjd } 5583168404Spjd 5584219089Spjd /* 5585219089Spjd * Wait for all of the tests to complete. We go in reverse order 5586219089Spjd * so we don't close datasets while threads are still using them. 5587219089Spjd */ 5588236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5589219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5590236143Smm if (t < ztest_opts.zo_datasets) 5591236143Smm ztest_dataset_close(t); 5592219089Spjd } 5593185029Spjd 5594219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5595185029Spjd 5596219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5597219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5598168404Spjd 5599236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5600168404Spjd 5601219089Spjd /* Tell the resume thread to exit (it polls ztest_exiting) and wait for it */ 5602219089Spjd ztest_exiting = B_TRUE; 5603219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5604219089Spjd ztest_resume(spa); 5605219089Spjd 5606219089Spjd /* 5607219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5608219089Spjd * spa_close() should wait for it to complete. 5609219089Spjd */ 5610219089Spjd for (uint64_t object = 1; object < 50; object++) 5611219089Spjd dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); 5612219089Spjd 5613219089Spjd spa_close(spa, FTAG); 5614219089Spjd 5615219089Spjd /* 5616219089Spjd * Verify that we can loop over all pools.
5617219089Spjd */ 5618219089Spjd mutex_enter(&spa_namespace_lock); 5619219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5620236143Smm if (ztest_opts.zo_verbose > 3) 5621219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5622219089Spjd mutex_exit(&spa_namespace_lock); 5623219089Spjd 5624219089Spjd /* 5625219089Spjd * Verify that we can export the pool and reimport it under a 5626219089Spjd * different name. 5627219089Spjd */ 5628219089Spjd if (ztest_random(2) == 0) { 5629219089Spjd char name[MAXNAMELEN]; 5630236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5631236143Smm ztest_opts.zo_pool); 5632236143Smm /* Out and back again, so the pool ends up under its original name. */ ztest_spa_import_export(ztest_opts.zo_pool, name); 5633236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5634168404Spjd } 5635168404Spjd 5636219089Spjd kernel_fini(); 5637219089Spjd 5638219089Spjd list_destroy(&zcl.zcl_callbacks); 5639219089Spjd 5640219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5641219089Spjd 5642236143Smm (void) rwlock_destroy(&ztest_name_lock); 5643236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5644219089Spjd} 5645219089Spjd /* Exercise spa_freeze(): generate ZIL records on dataset 0 of a frozen pool, then close and reopen the pool and the dataset so that the log is replayed. */ 5646219089Spjdstatic void 5647236143Smmztest_freeze(void) 5648219089Spjd{ 5649236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5650219089Spjd spa_t *spa; 5651219089Spjd int numloops = 0; 5652219089Spjd 5653236143Smm if (ztest_opts.zo_verbose >= 3) 5654219089Spjd (void) printf("testing spa_freeze()...\n"); 5655168404Spjd 5656219089Spjd kernel_init(FREAD | FWRITE); 5657236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5658236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5659243524Smm spa->spa_debug = B_TRUE; 5660243524Smm ztest_spa = spa; 5661168404Spjd 5662168404Spjd /* 5663219089Spjd * Force the first log block to be transactionally allocated. 5664219089Spjd * We have to do this before we freeze the pool -- otherwise 5665219089Spjd * the log chain won't be anchored.
5666168404Spjd */ 5667219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5668219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5669219089Spjd zil_commit(zd->zd_zilog, 0); 5670168404Spjd } 5671168404Spjd 5672168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5673168404Spjd 5674219089Spjd /* 5675219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5676219089Spjd * so that the only way to record changes from now on is the ZIL. 5677219089Spjd */ 5678219089Spjd spa_freeze(spa); 5679185029Spjd 5680219089Spjd /* 5681219089Spjd * Run tests that generate log records but don't alter the pool config 5682219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5683219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5684219089Spjd * to increase well beyond the last synced value in the uberblock. 5685219089Spjd * The ZIL should be OK with that. 5686219089Spjd */ 5687236143Smm /* The loop ends when ztest_random(10) returns 0 or once numloops reaches zo_maxloops. */ while (ztest_random(10) != 0 && 5688236143Smm numloops++ < ztest_opts.zo_maxloops) { 5689219089Spjd ztest_dmu_write_parallel(zd, 0); 5690219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5691219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5692219089Spjd } 5693185029Spjd 5694168404Spjd /* 5695219089Spjd * Commit all of the changes we just generated. 5696168404Spjd */ 5697219089Spjd zil_commit(zd->zd_zilog, 0); 5698219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5699168404Spjd 5700219089Spjd /* 5701219089Spjd * Close our dataset and close the pool. 5702219089Spjd */ 5703236143Smm ztest_dataset_close(0); 5704168404Spjd spa_close(spa, FTAG); 5705219089Spjd kernel_fini(); 5706168404Spjd 5707219089Spjd /* 5708219089Spjd * Open and close the pool and dataset to induce log replay.
5709219089Spjd */ 5710219089Spjd kernel_init(FREAD | FWRITE); 5711236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5712239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 5713236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5714236143Smm ztest_dataset_close(0); 5715239620Smm /* Publish the reopened pool through ztest_spa, wait for a sync, and run ztest_reguid() on it. */ 5716239620Smm spa->spa_debug = B_TRUE; 5717239620Smm ztest_spa = spa; 5718239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 5719239620Smm ztest_reguid(NULL, 0); 5720239620Smm 5721219089Spjd spa_close(spa, FTAG); 5722168404Spjd kernel_fini(); 5723168404Spjd} 5724168404Spjd /* Format t (in nanoseconds) into timebuf as days, hours, minutes and seconds, leaving out leading units that are zero. The sprintf() calls are unbounded, so the caller must supply a buffer large enough for the result. */ 5725168404Spjdvoid 5726168404Spjdprint_time(hrtime_t t, char *timebuf) 5727168404Spjd{ 5728168404Spjd hrtime_t s = t / NANOSEC; 5729168404Spjd hrtime_t m = s / 60; 5730168404Spjd hrtime_t h = m / 60; 5731168404Spjd hrtime_t d = h / 24; 5732168404Spjd /* Reduce each unit to its remainder within the next larger unit. */ 5733168404Spjd s -= m * 60; 5734168404Spjd m -= h * 60; 5735168404Spjd h -= d * 24; 5736168404Spjd 5737168404Spjd timebuf[0] = '\0'; 5738168404Spjd 5739168404Spjd if (d) 5740168404Spjd (void) sprintf(timebuf, 5741168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5742168404Spjd else if (h) 5743168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5744168404Spjd else if (m) 5745168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5746168404Spjd else 5747168404Spjd (void) sprintf(timebuf, "%llus", s); 5748168404Spjd} 5749168404Spjd /* Allocate a property nvlist; when ztest_random(2) is non-zero, autoreplace=1 is added to it before it is returned. */ 5750219089Spjdstatic nvlist_t * 5751219089Spjdmake_random_props() 5752219089Spjd{ 5753219089Spjd nvlist_t *props; 5754219089Spjd 5755236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5756219089Spjd if (ztest_random(2) == 0) 5757236884Smm return (props); 5758219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5759219089Spjd 5760219089Spjd return (props); 5761219089Spjd} 5762219089Spjd 5763168404Spjd/* 5764168404Spjd * Create a storage pool with the given name and initial vdev size. 5765219089Spjd * Then test spa_freeze() functionality.
5766168404Spjd */ /* The pool name, vdev size, mirror and raidz settings are all read from ztest_opts; zs receives the split/mirror counters and the metaslab size. */ 5767168404Spjdstatic void 5768219089Spjdztest_init(ztest_shared_t *zs) 5769168404Spjd{ 5770168404Spjd spa_t *spa; 5771219089Spjd nvlist_t *nvroot, *props; 5772168404Spjd 5773236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5774236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5775219089Spjd 5776168404Spjd kernel_init(FREAD | FWRITE); 5777168404Spjd 5778168404Spjd /* 5779168404Spjd * Create the storage pool. 5780168404Spjd */ 5781236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5782219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5783219089Spjd zs->zs_splits = 0; 5784236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5785243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 5786236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5787219089Spjd props = make_random_props(); 5788236884Smm /* Add a feature@<name> property with value 0 for every entry of spa_feature_table. */ for (int i = 0; i < SPA_FEATURES; i++) { 5789236884Smm char buf[1024]; 5790236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5791236884Smm spa_feature_table[i].fi_uname); 5792236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5793236884Smm } 5794236143Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, 5795236143Smm NULL, NULL)); 5796168404Spjd nvlist_free(nvroot); 5797168404Spjd /* NOTE(review): props is not freed in this function -- confirm whether spa_create() takes ownership of it. */ 5798236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5799236143Smm /* Record the metaslab size (1 << vdev_ms_shift) of the first child of the root vdev. */ zs->zs_metaslab_sz = 5800236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5801236884Smm 5802219089Spjd spa_close(spa, FTAG); 5803209962Smm 5804219089Spjd kernel_fini(); 5805168404Spjd /* Check the pool with zdb both before and after the freeze test. */ 5806236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5807168404Spjd 5808236143Smm ztest_freeze(); 5809219089Spjd 5810236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5811219089Spjd 5812236143Smm (void) rwlock_destroy(&ztest_name_lock); 5813236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5814168404Spjd} 5815168404Spjd /* Create the shared-data file from a mkstemp() template, store its descriptor in ztest_fd_data and unlink the name right away. */ 5816236143Smmstatic void 5817242845Sdelphijsetup_data_fd(void) 5818236143Smm{
5819242845Sdelphij static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 5820236143Smm 5821242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 5822242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 5823242845Sdelphij (void) unlink(ztest_name_data); 5824242845Sdelphij} 5825236143Smm 5826236143Smm /* Total size in bytes of the shared region described by hdr: header, options, shared state, per-test stats and per-dataset state. */ 5827236884Smmstatic int 5828236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5829236884Smm{ 5830236884Smm int size; 5831236884Smm 5832236884Smm size = hdr->zh_hdr_size; 5833236884Smm size += hdr->zh_opts_size; 5834236884Smm size += hdr->zh_size; 5835236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5836236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5837236884Smm 5838236884Smm return (size); 5839236884Smm} 5840236884Smm /* Fill in the section sizes and counts in the header at the start of the shared file, then grow the file to the total size computed from them. */ 5841236143Smmstatic void 5842236143Smmsetup_hdr(void) 5843236143Smm{ 5844236884Smm int size; 5845236143Smm ztest_shared_hdr_t *hdr; 5846236143Smm 5847236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5848242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5849236143Smm ASSERT(hdr != MAP_FAILED); 5850236143Smm /* Make the file large enough for the header before the header is written through the mapping. */ 5851242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 5852236884Smm 5853236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5854236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5855236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5856236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5857236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5858236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5859236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5860236143Smm 5861236884Smm size = shared_data_size(hdr); 5862242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 5863236884Smm 5864236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5865236143Smm} 5866236143Smm /* Map the whole shared file and point the ztest_shared_* globals at their sections, using the sizes stored in the header. */ 5867236143Smmstatic void 5868236143Smmsetup_data(void) 5869236143Smm{ 5870236143Smm int size, offset; 5871236143Smm
ztest_shared_hdr_t *hdr; 5872236143Smm uint8_t *buf; 5873236143Smm /* First map only the header, read-only, to learn the total size. */ 5874236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5875242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 5876236143Smm ASSERT(hdr != MAP_FAILED); 5877236143Smm 5878236884Smm size = shared_data_size(hdr); 5879236143Smm /* Then drop that mapping and map the whole region read-write. */ 5880236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5881236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5882242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5883236143Smm ASSERT(hdr != MAP_FAILED); 5884236143Smm buf = (uint8_t *)hdr; 5885236143Smm /* The sections follow the header in this order: options, shared state, per-test call state, per-dataset state. */ 5886236143Smm offset = hdr->zh_hdr_size; 5887236143Smm ztest_shared_opts = (void *)&buf[offset]; 5888236143Smm offset += hdr->zh_opts_size; 5889236143Smm ztest_shared = (void *)&buf[offset]; 5890236143Smm offset += hdr->zh_size; 5891236143Smm ztest_shared_callstate = (void *)&buf[offset]; 5892236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 5893236143Smm ztest_shared_ds = (void *)&buf[offset]; 5894236143Smm} 5895236143Smm /* Fork and exec cmd (or the path from getexecname() when cmd is NULL) and wait for the child. Returns B_FALSE if the child exited with status 0 and B_TRUE if it was killed by SIGKILL while ignorekill is set; any other outcome makes this process exit. If statusp is not NULL the wait status is stored there. */ 5896236143Smmstatic boolean_t 5897236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 5898236143Smm{ 5899236143Smm pid_t pid; 5900236143Smm int status; 5901242845Sdelphij char *cmdbuf = NULL; 5902236143Smm 5903236143Smm pid = fork(); 5904236143Smm /* fork() has already been called, so this default-command setup runs in both the parent and the child. */ 5905236143Smm if (cmd == NULL) { 5906242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 5907242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 5908236143Smm cmd = cmdbuf; 5909236143Smm } 5910236143Smm 5911236143Smm if (pid == -1) 5912236143Smm fatal(1, "fork failed"); 5913236143Smm 5914236143Smm if (pid == 0) { /* child */ 5915236143Smm char *emptyargv[2] = { cmd, NULL }; 5916242845Sdelphij char fd_data_str[12]; 5917236143Smm 5918236143Smm struct rlimit rl = { 1024, 1024 }; 5919236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 5920242845Sdelphij 5921242845Sdelphij (void) close(ztest_fd_rand);
5922242845Sdelphij /* Hand the shared-data descriptor number to the child through the ZTEST_FD_DATA environment variable. */ VERIFY3U(11, >=, 5923242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 5924242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 5925242845Sdelphij 5926236143Smm (void) enable_extended_FILE_stdio(-1, -1); 5927236143Smm if (libpath != NULL) 5928236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 5929236143Smm#ifdef illumos 5930236143Smm (void) execv(cmd, emptyargv); 5931236143Smm#else 5932236143Smm (void) execvp(cmd, emptyargv); 5933236143Smm#endif 5934236143Smm /* Reached only if the exec call returned, that is, it failed. */ ztest_dump_core = B_FALSE; 5935236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 5936236143Smm } 5937236143Smm 5938242845Sdelphij if (cmdbuf != NULL) { 5939242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 5940242845Sdelphij cmd = NULL; 5941242845Sdelphij } 5942242845Sdelphij /* Keep calling waitpid() until it returns the pid of the child. */ 5943236143Smm while (waitpid(pid, &status, 0) != pid) 5944236143Smm continue; 5945236143Smm if (statusp != NULL) 5946236143Smm *statusp = status; 5947236143Smm /* Exit status 0 returns B_FALSE; a non-zero exit status, or a signal other than a SIGKILL that ignorekill allows, terminates this process. */ 5948236143Smm if (WIFEXITED(status)) { 5949236143Smm if (WEXITSTATUS(status) != 0) { 5950236143Smm (void) fprintf(stderr, "child exited with code %d\n", 5951236143Smm WEXITSTATUS(status)); 5952236143Smm exit(2); 5953236143Smm } 5954236143Smm return (B_FALSE); 5955236143Smm } else if (WIFSIGNALED(status)) { 5956236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 5957236143Smm (void) fprintf(stderr, "child died with signal %d\n", 5958236143Smm WTERMSIG(status)); 5959236143Smm exit(3); 5960236143Smm } 5961236143Smm return (B_TRUE); 5962236143Smm } else { 5963236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 5964236143Smm exit(4); 5965236143Smm /* NOTREACHED */ 5966236143Smm } 5967236143Smm} 5968236143Smm /* Remove any existing zpool.cache file, then call ztest_init() zo_init times, zeroing the shared state before each pass. */ 5969236143Smmstatic void 5970236143Smmztest_run_init(void) 5971236143Smm{ 5972236143Smm ztest_shared_t *zs = ztest_shared; 5973236143Smm 5974236143Smm ASSERT(ztest_opts.zo_init != 0); 5975236143Smm 5976236143Smm /* 5977236143Smm * Blow away any existing copy of zpool.cache 5978236143Smm */
5979236143Smm (void) remove(spa_config_path); 5980236143Smm 5981236143Smm /* 5982236143Smm * Create and initialize our storage pool. 5983236143Smm */ 5984236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 5985236143Smm /* Every pass starts from zeroed shared state. */ bzero(zs, sizeof (ztest_shared_t)); 5986236143Smm /* The pass number is printed only at verbosity 3 or higher and when zo_init is not 1. */ if (ztest_opts.zo_verbose >= 3 && 5987236143Smm ztest_opts.zo_init != 1) { 5988236143Smm (void) printf("ztest_init(), pass %d\n", i); 5989236143Smm } 5990236143Smm ztest_init(zs); 5991236143Smm } 5992236143Smm} 5993236143Smm 5994168404Spjdint 5995168404Spjdmain(int argc, char **argv) 5996168404Spjd{ 5997168404Spjd int kills = 0; 5998168404Spjd int iters = 0; 5999236143Smm int older = 0; 6000236143Smm int newer = 0; 6001168404Spjd ztest_shared_t *zs; 6002168404Spjd ztest_info_t *zi; 6003236143Smm ztest_shared_callstate_t *zc; 6004168404Spjd char timebuf[100]; 6005168404Spjd char numbuf[6]; 6006219089Spjd spa_t *spa; 6007242845Sdelphij char *cmd; 6008236143Smm boolean_t hasalt; 6009242845Sdelphij char *fd_data_str = getenv("ZTEST_FD_DATA"); 6010168404Spjd 6011168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 6012168404Spjd 6013240133Smm dprintf_setup(&argc, argv); 6014240133Smm 6015242845Sdelphij ztest_fd_rand = open("/dev/urandom", O_RDONLY); 6016242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 6017242845Sdelphij 6018242845Sdelphij if (!fd_data_str) { 6019236143Smm process_options(argc, argv); 6020168404Spjd 6021242845Sdelphij setup_data_fd(); 6022236143Smm setup_hdr(); 6023236143Smm setup_data(); 6024236143Smm bcopy(&ztest_opts, ztest_shared_opts, 6025236143Smm sizeof (*ztest_shared_opts)); 6026236143Smm } else { 6027242845Sdelphij ztest_fd_data = atoi(fd_data_str); 6028236143Smm setup_data(); 6029236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 6030236143Smm } 6031236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 6032168404Spjd 6033219089Spjd /* Override location of zpool.cache */ 6034242845Sdelphij VERIFY3U(asprintf((char **)&spa_config_path,
"%s/zpool.cache", 6035242845Sdelphij ztest_opts.zo_dir), !=, -1); 6036219089Spjd 6037236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 6038236143Smm UMEM_NOFAIL); 6039236143Smm zs = ztest_shared; 6040168404Spjd 6041242845Sdelphij if (fd_data_str) { 6042236143Smm metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 6043236143Smm metaslab_df_alloc_threshold = 6044236143Smm zs->zs_metaslab_df_alloc_threshold; 6045219089Spjd 6046236143Smm if (zs->zs_do_init) 6047236143Smm ztest_run_init(); 6048236143Smm else 6049236143Smm ztest_run(zs); 6050236143Smm exit(0); 6051236143Smm } 6052168404Spjd 6053236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 6054236143Smm 6055236143Smm if (ztest_opts.zo_verbose >= 1) { 6056168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 6057168404Spjd " %llu seconds...\n", 6058236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 6059236143Smm ztest_opts.zo_datasets, 6060236143Smm ztest_opts.zo_threads, 6061236143Smm (u_longlong_t)ztest_opts.zo_time); 6062168404Spjd } 6063168404Spjd 6064242845Sdelphij cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); 6065242845Sdelphij (void) strlcpy(cmd, getexecname(), MAXNAMELEN); 6066236143Smm 6067236143Smm zs->zs_do_init = B_TRUE; 6068236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 6069236143Smm if (ztest_opts.zo_verbose >= 1) { 6070236143Smm (void) printf("Executing older ztest for " 6071236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 6072236143Smm } 6073236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 6074236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 6075236143Smm } else { 6076236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 6077168404Spjd } 6078236143Smm zs->zs_do_init = B_FALSE; 6079168404Spjd 6080219089Spjd zs->zs_proc_start = gethrtime(); 6081236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 6082219089Spjd 6083219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6084236143Smm zi = &ztest_info[f]; 
6085236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6086219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 6087236143Smm zc->zc_next = UINT64_MAX; 6088168404Spjd else 6089236143Smm zc->zc_next = zs->zs_proc_start + 6090219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 6091168404Spjd } 6092168404Spjd 6093168404Spjd /* 6094168404Spjd * Run the tests in a loop. These tests include fault injection 6095168404Spjd * to verify that self-healing data works, and forced crashes 6096168404Spjd * to verify that we never lose on-disk consistency. 6097168404Spjd */ 6098219089Spjd while (gethrtime() < zs->zs_proc_stop) { 6099168404Spjd int status; 6100236143Smm boolean_t killed; 6101168404Spjd 6102168404Spjd /* 6103168404Spjd * Initialize the workload counters for each function. 6104168404Spjd */ 6105219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6106236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6107236143Smm zc->zc_count = 0; 6108236143Smm zc->zc_time = 0; 6109168404Spjd } 6110168404Spjd 6111209962Smm /* Set the allocation switch size */ 6112236143Smm zs->zs_metaslab_df_alloc_threshold = 6113236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 6114209962Smm 6115236143Smm if (!hasalt || ztest_random(2) == 0) { 6116236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6117236143Smm (void) printf("Executing newer ztest: %s\n", 6118236143Smm cmd); 6119168404Spjd } 6120236143Smm newer++; 6121236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 6122236143Smm } else { 6123236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6124236143Smm (void) printf("Executing older ztest: %s\n", 6125236143Smm ztest_opts.zo_alt_ztest); 6126168404Spjd } 6127236143Smm older++; 6128236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 6129236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 6130168404Spjd } 6131168404Spjd 6132236143Smm if (killed) 6133236143Smm kills++; 6134168404Spjd iters++; 6135168404Spjd 6136236143Smm if (ztest_opts.zo_verbose >= 1) { 
6137168404Spjd hrtime_t now = gethrtime(); 6138168404Spjd 6139219089Spjd now = MIN(now, zs->zs_proc_stop); 6140219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 6141168404Spjd nicenum(zs->zs_space, numbuf); 6142168404Spjd 6143168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 6144168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 6145168404Spjd iters, 6146168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 6147168404Spjd (u_longlong_t)zs->zs_enospc_count, 6148168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 6149168404Spjd numbuf, 6150219089Spjd 100.0 * (now - zs->zs_proc_start) / 6151236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 6152168404Spjd } 6153168404Spjd 6154236143Smm if (ztest_opts.zo_verbose >= 2) { 6155168404Spjd (void) printf("\nWorkload summary:\n\n"); 6156168404Spjd (void) printf("%7s %9s %s\n", 6157168404Spjd "Calls", "Time", "Function"); 6158168404Spjd (void) printf("%7s %9s %s\n", 6159168404Spjd "-----", "----", "--------"); 6160219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6161168404Spjd Dl_info dli; 6162168404Spjd 6163236143Smm zi = &ztest_info[f]; 6164236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6165236143Smm print_time(zc->zc_time, timebuf); 6166168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 6167168404Spjd (void) printf("%7llu %9s %s\n", 6168236143Smm (u_longlong_t)zc->zc_count, timebuf, 6169168404Spjd dli.dli_sname); 6170168404Spjd } 6171168404Spjd (void) printf("\n"); 6172168404Spjd } 6173168404Spjd 6174168404Spjd /* 6175219089Spjd * It's possible that we killed a child during a rename test, 6176219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6177219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 
6178168404Spjd */ 6179219089Spjd kernel_init(FREAD); 6180236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6181219089Spjd spa_close(spa, FTAG); 6182219089Spjd } else { 6183219089Spjd char tmpname[MAXNAMELEN]; 6184219089Spjd kernel_fini(); 6185219089Spjd kernel_init(FREAD | FWRITE); 6186219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6187236143Smm ztest_opts.zo_pool); 6188236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6189219089Spjd } 6190168404Spjd kernel_fini(); 6191219089Spjd 6192236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6193168404Spjd } 6194168404Spjd 6195236143Smm if (ztest_opts.zo_verbose >= 1) { 6196236143Smm if (hasalt) { 6197236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6198236143Smm ztest_opts.zo_alt_ztest); 6199236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6200236143Smm cmd); 6201236143Smm } 6202168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6203168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6204168404Spjd } 6205168404Spjd 6206242845Sdelphij umem_free(cmd, MAXNAMELEN); 6207242845Sdelphij 6208168404Spjd return (0); 6209168404Spjd} 6210