/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

/*
 * The objective of this program is to provide a DMU/ZAP/SPA stress test
 * that runs entirely in userland, is easy to use, and easy to extend.
 *
 * The overall design of the ztest program is as follows:
 *
 * (1) For each major functional area (e.g. adding vdevs to a pool,
 *     creating and destroying datasets, reading and writing objects, etc)
 *     we have a simple routine to test that functionality.  These
 *     individual routines do not have to do anything "stressful".
 *
 * (2) We turn these simple functionality tests into a stress test by
 *     running them all in parallel, with as many threads as desired,
 *     and spread across as many datasets, objects, and vdevs as desired.
 *
 * (3) While all this is happening, we inject faults into the pool to
 *     verify that self-healing data really works.
 *
 * (4) Every time we open a dataset, we change its checksum and compression
 *     functions.  Thus even individual objects vary from block to block
 *     in which checksum they use and whether they're compressed.
 *
 * (5) To verify that we never lose on-disk consistency after a crash,
 *     we run the entire test in a child of the main process.
 *     At random times, the child self-immolates with a SIGKILL.
 *     This is the software equivalent of pulling the power cord.
 *     The parent then runs the test again, using the existing
 *     storage pool, as many times as desired.  If backwards compatibility
 *     testing is enabled ztest will sometimes run the "older" version
 *     of ztest after a SIGKILL.
 *
 * (6) To verify that we don't have future leaks or temporal incursions,
 *     many of the functional tests record the transaction group number
 *     as part of their data.  When reading old data, they verify that
 *     the transaction group number is less than the current, open txg.
 *     If you add a new test, please do this if applicable.
 *
 * When run with no arguments, ztest runs for about five minutes and
 * produces no output if successful.  To get a little bit of information,
 * specify -V.  To get more information, specify -VV, and so on.
 *
 * To turn this into an overnight stress test, use -T to specify run time.
 *
 * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
 * to increase the pool capacity, fanout, and overall stress level.
 *
 * Use the -k option to set the desired frequency of kills.
 *
 * When ztest invokes itself it passes all relevant information through a
 * temporary file which is mmap-ed in the child process.  This allows shared
 * memory to survive the exec syscall.  The ztest_shared_hdr_t struct is always
 * stored at offset 0 of this file and contains information on the size and
 * number of shared structures in the file.  The information stored in this file
 * must remain backwards compatible with older versions of ztest so that
 * ztest can invoke them during backwards compatibility testing (-B).
85168404Spjd */ 86168404Spjd 87168404Spjd#include <sys/zfs_context.h> 88168404Spjd#include <sys/spa.h> 89168404Spjd#include <sys/dmu.h> 90168404Spjd#include <sys/txg.h> 91209962Smm#include <sys/dbuf.h> 92168404Spjd#include <sys/zap.h> 93168404Spjd#include <sys/dmu_objset.h> 94168404Spjd#include <sys/poll.h> 95168404Spjd#include <sys/stat.h> 96168404Spjd#include <sys/time.h> 97168404Spjd#include <sys/wait.h> 98168404Spjd#include <sys/mman.h> 99168404Spjd#include <sys/resource.h> 100168404Spjd#include <sys/zio.h> 101168404Spjd#include <sys/zil.h> 102219089Spjd#include <sys/zil_impl.h> 103168404Spjd#include <sys/vdev_impl.h> 104185029Spjd#include <sys/vdev_file.h> 105168404Spjd#include <sys/spa_impl.h> 106219089Spjd#include <sys/metaslab_impl.h> 107168404Spjd#include <sys/dsl_prop.h> 108207910Smm#include <sys/dsl_dataset.h> 109248571Smm#include <sys/dsl_destroy.h> 110219089Spjd#include <sys/dsl_scan.h> 111219089Spjd#include <sys/zio_checksum.h> 112168404Spjd#include <sys/refcount.h> 113236884Smm#include <sys/zfeature.h> 114248571Smm#include <sys/dsl_userhold.h> 115168404Spjd#include <stdio.h> 116168404Spjd#include <stdio_ext.h> 117168404Spjd#include <stdlib.h> 118168404Spjd#include <unistd.h> 119168404Spjd#include <signal.h> 120168404Spjd#include <umem.h> 121168404Spjd#include <dlfcn.h> 122168404Spjd#include <ctype.h> 123168404Spjd#include <math.h> 124168404Spjd#include <errno.h> 125168404Spjd#include <sys/fs/zfs.h> 126219089Spjd#include <libnvpair.h> 127168404Spjd 128242845Sdelphijstatic int ztest_fd_data = -1; 129242845Sdelphijstatic int ztest_fd_rand = -1; 130168404Spjd 131236143Smmtypedef struct ztest_shared_hdr { 132236143Smm uint64_t zh_hdr_size; 133236143Smm uint64_t zh_opts_size; 134236143Smm uint64_t zh_size; 135236143Smm uint64_t zh_stats_size; 136236143Smm uint64_t zh_stats_count; 137236143Smm uint64_t zh_ds_size; 138236143Smm uint64_t zh_ds_count; 139236143Smm} ztest_shared_hdr_t; 140168404Spjd 141236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 
142236143Smm 143236143Smmtypedef struct ztest_shared_opts { 144236143Smm char zo_pool[MAXNAMELEN]; 145236143Smm char zo_dir[MAXNAMELEN]; 146236143Smm char zo_alt_ztest[MAXNAMELEN]; 147236143Smm char zo_alt_libpath[MAXNAMELEN]; 148236143Smm uint64_t zo_vdevs; 149236143Smm uint64_t zo_vdevtime; 150236143Smm size_t zo_vdev_size; 151236143Smm int zo_ashift; 152236143Smm int zo_mirrors; 153236143Smm int zo_raidz; 154236143Smm int zo_raidz_parity; 155236143Smm int zo_datasets; 156236143Smm int zo_threads; 157236143Smm uint64_t zo_passtime; 158236143Smm uint64_t zo_killrate; 159236143Smm int zo_verbose; 160236143Smm int zo_init; 161236143Smm uint64_t zo_time; 162236143Smm uint64_t zo_maxloops; 163236143Smm uint64_t zo_metaslab_gang_bang; 164236143Smm} ztest_shared_opts_t; 165236143Smm 166236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 167236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 168236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 169236143Smm .zo_alt_ztest = { '\0' }, 170236143Smm .zo_alt_libpath = { '\0' }, 171236143Smm .zo_vdevs = 5, 172236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 173236143Smm .zo_mirrors = 2, 174236143Smm .zo_raidz = 4, 175236143Smm .zo_raidz_parity = 1, 176269430Sdelphij .zo_vdev_size = SPA_MINDEVSIZE * 2, 177236143Smm .zo_datasets = 7, 178236143Smm .zo_threads = 23, 179236143Smm .zo_passtime = 60, /* 60 seconds */ 180236143Smm .zo_killrate = 70, /* 70% kill rate */ 181236143Smm .zo_verbose = 0, 182236143Smm .zo_init = 1, 183236143Smm .zo_time = 300, /* 5 minutes */ 184236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 185236143Smm .zo_metaslab_gang_bang = 32 << 10 186236143Smm}; 187236143Smm 188236143Smmextern uint64_t metaslab_gang_bang; 189236143Smmextern uint64_t metaslab_df_alloc_threshold; 190258632Savgextern uint64_t zfs_deadman_synctime_ms; 191268086Sdelphijextern int metaslab_preload_limit; 192236143Smm 193236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 194236143Smmstatic ztest_shared_opts_t 
ztest_opts; 195236143Smm 196236143Smmtypedef struct ztest_shared_ds { 197236143Smm uint64_t zd_seq; 198236143Smm} ztest_shared_ds_t; 199236143Smm 200236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 201236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 202236143Smm 203219089Spjd#define BT_MAGIC 0x123456789abcdefULL 204236143Smm#define MAXFAULTS() \ 205236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 206219089Spjd 207219089Spjdenum ztest_io_type { 208219089Spjd ZTEST_IO_WRITE_TAG, 209219089Spjd ZTEST_IO_WRITE_PATTERN, 210219089Spjd ZTEST_IO_WRITE_ZEROES, 211219089Spjd ZTEST_IO_TRUNCATE, 212219089Spjd ZTEST_IO_SETATTR, 213243524Smm ZTEST_IO_REWRITE, 214219089Spjd ZTEST_IO_TYPES 215219089Spjd}; 216219089Spjd 217185029Spjdtypedef struct ztest_block_tag { 218219089Spjd uint64_t bt_magic; 219185029Spjd uint64_t bt_objset; 220185029Spjd uint64_t bt_object; 221185029Spjd uint64_t bt_offset; 222219089Spjd uint64_t bt_gen; 223185029Spjd uint64_t bt_txg; 224219089Spjd uint64_t bt_crtxg; 225185029Spjd} ztest_block_tag_t; 226185029Spjd 227219089Spjdtypedef struct bufwad { 228219089Spjd uint64_t bw_index; 229219089Spjd uint64_t bw_txg; 230219089Spjd uint64_t bw_data; 231219089Spjd} bufwad_t; 232168404Spjd 233219089Spjd/* 234219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 
235219089Spjd */ 236219089Spjdtypedef enum { 237219089Spjd RL_READER, 238219089Spjd RL_WRITER, 239219089Spjd RL_APPEND 240219089Spjd} rl_type_t; 241168404Spjd 242219089Spjdtypedef struct rll { 243219089Spjd void *rll_writer; 244219089Spjd int rll_readers; 245219089Spjd mutex_t rll_lock; 246219089Spjd cond_t rll_cv; 247219089Spjd} rll_t; 248219089Spjd 249219089Spjdtypedef struct rl { 250219089Spjd uint64_t rl_object; 251219089Spjd uint64_t rl_offset; 252219089Spjd uint64_t rl_size; 253219089Spjd rll_t *rl_lock; 254219089Spjd} rl_t; 255219089Spjd 256219089Spjd#define ZTEST_RANGE_LOCKS 64 257219089Spjd#define ZTEST_OBJECT_LOCKS 64 258219089Spjd 259168404Spjd/* 260219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 261219089Spjd */ 262219089Spjdtypedef struct ztest_od { 263219089Spjd uint64_t od_dir; 264219089Spjd uint64_t od_object; 265219089Spjd dmu_object_type_t od_type; 266219089Spjd dmu_object_type_t od_crtype; 267219089Spjd uint64_t od_blocksize; 268219089Spjd uint64_t od_crblocksize; 269219089Spjd uint64_t od_gen; 270219089Spjd uint64_t od_crgen; 271219089Spjd char od_name[MAXNAMELEN]; 272219089Spjd} ztest_od_t; 273219089Spjd 274219089Spjd/* 275219089Spjd * Per-dataset state. 276219089Spjd */ 277219089Spjdtypedef struct ztest_ds { 278236143Smm ztest_shared_ds_t *zd_shared; 279219089Spjd objset_t *zd_os; 280224526Smm rwlock_t zd_zilog_lock; 281219089Spjd zilog_t *zd_zilog; 282219089Spjd ztest_od_t *zd_od; /* debugging aid */ 283219089Spjd char zd_name[MAXNAMELEN]; 284219089Spjd mutex_t zd_dirobj_lock; 285219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 286219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 287219089Spjd} ztest_ds_t; 288219089Spjd 289219089Spjd/* 290219089Spjd * Per-iteration state. 
291219089Spjd */ 292219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 293219089Spjd 294219089Spjdtypedef struct ztest_info { 295219089Spjd ztest_func_t *zi_func; /* test function */ 296219089Spjd uint64_t zi_iters; /* iterations per execution */ 297219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 298219089Spjd} ztest_info_t; 299219089Spjd 300236143Smmtypedef struct ztest_shared_callstate { 301236143Smm uint64_t zc_count; /* per-pass count */ 302236143Smm uint64_t zc_time; /* per-pass time */ 303236143Smm uint64_t zc_next; /* next time to call this function */ 304236143Smm} ztest_shared_callstate_t; 305236143Smm 306236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 307236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 308236143Smm 309219089Spjd/* 310168404Spjd * Note: these aren't static because we want dladdr() to work. 311168404Spjd */ 312168404Spjdztest_func_t ztest_dmu_read_write; 313168404Spjdztest_func_t ztest_dmu_write_parallel; 314168404Spjdztest_func_t ztest_dmu_object_alloc_free; 315219089Spjdztest_func_t ztest_dmu_commit_callbacks; 316168404Spjdztest_func_t ztest_zap; 317168404Spjdztest_func_t ztest_zap_parallel; 318219089Spjdztest_func_t ztest_zil_commit; 319224526Smmztest_func_t ztest_zil_remount; 320219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 321168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 322219089Spjdztest_func_t ztest_dmu_prealloc; 323219089Spjdztest_func_t ztest_fzap; 324168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 325219089Spjdztest_func_t ztest_dsl_prop_get_set; 326219089Spjdztest_func_t ztest_spa_prop_get_set; 327168404Spjdztest_func_t ztest_spa_create_destroy; 328168404Spjdztest_func_t ztest_fault_inject; 329219089Spjdztest_func_t ztest_ddt_repair; 330219089Spjdztest_func_t ztest_dmu_snapshot_hold; 331185029Spjdztest_func_t ztest_spa_rename; 332219089Spjdztest_func_t ztest_scrub; 333219089Spjdztest_func_t 
ztest_dsl_dataset_promote_busy; 334168404Spjdztest_func_t ztest_vdev_attach_detach; 335168404Spjdztest_func_t ztest_vdev_LUN_growth; 336168404Spjdztest_func_t ztest_vdev_add_remove; 337185029Spjdztest_func_t ztest_vdev_aux_add_remove; 338219089Spjdztest_func_t ztest_split_pool; 339228103Smmztest_func_t ztest_reguid; 340243505Smmztest_func_t ztest_spa_upgrade; 341168404Spjd 342219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 343219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 344219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 345219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 346219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 347168404Spjd 348168404Spjdztest_info_t ztest_info[] = { 349185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 350219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 351185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 352219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 353185029Spjd { ztest_zap, 30, &zopt_always }, 354185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 355219089Spjd { ztest_split_pool, 1, &zopt_always }, 356219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 357224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 358219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 359219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 360219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 361219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 362219089Spjd#if 0 363219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 364219089Spjd#endif 365219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 366219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 367219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 368185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 369219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 370219089Spjd { 
ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 371254074Sdelphij { ztest_reguid, 1, &zopt_rarely }, 372185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 373219089Spjd { ztest_scrub, 1, &zopt_rarely }, 374243505Smm { ztest_spa_upgrade, 1, &zopt_rarely }, 375219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 376248571Smm { ztest_vdev_attach_detach, 1, &zopt_sometimes }, 377185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 378236143Smm { ztest_vdev_add_remove, 1, 379236143Smm &ztest_opts.zo_vdevtime }, 380236143Smm { ztest_vdev_aux_add_remove, 1, 381236143Smm &ztest_opts.zo_vdevtime }, 382168404Spjd}; 383168404Spjd 384168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 385168404Spjd 386219089Spjd/* 387219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 388219089Spjd * The callbacks are ordered by txg number. 389219089Spjd */ 390219089Spjdtypedef struct ztest_cb_list { 391219089Spjd mutex_t zcl_callbacks_lock; 392219089Spjd list_t zcl_callbacks; 393219089Spjd} ztest_cb_list_t; 394168404Spjd 395168404Spjd/* 396168404Spjd * Stuff we need to share writably between parent and child. 
397168404Spjd */ 398168404Spjdtypedef struct ztest_shared { 399236143Smm boolean_t zs_do_init; 400219089Spjd hrtime_t zs_proc_start; 401219089Spjd hrtime_t zs_proc_stop; 402219089Spjd hrtime_t zs_thread_start; 403219089Spjd hrtime_t zs_thread_stop; 404219089Spjd hrtime_t zs_thread_kill; 405219089Spjd uint64_t zs_enospc_count; 406219089Spjd uint64_t zs_vdev_next_leaf; 407185029Spjd uint64_t zs_vdev_aux; 408168404Spjd uint64_t zs_alloc; 409168404Spjd uint64_t zs_space; 410219089Spjd uint64_t zs_splits; 411219089Spjd uint64_t zs_mirrors; 412236143Smm uint64_t zs_metaslab_sz; 413236143Smm uint64_t zs_metaslab_df_alloc_threshold; 414236143Smm uint64_t zs_guid; 415168404Spjd} ztest_shared_t; 416168404Spjd 417219089Spjd#define ID_PARALLEL -1ULL 418219089Spjd 419168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 420185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 421219089Spjdztest_shared_t *ztest_shared; 422168404Spjd 423236143Smmstatic spa_t *ztest_spa = NULL; 424236143Smmstatic ztest_ds_t *ztest_ds; 425168404Spjd 426236143Smmstatic mutex_t ztest_vdev_lock; 427239620Smm 428239620Smm/* 429239620Smm * The ztest_name_lock protects the pool and dataset namespace used by 430239620Smm * the individual tests. To modify the namespace, consumers must grab 431239620Smm * this lock as writer. Grabbing the lock as reader will ensure that the 432239620Smm * namespace does not change while the lock is held. 
433239620Smm */ 434236143Smmstatic rwlock_t ztest_name_lock; 435236143Smm 436236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 437185029Spjdstatic boolean_t ztest_exiting; 438168404Spjd 439219089Spjd/* Global commit callback list */ 440219089Spjdstatic ztest_cb_list_t zcl; 441219089Spjd 442219089Spjdenum ztest_object { 443219089Spjd ZTEST_META_DNODE = 0, 444219089Spjd ZTEST_DIROBJ, 445219089Spjd ZTEST_OBJECTS 446219089Spjd}; 447168404Spjd 448168676Spjdstatic void usage(boolean_t) __NORETURN; 449168498Spjd 450168404Spjd/* 451168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 452168404Spjd * debugging facilities. 453168404Spjd */ 454168404Spjdconst char * 455168404Spjd_umem_debug_init() 456168404Spjd{ 457168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 458168404Spjd} 459168404Spjd 460168404Spjdconst char * 461168404Spjd_umem_logging_init(void) 462168404Spjd{ 463168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 464168404Spjd} 465168404Spjd 466168404Spjd#define FATAL_MSG_SZ 1024 467168404Spjd 468168404Spjdchar *fatal_msg; 469168404Spjd 470168404Spjdstatic void 471168404Spjdfatal(int do_perror, char *message, ...) 
472168404Spjd{ 473168404Spjd va_list args; 474168404Spjd int save_errno = errno; 475168404Spjd char buf[FATAL_MSG_SZ]; 476168404Spjd 477168404Spjd (void) fflush(stdout); 478168404Spjd 479168404Spjd va_start(args, message); 480168404Spjd (void) sprintf(buf, "ztest: "); 481168404Spjd /* LINTED */ 482168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 483168404Spjd va_end(args); 484168404Spjd if (do_perror) { 485168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 486168404Spjd ": %s", strerror(save_errno)); 487168404Spjd } 488168404Spjd (void) fprintf(stderr, "%s\n", buf); 489168404Spjd fatal_msg = buf; /* to ease debugging */ 490168404Spjd if (ztest_dump_core) 491168404Spjd abort(); 492168404Spjd exit(3); 493168404Spjd} 494168404Spjd 495168404Spjdstatic int 496168404Spjdstr2shift(const char *buf) 497168404Spjd{ 498168404Spjd const char *ends = "BKMGTPEZ"; 499168404Spjd int i; 500168404Spjd 501168404Spjd if (buf[0] == '\0') 502168404Spjd return (0); 503168404Spjd for (i = 0; i < strlen(ends); i++) { 504168404Spjd if (toupper(buf[0]) == ends[i]) 505168404Spjd break; 506168404Spjd } 507168498Spjd if (i == strlen(ends)) { 508168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 509168498Spjd buf); 510168498Spjd usage(B_FALSE); 511168498Spjd } 512168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 513168404Spjd return (10*i); 514168404Spjd } 515168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 516168498Spjd usage(B_FALSE); 517168498Spjd /* NOTREACHED */ 518168404Spjd} 519168404Spjd 520168404Spjdstatic uint64_t 521168404Spjdnicenumtoull(const char *buf) 522168404Spjd{ 523168404Spjd char *end; 524168404Spjd uint64_t val; 525168404Spjd 526168404Spjd val = strtoull(buf, &end, 0); 527168404Spjd if (end == buf) { 528168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 529168498Spjd usage(B_FALSE); 530168404Spjd } else if (end[0] == '.') { 531168404Spjd 
double fval = strtod(buf, &end); 532168404Spjd fval *= pow(2, str2shift(end)); 533168498Spjd if (fval > UINT64_MAX) { 534168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 535168498Spjd buf); 536168498Spjd usage(B_FALSE); 537168498Spjd } 538168404Spjd val = (uint64_t)fval; 539168404Spjd } else { 540168404Spjd int shift = str2shift(end); 541168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 542168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 543168498Spjd buf); 544168498Spjd usage(B_FALSE); 545168498Spjd } 546168404Spjd val <<= shift; 547168404Spjd } 548168404Spjd return (val); 549168404Spjd} 550168404Spjd 551168404Spjdstatic void 552168498Spjdusage(boolean_t requested) 553168404Spjd{ 554236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 555236143Smm 556168404Spjd char nice_vdev_size[10]; 557168404Spjd char nice_gang_bang[10]; 558168498Spjd FILE *fp = requested ? stdout : stderr; 559168404Spjd 560236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 561236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 562168404Spjd 563168498Spjd (void) fprintf(fp, "Usage: %s\n" 564168404Spjd "\t[-v vdevs (default: %llu)]\n" 565168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 566219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 567168404Spjd "\t[-m mirror_copies (default: %d)]\n" 568168404Spjd "\t[-r raidz_disks (default: %d)]\n" 569168404Spjd "\t[-R raidz_parity (default: %d)]\n" 570168404Spjd "\t[-d datasets (default: %d)]\n" 571168404Spjd "\t[-t threads (default: %d)]\n" 572168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 573219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 574219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 575168404Spjd "\t[-p pool_name (default: %s)]\n" 576219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 577219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 578219089Spjd "\t[-E] use existing 
pool instead of creating new one\n" 579219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 580219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 581219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 582236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 583168498Spjd "\t[-h] (print help)\n" 584168404Spjd "", 585236143Smm zo->zo_pool, 586236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 587185029Spjd nice_vdev_size, /* -s */ 588236143Smm zo->zo_ashift, /* -a */ 589236143Smm zo->zo_mirrors, /* -m */ 590236143Smm zo->zo_raidz, /* -r */ 591236143Smm zo->zo_raidz_parity, /* -R */ 592236143Smm zo->zo_datasets, /* -d */ 593236143Smm zo->zo_threads, /* -t */ 594185029Spjd nice_gang_bang, /* -g */ 595236143Smm zo->zo_init, /* -i */ 596236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 597236143Smm zo->zo_pool, /* -p */ 598236143Smm zo->zo_dir, /* -f */ 599236143Smm (u_longlong_t)zo->zo_time, /* -T */ 600236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 601236143Smm (u_longlong_t)zo->zo_passtime); 602168498Spjd exit(requested ? 
0 : 1); 603168404Spjd} 604168404Spjd 605168404Spjdstatic void 606168404Spjdprocess_options(int argc, char **argv) 607168404Spjd{ 608236143Smm char *path; 609236143Smm ztest_shared_opts_t *zo = &ztest_opts; 610236143Smm 611168404Spjd int opt; 612168404Spjd uint64_t value; 613236143Smm char altdir[MAXNAMELEN] = { 0 }; 614168404Spjd 615236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 616168404Spjd 617168404Spjd while ((opt = getopt(argc, argv, 618236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 619168404Spjd value = 0; 620168404Spjd switch (opt) { 621185029Spjd case 'v': 622185029Spjd case 's': 623185029Spjd case 'a': 624185029Spjd case 'm': 625185029Spjd case 'r': 626185029Spjd case 'R': 627185029Spjd case 'd': 628185029Spjd case 't': 629185029Spjd case 'g': 630185029Spjd case 'i': 631185029Spjd case 'k': 632185029Spjd case 'T': 633185029Spjd case 'P': 634219089Spjd case 'F': 635168404Spjd value = nicenumtoull(optarg); 636168404Spjd } 637168404Spjd switch (opt) { 638185029Spjd case 'v': 639236143Smm zo->zo_vdevs = value; 640168404Spjd break; 641185029Spjd case 's': 642236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 643168404Spjd break; 644185029Spjd case 'a': 645236143Smm zo->zo_ashift = value; 646168404Spjd break; 647185029Spjd case 'm': 648236143Smm zo->zo_mirrors = value; 649168404Spjd break; 650185029Spjd case 'r': 651236143Smm zo->zo_raidz = MAX(1, value); 652168404Spjd break; 653185029Spjd case 'R': 654236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 655168404Spjd break; 656185029Spjd case 'd': 657236143Smm zo->zo_datasets = MAX(1, value); 658168404Spjd break; 659185029Spjd case 't': 660236143Smm zo->zo_threads = MAX(1, value); 661168404Spjd break; 662185029Spjd case 'g': 663236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 664236143Smm value); 665168404Spjd break; 666185029Spjd case 'i': 667236143Smm zo->zo_init = value; 668168404Spjd break; 669185029Spjd case 'k': 670236143Smm zo->zo_killrate = value; 
671168404Spjd break; 672185029Spjd case 'p': 673236143Smm (void) strlcpy(zo->zo_pool, optarg, 674236143Smm sizeof (zo->zo_pool)); 675168404Spjd break; 676185029Spjd case 'f': 677236143Smm path = realpath(optarg, NULL); 678236143Smm if (path == NULL) { 679236143Smm (void) fprintf(stderr, "error: %s: %s\n", 680236143Smm optarg, strerror(errno)); 681236143Smm usage(B_FALSE); 682236143Smm } else { 683236143Smm (void) strlcpy(zo->zo_dir, path, 684236143Smm sizeof (zo->zo_dir)); 685236143Smm } 686168404Spjd break; 687185029Spjd case 'V': 688236143Smm zo->zo_verbose++; 689168404Spjd break; 690185029Spjd case 'E': 691236143Smm zo->zo_init = 0; 692168404Spjd break; 693185029Spjd case 'T': 694236143Smm zo->zo_time = value; 695168404Spjd break; 696185029Spjd case 'P': 697236143Smm zo->zo_passtime = MAX(1, value); 698168404Spjd break; 699219089Spjd case 'F': 700236143Smm zo->zo_maxloops = MAX(1, value); 701219089Spjd break; 702236143Smm case 'B': 703236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 704236143Smm break; 705185029Spjd case 'h': 706168498Spjd usage(B_TRUE); 707168498Spjd break; 708185029Spjd case '?': 709185029Spjd default: 710168498Spjd usage(B_FALSE); 711168404Spjd break; 712168404Spjd } 713168404Spjd } 714168404Spjd 715236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 716168404Spjd 717236143Smm zo->zo_vdevtime = 718236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 719219089Spjd UINT64_MAX >> 2); 720236143Smm 721236143Smm if (strlen(altdir) > 0) { 722242845Sdelphij char *cmd; 723242845Sdelphij char *realaltdir; 724236143Smm char *bin; 725236143Smm char *ztest; 726236143Smm char *isa; 727236143Smm int isalen; 728236143Smm 729242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 730242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 731242845Sdelphij 732242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 733236143Smm if (0 != access(altdir, F_OK)) { 734236143Smm ztest_dump_core = B_FALSE; 735236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 736236143Smm altdir); 737236143Smm } 738236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 739236143Smm 740236143Smm /* 741236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 742236143Smm * We want to extract <isa> to determine if we should use 743236143Smm * 32 or 64 bit binaries. 744236143Smm */ 745236143Smm bin = strstr(cmd, "/usr/bin/"); 746236143Smm ztest = strstr(bin, "/ztest"); 747236143Smm isa = bin + 9; 748236143Smm isalen = ztest - isa; 749236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 750236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 751236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 752236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 753236143Smm 754236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 755236143Smm ztest_dump_core = B_FALSE; 756236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 757236143Smm zo->zo_alt_ztest); 758236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 759236143Smm ztest_dump_core = B_FALSE; 760236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 761236143Smm zo->zo_alt_libpath); 762236143Smm } 763242845Sdelphij 764242845Sdelphij umem_free(cmd, MAXPATHLEN); 765242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 766236143Smm } 767168404Spjd} 768168404Spjd 
769219089Spjdstatic void 770219089Spjdztest_kill(ztest_shared_t *zs) 771219089Spjd{ 772236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 773236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 774254112Sdelphij 775254112Sdelphij /* 776254112Sdelphij * Before we kill off ztest, make sure that the config is updated. 777254112Sdelphij * See comment above spa_config_sync(). 778254112Sdelphij */ 779254112Sdelphij mutex_enter(&spa_namespace_lock); 780254112Sdelphij spa_config_sync(ztest_spa, B_FALSE, B_FALSE); 781254112Sdelphij mutex_exit(&spa_namespace_lock); 782254112Sdelphij 783254112Sdelphij zfs_dbgmsg_print(FTAG); 784219089Spjd (void) kill(getpid(), SIGKILL); 785219089Spjd} 786219089Spjd 787168404Spjdstatic uint64_t 788219089Spjdztest_random(uint64_t range) 789219089Spjd{ 790219089Spjd uint64_t r; 791219089Spjd 792242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 793242845Sdelphij 794219089Spjd if (range == 0) 795219089Spjd return (0); 796219089Spjd 797242845Sdelphij if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) 798219089Spjd fatal(1, "short read from /dev/urandom"); 799219089Spjd 800219089Spjd return (r % range); 801219089Spjd} 802219089Spjd 803219089Spjd/* ARGSUSED */ 804219089Spjdstatic void 805219089Spjdztest_record_enospc(const char *s) 806219089Spjd{ 807219089Spjd ztest_shared->zs_enospc_count++; 808219089Spjd} 809219089Spjd 810219089Spjdstatic uint64_t 811168404Spjdztest_get_ashift(void) 812168404Spjd{ 813236143Smm if (ztest_opts.zo_ashift == 0) 814268855Sdelphij return (SPA_MINBLOCKSHIFT + ztest_random(5)); 815236143Smm return (ztest_opts.zo_ashift); 816168404Spjd} 817168404Spjd 818168404Spjdstatic nvlist_t * 819243505Smmmake_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) 820168404Spjd{ 821185029Spjd char pathbuf[MAXPATHLEN]; 822168404Spjd uint64_t vdev; 823168404Spjd nvlist_t *file; 824168404Spjd 825185029Spjd if (ashift == 0) 826185029Spjd ashift = 
ztest_get_ashift(); 827168404Spjd 828185029Spjd if (path == NULL) { 829185029Spjd path = pathbuf; 830185029Spjd 831185029Spjd if (aux != NULL) { 832185029Spjd vdev = ztest_shared->zs_vdev_aux; 833236143Smm (void) snprintf(path, sizeof (pathbuf), 834236143Smm ztest_aux_template, ztest_opts.zo_dir, 835243505Smm pool == NULL ? ztest_opts.zo_pool : pool, 836243505Smm aux, vdev); 837185029Spjd } else { 838219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 839236143Smm (void) snprintf(path, sizeof (pathbuf), 840236143Smm ztest_dev_template, ztest_opts.zo_dir, 841243505Smm pool == NULL ? ztest_opts.zo_pool : pool, vdev); 842185029Spjd } 843185029Spjd } 844185029Spjd 845185029Spjd if (size != 0) { 846185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 847168404Spjd if (fd == -1) 848185029Spjd fatal(1, "can't open %s", path); 849168404Spjd if (ftruncate(fd, size) != 0) 850185029Spjd fatal(1, "can't ftruncate %s", path); 851168404Spjd (void) close(fd); 852168404Spjd } 853168404Spjd 854168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 855168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 856185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 857168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 858168404Spjd 859168404Spjd return (file); 860168404Spjd} 861168404Spjd 862168404Spjdstatic nvlist_t * 863243505Smmmake_vdev_raidz(char *path, char *aux, char *pool, size_t size, 864243505Smm uint64_t ashift, int r) 865168404Spjd{ 866168404Spjd nvlist_t *raidz, **child; 867168404Spjd int c; 868168404Spjd 869168404Spjd if (r < 2) 870243505Smm return (make_vdev_file(path, aux, pool, size, ashift)); 871168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 872168404Spjd 873168404Spjd for (c = 0; c < r; c++) 874243505Smm child[c] = make_vdev_file(path, aux, pool, size, ashift); 875168404Spjd 876168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 
877168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 878168404Spjd VDEV_TYPE_RAIDZ) == 0); 879168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 880236143Smm ztest_opts.zo_raidz_parity) == 0); 881168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 882168404Spjd child, r) == 0); 883168404Spjd 884168404Spjd for (c = 0; c < r; c++) 885168404Spjd nvlist_free(child[c]); 886168404Spjd 887168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 888168404Spjd 889168404Spjd return (raidz); 890168404Spjd} 891168404Spjd 892168404Spjdstatic nvlist_t * 893243505Smmmake_vdev_mirror(char *path, char *aux, char *pool, size_t size, 894243505Smm uint64_t ashift, int r, int m) 895168404Spjd{ 896168404Spjd nvlist_t *mirror, **child; 897168404Spjd int c; 898168404Spjd 899168404Spjd if (m < 1) 900243505Smm return (make_vdev_raidz(path, aux, pool, size, ashift, r)); 901168404Spjd 902168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 903168404Spjd 904168404Spjd for (c = 0; c < m; c++) 905243505Smm child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); 906168404Spjd 907168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 908168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 909168404Spjd VDEV_TYPE_MIRROR) == 0); 910168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 911168404Spjd child, m) == 0); 912168404Spjd 913168404Spjd for (c = 0; c < m; c++) 914168404Spjd nvlist_free(child[c]); 915168404Spjd 916168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 917168404Spjd 918168404Spjd return (mirror); 919168404Spjd} 920168404Spjd 921168404Spjdstatic nvlist_t * 922243505Smmmake_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, 923243505Smm int log, int r, int m, int t) 924168404Spjd{ 925168404Spjd nvlist_t *root, **child; 926168404Spjd int c; 927168404Spjd 928168404Spjd ASSERT(t > 0); 929168404Spjd 930168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), 
UMEM_NOFAIL); 931168404Spjd 932185029Spjd for (c = 0; c < t; c++) { 933243505Smm child[c] = make_vdev_mirror(path, aux, pool, size, ashift, 934243505Smm r, m); 935185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 936185029Spjd log) == 0); 937185029Spjd } 938168404Spjd 939168404Spjd VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); 940168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 941185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 942168404Spjd child, t) == 0); 943168404Spjd 944168404Spjd for (c = 0; c < t; c++) 945168404Spjd nvlist_free(child[c]); 946168404Spjd 947168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 948168404Spjd 949168404Spjd return (root); 950168404Spjd} 951168404Spjd 952243505Smm/* 953243505Smm * Find a random spa version. Returns back a random spa version in the 954243505Smm * range [initial_version, SPA_VERSION_FEATURES]. 955243505Smm */ 956243505Smmstatic uint64_t 957243505Smmztest_random_spa_version(uint64_t initial_version) 958243505Smm{ 959243505Smm uint64_t version = initial_version; 960243505Smm 961243505Smm if (version <= SPA_VERSION_BEFORE_FEATURES) { 962243505Smm version = version + 963243505Smm ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); 964243505Smm } 965243505Smm 966243505Smm if (version > SPA_VERSION_BEFORE_FEATURES) 967243505Smm version = SPA_VERSION_FEATURES; 968243505Smm 969243505Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 970243505Smm return (version); 971243505Smm} 972243505Smm 973219089Spjdstatic int 974219089Spjdztest_random_blocksize(void) 975219089Spjd{ 976274337Sdelphij uint64_t block_shift; 977274337Sdelphij /* 978274337Sdelphij * Choose a block size >= the ashift. 979274337Sdelphij * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. 
980274337Sdelphij */ 981274337Sdelphij int maxbs = SPA_OLD_MAXBLOCKSHIFT; 982274337Sdelphij if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE) 983274337Sdelphij maxbs = 20; 984284304Savg block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1); 985268855Sdelphij return (1 << (SPA_MINBLOCKSHIFT + block_shift)); 986219089Spjd} 987219089Spjd 988219089Spjdstatic int 989219089Spjdztest_random_ibshift(void) 990219089Spjd{ 991219089Spjd return (DN_MIN_INDBLKSHIFT + 992219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 993219089Spjd} 994219089Spjd 995219089Spjdstatic uint64_t 996219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 997219089Spjd{ 998219089Spjd uint64_t top; 999219089Spjd vdev_t *rvd = spa->spa_root_vdev; 1000219089Spjd vdev_t *tvd; 1001219089Spjd 1002219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 1003219089Spjd 1004219089Spjd do { 1005219089Spjd top = ztest_random(rvd->vdev_children); 1006219089Spjd tvd = rvd->vdev_child[top]; 1007219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 1008219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 1009219089Spjd 1010219089Spjd return (top); 1011219089Spjd} 1012219089Spjd 1013219089Spjdstatic uint64_t 1014219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 1015219089Spjd{ 1016219089Spjd uint64_t value; 1017219089Spjd 1018219089Spjd do { 1019219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 1020219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 1021219089Spjd 1022219089Spjd return (value); 1023219089Spjd} 1024219089Spjd 1025219089Spjdstatic int 1026219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 1027219089Spjd boolean_t inherit) 1028219089Spjd{ 1029219089Spjd const char *propname = zfs_prop_to_name(prop); 1030219089Spjd const char *valname; 1031219089Spjd char setpoint[MAXPATHLEN]; 1032219089Spjd uint64_t curval; 1033219089Spjd int error; 
1034219089Spjd 1035248571Smm error = dsl_prop_set_int(osname, propname, 1036248571Smm (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); 1037219089Spjd 1038219089Spjd if (error == ENOSPC) { 1039219089Spjd ztest_record_enospc(FTAG); 1040219089Spjd return (error); 1041219089Spjd } 1042240415Smm ASSERT0(error); 1043219089Spjd 1044248571Smm VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); 1045219089Spjd 1046236143Smm if (ztest_opts.zo_verbose >= 6) { 1047219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 1048219089Spjd (void) printf("%s %s = %s at '%s'\n", 1049219089Spjd osname, propname, valname, setpoint); 1050219089Spjd } 1051219089Spjd 1052219089Spjd return (error); 1053219089Spjd} 1054219089Spjd 1055219089Spjdstatic int 1056236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 1057219089Spjd{ 1058236143Smm spa_t *spa = ztest_spa; 1059219089Spjd nvlist_t *props = NULL; 1060219089Spjd int error; 1061219089Spjd 1062219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 1063219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1064219089Spjd 1065219089Spjd error = spa_prop_set(spa, props); 1066219089Spjd 1067219089Spjd nvlist_free(props); 1068219089Spjd 1069219089Spjd if (error == ENOSPC) { 1070219089Spjd ztest_record_enospc(FTAG); 1071219089Spjd return (error); 1072219089Spjd } 1073240415Smm ASSERT0(error); 1074219089Spjd 1075219089Spjd return (error); 1076219089Spjd} 1077219089Spjd 1078168404Spjdstatic void 1079219089Spjdztest_rll_init(rll_t *rll) 1080168404Spjd{ 1081219089Spjd rll->rll_writer = NULL; 1082219089Spjd rll->rll_readers = 0; 1083219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1084219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); 1085219089Spjd} 1086219089Spjd 1087219089Spjdstatic void 1088219089Spjdztest_rll_destroy(rll_t *rll) 1089219089Spjd{ 1090219089Spjd ASSERT(rll->rll_writer == NULL); 1091219089Spjd 
ASSERT(rll->rll_readers == 0); 1092219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 1093219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1094219089Spjd} 1095219089Spjd 1096219089Spjdstatic void 1097219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1098219089Spjd{ 1099219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1100219089Spjd 1101219089Spjd if (type == RL_READER) { 1102219089Spjd while (rll->rll_writer != NULL) 1103219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1104219089Spjd rll->rll_readers++; 1105219089Spjd } else { 1106219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1107219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1108219089Spjd rll->rll_writer = curthread; 1109219089Spjd } 1110219089Spjd 1111219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1112219089Spjd} 1113219089Spjd 1114219089Spjdstatic void 1115219089Spjdztest_rll_unlock(rll_t *rll) 1116219089Spjd{ 1117219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1118219089Spjd 1119219089Spjd if (rll->rll_writer) { 1120219089Spjd ASSERT(rll->rll_readers == 0); 1121219089Spjd rll->rll_writer = NULL; 1122219089Spjd } else { 1123219089Spjd ASSERT(rll->rll_readers != 0); 1124219089Spjd ASSERT(rll->rll_writer == NULL); 1125219089Spjd rll->rll_readers--; 1126219089Spjd } 1127219089Spjd 1128219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1129219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1130219089Spjd 1131219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1132219089Spjd} 1133219089Spjd 1134219089Spjdstatic void 1135219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1136219089Spjd{ 1137219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1138219089Spjd 1139219089Spjd ztest_rll_lock(rll, type); 1140219089Spjd} 1141219089Spjd 1142219089Spjdstatic void 1143219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1144219089Spjd{ 1145219089Spjd rll_t *rll = 
&zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1146219089Spjd 1147219089Spjd ztest_rll_unlock(rll); 1148219089Spjd} 1149219089Spjd 1150219089Spjdstatic rl_t * 1151219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1152219089Spjd uint64_t size, rl_type_t type) 1153219089Spjd{ 1154219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1155219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1156219089Spjd rl_t *rl; 1157219089Spjd 1158219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1159219089Spjd rl->rl_object = object; 1160219089Spjd rl->rl_offset = offset; 1161219089Spjd rl->rl_size = size; 1162219089Spjd rl->rl_lock = rll; 1163219089Spjd 1164219089Spjd ztest_rll_lock(rll, type); 1165219089Spjd 1166219089Spjd return (rl); 1167219089Spjd} 1168219089Spjd 1169219089Spjdstatic void 1170219089Spjdztest_range_unlock(rl_t *rl) 1171219089Spjd{ 1172219089Spjd rll_t *rll = rl->rl_lock; 1173219089Spjd 1174219089Spjd ztest_rll_unlock(rll); 1175219089Spjd 1176219089Spjd umem_free(rl, sizeof (*rl)); 1177219089Spjd} 1178219089Spjd 1179219089Spjdstatic void 1180236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1181219089Spjd{ 1182219089Spjd zd->zd_os = os; 1183219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1184236143Smm zd->zd_shared = szd; 1185219089Spjd dmu_objset_name(os, zd->zd_name); 1186219089Spjd 1187236143Smm if (zd->zd_shared != NULL) 1188236143Smm zd->zd_shared->zd_seq = 0; 1189236143Smm 1190224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1191219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1192219089Spjd 1193219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1194219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1195219089Spjd 1196219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1197219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1198219089Spjd} 1199219089Spjd 1200219089Spjdstatic void 
1201219089Spjdztest_zd_fini(ztest_ds_t *zd) 1202219089Spjd{ 1203219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); 1204219089Spjd 1205219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1206219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1207219089Spjd 1208219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1209219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1210219089Spjd} 1211219089Spjd 1212219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1213219089Spjd 1214219089Spjdstatic uint64_t 1215219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1216219089Spjd{ 1217219089Spjd uint64_t txg; 1218168404Spjd int error; 1219168404Spjd 1220219089Spjd /* 1221219089Spjd * Attempt to assign tx to some transaction group. 1222219089Spjd */ 1223219089Spjd error = dmu_tx_assign(tx, txg_how); 1224168404Spjd if (error) { 1225219089Spjd if (error == ERESTART) { 1226219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1227219089Spjd dmu_tx_wait(tx); 1228219089Spjd } else { 1229219089Spjd ASSERT3U(error, ==, ENOSPC); 1230219089Spjd ztest_record_enospc(tag); 1231219089Spjd } 1232219089Spjd dmu_tx_abort(tx); 1233219089Spjd return (0); 1234168404Spjd } 1235219089Spjd txg = dmu_tx_get_txg(tx); 1236219089Spjd ASSERT(txg != 0); 1237219089Spjd return (txg); 1238168404Spjd} 1239168404Spjd 1240219089Spjdstatic void 1241219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1242168404Spjd{ 1243219089Spjd uint64_t *ip = buf; 1244219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1245168404Spjd 1246219089Spjd while (ip < ip_end) 1247219089Spjd *ip++ = value; 1248219089Spjd} 1249168404Spjd 1250219089Spjdstatic boolean_t 1251219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1252219089Spjd{ 1253219089Spjd uint64_t *ip = buf; 1254219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1255219089Spjd uint64_t diff = 0; 1256168404Spjd 1257219089Spjd 
while (ip < ip_end) 1258219089Spjd diff |= (value - *ip++); 1259219089Spjd 1260219089Spjd return (diff == 0); 1261168404Spjd} 1262168404Spjd 1263219089Spjdstatic void 1264219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1265219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1266168404Spjd{ 1267219089Spjd bt->bt_magic = BT_MAGIC; 1268219089Spjd bt->bt_objset = dmu_objset_id(os); 1269219089Spjd bt->bt_object = object; 1270219089Spjd bt->bt_offset = offset; 1271219089Spjd bt->bt_gen = gen; 1272219089Spjd bt->bt_txg = txg; 1273219089Spjd bt->bt_crtxg = crtxg; 1274168404Spjd} 1275168404Spjd 1276219089Spjdstatic void 1277219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1278219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1279219089Spjd{ 1280268075Sdelphij ASSERT3U(bt->bt_magic, ==, BT_MAGIC); 1281268075Sdelphij ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); 1282268075Sdelphij ASSERT3U(bt->bt_object, ==, object); 1283268075Sdelphij ASSERT3U(bt->bt_offset, ==, offset); 1284268075Sdelphij ASSERT3U(bt->bt_gen, <=, gen); 1285268075Sdelphij ASSERT3U(bt->bt_txg, <=, txg); 1286268075Sdelphij ASSERT3U(bt->bt_crtxg, ==, crtxg); 1287219089Spjd} 1288219089Spjd 1289219089Spjdstatic ztest_block_tag_t * 1290219089Spjdztest_bt_bonus(dmu_buf_t *db) 1291219089Spjd{ 1292219089Spjd dmu_object_info_t doi; 1293219089Spjd ztest_block_tag_t *bt; 1294219089Spjd 1295219089Spjd dmu_object_info_from_db(db, &doi); 1296219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1297219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1298219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1299219089Spjd 1300219089Spjd return (bt); 1301219089Spjd} 1302219089Spjd 1303219089Spjd/* 1304219089Spjd * ZIL logging ops 1305219089Spjd */ 1306219089Spjd 1307219089Spjd#define lrz_type lr_mode 1308219089Spjd#define lrz_blocksize lr_uid 1309219089Spjd#define lrz_ibshift 
lr_gid 1310219089Spjd#define lrz_bonustype lr_rdev 1311219089Spjd#define lrz_bonuslen lr_crtime[1] 1312219089Spjd 1313219089Spjdstatic void 1314219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1315219089Spjd{ 1316219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1317219089Spjd size_t namesize = strlen(name) + 1; 1318219089Spjd itx_t *itx; 1319219089Spjd 1320219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1321219089Spjd return; 1322219089Spjd 1323219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1324219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1325219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1326219089Spjd 1327219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1328219089Spjd} 1329219089Spjd 1330219089Spjdstatic void 1331219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1332219089Spjd{ 1333219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1334219089Spjd size_t namesize = strlen(name) + 1; 1335219089Spjd itx_t *itx; 1336219089Spjd 1337219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1338219089Spjd return; 1339219089Spjd 1340219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1341219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1342219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1343219089Spjd 1344219089Spjd itx->itx_oid = object; 1345219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1346219089Spjd} 1347219089Spjd 1348219089Spjdstatic void 1349219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1350219089Spjd{ 1351219089Spjd itx_t *itx; 1352219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1353219089Spjd 1354219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1355219089Spjd return; 1356219089Spjd 1357219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA) 1358219089Spjd write_state = WR_INDIRECT; 1359219089Spjd 1360219089Spjd itx = zil_itx_create(TX_WRITE, 1361219089Spjd sizeof (*lr) + 
(write_state == WR_COPIED ? lr->lr_length : 0)); 1362219089Spjd 1363219089Spjd if (write_state == WR_COPIED && 1364219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1365219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1366219089Spjd zil_itx_destroy(itx); 1367219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1368219089Spjd write_state = WR_NEED_COPY; 1369219089Spjd } 1370219089Spjd itx->itx_private = zd; 1371219089Spjd itx->itx_wr_state = write_state; 1372219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1373219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1374219089Spjd 1375219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1376219089Spjd sizeof (*lr) - sizeof (lr_t)); 1377219089Spjd 1378219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1379219089Spjd} 1380219089Spjd 1381219089Spjdstatic void 1382219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1383219089Spjd{ 1384219089Spjd itx_t *itx; 1385219089Spjd 1386219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1387219089Spjd return; 1388219089Spjd 1389219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1390219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1391219089Spjd sizeof (*lr) - sizeof (lr_t)); 1392219089Spjd 1393219089Spjd itx->itx_sync = B_FALSE; 1394219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1395219089Spjd} 1396219089Spjd 1397219089Spjdstatic void 1398219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1399219089Spjd{ 1400219089Spjd itx_t *itx; 1401219089Spjd 1402219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1403219089Spjd return; 1404219089Spjd 1405219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1406219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1407219089Spjd sizeof (*lr) - sizeof (lr_t)); 1408219089Spjd 1409219089Spjd itx->itx_sync = B_FALSE; 1410219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1411219089Spjd} 1412219089Spjd 
1413219089Spjd/* 1414219089Spjd * ZIL replay ops 1415219089Spjd */ 1416168404Spjdstatic int 1417219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1418168404Spjd{ 1419219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1420219089Spjd objset_t *os = zd->zd_os; 1421219089Spjd ztest_block_tag_t *bbt; 1422219089Spjd dmu_buf_t *db; 1423168404Spjd dmu_tx_t *tx; 1424219089Spjd uint64_t txg; 1425219089Spjd int error = 0; 1426168404Spjd 1427168404Spjd if (byteswap) 1428168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1429168404Spjd 1430219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1431219089Spjd ASSERT(name[0] != '\0'); 1432219089Spjd 1433168404Spjd tx = dmu_tx_create(os); 1434219089Spjd 1435219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1436219089Spjd 1437219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1438219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1439219089Spjd } else { 1440219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1441219089Spjd } 1442219089Spjd 1443219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1444219089Spjd if (txg == 0) 1445219089Spjd return (ENOSPC); 1446219089Spjd 1447219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1448219089Spjd 1449219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1450219089Spjd if (lr->lr_foid == 0) { 1451219089Spjd lr->lr_foid = zap_create(os, 1452219089Spjd lr->lrz_type, lr->lrz_bonustype, 1453219089Spjd lr->lrz_bonuslen, tx); 1454219089Spjd } else { 1455219089Spjd error = zap_create_claim(os, lr->lr_foid, 1456219089Spjd lr->lrz_type, lr->lrz_bonustype, 1457219089Spjd lr->lrz_bonuslen, tx); 1458219089Spjd } 1459219089Spjd } else { 1460219089Spjd if (lr->lr_foid == 0) { 1461219089Spjd lr->lr_foid = dmu_object_alloc(os, 1462219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1463219089Spjd lr->lrz_bonuslen, tx); 1464219089Spjd } else { 1465219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1466219089Spjd lr->lrz_type, 0, 
lr->lrz_bonustype, 1467219089Spjd lr->lrz_bonuslen, tx); 1468219089Spjd } 1469219089Spjd } 1470219089Spjd 1471168404Spjd if (error) { 1472219089Spjd ASSERT3U(error, ==, EEXIST); 1473219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1474219089Spjd dmu_tx_commit(tx); 1475168404Spjd return (error); 1476168404Spjd } 1477168404Spjd 1478219089Spjd ASSERT(lr->lr_foid != 0); 1479219089Spjd 1480219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1481219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1482219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1483219089Spjd 1484219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1485219089Spjd bbt = ztest_bt_bonus(db); 1486219089Spjd dmu_buf_will_dirty(db, tx); 1487219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1488219089Spjd dmu_buf_rele(db, FTAG); 1489219089Spjd 1490219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1491219089Spjd &lr->lr_foid, tx)); 1492219089Spjd 1493219089Spjd (void) ztest_log_create(zd, tx, lr); 1494219089Spjd 1495168404Spjd dmu_tx_commit(tx); 1496168404Spjd 1497219089Spjd return (0); 1498219089Spjd} 1499219089Spjd 1500219089Spjdstatic int 1501219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1502219089Spjd{ 1503219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1504219089Spjd objset_t *os = zd->zd_os; 1505219089Spjd dmu_object_info_t doi; 1506219089Spjd dmu_tx_t *tx; 1507219089Spjd uint64_t object, txg; 1508219089Spjd 1509219089Spjd if (byteswap) 1510219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1511219089Spjd 1512219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1513219089Spjd ASSERT(name[0] != '\0'); 1514219089Spjd 1515219089Spjd VERIFY3U(0, ==, 1516219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1517219089Spjd ASSERT(object != 0); 1518219089Spjd 1519219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1520219089Spjd 1521219089Spjd 
VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); 1522219089Spjd 1523219089Spjd tx = dmu_tx_create(os); 1524219089Spjd 1525219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1526219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1527219089Spjd 1528219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1529219089Spjd if (txg == 0) { 1530219089Spjd ztest_object_unlock(zd, object); 1531219089Spjd return (ENOSPC); 1532168404Spjd } 1533168404Spjd 1534219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1535219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1536219089Spjd } else { 1537219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1538219089Spjd } 1539219089Spjd 1540219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1541219089Spjd 1542219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1543219089Spjd 1544219089Spjd dmu_tx_commit(tx); 1545219089Spjd 1546219089Spjd ztest_object_unlock(zd, object); 1547219089Spjd 1548219089Spjd return (0); 1549168404Spjd} 1550168404Spjd 1551168404Spjdstatic int 1552219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1553168404Spjd{ 1554219089Spjd objset_t *os = zd->zd_os; 1555219089Spjd void *data = lr + 1; /* data follows lr */ 1556219089Spjd uint64_t offset, length; 1557219089Spjd ztest_block_tag_t *bt = data; 1558219089Spjd ztest_block_tag_t *bbt; 1559219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1560219089Spjd dmu_object_info_t doi; 1561168404Spjd dmu_tx_t *tx; 1562219089Spjd dmu_buf_t *db; 1563219089Spjd arc_buf_t *abuf = NULL; 1564219089Spjd rl_t *rl; 1565168404Spjd 1566168404Spjd if (byteswap) 1567168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1568168404Spjd 1569219089Spjd offset = lr->lr_offset; 1570219089Spjd length = lr->lr_length; 1571219089Spjd 1572219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1573219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1574219089Spjd uint64_t blocksize = 
BP_GET_LSIZE(&lr->lr_blkptr); 1575219089Spjd if (length < blocksize) { 1576219089Spjd offset -= offset % blocksize; 1577219089Spjd length = blocksize; 1578219089Spjd } 1579219089Spjd } 1580219089Spjd 1581219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1582219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1583219089Spjd 1584219089Spjd if (bt->bt_magic != BT_MAGIC) 1585219089Spjd bt = NULL; 1586219089Spjd 1587219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1588219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1589219089Spjd 1590219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1591219089Spjd 1592219089Spjd dmu_object_info_from_db(db, &doi); 1593219089Spjd 1594219089Spjd bbt = ztest_bt_bonus(db); 1595219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1596219089Spjd gen = bbt->bt_gen; 1597219089Spjd crtxg = bbt->bt_crtxg; 1598219089Spjd lrtxg = lr->lr_common.lrc_txg; 1599219089Spjd 1600168404Spjd tx = dmu_tx_create(os); 1601219089Spjd 1602219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1603219089Spjd 1604219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1605219089Spjd P2PHASE(offset, length) == 0) 1606219089Spjd abuf = dmu_request_arcbuf(db, length); 1607219089Spjd 1608219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1609219089Spjd if (txg == 0) { 1610219089Spjd if (abuf != NULL) 1611219089Spjd dmu_return_arcbuf(abuf); 1612219089Spjd dmu_buf_rele(db, FTAG); 1613219089Spjd ztest_range_unlock(rl); 1614219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1615219089Spjd return (ENOSPC); 1616168404Spjd } 1617168404Spjd 1618219089Spjd if (bt != NULL) { 1619219089Spjd /* 1620219089Spjd * Usually, verify the old data before writing new data -- 1621219089Spjd * but not always, because we also want to verify correct 1622219089Spjd * behavior when the data was not recently read into cache. 
1623219089Spjd */ 1624219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1625219089Spjd if (ztest_random(4) != 0) { 1626219089Spjd int prefetch = ztest_random(2) ? 1627219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1628219089Spjd ztest_block_tag_t rbt; 1629219089Spjd 1630219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1631219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1632219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1633219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1634219089Spjd offset, gen, txg, crtxg); 1635219089Spjd } 1636219089Spjd } 1637219089Spjd 1638219089Spjd /* 1639219089Spjd * Writes can appear to be newer than the bonus buffer because 1640219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1641219089Spjd * open-context data, which may be different than the data 1642219089Spjd * as it was when the write was generated. 1643219089Spjd */ 1644219089Spjd if (zd->zd_zilog->zl_replay) { 1645219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1646219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1647219089Spjd bt->bt_crtxg); 1648219089Spjd } 1649219089Spjd 1650219089Spjd /* 1651219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1652219089Spjd * so that all of the usual ASSERTs will work. 
1653219089Spjd */ 1654219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1655219089Spjd } 1656219089Spjd 1657219089Spjd if (abuf == NULL) { 1658219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1659219089Spjd } else { 1660219089Spjd bcopy(data, abuf->b_data, length); 1661219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1662219089Spjd } 1663219089Spjd 1664219089Spjd (void) ztest_log_write(zd, tx, lr); 1665219089Spjd 1666219089Spjd dmu_buf_rele(db, FTAG); 1667219089Spjd 1668168404Spjd dmu_tx_commit(tx); 1669168404Spjd 1670219089Spjd ztest_range_unlock(rl); 1671219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1672219089Spjd 1673219089Spjd return (0); 1674168404Spjd} 1675168404Spjd 1676219089Spjdstatic int 1677219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1678219089Spjd{ 1679219089Spjd objset_t *os = zd->zd_os; 1680219089Spjd dmu_tx_t *tx; 1681219089Spjd uint64_t txg; 1682219089Spjd rl_t *rl; 1683219089Spjd 1684219089Spjd if (byteswap) 1685219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1686219089Spjd 1687219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1688219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1689219089Spjd RL_WRITER); 1690219089Spjd 1691219089Spjd tx = dmu_tx_create(os); 1692219089Spjd 1693219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1694219089Spjd 1695219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1696219089Spjd if (txg == 0) { 1697219089Spjd ztest_range_unlock(rl); 1698219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1699219089Spjd return (ENOSPC); 1700219089Spjd } 1701219089Spjd 1702219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1703219089Spjd lr->lr_length, tx) == 0); 1704219089Spjd 1705219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1706219089Spjd 1707219089Spjd dmu_tx_commit(tx); 1708219089Spjd 1709219089Spjd ztest_range_unlock(rl); 1710219089Spjd 
ztest_object_unlock(zd, lr->lr_foid); 1711219089Spjd 1712219089Spjd return (0); 1713219089Spjd} 1714219089Spjd 1715219089Spjdstatic int 1716219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1717219089Spjd{ 1718219089Spjd objset_t *os = zd->zd_os; 1719219089Spjd dmu_tx_t *tx; 1720219089Spjd dmu_buf_t *db; 1721219089Spjd ztest_block_tag_t *bbt; 1722219089Spjd uint64_t txg, lrtxg, crtxg; 1723219089Spjd 1724219089Spjd if (byteswap) 1725219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1726219089Spjd 1727219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1728219089Spjd 1729219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1730219089Spjd 1731219089Spjd tx = dmu_tx_create(os); 1732219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1733219089Spjd 1734219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1735219089Spjd if (txg == 0) { 1736219089Spjd dmu_buf_rele(db, FTAG); 1737219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1738219089Spjd return (ENOSPC); 1739219089Spjd } 1740219089Spjd 1741219089Spjd bbt = ztest_bt_bonus(db); 1742219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1743219089Spjd crtxg = bbt->bt_crtxg; 1744219089Spjd lrtxg = lr->lr_common.lrc_txg; 1745219089Spjd 1746219089Spjd if (zd->zd_zilog->zl_replay) { 1747219089Spjd ASSERT(lr->lr_size != 0); 1748219089Spjd ASSERT(lr->lr_mode != 0); 1749219089Spjd ASSERT(lrtxg != 0); 1750219089Spjd } else { 1751219089Spjd /* 1752219089Spjd * Randomly change the size and increment the generation. 1753219089Spjd */ 1754219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1755219089Spjd sizeof (*bbt); 1756219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1757219089Spjd ASSERT(lrtxg == 0); 1758219089Spjd } 1759219089Spjd 1760219089Spjd /* 1761219089Spjd * Verify that the current bonus buffer is not newer than our txg. 
1762219089Spjd */ 1763219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1764219089Spjd MAX(txg, lrtxg), crtxg); 1765219089Spjd 1766219089Spjd dmu_buf_will_dirty(db, tx); 1767219089Spjd 1768219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1769219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1770240415Smm VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); 1771219089Spjd bbt = ztest_bt_bonus(db); 1772219089Spjd 1773219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1774219089Spjd 1775219089Spjd dmu_buf_rele(db, FTAG); 1776219089Spjd 1777219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1778219089Spjd 1779219089Spjd dmu_tx_commit(tx); 1780219089Spjd 1781219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1782219089Spjd 1783219089Spjd return (0); 1784219089Spjd} 1785219089Spjd 1786168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1787168404Spjd NULL, /* 0 no such transaction type */ 1788168404Spjd ztest_replay_create, /* TX_CREATE */ 1789168404Spjd NULL, /* TX_MKDIR */ 1790168404Spjd NULL, /* TX_MKXATTR */ 1791168404Spjd NULL, /* TX_SYMLINK */ 1792168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1793168404Spjd NULL, /* TX_RMDIR */ 1794168404Spjd NULL, /* TX_LINK */ 1795168404Spjd NULL, /* TX_RENAME */ 1796219089Spjd ztest_replay_write, /* TX_WRITE */ 1797219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1798219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1799168404Spjd NULL, /* TX_ACL */ 1800209962Smm NULL, /* TX_CREATE_ACL */ 1801209962Smm NULL, /* TX_CREATE_ATTR */ 1802209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1803209962Smm NULL, /* TX_MKDIR_ACL */ 1804209962Smm NULL, /* TX_MKDIR_ATTR */ 1805209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1806209962Smm NULL, /* TX_WRITE2 */ 1807168404Spjd}; 1808168404Spjd 1809168404Spjd/* 1810219089Spjd * ZIL get_data callbacks 1811219089Spjd */ 1812219089Spjd 1813219089Spjdstatic void 1814219089Spjdztest_get_done(zgd_t *zgd, int error) 1815219089Spjd{ 1816219089Spjd ztest_ds_t 
*zd = zgd->zgd_private; 1817219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1818219089Spjd 1819219089Spjd if (zgd->zgd_db) 1820219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1821219089Spjd 1822219089Spjd ztest_range_unlock(zgd->zgd_rl); 1823219089Spjd ztest_object_unlock(zd, object); 1824219089Spjd 1825219089Spjd if (error == 0 && zgd->zgd_bp) 1826219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1827219089Spjd 1828219089Spjd umem_free(zgd, sizeof (*zgd)); 1829219089Spjd} 1830219089Spjd 1831219089Spjdstatic int 1832219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1833219089Spjd{ 1834219089Spjd ztest_ds_t *zd = arg; 1835219089Spjd objset_t *os = zd->zd_os; 1836219089Spjd uint64_t object = lr->lr_foid; 1837219089Spjd uint64_t offset = lr->lr_offset; 1838219089Spjd uint64_t size = lr->lr_length; 1839219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1840219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1841219089Spjd uint64_t crtxg; 1842219089Spjd dmu_object_info_t doi; 1843219089Spjd dmu_buf_t *db; 1844219089Spjd zgd_t *zgd; 1845219089Spjd int error; 1846219089Spjd 1847219089Spjd ztest_object_lock(zd, object, RL_READER); 1848219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1849219089Spjd if (error) { 1850219089Spjd ztest_object_unlock(zd, object); 1851219089Spjd return (error); 1852219089Spjd } 1853219089Spjd 1854219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1855219089Spjd 1856219089Spjd if (crtxg == 0 || crtxg > txg) { 1857219089Spjd dmu_buf_rele(db, FTAG); 1858219089Spjd ztest_object_unlock(zd, object); 1859219089Spjd return (ENOENT); 1860219089Spjd } 1861219089Spjd 1862219089Spjd dmu_object_info_from_db(db, &doi); 1863219089Spjd dmu_buf_rele(db, FTAG); 1864219089Spjd db = NULL; 1865219089Spjd 1866219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1867219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1868219089Spjd zgd->zgd_private = zd; 1869219089Spjd 1870219089Spjd if (buf != NULL) { /* immediate write */ 1871219089Spjd 
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1872219089Spjd RL_READER); 1873219089Spjd 1874219089Spjd error = dmu_read(os, object, offset, size, buf, 1875219089Spjd DMU_READ_NO_PREFETCH); 1876219089Spjd ASSERT(error == 0); 1877219089Spjd } else { 1878219089Spjd size = doi.doi_data_block_size; 1879219089Spjd if (ISP2(size)) { 1880219089Spjd offset = P2ALIGN(offset, size); 1881219089Spjd } else { 1882219089Spjd ASSERT(offset < size); 1883219089Spjd offset = 0; 1884219089Spjd } 1885219089Spjd 1886219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1887219089Spjd RL_READER); 1888219089Spjd 1889219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1890219089Spjd DMU_READ_NO_PREFETCH); 1891219089Spjd 1892219089Spjd if (error == 0) { 1893243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1894243524Smm if (obp) { 1895243524Smm ASSERT(BP_IS_HOLE(bp)); 1896243524Smm *bp = *obp; 1897243524Smm } 1898243524Smm 1899219089Spjd zgd->zgd_db = db; 1900219089Spjd zgd->zgd_bp = bp; 1901219089Spjd 1902219089Spjd ASSERT(db->db_offset == offset); 1903219089Spjd ASSERT(db->db_size == size); 1904219089Spjd 1905219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1906219089Spjd ztest_get_done, zgd); 1907219089Spjd 1908219089Spjd if (error == 0) 1909219089Spjd return (0); 1910219089Spjd } 1911219089Spjd } 1912219089Spjd 1913219089Spjd ztest_get_done(zgd, error); 1914219089Spjd 1915219089Spjd return (error); 1916219089Spjd} 1917219089Spjd 1918219089Spjdstatic void * 1919219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1920219089Spjd{ 1921219089Spjd char *lr; 1922219089Spjd size_t namesize = name ? 
strlen(name) + 1 : 0; 1923219089Spjd 1924219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1925219089Spjd 1926219089Spjd if (name) 1927219089Spjd bcopy(name, lr + lrsize, namesize); 1928219089Spjd 1929219089Spjd return (lr); 1930219089Spjd} 1931219089Spjd 1932219089Spjdvoid 1933219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1934219089Spjd{ 1935219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1936219089Spjd 1937219089Spjd umem_free(lr, lrsize + namesize); 1938219089Spjd} 1939219089Spjd 1940219089Spjd/* 1941219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1942219089Spjd */ 1943219089Spjdstatic int 1944219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1945219089Spjd{ 1946219089Spjd int missing = 0; 1947219089Spjd int error; 1948219089Spjd 1949219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1950219089Spjd 1951219089Spjd for (int i = 0; i < count; i++, od++) { 1952219089Spjd od->od_object = 0; 1953219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1954219089Spjd sizeof (uint64_t), 1, &od->od_object); 1955219089Spjd if (error) { 1956219089Spjd ASSERT(error == ENOENT); 1957219089Spjd ASSERT(od->od_object == 0); 1958219089Spjd missing++; 1959219089Spjd } else { 1960219089Spjd dmu_buf_t *db; 1961219089Spjd ztest_block_tag_t *bbt; 1962219089Spjd dmu_object_info_t doi; 1963219089Spjd 1964219089Spjd ASSERT(od->od_object != 0); 1965219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1966219089Spjd 1967219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1968219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1969219089Spjd od->od_object, FTAG, &db)); 1970219089Spjd dmu_object_info_from_db(db, &doi); 1971219089Spjd bbt = ztest_bt_bonus(db); 1972219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1973219089Spjd od->od_type = doi.doi_type; 1974219089Spjd od->od_blocksize = doi.doi_data_block_size; 1975219089Spjd od->od_gen = bbt->bt_gen; 1976219089Spjd 
dmu_buf_rele(db, FTAG); 1977219089Spjd ztest_object_unlock(zd, od->od_object); 1978219089Spjd } 1979219089Spjd } 1980219089Spjd 1981219089Spjd return (missing); 1982219089Spjd} 1983219089Spjd 1984219089Spjdstatic int 1985219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1986219089Spjd{ 1987219089Spjd int missing = 0; 1988219089Spjd 1989219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1990219089Spjd 1991219089Spjd for (int i = 0; i < count; i++, od++) { 1992219089Spjd if (missing) { 1993219089Spjd od->od_object = 0; 1994219089Spjd missing++; 1995219089Spjd continue; 1996219089Spjd } 1997219089Spjd 1998219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1999219089Spjd 2000219089Spjd lr->lr_doid = od->od_dir; 2001219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 2002219089Spjd lr->lrz_type = od->od_crtype; 2003219089Spjd lr->lrz_blocksize = od->od_crblocksize; 2004219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 2005219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 2006219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 2007219089Spjd lr->lr_gen = od->od_crgen; 2008219089Spjd lr->lr_crtime[0] = time(NULL); 2009219089Spjd 2010219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 2011219089Spjd ASSERT(missing == 0); 2012219089Spjd od->od_object = 0; 2013219089Spjd missing++; 2014219089Spjd } else { 2015219089Spjd od->od_object = lr->lr_foid; 2016219089Spjd od->od_type = od->od_crtype; 2017219089Spjd od->od_blocksize = od->od_crblocksize; 2018219089Spjd od->od_gen = od->od_crgen; 2019219089Spjd ASSERT(od->od_object != 0); 2020219089Spjd } 2021219089Spjd 2022219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2023219089Spjd } 2024219089Spjd 2025219089Spjd return (missing); 2026219089Spjd} 2027219089Spjd 2028219089Spjdstatic int 2029219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 2030219089Spjd{ 2031219089Spjd int missing = 0; 2032219089Spjd int error; 2033219089Spjd 2034219089Spjd 
ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 2035219089Spjd 2036219089Spjd od += count - 1; 2037219089Spjd 2038219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 2039219089Spjd if (missing) { 2040219089Spjd missing++; 2041219089Spjd continue; 2042219089Spjd } 2043219089Spjd 2044243524Smm /* 2045243524Smm * No object was found. 2046243524Smm */ 2047219089Spjd if (od->od_object == 0) 2048219089Spjd continue; 2049219089Spjd 2050219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 2051219089Spjd 2052219089Spjd lr->lr_doid = od->od_dir; 2053219089Spjd 2054219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 2055219089Spjd ASSERT3U(error, ==, ENOSPC); 2056219089Spjd missing++; 2057219089Spjd } else { 2058219089Spjd od->od_object = 0; 2059219089Spjd } 2060219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2061219089Spjd } 2062219089Spjd 2063219089Spjd return (missing); 2064219089Spjd} 2065219089Spjd 2066219089Spjdstatic int 2067219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 2068219089Spjd void *data) 2069219089Spjd{ 2070219089Spjd lr_write_t *lr; 2071219089Spjd int error; 2072219089Spjd 2073219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2074219089Spjd 2075219089Spjd lr->lr_foid = object; 2076219089Spjd lr->lr_offset = offset; 2077219089Spjd lr->lr_length = size; 2078219089Spjd lr->lr_blkoff = 0; 2079219089Spjd BP_ZERO(&lr->lr_blkptr); 2080219089Spjd 2081219089Spjd bcopy(data, lr + 1, size); 2082219089Spjd 2083219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2084219089Spjd 2085219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2086219089Spjd 2087219089Spjd return (error); 2088219089Spjd} 2089219089Spjd 2090219089Spjdstatic int 2091219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2092219089Spjd{ 2093219089Spjd lr_truncate_t *lr; 2094219089Spjd int error; 2095219089Spjd 2096219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 
2097219089Spjd 2098219089Spjd lr->lr_foid = object; 2099219089Spjd lr->lr_offset = offset; 2100219089Spjd lr->lr_length = size; 2101219089Spjd 2102219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2103219089Spjd 2104219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2105219089Spjd 2106219089Spjd return (error); 2107219089Spjd} 2108219089Spjd 2109219089Spjdstatic int 2110219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2111219089Spjd{ 2112219089Spjd lr_setattr_t *lr; 2113219089Spjd int error; 2114219089Spjd 2115219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2116219089Spjd 2117219089Spjd lr->lr_foid = object; 2118219089Spjd lr->lr_size = 0; 2119219089Spjd lr->lr_mode = 0; 2120219089Spjd 2121219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2122219089Spjd 2123219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2124219089Spjd 2125219089Spjd return (error); 2126219089Spjd} 2127219089Spjd 2128219089Spjdstatic void 2129219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2130219089Spjd{ 2131219089Spjd objset_t *os = zd->zd_os; 2132219089Spjd dmu_tx_t *tx; 2133219089Spjd uint64_t txg; 2134219089Spjd rl_t *rl; 2135219089Spjd 2136219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2137219089Spjd 2138219089Spjd ztest_object_lock(zd, object, RL_READER); 2139219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2140219089Spjd 2141219089Spjd tx = dmu_tx_create(os); 2142219089Spjd 2143219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2144219089Spjd 2145219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2146219089Spjd 2147219089Spjd if (txg != 0) { 2148219089Spjd dmu_prealloc(os, object, offset, size, tx); 2149219089Spjd dmu_tx_commit(tx); 2150219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2151219089Spjd } else { 2152219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2153219089Spjd } 2154219089Spjd 2155219089Spjd ztest_range_unlock(rl); 2156219089Spjd 
ztest_object_unlock(zd, object); 2157219089Spjd} 2158219089Spjd 2159219089Spjdstatic void 2160219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2161219089Spjd{ 2162243524Smm int err; 2163219089Spjd ztest_block_tag_t wbt; 2164219089Spjd dmu_object_info_t doi; 2165219089Spjd enum ztest_io_type io_type; 2166219089Spjd uint64_t blocksize; 2167219089Spjd void *data; 2168219089Spjd 2169219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2170219089Spjd blocksize = doi.doi_data_block_size; 2171219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2172219089Spjd 2173219089Spjd /* 2174219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2175219089Spjd */ 2176219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2177219089Spjd if (ztest_random(2) == 0) 2178219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2179219089Spjd 2180224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2181224526Smm 2182219089Spjd switch (io_type) { 2183219089Spjd 2184219089Spjd case ZTEST_IO_WRITE_TAG: 2185219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2186219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2187219089Spjd break; 2188219089Spjd 2189219089Spjd case ZTEST_IO_WRITE_PATTERN: 2190219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2191219089Spjd if (ztest_random(2) == 0) { 2192219089Spjd /* 2193219089Spjd * Induce fletcher2 collisions to ensure that 2194219089Spjd * zio_ddt_collision() detects and resolves them 2195219089Spjd * when using fletcher2-verify for deduplication. 
2196219089Spjd */ 2197219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2198219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2199219089Spjd } 2200219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2201219089Spjd break; 2202219089Spjd 2203219089Spjd case ZTEST_IO_WRITE_ZEROES: 2204219089Spjd bzero(data, blocksize); 2205219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2206219089Spjd break; 2207219089Spjd 2208219089Spjd case ZTEST_IO_TRUNCATE: 2209219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2210219089Spjd break; 2211219089Spjd 2212219089Spjd case ZTEST_IO_SETATTR: 2213219089Spjd (void) ztest_setattr(zd, object); 2214219089Spjd break; 2215243524Smm 2216243524Smm case ZTEST_IO_REWRITE: 2217243524Smm (void) rw_rdlock(&ztest_name_lock); 2218243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2219243524Smm ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), 2220243524Smm B_FALSE); 2221243524Smm VERIFY(err == 0 || err == ENOSPC); 2222243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2223243524Smm ZFS_PROP_COMPRESSION, 2224243524Smm ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), 2225243524Smm B_FALSE); 2226243524Smm VERIFY(err == 0 || err == ENOSPC); 2227243524Smm (void) rw_unlock(&ztest_name_lock); 2228243524Smm 2229243524Smm VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, 2230243524Smm DMU_READ_NO_PREFETCH)); 2231243524Smm 2232243524Smm (void) ztest_write(zd, object, offset, blocksize, data); 2233243524Smm break; 2234219089Spjd } 2235219089Spjd 2236224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2237224526Smm 2238219089Spjd umem_free(data, blocksize); 2239219089Spjd} 2240219089Spjd 2241219089Spjd/* 2242219089Spjd * Initialize an object description template. 
2243219089Spjd */ 2244219089Spjdstatic void 2245219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2246219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2247219089Spjd{ 2248219089Spjd od->od_dir = ZTEST_DIROBJ; 2249219089Spjd od->od_object = 0; 2250219089Spjd 2251219089Spjd od->od_crtype = type; 2252219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2253219089Spjd od->od_crgen = gen; 2254219089Spjd 2255219089Spjd od->od_type = DMU_OT_NONE; 2256219089Spjd od->od_blocksize = 0; 2257219089Spjd od->od_gen = 0; 2258219089Spjd 2259219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2260219089Spjd tag, (int64_t)id, index); 2261219089Spjd} 2262219089Spjd 2263219089Spjd/* 2264219089Spjd * Lookup or create the objects for a test using the od template. 2265219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2266219089Spjd * remove any existing objects and create new ones. Otherwise, 2267219089Spjd * use the existing objects. 
2268219089Spjd */ 2269219089Spjdstatic int 2270219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2271219089Spjd{ 2272219089Spjd int count = size / sizeof (*od); 2273219089Spjd int rv = 0; 2274219089Spjd 2275219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2276219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2277219089Spjd (ztest_remove(zd, od, count) != 0 || 2278219089Spjd ztest_create(zd, od, count) != 0)) 2279219089Spjd rv = -1; 2280219089Spjd zd->zd_od = od; 2281219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2282219089Spjd 2283219089Spjd return (rv); 2284219089Spjd} 2285219089Spjd 2286219089Spjd/* ARGSUSED */ 2287219089Spjdvoid 2288219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2289219089Spjd{ 2290219089Spjd zilog_t *zilog = zd->zd_zilog; 2291219089Spjd 2292224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2293224526Smm 2294219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2295219089Spjd 2296219089Spjd /* 2297219089Spjd * Remember the committed values in zd, which is in parent/child 2298219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2299219089Spjd * will verify that the log really does contain this record. 2300219089Spjd */ 2301219089Spjd mutex_enter(&zilog->zl_lock); 2302236143Smm ASSERT(zd->zd_shared != NULL); 2303236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2304236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2305219089Spjd mutex_exit(&zilog->zl_lock); 2306224526Smm 2307224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2308219089Spjd} 2309219089Spjd 2310219089Spjd/* 2311224526Smm * This function is designed to simulate the operations that occur during a 2312224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2313224526Smm * attempt to expose any implicit assumptions about ZIL management. 
2314224526Smm */ 2315224526Smm/* ARGSUSED */ 2316224526Smmvoid 2317224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2318224526Smm{ 2319224526Smm objset_t *os = zd->zd_os; 2320224526Smm 2321243524Smm /* 2322243524Smm * We grab the zd_dirobj_lock to ensure that no other thread is 2323243524Smm * updating the zil (i.e. adding in-memory log records) and the 2324243524Smm * zd_zilog_lock to block any I/O. 2325243524Smm */ 2326243524Smm VERIFY0(mutex_lock(&zd->zd_dirobj_lock)); 2327224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2328224526Smm 2329224526Smm /* zfsvfs_teardown() */ 2330224526Smm zil_close(zd->zd_zilog); 2331224526Smm 2332224526Smm /* zfsvfs_setup() */ 2333224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2334224526Smm zil_replay(os, zd, ztest_replay_vector); 2335224526Smm 2336224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2337239620Smm VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2338224526Smm} 2339224526Smm 2340224526Smm/* 2341168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2342168404Spjd * or create a pool with a bad vdev spec. 2343168404Spjd */ 2344219089Spjd/* ARGSUSED */ 2345168404Spjdvoid 2346219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2347168404Spjd{ 2348236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2349168404Spjd spa_t *spa; 2350168404Spjd nvlist_t *nvroot; 2351168404Spjd 2352168404Spjd /* 2353168404Spjd * Attempt to create using a bad file. 2354168404Spjd */ 2355243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2356219089Spjd VERIFY3U(ENOENT, ==, 2357248571Smm spa_create("ztest_bad_file", nvroot, NULL, NULL)); 2358168404Spjd nvlist_free(nvroot); 2359168404Spjd 2360168404Spjd /* 2361168404Spjd * Attempt to create using a bad mirror. 
2362168404Spjd */ 2363243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); 2364219089Spjd VERIFY3U(ENOENT, ==, 2365248571Smm spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); 2366168404Spjd nvlist_free(nvroot); 2367168404Spjd 2368168404Spjd /* 2369168404Spjd * Attempt to create an existing pool. It shouldn't matter 2370168404Spjd * what's in the nvroot; we should fail with EEXIST. 2371168404Spjd */ 2372236143Smm (void) rw_rdlock(&ztest_name_lock); 2373243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2374248571Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); 2375168404Spjd nvlist_free(nvroot); 2376236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2377236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2378219089Spjd spa_close(spa, FTAG); 2379168404Spjd 2380236143Smm (void) rw_unlock(&ztest_name_lock); 2381168404Spjd} 2382168404Spjd 2383243505Smm/* ARGSUSED */ 2384243505Smmvoid 2385243505Smmztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) 2386243505Smm{ 2387243505Smm spa_t *spa; 2388243505Smm uint64_t initial_version = SPA_VERSION_INITIAL; 2389243505Smm uint64_t version, newversion; 2390243505Smm nvlist_t *nvroot, *props; 2391243505Smm char *name; 2392243505Smm 2393243505Smm VERIFY0(mutex_lock(&ztest_vdev_lock)); 2394243505Smm name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); 2395243505Smm 2396243505Smm /* 2397243505Smm * Clean up from previous runs. 2398243505Smm */ 2399243505Smm (void) spa_destroy(name); 2400243505Smm 2401243505Smm nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 2402243505Smm 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); 2403243505Smm 2404243505Smm /* 2405243505Smm * If we're configuring a RAIDZ device then make sure that the 2406243505Smm * the initial version is capable of supporting that feature. 
2407243505Smm */ 2408243505Smm switch (ztest_opts.zo_raidz_parity) { 2409243505Smm case 0: 2410243505Smm case 1: 2411243505Smm initial_version = SPA_VERSION_INITIAL; 2412243505Smm break; 2413243505Smm case 2: 2414243505Smm initial_version = SPA_VERSION_RAIDZ2; 2415243505Smm break; 2416243505Smm case 3: 2417243505Smm initial_version = SPA_VERSION_RAIDZ3; 2418243505Smm break; 2419243505Smm } 2420243505Smm 2421243505Smm /* 2422243505Smm * Create a pool with a spa version that can be upgraded. Pick 2423243505Smm * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 2424243505Smm */ 2425243505Smm do { 2426243505Smm version = ztest_random_spa_version(initial_version); 2427243505Smm } while (version > SPA_VERSION_BEFORE_FEATURES); 2428243505Smm 2429243505Smm props = fnvlist_alloc(); 2430243505Smm fnvlist_add_uint64(props, 2431243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), version); 2432248571Smm VERIFY0(spa_create(name, nvroot, props, NULL)); 2433243505Smm fnvlist_free(nvroot); 2434243505Smm fnvlist_free(props); 2435243505Smm 2436243505Smm VERIFY0(spa_open(name, &spa, FTAG)); 2437243505Smm VERIFY3U(spa_version(spa), ==, version); 2438243505Smm newversion = ztest_random_spa_version(version + 1); 2439243505Smm 2440243505Smm if (ztest_opts.zo_verbose >= 4) { 2441243505Smm (void) printf("upgrading spa version from %llu to %llu\n", 2442243505Smm (u_longlong_t)version, (u_longlong_t)newversion); 2443243505Smm } 2444243505Smm 2445243505Smm spa_upgrade(spa, newversion); 2446243505Smm VERIFY3U(spa_version(spa), >, version); 2447243505Smm VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, 2448243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION))); 2449243505Smm spa_close(spa, FTAG); 2450243505Smm 2451243505Smm strfree(name); 2452243505Smm VERIFY0(mutex_unlock(&ztest_vdev_lock)); 2453243505Smm} 2454243505Smm 2455185029Spjdstatic vdev_t * 2456185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2457185029Spjd{ 2458185029Spjd vdev_t *mvd; 
2459185029Spjd 2460185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2461185029Spjd return (vd); 2462185029Spjd 2463185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2464185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2465185029Spjd NULL) 2466185029Spjd return (mvd); 2467185029Spjd 2468185029Spjd return (NULL); 2469185029Spjd} 2470185029Spjd 2471168404Spjd/* 2472219089Spjd * Find the first available hole which can be used as a top-level. 2473219089Spjd */ 2474219089Spjdint 2475219089Spjdfind_vdev_hole(spa_t *spa) 2476219089Spjd{ 2477219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2478219089Spjd int c; 2479219089Spjd 2480219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2481219089Spjd 2482219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2483219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2484219089Spjd 2485219089Spjd if (cvd->vdev_ishole) 2486219089Spjd break; 2487219089Spjd } 2488219089Spjd return (c); 2489219089Spjd} 2490219089Spjd 2491219089Spjd/* 2492168404Spjd * Verify that vdev_add() works as expected. 2493168404Spjd */ 2494219089Spjd/* ARGSUSED */ 2495168404Spjdvoid 2496219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2497168404Spjd{ 2498219089Spjd ztest_shared_t *zs = ztest_shared; 2499236143Smm spa_t *spa = ztest_spa; 2500219089Spjd uint64_t leaves; 2501219089Spjd uint64_t guid; 2502168404Spjd nvlist_t *nvroot; 2503168404Spjd int error; 2504168404Spjd 2505236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2506248571Smm leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2507168404Spjd 2508185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2509168404Spjd 2510219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2511168404Spjd 2512185029Spjd /* 2513219089Spjd * If we have slogs then remove them 1/4 of the time. 
2514185029Spjd */ 2515219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2516219089Spjd /* 2517219089Spjd * Grab the guid from the head of the log class rotor. 2518219089Spjd */ 2519219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2520185029Spjd 2521219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2522168404Spjd 2523219089Spjd /* 2524219089Spjd * We have to grab the zs_name_lock as writer to 2525219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2526219089Spjd * and destroying a dataset. Removing the slog will 2527219089Spjd * grab a reference on the dataset which may cause 2528219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2529219089Spjd * leaving the dataset in an inconsistent state. 2530219089Spjd */ 2531236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2532219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2533236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2534168404Spjd 2535219089Spjd if (error && error != EEXIST) 2536219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2537219089Spjd } else { 2538219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2539219089Spjd 2540219089Spjd /* 2541219089Spjd * Make 1/4 of the devices be log devices. 2542219089Spjd */ 2543243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, 2544236143Smm ztest_opts.zo_vdev_size, 0, 2545236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2546236143Smm zs->zs_mirrors, 1); 2547219089Spjd 2548219089Spjd error = spa_vdev_add(spa, nvroot); 2549219089Spjd nvlist_free(nvroot); 2550219089Spjd 2551219089Spjd if (error == ENOSPC) 2552219089Spjd ztest_record_enospc("spa_vdev_add"); 2553219089Spjd else if (error != 0) 2554219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2555219089Spjd } 2556219089Spjd 2557236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2558168404Spjd} 2559168404Spjd 2560185029Spjd/* 2561185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
2562185029Spjd */ 2563219089Spjd/* ARGSUSED */ 2564185029Spjdvoid 2565219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2566168404Spjd{ 2567219089Spjd ztest_shared_t *zs = ztest_shared; 2568236143Smm spa_t *spa = ztest_spa; 2569185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2570185029Spjd spa_aux_vdev_t *sav; 2571185029Spjd char *aux; 2572185029Spjd uint64_t guid = 0; 2573185029Spjd int error; 2574168404Spjd 2575185029Spjd if (ztest_random(2) == 0) { 2576185029Spjd sav = &spa->spa_spares; 2577185029Spjd aux = ZPOOL_CONFIG_SPARES; 2578185029Spjd } else { 2579185029Spjd sav = &spa->spa_l2cache; 2580185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2581185029Spjd } 2582185029Spjd 2583236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2584185029Spjd 2585185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2586185029Spjd 2587185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2588185029Spjd /* 2589185029Spjd * Pick a random device to remove. 2590185029Spjd */ 2591185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2592185029Spjd } else { 2593185029Spjd /* 2594185029Spjd * Find an unused device we can add. 2595185029Spjd */ 2596219089Spjd zs->zs_vdev_aux = 0; 2597185029Spjd for (;;) { 2598185029Spjd char path[MAXPATHLEN]; 2599185029Spjd int c; 2600236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2601236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2602236143Smm zs->zs_vdev_aux); 2603185029Spjd for (c = 0; c < sav->sav_count; c++) 2604185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2605185029Spjd path) == 0) 2606185029Spjd break; 2607185029Spjd if (c == sav->sav_count && 2608185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2609185029Spjd break; 2610219089Spjd zs->zs_vdev_aux++; 2611168404Spjd } 2612168404Spjd } 2613168404Spjd 2614185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2615168404Spjd 2616185029Spjd if (guid == 0) { 2617185029Spjd /* 2618185029Spjd * Add a new device. 
2619185029Spjd */ 2620243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2621236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2622185029Spjd error = spa_vdev_add(spa, nvroot); 2623185029Spjd if (error != 0) 2624185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2625185029Spjd nvlist_free(nvroot); 2626185029Spjd } else { 2627185029Spjd /* 2628185029Spjd * Remove an existing device. Sometimes, dirty its 2629185029Spjd * vdev state first to make sure we handle removal 2630185029Spjd * of devices that have pending state changes. 2631185029Spjd */ 2632185029Spjd if (ztest_random(2) == 0) 2633219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2634185029Spjd 2635185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2636185029Spjd if (error != 0 && error != EBUSY) 2637185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2638185029Spjd } 2639185029Spjd 2640236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2641168404Spjd} 2642168404Spjd 2643168404Spjd/* 2644219089Spjd * split a pool if it has mirror tlvdevs 2645219089Spjd */ 2646219089Spjd/* ARGSUSED */ 2647219089Spjdvoid 2648219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2649219089Spjd{ 2650219089Spjd ztest_shared_t *zs = ztest_shared; 2651236143Smm spa_t *spa = ztest_spa; 2652219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2653219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2654219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2655219089Spjd int error = 0; 2656219089Spjd 2657236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2658219089Spjd 2659219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2660236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2661236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2662219089Spjd return; 2663219089Spjd } 2664219089Spjd 2665219089Spjd /* clean up the old pool, if any */ 2666219089Spjd (void) spa_destroy("splitp"); 2667219089Spjd 2668219089Spjd 
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2669219089Spjd 2670219089Spjd /* generate a config from the existing config */ 2671219089Spjd mutex_enter(&spa->spa_props_lock); 2672219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2673219089Spjd &tree) == 0); 2674219089Spjd mutex_exit(&spa->spa_props_lock); 2675219089Spjd 2676219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2677219089Spjd &children) == 0); 2678219089Spjd 2679219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2680219089Spjd for (c = 0; c < children; c++) { 2681219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2682219089Spjd nvlist_t **mchild; 2683219089Spjd uint_t mchildren; 2684219089Spjd 2685219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2686219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2687219089Spjd 0) == 0); 2688219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2689219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2690219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2691219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2692219089Spjd if (lastlogid == 0) 2693219089Spjd lastlogid = schildren; 2694219089Spjd ++schildren; 2695219089Spjd continue; 2696219089Spjd } 2697219089Spjd lastlogid = 0; 2698219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2699219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2700219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2701219089Spjd } 2702219089Spjd 2703219089Spjd /* OK, create a config that can be used to split */ 2704219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2705219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2706219089Spjd VDEV_TYPE_ROOT) == 0); 2707219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2708219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2709219089Spjd 2710219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2711219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2712219089Spjd 2713219089Spjd for (c = 0; c < schildren; c++) 2714219089Spjd nvlist_free(schild[c]); 2715219089Spjd free(schild); 2716219089Spjd nvlist_free(split); 2717219089Spjd 2718219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2719219089Spjd 2720236143Smm (void) rw_wrlock(&ztest_name_lock); 2721219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2722236143Smm (void) rw_unlock(&ztest_name_lock); 2723219089Spjd 2724219089Spjd nvlist_free(config); 2725219089Spjd 2726219089Spjd if (error == 0) { 2727219089Spjd (void) printf("successful split - results:\n"); 2728219089Spjd mutex_enter(&spa_namespace_lock); 2729219089Spjd show_pool_stats(spa); 2730219089Spjd show_pool_stats(spa_lookup("splitp")); 2731219089Spjd mutex_exit(&spa_namespace_lock); 2732219089Spjd ++zs->zs_splits; 2733219089Spjd --zs->zs_mirrors; 2734219089Spjd } 2735236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2736219089Spjd 2737219089Spjd} 2738219089Spjd 2739219089Spjd/* 2740168404Spjd * Verify that we can attach and detach devices. 
2741168404Spjd */ 2742219089Spjd/* ARGSUSED */ 2743168404Spjdvoid 2744219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2745168404Spjd{ 2746219089Spjd ztest_shared_t *zs = ztest_shared; 2747236143Smm spa_t *spa = ztest_spa; 2748185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2749168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2750168404Spjd vdev_t *oldvd, *newvd, *pvd; 2751185029Spjd nvlist_t *root; 2752219089Spjd uint64_t leaves; 2753168404Spjd uint64_t leaf, top; 2754168404Spjd uint64_t ashift = ztest_get_ashift(); 2755209962Smm uint64_t oldguid, pguid; 2756254112Sdelphij uint64_t oldsize, newsize; 2757168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2758168404Spjd int replacing; 2759185029Spjd int oldvd_has_siblings = B_FALSE; 2760185029Spjd int newvd_is_spare = B_FALSE; 2761185029Spjd int oldvd_is_log; 2762168404Spjd int error, expected_error; 2763168404Spjd 2764236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2765236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2766168404Spjd 2767185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2768168404Spjd 2769168404Spjd /* 2770168404Spjd * Decide whether to do an attach or a replace. 2771168404Spjd */ 2772168404Spjd replacing = ztest_random(2); 2773168404Spjd 2774168404Spjd /* 2775168404Spjd * Pick a random top-level vdev. 2776168404Spjd */ 2777219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2778168404Spjd 2779168404Spjd /* 2780168404Spjd * Pick a random leaf within it. 2781168404Spjd */ 2782168404Spjd leaf = ztest_random(leaves); 2783168404Spjd 2784168404Spjd /* 2785185029Spjd * Locate this vdev. 
2786168404Spjd */ 2787185029Spjd oldvd = rvd->vdev_child[top]; 2788219089Spjd if (zs->zs_mirrors >= 1) { 2789209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2790219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2791236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2792209962Smm } 2793236143Smm if (ztest_opts.zo_raidz > 1) { 2794209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2795236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2796236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2797209962Smm } 2798168404Spjd 2799168404Spjd /* 2800185029Spjd * If we're already doing an attach or replace, oldvd may be a 2801185029Spjd * mirror vdev -- in which case, pick a random child. 2802168404Spjd */ 2803185029Spjd while (oldvd->vdev_children != 0) { 2804185029Spjd oldvd_has_siblings = B_TRUE; 2805209962Smm ASSERT(oldvd->vdev_children >= 2); 2806209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2807185029Spjd } 2808168404Spjd 2809185029Spjd oldguid = oldvd->vdev_guid; 2810219089Spjd oldsize = vdev_get_min_asize(oldvd); 2811185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2812185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2813185029Spjd pvd = oldvd->vdev_parent; 2814209962Smm pguid = pvd->vdev_guid; 2815185029Spjd 2816168404Spjd /* 2817185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2818168404Spjd */ 2819185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2820185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2821209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2822209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2823209962Smm error != ENOTSUP) 2824209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2825236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2826185029Spjd return; 2827185029Spjd } 2828168404Spjd 2829168404Spjd /* 2830185029Spjd * For the new vdev, choose with equal probability between the two 2831185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2832168404Spjd */ 2833185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2834185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2835185029Spjd newvd_is_spare = B_TRUE; 2836185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2837185029Spjd } else { 2838185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2839236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2840236143Smm top * leaves + leaf); 2841185029Spjd if (ztest_random(2) == 0) 2842185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2843185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2844185029Spjd } 2845168404Spjd 2846185029Spjd if (newvd) { 2847219089Spjd newsize = vdev_get_min_asize(newvd); 2848185029Spjd } else { 2849185029Spjd /* 2850185029Spjd * Make newsize a little bigger or smaller than oldsize. 2851185029Spjd * If it's smaller, the attach should fail. 2852185029Spjd * If it's larger, and we're doing a replace, 2853185029Spjd * we should get dynamic LUN growth when we're done. 2854185029Spjd */ 2855185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2856185029Spjd } 2857185029Spjd 2858168404Spjd /* 2859168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2860168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 
2861168404Spjd * 2862168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2863168404Spjd * 2864168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2865168404Spjd */ 2866185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2867185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2868185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2869185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2870185029Spjd expected_error = ENOTSUP; 2871185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2872185029Spjd expected_error = ENOTSUP; 2873185029Spjd else if (newvd == oldvd) 2874185029Spjd expected_error = replacing ? 0 : EBUSY; 2875185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2876168404Spjd expected_error = EBUSY; 2877168404Spjd else if (newsize < oldsize) 2878168404Spjd expected_error = EOVERFLOW; 2879168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2880168404Spjd expected_error = EDOM; 2881168404Spjd else 2882168404Spjd expected_error = 0; 2883168404Spjd 2884185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2885168404Spjd 2886168404Spjd /* 2887168404Spjd * Build the nvlist describing newpath. 2888168404Spjd */ 2889243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, 2890185029Spjd ashift, 0, 0, 0, 1); 2891168404Spjd 2892185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2893168404Spjd 2894168404Spjd nvlist_free(root); 2895168404Spjd 2896168404Spjd /* 2897168404Spjd * If our parent was the replacing vdev, but the replace completed, 2898168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2899168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2900168404Spjd */ 2901168404Spjd if (expected_error == ENOTSUP && 2902168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2903168404Spjd expected_error = error; 2904168404Spjd 2905168404Spjd /* 2906168404Spjd * If someone grew the LUN, the replacement may be too small. 
2907168404Spjd */ 2908185029Spjd if (error == EOVERFLOW || error == EBUSY) 2909168404Spjd expected_error = error; 2910168404Spjd 2911185029Spjd /* XXX workaround 6690467 */ 2912185029Spjd if (error != expected_error && expected_error != EBUSY) { 2913185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2914185029Spjd "returned %d, expected %d", 2915254112Sdelphij oldpath, oldsize, newpath, 2916254112Sdelphij newsize, replacing, error, expected_error); 2917168404Spjd } 2918168404Spjd 2919236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2920168404Spjd} 2921168404Spjd 2922168404Spjd/* 2923219089Spjd * Callback function which expands the physical size of the vdev. 2924168404Spjd */ 2925219089Spjdvdev_t * 2926219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2927168404Spjd{ 2928219089Spjd spa_t *spa = vd->vdev_spa; 2929219089Spjd size_t *newsize = arg; 2930168404Spjd size_t fsize; 2931168404Spjd int fd; 2932168404Spjd 2933219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2934219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2935168404Spjd 2936219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2937219089Spjd return (vd); 2938219089Spjd 2939219089Spjd fsize = lseek(fd, 0, SEEK_END); 2940219089Spjd (void) ftruncate(fd, *newsize); 2941219089Spjd 2942236143Smm if (ztest_opts.zo_verbose >= 6) { 2943219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2944219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2945219089Spjd } 2946219089Spjd (void) close(fd); 2947219089Spjd return (NULL); 2948219089Spjd} 2949219089Spjd 2950219089Spjd/* 2951219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
2952219089Spjd */ 2953219089Spjd/* ARGSUSED */ 2954219089Spjdvdev_t * 2955219089Spjdonline_vdev(vdev_t *vd, void *arg) 2956219089Spjd{ 2957219089Spjd spa_t *spa = vd->vdev_spa; 2958219089Spjd vdev_t *tvd = vd->vdev_top; 2959219089Spjd uint64_t guid = vd->vdev_guid; 2960219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2961219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2962219089Spjd int error; 2963219089Spjd 2964219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2965219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2966219089Spjd 2967219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2968219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2969219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2970219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2971219089Spjd 2972168404Spjd /* 2973219089Spjd * If vdev_online returned an error or the underlying vdev_open 2974219089Spjd * failed then we abort the expand. The only way to know that 2975219089Spjd * vdev_open fails is by checking the returned newstate. 2976168404Spjd */ 2977219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2978236143Smm if (ztest_opts.zo_verbose >= 5) { 2979219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2980219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2981219089Spjd } 2982219089Spjd return (vd); 2983219089Spjd } 2984219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2985168404Spjd 2986219089Spjd /* 2987219089Spjd * Since we dropped the lock we need to ensure that we're 2988219089Spjd * still talking to the original vdev. It's possible this 2989219089Spjd * vdev may have been detached/replaced while we were 2990219089Spjd * trying to online it. 
2991219089Spjd */ 2992219089Spjd if (generation != spa->spa_config_generation) { 2993236143Smm if (ztest_opts.zo_verbose >= 5) { 2994219089Spjd (void) printf("vdev configuration has changed, " 2995219089Spjd "guid %llu, state %llu, expected gen %llu, " 2996219089Spjd "got gen %llu\n", 2997219089Spjd (u_longlong_t)guid, 2998219089Spjd (u_longlong_t)tvd->vdev_state, 2999219089Spjd (u_longlong_t)generation, 3000219089Spjd (u_longlong_t)spa->spa_config_generation); 3001219089Spjd } 3002219089Spjd return (vd); 3003219089Spjd } 3004219089Spjd return (NULL); 3005219089Spjd} 3006168404Spjd 3007219089Spjd/* 3008219089Spjd * Traverse the vdev tree calling the supplied function. 3009219089Spjd * We continue to walk the tree until we either have walked all 3010219089Spjd * children or we receive a non-NULL return from the callback. 3011219089Spjd * If a NULL callback is passed, then we just return back the first 3012219089Spjd * leaf vdev we encounter. 3013219089Spjd */ 3014219089Spjdvdev_t * 3015219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 3016219089Spjd{ 3017219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 3018219089Spjd if (func == NULL) 3019219089Spjd return (vd); 3020219089Spjd else 3021219089Spjd return (func(vd, arg)); 3022219089Spjd } 3023168404Spjd 3024219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3025219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3026219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3027219089Spjd return (cvd); 3028219089Spjd } 3029219089Spjd return (NULL); 3030219089Spjd} 3031219089Spjd 3032219089Spjd/* 3033219089Spjd * Verify that dynamic LUN growth works as expected. 
3034219089Spjd */ 3035219089Spjd/* ARGSUSED */ 3036219089Spjdvoid 3037219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3038219089Spjd{ 3039236143Smm spa_t *spa = ztest_spa; 3040219089Spjd vdev_t *vd, *tvd; 3041219089Spjd metaslab_class_t *mc; 3042219089Spjd metaslab_group_t *mg; 3043219089Spjd size_t psize, newsize; 3044219089Spjd uint64_t top; 3045219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3046219089Spjd 3047236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 3048219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3049219089Spjd 3050219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3051219089Spjd 3052219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3053219089Spjd mg = tvd->vdev_mg; 3054219089Spjd mc = mg->mg_class; 3055219089Spjd old_ms_count = tvd->vdev_ms_count; 3056219089Spjd old_class_space = metaslab_class_get_space(mc); 3057219089Spjd 3058219089Spjd /* 3059219089Spjd * Determine the size of the first leaf vdev associated with 3060219089Spjd * our top-level device. 3061219089Spjd */ 3062219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3063219089Spjd ASSERT3P(vd, !=, NULL); 3064219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3065219089Spjd 3066219089Spjd psize = vd->vdev_psize; 3067219089Spjd 3068219089Spjd /* 3069219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3070219089Spjd * original size, and it has a valid psize. 
3071219089Spjd */ 3072219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3073236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3074219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3075236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3076219089Spjd return; 3077219089Spjd } 3078219089Spjd ASSERT(psize > 0); 3079219089Spjd newsize = psize + psize / 8; 3080219089Spjd ASSERT3U(newsize, >, psize); 3081219089Spjd 3082236143Smm if (ztest_opts.zo_verbose >= 6) { 3083219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3084219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3085219089Spjd } 3086219089Spjd 3087219089Spjd /* 3088219089Spjd * Growing the vdev is a two step process: 3089219089Spjd * 1). expand the physical size (i.e. relabel) 3090219089Spjd * 2). online the vdev to create the new metaslabs 3091219089Spjd */ 3092219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3093219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3094219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3095236143Smm if (ztest_opts.zo_verbose >= 5) { 3096219089Spjd (void) printf("Could not expand LUN because " 3097219089Spjd "the vdev configuration changed.\n"); 3098168404Spjd } 3099219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3100236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3101219089Spjd return; 3102168404Spjd } 3103168404Spjd 3104219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3105219089Spjd 3106219089Spjd /* 3107219089Spjd * Expanding the LUN will update the config asynchronously, 3108219089Spjd * thus we must wait for the async thread to complete any 3109219089Spjd * pending tasks before proceeding. 
3110219089Spjd */ 3111219089Spjd for (;;) { 3112219089Spjd boolean_t done; 3113219089Spjd mutex_enter(&spa->spa_async_lock); 3114219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3115219089Spjd mutex_exit(&spa->spa_async_lock); 3116219089Spjd if (done) 3117219089Spjd break; 3118219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3119219089Spjd (void) poll(NULL, 0, 100); 3120219089Spjd } 3121219089Spjd 3122219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3123219089Spjd 3124219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3125219089Spjd new_ms_count = tvd->vdev_ms_count; 3126219089Spjd new_class_space = metaslab_class_get_space(mc); 3127219089Spjd 3128219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3129236143Smm if (ztest_opts.zo_verbose >= 5) { 3130219089Spjd (void) printf("Could not verify LUN expansion due to " 3131219089Spjd "intervening vdev offline or remove.\n"); 3132219089Spjd } 3133219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3134236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3135219089Spjd return; 3136219089Spjd } 3137219089Spjd 3138219089Spjd /* 3139219089Spjd * Make sure we were able to grow the vdev. 3140219089Spjd */ 3141219089Spjd if (new_ms_count <= old_ms_count) 3142219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 3143219089Spjd old_ms_count, new_ms_count); 3144219089Spjd 3145219089Spjd /* 3146219089Spjd * Make sure we were able to grow the pool. 
3147219089Spjd */ 3148219089Spjd if (new_class_space <= old_class_space) 3149219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 3150219089Spjd old_class_space, new_class_space); 3151219089Spjd 3152236143Smm if (ztest_opts.zo_verbose >= 5) { 3153219089Spjd char oldnumbuf[6], newnumbuf[6]; 3154219089Spjd 3155219089Spjd nicenum(old_class_space, oldnumbuf); 3156219089Spjd nicenum(new_class_space, newnumbuf); 3157219089Spjd (void) printf("%s grew from %s to %s\n", 3158219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3159219089Spjd } 3160219089Spjd 3161219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3162236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3163168404Spjd} 3164168404Spjd 3165219089Spjd/* 3166219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3167219089Spjd */ 3168168404Spjd/* ARGSUSED */ 3169168404Spjdstatic void 3170219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3171168404Spjd{ 3172168404Spjd /* 3173219089Spjd * Create the objects common to all ztest datasets. 
3174168404Spjd */ 3175219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3176168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3177219089Spjd} 3178168404Spjd 3179219089Spjdstatic int 3180219089Spjdztest_dataset_create(char *dsname) 3181219089Spjd{ 3182219089Spjd uint64_t zilset = ztest_random(100); 3183219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3184219089Spjd ztest_objset_create_cb, NULL); 3185219089Spjd 3186219089Spjd if (err || zilset < 80) 3187219089Spjd return (err); 3188219089Spjd 3189236143Smm if (ztest_opts.zo_verbose >= 6) 3190236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3191219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3192219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3193168404Spjd} 3194168404Spjd 3195219089Spjd/* ARGSUSED */ 3196168404Spjdstatic int 3197219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3198168404Spjd{ 3199168404Spjd objset_t *os; 3200219089Spjd dmu_object_info_t doi; 3201168404Spjd int error; 3202168404Spjd 3203168404Spjd /* 3204168404Spjd * Verify that the dataset contains a directory object. 3205168404Spjd */ 3206248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); 3207219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3208168404Spjd if (error != ENOENT) { 3209168404Spjd /* We could have crashed in the middle of destroying it */ 3210240415Smm ASSERT0(error); 3211219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3212219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3213168404Spjd } 3214248571Smm dmu_objset_disown(os, FTAG); 3215168404Spjd 3216168404Spjd /* 3217168404Spjd * Destroy the dataset. 
3218168404Spjd */ 3219248571Smm if (strchr(name, '@') != NULL) { 3220248571Smm VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); 3221248571Smm } else { 3222248571Smm VERIFY0(dsl_destroy_head(name)); 3223248571Smm } 3224168404Spjd return (0); 3225168404Spjd} 3226168404Spjd 3227219089Spjdstatic boolean_t 3228219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3229168404Spjd{ 3230219089Spjd char snapname[MAXNAMELEN]; 3231219089Spjd int error; 3232168404Spjd 3233248571Smm (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); 3234168404Spjd 3235248571Smm error = dmu_objset_snapshot_one(osname, snapname); 3236219089Spjd if (error == ENOSPC) { 3237219089Spjd ztest_record_enospc(FTAG); 3238219089Spjd return (B_FALSE); 3239219089Spjd } 3240248571Smm if (error != 0 && error != EEXIST) { 3241248571Smm fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, 3242248571Smm snapname, error); 3243248571Smm } 3244219089Spjd return (B_TRUE); 3245219089Spjd} 3246168404Spjd 3247219089Spjdstatic boolean_t 3248219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3249219089Spjd{ 3250219089Spjd char snapname[MAXNAMELEN]; 3251219089Spjd int error; 3252219089Spjd 3253219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3254219089Spjd (u_longlong_t)id); 3255219089Spjd 3256248571Smm error = dsl_destroy_snapshot(snapname, B_FALSE); 3257219089Spjd if (error != 0 && error != ENOENT) 3258219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3259219089Spjd return (B_TRUE); 3260168404Spjd} 3261168404Spjd 3262219089Spjd/* ARGSUSED */ 3263168404Spjdvoid 3264219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3265168404Spjd{ 3266219089Spjd ztest_ds_t zdtmp; 3267219089Spjd int iters; 3268168404Spjd int error; 3269185029Spjd objset_t *os, *os2; 3270219089Spjd char name[MAXNAMELEN]; 3271168404Spjd zilog_t *zilog; 3272168404Spjd 3273236143Smm (void) rw_rdlock(&ztest_name_lock); 3274168404Spjd 3275219089Spjd (void) 
snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3276236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 3277168404Spjd 3278168404Spjd /* 3279168404Spjd * If this dataset exists from a previous run, process its replay log 3280168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3281219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3282168404Spjd */ 3283168404Spjd if (ztest_random(2) == 0 && 3284219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3285236143Smm ztest_zd_init(&zdtmp, NULL, os); 3286219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3287219089Spjd ztest_zd_fini(&zdtmp); 3288219089Spjd dmu_objset_disown(os, FTAG); 3289168404Spjd } 3290168404Spjd 3291168404Spjd /* 3292168404Spjd * There may be an old instance of the dataset we're about to 3293168404Spjd * create lying around from a previous run. If so, destroy it 3294168404Spjd * and all of its snapshots. 3295168404Spjd */ 3296219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3297168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3298168404Spjd 3299168404Spjd /* 3300168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3301168404Spjd */ 3302248571Smm VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, 3303248571Smm FTAG, &os)); 3304168404Spjd 3305168404Spjd /* 3306168404Spjd * Verify that we can create a new dataset. 
3307168404Spjd */ 3308219089Spjd error = ztest_dataset_create(name); 3309168404Spjd if (error) { 3310168404Spjd if (error == ENOSPC) { 3311219089Spjd ztest_record_enospc(FTAG); 3312236143Smm (void) rw_unlock(&ztest_name_lock); 3313168404Spjd return; 3314168404Spjd } 3315168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3316168404Spjd } 3317168404Spjd 3318248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3319168404Spjd 3320236143Smm ztest_zd_init(&zdtmp, NULL, os); 3321219089Spjd 3322168404Spjd /* 3323168404Spjd * Open the intent log for it. 3324168404Spjd */ 3325219089Spjd zilog = zil_open(os, ztest_get_data); 3326168404Spjd 3327168404Spjd /* 3328219089Spjd * Put some objects in there, do a little I/O to them, 3329219089Spjd * and randomly take a couple of snapshots along the way. 3330168404Spjd */ 3331219089Spjd iters = ztest_random(5); 3332219089Spjd for (int i = 0; i < iters; i++) { 3333219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3334219089Spjd if (ztest_random(iters) == 0) 3335219089Spjd (void) ztest_snapshot_create(name, i); 3336168404Spjd } 3337168404Spjd 3338168404Spjd /* 3339168404Spjd * Verify that we cannot create an existing dataset. 3340168404Spjd */ 3341219089Spjd VERIFY3U(EEXIST, ==, 3342219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3343168404Spjd 3344168404Spjd /* 3345219089Spjd * Verify that we can hold an objset that is also owned. 3346168404Spjd */ 3347219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3348219089Spjd dmu_objset_rele(os2, FTAG); 3349168404Spjd 3350219089Spjd /* 3351219089Spjd * Verify that we cannot own an objset that is already owned. 
3352219089Spjd */ 3353219089Spjd VERIFY3U(EBUSY, ==, 3354219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3355219089Spjd 3356168404Spjd zil_close(zilog); 3357219089Spjd dmu_objset_disown(os, FTAG); 3358219089Spjd ztest_zd_fini(&zdtmp); 3359168404Spjd 3360236143Smm (void) rw_unlock(&ztest_name_lock); 3361168404Spjd} 3362168404Spjd 3363168404Spjd/* 3364168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 3365168404Spjd */ 3366168404Spjdvoid 3367219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3368168404Spjd{ 3369236143Smm (void) rw_rdlock(&ztest_name_lock); 3370219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3371219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3372236143Smm (void) rw_unlock(&ztest_name_lock); 3373219089Spjd} 3374219089Spjd 3375219089Spjd/* 3376219089Spjd * Cleanup non-standard snapshots and clones. 3377219089Spjd */ 3378219089Spjdvoid 3379219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3380219089Spjd{ 3381219089Spjd char snap1name[MAXNAMELEN]; 3382219089Spjd char clone1name[MAXNAMELEN]; 3383219089Spjd char snap2name[MAXNAMELEN]; 3384219089Spjd char clone2name[MAXNAMELEN]; 3385219089Spjd char snap3name[MAXNAMELEN]; 3386168404Spjd int error; 3387168404Spjd 3388219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3389219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3390219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3391219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3392219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3393168404Spjd 3394248571Smm error = dsl_destroy_head(clone2name); 3395219089Spjd if (error && error != ENOENT) 3396248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); 3397248571Smm error = dsl_destroy_snapshot(snap3name, B_FALSE); 3398219089Spjd if (error && error != 
ENOENT) 3399248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); 3400248571Smm error = dsl_destroy_snapshot(snap2name, B_FALSE); 3401219089Spjd if (error && error != ENOENT) 3402248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); 3403248571Smm error = dsl_destroy_head(clone1name); 3404219089Spjd if (error && error != ENOENT) 3405248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); 3406248571Smm error = dsl_destroy_snapshot(snap1name, B_FALSE); 3407219089Spjd if (error && error != ENOENT) 3408248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); 3409168404Spjd} 3410168404Spjd 3411168404Spjd/* 3412207910Smm * Verify dsl_dataset_promote handles EBUSY 3413207910Smm */ 3414207910Smmvoid 3415219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3416207910Smm{ 3417248571Smm objset_t *os; 3418219089Spjd char snap1name[MAXNAMELEN]; 3419219089Spjd char clone1name[MAXNAMELEN]; 3420219089Spjd char snap2name[MAXNAMELEN]; 3421219089Spjd char clone2name[MAXNAMELEN]; 3422219089Spjd char snap3name[MAXNAMELEN]; 3423219089Spjd char *osname = zd->zd_name; 3424219089Spjd int error; 3425207910Smm 3426236143Smm (void) rw_rdlock(&ztest_name_lock); 3427207910Smm 3428219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3429207910Smm 3430219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3431219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3432219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3433219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3434219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3435207910Smm 3436248571Smm error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); 3437209962Smm if (error && error != EEXIST) { 3438209962Smm if (error == ENOSPC) { 3439209962Smm ztest_record_enospc(FTAG); 3440209962Smm goto out; 3441209962Smm } 3442209962Smm 
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3443209962Smm } 3444207910Smm 3445248571Smm error = dmu_objset_clone(clone1name, snap1name); 3446209962Smm if (error) { 3447209962Smm if (error == ENOSPC) { 3448209962Smm ztest_record_enospc(FTAG); 3449209962Smm goto out; 3450209962Smm } 3451207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3452209962Smm } 3453207910Smm 3454248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); 3455209962Smm if (error && error != EEXIST) { 3456209962Smm if (error == ENOSPC) { 3457209962Smm ztest_record_enospc(FTAG); 3458209962Smm goto out; 3459209962Smm } 3460209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3461209962Smm } 3462207910Smm 3463248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); 3464209962Smm if (error && error != EEXIST) { 3465209962Smm if (error == ENOSPC) { 3466209962Smm ztest_record_enospc(FTAG); 3467209962Smm goto out; 3468209962Smm } 3469209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3470209962Smm } 3471207910Smm 3472248571Smm error = dmu_objset_clone(clone2name, snap3name); 3473209962Smm if (error) { 3474209962Smm if (error == ENOSPC) { 3475219089Spjd ztest_record_enospc(FTAG); 3476209962Smm goto out; 3477209962Smm } 3478207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3479209962Smm } 3480207910Smm 3481248571Smm error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); 3482207910Smm if (error) 3483248571Smm fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); 3484219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3485268075Sdelphij if (error == ENOSPC) { 3486268075Sdelphij dmu_objset_disown(os, FTAG); 3487268075Sdelphij ztest_record_enospc(FTAG); 3488268075Sdelphij goto out; 3489268075Sdelphij } 3490207910Smm if (error != EBUSY) 3491207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3492207910Smm error); 3493248571Smm 
dmu_objset_disown(os, FTAG); 3494207910Smm 3495209962Smmout: 3496219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3497207910Smm 3498236143Smm (void) rw_unlock(&ztest_name_lock); 3499207910Smm} 3500207910Smm 3501207910Smm/* 3502168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3503168404Spjd */ 3504168404Spjdvoid 3505219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3506168404Spjd{ 3507219089Spjd ztest_od_t od[4]; 3508219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3509168404Spjd 3510219089Spjd for (int b = 0; b < batchsize; b++) 3511219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3512168404Spjd 3513168404Spjd /* 3514219089Spjd * Destroy the previous batch of objects, create a new batch, 3515219089Spjd * and do some I/O on the new objects. 3516168404Spjd */ 3517219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3518219089Spjd return; 3519168404Spjd 3520219089Spjd while (ztest_random(4 * batchsize) != 0) 3521219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3522219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3523168404Spjd} 3524168404Spjd 3525168404Spjd/* 3526168404Spjd * Verify that dmu_{read,write} work as expected. 
3527168404Spjd */ 3528168404Spjdvoid 3529219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3530168404Spjd{ 3531219089Spjd objset_t *os = zd->zd_os; 3532219089Spjd ztest_od_t od[2]; 3533168404Spjd dmu_tx_t *tx; 3534168404Spjd int i, freeit, error; 3535168404Spjd uint64_t n, s, txg; 3536168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3537219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3538219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3539168404Spjd uint64_t regions = 997; 3540168404Spjd uint64_t stride = 123456789ULL; 3541168404Spjd uint64_t width = 40; 3542168404Spjd int free_percent = 5; 3543168404Spjd 3544168404Spjd /* 3545168404Spjd * This test uses two objects, packobj and bigobj, that are always 3546168404Spjd * updated together (i.e. in the same tx) so that their contents are 3547168404Spjd * in sync and can be compared. Their contents relate to each other 3548168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3549168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3550168404Spjd * for any index n, there are three bufwads that should be identical: 3551168404Spjd * 3552168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3553168404Spjd * bigobj, at the head of the nth chunk 3554168404Spjd * bigobj, at the tail of the nth chunk 3555168404Spjd * 3556168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3557168404Spjd * and it doesn't have any relation to the object blocksize. 3558168404Spjd * The only requirement is that it can hold at least two bufwads. 3559168404Spjd * 3560168404Spjd * Normally, we write the bufwad to each of these locations. 3561168404Spjd * However, free_percent of the time we instead write zeroes to 3562168404Spjd * packobj and perform a dmu_free_range() on bigobj. 
By comparing 3563168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3564168404Spjd * tracking which parts of an object are allocated and free, 3565168404Spjd * and that the contents of the allocated blocks are correct. 3566168404Spjd */ 3567168404Spjd 3568168404Spjd /* 3569168404Spjd * Read the directory info. If it's the first time, set things up. 3570168404Spjd */ 3571219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3572219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3573168404Spjd 3574219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3575219089Spjd return; 3576168404Spjd 3577219089Spjd bigobj = od[0].od_object; 3578219089Spjd packobj = od[1].od_object; 3579219089Spjd chunksize = od[0].od_gen; 3580219089Spjd ASSERT(chunksize == od[1].od_gen); 3581168404Spjd 3582168404Spjd /* 3583168404Spjd * Prefetch a random chunk of the big object. 3584168404Spjd * Our aim here is to get some async reads in flight 3585168404Spjd * for blocks that we may free below; the DMU should 3586168404Spjd * handle this race correctly. 3587168404Spjd */ 3588168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3589168404Spjd s = 1 + ztest_random(2 * width - 1); 3590286705Smav dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, 3591286705Smav ZIO_PRIORITY_SYNC_READ); 3592168404Spjd 3593168404Spjd /* 3594168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 
3595168404Spjd */ 3596168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3597168404Spjd s = 1 + ztest_random(width - 1); 3598168404Spjd 3599168404Spjd packoff = n * sizeof (bufwad_t); 3600168404Spjd packsize = s * sizeof (bufwad_t); 3601168404Spjd 3602219089Spjd bigoff = n * chunksize; 3603219089Spjd bigsize = s * chunksize; 3604168404Spjd 3605168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3606168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3607168404Spjd 3608168404Spjd /* 3609168404Spjd * free_percent of the time, free a range of bigobj rather than 3610168404Spjd * overwriting it. 3611168404Spjd */ 3612168404Spjd freeit = (ztest_random(100) < free_percent); 3613168404Spjd 3614168404Spjd /* 3615168404Spjd * Read the current contents of our objects. 3616168404Spjd */ 3617219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3618209962Smm DMU_READ_PREFETCH); 3619240415Smm ASSERT0(error); 3620219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3621209962Smm DMU_READ_PREFETCH); 3622240415Smm ASSERT0(error); 3623168404Spjd 3624168404Spjd /* 3625168404Spjd * Get a tx for the mods to both packobj and bigobj. 3626168404Spjd */ 3627168404Spjd tx = dmu_tx_create(os); 3628168404Spjd 3629219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3630168404Spjd 3631168404Spjd if (freeit) 3632219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3633168404Spjd else 3634219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3635168404Spjd 3636254077Sdelphij /* This accounts for setting the checksum/compression. 
*/ 3637254077Sdelphij dmu_tx_hold_bonus(tx, bigobj); 3638254077Sdelphij 3639219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3640219089Spjd if (txg == 0) { 3641168404Spjd umem_free(packbuf, packsize); 3642168404Spjd umem_free(bigbuf, bigsize); 3643168404Spjd return; 3644168404Spjd } 3645168404Spjd 3646268075Sdelphij enum zio_checksum cksum; 3647268075Sdelphij do { 3648268075Sdelphij cksum = (enum zio_checksum) 3649268075Sdelphij ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); 3650268075Sdelphij } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); 3651268075Sdelphij dmu_object_set_checksum(os, bigobj, cksum, tx); 3652168404Spjd 3653268075Sdelphij enum zio_compress comp; 3654268075Sdelphij do { 3655268075Sdelphij comp = (enum zio_compress) 3656268075Sdelphij ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); 3657268075Sdelphij } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); 3658268075Sdelphij dmu_object_set_compress(os, bigobj, comp, tx); 3659219089Spjd 3660168404Spjd /* 3661168404Spjd * For each index from n to n + s, verify that the existing bufwad 3662168404Spjd * in packobj matches the bufwads at the head and tail of the 3663168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3664168404Spjd * with the new values we want to write out. 
3665168404Spjd */ 3666168404Spjd for (i = 0; i < s; i++) { 3667168404Spjd /* LINTED */ 3668168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3669168404Spjd /* LINTED */ 3670219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3671168404Spjd /* LINTED */ 3672219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3673168404Spjd 3674168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3675168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3676168404Spjd 3677168404Spjd if (pack->bw_txg > txg) 3678168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3679168404Spjd pack->bw_txg, txg); 3680168404Spjd 3681168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3682168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3683168404Spjd pack->bw_index, n, i); 3684168404Spjd 3685168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3686168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3687168404Spjd 3688168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3689168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3690168404Spjd 3691168404Spjd if (freeit) { 3692168404Spjd bzero(pack, sizeof (bufwad_t)); 3693168404Spjd } else { 3694168404Spjd pack->bw_index = n + i; 3695168404Spjd pack->bw_txg = txg; 3696168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3697168404Spjd } 3698168404Spjd *bigH = *pack; 3699168404Spjd *bigT = *pack; 3700168404Spjd } 3701168404Spjd 3702168404Spjd /* 3703168404Spjd * We've verified all the old bufwads, and made new ones. 3704168404Spjd * Now write them out. 
3705168404Spjd */ 3706219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3707168404Spjd 3708168404Spjd if (freeit) { 3709236143Smm if (ztest_opts.zo_verbose >= 7) { 3710168404Spjd (void) printf("freeing offset %llx size %llx" 3711168404Spjd " txg %llx\n", 3712168404Spjd (u_longlong_t)bigoff, 3713168404Spjd (u_longlong_t)bigsize, 3714168404Spjd (u_longlong_t)txg); 3715168404Spjd } 3716219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3717168404Spjd } else { 3718236143Smm if (ztest_opts.zo_verbose >= 7) { 3719168404Spjd (void) printf("writing offset %llx size %llx" 3720168404Spjd " txg %llx\n", 3721168404Spjd (u_longlong_t)bigoff, 3722168404Spjd (u_longlong_t)bigsize, 3723168404Spjd (u_longlong_t)txg); 3724168404Spjd } 3725219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3726168404Spjd } 3727168404Spjd 3728168404Spjd dmu_tx_commit(tx); 3729168404Spjd 3730168404Spjd /* 3731168404Spjd * Sanity check the stuff we just wrote. 3732168404Spjd */ 3733168404Spjd { 3734168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3735168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3736168404Spjd 3737219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3738209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3739219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3740209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3741168404Spjd 3742168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3743168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3744168404Spjd 3745168404Spjd umem_free(packcheck, packsize); 3746168404Spjd umem_free(bigcheck, bigsize); 3747168404Spjd } 3748168404Spjd 3749168404Spjd umem_free(packbuf, packsize); 3750168404Spjd umem_free(bigbuf, bigsize); 3751168404Spjd} 3752168404Spjd 3753168404Spjdvoid 3754209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3755219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3756209962Smm{ 
3757209962Smm uint64_t i; 3758209962Smm bufwad_t *pack; 3759209962Smm bufwad_t *bigH; 3760209962Smm bufwad_t *bigT; 3761209962Smm 3762209962Smm /* 3763209962Smm * For each index from n to n + s, verify that the existing bufwad 3764209962Smm * in packobj matches the bufwads at the head and tail of the 3765209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3766209962Smm * with the new values we want to write out. 3767209962Smm */ 3768209962Smm for (i = 0; i < s; i++) { 3769209962Smm /* LINTED */ 3770209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3771209962Smm /* LINTED */ 3772219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3773209962Smm /* LINTED */ 3774219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3775209962Smm 3776209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3777209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3778209962Smm 3779209962Smm if (pack->bw_txg > txg) 3780209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3781209962Smm pack->bw_txg, txg); 3782209962Smm 3783209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3784209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3785209962Smm pack->bw_index, n, i); 3786209962Smm 3787209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3788209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3789209962Smm 3790209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3791209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3792209962Smm 3793209962Smm pack->bw_index = n + i; 3794209962Smm pack->bw_txg = txg; 3795209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3796209962Smm 3797209962Smm *bigH = *pack; 3798209962Smm *bigT = *pack; 3799209962Smm } 3800209962Smm} 3801209962Smm 3802209962Smmvoid 3803219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3804209962Smm{ 3805219089Spjd objset_t *os = zd->zd_os; 3806219089Spjd ztest_od_t od[2]; 
3807209962Smm dmu_tx_t *tx; 3808209962Smm uint64_t i; 3809209962Smm int error; 3810209962Smm uint64_t n, s, txg; 3811209962Smm bufwad_t *packbuf, *bigbuf; 3812219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3813219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3814219089Spjd uint64_t chunksize = blocksize; 3815209962Smm uint64_t regions = 997; 3816209962Smm uint64_t stride = 123456789ULL; 3817209962Smm uint64_t width = 9; 3818209962Smm dmu_buf_t *bonus_db; 3819209962Smm arc_buf_t **bigbuf_arcbufs; 3820219089Spjd dmu_object_info_t doi; 3821209962Smm 3822209962Smm /* 3823209962Smm * This test uses two objects, packobj and bigobj, that are always 3824209962Smm * updated together (i.e. in the same tx) so that their contents are 3825209962Smm * in sync and can be compared. Their contents relate to each other 3826209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3827209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3828209962Smm * for any index n, there are three bufwads that should be identical: 3829209962Smm * 3830209962Smm * packobj, at offset n * sizeof (bufwad_t) 3831209962Smm * bigobj, at the head of the nth chunk 3832209962Smm * bigobj, at the tail of the nth chunk 3833209962Smm * 3834209962Smm * The chunk size is set equal to bigobj block size so that 3835209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3836209962Smm */ 3837209962Smm 3838209962Smm /* 3839209962Smm * Read the directory info. If it's the first time, set things up. 
3840209962Smm */ 3841219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3842219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3843209962Smm 3844219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3845219089Spjd return; 3846209962Smm 3847219089Spjd bigobj = od[0].od_object; 3848219089Spjd packobj = od[1].od_object; 3849219089Spjd blocksize = od[0].od_blocksize; 3850219089Spjd chunksize = blocksize; 3851219089Spjd ASSERT(chunksize == od[1].od_gen); 3852209962Smm 3853219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3854219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3855219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3856219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3857209962Smm 3858209962Smm /* 3859209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3860209962Smm */ 3861209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3862209962Smm s = 1 + ztest_random(width - 1); 3863209962Smm 3864209962Smm packoff = n * sizeof (bufwad_t); 3865209962Smm packsize = s * sizeof (bufwad_t); 3866209962Smm 3867219089Spjd bigoff = n * chunksize; 3868219089Spjd bigsize = s * chunksize; 3869209962Smm 3870209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3871209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3872209962Smm 3873219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3874209962Smm 3875209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3876209962Smm 3877209962Smm /* 3878209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3879209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3880209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3881209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3882209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3883209962Smm * Iteration 5 test zcopy when it can't be done. 
3884209962Smm * Iteration 6 one more zcopy write. 3885209962Smm */ 3886209962Smm for (i = 0; i < 7; i++) { 3887209962Smm uint64_t j; 3888209962Smm uint64_t off; 3889209962Smm 3890209962Smm /* 3891209962Smm * In iteration 5 (i == 5) use arcbufs 3892209962Smm * that don't match bigobj blksz to test 3893209962Smm * dmu_assign_arcbuf() when it can't directly 3894209962Smm * assign an arcbuf to a dbuf. 3895209962Smm */ 3896209962Smm for (j = 0; j < s; j++) { 3897209962Smm if (i != 5) { 3898209962Smm bigbuf_arcbufs[j] = 3899219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3900209962Smm } else { 3901209962Smm bigbuf_arcbufs[2 * j] = 3902219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3903209962Smm bigbuf_arcbufs[2 * j + 1] = 3904219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3905209962Smm } 3906209962Smm } 3907209962Smm 3908209962Smm /* 3909209962Smm * Get a tx for the mods to both packobj and bigobj. 3910209962Smm */ 3911209962Smm tx = dmu_tx_create(os); 3912209962Smm 3913219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3914219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3915209962Smm 3916219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3917219089Spjd if (txg == 0) { 3918209962Smm umem_free(packbuf, packsize); 3919209962Smm umem_free(bigbuf, bigsize); 3920209962Smm for (j = 0; j < s; j++) { 3921209962Smm if (i != 5) { 3922209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3923209962Smm } else { 3924209962Smm dmu_return_arcbuf( 3925209962Smm bigbuf_arcbufs[2 * j]); 3926209962Smm dmu_return_arcbuf( 3927209962Smm bigbuf_arcbufs[2 * j + 1]); 3928209962Smm } 3929209962Smm } 3930209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3931209962Smm dmu_buf_rele(bonus_db, FTAG); 3932209962Smm return; 3933209962Smm } 3934209962Smm 3935209962Smm /* 3936209962Smm * 50% of the time don't read objects in the 1st iteration to 3937209962Smm * test dmu_assign_arcbuf() for the case when there're no 3938209962Smm * existing 
dbufs for the specified offsets. 3939209962Smm */ 3940209962Smm if (i != 0 || ztest_random(2) != 0) { 3941219089Spjd error = dmu_read(os, packobj, packoff, 3942209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3943240415Smm ASSERT0(error); 3944219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3945209962Smm bigbuf, DMU_READ_PREFETCH); 3946240415Smm ASSERT0(error); 3947209962Smm } 3948209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3949219089Spjd n, chunksize, txg); 3950209962Smm 3951209962Smm /* 3952209962Smm * We've verified all the old bufwads, and made new ones. 3953209962Smm * Now write them out. 3954209962Smm */ 3955219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3956236143Smm if (ztest_opts.zo_verbose >= 7) { 3957209962Smm (void) printf("writing offset %llx size %llx" 3958209962Smm " txg %llx\n", 3959209962Smm (u_longlong_t)bigoff, 3960209962Smm (u_longlong_t)bigsize, 3961209962Smm (u_longlong_t)txg); 3962209962Smm } 3963219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3964209962Smm dmu_buf_t *dbt; 3965209962Smm if (i != 5) { 3966209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3967219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3968209962Smm } else { 3969209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3970209962Smm bigbuf_arcbufs[2 * j]->b_data, 3971219089Spjd chunksize / 2); 3972209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3973219089Spjd chunksize / 2, 3974209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3975219089Spjd chunksize / 2); 3976209962Smm } 3977209962Smm 3978209962Smm if (i == 1) { 3979219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3980219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3981209962Smm } 3982209962Smm if (i != 5) { 3983209962Smm dmu_assign_arcbuf(bonus_db, off, 3984209962Smm bigbuf_arcbufs[j], tx); 3985209962Smm } else { 3986209962Smm dmu_assign_arcbuf(bonus_db, off, 3987209962Smm bigbuf_arcbufs[2 * j], tx); 3988209962Smm dmu_assign_arcbuf(bonus_db, 3989219089Spjd 
off + chunksize / 2, 3990209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3991209962Smm } 3992209962Smm if (i == 1) { 3993209962Smm dmu_buf_rele(dbt, FTAG); 3994209962Smm } 3995209962Smm } 3996209962Smm dmu_tx_commit(tx); 3997209962Smm 3998209962Smm /* 3999209962Smm * Sanity check the stuff we just wrote. 4000209962Smm */ 4001209962Smm { 4002209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 4003209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 4004209962Smm 4005219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 4006209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 4007219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 4008209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 4009209962Smm 4010209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 4011209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 4012209962Smm 4013209962Smm umem_free(packcheck, packsize); 4014209962Smm umem_free(bigcheck, bigsize); 4015209962Smm } 4016209962Smm if (i == 2) { 4017209962Smm txg_wait_open(dmu_objset_pool(os), 0); 4018209962Smm } else if (i == 3) { 4019209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 4020209962Smm } 4021209962Smm } 4022209962Smm 4023209962Smm dmu_buf_rele(bonus_db, FTAG); 4024209962Smm umem_free(packbuf, packsize); 4025209962Smm umem_free(bigbuf, bigsize); 4026209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 4027209962Smm} 4028209962Smm 4029219089Spjd/* ARGSUSED */ 4030209962Smmvoid 4031219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 4032168404Spjd{ 4033219089Spjd ztest_od_t od[1]; 4034219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 4035219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4036168404Spjd 4037168404Spjd /* 4038219089Spjd * Have multiple threads write to large offsets in an object 4039219089Spjd * to verify that parallel writes to an object -- even to the 4040219089Spjd * same blocks within the object -- doesn't cause any trouble. 
4041168404Spjd */ 4042219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4043219089Spjd 4044219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4045219089Spjd return; 4046219089Spjd 4047219089Spjd while (ztest_random(10) != 0) 4048219089Spjd ztest_io(zd, od[0].od_object, offset); 4049168404Spjd} 4050168404Spjd 4051168404Spjdvoid 4052219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 4053168404Spjd{ 4054219089Spjd ztest_od_t od[1]; 4055219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 4056219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4057219089Spjd uint64_t count = ztest_random(20) + 1; 4058219089Spjd uint64_t blocksize = ztest_random_blocksize(); 4059219089Spjd void *data; 4060168404Spjd 4061219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4062168404Spjd 4063219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4064185029Spjd return; 4065168404Spjd 4066219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 4067185029Spjd return; 4068168404Spjd 4069219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 4070185029Spjd 4071219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 4072185029Spjd 4073219089Spjd while (ztest_random(count) != 0) { 4074219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 4075219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 4076219089Spjd data) != 0) 4077219089Spjd break; 4078219089Spjd while (ztest_random(4) != 0) 4079219089Spjd ztest_io(zd, od[0].od_object, randoff); 4080185029Spjd } 4081168404Spjd 4082219089Spjd umem_free(data, blocksize); 4083168404Spjd} 4084168404Spjd 4085168404Spjd/* 4086168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 
4087168404Spjd */ 4088168404Spjd#define ZTEST_ZAP_MIN_INTS 1 4089168404Spjd#define ZTEST_ZAP_MAX_INTS 4 4090168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 4091168404Spjd 4092168404Spjdvoid 4093219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 4094168404Spjd{ 4095219089Spjd objset_t *os = zd->zd_os; 4096219089Spjd ztest_od_t od[1]; 4097168404Spjd uint64_t object; 4098168404Spjd uint64_t txg, last_txg; 4099168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 4100168404Spjd uint64_t zl_ints, zl_intsize, prop; 4101168404Spjd int i, ints; 4102168404Spjd dmu_tx_t *tx; 4103168404Spjd char propname[100], txgname[100]; 4104168404Spjd int error; 4105168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 4106168404Spjd 4107219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4108168404Spjd 4109219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4110219089Spjd return; 4111219089Spjd 4112219089Spjd object = od[0].od_object; 4113219089Spjd 4114168404Spjd /* 4115219089Spjd * Generate a known hash collision, and verify that 4116219089Spjd * we can lookup and remove both entries. 
4117168404Spjd */ 4118219089Spjd tx = dmu_tx_create(os); 4119219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4120219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4121219089Spjd if (txg == 0) 4122219089Spjd return; 4123219089Spjd for (i = 0; i < 2; i++) { 4124219089Spjd value[i] = i; 4125219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 4126219089Spjd 1, &value[i], tx)); 4127168404Spjd } 4128219089Spjd for (i = 0; i < 2; i++) { 4129219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 4130219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 4131219089Spjd VERIFY3U(0, ==, 4132219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 4133219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4134219089Spjd ASSERT3U(zl_ints, ==, 1); 4135219089Spjd } 4136219089Spjd for (i = 0; i < 2; i++) { 4137219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 4138219089Spjd } 4139219089Spjd dmu_tx_commit(tx); 4140168404Spjd 4141219089Spjd /* 4142219089Spjd * Generate a buch of random entries. 4143219089Spjd */ 4144168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 4145168404Spjd 4146185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4147185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4148185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4149185029Spjd bzero(value, sizeof (value)); 4150185029Spjd last_txg = 0; 4151168404Spjd 4152185029Spjd /* 4153185029Spjd * If these zap entries already exist, validate their contents. 
4154185029Spjd */ 4155185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4156185029Spjd if (error == 0) { 4157185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4158185029Spjd ASSERT3U(zl_ints, ==, 1); 4159168404Spjd 4160185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 4161185029Spjd zl_ints, &last_txg) == 0); 4162168404Spjd 4163185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 4164185029Spjd &zl_ints) == 0); 4165168404Spjd 4166185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4167185029Spjd ASSERT3U(zl_ints, ==, ints); 4168168404Spjd 4169185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 4170185029Spjd zl_ints, value) == 0); 4171168404Spjd 4172185029Spjd for (i = 0; i < ints; i++) { 4173185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 4174168404Spjd } 4175185029Spjd } else { 4176185029Spjd ASSERT3U(error, ==, ENOENT); 4177185029Spjd } 4178168404Spjd 4179185029Spjd /* 4180185029Spjd * Atomically update two entries in our zap object. 4181185029Spjd * The first is named txg_%llu, and contains the txg 4182185029Spjd * in which the property was last updated. The second 4183185029Spjd * is named prop_%llu, and the nth element of its value 4184185029Spjd * should be txg + object + n. 
4185185029Spjd */ 4186185029Spjd tx = dmu_tx_create(os); 4187219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4188219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4189219089Spjd if (txg == 0) 4190185029Spjd return; 4191168404Spjd 4192185029Spjd if (last_txg > txg) 4193185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4194168404Spjd 4195185029Spjd for (i = 0; i < ints; i++) 4196185029Spjd value[i] = txg + object + i; 4197168404Spjd 4198219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4199219089Spjd 1, &txg, tx)); 4200219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4201219089Spjd ints, value, tx)); 4202168404Spjd 4203185029Spjd dmu_tx_commit(tx); 4204168404Spjd 4205185029Spjd /* 4206185029Spjd * Remove a random pair of entries. 4207185029Spjd */ 4208185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4209185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4210185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4211168404Spjd 4212185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4213168404Spjd 4214185029Spjd if (error == ENOENT) 4215185029Spjd return; 4216168404Spjd 4217240415Smm ASSERT0(error); 4218168404Spjd 4219185029Spjd tx = dmu_tx_create(os); 4220219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4221219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4222219089Spjd if (txg == 0) 4223185029Spjd return; 4224219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4225219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4226185029Spjd dmu_tx_commit(tx); 4227168404Spjd} 4228168404Spjd 4229209962Smm/* 4230209962Smm * Testcase to test the upgrading of a microzap to fatzap. 
4231209962Smm */ 4232168404Spjdvoid 4233219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4234209962Smm{ 4235219089Spjd objset_t *os = zd->zd_os; 4236219089Spjd ztest_od_t od[1]; 4237219089Spjd uint64_t object, txg; 4238209962Smm 4239219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4240209962Smm 4241219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4242219089Spjd return; 4243209962Smm 4244219089Spjd object = od[0].od_object; 4245209962Smm 4246209962Smm /* 4247219089Spjd * Add entries to this ZAP and make sure it spills over 4248209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4249219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4250209962Smm */ 4251219089Spjd for (int i = 0; i < 2050; i++) { 4252219089Spjd char name[MAXNAMELEN]; 4253219089Spjd uint64_t value = i; 4254219089Spjd dmu_tx_t *tx; 4255219089Spjd int error; 4256209962Smm 4257219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4258219089Spjd id, value); 4259219089Spjd 4260209962Smm tx = dmu_tx_create(os); 4261219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4262219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4263219089Spjd if (txg == 0) 4264209962Smm return; 4265219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4266219089Spjd &value, tx); 4267209962Smm ASSERT(error == 0 || error == EEXIST); 4268209962Smm dmu_tx_commit(tx); 4269209962Smm } 4270209962Smm} 4271209962Smm 4272219089Spjd/* ARGSUSED */ 4273209962Smmvoid 4274219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4275168404Spjd{ 4276219089Spjd objset_t *os = zd->zd_os; 4277219089Spjd ztest_od_t od[1]; 4278168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4279168404Spjd dmu_tx_t *tx; 4280168404Spjd int i, namelen, error; 4281219089Spjd int micro = ztest_random(2); 4282168404Spjd char name[20], string_value[20]; 4283168404Spjd void *data; 4284168404Spjd 4285219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4286219089Spjd 4287219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4288219089Spjd return; 4289219089Spjd 4290219089Spjd object = od[0].od_object; 4291219089Spjd 4292185029Spjd /* 4293185029Spjd * Generate a random name of the form 'xxx.....' where each 4294185029Spjd * x is a random printable character and the dots are dots. 4295185029Spjd * There are 94 such characters, and the name length goes from 4296185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4297185029Spjd */ 4298185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4299168404Spjd 4300185029Spjd for (i = 0; i < 3; i++) 4301185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4302185029Spjd for (; i < namelen - 1; i++) 4303185029Spjd name[i] = '.'; 4304185029Spjd name[i] = '\0'; 4305168404Spjd 4306219089Spjd if ((namelen & 1) || micro) { 4307185029Spjd wsize = sizeof (txg); 4308185029Spjd wc = 1; 4309185029Spjd data = &txg; 4310185029Spjd } else { 4311185029Spjd wsize = 1; 4312185029Spjd wc = namelen; 4313185029Spjd data = string_value; 4314185029Spjd } 4315168404Spjd 4316185029Spjd count = -1ULL; 4317248571Smm VERIFY0(zap_count(os, object, &count)); 4318185029Spjd ASSERT(count != -1ULL); 4319168404Spjd 4320185029Spjd /* 4321185029Spjd * Select an operation: length, lookup, add, update, remove. 
4322185029Spjd */ 4323185029Spjd i = ztest_random(5); 4324168404Spjd 4325185029Spjd if (i >= 2) { 4326185029Spjd tx = dmu_tx_create(os); 4327219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4328219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4329219089Spjd if (txg == 0) 4330185029Spjd return; 4331185029Spjd bcopy(name, string_value, namelen); 4332185029Spjd } else { 4333185029Spjd tx = NULL; 4334185029Spjd txg = 0; 4335185029Spjd bzero(string_value, namelen); 4336185029Spjd } 4337168404Spjd 4338185029Spjd switch (i) { 4339168404Spjd 4340185029Spjd case 0: 4341185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4342185029Spjd if (error == 0) { 4343185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4344185029Spjd ASSERT3U(wc, ==, zl_wc); 4345185029Spjd } else { 4346185029Spjd ASSERT3U(error, ==, ENOENT); 4347185029Spjd } 4348185029Spjd break; 4349168404Spjd 4350185029Spjd case 1: 4351185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4352185029Spjd if (error == 0) { 4353185029Spjd if (data == string_value && 4354185029Spjd bcmp(name, data, namelen) != 0) 4355185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4356185029Spjd name, data, namelen); 4357185029Spjd } else { 4358185029Spjd ASSERT3U(error, ==, ENOENT); 4359185029Spjd } 4360185029Spjd break; 4361168404Spjd 4362185029Spjd case 2: 4363185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4364185029Spjd ASSERT(error == 0 || error == EEXIST); 4365185029Spjd break; 4366168404Spjd 4367185029Spjd case 3: 4368185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4369185029Spjd break; 4370168404Spjd 4371185029Spjd case 4: 4372185029Spjd error = zap_remove(os, object, name, tx); 4373185029Spjd ASSERT(error == 0 || error == ENOENT); 4374185029Spjd break; 4375185029Spjd } 4376168404Spjd 4377185029Spjd if (tx != NULL) 4378185029Spjd dmu_tx_commit(tx); 4379168404Spjd} 4380168404Spjd 4381219089Spjd/* 4382219089Spjd * Commit callback data. 
4383219089Spjd */ 4384219089Spjdtypedef struct ztest_cb_data { 4385219089Spjd list_node_t zcd_node; 4386219089Spjd uint64_t zcd_txg; 4387219089Spjd int zcd_expected_err; 4388219089Spjd boolean_t zcd_added; 4389219089Spjd boolean_t zcd_called; 4390219089Spjd spa_t *zcd_spa; 4391219089Spjd} ztest_cb_data_t; 4392219089Spjd 4393219089Spjd/* This is the actual commit callback function */ 4394219089Spjdstatic void 4395219089Spjdztest_commit_callback(void *arg, int error) 4396219089Spjd{ 4397219089Spjd ztest_cb_data_t *data = arg; 4398219089Spjd uint64_t synced_txg; 4399219089Spjd 4400219089Spjd VERIFY(data != NULL); 4401219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4402219089Spjd VERIFY(!data->zcd_called); 4403219089Spjd 4404219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4405219089Spjd if (data->zcd_txg > synced_txg) 4406219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4407219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4408219089Spjd synced_txg); 4409219089Spjd 4410219089Spjd data->zcd_called = B_TRUE; 4411219089Spjd 4412219089Spjd if (error == ECANCELED) { 4413240415Smm ASSERT0(data->zcd_txg); 4414219089Spjd ASSERT(!data->zcd_added); 4415219089Spjd 4416219089Spjd /* 4417219089Spjd * The private callback data should be destroyed here, but 4418219089Spjd * since we are going to check the zcd_called field after 4419219089Spjd * dmu_tx_abort(), we will destroy it there. 4420219089Spjd */ 4421219089Spjd return; 4422219089Spjd } 4423219089Spjd 4424219089Spjd /* Was this callback added to the global callback list? 
*/ 4425219089Spjd if (!data->zcd_added) 4426219089Spjd goto out; 4427219089Spjd 4428219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4429219089Spjd 4430219089Spjd /* Remove our callback from the list */ 4431219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4432219089Spjd list_remove(&zcl.zcl_callbacks, data); 4433219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4434219089Spjd 4435219089Spjdout: 4436219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4437219089Spjd} 4438219089Spjd 4439219089Spjd/* Allocate and initialize callback data structure */ 4440219089Spjdstatic ztest_cb_data_t * 4441219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4442219089Spjd{ 4443219089Spjd ztest_cb_data_t *cb_data; 4444219089Spjd 4445219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4446219089Spjd 4447219089Spjd cb_data->zcd_txg = txg; 4448219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4449219089Spjd 4450219089Spjd return (cb_data); 4451219089Spjd} 4452219089Spjd 4453219089Spjd/* 4454219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4455219089Spjd * callback has been registered but not called, then we assume there is an 4456219089Spjd * implementation bug. 4457219089Spjd */ 4458219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4459219089Spjd 4460219089Spjd/* 4461219089Spjd * Commit callback test. 
4462219089Spjd */ 4463168404Spjdvoid 4464219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4465168404Spjd{ 4466219089Spjd objset_t *os = zd->zd_os; 4467219089Spjd ztest_od_t od[1]; 4468219089Spjd dmu_tx_t *tx; 4469219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4470219089Spjd uint64_t old_txg, txg; 4471219089Spjd int i, error; 4472219089Spjd 4473219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4474219089Spjd 4475219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4476219089Spjd return; 4477219089Spjd 4478219089Spjd tx = dmu_tx_create(os); 4479219089Spjd 4480219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4481219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4482219089Spjd 4483219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4484219089Spjd 4485219089Spjd /* Every once in a while, abort the transaction on purpose */ 4486219089Spjd if (ztest_random(100) == 0) 4487219089Spjd error = -1; 4488219089Spjd 4489219089Spjd if (!error) 4490219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4491219089Spjd 4492219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4493219089Spjd 4494219089Spjd cb_data[0]->zcd_txg = txg; 4495219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4496219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4497219089Spjd 4498219089Spjd if (error) { 4499219089Spjd /* 4500219089Spjd * It's not a strict requirement to call the registered 4501219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4502219089Spjd * it's supposed to happen in the current implementation 4503219089Spjd * so we will check for that. 
4504219089Spjd */ 4505219089Spjd for (i = 0; i < 2; i++) { 4506219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4507219089Spjd VERIFY(!cb_data[i]->zcd_called); 4508219089Spjd } 4509219089Spjd 4510219089Spjd dmu_tx_abort(tx); 4511219089Spjd 4512219089Spjd for (i = 0; i < 2; i++) { 4513219089Spjd VERIFY(cb_data[i]->zcd_called); 4514219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4515219089Spjd } 4516219089Spjd 4517219089Spjd return; 4518219089Spjd } 4519219089Spjd 4520219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4521219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4522219089Spjd 4523219089Spjd /* 4524219089Spjd * Read existing data to make sure there isn't a future leak. 4525219089Spjd */ 4526219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4527219089Spjd &old_txg, DMU_READ_PREFETCH)); 4528219089Spjd 4529219089Spjd if (old_txg > txg) 4530219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4531219089Spjd old_txg, txg); 4532219089Spjd 4533219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4534219089Spjd 4535219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4536219089Spjd 4537219089Spjd /* 4538219089Spjd * Since commit callbacks don't have any ordering requirement and since 4539219089Spjd * it is theoretically possible for a commit callback to be called 4540219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4541219089Spjd * synced, it is difficult to reliably determine whether a commit 4542219089Spjd * callback hasn't been called due to high load or due to a flawed 4543219089Spjd * implementation. 4544219089Spjd * 4545219089Spjd * In practice, we will assume that if after a certain number of txgs a 4546219089Spjd * commit callback hasn't been called, then most likely there's an 4547219089Spjd * implementation bug.. 
4548219089Spjd */ 4549219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4550219089Spjd if (tmp_cb != NULL && 4551251635Sdelphij (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { 4552219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4553219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4554219089Spjd } 4555219089Spjd 4556219089Spjd /* 4557219089Spjd * Let's find the place to insert our callbacks. 4558219089Spjd * 4559219089Spjd * Even though the list is ordered by txg, it is possible for the 4560219089Spjd * insertion point to not be the end because our txg may already be 4561219089Spjd * quiescing at this point and other callbacks in the open txg 4562219089Spjd * (from other objsets) may have sneaked in. 4563219089Spjd */ 4564219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4565219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4566219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4567219089Spjd 4568219089Spjd /* Add the 3 callbacks to the list */ 4569219089Spjd for (i = 0; i < 3; i++) { 4570219089Spjd if (tmp_cb == NULL) 4571219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4572219089Spjd else 4573219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4574219089Spjd cb_data[i]); 4575219089Spjd 4576219089Spjd cb_data[i]->zcd_added = B_TRUE; 4577219089Spjd VERIFY(!cb_data[i]->zcd_called); 4578219089Spjd 4579219089Spjd tmp_cb = cb_data[i]; 4580219089Spjd } 4581219089Spjd 4582219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4583219089Spjd 4584219089Spjd dmu_tx_commit(tx); 4585219089Spjd} 4586219089Spjd 4587219089Spjd/* ARGSUSED */ 4588219089Spjdvoid 4589219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4590219089Spjd{ 4591219089Spjd zfs_prop_t proplist[] = { 4592219089Spjd ZFS_PROP_CHECKSUM, 4593219089Spjd ZFS_PROP_COMPRESSION, 4594219089Spjd ZFS_PROP_COPIES, 4595219089Spjd ZFS_PROP_DEDUP 4596219089Spjd }; 4597219089Spjd 4598236143Smm (void) 
rw_rdlock(&ztest_name_lock); 4599219089Spjd 4600219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4601219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4602219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4603219089Spjd 4604236143Smm (void) rw_unlock(&ztest_name_lock); 4605219089Spjd} 4606219089Spjd 4607219089Spjd/* ARGSUSED */ 4608219089Spjdvoid 4609219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4610219089Spjd{ 4611219089Spjd nvlist_t *props = NULL; 4612219089Spjd 4613236143Smm (void) rw_rdlock(&ztest_name_lock); 4614219089Spjd 4615236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4616219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4617219089Spjd 4618240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4619219089Spjd 4620236143Smm if (ztest_opts.zo_verbose >= 6) 4621219089Spjd dump_nvlist(props, 4); 4622219089Spjd 4623219089Spjd nvlist_free(props); 4624219089Spjd 4625236143Smm (void) rw_unlock(&ztest_name_lock); 4626219089Spjd} 4627219089Spjd 4628248571Smmstatic int 4629248571Smmuser_release_one(const char *snapname, const char *holdname) 4630248571Smm{ 4631248571Smm nvlist_t *snaps, *holds; 4632248571Smm int error; 4633248571Smm 4634248571Smm snaps = fnvlist_alloc(); 4635248571Smm holds = fnvlist_alloc(); 4636248571Smm fnvlist_add_boolean(holds, holdname); 4637248571Smm fnvlist_add_nvlist(snaps, snapname, holds); 4638248571Smm fnvlist_free(holds); 4639248571Smm error = dsl_dataset_user_release(snaps, NULL); 4640248571Smm fnvlist_free(snaps); 4641248571Smm return (error); 4642248571Smm} 4643248571Smm 4644219089Spjd/* 4645219089Spjd * Test snapshot hold/release and deferred destroy. 
4646219089Spjd */ 4647219089Spjdvoid 4648219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4649219089Spjd{ 4650219089Spjd int error; 4651219089Spjd objset_t *os = zd->zd_os; 4652219089Spjd objset_t *origin; 4653219089Spjd char snapname[100]; 4654219089Spjd char fullname[100]; 4655219089Spjd char clonename[100]; 4656219089Spjd char tag[100]; 4657168404Spjd char osname[MAXNAMELEN]; 4658248571Smm nvlist_t *holds; 4659168404Spjd 4660236143Smm (void) rw_rdlock(&ztest_name_lock); 4661168404Spjd 4662168404Spjd dmu_objset_name(os, osname); 4663168404Spjd 4664248571Smm (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); 4665248571Smm (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); 4666248571Smm (void) snprintf(clonename, sizeof (clonename), 4667248571Smm "%s/ch1_%llu", osname, id); 4668248571Smm (void) snprintf(tag, sizeof (tag), "tag_%llu", id); 4669219089Spjd 4670219089Spjd /* 4671219089Spjd * Clean up from any previous run. 4672219089Spjd */ 4673248571Smm error = dsl_destroy_head(clonename); 4674248571Smm if (error != ENOENT) 4675248571Smm ASSERT0(error); 4676248571Smm error = user_release_one(fullname, tag); 4677248571Smm if (error != ESRCH && error != ENOENT) 4678248571Smm ASSERT0(error); 4679248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4680248571Smm if (error != ENOENT) 4681248571Smm ASSERT0(error); 4682219089Spjd 4683219089Spjd /* 4684219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4685219089Spjd * destroy clone, verify snap was also destroyed. 
4686219089Spjd */ 4687248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4688219089Spjd if (error) { 4689219089Spjd if (error == ENOSPC) { 4690219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4691219089Spjd goto out; 4692168404Spjd } 4693219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4694219089Spjd } 4695168404Spjd 4696248571Smm error = dmu_objset_clone(clonename, fullname); 4697219089Spjd if (error) { 4698168404Spjd if (error == ENOSPC) { 4699219089Spjd ztest_record_enospc("dmu_objset_clone"); 4700219089Spjd goto out; 4701168404Spjd } 4702219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4703219089Spjd } 4704168404Spjd 4705248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4706219089Spjd if (error) { 4707248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4708219089Spjd fullname, error); 4709219089Spjd } 4710168404Spjd 4711248571Smm error = dsl_destroy_head(clonename); 4712219089Spjd if (error) 4713248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); 4714168404Spjd 4715219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4716219089Spjd if (error != ENOENT) 4717219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4718168404Spjd 4719219089Spjd /* 4720219089Spjd * Create snapshot, add temporary hold, verify that we can't 4721219089Spjd * destroy a held snapshot, mark for deferred destroy, 4722219089Spjd * release hold, verify snapshot was destroyed. 
4723219089Spjd */ 4724248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4725219089Spjd if (error) { 4726219089Spjd if (error == ENOSPC) { 4727219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4728219089Spjd goto out; 4729168404Spjd } 4730219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4731168404Spjd } 4732168404Spjd 4733248571Smm holds = fnvlist_alloc(); 4734248571Smm fnvlist_add_string(holds, fullname, tag); 4735248571Smm error = dsl_dataset_user_hold(holds, 0, NULL); 4736248571Smm fnvlist_free(holds); 4737248571Smm 4738268075Sdelphij if (error == ENOSPC) { 4739268075Sdelphij ztest_record_enospc("dsl_dataset_user_hold"); 4740268075Sdelphij goto out; 4741268075Sdelphij } else if (error) { 4742268075Sdelphij fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", 4743268075Sdelphij fullname, tag, error); 4744268075Sdelphij } 4745219089Spjd 4746248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4747219089Spjd if (error != EBUSY) { 4748248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", 4749219089Spjd fullname, error); 4750219089Spjd } 4751219089Spjd 4752248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4753219089Spjd if (error) { 4754248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4755219089Spjd fullname, error); 4756219089Spjd } 4757219089Spjd 4758248571Smm error = user_release_one(fullname, tag); 4759219089Spjd if (error) 4760251646Sdelphij fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); 4761219089Spjd 4762248571Smm VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); 4763219089Spjd 4764219089Spjdout: 4765236143Smm (void) rw_unlock(&ztest_name_lock); 4766168404Spjd} 4767168404Spjd 4768168404Spjd/* 4769168404Spjd * Inject random faults into the on-disk data. 
4770168404Spjd */ 4771219089Spjd/* ARGSUSED */ 4772168404Spjdvoid 4773219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4774168404Spjd{ 4775219089Spjd ztest_shared_t *zs = ztest_shared; 4776236143Smm spa_t *spa = ztest_spa; 4777168404Spjd int fd; 4778168404Spjd uint64_t offset; 4779219089Spjd uint64_t leaves; 4780168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4781168404Spjd uint64_t top, leaf; 4782168404Spjd char path0[MAXPATHLEN]; 4783168404Spjd char pathrand[MAXPATHLEN]; 4784168404Spjd size_t fsize; 4785274337Sdelphij int bshift = SPA_OLD_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4786168404Spjd int iters = 1000; 4787219089Spjd int maxfaults; 4788219089Spjd int mirror_save; 4789185029Spjd vdev_t *vd0 = NULL; 4790168404Spjd uint64_t guid0 = 0; 4791219089Spjd boolean_t islog = B_FALSE; 4792168404Spjd 4793236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4794219089Spjd maxfaults = MAXFAULTS(); 4795236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4796219089Spjd mirror_save = zs->zs_mirrors; 4797236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4798219089Spjd 4799185029Spjd ASSERT(leaves >= 1); 4800168404Spjd 4801168404Spjd /* 4802254074Sdelphij * Grab the name lock as reader. There are some operations 4803254074Sdelphij * which don't like to have their vdevs changed while 4804254074Sdelphij * they are in progress (i.e. spa_change_guid). Those 4805254074Sdelphij * operations will have grabbed the name lock as writer. 4806254074Sdelphij */ 4807254074Sdelphij (void) rw_rdlock(&ztest_name_lock); 4808254074Sdelphij 4809254074Sdelphij /* 4810185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4811168404Spjd */ 4812185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4813168404Spjd 4814185029Spjd if (ztest_random(2) == 0) { 4815185029Spjd /* 4816219089Spjd * Inject errors on a normal data device or slog device. 
4817185029Spjd */ 4818219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4819219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4820168404Spjd 4821185029Spjd /* 4822185029Spjd * Generate paths to the first leaf in this top-level vdev, 4823185029Spjd * and to the random leaf we selected. We'll induce transient 4824185029Spjd * write failures and random online/offline activity on leaf 0, 4825185029Spjd * and we'll write random garbage to the randomly chosen leaf. 4826185029Spjd */ 4827185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4828236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4829236143Smm top * leaves + zs->zs_splits); 4830185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4831236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4832236143Smm top * leaves + leaf); 4833168404Spjd 4834185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4835219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4836219089Spjd islog = B_TRUE; 4837219089Spjd 4838254074Sdelphij /* 4839254074Sdelphij * If the top-level vdev needs to be resilvered 4840254074Sdelphij * then we only allow faults on the device that is 4841254074Sdelphij * resilvering. 4842254074Sdelphij */ 4843254074Sdelphij if (vd0 != NULL && maxfaults != 1 && 4844254074Sdelphij (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || 4845254112Sdelphij vd0->vdev_resilver_txg != 0)) { 4846185029Spjd /* 4847185029Spjd * Make vd0 explicitly claim to be unreadable, 4848185029Spjd * or unwriteable, or reach behind its back 4849185029Spjd * and close the underlying fd. We can do this if 4850185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4851185029Spjd * and we can do it if maxfaults >= 2 because we'll 4852185029Spjd * have enough redundancy. If maxfaults == 1, the 4853185029Spjd * combination of this with injection of random data 4854185029Spjd * corruption below exceeds the pool's fault tolerance. 
4855185029Spjd */ 4856185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4857168404Spjd 4858185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4859185029Spjd (void) close(vf->vf_vnode->v_fd); 4860185029Spjd vf->vf_vnode->v_fd = -1; 4861185029Spjd } else if (ztest_random(2) == 0) { 4862185029Spjd vd0->vdev_cant_read = B_TRUE; 4863185029Spjd } else { 4864185029Spjd vd0->vdev_cant_write = B_TRUE; 4865185029Spjd } 4866185029Spjd guid0 = vd0->vdev_guid; 4867185029Spjd } 4868185029Spjd } else { 4869185029Spjd /* 4870185029Spjd * Inject errors on an l2cache device. 4871185029Spjd */ 4872185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4873168404Spjd 4874185029Spjd if (sav->sav_count == 0) { 4875185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4876254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4877185029Spjd return; 4878185029Spjd } 4879185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4880168404Spjd guid0 = vd0->vdev_guid; 4881185029Spjd (void) strcpy(path0, vd0->vdev_path); 4882185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4883185029Spjd 4884185029Spjd leaf = 0; 4885185029Spjd leaves = 1; 4886185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4887168404Spjd } 4888168404Spjd 4889185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4890254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4891185029Spjd 4892168404Spjd /* 4893219089Spjd * If we can tolerate two or more faults, or we're dealing 4894219089Spjd * with a slog, randomly online/offline vd0. 4895168404Spjd */ 4896219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4897209962Smm if (ztest_random(10) < 6) { 4898209962Smm int flags = (ztest_random(2) == 0 ? 4899209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4900219089Spjd 4901219089Spjd /* 4902219089Spjd * We have to grab the zs_name_lock as writer to 4903219089Spjd * prevent a race between offlining a slog and 4904219089Spjd * destroying a dataset. 
Offlining the slog will 4905219089Spjd * grab a reference on the dataset which may cause 4906219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4907219089Spjd * leaving the dataset in an inconsistent state. 4908219089Spjd */ 4909219089Spjd if (islog) 4910236143Smm (void) rw_wrlock(&ztest_name_lock); 4911219089Spjd 4912209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4913219089Spjd 4914219089Spjd if (islog) 4915236143Smm (void) rw_unlock(&ztest_name_lock); 4916209962Smm } else { 4917242845Sdelphij /* 4918242845Sdelphij * Ideally we would like to be able to randomly 4919242845Sdelphij * call vdev_[on|off]line without holding locks 4920242845Sdelphij * to force unpredictable failures but the side 4921242845Sdelphij * effects of vdev_[on|off]line prevent us from 4922242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 4923242845Sdelphij * prevent a race between injection testing and 4924242845Sdelphij * aux_vdev removal. 4925242845Sdelphij */ 4926242845Sdelphij VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4927209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4928242845Sdelphij VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4929209962Smm } 4930168404Spjd } 4931168404Spjd 4932219089Spjd if (maxfaults == 0) 4933219089Spjd return; 4934219089Spjd 4935168404Spjd /* 4936168404Spjd * We have at least single-fault tolerance, so inject data corruption. 
4937168404Spjd */ 4938168404Spjd fd = open(pathrand, O_RDWR); 4939168404Spjd 4940168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4941168404Spjd return; 4942168404Spjd 4943168404Spjd fsize = lseek(fd, 0, SEEK_END); 4944168404Spjd 4945168404Spjd while (--iters != 0) { 4946168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4947168404Spjd (leaves << bshift) + (leaf << bshift) + 4948168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4949168404Spjd 4950168404Spjd if (offset >= fsize) 4951168404Spjd continue; 4952168404Spjd 4953236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4954219089Spjd if (mirror_save != zs->zs_mirrors) { 4955236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4956219089Spjd (void) close(fd); 4957219089Spjd return; 4958219089Spjd } 4959168404Spjd 4960168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4961168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4962168404Spjd offset, pathrand); 4963219089Spjd 4964236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4965219089Spjd 4966236143Smm if (ztest_opts.zo_verbose >= 7) 4967219089Spjd (void) printf("injected bad word into %s," 4968219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4969168404Spjd } 4970168404Spjd 4971168404Spjd (void) close(fd); 4972168404Spjd} 4973168404Spjd 4974168404Spjd/* 4975219089Spjd * Verify that DDT repair works as expected. 
4976219089Spjd */ 4977219089Spjdvoid 4978219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4979219089Spjd{ 4980219089Spjd ztest_shared_t *zs = ztest_shared; 4981236143Smm spa_t *spa = ztest_spa; 4982219089Spjd objset_t *os = zd->zd_os; 4983219089Spjd ztest_od_t od[1]; 4984219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4985219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4986219089Spjd dmu_buf_t *db; 4987219089Spjd dmu_tx_t *tx; 4988219089Spjd void *buf; 4989219089Spjd blkptr_t blk; 4990219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4991219089Spjd 4992219089Spjd blocksize = ztest_random_blocksize(); 4993219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4994219089Spjd 4995219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4996219089Spjd 4997219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4998219089Spjd return; 4999219089Spjd 5000219089Spjd /* 5001219089Spjd * Take the name lock as writer to prevent anyone else from changing 5002219089Spjd * the pool and dataset properies we need to maintain during this test. 
5003219089Spjd */ 5004236143Smm (void) rw_wrlock(&ztest_name_lock); 5005219089Spjd 5006219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 5007219089Spjd B_FALSE) != 0 || 5008219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 5009219089Spjd B_FALSE) != 0) { 5010236143Smm (void) rw_unlock(&ztest_name_lock); 5011219089Spjd return; 5012219089Spjd } 5013219089Spjd 5014219089Spjd object = od[0].od_object; 5015219089Spjd blocksize = od[0].od_blocksize; 5016228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 5017219089Spjd 5018219089Spjd ASSERT(object != 0); 5019219089Spjd 5020219089Spjd tx = dmu_tx_create(os); 5021219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 5022219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 5023219089Spjd if (txg == 0) { 5024236143Smm (void) rw_unlock(&ztest_name_lock); 5025219089Spjd return; 5026219089Spjd } 5027219089Spjd 5028219089Spjd /* 5029219089Spjd * Write all the copies of our block. 5030219089Spjd */ 5031219089Spjd for (int i = 0; i < copies; i++) { 5032219089Spjd uint64_t offset = i * blocksize; 5033248571Smm int error = dmu_buf_hold(os, object, offset, FTAG, &db, 5034248571Smm DMU_READ_NO_PREFETCH); 5035248571Smm if (error != 0) { 5036248571Smm fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", 5037248571Smm os, (long long)object, (long long) offset, error); 5038248571Smm } 5039219089Spjd ASSERT(db->db_offset == offset); 5040219089Spjd ASSERT(db->db_size == blocksize); 5041219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 5042219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 5043219089Spjd dmu_buf_will_fill(db, tx); 5044219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 5045219089Spjd dmu_buf_rele(db, FTAG); 5046219089Spjd } 5047219089Spjd 5048219089Spjd dmu_tx_commit(tx); 5049219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 5050219089Spjd 5051219089Spjd /* 5052219089Spjd * Find out what block we got. 
5053219089Spjd */ 5054243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 5055243524Smm DMU_READ_NO_PREFETCH)); 5056219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 5057219089Spjd dmu_buf_rele(db, FTAG); 5058219089Spjd 5059219089Spjd /* 5060219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 5061219089Spjd */ 5062219089Spjd psize = BP_GET_PSIZE(&blk); 5063219089Spjd buf = zio_buf_alloc(psize); 5064219089Spjd ztest_pattern_set(buf, psize, ~pattern); 5065219089Spjd 5066219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 5067219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 5068219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 5069219089Spjd 5070219089Spjd zio_buf_free(buf, psize); 5071219089Spjd 5072236143Smm (void) rw_unlock(&ztest_name_lock); 5073219089Spjd} 5074219089Spjd 5075219089Spjd/* 5076168404Spjd * Scrub the pool. 5077168404Spjd */ 5078219089Spjd/* ARGSUSED */ 5079168404Spjdvoid 5080219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5081168404Spjd{ 5082236143Smm spa_t *spa = ztest_spa; 5083168404Spjd 5084219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5085219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5086219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5087168404Spjd} 5088168404Spjd 5089168404Spjd/* 5090228103Smm * Change the guid for the pool. 
5091228103Smm */ 5092228103Smm/* ARGSUSED */ 5093228103Smmvoid 5094228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5095228103Smm{ 5096236143Smm spa_t *spa = ztest_spa; 5097228103Smm uint64_t orig, load; 5098239620Smm int error; 5099228103Smm 5100228103Smm orig = spa_guid(spa); 5101228103Smm load = spa_load_guid(spa); 5102239620Smm 5103239620Smm (void) rw_wrlock(&ztest_name_lock); 5104239620Smm error = spa_change_guid(spa); 5105239620Smm (void) rw_unlock(&ztest_name_lock); 5106239620Smm 5107239620Smm if (error != 0) 5108228103Smm return; 5109228103Smm 5110243505Smm if (ztest_opts.zo_verbose >= 4) { 5111228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5112228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5113228103Smm } 5114228103Smm 5115228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5116228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5117228103Smm} 5118228103Smm 5119228103Smm/* 5120168404Spjd * Rename the pool to a different name and then rename it back. 5121168404Spjd */ 5122219089Spjd/* ARGSUSED */ 5123168404Spjdvoid 5124219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5125168404Spjd{ 5126168404Spjd char *oldname, *newname; 5127168404Spjd spa_t *spa; 5128168404Spjd 5129236143Smm (void) rw_wrlock(&ztest_name_lock); 5130168404Spjd 5131236143Smm oldname = ztest_opts.zo_pool; 5132168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5133168404Spjd (void) strcpy(newname, oldname); 5134168404Spjd (void) strcat(newname, "_tmp"); 5135168404Spjd 5136168404Spjd /* 5137168404Spjd * Do the rename 5138168404Spjd */ 5139219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5140168404Spjd 5141168404Spjd /* 5142168404Spjd * Try to open it under the old name, which shouldn't exist 5143168404Spjd */ 5144219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5145168404Spjd 5146168404Spjd /* 5147168404Spjd * Open it under the new name and make sure it's still the same spa_t. 
5148168404Spjd */ 5149219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5150168404Spjd 5151236143Smm ASSERT(spa == ztest_spa); 5152168404Spjd spa_close(spa, FTAG); 5153168404Spjd 5154168404Spjd /* 5155168404Spjd * Rename it back to the original 5156168404Spjd */ 5157219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5158168404Spjd 5159168404Spjd /* 5160168404Spjd * Make sure it can still be opened 5161168404Spjd */ 5162219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5163168404Spjd 5164236143Smm ASSERT(spa == ztest_spa); 5165168404Spjd spa_close(spa, FTAG); 5166168404Spjd 5167168404Spjd umem_free(newname, strlen(newname) + 1); 5168168404Spjd 5169236143Smm (void) rw_unlock(&ztest_name_lock); 5170168404Spjd} 5171168404Spjd 5172168404Spjd/* 5173219089Spjd * Verify pool integrity by running zdb. 5174168404Spjd */ 5175168404Spjdstatic void 5176219089Spjdztest_run_zdb(char *pool) 5177168404Spjd{ 5178168404Spjd int status; 5179168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5180168404Spjd char zbuf[1024]; 5181168404Spjd char *bin; 5182185029Spjd char *ztest; 5183185029Spjd char *isa; 5184185029Spjd int isalen; 5185168404Spjd FILE *fp; 5186168404Spjd 5187214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5188168404Spjd 5189168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5190168404Spjd bin = strstr(zdb, "/usr/bin/"); 5191185029Spjd ztest = strstr(bin, "/ztest"); 5192185029Spjd isa = bin + 8; 5193185029Spjd isalen = ztest - isa; 5194185029Spjd isa = strdup(isa); 5195168404Spjd /* LINTED */ 5196185029Spjd (void) sprintf(bin, 5197268075Sdelphij "/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s", 5198185029Spjd isalen, 5199185029Spjd isa, 5200236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5201236143Smm ztest_opts.zo_verbose >= 4 ? 
"v" : "", 5202219089Spjd spa_config_path, 5203208047Smm pool); 5204185029Spjd free(isa); 5205168404Spjd 5206236143Smm if (ztest_opts.zo_verbose >= 5) 5207168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5208168404Spjd 5209168404Spjd fp = popen(zdb, "r"); 5210168404Spjd assert(fp != NULL); 5211168404Spjd 5212168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5213236143Smm if (ztest_opts.zo_verbose >= 3) 5214168404Spjd (void) printf("%s", zbuf); 5215168404Spjd 5216168404Spjd status = pclose(fp); 5217168404Spjd 5218168404Spjd if (status == 0) 5219168404Spjd return; 5220168404Spjd 5221168404Spjd ztest_dump_core = 0; 5222168404Spjd if (WIFEXITED(status)) 5223168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5224168404Spjd else 5225168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5226168404Spjd} 5227168404Spjd 5228168404Spjdstatic void 5229168404Spjdztest_walk_pool_directory(char *header) 5230168404Spjd{ 5231168404Spjd spa_t *spa = NULL; 5232168404Spjd 5233236143Smm if (ztest_opts.zo_verbose >= 6) 5234168404Spjd (void) printf("%s\n", header); 5235168404Spjd 5236168404Spjd mutex_enter(&spa_namespace_lock); 5237168404Spjd while ((spa = spa_next(spa)) != NULL) 5238236143Smm if (ztest_opts.zo_verbose >= 6) 5239168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5240168404Spjd mutex_exit(&spa_namespace_lock); 5241168404Spjd} 5242168404Spjd 5243168404Spjdstatic void 5244168404Spjdztest_spa_import_export(char *oldname, char *newname) 5245168404Spjd{ 5246209962Smm nvlist_t *config, *newconfig; 5247168404Spjd uint64_t pool_guid; 5248168404Spjd spa_t *spa; 5249248571Smm int error; 5250168404Spjd 5251236143Smm if (ztest_opts.zo_verbose >= 4) { 5252168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5253168404Spjd oldname, newname); 5254168404Spjd } 5255168404Spjd 5256168404Spjd /* 5257168404Spjd * Clean up from previous runs. 
5258168404Spjd */ 5259168404Spjd (void) spa_destroy(newname); 5260168404Spjd 5261168404Spjd /* 5262168404Spjd * Get the pool's configuration and guid. 5263168404Spjd */ 5264219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5265168404Spjd 5266209962Smm /* 5267209962Smm * Kick off a scrub to tickle scrub/export races. 5268209962Smm */ 5269209962Smm if (ztest_random(2) == 0) 5270219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5271209962Smm 5272168404Spjd pool_guid = spa_guid(spa); 5273168404Spjd spa_close(spa, FTAG); 5274168404Spjd 5275168404Spjd ztest_walk_pool_directory("pools before export"); 5276168404Spjd 5277168404Spjd /* 5278168404Spjd * Export it. 5279168404Spjd */ 5280219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5281168404Spjd 5282168404Spjd ztest_walk_pool_directory("pools after export"); 5283168404Spjd 5284168404Spjd /* 5285209962Smm * Try to import it. 5286209962Smm */ 5287209962Smm newconfig = spa_tryimport(config); 5288209962Smm ASSERT(newconfig != NULL); 5289209962Smm nvlist_free(newconfig); 5290209962Smm 5291209962Smm /* 5292168404Spjd * Import it under the new name. 5293168404Spjd */ 5294248571Smm error = spa_import(newname, config, NULL, 0); 5295248571Smm if (error != 0) { 5296248571Smm dump_nvlist(config, 0); 5297248571Smm fatal(B_FALSE, "couldn't import pool %s as %s: error %u", 5298248571Smm oldname, newname, error); 5299248571Smm } 5300168404Spjd 5301168404Spjd ztest_walk_pool_directory("pools after import"); 5302168404Spjd 5303168404Spjd /* 5304168404Spjd * Try to import it again -- should fail with EEXIST. 5305168404Spjd */ 5306219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5307168404Spjd 5308168404Spjd /* 5309168404Spjd * Try to import it under a different name -- should fail with EEXIST. 
5310168404Spjd */ 5311219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5312168404Spjd 5313168404Spjd /* 5314168404Spjd * Verify that the pool is no longer visible under the old name. 5315168404Spjd */ 5316219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5317168404Spjd 5318168404Spjd /* 5319168404Spjd * Verify that we can open and close the pool using the new name. 5320168404Spjd */ 5321219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5322168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5323168404Spjd spa_close(spa, FTAG); 5324168404Spjd 5325168404Spjd nvlist_free(config); 5326168404Spjd} 5327168404Spjd 5328209962Smmstatic void 5329209962Smmztest_resume(spa_t *spa) 5330209962Smm{ 5331236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5332219089Spjd (void) printf("resuming from suspended state\n"); 5333219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5334219089Spjd vdev_clear(spa, NULL); 5335219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5336219089Spjd (void) zio_resume(spa); 5337209962Smm} 5338209962Smm 5339168404Spjdstatic void * 5340209962Smmztest_resume_thread(void *arg) 5341185029Spjd{ 5342185029Spjd spa_t *spa = arg; 5343185029Spjd 5344185029Spjd while (!ztest_exiting) { 5345219089Spjd if (spa_suspended(spa)) 5346219089Spjd ztest_resume(spa); 5347219089Spjd (void) poll(NULL, 0, 100); 5348185029Spjd } 5349185029Spjd return (NULL); 5350185029Spjd} 5351185029Spjd 5352185029Spjdstatic void * 5353219089Spjdztest_deadman_thread(void *arg) 5354219089Spjd{ 5355219089Spjd ztest_shared_t *zs = arg; 5356254074Sdelphij spa_t *spa = ztest_spa; 5357254074Sdelphij hrtime_t delta, total = 0; 5358219089Spjd 5359254074Sdelphij for (;;) { 5360258632Savg delta = zs->zs_thread_stop - zs->zs_thread_start + 5361258632Savg MSEC2NSEC(zfs_deadman_synctime_ms); 5362219089Spjd 5363258632Savg (void) poll(NULL, 0, (int)NSEC2MSEC(delta)); 5364219089Spjd 5365254074Sdelphij /* 5366254074Sdelphij * If the pool is suspended then fail 
immediately. Otherwise, 5367254074Sdelphij * check to see if the pool is making any progress. If 5368254074Sdelphij * vdev_deadman() discovers that there hasn't been any recent 5369254074Sdelphij * I/Os then it will end up aborting the tests. 5370254074Sdelphij */ 5371258717Savg if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { 5372254074Sdelphij fatal(0, "aborting test after %llu seconds because " 5373254074Sdelphij "pool has transitioned to a suspended state.", 5374258632Savg zfs_deadman_synctime_ms / 1000); 5375254074Sdelphij return (NULL); 5376254074Sdelphij } 5377254074Sdelphij vdev_deadman(spa->spa_root_vdev); 5378219089Spjd 5379258632Savg total += zfs_deadman_synctime_ms/1000; 5380254074Sdelphij (void) printf("ztest has been running for %lld seconds\n", 5381254074Sdelphij total); 5382254074Sdelphij } 5383219089Spjd} 5384219089Spjd 5385219089Spjdstatic void 5386236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5387219089Spjd{ 5388236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5389236143Smm ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5390219089Spjd hrtime_t functime = gethrtime(); 5391219089Spjd 5392219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5393219089Spjd zi->zi_func(zd, id); 5394219089Spjd 5395219089Spjd functime = gethrtime() - functime; 5396219089Spjd 5397236143Smm atomic_add_64(&zc->zc_count, 1); 5398236143Smm atomic_add_64(&zc->zc_time, functime); 5399219089Spjd 5400236143Smm if (ztest_opts.zo_verbose >= 4) { 5401219089Spjd Dl_info dli; 5402219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5403219089Spjd (void) printf("%6.2f sec in %s\n", 5404219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5405219089Spjd } 5406219089Spjd} 5407219089Spjd 5408219089Spjdstatic void * 5409168404Spjdztest_thread(void *arg) 5410168404Spjd{ 5411236143Smm int rand; 5412219089Spjd uint64_t id = (uintptr_t)arg; 5413168404Spjd ztest_shared_t *zs = ztest_shared; 5414219089Spjd uint64_t call_next; 
5415219089Spjd hrtime_t now; 5416168404Spjd ztest_info_t *zi; 5417236143Smm ztest_shared_callstate_t *zc; 5418168404Spjd 5419219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5420168404Spjd /* 5421168404Spjd * See if it's time to force a crash. 5422168404Spjd */ 5423219089Spjd if (now > zs->zs_thread_kill) 5424219089Spjd ztest_kill(zs); 5425168404Spjd 5426168404Spjd /* 5427219089Spjd * If we're getting ENOSPC with some regularity, stop. 5428168404Spjd */ 5429219089Spjd if (zs->zs_enospc_count > 10) 5430219089Spjd break; 5431168404Spjd 5432168404Spjd /* 5433219089Spjd * Pick a random function to execute. 5434168404Spjd */ 5435236143Smm rand = ztest_random(ZTEST_FUNCS); 5436236143Smm zi = &ztest_info[rand]; 5437236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5438236143Smm call_next = zc->zc_next; 5439168404Spjd 5440219089Spjd if (now >= call_next && 5441236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5442236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5443236143Smm ztest_execute(rand, zi, id); 5444236143Smm } 5445219089Spjd } 5446168404Spjd 5447219089Spjd return (NULL); 5448219089Spjd} 5449168404Spjd 5450219089Spjdstatic void 5451219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5452219089Spjd{ 5453219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5454219089Spjd} 5455168404Spjd 5456219089Spjdstatic void 5457236143Smmztest_dataset_destroy(int d) 5458219089Spjd{ 5459219089Spjd char name[MAXNAMELEN]; 5460168404Spjd 5461236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5462168404Spjd 5463236143Smm if (ztest_opts.zo_verbose >= 3) 5464219089Spjd (void) printf("Destroying %s to free up space\n", name); 5465168404Spjd 5466219089Spjd /* 5467219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5468219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5469219089Spjd * so there may be more than one thing to clean up. 
5470219089Spjd */ 5471236143Smm for (int t = d; t < ztest_opts.zo_threads; 5472236143Smm t += ztest_opts.zo_datasets) { 5473219089Spjd ztest_dsl_dataset_cleanup(name, t); 5474236143Smm } 5475219089Spjd 5476219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5477219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5478219089Spjd} 5479219089Spjd 5480219089Spjdstatic void 5481219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5482219089Spjd{ 5483219089Spjd uint64_t usedobjs, dirobjs, scratch; 5484219089Spjd 5485219089Spjd /* 5486219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 5487219089Spjd * Therefore, the number of objects in use should equal the 5488219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5489219089Spjd * If not, we have an object leak. 5490219089Spjd * 5491219089Spjd * Note that we can only check this in ztest_dataset_open(), 5492219089Spjd * when the open-context and syncing-context values agree. 5493219089Spjd * That's because zap_count() returns the open-context value, 5494219089Spjd * while dmu_objset_space() returns the rootbp fill count. 
5495219089Spjd */ 5496219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5497219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5498219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5499219089Spjd} 5500219089Spjd 5501219089Spjdstatic int 5502236143Smmztest_dataset_open(int d) 5503219089Spjd{ 5504236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5505236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5506219089Spjd objset_t *os; 5507219089Spjd zilog_t *zilog; 5508219089Spjd char name[MAXNAMELEN]; 5509219089Spjd int error; 5510219089Spjd 5511236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5512219089Spjd 5513236143Smm (void) rw_rdlock(&ztest_name_lock); 5514219089Spjd 5515219089Spjd error = ztest_dataset_create(name); 5516219089Spjd if (error == ENOSPC) { 5517236143Smm (void) rw_unlock(&ztest_name_lock); 5518219089Spjd ztest_record_enospc(FTAG); 5519219089Spjd return (error); 5520168404Spjd } 5521219089Spjd ASSERT(error == 0 || error == EEXIST); 5522168404Spjd 5523248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); 5524236143Smm (void) rw_unlock(&ztest_name_lock); 5525219089Spjd 5526236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5527219089Spjd 5528219089Spjd zilog = zd->zd_zilog; 5529219089Spjd 5530219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5531219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5532219089Spjd fatal(0, "missing log records: claimed %llu < committed %llu", 5533219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5534219089Spjd 5535219089Spjd ztest_dataset_dirobj_verify(zd); 5536219089Spjd 5537219089Spjd zil_replay(os, zd, ztest_replay_vector); 5538219089Spjd 5539219089Spjd ztest_dataset_dirobj_verify(zd); 5540219089Spjd 5541236143Smm if (ztest_opts.zo_verbose >= 6) 5542219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5543219089Spjd zd->zd_name, 5544219089Spjd 
(u_longlong_t)zilog->zl_parse_blk_count, 5545219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5546219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5547219089Spjd 5548219089Spjd zilog = zil_open(os, ztest_get_data); 5549219089Spjd 5550219089Spjd if (zilog->zl_replaying_seq != 0 && 5551219089Spjd zilog->zl_replaying_seq < committed_seq) 5552219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5553219089Spjd zilog->zl_replaying_seq, committed_seq); 5554219089Spjd 5555219089Spjd return (0); 5556168404Spjd} 5557168404Spjd 5558219089Spjdstatic void 5559236143Smmztest_dataset_close(int d) 5560219089Spjd{ 5561236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5562219089Spjd 5563219089Spjd zil_close(zd->zd_zilog); 5564248571Smm dmu_objset_disown(zd->zd_os, zd); 5565219089Spjd 5566219089Spjd ztest_zd_fini(zd); 5567219089Spjd} 5568219089Spjd 5569168404Spjd/* 5570168404Spjd * Kick off threads to run tests on all datasets in parallel. 5571168404Spjd */ 5572168404Spjdstatic void 5573219089Spjdztest_run(ztest_shared_t *zs) 5574168404Spjd{ 5575219089Spjd thread_t *tid; 5576168404Spjd spa_t *spa; 5577228103Smm objset_t *os; 5578185029Spjd thread_t resume_tid; 5579219089Spjd int error; 5580168404Spjd 5581185029Spjd ztest_exiting = B_FALSE; 5582185029Spjd 5583168404Spjd /* 5584219089Spjd * Initialize parent/child shared state. 
5585168404Spjd */ 5586236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5587236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5588168404Spjd 5589219089Spjd zs->zs_thread_start = gethrtime(); 5590236143Smm zs->zs_thread_stop = 5591236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5592219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5593219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5594236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5595236143Smm zs->zs_thread_kill -= 5596236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5597236143Smm } 5598168404Spjd 5599219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5600168404Spjd 5601219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5602219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5603168404Spjd 5604168404Spjd /* 5605219089Spjd * Open our pool. 5606168404Spjd */ 5607219089Spjd kernel_init(FREAD | FWRITE); 5608248571Smm VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5609224177Smm spa->spa_debug = B_TRUE; 5610268086Sdelphij metaslab_preload_limit = ztest_random(20) + 1; 5611236143Smm ztest_spa = spa; 5612168404Spjd 5613248571Smm VERIFY0(dmu_objset_own(ztest_opts.zo_pool, 5614248571Smm DMU_OST_ANY, B_TRUE, FTAG, &os)); 5615228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5616248571Smm dmu_objset_disown(os, FTAG); 5617228103Smm 5618219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5619168404Spjd 5620168404Spjd /* 5621209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5622209962Smm * in which case ztest_fault_inject() temporarily takes away 5623209962Smm * the only valid replica. 
5624209962Smm */ 5625219089Spjd if (MAXFAULTS() == 0) 5626209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5627209962Smm else 5628209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5629209962Smm 5630209962Smm /* 5631185029Spjd * Create a thread to periodically resume suspended I/O. 5632185029Spjd */ 5633209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5634185029Spjd &resume_tid) == 0); 5635185029Spjd 5636185029Spjd /* 5637219089Spjd * Create a deadman thread to abort() if we hang. 5638219089Spjd */ 5639219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5640219089Spjd NULL) == 0); 5641219089Spjd 5642219089Spjd /* 5643168404Spjd * Verify that we can safely inquire about about any object, 5644168404Spjd * whether it's allocated or not. To make it interesting, 5645168404Spjd * we probe a 5-wide window around each power of two. 5646168404Spjd * This hits all edge cases, including zero and the max. 5647168404Spjd */ 5648219089Spjd for (int t = 0; t < 64; t++) { 5649219089Spjd for (int d = -5; d <= 5; d++) { 5650168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5651168404Spjd (1ULL << t) + d, NULL); 5652168404Spjd ASSERT(error == 0 || error == ENOENT || 5653168404Spjd error == EINVAL); 5654168404Spjd } 5655168404Spjd } 5656168404Spjd 5657168404Spjd /* 5658219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5659168404Spjd */ 5660219089Spjd if (zs->zs_enospc_count != 0) { 5661236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5662236143Smm ztest_dataset_destroy(d); 5663219089Spjd } 5664168404Spjd zs->zs_enospc_count = 0; 5665168404Spjd 5666236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5667236143Smm UMEM_NOFAIL); 5668168404Spjd 5669236143Smm if (ztest_opts.zo_verbose >= 4) 5670168404Spjd (void) printf("starting main threads...\n"); 5671168404Spjd 5672219089Spjd /* 5673219089Spjd * Kick off all the tests that run in parallel. 
5674219089Spjd */ 5675236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5676236143Smm if (t < ztest_opts.zo_datasets && 5677236143Smm ztest_dataset_open(t) != 0) 5678219089Spjd return; 5679219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5680219089Spjd THR_BOUND, &tid[t]) == 0); 5681219089Spjd } 5682168404Spjd 5683219089Spjd /* 5684219089Spjd * Wait for all of the tests to complete. We go in reverse order 5685219089Spjd * so we don't close datasets while threads are still using them. 5686219089Spjd */ 5687236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5688219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5689236143Smm if (t < ztest_opts.zo_datasets) 5690236143Smm ztest_dataset_close(t); 5691219089Spjd } 5692185029Spjd 5693219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5694185029Spjd 5695219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5696219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5697254112Sdelphij zfs_dbgmsg_print(FTAG); 5698168404Spjd 5699236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5700168404Spjd 5701219089Spjd /* Kill the resume thread */ 5702219089Spjd ztest_exiting = B_TRUE; 5703219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5704219089Spjd ztest_resume(spa); 5705219089Spjd 5706219089Spjd /* 5707219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5708219089Spjd * spa_close() should wait for it to complete. 5709219089Spjd */ 5710286705Smav for (uint64_t object = 1; object < 50; object++) { 5711286705Smav dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, 5712286705Smav ZIO_PRIORITY_SYNC_READ); 5713286705Smav } 5714219089Spjd 5715219089Spjd spa_close(spa, FTAG); 5716219089Spjd 5717219089Spjd /* 5718219089Spjd * Verify that we can loop over all pools. 
5719219089Spjd */ 5720219089Spjd mutex_enter(&spa_namespace_lock); 5721219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5722236143Smm if (ztest_opts.zo_verbose > 3) 5723219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5724219089Spjd mutex_exit(&spa_namespace_lock); 5725219089Spjd 5726219089Spjd /* 5727219089Spjd * Verify that we can export the pool and reimport it under a 5728219089Spjd * different name. 5729219089Spjd */ 5730219089Spjd if (ztest_random(2) == 0) { 5731219089Spjd char name[MAXNAMELEN]; 5732236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5733236143Smm ztest_opts.zo_pool); 5734236143Smm ztest_spa_import_export(ztest_opts.zo_pool, name); 5735236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5736168404Spjd } 5737168404Spjd 5738219089Spjd kernel_fini(); 5739219089Spjd 5740219089Spjd list_destroy(&zcl.zcl_callbacks); 5741219089Spjd 5742219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5743219089Spjd 5744236143Smm (void) rwlock_destroy(&ztest_name_lock); 5745236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5746219089Spjd} 5747219089Spjd 5748219089Spjdstatic void 5749236143Smmztest_freeze(void) 5750219089Spjd{ 5751236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5752219089Spjd spa_t *spa; 5753219089Spjd int numloops = 0; 5754219089Spjd 5755236143Smm if (ztest_opts.zo_verbose >= 3) 5756219089Spjd (void) printf("testing spa_freeze()...\n"); 5757168404Spjd 5758219089Spjd kernel_init(FREAD | FWRITE); 5759236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5760236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5761243524Smm spa->spa_debug = B_TRUE; 5762243524Smm ztest_spa = spa; 5763168404Spjd 5764168404Spjd /* 5765219089Spjd * Force the first log block to be transactionally allocated. 5766219089Spjd * We have to do this before we freeze the pool -- otherwise 5767219089Spjd * the log chain won't be anchored. 
5768168404Spjd */ 5769219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5770219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5771219089Spjd zil_commit(zd->zd_zilog, 0); 5772168404Spjd } 5773168404Spjd 5774168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5775168404Spjd 5776219089Spjd /* 5777219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5778219089Spjd * so that the only way to record changes from now on is the ZIL. 5779219089Spjd */ 5780219089Spjd spa_freeze(spa); 5781185029Spjd 5782219089Spjd /* 5783268855Sdelphij * Because it is hard to predict how much space a write will actually 5784268855Sdelphij * require beforehand, we leave ourselves some fudge space to write over 5785268855Sdelphij * capacity. 5786268855Sdelphij */ 5787268855Sdelphij uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2; 5788268855Sdelphij 5789268855Sdelphij /* 5790219089Spjd * Run tests that generate log records but don't alter the pool config 5791219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5792219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5793219089Spjd * to increase well beyond the last synced value in the uberblock. 5794219089Spjd * The ZIL should be OK with that. 5795268855Sdelphij * 5796268855Sdelphij * Run a random number of times less than zo_maxloops and ensure we do 5797268855Sdelphij * not run out of space on the pool. 
5798219089Spjd */ 5799236143Smm while (ztest_random(10) != 0 && 5800268855Sdelphij numloops++ < ztest_opts.zo_maxloops && 5801268855Sdelphij metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) { 5802268855Sdelphij ztest_od_t od; 5803268855Sdelphij ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 5804268855Sdelphij VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE)); 5805268855Sdelphij ztest_io(zd, od.od_object, 5806268855Sdelphij ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 5807219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5808219089Spjd } 5809185029Spjd 5810168404Spjd /* 5811219089Spjd * Commit all of the changes we just generated. 5812168404Spjd */ 5813219089Spjd zil_commit(zd->zd_zilog, 0); 5814219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5815168404Spjd 5816219089Spjd /* 5817219089Spjd * Close our dataset and close the pool. 5818219089Spjd */ 5819236143Smm ztest_dataset_close(0); 5820168404Spjd spa_close(spa, FTAG); 5821219089Spjd kernel_fini(); 5822168404Spjd 5823219089Spjd /* 5824219089Spjd * Open and close the pool and dataset to induce log replay. 
5825219089Spjd */ 5826219089Spjd kernel_init(FREAD | FWRITE); 5827236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5828239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 5829236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5830236143Smm ztest_dataset_close(0); 5831239620Smm 5832239620Smm spa->spa_debug = B_TRUE; 5833239620Smm ztest_spa = spa; 5834239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 5835239620Smm ztest_reguid(NULL, 0); 5836239620Smm 5837219089Spjd spa_close(spa, FTAG); 5838168404Spjd kernel_fini(); 5839168404Spjd} 5840168404Spjd 5841168404Spjdvoid 5842168404Spjdprint_time(hrtime_t t, char *timebuf) 5843168404Spjd{ 5844168404Spjd hrtime_t s = t / NANOSEC; 5845168404Spjd hrtime_t m = s / 60; 5846168404Spjd hrtime_t h = m / 60; 5847168404Spjd hrtime_t d = h / 24; 5848168404Spjd 5849168404Spjd s -= m * 60; 5850168404Spjd m -= h * 60; 5851168404Spjd h -= d * 24; 5852168404Spjd 5853168404Spjd timebuf[0] = '\0'; 5854168404Spjd 5855168404Spjd if (d) 5856168404Spjd (void) sprintf(timebuf, 5857168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5858168404Spjd else if (h) 5859168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5860168404Spjd else if (m) 5861168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5862168404Spjd else 5863168404Spjd (void) sprintf(timebuf, "%llus", s); 5864168404Spjd} 5865168404Spjd 5866219089Spjdstatic nvlist_t * 5867219089Spjdmake_random_props() 5868219089Spjd{ 5869219089Spjd nvlist_t *props; 5870219089Spjd 5871236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5872219089Spjd if (ztest_random(2) == 0) 5873236884Smm return (props); 5874219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5875219089Spjd 5876219089Spjd return (props); 5877219089Spjd} 5878219089Spjd 5879168404Spjd/* 5880168404Spjd * Create a storage pool with the given name and initial vdev size. 5881219089Spjd * Then test spa_freeze() functionality. 
5882168404Spjd */ 5883168404Spjdstatic void 5884219089Spjdztest_init(ztest_shared_t *zs) 5885168404Spjd{ 5886168404Spjd spa_t *spa; 5887219089Spjd nvlist_t *nvroot, *props; 5888168404Spjd 5889236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5890236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5891219089Spjd 5892168404Spjd kernel_init(FREAD | FWRITE); 5893168404Spjd 5894168404Spjd /* 5895168404Spjd * Create the storage pool. 5896168404Spjd */ 5897236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5898219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5899219089Spjd zs->zs_splits = 0; 5900236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5901243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 5902236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5903219089Spjd props = make_random_props(); 5904236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 5905236884Smm char buf[1024]; 5906236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5907236884Smm spa_feature_table[i].fi_uname); 5908236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5909236884Smm } 5910248571Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); 5911168404Spjd nvlist_free(nvroot); 5912286737Sdelphij nvlist_free(props); 5913168404Spjd 5914236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5915236143Smm zs->zs_metaslab_sz = 5916236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5917236884Smm 5918219089Spjd spa_close(spa, FTAG); 5919209962Smm 5920219089Spjd kernel_fini(); 5921168404Spjd 5922236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5923168404Spjd 5924236143Smm ztest_freeze(); 5925219089Spjd 5926236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5927219089Spjd 5928236143Smm (void) rwlock_destroy(&ztest_name_lock); 5929236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5930168404Spjd} 5931168404Spjd 5932236143Smmstatic void 5933242845Sdelphijsetup_data_fd(void) 
5934236143Smm{ 5935242845Sdelphij static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 5936236143Smm 5937242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 5938242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 5939242845Sdelphij (void) unlink(ztest_name_data); 5940242845Sdelphij} 5941236143Smm 5942236143Smm 5943236884Smmstatic int 5944236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5945236884Smm{ 5946236884Smm int size; 5947236884Smm 5948236884Smm size = hdr->zh_hdr_size; 5949236884Smm size += hdr->zh_opts_size; 5950236884Smm size += hdr->zh_size; 5951236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5952236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5953236884Smm 5954236884Smm return (size); 5955236884Smm} 5956236884Smm 5957236143Smmstatic void 5958236143Smmsetup_hdr(void) 5959236143Smm{ 5960236884Smm int size; 5961236143Smm ztest_shared_hdr_t *hdr; 5962236143Smm 5963236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5964242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5965236143Smm ASSERT(hdr != MAP_FAILED); 5966236143Smm 5967242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 5968236884Smm 5969236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5970236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5971236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5972236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5973236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5974236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5975236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5976236143Smm 5977236884Smm size = shared_data_size(hdr); 5978242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 5979236884Smm 5980236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5981236143Smm} 5982236143Smm 5983236143Smmstatic void 5984236143Smmsetup_data(void) 5985236143Smm{ 5986236143Smm int size, offset; 
5987236143Smm ztest_shared_hdr_t *hdr; 5988236143Smm uint8_t *buf; 5989236143Smm 5990236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5991242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 5992236143Smm ASSERT(hdr != MAP_FAILED); 5993236143Smm 5994236884Smm size = shared_data_size(hdr); 5995236143Smm 5996236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5997236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5998242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5999236143Smm ASSERT(hdr != MAP_FAILED); 6000236143Smm buf = (uint8_t *)hdr; 6001236143Smm 6002236143Smm offset = hdr->zh_hdr_size; 6003236143Smm ztest_shared_opts = (void *)&buf[offset]; 6004236143Smm offset += hdr->zh_opts_size; 6005236143Smm ztest_shared = (void *)&buf[offset]; 6006236143Smm offset += hdr->zh_size; 6007236143Smm ztest_shared_callstate = (void *)&buf[offset]; 6008236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 6009236143Smm ztest_shared_ds = (void *)&buf[offset]; 6010236143Smm} 6011236143Smm 6012236143Smmstatic boolean_t 6013236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 6014236143Smm{ 6015236143Smm pid_t pid; 6016236143Smm int status; 6017242845Sdelphij char *cmdbuf = NULL; 6018236143Smm 6019236143Smm pid = fork(); 6020236143Smm 6021236143Smm if (cmd == NULL) { 6022242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 6023242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 6024236143Smm cmd = cmdbuf; 6025236143Smm } 6026236143Smm 6027236143Smm if (pid == -1) 6028236143Smm fatal(1, "fork failed"); 6029236143Smm 6030236143Smm if (pid == 0) { /* child */ 6031236143Smm char *emptyargv[2] = { cmd, NULL }; 6032242845Sdelphij char fd_data_str[12]; 6033236143Smm 6034236143Smm struct rlimit rl = { 1024, 1024 }; 6035236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 6036242845Sdelphij 6037242845Sdelphij (void) 
close(ztest_fd_rand); 6038242845Sdelphij VERIFY3U(11, >=, 6039242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 6040242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 6041242845Sdelphij 6042236143Smm (void) enable_extended_FILE_stdio(-1, -1); 6043236143Smm if (libpath != NULL) 6044236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 6045236143Smm#ifdef illumos 6046236143Smm (void) execv(cmd, emptyargv); 6047236143Smm#else 6048236143Smm (void) execvp(cmd, emptyargv); 6049236143Smm#endif 6050236143Smm ztest_dump_core = B_FALSE; 6051236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 6052236143Smm } 6053236143Smm 6054242845Sdelphij if (cmdbuf != NULL) { 6055242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 6056242845Sdelphij cmd = NULL; 6057242845Sdelphij } 6058242845Sdelphij 6059236143Smm while (waitpid(pid, &status, 0) != pid) 6060236143Smm continue; 6061236143Smm if (statusp != NULL) 6062236143Smm *statusp = status; 6063236143Smm 6064236143Smm if (WIFEXITED(status)) { 6065236143Smm if (WEXITSTATUS(status) != 0) { 6066236143Smm (void) fprintf(stderr, "child exited with code %d\n", 6067236143Smm WEXITSTATUS(status)); 6068236143Smm exit(2); 6069236143Smm } 6070236143Smm return (B_FALSE); 6071236143Smm } else if (WIFSIGNALED(status)) { 6072236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 6073236143Smm (void) fprintf(stderr, "child died with signal %d\n", 6074236143Smm WTERMSIG(status)); 6075236143Smm exit(3); 6076236143Smm } 6077236143Smm return (B_TRUE); 6078236143Smm } else { 6079236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 6080236143Smm exit(4); 6081236143Smm /* NOTREACHED */ 6082236143Smm } 6083236143Smm} 6084236143Smm 6085236143Smmstatic void 6086236143Smmztest_run_init(void) 6087236143Smm{ 6088236143Smm ztest_shared_t *zs = ztest_shared; 6089236143Smm 6090236143Smm ASSERT(ztest_opts.zo_init != 0); 6091236143Smm 6092236143Smm /* 6093236143Smm * Blow away any existing copy of zpool.cache 
6094236143Smm */ 6095236143Smm (void) remove(spa_config_path); 6096236143Smm 6097236143Smm /* 6098236143Smm * Create and initialize our storage pool. 6099236143Smm */ 6100236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 6101236143Smm bzero(zs, sizeof (ztest_shared_t)); 6102236143Smm if (ztest_opts.zo_verbose >= 3 && 6103236143Smm ztest_opts.zo_init != 1) { 6104236143Smm (void) printf("ztest_init(), pass %d\n", i); 6105236143Smm } 6106236143Smm ztest_init(zs); 6107236143Smm } 6108236143Smm} 6109236143Smm 6110168404Spjdint 6111168404Spjdmain(int argc, char **argv) 6112168404Spjd{ 6113168404Spjd int kills = 0; 6114168404Spjd int iters = 0; 6115236143Smm int older = 0; 6116236143Smm int newer = 0; 6117168404Spjd ztest_shared_t *zs; 6118168404Spjd ztest_info_t *zi; 6119236143Smm ztest_shared_callstate_t *zc; 6120168404Spjd char timebuf[100]; 6121168404Spjd char numbuf[6]; 6122219089Spjd spa_t *spa; 6123242845Sdelphij char *cmd; 6124236143Smm boolean_t hasalt; 6125242845Sdelphij char *fd_data_str = getenv("ZTEST_FD_DATA"); 6126168404Spjd 6127168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 6128168404Spjd 6129240133Smm dprintf_setup(&argc, argv); 6130258632Savg zfs_deadman_synctime_ms = 300000; 6131240133Smm 6132242845Sdelphij ztest_fd_rand = open("/dev/urandom", O_RDONLY); 6133242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 6134242845Sdelphij 6135242845Sdelphij if (!fd_data_str) { 6136236143Smm process_options(argc, argv); 6137168404Spjd 6138242845Sdelphij setup_data_fd(); 6139236143Smm setup_hdr(); 6140236143Smm setup_data(); 6141236143Smm bcopy(&ztest_opts, ztest_shared_opts, 6142236143Smm sizeof (*ztest_shared_opts)); 6143236143Smm } else { 6144242845Sdelphij ztest_fd_data = atoi(fd_data_str); 6145236143Smm setup_data(); 6146236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 6147236143Smm } 6148236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 6149168404Spjd 6150219089Spjd /* Override location of zpool.cache 
*/ 6151242845Sdelphij VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache", 6152242845Sdelphij ztest_opts.zo_dir), !=, -1); 6153219089Spjd 6154236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 6155236143Smm UMEM_NOFAIL); 6156236143Smm zs = ztest_shared; 6157168404Spjd 6158242845Sdelphij if (fd_data_str) { 6159236143Smm metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 6160236143Smm metaslab_df_alloc_threshold = 6161236143Smm zs->zs_metaslab_df_alloc_threshold; 6162219089Spjd 6163236143Smm if (zs->zs_do_init) 6164236143Smm ztest_run_init(); 6165236143Smm else 6166236143Smm ztest_run(zs); 6167236143Smm exit(0); 6168236143Smm } 6169168404Spjd 6170236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 6171236143Smm 6172236143Smm if (ztest_opts.zo_verbose >= 1) { 6173168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 6174168404Spjd " %llu seconds...\n", 6175236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 6176236143Smm ztest_opts.zo_datasets, 6177236143Smm ztest_opts.zo_threads, 6178236143Smm (u_longlong_t)ztest_opts.zo_time); 6179168404Spjd } 6180168404Spjd 6181242845Sdelphij cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); 6182242845Sdelphij (void) strlcpy(cmd, getexecname(), MAXNAMELEN); 6183236143Smm 6184236143Smm zs->zs_do_init = B_TRUE; 6185236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 6186236143Smm if (ztest_opts.zo_verbose >= 1) { 6187236143Smm (void) printf("Executing older ztest for " 6188236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 6189236143Smm } 6190236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 6191236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 6192236143Smm } else { 6193236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 6194168404Spjd } 6195236143Smm zs->zs_do_init = B_FALSE; 6196168404Spjd 6197219089Spjd zs->zs_proc_start = gethrtime(); 6198236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 6199219089Spjd 6200219089Spjd for (int f = 0; 
f < ZTEST_FUNCS; f++) { 6201236143Smm zi = &ztest_info[f]; 6202236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6203219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 6204236143Smm zc->zc_next = UINT64_MAX; 6205168404Spjd else 6206236143Smm zc->zc_next = zs->zs_proc_start + 6207219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 6208168404Spjd } 6209168404Spjd 6210168404Spjd /* 6211168404Spjd * Run the tests in a loop. These tests include fault injection 6212168404Spjd * to verify that self-healing data works, and forced crashes 6213168404Spjd * to verify that we never lose on-disk consistency. 6214168404Spjd */ 6215219089Spjd while (gethrtime() < zs->zs_proc_stop) { 6216168404Spjd int status; 6217236143Smm boolean_t killed; 6218168404Spjd 6219168404Spjd /* 6220168404Spjd * Initialize the workload counters for each function. 6221168404Spjd */ 6222219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6223236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6224236143Smm zc->zc_count = 0; 6225236143Smm zc->zc_time = 0; 6226168404Spjd } 6227168404Spjd 6228209962Smm /* Set the allocation switch size */ 6229236143Smm zs->zs_metaslab_df_alloc_threshold = 6230236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 6231209962Smm 6232236143Smm if (!hasalt || ztest_random(2) == 0) { 6233236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6234236143Smm (void) printf("Executing newer ztest: %s\n", 6235236143Smm cmd); 6236168404Spjd } 6237236143Smm newer++; 6238236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 6239236143Smm } else { 6240236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6241236143Smm (void) printf("Executing older ztest: %s\n", 6242236143Smm ztest_opts.zo_alt_ztest); 6243168404Spjd } 6244236143Smm older++; 6245236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 6246236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 6247168404Spjd } 6248168404Spjd 6249236143Smm if (killed) 6250236143Smm kills++; 6251168404Spjd iters++; 6252168404Spjd 
6253236143Smm if (ztest_opts.zo_verbose >= 1) { 6254168404Spjd hrtime_t now = gethrtime(); 6255168404Spjd 6256219089Spjd now = MIN(now, zs->zs_proc_stop); 6257219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 6258168404Spjd nicenum(zs->zs_space, numbuf); 6259168404Spjd 6260168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 6261168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 6262168404Spjd iters, 6263168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 6264168404Spjd (u_longlong_t)zs->zs_enospc_count, 6265168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 6266168404Spjd numbuf, 6267219089Spjd 100.0 * (now - zs->zs_proc_start) / 6268236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 6269168404Spjd } 6270168404Spjd 6271236143Smm if (ztest_opts.zo_verbose >= 2) { 6272168404Spjd (void) printf("\nWorkload summary:\n\n"); 6273168404Spjd (void) printf("%7s %9s %s\n", 6274168404Spjd "Calls", "Time", "Function"); 6275168404Spjd (void) printf("%7s %9s %s\n", 6276168404Spjd "-----", "----", "--------"); 6277219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6278168404Spjd Dl_info dli; 6279168404Spjd 6280236143Smm zi = &ztest_info[f]; 6281236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6282236143Smm print_time(zc->zc_time, timebuf); 6283168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 6284168404Spjd (void) printf("%7llu %9s %s\n", 6285236143Smm (u_longlong_t)zc->zc_count, timebuf, 6286168404Spjd dli.dli_sname); 6287168404Spjd } 6288168404Spjd (void) printf("\n"); 6289168404Spjd } 6290168404Spjd 6291168404Spjd /* 6292219089Spjd * It's possible that we killed a child during a rename test, 6293219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6294219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 
6295168404Spjd */ 6296219089Spjd kernel_init(FREAD); 6297236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6298219089Spjd spa_close(spa, FTAG); 6299219089Spjd } else { 6300219089Spjd char tmpname[MAXNAMELEN]; 6301219089Spjd kernel_fini(); 6302219089Spjd kernel_init(FREAD | FWRITE); 6303219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6304236143Smm ztest_opts.zo_pool); 6305236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6306219089Spjd } 6307168404Spjd kernel_fini(); 6308219089Spjd 6309236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6310168404Spjd } 6311168404Spjd 6312236143Smm if (ztest_opts.zo_verbose >= 1) { 6313236143Smm if (hasalt) { 6314236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6315236143Smm ztest_opts.zo_alt_ztest); 6316236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6317236143Smm cmd); 6318236143Smm } 6319168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6320168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6321168404Spjd } 6322168404Spjd 6323242845Sdelphij umem_free(cmd, MAXNAMELEN); 6324242845Sdelphij 6325168404Spjd return (0); 6326168404Spjd} 6327